Skip to content
This repository has been archived by the owner on May 5, 2019. It is now read-only.

Commit

Permalink
Specialize row_group_slots() on key column types to improve performance
Browse files Browse the repository at this point in the history
Looping over columns is very slow when their type is unknown at compile time.
Specialize the method on the types of the key (grouping) columns by passing
a tuple of columns rather than a DataTable. This will force compiling a specific
method for each combination of key types, but their number should remain relatively low
and the one-time cost is worth it.
  • Loading branch information
nalimilan committed Jul 11, 2017
1 parent 3875020 commit 9de6b1a
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 12 deletions.
5 changes: 5 additions & 0 deletions src/datatablerow/datatablerow.jl
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,11 @@ isequal_colel(a::Nullable, b::Any) = !isnull(a) & isequal(unsafe_get(a), b)
# Mixed case with the `Nullable` on the right: swap the arguments so that the
# `(::Nullable, ::Any)` method performs the comparison.
function isequal_colel(a::Any, b::Nullable)
    return isequal_colel(b, a)
end

# Both elements are `Nullable`: defer to `isequal`. Being more specific than either
# mixed method, this one is selected whenever both arguments are `Nullable`.
function isequal_colel(a::Nullable, b::Nullable)
    return isequal(a, b)
end

# Test whether rows `r1` and `r2` hold equal values in every column of the tuple `cols`,
# comparing elements with `isequal_colel`.
#
# Base case: a single column is left, so the result is that column's comparison.
function isequal_row(cols::Tuple{AbstractVector}, r1::Int, r2::Int)
    col = cols[1]
    return isequal_colel(col[r1], col[r2])
end

# Recursive case: compare the first column, then recurse on the remaining columns
# (`Base.tail(cols)`). Stops at the first column in which the two rows differ.
function isequal_row(cols::Tuple{Vararg{AbstractVector}}, r1::Int, r2::Int)
    col = cols[1]
    isequal_colel(col[r1], col[r2]) || return false
    return isequal_row(Base.tail(cols), r1, r2)
end

function isequal_row(dt1::AbstractDataTable, r1::Int, dt2::AbstractDataTable, r2::Int)
if dt1 === dt2
if r1 == r2
Expand Down
20 changes: 8 additions & 12 deletions src/datatablerow/utils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -80,10 +80,13 @@ end
# 3) slot array for a hash map, non-zero values are
# the indices of the first row in a group
# Optional group vector is set to the group indices of each row
function row_group_slots(dt::AbstractDataTable,
# Convenience method taking a data table: collect its columns into a tuple, hash its
# rows, and delegate to the tuple-based method so that the work is done by a method
# specialized on the column types.
function row_group_slots(dt::AbstractDataTable,
                         groups::Union{Vector{Int}, Void} = nothing)
    cols = ntuple(i -> dt[i], ncol(dt))
    rhashes = hashrows(dt)
    return row_group_slots(cols, rhashes, groups)
end

function row_group_slots(cols::Tuple{Vararg{AbstractVector}},
rhashes::AbstractVector{UInt},
groups::Union{Vector{Int}, Void} = nothing)
@assert groups === nothing || length(groups) == nrow(dt)
rhashes = hashrows(dt)
@assert groups === nothing || length(groups) == length(cols[1])
# inspired by Dict code from base cf. https://github.com/JuliaData/DataTables.jl/pull/17#discussion_r102481481
sz = Base._tablesz(length(rhashes))
@assert sz >= length(rhashes)
Expand All @@ -102,17 +105,10 @@ function row_group_slots(dt::AbstractDataTable,
gix = ngroups += 1
break
elseif rhashes[i] == rhashes[g_row] # occupied slot, check if miss or hit
eq = true
for col in columns(dt)
if !isequal_colel(col, i, g_row)
eq = false # miss
break
end
end
if eq # hit
if isequal_row(cols, i, g_row) # hit
gix = groups !== nothing ? groups[g_row] : 0
break
end
break
end
slotix = slotix & szm1 + 1 # check the next slot
probe += 1
Expand Down

0 comments on commit 9de6b1a

Please sign in to comment.