Apply suggestions from code review

Co-authored-by: Milan Bouchet-Valat <nalimilan@club.fr>
JuliaData · Nov 7, 2020 · 114eddc · 114eddc
1 parent d88d0cd
commit 114eddc
Show file tree

Hide file tree

Showing 2 changed files with 3 additions and 3 deletions.
diff --git a/src/dataframerow/utils.jl b/src/dataframerow/utils.jl
@@ -43,8 +43,8 @@ function hashrows_col!(h::Vector{UInt},
                        firstcol::Bool)
     # When hashing the first column, no need to take into account previous hash,
     # which is always zero
-    # also when there are more than 90% of refs in the pool than the length of the
-    # vector avoid using this path. 90% is picked heuristically
+    # Also avoid this path when the pool holds at least half as many values as the
+    # vector has elements; the 50% threshold is roughly based on benchmarks
     if firstcol && 2 * length(rp) < length(v)
         hashes = Vector{UInt}(undef, length(rp))
         @inbounds for (i, v) in zip(eachindex(hashes), rp)

diff --git a/test/grouping.jl b/test/grouping.jl
@@ -3174,7 +3174,7 @@ end
 end
 
 @testset "hashing of pooled vectors" begin
-    # test both hashrow calculation paths - the of pool length thereshold is 50%
+    # test both hashrow calculation paths - the pool length threshold is 50%
     for x in ([1:9; fill(1, 101)], [1:100;],
               [1:9; fill(missing, 101)], [1:99; missing])
         x1 = PooledArray(x);