Skip to content

Commit

Permalink
Fix sort by always using QuickSort
Browse files Browse the repository at this point in the history
`MergeSort` is used by default, and it fails since it attempts to compare categorical values
from different pools. Always use `QuickSort`, which gives the same result anyway since
categorical values are always strictly ordered, and is as fast as sorting an integer array.
  • Loading branch information
nalimilan committed Nov 13, 2019
1 parent b0139c1 commit e62ec8b
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 1 deletion.
14 changes: 13 additions & 1 deletion src/array.jl
Original file line number Diff line number Diff line change
Expand Up @@ -797,4 +797,16 @@ Base.Broadcast.broadcasted(::typeof(ismissing), A::CategoricalArray{T}) where {T

Base.Broadcast.broadcasted(::typeof(!ismissing), A::CategoricalArray{T}) where {T} =
T >: Missing ? Base.Broadcast.broadcasted(>, A.refs, 0) :
Base.Broadcast.broadcasted(_ -> true, A.refs)
Base.Broadcast.broadcasted(_ -> true, A.refs)

function Base.sort!(v::CategoricalVector;
alg::Base.Algorithm=QuickSort,
lt=isless,
by=identity,
rev::Bool=false,
order::Base.Ordering=Base.Forward)
# MergeSort errors when trying to compare values from different pools
# Stability does not make a difference anyway since categorical values
# are always strictly ordered
invoke(sort!, Tuple{AbstractVector}, v, alg=QuickSort, lt=lt, by=by, rev=rev, order=order)
end
17 changes: 17 additions & 0 deletions test/13_arraycommon.jl
Original file line number Diff line number Diff line change
Expand Up @@ -689,6 +689,23 @@ end
@test sort(x) == ["Young", "Young", "Middle", "Old"]
@test sort!(x) === x
@test x == ["Young", "Young", "Middle", "Old"]
@test levels(x) == ["Young", "Middle", "Old"]

# Default sort method uses MergeSort (which fails) only beyond
# a certain number of elements
if T === Missing
y = rand([1:10; missing], 1000)
else
y = rand(1:10, 1000)
end
cy = categorical(y)
levels!(cy, collect(10:-1:1))
res = [sort(collect(skipmissing(y)), rev=true);
fill(missing, count(ismissing, y))]
@test sort(cy) res
@test sort!(cy) === cy
@test cy res
@test levels(cy) == 10:-1:1
end
end

Expand Down

0 comments on commit e62ec8b

Please sign in to comment.