Skip to content

Commit

Permalink
Make compress argument to categorical a keyword argument
Browse files Browse the repository at this point in the history
Thanks to `@inline`, the compiler is now able to infer the return type when the
argument is omitted. Inference still fails when passing `compress=false`, but
that's the same as with the previous approach based on a positional argument.
  • Loading branch information
nalimilan committed Apr 10, 2020
1 parent c4a6124 commit 4130f5d
Show file tree
Hide file tree
Showing 7 changed files with 125 additions and 106 deletions.
18 changes: 8 additions & 10 deletions src/array.jl
Expand Up @@ -831,7 +831,7 @@ function Base.reshape(A::CategoricalArray{T, N}, dims::Dims) where {T, N}
end

"""
categorical{T}(A::AbstractArray{T}[, compress::Bool]; levels=nothing, ordered=false)
categorical(A::AbstractArray; compress=false, levels=nothing, ordered=false)
Construct a categorical array with the values from `A`.
Expand All @@ -843,27 +843,25 @@ in ascending order; else, they are kept in their order of appearance in `A`.
The `ordered` keyword argument determines whether the array values can be compared
according to the ordering of levels or not (see [`isordered`](@ref)).
If `compress` is provided and set to `true`, the smallest reference type able to hold the
If `compress` is `true`, the smallest reference type able to hold the
number of unique values in `A` will be used. While this will reduce memory use, passing
this parameter will also introduce a type instability which can affect performance inside
the function where the call is made. Therefore, use this option with caution (the
one-argument version does not suffer from this problem).
categorical(A::CategoricalArray, compress::Bool]; levels=nothing, ordered=false)
categorical(A::CategoricalArray; compress=false, levels=nothing, ordered=false)
If `A` is already a `CategoricalArray`, its levels, orderedness and reference type
are preserved unless explicitly overridden.
"""
function categorical end

categorical(A::AbstractArray; ordered=_isordered(A)) = CategoricalArray(A, ordered=ordered)

# Type-unstable methods
function categorical(A::AbstractArray{T, N}, compress; ordered=_isordered(A)) where {T, N}
# @inline is needed so that return type is inferred when compress is not provided
# (the reference type parameter of the returned array is selected from the value
# of `compress` below, so the return type depends on that value).
@inline function categorical(A::AbstractArray{T, N};
compress::Bool=false, ordered=_isordered(A)) where {T, N}
# With `compress=true` the reference type is derived from the number of unique
# values in `A`; otherwise `DefaultRefType` is used.
RefType = compress ? reftype(length(unique(A))) : DefaultRefType
CategoricalArray{T, N, RefType}(A, ordered=ordered)
end
function categorical(A::CategoricalArray{T, N, R}, compress; ordered=_isordered(A)) where {T, N, R}
# Method for inputs that are already `CategoricalArray`s: the existing reference
# type `R` is kept unless `compress=true`, in which case the reference type is
# derived from the number of levels of `A`.
@inline function categorical(A::CategoricalArray{T, N, R};
compress::Bool=false, ordered=_isordered(A)) where {T, N, R}
RefType = compress ? reftype(length(levels(A))) : R
CategoricalArray{T, N, RefType}(A, ordered=ordered)
end
Expand Down
4 changes: 3 additions & 1 deletion src/deprecated.jl
Expand Up @@ -127,4 +127,6 @@ import Unicode: normalize, graphemes
@deprecate replace(x::CategoricalValue{String}, old_new::Pair...; kwargs...) replace(String(x), old_new...; kwargs...)

@deprecate index(pool::CategoricalPool) levels(pool) false
@deprecate order(pool::CategoricalPool) 1:length(levels(pool)) false
@deprecate order(pool::CategoricalPool) 1:length(levels(pool)) false

@deprecate categorical(A::AbstractArray, compress::Bool; kwargs...) categorical(A; compress=compress, kwargs...)
12 changes: 6 additions & 6 deletions test/11_array.jl
Expand Up @@ -63,14 +63,14 @@ using CategoricalArrays: DefaultRefType, leveltype
(x, R, UInt8, true),
(x, R, R, false))

x2 = categorical(y, ordered=ordered)
x2 = @inferred categorical(y, ordered=ordered)
@test x2 == x
@test isa(x2, CategoricalVector{String, R1})
@test isordered(x2) === ordered
@test leveltype(x2) === String
@test eltype(x2) === CategoricalValue{String, R1}

x2 = categorical(y, comp, ordered=ordered)
x2 = categorical(y, compress=comp, ordered=ordered)
@test x2 == x
@test isa(x2, CategoricalVector{String, R2})
@test isordered(x2) === ordered
Expand Down Expand Up @@ -278,14 +278,14 @@ using CategoricalArrays: DefaultRefType, leveltype
(x, R, UInt8, true),
(x, R, R, false))

x2 = categorical(y, ordered=ordered)
x2 = @inferred categorical(y, ordered=ordered)
@test x2 == x
@test isa(x2, CategoricalVector{Float64, R1})
@test isordered(x2) === ordered
@test leveltype(x2) === Float64
@test eltype(x2) === CategoricalValue{Float64, R1}

x2 = categorical(y, comp, ordered=ordered)
x2 = categorical(y, compress=comp, ordered=ordered)
@test x2 == x
@test isa(x2, CategoricalVector{Float64, R2})
@test isordered(x2) === ordered
Expand Down Expand Up @@ -429,12 +429,12 @@ using CategoricalArrays: DefaultRefType, leveltype
(x, R, UInt8, true),
(x, R, R, false))

x2 = categorical(y, ordered=ordered)
x2 = @inferred categorical(y, ordered=ordered)
@test x2 == x
@test isa(x2, CategoricalMatrix{String, R1})
@test isordered(x2) === ordered

x2 = categorical(y, comp, ordered=ordered)
x2 = categorical(y, compress=comp, ordered=ordered)
@test x2 == x
@test isa(x2, CategoricalMatrix{String, R2})
@test isordered(x2) === ordered
Expand Down
20 changes: 10 additions & 10 deletions test/12_missingarray.jl
Expand Up @@ -66,7 +66,7 @@ const ≅ = isequal
(x, R, UInt8, true),
(x, R, R, false))

x2 = categorical(y, ordered=ordered)
x2 = @inferred categorical(y, ordered=ordered)
@test leveltype(x2) === String
@test nonmissingtype(eltype(x2)) === CategoricalValue{String, R1}
@test x2 == y
Expand All @@ -77,7 +77,7 @@ const ≅ = isequal
end
@test isordered(x2) === ordered

x2 = categorical(y, comp, ordered=ordered)
x2 = categorical(y, compress=comp, ordered=ordered)
@test x2 == y
@test leveltype(x2) === String
@test nonmissingtype(eltype(x2)) === CategoricalValue{String, R2}
Expand Down Expand Up @@ -284,7 +284,7 @@ const ≅ = isequal
(x, R, UInt8, true),
(x, R, R, false))

x2 = categorical(y, ordered=ordered)
x2 = @inferred categorical(y, ordered=ordered)
@test x2 ≅ y
if eltype(y) >: Missing
@test isa(x2, CategoricalVector{Union{String, Missing}, R1})
Expand All @@ -293,7 +293,7 @@ const ≅ = isequal
end
@test isordered(x2) === ordered

x2 = categorical(y, comp, ordered=ordered)
x2 = categorical(y, compress=comp, ordered=ordered)
@test x2 ≅ y
if eltype(y) >: Missing
@test isa(x2, CategoricalVector{Union{String, Missing}, R2})
Expand Down Expand Up @@ -447,7 +447,7 @@ const ≅ = isequal
(x, R, UInt8, true),
(x, R, R, false))

x2 = categorical(y, ordered=ordered)
x2 = @inferred categorical(y, ordered=ordered)
@test x2 == collect(y)
if eltype(y) >: Missing
@test isa(x2, CategoricalVector{Union{Float64, Missing}, R1})
Expand All @@ -458,7 +458,7 @@ const ≅ = isequal
@test leveltype(x2) === Float64
@test nonmissingtype(eltype(x2)) === CategoricalValue{Float64, R1}

x2 = categorical(y, comp, ordered=ordered)
x2 = categorical(y, compress=comp, ordered=ordered)
@test x2 == collect(y)
if eltype(y) >: Missing
@test isa(x2, CategoricalVector{Union{Float64, Missing}, R2})
Expand Down Expand Up @@ -615,7 +615,7 @@ const ≅ = isequal
(x, R, UInt8, true),
(x, R, R, false))

x2 = categorical(y, ordered=ordered)
x2 = @inferred categorical(y, ordered=ordered)
@test x2 == y
if eltype(y) >: Missing
@test isa(x2, CategoricalMatrix{Union{String, Missing}, R1})
Expand All @@ -624,7 +624,7 @@ const ≅ = isequal
end
@test isordered(x2) === ordered

x2 = categorical(y, comp, ordered=ordered)
x2 = categorical(y, compress=comp, ordered=ordered)
@test x2 == y
if eltype(y) >: Missing
@test isa(x2, CategoricalMatrix{Union{String, Missing}, R2})
Expand Down Expand Up @@ -756,12 +756,12 @@ const ≅ = isequal
(x, R, UInt8, true),
(x, R, R, false))

x2 = categorical(y, ordered=ordered)
x2 = @inferred categorical(y, ordered=ordered)
@test x2 ≅ y
@test isa(x2, CategoricalMatrix{Union{String, Missing}, R1})
@test isordered(x2) === ordered

x2 = categorical(y, comp, ordered=ordered)
x2 = categorical(y, compress=comp, ordered=ordered)
@test x2 ≅ y
@test isa(x2, CategoricalMatrix{Union{String, Missing}, R2})
@test isordered(x2) === ordered
Expand Down
155 changes: 77 additions & 78 deletions test/13_arraycommon.jl
Expand Up @@ -966,9 +966,9 @@ end
@test y.refs !== x.refs
@test y.pool !== x.pool
end
for y in (categorical(x),
categorical(x, false),
categorical(x, true))
for y in (@inferred(categorical(x)),
categorical(x, compress=false),
categorical(x, compress=true))
@test isa(y, CategoricalArray{T, N})
@test isordered(y) === isordered(x)
@test isordered(x) === ordered_orig
Expand Down Expand Up @@ -996,9 +996,9 @@ end
@test y.refs !== x.refs
@test y.pool !== x.pool
end
for y in (categorical(x, ordered=ordered),
categorical(x, false, ordered=ordered),
categorical(x, true, ordered=ordered))
for y in (@inferred(categorical(x, ordered=ordered)),
categorical(x, compress=false, ordered=ordered),
categorical(x, compress=true, ordered=ordered))
@test isa(y, CategoricalArray{T, N})
@test isordered(y) === ordered
@test isordered(x) === ordered_orig
Expand All @@ -1023,87 +1023,86 @@ end
end
end


@testset "levels argument to constructors" begin
for T in (String, Union{String, Missing}),
ord in (false, true),
levs in (nothing, [], ["a"], ["b", "c", "a"])
for (U, x) in ((String, CategoricalArray(undef, 2, levels=levs, ordered=ord)),
(T, CategoricalArray{T}(undef, 2, levels=levs, ordered=ord)),
(T, CategoricalArray{T, 1}(undef, 2, levels=levs, ordered=ord)),
(T, CategoricalArray{T, 1, UInt32}(undef, 2, levels=levs, ordered=ord)),
(String, CategoricalVector(undef, 2, levels=levs, ordered=ord)),
(T, CategoricalVector{T}(undef, 2, levels=levs, ordered=ord)),
(T, CategoricalVector{T, UInt32}(undef, 2, levels=levs, ordered=ord)),
(String, CategoricalArray(undef, 2, 3, levels=levs, ordered=ord)),
(T, CategoricalArray{T}(undef, 2, 3, levels=levs, ordered=ord)),
(T, CategoricalArray{T, 2}(undef, 2, 3, levels=levs, ordered=ord)),
(T, CategoricalArray{T, 2, UInt32}(undef, 2, 3, levels=levs, ordered=ord)),
(String, CategoricalMatrix(undef, 2, 3, levels=levs, ordered=ord)),
(T, CategoricalMatrix{T}(undef, 2, 3, levels=levs, ordered=ord)),
(T, CategoricalMatrix{T, UInt32}(undef, 2, 3, levels=levs, ordered=ord)))
@test x isa CategoricalArray{U, <:Any, UInt32}
if U >: Missing
@test all(ismissing, x)
else
@test !any(i -> isassigned(x, i), eachindex(x))
end
@test levels(x) == something(levs, [])
@test isordered(x) === ord
@test CategoricalArrays.pool(x).levels !== levs
end

v = T["b", "c", "a"]
if levs === nothing || unique(v) ⊆ levs
for x in (CategoricalArray(v, levels=levs, ordered=ord),
CategoricalArray{T}(v, levels=levs, ordered=ord),
CategoricalArray{T, 1}(v, levels=levs, ordered=ord),
CategoricalArray{T, 1, UInt32}(v, levels=levs, ordered=ord),
CategoricalVector(v, levels=levs, ordered=ord),
CategoricalVector{T}(v, levels=levs, ordered=ord),
CategoricalVector{T, UInt32}(v, levels=levs, ordered=ord),
CategoricalArray(v, levels=levs, ordered=ord))
@test x isa CategoricalVector{T, UInt32}
@test x == v
@test levels(x) == something(levs, sort!(unique(x)))
@test isordered(x) === ord
@test CategoricalArrays.pool(x).levels !== levs
end
@testset "levels argument to constructors" begin
for T in (String, Union{String, Missing}),
ord in (false, true),
levs in (nothing, [], ["a"], ["b", "c", "a"])
for (U, x) in ((String, CategoricalArray(undef, 2, levels=levs, ordered=ord)),
(T, CategoricalArray{T}(undef, 2, levels=levs, ordered=ord)),
(T, CategoricalArray{T, 1}(undef, 2, levels=levs, ordered=ord)),
(T, CategoricalArray{T, 1, UInt32}(undef, 2, levels=levs, ordered=ord)),
(String, CategoricalVector(undef, 2, levels=levs, ordered=ord)),
(T, CategoricalVector{T}(undef, 2, levels=levs, ordered=ord)),
(T, CategoricalVector{T, UInt32}(undef, 2, levels=levs, ordered=ord)),
(String, CategoricalArray(undef, 2, 3, levels=levs, ordered=ord)),
(T, CategoricalArray{T}(undef, 2, 3, levels=levs, ordered=ord)),
(T, CategoricalArray{T, 2}(undef, 2, 3, levels=levs, ordered=ord)),
(T, CategoricalArray{T, 2, UInt32}(undef, 2, 3, levels=levs, ordered=ord)),
(String, CategoricalMatrix(undef, 2, 3, levels=levs, ordered=ord)),
(T, CategoricalMatrix{T}(undef, 2, 3, levels=levs, ordered=ord)),
(T, CategoricalMatrix{T, UInt32}(undef, 2, 3, levels=levs, ordered=ord)))
@test x isa CategoricalArray{U, <:Any, UInt32}
if U >: Missing
@test all(ismissing, x)
else
@test_throws ArgumentError CategoricalArray(v, levels=levs, ordered=ord)
@test_throws ArgumentError CategoricalArray{T}(v, levels=levs, ordered=ord)
@test_throws ArgumentError CategoricalArray{T, 1}(v, levels=levs, ordered=ord)
@test_throws ArgumentError CategoricalArray{T, 1, UInt32}(v, levels=levs, ordered=ord)
@test_throws ArgumentError CategoricalVector(v, levels=levs, ordered=ord)
@test_throws ArgumentError CategoricalVector{T}(v, levels=levs, ordered=ord)
@test_throws ArgumentError CategoricalVector{T, UInt32}(v, levels=levs, ordered=ord)
@test !any(i -> isassigned(x, i), eachindex(x))
end
@test levels(x) == something(levs, [])
@test isordered(x) === ord
@test CategoricalArrays.pool(x).levels !== levs
end

m = T["c" "b"; "a" "b"]
if levs === nothing || unique(m) ⊆ levs
for x in (CategoricalArray{T}(m, levels=levs, ordered=ord),
CategoricalArray{T, 2}(m, levels=levs, ordered=ord),
CategoricalArray{T, 2, UInt32}(m, levels=levs, ordered=ord),
CategoricalMatrix(m, levels=levs, ordered=ord),
CategoricalMatrix{T}(m, levels=levs, ordered=ord),
CategoricalMatrix{T, UInt32}(m, levels=levs, ordered=ord))
@test x isa CategoricalMatrix{T, UInt32}
@test x == m
v = T["b", "c", "a"]
if levs === nothing || unique(v) ⊆ levs
for x in (CategoricalArray(v, levels=levs, ordered=ord),
CategoricalArray{T}(v, levels=levs, ordered=ord),
CategoricalArray{T, 1}(v, levels=levs, ordered=ord),
CategoricalArray{T, 1, UInt32}(v, levels=levs, ordered=ord),
CategoricalVector(v, levels=levs, ordered=ord),
CategoricalVector{T}(v, levels=levs, ordered=ord),
CategoricalVector{T, UInt32}(v, levels=levs, ordered=ord),
CategoricalArray(v, levels=levs, ordered=ord))
@test x isa CategoricalVector{T, UInt32}
@test x == v
@test levels(x) == something(levs, sort!(unique(x)))
@test isordered(x) === ord
@test CategoricalArrays.pool(x).levels !== levs
end
else
@test_throws ArgumentError CategoricalArray(m, levels=levs, ordered=ord)
@test_throws ArgumentError CategoricalArray{T}(m, levels=levs, ordered=ord)
@test_throws ArgumentError CategoricalArray{T, 2}(m, levels=levs, ordered=ord)
@test_throws ArgumentError CategoricalArray{T, 2, UInt32}(m, levels=levs, ordered=ord)
@test_throws ArgumentError CategoricalMatrix(m, levels=levs, ordered=ord)
@test_throws ArgumentError CategoricalMatrix{T}(m, levels=levs, ordered=ord)
@test_throws ArgumentError CategoricalMatrix{T, UInt32}(m, levels=levs, ordered=ord)
end
else
@test_throws ArgumentError CategoricalArray(v, levels=levs, ordered=ord)
@test_throws ArgumentError CategoricalArray{T}(v, levels=levs, ordered=ord)
@test_throws ArgumentError CategoricalArray{T, 1}(v, levels=levs, ordered=ord)
@test_throws ArgumentError CategoricalArray{T, 1, UInt32}(v, levels=levs, ordered=ord)
@test_throws ArgumentError CategoricalVector(v, levels=levs, ordered=ord)
@test_throws ArgumentError CategoricalVector{T}(v, levels=levs, ordered=ord)
@test_throws ArgumentError CategoricalVector{T, UInt32}(v, levels=levs, ordered=ord)
end

m = T["c" "b"; "a" "b"]
if levs === nothing || unique(m) ⊆ levs
for x in (CategoricalArray{T}(m, levels=levs, ordered=ord),
CategoricalArray{T, 2}(m, levels=levs, ordered=ord),
CategoricalArray{T, 2, UInt32}(m, levels=levs, ordered=ord),
CategoricalMatrix(m, levels=levs, ordered=ord),
CategoricalMatrix{T}(m, levels=levs, ordered=ord),
CategoricalMatrix{T, UInt32}(m, levels=levs, ordered=ord))
@test x isa CategoricalMatrix{T, UInt32}
@test x == m
@test levels(x) == something(levs, sort!(unique(x)))
@test isordered(x) === ord
@test CategoricalArrays.pool(x).levels !== levs
end
else
@test_throws ArgumentError CategoricalArray(m, levels=levs, ordered=ord)
@test_throws ArgumentError CategoricalArray{T}(m, levels=levs, ordered=ord)
@test_throws ArgumentError CategoricalArray{T, 2}(m, levels=levs, ordered=ord)
@test_throws ArgumentError CategoricalArray{T, 2, UInt32}(m, levels=levs, ordered=ord)
@test_throws ArgumentError CategoricalMatrix(m, levels=levs, ordered=ord)
@test_throws ArgumentError CategoricalMatrix{T}(m, levels=levs, ordered=ord)
@test_throws ArgumentError CategoricalMatrix{T, UInt32}(m, levels=levs, ordered=ord)
end
end
end

@testset "converting from array with missings to array without missings CategoricalArray fails with missings" begin
x = CategoricalArray{Union{String, Missing}}(undef, 1)
Expand Down

0 comments on commit 4130f5d

Please sign in to comment.