From 4130f5d6af8105aa5137278b2c29829a280d66e3 Mon Sep 17 00:00:00 2001 From: Milan Bouchet-Valat Date: Fri, 10 Apr 2020 11:53:51 +0200 Subject: [PATCH] Make compress argument to `categorical` a keyword argument The compiler is now able to infer the return type when the argument is omitted with `@inline`. Though inference fails when passing `compress=false`, but that's the same as with the previous approach based on a positional argument. --- src/array.jl | 18 +++-- src/deprecated.jl | 4 +- test/11_array.jl | 12 ++-- test/12_missingarray.jl | 20 +++--- test/13_arraycommon.jl | 155 ++++++++++++++++++++-------------------- test/17_deprecated.jl | 19 +++++ test/runtests.jl | 3 +- 7 files changed, 125 insertions(+), 106 deletions(-) create mode 100644 test/17_deprecated.jl diff --git a/src/array.jl b/src/array.jl index 5be48a24..94018b0d 100644 --- a/src/array.jl +++ b/src/array.jl @@ -831,7 +831,7 @@ function Base.reshape(A::CategoricalArray{T, N}, dims::Dims) where {T, N} end """ - categorical{T}(A::AbstractArray{T}[, compress::Bool]; levels=nothing, ordered=false) + categorical(A::AbstractArray; compress=false, levels=nothing, ordered=false) Construct a categorical array with the values from `A`. @@ -843,27 +843,25 @@ in ascending order; else, they are kept in their order of appearance in `A`. The `ordered` keyword argument determines whether the array values can be compared according to the ordering of levels or not (see [`isordered`](@ref)). -If `compress` is provided and set to `true`, the smallest reference type able to hold the +If `compress` is `true`, the smallest reference type able to hold the number of unique values in `A` will be used. While this will reduce memory use, passing this parameter will also introduce a type instability which can affect performance inside the function where the call is made. Therefore, use this option with caution (the one-argument version does not suffer from this problem). - categorical(A::CategoricalArray, compress::Bool]; levels=nothing, ordered=false) + categorical(A::CategoricalArray; compress=false, levels=nothing, ordered=false) If `A` is already a `CategoricalArray`, its levels, orderedness and reference type are preserved unless explicitly overriden. """ -function categorical end - -categorical(A::AbstractArray; ordered=_isordered(A)) = CategoricalArray(A, ordered=ordered) - -# Type-unstable methods -function categorical(A::AbstractArray{T, N}, compress; ordered=_isordered(A)) where {T, N} +# @inline is needed so that return type is inferred when compress is not provided +@inline function categorical(A::AbstractArray{T, N}; + compress::Bool=false, ordered=_isordered(A)) where {T, N} RefType = compress ? reftype(length(unique(A))) : DefaultRefType CategoricalArray{T, N, RefType}(A, ordered=ordered) end -function categorical(A::CategoricalArray{T, N, R}, compress; ordered=_isordered(A)) where {T, N, R} +@inline function categorical(A::CategoricalArray{T, N, R}; + compress::Bool=false, ordered=_isordered(A)) where {T, N, R} RefType = compress ? reftype(length(levels(A))) : R CategoricalArray{T, N, RefType}(A, ordered=ordered) end diff --git a/src/deprecated.jl b/src/deprecated.jl index 90faf28b..3a6da077 100644 --- a/src/deprecated.jl +++ b/src/deprecated.jl @@ -127,4 +127,6 @@ import Unicode: normalize, graphemes @deprecate replace(x::CategoricalValue{String}, old_new::Pair...; kwargs...) replace(String(x), old_new...; kwargs...) @deprecate index(pool::CategoricalPool) levels(pool) false -@deprecate order(pool::CategoricalPool) 1:length(levels(pool)) false \ No newline at end of file +@deprecate order(pool::CategoricalPool) 1:length(levels(pool)) false + +@deprecate categorical(A::AbstractArray, compress::Bool; kwargs...) categorical(A; compress=compress, kwargs...) \ No newline at end of file diff --git a/test/11_array.jl b/test/11_array.jl index bc9670f9..a35ff89a 100644 --- a/test/11_array.jl +++ b/test/11_array.jl @@ -63,14 +63,14 @@ using CategoricalArrays: DefaultRefType, leveltype (x, R, UInt8, true), (x, R, R, false)) - x2 = categorical(y, ordered=ordered) + x2 = @inferred categorical(y, ordered=ordered) @test x2 == x @test isa(x2, CategoricalVector{String, R1}) @test isordered(x2) === ordered @test leveltype(x2) === String @test eltype(x2) === CategoricalValue{String, R1} - x2 = categorical(y, comp, ordered=ordered) + x2 = categorical(y, compress=comp, ordered=ordered) @test x2 == x @test isa(x2, CategoricalVector{String, R2}) @test isordered(x2) === ordered @@ -278,14 +278,14 @@ using CategoricalArrays: DefaultRefType, leveltype (x, R, UInt8, true), (x, R, R, false)) - x2 = categorical(y, ordered=ordered) + x2 = @inferred categorical(y, ordered=ordered) @test x2 == x @test isa(x2, CategoricalVector{Float64, R1}) @test isordered(x2) === ordered @test leveltype(x2) === Float64 @test eltype(x2) === CategoricalValue{Float64, R1} - x2 = categorical(y, comp, ordered=ordered) + x2 = categorical(y, compress=comp, ordered=ordered) @test x2 == x @test isa(x2, CategoricalVector{Float64, R2}) @test isordered(x2) === ordered @@ -429,12 +429,12 @@ using CategoricalArrays: DefaultRefType, leveltype (x, R, UInt8, true), (x, R, R, false)) - x2 = categorical(y, ordered=ordered) + x2 = @inferred categorical(y, ordered=ordered) @test x2 == x @test isa(x2, CategoricalMatrix{String, R1}) @test isordered(x2) === ordered - x2 = categorical(y, comp, ordered=ordered) + x2 = categorical(y, compress=comp, ordered=ordered) @test x2 == x @test isa(x2, CategoricalMatrix{String, R2}) @test isordered(x2) === ordered diff --git a/test/12_missingarray.jl b/test/12_missingarray.jl index 73bfc609..c71406f8 100644 --- a/test/12_missingarray.jl +++ b/test/12_missingarray.jl @@ -66,7 +66,7 @@ const ≅ = isequal (x, R, UInt8, true), (x, R, R, false)) - x2 = categorical(y, ordered=ordered) + x2 = @inferred categorical(y, ordered=ordered) @test leveltype(x2) === String @test nonmissingtype(eltype(x2)) === CategoricalValue{String, R1} @test x2 == y @@ -77,7 +77,7 @@ const ≅ = isequal end @test isordered(x2) === ordered - x2 = categorical(y, comp, ordered=ordered) + x2 = categorical(y, compress=comp, ordered=ordered) @test x2 == y @test leveltype(x2) === String @test nonmissingtype(eltype(x2)) === CategoricalValue{String, R2} @@ -284,7 +284,7 @@ const ≅ = isequal (x, R, UInt8, true), (x, R, R, false)) - x2 = categorical(y, ordered=ordered) + x2 = @inferred categorical(y, ordered=ordered) @test x2 ≅ y if eltype(y) >: Missing @test isa(x2, CategoricalVector{Union{String, Missing}, R1}) @@ -293,7 +293,7 @@ const ≅ = isequal end @test isordered(x2) === ordered - x2 = categorical(y, comp, ordered=ordered) + x2 = categorical(y, compress=comp, ordered=ordered) @test x2 ≅ y if eltype(y) >: Missing @test isa(x2, CategoricalVector{Union{String, Missing}, R2}) @@ -447,7 +447,7 @@ const ≅ = isequal (x, R, UInt8, true), (x, R, R, false)) - x2 = categorical(y, ordered=ordered) + x2 = @inferred categorical(y, ordered=ordered) @test x2 == collect(y) if eltype(y) >: Missing @test isa(x2, CategoricalVector{Union{Float64, Missing}, R1}) @@ -458,7 +458,7 @@ const ≅ = isequal @test leveltype(x2) === Float64 @test nonmissingtype(eltype(x2)) === CategoricalValue{Float64, R1} - x2 = categorical(y, comp, ordered=ordered) + x2 = categorical(y, compress=comp, ordered=ordered) @test x2 == collect(y) if eltype(y) >: Missing @test isa(x2, CategoricalVector{Union{Float64, Missing}, R2}) @@ -615,7 +615,7 @@ const ≅ = isequal (x, R, UInt8, true), (x, R, R, false)) - x2 = categorical(y, ordered=ordered) + x2 = @inferred categorical(y, ordered=ordered) @test x2 == y if eltype(y) >: Missing @test isa(x2, CategoricalMatrix{Union{String, Missing}, R1}) @@ -624,7 +624,7 @@ const ≅ = isequal end @test isordered(x2) === ordered - x2 = categorical(y, comp, ordered=ordered) + x2 = categorical(y, compress=comp, ordered=ordered) @test x2 == y if eltype(y) >: Missing @test isa(x2, CategoricalMatrix{Union{String, Missing}, R2}) @@ -756,12 +756,12 @@ const ≅ = isequal (x, R, UInt8, true), (x, R, R, false)) - x2 = categorical(y, ordered=ordered) + x2 = @inferred categorical(y, ordered=ordered) @test x2 ≅ y @test isa(x2, CategoricalMatrix{Union{String, Missing}, R1}) @test isordered(x2) === ordered - x2 = categorical(y, comp, ordered=ordered) + x2 = categorical(y, compress=comp, ordered=ordered) @test x2 ≅ y @test isa(x2, CategoricalMatrix{Union{String, Missing}, R2}) @test isordered(x2) === ordered diff --git a/test/13_arraycommon.jl b/test/13_arraycommon.jl index 9ec7bbe5..8f184e9f 100644 --- a/test/13_arraycommon.jl +++ b/test/13_arraycommon.jl @@ -966,9 +966,9 @@ end @test y.refs !== x.refs @test y.pool !== x.pool end - for y in (categorical(x), - categorical(x, false), - categorical(x, true)) + for y in (@inferred(categorical(x)), + categorical(x, compress=false), + categorical(x, compress=true)) @test isa(y, CategoricalArray{T, N}) @test isordered(y) === isordered(x) @test isordered(x) === ordered_orig @@ -996,9 +996,9 @@ end @test y.refs !== x.refs @test y.pool !== x.pool end - for y in (categorical(x, ordered=ordered), - categorical(x, false, ordered=ordered), - categorical(x, true, ordered=ordered)) + for y in (@inferred(categorical(x, ordered=ordered)), + categorical(x, compress=false, ordered=ordered), + categorical(x, compress=true, ordered=ordered)) @test isa(y, CategoricalArray{T, N}) @test isordered(y) === ordered @test isordered(x) === ordered_orig @@ -1023,87 +1023,86 @@ end end end - - @testset "levels argument to constructors" begin - for T in (String, Union{String, Missing}), - ord in (false, true), - levs in (nothing, [], ["a"], ["b", "c", "a"]) - for (U, x) in ((String, CategoricalArray(undef, 2, levels=levs, ordered=ord)), - (T, CategoricalArray{T}(undef, 2, levels=levs, ordered=ord)), - (T, CategoricalArray{T, 1}(undef, 2, levels=levs, ordered=ord)), - (T, CategoricalArray{T, 1, UInt32}(undef, 2, levels=levs, ordered=ord)), - (String, CategoricalVector(undef, 2, levels=levs, ordered=ord)), - (T, CategoricalVector{T}(undef, 2, levels=levs, ordered=ord)), - (T, CategoricalVector{T, UInt32}(undef, 2, levels=levs, ordered=ord)), - (String, CategoricalArray(undef, 2, 3, levels=levs, ordered=ord)), - (T, CategoricalArray{T}(undef, 2, 3, levels=levs, ordered=ord)), - (T, CategoricalArray{T, 2}(undef, 2, 3, levels=levs, ordered=ord)), - (T, CategoricalArray{T, 2, UInt32}(undef, 2, 3, levels=levs, ordered=ord)), - (String, CategoricalMatrix(undef, 2, 3, levels=levs, ordered=ord)), - (T, CategoricalMatrix{T}(undef, 2, 3, levels=levs, ordered=ord)), - (T, CategoricalMatrix{T, UInt32}(undef, 2, 3, levels=levs, ordered=ord))) - @test x isa CategoricalArray{U, <:Any, UInt32} - if U >: Missing - @test all(ismissing, x) - else - @test !any(i -> isassigned(x, i), eachindex(x)) - end - @test levels(x) == something(levs, []) - @test isordered(x) === ord - @test CategoricalArrays.pool(x).levels !== levs - end - - v = T["b", "c", "a"] - if levs === nothing || unique(v) ⊆ levs - for x in (CategoricalArray(v, levels=levs, ordered=ord), - CategoricalArray{T}(v, levels=levs, ordered=ord), - CategoricalArray{T, 1}(v, levels=levs, ordered=ord), - CategoricalArray{T, 1, UInt32}(v, levels=levs, ordered=ord), - CategoricalVector(v, levels=levs, ordered=ord), - CategoricalVector{T}(v, levels=levs, ordered=ord), - CategoricalVector{T, UInt32}(v, levels=levs, ordered=ord), - CategoricalArray(v, levels=levs, ordered=ord)) - @test x isa CategoricalVector{T, UInt32} - @test x == v - @test levels(x) == something(levs, sort!(unique(x))) - @test isordered(x) === ord - @test CategoricalArrays.pool(x).levels !== levs - end +@testset "levels argument to constructors" begin + for T in (String, Union{String, Missing}), + ord in (false, true), + levs in (nothing, [], ["a"], ["b", "c", "a"]) + for (U, x) in ((String, CategoricalArray(undef, 2, levels=levs, ordered=ord)), + (T, CategoricalArray{T}(undef, 2, levels=levs, ordered=ord)), + (T, CategoricalArray{T, 1}(undef, 2, levels=levs, ordered=ord)), + (T, CategoricalArray{T, 1, UInt32}(undef, 2, levels=levs, ordered=ord)), + (String, CategoricalVector(undef, 2, levels=levs, ordered=ord)), + (T, CategoricalVector{T}(undef, 2, levels=levs, ordered=ord)), + (T, CategoricalVector{T, UInt32}(undef, 2, levels=levs, ordered=ord)), + (String, CategoricalArray(undef, 2, 3, levels=levs, ordered=ord)), + (T, CategoricalArray{T}(undef, 2, 3, levels=levs, ordered=ord)), + (T, CategoricalArray{T, 2}(undef, 2, 3, levels=levs, ordered=ord)), + (T, CategoricalArray{T, 2, UInt32}(undef, 2, 3, levels=levs, ordered=ord)), + (String, CategoricalMatrix(undef, 2, 3, levels=levs, ordered=ord)), + (T, CategoricalMatrix{T}(undef, 2, 3, levels=levs, ordered=ord)), + (T, CategoricalMatrix{T, UInt32}(undef, 2, 3, levels=levs, ordered=ord))) + @test x isa CategoricalArray{U, <:Any, UInt32} + if U >: Missing + @test all(ismissing, x) else - @test_throws ArgumentError CategoricalArray(v, levels=levs, ordered=ord) - @test_throws ArgumentError CategoricalArray{T}(v, levels=levs, ordered=ord) - @test_throws ArgumentError CategoricalArray{T, 1}(v, levels=levs, ordered=ord) - @test_throws ArgumentError CategoricalArray{T, 1, UInt32}(v, levels=levs, ordered=ord) - @test_throws ArgumentError CategoricalVector(v, levels=levs, ordered=ord) - @test_throws ArgumentError CategoricalVector{T}(v, levels=levs, ordered=ord) - @test_throws ArgumentError CategoricalVector{T, UInt32}(v, levels=levs, ordered=ord) + @test !any(i -> isassigned(x, i), eachindex(x)) end + @test levels(x) == something(levs, []) + @test isordered(x) === ord + @test CategoricalArrays.pool(x).levels !== levs + end - m = T["c" "b"; "a" "b"] - if levs === nothing || unique(m) ⊆ levs - for x in (CategoricalArray{T}(m, levels=levs, ordered=ord), - CategoricalArray{T, 2}(m, levels=levs, ordered=ord), - CategoricalArray{T, 2, UInt32}(m, levels=levs, ordered=ord), - CategoricalMatrix(m, levels=levs, ordered=ord), - CategoricalMatrix{T}(m, levels=levs, ordered=ord), - CategoricalMatrix{T, UInt32}(m, levels=levs, ordered=ord)) - @test x isa CategoricalMatrix{T, UInt32} - @test x == m + v = T["b", "c", "a"] + if levs === nothing || unique(v) ⊆ levs + for x in (CategoricalArray(v, levels=levs, ordered=ord), + CategoricalArray{T}(v, levels=levs, ordered=ord), + CategoricalArray{T, 1}(v, levels=levs, ordered=ord), + CategoricalArray{T, 1, UInt32}(v, levels=levs, ordered=ord), + CategoricalVector(v, levels=levs, ordered=ord), + CategoricalVector{T}(v, levels=levs, ordered=ord), + CategoricalVector{T, UInt32}(v, levels=levs, ordered=ord), + CategoricalArray(v, levels=levs, ordered=ord)) + @test x isa CategoricalVector{T, UInt32} + @test x == v @test levels(x) == something(levs, sort!(unique(x))) @test isordered(x) === ord @test CategoricalArrays.pool(x).levels !== levs - end - else - @test_throws ArgumentError CategoricalArray(m, levels=levs, ordered=ord) - @test_throws ArgumentError CategoricalArray{T}(m, levels=levs, ordered=ord) - @test_throws ArgumentError CategoricalArray{T, 2}(m, levels=levs, ordered=ord) - @test_throws ArgumentError CategoricalArray{T, 2, UInt32}(m, levels=levs, ordered=ord) - @test_throws ArgumentError CategoricalMatrix(m, levels=levs, ordered=ord) - @test_throws ArgumentError CategoricalMatrix{T}(m, levels=levs, ordered=ord) - @test_throws ArgumentError CategoricalMatrix{T, UInt32}(m, levels=levs, ordered=ord) end + else + @test_throws ArgumentError CategoricalArray(v, levels=levs, ordered=ord) + @test_throws ArgumentError CategoricalArray{T}(v, levels=levs, ordered=ord) + @test_throws ArgumentError CategoricalArray{T, 1}(v, levels=levs, ordered=ord) + @test_throws ArgumentError CategoricalArray{T, 1, UInt32}(v, levels=levs, ordered=ord) + @test_throws ArgumentError CategoricalVector(v, levels=levs, ordered=ord) + @test_throws ArgumentError CategoricalVector{T}(v, levels=levs, ordered=ord) + @test_throws ArgumentError CategoricalVector{T, UInt32}(v, levels=levs, ordered=ord) + end + + m = T["c" "b"; "a" "b"] + if levs === nothing || unique(m) ⊆ levs + for x in (CategoricalArray{T}(m, levels=levs, ordered=ord), + CategoricalArray{T, 2}(m, levels=levs, ordered=ord), + CategoricalArray{T, 2, UInt32}(m, levels=levs, ordered=ord), + CategoricalMatrix(m, levels=levs, ordered=ord), + CategoricalMatrix{T}(m, levels=levs, ordered=ord), + CategoricalMatrix{T, UInt32}(m, levels=levs, ordered=ord)) + @test x isa CategoricalMatrix{T, UInt32} + @test x == m + @test levels(x) == something(levs, sort!(unique(x))) + @test isordered(x) === ord + @test CategoricalArrays.pool(x).levels !== levs + end + else + @test_throws ArgumentError CategoricalArray(m, levels=levs, ordered=ord) + @test_throws ArgumentError CategoricalArray{T}(m, levels=levs, ordered=ord) + @test_throws ArgumentError CategoricalArray{T, 2}(m, levels=levs, ordered=ord) + @test_throws ArgumentError CategoricalArray{T, 2, UInt32}(m, levels=levs, ordered=ord) + @test_throws ArgumentError CategoricalMatrix(m, levels=levs, ordered=ord) + @test_throws ArgumentError CategoricalMatrix{T}(m, levels=levs, ordered=ord) + @test_throws ArgumentError CategoricalMatrix{T, UInt32}(m, levels=levs, ordered=ord) end end +end @testset "converting from array with missings to array without missings CategoricalArray fails with missings" begin x = CategoricalArray{Union{String, Missing}}(undef, 1) diff --git a/test/17_deprecated.jl b/test/17_deprecated.jl new file mode 100644 index 00000000..a4be23a4 --- /dev/null +++ b/test/17_deprecated.jl @@ -0,0 +1,19 @@ +module TestExtras +using Test +using CategoricalArrays + +@testset "categorical" begin + for ord in (false, true) + x = categorical(["a"], true; ordered=ord) + @test x isa CategoricalVector{String, UInt8} + @test x == ["a"] + @test isordered(x) === ord + + x = categorical(["a"], false; ordered=ord) + @test x isa CategoricalVector{String, UInt32} + @test x == ["a"] + @test isordered(x) === ord + end +end + +end \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index 588b15db..d5525747 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -26,7 +26,8 @@ module TestCategoricalArrays "13_arraycommon.jl", "14_view.jl", "15_extras.jl", - "16_recode.jl" + "16_recode.jl", + "17_deprecated.jl" ] @testset "$test" for test in tests