From a962ac264da42af7a18419d4df4b8294d11df05b Mon Sep 17 00:00:00 2001 From: Milan Bouchet-Valat Date: Sat, 11 Apr 2020 13:01:27 +0200 Subject: [PATCH] Transform SubString and AbstractString leveltypes to String by default Since CategoricalArrays are intended for cases where the number of levels is small, it makes sense to copy `SubString`s for simplicity. Calling `CategoricalArray{SubString{String}}` will still allow constructing that array type. Also replace `AbstractString` with `String`, since `AbstractString` is the element type one gets when mixing `String` and `SubString` objects in an array. --- src/array.jl | 47 +++++++++++++++++++++++++++--------------- test/13_arraycommon.jl | 29 ++++++++++++++++++++++++++ 2 files changed, 59 insertions(+), 17 deletions(-) diff --git a/src/array.jl b/src/array.jl index 8dfb4730..ff2ae71f 100644 --- a/src/array.jl +++ b/src/array.jl @@ -19,6 +19,19 @@ function reftype(sz::Int) end end +fixstringtype(T::Type) = T <: SubString || T === AbstractString ? String : T +function fixstringtype(T::Union) + if isdefined(T, :a) && isdefined(T, :b) + Union{fixstringtype(T.a), fixstringtype(T.b)} + elseif isdefined(T, :a) + T.a + elseif isdefined(T, :b) + T.b + else + Union{} + end +end + """ CategoricalArray{T}(undef, dims::Dims; levels=nothing, ordered=false) CategoricalArray{T}(undef, dims::Int...; levels=nothing, ordered=false) @@ -237,35 +250,35 @@ end # From AbstractArray CategoricalArray{T, N}(A::AbstractArray{S, N}; - levels::Union{AbstractVector, Nothing}=nothing, - ordered::Bool=_isordered(A)) where {S, T, N} = + levels::Union{AbstractVector, Nothing}=nothing, + ordered::Bool=_isordered(A)) where {S, T, N} = CategoricalArray{T, N, DefaultRefType}(A, levels=levels, ordered=ordered) CategoricalArray{T}(A::AbstractArray{S, N}; - levels::Union{AbstractVector, Nothing}=nothing, - ordered::Bool=_isordered(A)) where {S, T, N} = + levels::Union{AbstractVector, Nothing}=nothing, + ordered::Bool=_isordered(A)) where {S, T, N} = CategoricalArray{T, N}(A, 
levels=levels, ordered=ordered) CategoricalArray(A::AbstractArray{T, N}; levels::Union{AbstractVector, Nothing}=nothing, ordered::Bool=_isordered(A)) where {T, N} = - CategoricalArray{T, N}(A, levels=levels, ordered=ordered) + CategoricalArray{fixstringtype(T), N}(A, levels=levels, ordered=ordered) CategoricalVector{T}(A::AbstractVector{S}; - levels::Union{AbstractVector, Nothing}=nothing, - ordered=_isordered(A)) where {S, T} = + levels::Union{AbstractVector, Nothing}=nothing, + ordered=_isordered(A)) where {S, T} = CategoricalArray{T, 1}(A, levels=levels, ordered=ordered) CategoricalVector(A::AbstractVector{T}; - levels::Union{AbstractVector, Nothing}=nothing, - ordered::Bool=_isordered(A)) where {T} = - CategoricalArray{T, 1}(A, levels=levels, ordered=ordered) + levels::Union{AbstractVector, Nothing}=nothing, + ordered::Bool=_isordered(A)) where {T} = + CategoricalArray{fixstringtype(T), 1}(A, levels=levels, ordered=ordered) CategoricalMatrix{T}(A::AbstractMatrix{S}; - levels::Union{AbstractVector, Nothing}=nothing, - ordered::Bool=_isordered(A)) where {S, T} = + levels::Union{AbstractVector, Nothing}=nothing, + ordered::Bool=_isordered(A)) where {S, T} = CategoricalArray{T, 2}(A, levels=levels, ordered=ordered) CategoricalMatrix(A::AbstractMatrix{T}; - levels::Union{AbstractVector, Nothing}=nothing, - ordered::Bool=_isordered(A)) where {T} = - CategoricalArray{T, 2}(A, levels=levels, ordered=ordered) + levels::Union{AbstractVector, Nothing}=nothing, + ordered::Bool=_isordered(A)) where {T} = + CategoricalArray{fixstringtype(T), 2}(A, levels=levels, ordered=ordered) # From CategoricalArray (preserve R) CategoricalArray{T, N}(A::CategoricalArray{S, N, R}; @@ -865,12 +878,12 @@ are preserved unless explicitly overriden. @inline function categorical(A::AbstractArray{T, N}; compress::Bool=false, ordered=_isordered(A)) where {T, N} RefType = compress ? 
reftype(length(unique(A))) : DefaultRefType - CategoricalArray{T, N, RefType}(A, ordered=ordered) + CategoricalArray{fixstringtype(T), N, RefType}(A, ordered=ordered) end @inline function categorical(A::CategoricalArray{T, N, R}; compress::Bool=false, ordered=_isordered(A)) where {T, N, R} RefType = compress ? reftype(length(levels(A))) : R - CategoricalArray{T, N, RefType}(A, ordered=ordered) + CategoricalArray{fixstringtype(T), N, RefType}(A, ordered=ordered) end function in(x::Any, y::CategoricalArray{T, N, R}) where {T, N, R} diff --git a/test/13_arraycommon.jl b/test/13_arraycommon.jl index 8f184e9f..d736ac55 100644 --- a/test/13_arraycommon.jl +++ b/test/13_arraycommon.jl @@ -1,5 +1,6 @@ module TestArrayCommon using Test +using Missings using Future: copy! using CategoricalArrays, DataAPI using CategoricalArrays: DefaultRefType @@ -1104,6 +1105,34 @@ end end end +@testset "constructors with SubString" begin + for x in ([SubString("ab", 1, 1), SubString("c", 1, 1)], + SubString[SubString("ab", 1, 1), SubString("c", 1, 1)], + [SubString("ab", 1, 1), "c"]), + f in (CategoricalArray, CategoricalVector, categorical) + y = @inferred f(x) + @test y isa CategoricalArray{String} + @test y == x + + y = @inferred f(allowmissing(x)) + @test y isa CategoricalArray{Union{String, Missing}} + @test y == x + end + + for x in ([SubString("ab", 1, 1) SubString("c", 1, 1)], + SubString[SubString("ab", 1, 1) SubString("c", 1, 1)], + [SubString("ab", 1, 1) "c"]), + f in (CategoricalArray, CategoricalMatrix, categorical) + y = @inferred f(x) + @test y isa CategoricalArray{String} + @test y == x + + y = @inferred f(allowmissing(x)) + @test y isa CategoricalArray{Union{String, Missing}} + @test y == x + end +end + @testset "converting from array with missings to array without missings CategoricalArray fails with missings" begin x = CategoricalArray{Union{String, Missing}}(undef, 1) @test_throws MissingException CategoricalArray{String}(x)