Skip to content

Commit

Permalink
Transform SubString and AbstractString leveltypes to String by default
Browse files Browse the repository at this point in the history
Since CategoricalArrays are intended for cases where the number of levels is small,
it makes sense to copy `SubString`s for simplicity. Calling
`CategoricalArray{SubString{String}}` will still allow constructing that array type.
Also replace `AbstractString` with `String`, since `AbstractString` is the element type
one gets when mixing `String` and `SubString` objects in the same array.
  • Loading branch information
nalimilan committed Apr 11, 2020
1 parent 797e009 commit a962ac2
Show file tree
Hide file tree
Showing 2 changed files with 59 additions and 17 deletions.
47 changes: 30 additions & 17 deletions src/array.jl
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,19 @@ function reftype(sz::Int)
end
end

"""
    fixstringtype(T::Type)

Return the type to use in place of `T` as a default level type.

Any subtype of `SubString`, as well as `AbstractString` itself, is mapped to `String`.
`Union` types are processed component-wise, so that for example
`Union{SubString{String}, Missing}` becomes `Union{String, Missing}`.
Every other type, including the bottom type `Union{}`, is returned unchanged.
"""
fixstringtype(T::Type) = (T <: SubString || T === AbstractString) ? String : T
# An instance of `Union` always has both its `a` and `b` fields set
# (`Union{}` is not a `Union` instance), so recursing on both covers every case.
fixstringtype(T::Union) = Union{fixstringtype(T.a), fixstringtype(T.b)}
# `Union{} <: SubString` holds because the bottom type is a subtype of every type;
# without this method the generic fallback would turn `Union{}` into `String`.
fixstringtype(::Type{Union{}}) = Union{}

"""
CategoricalArray{T}(undef, dims::Dims; levels=nothing, ordered=false)
CategoricalArray{T}(undef, dims::Int...; levels=nothing, ordered=false)
Expand Down Expand Up @@ -237,35 +250,35 @@ end

# From AbstractArray
CategoricalArray{T, N}(A::AbstractArray{S, N};
levels::Union{AbstractVector, Nothing}=nothing,
ordered::Bool=_isordered(A)) where {S, T, N} =
levels::Union{AbstractVector, Nothing}=nothing,
ordered::Bool=_isordered(A)) where {S, T, N} =
CategoricalArray{T, N, DefaultRefType}(A, levels=levels, ordered=ordered)
CategoricalArray{T}(A::AbstractArray{S, N};
levels::Union{AbstractVector, Nothing}=nothing,
ordered::Bool=_isordered(A)) where {S, T, N} =
levels::Union{AbstractVector, Nothing}=nothing,
ordered::Bool=_isordered(A)) where {S, T, N} =
CategoricalArray{T, N}(A, levels=levels, ordered=ordered)
CategoricalArray(A::AbstractArray{T, N};
levels::Union{AbstractVector, Nothing}=nothing,
ordered::Bool=_isordered(A)) where {T, N} =
CategoricalArray{T, N}(A, levels=levels, ordered=ordered)
CategoricalArray{fixstringtype(T), N}(A, levels=levels, ordered=ordered)

CategoricalVector{T}(A::AbstractVector{S};
levels::Union{AbstractVector, Nothing}=nothing,
ordered=_isordered(A)) where {S, T} =
levels::Union{AbstractVector, Nothing}=nothing,
ordered=_isordered(A)) where {S, T} =
CategoricalArray{T, 1}(A, levels=levels, ordered=ordered)
CategoricalVector(A::AbstractVector{T};
levels::Union{AbstractVector, Nothing}=nothing,
ordered::Bool=_isordered(A)) where {T} =
CategoricalArray{T, 1}(A, levels=levels, ordered=ordered)
levels::Union{AbstractVector, Nothing}=nothing,
ordered::Bool=_isordered(A)) where {T} =
CategoricalArray{fixstringtype(T), 1}(A, levels=levels, ordered=ordered)

CategoricalMatrix{T}(A::AbstractMatrix{S};
levels::Union{AbstractVector, Nothing}=nothing,
ordered::Bool=_isordered(A)) where {S, T} =
levels::Union{AbstractVector, Nothing}=nothing,
ordered::Bool=_isordered(A)) where {S, T} =
CategoricalArray{T, 2}(A, levels=levels, ordered=ordered)
CategoricalMatrix(A::AbstractMatrix{T};
levels::Union{AbstractVector, Nothing}=nothing,
ordered::Bool=_isordered(A)) where {T} =
CategoricalArray{T, 2}(A, levels=levels, ordered=ordered)
levels::Union{AbstractVector, Nothing}=nothing,
ordered::Bool=_isordered(A)) where {T} =
CategoricalArray{fixstringtype(T), 2}(A, levels=levels, ordered=ordered)

# From CategoricalArray (preserve R)
CategoricalArray{T, N}(A::CategoricalArray{S, N, R};
Expand Down Expand Up @@ -865,12 +878,12 @@ are preserved unless explicitly overriden.
@inline function categorical(A::AbstractArray{T, N};
compress::Bool=false, ordered=_isordered(A)) where {T, N}
RefType = compress ? reftype(length(unique(A))) : DefaultRefType
CategoricalArray{T, N, RefType}(A, ordered=ordered)
CategoricalArray{fixstringtype(T), N, RefType}(A, ordered=ordered)
end
@inline function categorical(A::CategoricalArray{T, N, R};
compress::Bool=false, ordered=_isordered(A)) where {T, N, R}
RefType = compress ? reftype(length(levels(A))) : R
CategoricalArray{T, N, RefType}(A, ordered=ordered)
CategoricalArray{fixstringtype(T), N, RefType}(A, ordered=ordered)
end

function in(x::Any, y::CategoricalArray{T, N, R}) where {T, N, R}
Expand Down
29 changes: 29 additions & 0 deletions test/13_arraycommon.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
module TestArrayCommon
using Test
using Missings
using Future: copy!
using CategoricalArrays, DataAPI
using CategoricalArrays: DefaultRefType
Expand Down Expand Up @@ -1104,6 +1105,34 @@ end
end
end

@testset "constructors with SubString" begin
    # Inputs whose element types are `SubString{String}`, `SubString` and
    # `AbstractString` respectively, once as vectors and once as 1×2 matrices.
    vectors = ([SubString("ab", 1, 1), SubString("c", 1, 1)],
               SubString[SubString("ab", 1, 1), SubString("c", 1, 1)],
               [SubString("ab", 1, 1), "c"])
    matrices = ([SubString("ab", 1, 1) SubString("c", 1, 1)],
                SubString[SubString("ab", 1, 1) SubString("c", 1, 1)],
                [SubString("ab", 1, 1) "c"])

    # Pair each group of inputs with the constructors that accept its dimensionality.
    cases = ((vectors, (CategoricalArray, CategoricalVector, categorical)),
             (matrices, (CategoricalArray, CategoricalMatrix, categorical)))

    for (inputs, constructors) in cases
        for x in inputs
            for f in constructors
                # Without missing values, levels must end up as `String`.
                res = @inferred f(x)
                @test res isa CategoricalArray{String}
                @test res == x

                # Allowing missing values must keep the `String` conversion.
                res = @inferred f(allowmissing(x))
                @test res isa CategoricalArray{Union{String, Missing}}
                @test res == x
            end
        end
    end
end

@testset "converting from array with missings to array without missings CategoricalArray fails with missings" begin
x = CategoricalArray{Union{String, Missing}}(undef, 1)
@test_throws MissingException CategoricalArray{String}(x)
Expand Down

0 comments on commit a962ac2

Please sign in to comment.