Skip to content

Commit

Permalink
Implement copy(::CategoricalArray) and use it instead of deepcopy
Browse files Browse the repository at this point in the history
`deepcopy` is not recommended and it is very slow, especially when there are many levels.
This has a significant impact on `setindex!`.
  • Loading branch information
nalimilan committed Sep 25, 2019
1 parent 0ede6dc commit 317950c
Show file tree
Hide file tree
Showing 8 changed files with 73 additions and 12 deletions.
9 changes: 5 additions & 4 deletions src/array.jl
Original file line number Diff line number Diff line change
Expand Up @@ -164,8 +164,8 @@ function CategoricalArray{T, N, R}(A::CategoricalArray{S, N, Q};
ordered=_isordered(A)) where {S, T, N, Q, R}
V = unwrap_catvaluetype(T)
res = convert(CategoricalArray{V, N, R}, A)
refs = res.refs === A.refs ? deepcopy(res.refs) : res.refs
pool = res.pool === A.pool ? deepcopy(res.pool) : res.pool
refs = res.refs === A.refs ? copy(res.refs) : res.refs
pool = res.pool === A.pool ? copy(res.pool) : res.pool
ordered!(CategoricalArray{V, N}(refs, pool), ordered)
end

Expand Down Expand Up @@ -399,7 +399,8 @@ function mergelevels(ordered, levels...)
end

# Methods preserving levels and more efficient than AbstractArray fallbacks
copy(A::CategoricalArray) = deepcopy(A)
copy(A::CategoricalArray{T, N}) where {T, N} =
CategoricalArray{T, N}(copy(A.refs), copy(A.pool))

CatArrOrSub{T, N} = Union{CategoricalArray{T, N},
SubArray{<:Any, N, <:CategoricalArray{T}}} where {T, N}
Expand Down Expand Up @@ -598,7 +599,7 @@ end
@inbounds r = A.refs[I...]

if isa(r, Array)
res = CategoricalArray{T, ndims(r)}(r, deepcopy(A.pool))
res = CategoricalArray{T, ndims(r)}(r, copy(A.pool))
return ordered!(res, isordered(A))
else
r > 0 || throw(UndefRefError())
Expand Down
4 changes: 2 additions & 2 deletions src/missingarray.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import Base: getindex, setindex!, push!, similar, in, collect
@inbounds r = A.refs[I...]

if isa(r, Array)
ret = CategoricalArray{T, ndims(r)}(r, deepcopy(A.pool))
ret = CategoricalArray{T, ndims(r)}(r, copy(A.pool))
return ordered!(ret, isordered(A))
else
if r > 0
Expand All @@ -33,7 +33,7 @@ in(x::Missing, y::CategoricalArray) = false
in(x::Missing, y::CategoricalArray{>:Missing}) = !all(v -> v > 0, y.refs)

function Missings.replace(a::CategoricalArray{S, N, R, V, C}, replacement::V) where {S, N, R, V, C}
pool = deepcopy(a.pool)
pool = copy(a.pool)
v = C(get!(pool, replacement), pool)
Missings.replace(a, v)
end
Expand Down
15 changes: 15 additions & 0 deletions src/pool.jl
Original file line number Diff line number Diff line change
@@ -1,3 +1,14 @@
function CategoricalPool{T, R, V}(index::Vector{T},
invindex::Dict{T, R},
order::Vector{R},
ordered::Bool) where {T, R, V}
levels = similar(index)
levels[order] = index
pool = CategoricalPool{T, R, V}(index, invindex, order, levels, V[], ordered)
buildvalues!(pool)
return pool
end

function CategoricalPool(index::Vector{S},
invindex::Dict{S, T},
order::Vector{R},
Expand Down Expand Up @@ -75,6 +86,10 @@ function Base.convert(::Type{CategoricalPool{S, R}}, pool::CategoricalPool) wher
return CategoricalPool(indexS, invindexS, order, pool.ordered)
end

Base.copy(pool::CategoricalPool{T, R, V}) where {T, R, V} =
CategoricalPool{T, R, V}(copy(pool.index), copy(pool.invindex), copy(pool.order),
copy(pool.levels), copy(pool.valindex), pool.ordered)

function Base.show(io::IO, pool::CategoricalPool{T, R}) where {T, R}
@printf(io, "%s{%s,%s}([%s])", typeof(pool).name, T, R,
join(map(repr, levels(pool)), ","))
Expand Down
8 changes: 3 additions & 5 deletions src/typedefs.jl
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ mutable struct CategoricalPool{T, R <: Integer, V}
function CategoricalPool{T, R, V}(index::Vector{T},
invindex::Dict{T, R},
order::Vector{R},
levels::Vector{T},
valindex::Vector{V},
ordered::Bool) where {T, R, V}
if iscatvalue(T)
throw(ArgumentError("Level type $T cannot be a categorical value type"))
Expand All @@ -30,11 +32,7 @@ mutable struct CategoricalPool{T, R <: Integer, V}
if reftype(V) !== R
throw(ArgumentError("Reference type of the categorical value ($(reftype(V))) and of the pool ($R) do not match"))
end
levels = similar(index)
levels[order] = index
pool = new(index, invindex, order, levels, V[], ordered)
buildvalues!(pool)
return pool
new(index, invindex, order, levels, valindex, ordered)
end
end

Expand Down
29 changes: 29 additions & 0 deletions test/05_copy.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
module TestCopy
using Test
using CategoricalArrays
using CategoricalArrays: CategoricalPool, catvalue

@testset "copy" begin
pool = CategoricalPool(["d", "c", "b"])
ordered!(pool, true)
pool2 = copy(pool)

@test length(pool2) == 3
@test pool2.levels == pool2.index == ["d", "c", "b"]
@test pool2.invindex == Dict("d"=>1, "c"=>2, "b"=>3)
@test pool2.order == 1:3
@test pool2.valindex == [catvalue(i, pool2) for i in 1:3]
@test pool2.ordered

levels!(pool2, ["a", "b", "c", "d"])
ordered!(pool2, false)

@test length(pool) == 3
@test pool.levels == pool.index == ["d", "c", "b"]
@test pool.invindex == Dict("d"=>1, "c"=>2, "b"=>3)
@test pool.order == 1:3
@test pool.valindex == [catvalue(i, pool) for i in 1:3]
@test pool.ordered
end

end
2 changes: 1 addition & 1 deletion test/11_array.jl
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,7 @@ using CategoricalArrays: DefaultRefType, catvaluetype, leveltype
@test x[1] == x[end]
@test levels(x) == ["e", "a", "b", "c", "zz"]

x2 = deepcopy(x)
x2 = copy(x)
@test_throws MethodError push!(x, 1)
@test x == x2
@test x.pool.index == x2.pool.index
Expand Down
17 changes: 17 additions & 0 deletions test/13_arraycommon.jl
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,23 @@ end
@test size(y) == (3,)
end

@testset "copy" begin
x = CategoricalArray{Union{T, String}}(["Old", "Young", "Middle", "Young"])
levels!(x, ["Young", "Middle", "Old"])
ordered!(x, true)

y = copy(x)
@test y == x
levels!(y, ["Z", "Middle", "Old", "Young"])
y[1] = "Z"

# Test with missing values
if T === Missing
x[3] = missing
@test copy(x) x
end
end

@testset "copy! and copyto!" begin
x = CategoricalArray{Union{T, String}}(["Old", "Young", "Middle", "Young"])
levels!(x, ["Young", "Middle", "Old"])
Expand Down
1 change: 1 addition & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ module TestCategoricalArrays
"03_buildfields.jl",
"04_constructors.jl",
"05_convert.jl",
"05_copy.jl",
"06_show.jl",
"06_length.jl",
"07_levels.jl",
Expand Down

0 comments on commit 317950c

Please sign in to comment.