Skip to content

Commit

Permalink
Merge pull request #309 from JuliaData/nl/copyto!
Browse files Browse the repository at this point in the history
Improve copyto! performance
  • Loading branch information
nalimilan committed Oct 31, 2020
2 parents f0e31c4 + d43b34d commit c5b35a7
Show file tree
Hide file tree
Showing 3 changed files with 101 additions and 16 deletions.
28 changes: 16 additions & 12 deletions src/array.jl
Expand Up @@ -434,15 +434,14 @@ end

function merge_pools!(A::CatArrOrSub,
B::Union{CategoricalValue, CatArrOrSub};
updaterefs::Bool=true)
updaterefs::Bool=true,
updatepool::Bool=true)
if isordered(A) && length(pool(A)) > 0 && pool(B) pool(A)
lev = A isa CategoricalValue ? get(B) : first(setdiff(levels(B), levels(A)))
throw(OrderedLevelsException(lev, levels(A)))
end
newpool = merge_pools(pool(A), pool(B))
newlevels, ordered = merge_pools(pool(A), pool(B))
oldlevels = levels(A)
newlevels = levels(newpool)
ordered = isordered(newpool)
if isordered(A) != ordered
A isa SubArray &&
throw(ArgumentError("cannot set ordered=$ordered on dest SubArray as it " *
Expand All @@ -457,8 +456,10 @@ function merge_pools!(A::CatArrOrSub,
view(newlevels, 1:length(oldlevels)) != oldlevels)
update_refs!(pA, newlevels)
end
pA.pool = newpool
A
if updatepool
pA.pool = typeof(pA.pool)(newlevels, ordered)
end
newlevels, ordered
end

@inline function setindex!(A::CategoricalArray, v::Any, I::Real...)
Expand Down Expand Up @@ -518,12 +519,10 @@ function copyto!(dest::CatArrOrSub{T, N, R}, dstart::Integer,
# For partial copy, need to recompute existing refs
# TODO: for performance, avoid ajusting refs which are going to be overwritten
updaterefs = isa(dest, SubArray) || !(n == length(dest) == length(src))
newpool = merge_pools!(dest, src, updaterefs=updaterefs)
newlevels = levels(newpool)
newlevels, ordered = merge_pools!(dest, src, updaterefs=updaterefs, updatepool=false)

# If destination levels are an ordered superset of source, no need to recompute refs
if length(dlevs) >= length(slevs) && view(dlevs, 1:length(slevs)) == slevs
newlevels != dlevs && levels!(dpool, newlevels)
if view(newlevels, 1:length(slevs)) == slevs
copyto!(drefs, dstart, srefs, sstart, n)
else # Otherwise, recompute refs according to new levels
# Then adjust refs from source
Expand All @@ -536,7 +535,12 @@ function copyto!(dest::CatArrOrSub{T, N, R}, dstart::Integer,
x = srefs[sstart+i]
drefs[dstart+i] = levelsmap[x+1]
end
destp.pool = CategoricalPool{nonmissingtype(T), R}(newlevels, isordered(newpool))
end
# Need to allocate a new pool only if reordering destination levels
if view(newlevels, 1:length(dlevs)) == dlevs
levels!(dpool, newlevels, checkunique=false)
else
destp.pool = CategoricalPool{nonmissingtype(T), R}(newlevels, ordered)
end

dest
Expand Down Expand Up @@ -567,7 +571,7 @@ function copyto!(dest::CatArrOrSub{T1, N, R}, dstart::Integer,
srclevsnm = srclevsnm === srclevs ? sort(srclevsnm) : sort!(srclevsnm)
end
newdestlevs = union(destlevs, srclevsnm)
levels!(pool(dest), newdestlevs)
levels!(pool(dest), newdestlevs, checkunique=false)
end
levelsmap = something.(indexin(srclevs, [missing; newdestlevs])) .- 1
destrefs = refs(dest)
Expand Down
9 changes: 5 additions & 4 deletions src/pool.jl
Expand Up @@ -163,7 +163,7 @@ end

# Do not override Base.merge as for internal use we need to use the type and orderedness
# of the first pool rather than promoting both pools
function merge_pools(a::CategoricalPool{T, R}, b::CategoricalPool) where {T, R}
function merge_pools(a::CategoricalPool{T}, b::CategoricalPool) where {T}
if length(a) == 0 && length(b) == 0
newlevs = T[]
ordered = isordered(a)
Expand All @@ -177,16 +177,17 @@ function merge_pools(a::CategoricalPool{T, R}, b::CategoricalPool) where {T, R}
nl, ordered = mergelevels(isordered(a), a.levels, b.levels)
newlevs = convert(Vector{T}, nl)
end
CategoricalPool{T, R}(newlevs, ordered)
newlevs, ordered
end

Base.issubset(a::CategoricalPool, b::CategoricalPool) = issubset(a.levels, keys(b.invindex))

# Contrary to the CategoricalArray one, this method only allows adding new levels at the end
# so that existing CategoricalValue objects still point to the same value
function levels!(pool::CategoricalPool{S, R}, newlevels::Vector) where {S, R}
function levels!(pool::CategoricalPool{S, R}, newlevels::Vector;
checkunique::Bool=true) where {S, R}
levs = convert(Vector{S}, newlevels)
if !allunique(levs)
if checkunique && !allunique(levs)
throw(ArgumentError(string("duplicated levels found in levs: ",
join(unique(filter(x->sum(levs.==x)>1, levs)), ", "))))
elseif length(levs) < length(pool) || view(levs, 1:length(pool)) != pool.levels
Expand Down
80 changes: 80 additions & 0 deletions test/13_arraycommon.jl
Expand Up @@ -737,6 +737,7 @@ end
levels!(y, ["Old", "Middle", "Young"])
ordered!(x, true)
@test copyf!(x, y) === x
@test x == y
@test levels(x) == ["Young", "Middle", "Old"]
@test !isordered(x)

Expand All @@ -747,6 +748,7 @@ end
y = CategoricalArray{Union{T, String}}(["Middle", "Middle", "Old", "Young"])
levels!(y, ["Young", "Middle", "Old"])
@test copyf!(x, y) === x
@test x == y
@test levels(x) == ["Young", "Middle", "Old"]
@test isordered(x)

Expand All @@ -757,6 +759,7 @@ end
ordered!(y, true)
levels!(y, ["Young", "Middle", "Old"])
@test copyf!(x, y) === x
@test x == y
@test levels(x) == ["Young", "Middle", "Old"]
@test !isordered(x)

Expand All @@ -767,6 +770,7 @@ end
x = similar(x)
ordered!(x, true)
@test copyf!(x, y) === x
@test x == y
@test levels(x) == ["Young", "Middle", "Old"]
@test isordered(x)

Expand Down Expand Up @@ -1453,6 +1457,82 @@ end
@test_throws MethodError copyto!(y, 1, x, 1, length(x))
end

@testset "copyto! from CategoricalArray" begin
vecs = (CategoricalVector([1, 2, 1, 1, 3, 2], levels=[2, 1, 3]),
CategoricalVector([2, 2, 2, 2, 2, 2], levels=[2]),
CategoricalVector([2, 2, 2, 2, 2, 2], levels=[3, 1, 2, 4]),
CategoricalVector([1, 2, 1, 3, 3, 2], levels=[3, 1, 2, 4]),
CategoricalVector(Float64[2, 3, 1, 2, 2, 1], levels=[3, 1, 2]),
view(CategoricalVector([3, 1, 2, 1, 1, 2, 2, 3], levels=[3, 1, 2, 4]), 2:7))
for y1 in vecs, x in vecs
levs = levels(y1)
newlevs, _ = CategoricalArrays.mergelevels(false, levs, levels(x))

y2 = deepcopy(y1)
@test copyto!(y2, x) === y2
@test x == y2
@test levels(y2) == newlevs

y2 = deepcopy(y1)
@test copy!(y2, x) === y2
@test x == y2
@test levels(y2) == newlevs

y2 = deepcopy(y1)
@test copyto!(y2, 1, x, 1, length(x)) === y2
@test x == y2
@test levels(y2) == newlevs

y2 = deepcopy(y1)
@test copyto!(y2, 2, x, 3, 3) === y2
@test x[3:5] y2[2:4]
@test levels(y2) == newlevs

y2 = deepcopy(y1)
@test copyto!(y2, 2, x[3:end]) === y2
@test x[3:6] y2[2:5]
@test levels(y2) == newlevs
end

vecs = (CategoricalVector{Union{Int, Missing}}(undef, 6),
CategoricalVector([1, 2, missing, 1, 3, 2], levels=[2, 1, 3]),
CategoricalVector([2, 2, missing, 2, 2, 2], levels=[2]),
CategoricalVector([2, 2, missing, 2, 2, 2], levels=[3, 1, 2, 4]),
CategoricalVector([1, 2, missing, 3, 3, 2], levels=[3, 1, 2, 4]),
CategoricalVector(Union{Float64, Missing}[2, 3, missing, 2, 2, 1], levels=[3, 1, 2]),
view(CategoricalVector([3, 1, 2, missing, 1, 2, 2, 3], levels=[3, 1, 2, 4]), 2:7),
view(CategoricalVector([3, 1, 2, 1, 1, 2, 2, missing], levels=[3, 1, 2, 4]), 2:7))
for y1 in vecs, x in vecs
levs = levels(y1)
newlevs, _ = CategoricalArrays.mergelevels(false, levs, levels(x))

y2 = deepcopy(y1)
@test copyto!(y2, x) === y2
@test x y2
@test levels(y2) == newlevs

y2 = deepcopy(y1)
@test copy!(y2, x) === y2
@test x y2
@test levels(y2) == newlevs

y2 = deepcopy(y1)
@test copyto!(y2, 1, x, 1, length(x)) === y2
@test x y2
@test levels(y2) == newlevs

y2 = deepcopy(y1)
@test copyto!(y2, 2, x, 3, 3) === y2
@test x[3:5] y2[2:4]
@test levels(y2) == newlevs

y2 = deepcopy(y1)
@test copyto!(y2, 2, x[3:6]) === y2
@test x[3:6] y2[2:5]
@test levels(y2) == newlevs
end
end

@testset "new levels can't be added through assignment when levels are ordered" begin
x = categorical([1,2,3])
ordered!(x, true)
Expand Down

0 comments on commit c5b35a7

Please sign in to comment.