Skip to content

Commit

Permalink
Correctly handle recoding pair with value range and source containing…
Browse files Browse the repository at this point in the history
… missings (#106)
  • Loading branch information
bkamins authored and nalimilan committed Dec 5, 2017
1 parent e5bc8f0 commit 8e2da16
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 12 deletions.
33 changes: 22 additions & 11 deletions src/recode.jl
Expand Up @@ -6,16 +6,21 @@ const ≅ = isequal
Fill `dest` with elements from `src`, replacing those matching a key of `pairs`
with the corresponding value.
For each `Pair` in `pairs`, if the element is equal to (according to [`isequal`](@ref))
or [`in`](@ref) the key (first item of the pair), then the corresponding value
(second item) is copied to `dest`.
For each `Pair` in `pairs`, if the element is equal to (according to [`isequal`](@ref))
the key (first item of the pair) or to one of its entries if it is a collection,
then the corresponding value (second item) is copied to `dest`.
If the element matches no key and `default` is not provided or `nothing`, it is copied as-is;
if `default` is specified, it is used in place of the original element.
`dest` and `src` must be of the same length, but not necessarily of the same type.
Elements of `src` as well as values from `pairs` will be `convert`ed when possible
on assignment.
If an element matches more than one key, the first match is used.
recode!(dest::CategoricalArray, src::AbstractArray[, default::Any], pairs::Pair...)
If `dest` is a `CategoricalArray` then the ordering of resulting levels is determined
by the order of passed `pairs` and `default` will be the last level if provided.
recode!(dest::AbstractArray, src::AbstractArray{>:Missing}[, default::Any], pairs::Pair...)
If `src` contains missing values, they are never replaced with `default`:
Expand All @@ -36,8 +41,8 @@ function recode!(dest::AbstractArray{T}, src::AbstractArray, default::Any, pairs

for j in 1:length(pairs)
p = pairs[j]
if (!isa(p.first, Union{AbstractArray, Tuple}) && x p.first) ||
(isa(p.first, Union{AbstractArray, Tuple}) && x in p.first)
if ((isa(p.first, Union{AbstractArray, Tuple}) && any(x y for y in p.first)) ||
x p.first)
dest[i] = p.second
@goto nextitem
end
Expand Down Expand Up @@ -89,8 +94,8 @@ function recode!(dest::CategoricalArray{T}, src::AbstractArray, default::Any, pa

for j in 1:length(pairs)
p = pairs[j]
if (!isa(p.first, Union{AbstractArray, Tuple}) && x p.first) ||
(isa(p.first, Union{AbstractArray, Tuple}) && x in p.first)
if ((isa(p.first, Union{AbstractArray, Tuple}) && any(x y for y in p.first)) ||
x p.first)
drefs[i] = dupvals ? pairmap[j] : j
@goto nextitem
end
Expand Down Expand Up @@ -146,7 +151,7 @@ function recode!(dest::CategoricalArray{T}, src::CategoricalArray, default::Any,

for l in srclevels
if !(any(x -> x l, firsts) ||
any(f -> isa(f, Union{AbstractArray, Tuple}) && l in f, firsts))
any(f -> isa(f, Union{AbstractArray, Tuple}) && any(l y for y in f), firsts))
try
push!(keptlevels, l)
catch err
Expand Down Expand Up @@ -176,7 +181,8 @@ function recode!(dest::CategoricalArray{T}, src::CategoricalArray, default::Any,
# For missing values (0 if no missing in pairs' keys)
indexmap[1] = 0
for p in pairs
if ismissing(p.first)
if ((isa(p.first, Union{AbstractArray, Tuple}) && any(ismissing, p.first)) ||
ismissing(p.first))
indexmap[1] = get(dest.pool, p.second)
break
end
Expand All @@ -189,8 +195,8 @@ function recode!(dest::CategoricalArray{T}, src::CategoricalArray, default::Any,
@inbounds for (i, l) in enumerate(srcindex)
for j in 1:length(pairs)
p = pairs[j]
if (!isa(p.first, Union{AbstractArray, Tuple}) && l p.first) ||
(isa(p.first, Union{AbstractArray, Tuple}) && l in p.first)
if ((isa(p.first, Union{AbstractArray, Tuple}) && any(l y for y in p.first)) ||
l p.first)
indexmap[i+1] = pairmap[j]
@goto nextitem
end
Expand Down Expand Up @@ -268,6 +274,11 @@ If the element matches no key and `default` is not provided or `nothing`, it is
if `default` is specified, it is used in place of the original element.
If an element matches more than one key, the first match is used.
recode(a::CategoricalArray[, default::Any], pairs::Pair...)
If `a` is a `CategoricalArray` then the ordering of resulting levels is determined
by the order of passed `pairs` and `default` will be the last level if provided.
# Examples
```jldoctest
julia> using CategoricalArrays
Expand Down
45 changes: 44 additions & 1 deletion test/16_recode.jl
Expand Up @@ -123,6 +123,7 @@ end

@testset "Recoding from $(typeof(x)) to categorical array with missing values" for
x in (["a", missing, "c", "d"], CategoricalArray(["a", missing, "c", "d"]))

# check that error is thrown
y = Vector{String}(4)
@test_throws MissingException recode!(y, x, "a", "c"=>"b")
Expand Down Expand Up @@ -159,7 +160,21 @@ end
end
end

@testset "Recoding array with missings, no default and with missing as a key pair from $(typeof(x)) to $(typeof(y))" for
# Recoding with a collection key (`["3", "4"]`) and no default: entries matching
# the collection become "2", while `missing` and unmatched elements are kept.
# Runs for plain and categorical sources, and for every destination kind
# (including recoding `x` in place).
@testset "Collection in LHS recoding array with missings and no default from $(typeof(x)) to $(typeof(y))" for
    x in (["1", missing, "3", "4", "5"], CategoricalArray(["1", missing, "3", "4", "5"])),
    y in (similar(x), Array{Union{String, Missing}}(size(x)),
          CategoricalArray{Union{String, Missing}}(size(x)), x)

    z = @inferred recode!(y, x, ["3","4"]=>"2")
    # recode! must return the destination array itself
    @test y === z
    # The expected vector contains `missing`, so compare with `isequal`:
    # `==` on arrays holding `missing` would not yield a plain `true`.
    @test isequal(y, ["1", missing, "2", "2", "5"])
    if isa(y, CategoricalArray)
        # kept source levels come first, the recoded value is appended last
        @test levels(y) == ["1", "5", "2"]
        @test !isordered(y)
    end
end

@testset "Recoding array with missings, default and with missing as a key pair from $(typeof(x)) to $(typeof(y))" for
x in (["a", missing, "c", "d"], CategoricalArray(["a", missing, "c", "d"])),
y in (similar(x), Array{Union{String, Missing}}(size(x)),
CategoricalArray{Union{String, Missing}}(size(x)), x)
Expand All @@ -173,6 +188,20 @@ end
end
end

# A collection key that itself holds `missing` (`[missing, "c"]`) combined with
# the default "a": the missing entry and "c" map to "b", everything else to "a".
@testset "Collection with missing in LHS recoding array with missings, default from $(typeof(x)) to $(typeof(y))" for
    x in (["a", missing, "c", "d"], CategoricalArray(["a", missing, "c", "d"])),
    y in (similar(x), Array{Union{String, Missing}}(size(x)),
          CategoricalArray{Union{String, Missing}}(size(x)), x)

    recoded = @inferred recode!(y, x, "a", [missing, "c"]=>"b")
    # the returned array must be the destination itself
    @test recoded === y
    @test y == ["a", "b", "b", "a"]
    if isa(y, CategoricalArray)
        @test !isordered(y)
        @test levels(y) == ["b", "a"]
    end
end

@testset "Recoding array with missings, no default and with missing as a key pair from $(typeof(x)) to $(typeof(y))" for
x in (["a", missing, "c", "d"], CategoricalArray(["a", missing, "c", "d"])),
y in (similar(x), Array{Union{String, Missing}}(size(x)),
Expand All @@ -187,6 +216,20 @@ end
end
end

# A collection key containing `missing` (`["c", missing]`) with no default:
# "c" and the missing entry become "b", the remaining elements are kept as-is.
@testset "Collection with missing in LHS recoding array with missings, no default from $(typeof(x)) to $(typeof(y))" for
    x in (["a", missing, "c", "d"], CategoricalArray(["a", missing, "c", "d"])),
    y in (similar(x), Array{Union{String, Missing}}(size(x)),
          CategoricalArray{Union{String, Missing}}(size(x)), x)

    recoded = @inferred recode!(y, x, ["c", missing]=>"b")
    # the returned array must be the destination itself
    @test recoded === y
    @test y == ["a", "b", "b", "d"]
    if isa(y, CategoricalArray)
        @test !isordered(y)
        @test levels(y) == ["a", "d", "b"]
    end
end

@testset "Recoding into an array of incompatible size from $(typeof(x)) to $(typeof(y))" for
x in (["a", missing, "c", "d"], CategoricalArray(["a", missing, "c", "d"])),
y in (similar(x, 0), Array{Union{String, Missing}}(0),
Expand Down

0 comments on commit 8e2da16

Please sign in to comment.