Skip to content

Commit

Permalink
Use extend=missing instead of allowoutside=true
Browse files Browse the repository at this point in the history
  • Loading branch information
nalimilan committed Sep 9, 2020
1 parent 7b4674c commit 46b8c7f
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 33 deletions.
54 changes: 25 additions & 29 deletions src/extras.jl
@@ -1,7 +1,7 @@
using Statistics

function fill_refs!(refs::AbstractArray, X::AbstractArray,
breaks::AbstractVector, extend::Bool, allowoutside::Bool)
breaks::AbstractVector, extend::Union{Bool, Missing})
n = length(breaks)
lower = first(breaks)
upper = last(breaks)
Expand All @@ -11,12 +11,12 @@ function fill_refs!(refs::AbstractArray, X::AbstractArray,

if ismissing(x)
refs[i] = 0
elseif extend && x == upper
elseif extend === true && x == upper
refs[i] = n-1
elseif !extend && !(lower <= x < upper)
allowoutside ||
elseif extend !== true && !(lower <= x < upper)
extend === missing ||
throw(ArgumentError("value $x (at index $i) does not fall inside the breaks: " *
"adapt them manually, or pass extend=true or allowoutside=true"))
"adapt them manually, or pass extend=true or extend=missing"))
refs[i] = 0
else
refs[i] = searchsortedlast(breaks, x)
Expand All @@ -35,7 +35,7 @@ default_formatter(from, to, i; leftclosed, rightclosed) =
@doc raw"""
cut(x::AbstractArray, breaks::AbstractVector;
labels::Union{AbstractVector{<:AbstractString},Function},
extend::Bool=false, allowoutside::Bool=false, allowempty::Bool=false)
extend::Union{Bool,Missing}=false, allowempty::Bool=false)
Cut a numeric array into intervals and return an ordered `CategoricalArray` indicating
the interval into which each entry falls. Intervals are of the form `[lower, upper)`,
Expand All @@ -45,15 +45,14 @@ If `x` accepts missing values (i.e. `eltype(x) >: Missing`) the returned array w
also accept them.
# Keyword arguments
* `extend::Bool=false`: when `false`, an error is raised if some values in `x` fall
outside of the breaks; when `true`, breaks are automatically added to include all
values in `x`, and the upper bound is included in the last interval.
* `extend::Union{Bool, Missing}=false`: when `false`, an error is raised if some values
in `x` fall outside of the breaks; when `true`, breaks are automatically added to include
all values in `x`, and the upper bound is included in the last interval; when `missing`,
values outside of the breaks generate `missing` entries.
* `labels::Union{AbstractVector,Function}`: a vector of strings giving the names to use for
the intervals; or a function `f(from, to, i; leftclosed, rightclosed)` that generates
the labels from the left and right interval boundaries and the group index. Defaults to
`"[from, to)"` (or `"[from, to]"` for the rightmost interval if `extend == true`).
* `allowoutside::Bool=false`: when `false`, an error is thrown if values outside of breaks
are encountered. When `true`, these values result in `missing` entries.
* `allowempty::Bool=false`: when `false`, an error is raised if some breaks appear
multiple times, generating empty intervals; when `true`, duplicate breaks are allowed
and the intervals they generate are kept as unused levels
Expand Down Expand Up @@ -100,33 +99,29 @@ julia> cut(-1:0.5:1, 3, labels=fmt)
```
"""
@inline function cut(x::AbstractArray, breaks::AbstractVector;
extend::Bool=false,
extend::Union{Bool, Missing}=false,
labels::Union{AbstractVector{<:AbstractString},Function}=default_formatter,
allowoutside::Bool=false,
allowmissing::Union{Bool, Nothing}=nothing,
allow_missing::Union{Bool, Nothing}=nothing,
allowempty::Bool=false)
if allow_missing !== nothing
Base.depwarn("allow_missing argument is deprecated, use allowoutside instead",
Base.depwarn("allow_missing argument is deprecated, use extend=missing instead",
:cut)
allowoutside = allow_missing
extend = missing
end
if allowmissing !== nothing
Base.depwarn("allowmissing argument is deprecated, use allowoutside instead",
Base.depwarn("allowmissing argument is deprecated, use extend=missing instead",
:cut)
allowoutside = allowmissing
extend = missing
end
return _cut(x, breaks, extend, labels,
allowoutside, allowoutside ? Missing : Union{}, allowempty)
return _cut(x, breaks, extend, labels, allowempty)
end

# Separate function for inferability (thanks to inlining of cut)
function _cut(x::AbstractArray{T, N}, breaks::AbstractVector,
extend::Bool,
extend::Union{Bool, Missing},
labels::Union{AbstractVector{<:AbstractString},Function},
allowoutside::Bool,
::Type{M},
allowempty::Bool=false) where {T, N, M}
allowempty::Bool=false) where {T, N}
if !allowempty && !allunique(breaks)
throw(ArgumentError("all breaks must be unique unless `allowempty=true`"))
end
Expand All @@ -135,7 +130,7 @@ function _cut(x::AbstractArray{T, N}, breaks::AbstractVector,
breaks = sort(breaks)
end

if extend
if extend === true
xnm = T >: Missing ? skipmissing(x) : x
length(breaks) >= 1 || throw(ArgumentError("at least one break must be provided"))
local min_x, max_x
Expand Down Expand Up @@ -166,7 +161,7 @@ function _cut(x::AbstractArray{T, N}, breaks::AbstractVector,

refs = Array{DefaultRefType, N}(undef, size(x))
try
fill_refs!(refs, x, breaks, extend, allowoutside)
fill_refs!(refs, x, breaks, extend)
catch err
# So that the error appears to come from cut() itself,
# since it refers to its keyword arguments
Expand All @@ -178,7 +173,7 @@ function _cut(x::AbstractArray{T, N}, breaks::AbstractVector,
end

n = length(breaks)
n >= 2 || throw(ArgumentError("at least two breaks must be provided when extend=false"))
n >= 2 || throw(ArgumentError("at least two breaks must be provided when extend is not true"))
if labels isa Function
from = map(x -> sprint(show, x, context=:compact=>true), breaks[1:n-1])
to = map(x -> sprint(show, x, context=:compact=>true), breaks[2:n])
Expand All @@ -188,7 +183,8 @@ function _cut(x::AbstractArray{T, N}, breaks::AbstractVector,
leftclosed=breaks[i] != breaks[i+1], rightclosed=false)
end
levs[end] = labels(from[end], to[end], n-1,
leftclosed=breaks[end-1] != breaks[end], rightclosed=extend)
leftclosed=breaks[end-1] != breaks[end],
rightclosed=coalesce(extend, false))
else
length(labels) == n-1 || throw(ArgumentError("labels must be of length $(n-1), but got length $(length(labels))"))
# Levels must have element type String for type stability of the result
Expand All @@ -204,8 +200,8 @@ function _cut(x::AbstractArray{T, N}, breaks::AbstractVector,
end

pool = CategoricalPool(levs, true)
S = T >: Missing ? Missing : M
CategoricalArray{Union{String, S}, N}(refs, pool)
S = T >: Missing || extend isa Missing ? Union{String, Missing} : String
CategoricalArray{S, N}(refs, pool)
end

"""
Expand Down
8 changes: 4 additions & 4 deletions test/15_extras.jl
Expand Up @@ -12,16 +12,16 @@ const ≅ = isequal
@test levels(x) == ["[1, 3)", "[3, 6)"]

err = @test_throws ArgumentError cut(Vector{Union{T, Int}}([2, 3, 5]), [3, 6])
@test err.value.msg == "value 2 (at index 1) does not fall inside the breaks: adapt them manually, or pass extend=true or allowoutside=true"
@test err.value.msg == "value 2 (at index 1) does not fall inside the breaks: adapt them manually, or pass extend=true or extend=missing"


err = @test_throws ArgumentError cut(Vector{Union{T, Int}}([2, 3, 5]), [2, 5])
@test err.value.msg == "value 5 (at index 3) does not fall inside the breaks: adapt them manually, or pass extend=true or allowoutside=true"
@test err.value.msg == "value 5 (at index 3) does not fall inside the breaks: adapt them manually, or pass extend=true or extend=missing"

if T === Missing
x = @inferred cut(Vector{Union{T, Int}}([2, 3, 5]), [2, 5], allowoutside=true)
x = @inferred cut(Vector{Union{T, Int}}([2, 3, 5]), [2, 5], extend=missing)
else
x = cut(Vector{Union{T, Int}}([2, 3, 5]), [2, 5], allowoutside=true)
x = cut(Vector{Union{T, Int}}([2, 3, 5]), [2, 5], extend=missing)
end
@test x ["[2, 5)", "[2, 5)", missing]
@test isa(x, CategoricalVector{Union{String, Missing}})
Expand Down

0 comments on commit 46b8c7f

Please sign in to comment.