Skip to content

Commit

Permalink
Implementing order functions where missings is the smallest value (#144)
Browse files Browse the repository at this point in the history
  • Loading branch information
alonsoC1s committed Apr 6, 2024
1 parent d034f67 commit cdeb5a7
Show file tree
Hide file tree
Showing 3 changed files with 112 additions and 4 deletions.
2 changes: 2 additions & 0 deletions README.md
Expand Up @@ -28,6 +28,8 @@ This package provides additional functionality for working with `missing` values
- `Missings.replace` to wrap a collection in a (possibly indexable) iterator replacing `missing` with another value
- `Missings.fail` to wrap a collection in a (possibly indexable) iterator throwing an error if `missing` is encountered
- `skipmissings` to loop through a collection of iterators excluding indices where any iterators are `missing`
- `missingsmallest(f)` to create a partial order function that treats `missing` as the smallest value and otherwise behaves like `f`
- `missingsmallest(x, y)` to compare two values like the standard `isless` function, except that `missing` is treated as the smallest value rather than the largest one

## Contributing and Questions

Expand Down
84 changes: 82 additions & 2 deletions src/Missings.jl
Expand Up @@ -2,7 +2,7 @@ module Missings

export allowmissing, disallowmissing, ismissing, missing, missings,
Missing, MissingException, levels, coalesce, passmissing, nonmissingtype,
skipmissings, emptymissing
skipmissings, emptymissing, missingsmallest

using Base: ismissing, missing, Missing, MissingException

Expand Down Expand Up @@ -514,4 +514,84 @@ julia> emptymissing(first)([1], 2)
"""
function emptymissing(f)
    # Build a wrapper around `f` that short-circuits on empty input.
    return function (x, args...; kwargs...)
        # An empty first argument yields `missing` without ever calling `f`.
        isempty(x) && return missing
        # Otherwise forward every positional and keyword argument unchanged.
        return f(x, args...; kwargs...)
    end
end

end # module
# Internal helper type (not exported). It wraps a user-supplied "less than"
# function in the `lt` field. Wrapping in a named type makes it possible to
# dispatch on the wrapper even when `lt` is an anonymous function.
struct MissingSmallest{T}
    lt::T
end

"""
missingsmallest(f)
Return a function of two arguments `x` and `y` that tests whether `x` is less
than `y` such that `missing` is always less than the other argument. In other
words, return a modified version of the partial order function `f` such that
`missing` is the smallest possible value, and all other non-`missing` values are
compared according to `f`.
The behavior of the standard `isless` function modified to treat `missing` as
the smallest value can be obtained by calling the 2-argument `missingsmallest(x,
y)` function. This is equivalent to `missingsmallest(isless)(x, y)`.
# Examples
```
julia> isshorter = missingsmallest((s1, s2) -> isless(length(s1), length(s2)));
julia> isshorter("short", "longstring")
true
julia> isshorter("longstring", "short")
false
julia> isshorter("", missing)
false
```
"""
missingsmallest(f) = MissingSmallest(f)

"""
missingsmallest(x, y)
The standard partial order `isless` modified so that `missing` is always the
smallest possible value:
- If neither argument is `missing`, the function behaves exactly as `isless`.
- If `y` is `missing` the result will be `false` regardless of the value of `x`.
- If `x` is `missing` the result will be `true` unless `y` is `missing`.
See also the 1-argument method which takes a partial ordering function (like
`isless`) and modifies it to treat `missing` as explained above. These functions
can be used together with sorting functions so that missing values are sorted
first. This is useful in particular so that when sorting in reverse order
missing values appear at the end.
# Examples
```jldoctest
julia> sort(v, lt=missingsmallest)
5-element Vector{Union{Missing, Int64}}:
missing
missing
1
2
10
julia> sort(v, lt=missingsmallest, rev=true)
5-element Vector{Union{Missing, Int64}}:
10
2
1
missing
missing
julia> missingsmallest(missing, Inf)
true
julia> missingsmallest(-Inf, missing)
false
julia> missingsmallest(missing, missing)
false
"""
missingsmallest(x, y) = missingsmallest(isless)(x, y)

# Call overload for the wrapper: compare `x` and `y` with `missing` ranked below
# every other value, delegating to the wrapped `lt` function otherwise.
function (ms::MissingSmallest)(x, y)
    # Nothing is strictly less than `missing`, not even `missing` itself.
    ismissing(y) && return false
    # `y` is known to be non-missing here, so a missing `x` is strictly smaller.
    ismissing(x) && return true
    # Neither argument is missing: defer to the wrapped comparison.
    return ms.lt(x, y)
end

end # module
30 changes: 28 additions & 2 deletions test/runtests.jl
Expand Up @@ -158,7 +158,7 @@ struct CubeRooter end
@test disallowmissing(Any[:a]) == [:a]
@test disallowmissing(Any[:a]) isa AbstractVector{Any}
@test_throws MethodError disallowmissing([1, missing])
@test_throws MethodError disallowmissing([missing])
@test_throws Union{MethodError, ArgumentError} disallowmissing([missing])

@test disallowmissing(Union{Int, Missing}[1 1]) == [1 1]
@test disallowmissing(Union{Int, Missing}[1 1]) isa AbstractArray{Int, 2}
Expand All @@ -167,7 +167,7 @@ struct CubeRooter end
@test disallowmissing([:a 1]) == [:a 1]
@test disallowmissing([:a 1]) isa AbstractArray{Any, 2}
@test_throws MethodError disallowmissing([1 missing])
@test_throws MethodError disallowmissing([missing missing])
@test_throws Union{MethodError, ArgumentError} disallowmissing([missing missing])

# Lifting
## functor
Expand Down Expand Up @@ -257,4 +257,30 @@ struct CubeRooter end
@test emptymissing(fun)(3, 1, c=2) == (1, 2)
end

@testset "missingsmallest" begin
    # Numbers: `missing` is below everything (even -Inf) and never below itself
    @test missingsmallest(missing, Inf)
    @test !missingsmallest(-Inf, missing)
    @test !missingsmallest(missing, missing)
    @test missingsmallest(3, 4)
    @test missingsmallest(-Inf, Inf)

    # Strings
    @test missingsmallest("a", "b")
    @test !missingsmallest("short", missing)
    @test missingsmallest(missing, "")

    # Tuples
    @test missingsmallest((1, 2), (3, 4))
    @test !missingsmallest((3, 4), (1, 2))
    @test missingsmallest(missing, (1e3, 1e4))

    # Custom order: compare strings by length, not lexicographically
    bylength(s1, s2) = isless(length(s1), length(s2))
    isshorter = missingsmallest(bylength)
    @test isshorter("short", "longstring")
    @test !isshorter("longstring", "short")
    @test isshorter(missing, "short")
    @test !isshorter("", missing)

    # The wrapped order only accepts two arguments, and wrapping `isless`
    # gives an object distinct from the `missingsmallest` function itself
    @test_throws MethodError missingsmallest(isless)(isless)
    @test missingsmallest !== missingsmallest(isless)
end

end

0 comments on commit cdeb5a7

Please sign in to comment.