Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use custom iterators for replace(), skip() and fail() #50

Merged
merged 3 commits into from
Oct 19, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
169 changes: 166 additions & 3 deletions src/Nulls.jl
Original file line number Diff line number Diff line change
Expand Up @@ -115,9 +115,172 @@ xor(::Integer, ::Null) = null
# Concatenating a string with `null` on either side yields `null`.
*(::Null, ::AbstractString) = null
*(::AbstractString, ::Null) = null

# Generator-based version: lazily yields `x` in place of every `null` found in `itr`.
function replace(itr, x)
    return (ifelse(elem !== null, elem, x) for elem in itr)
end
# Generator-based version: lazily yields only the elements of `itr` that are not `null`.
function skip(itr)
    return (elem for elem in itr if elem !== null)
end
# Generator-based version: lazily yields the elements of `itr`, throwing a
# `NullException` as soon as a `null` is reached.
function fail(itr)
    return ((elem !== null ? elem : throw(NullException())) for elem in itr)
end
# Iterators
"""
    Nulls.replace(itr, replacement)

Wrap the iterable `itr` in an iterator that yields `replacement` wherever `itr`
yields [`null`](@ref). When `itr` has a size, the wrapper reports the same size.
`replacement` is converted to the element type of `itr` when its type differs.

See also: [`Nulls.skip`](@ref), [`Nulls.fail`](@ref)

# Examples
```jldoctest
julia> collect(Nulls.replace([1, null, 2], 0))
3-element Array{Int64,1}:
 1
 0
 2

julia> collect(Nulls.replace([1 null; 2 null], 0))
2×2 Array{Int64,2}:
 1  0
 2  0

```
"""
function replace(itr, replacement)
    converted = convert(eltype(itr), replacement)
    return EachReplaceNull(itr, converted)
end
# Iterator wrapper holding an iterable `x` and the value `replacement` that stands in
# for its nulls during iteration.
struct EachReplaceNull{I, R}
    x::I
    replacement::R
end
# Iteration interface for EachReplaceNull.
# Size and element-type traits, length, size and iteration state are all forwarded to
# the wrapped iterable.
Base.iteratorsize(::Type{<:EachReplaceNull{T}}) where {T} =
    Base.iteratorsize(T)
Base.iteratoreltype(::Type{<:EachReplaceNull{T}}) where {T} =
    Base.iteratoreltype(T)
Base.length(itr::EachReplaceNull) = length(itr.x)
Base.size(itr::EachReplaceNull) = size(itr.x)
Base.start(itr::EachReplaceNull) = start(itr.x)
Base.done(itr::EachReplaceNull, state) = done(itr.x, state)
# `eltype` is also defined on the iterator *type*: generic code queries
# `eltype(typeof(itr))`, which would otherwise fall back to `Any`.
# The instance method is kept so existing behaviour on instances is unchanged.
# NOTE(review): `Nulls.T` presumably strips `Null` from the element type; it is
# defined outside this section.
Base.eltype(::Type{<:EachReplaceNull{T}}) where {T} = Nulls.T(eltype(T))
Base.eltype(itr::EachReplaceNull) = Nulls.T(eltype(itr.x))
# Yield the next element of the wrapped iterable, substituting `itr.replacement`
# when that element is a `Null`.
@inline function Base.next(itr::EachReplaceNull, state)
    v, s = next(itr.x, state)
    return (v isa Null ? itr.replacement : v, s)
end

"""
Nulls.skip(itr)

Return an iterator wrapping iterable `itr` which skips [`null`](@ref) values.

Use [`collect`](@ref) to obtain an `Array` containing the non-`null` values in
`itr`. Note that even if `itr` is a multidimensional array, the result will always
be a `Vector` since it is not possible to remove nulls while preserving dimensions
of the input.

See also: [`Nulls.replace`](@ref), [`Nulls.fail`](@ref)

# Examples
```jldoctest
julia> collect(Nulls.skip([1, null, 2]))
2-element Array{Int64,1}:
1
2

julia> collect(Nulls.skip([1 null; 2 null]))
2-element Array{Int64,1}:
1
2

```
"""
function skip(itr)
    # All the work happens lazily in the wrapper's iteration methods.
    return EachSkipNull(itr)
end
# Iterator wrapper holding the iterable `x` whose nulls are skipped during iteration.
struct EachSkipNull{I}
    x::I
end
# The number of non-null elements is not known up front, so the size is always unknown.
Base.iteratorsize(::Type{<:EachSkipNull}) =
    Base.SizeUnknown()
# Whether an element type is available is inherited from the wrapped iterable type.
Base.iteratoreltype(::Type{EachSkipNull{T}}) where {T} =
    Base.iteratoreltype(T)
# `eltype` is also defined on the iterator *type*: generic code queries
# `eltype(typeof(itr))`, which would otherwise fall back to `Any`.
# The instance method is kept so existing behaviour on instances is unchanged.
# NOTE(review): `Nulls.T` presumably strips `Null` from the element type; it is
# defined outside this section.
Base.eltype(::Type{EachSkipNull{T}}) where {T} = Nulls.T(eltype(T))
Base.eltype(itr::EachSkipNull) = Nulls.T(eltype(itr.x))
# Generic fallback for arbitrary iterables. A value cannot be read twice, so the next
# non-null element found while scanning ahead in start() or next() is carried inside
# the iteration state. This makes the state type-unstable, since the carried value is
# null when the input has no non-null element left.
# As of Julia 0.6 and early 0.7, this instability kills performance.

# Advance through `x` from state `s` until a non-null value is read or `x` is
# exhausted; return that value (null if none was found) with the resulting state.
@inline function _scan_to_nonnull(x, s)
    found = null
    @inbounds while !done(x, s) && found isa Null
        found, s = next(x, s)
    end
    return (found, s)
end
@inline Base.start(itr::EachSkipNull) = _scan_to_nonnull(itr.x, start(itr.x))
@inline function Base.done(itr::EachSkipNull, state)
    pending, inner = state
    return isnull(pending) && done(itr.x, inner)
end
@inline function Base.next(itr::EachSkipNull, state)
    current, inner = state
    return (current, _scan_to_nonnull(itr.x, inner))
end
# Optimized implementation for AbstractArray, relying on the ability to access x[i] twice:
# once in done() to find the next non-null entry, and once in next() to return it.
# This works around the type instability problem of the generic fallback.
# Starting from state `s` of `eachindex(x)`, move past every index whose entry is a
# `Null` and return the state positioned at the first non-null entry. If all remaining
# entries are `Null`, the returned state is the exhausted one.
@inline function _next_nonnull_ind(x::AbstractArray, s)
idx = eachindex(x)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I couldn't find a simpler way to work with indices which would be completely generic. Linear indices would of course work, but they would be slow for LinearSlow arrays.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Couldn't we dispatch and use linear indices for most arrays but a specialized implementation for LinearSlow?

@inbounds while !done(idx, s)
# Peek at the index the current state would yield, without committing to it yet.
i, new_s = next(idx, s)
# Stop at the first non-null entry, leaving `s` pointing at it.
x[i] isa Null || break
# The entry is null: commit the advance and keep scanning.
s = new_s
end
s
end
# Array-specialised iteration: the state is a state of `eachindex(itr.x)` that always
# points at a non-null entry (or is exhausted).
@inline function Base.start(itr::EachSkipNull{<:AbstractArray})
    arr = itr.x
    return _next_nonnull_ind(arr, start(eachindex(arr)))
end
@inline function Base.done(itr::EachSkipNull{<:AbstractArray}, state)
    return done(eachindex(itr.x), state)
end
@inline function Base.next(itr::EachSkipNull{<:AbstractArray}, state)
    arr = itr.x
    i, after = next(eachindex(arr), state)
    # The entry at `i` is asserted to have the iterator's element type.
    @inbounds v = arr[i]::eltype(itr)
    return (v, _next_nonnull_ind(arr, after))
end

"""
Nulls.fail(itr)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I still don't really understand the use for this iterator/function. Can you remind me?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well, it can be used to be sure you don't pass an iterator containing nulls to a function which would accept it silently. It's faster than any(isnull, x) since checking happens on the fly. Currently sum(Nulls.fail(x)) is also much faster than sum(x) since there's no type instability, but with compiler improvements I guess this difference could go away.


Return an iterator wrapping iterable `itr` which will throw a [`NullException`](@ref)
if a [`null`](@ref) value is found.

Use [`collect`](@ref) to obtain an `Array` containing the resulting values.
If `itr` is an array, the resulting array will have the same dimensions.

See also: [`Nulls.skip`](@ref), [`Nulls.replace`](@ref)

# Examples
```jldoctest
julia> collect(Nulls.fail([1 2; 3 4]))
2×2 Array{Int64,2}:
1 2
3 4

julia> collect(Nulls.fail([1, null, 2]))
ERROR: NullException()
[...]
```
"""
function fail(itr)
    # The null check happens lazily, element by element, in the wrapper's `next`.
    return EachFailNull(itr)
end
# Iterator wrapper holding the iterable `x` that is checked for nulls during iteration.
struct EachFailNull{I}
    x::I
end
# Iteration interface for EachFailNull.
# Size and element-type traits, length, size and iteration state are all forwarded to
# the wrapped iterable.
Base.iteratorsize(::Type{EachFailNull{T}}) where {T} =
    Base.iteratorsize(T)
Base.iteratoreltype(::Type{EachFailNull{T}}) where {T} =
    Base.iteratoreltype(T)
Base.length(itr::EachFailNull) = length(itr.x)
Base.size(itr::EachFailNull) = size(itr.x)
Base.start(itr::EachFailNull) = start(itr.x)
Base.done(itr::EachFailNull, state) = done(itr.x, state)
# `eltype` is also defined on the iterator *type*: generic code queries
# `eltype(typeof(itr))`, which would otherwise fall back to `Any`.
# The instance method is kept so existing behaviour on instances is unchanged.
# NOTE(review): `Nulls.T` presumably strips `Null` from the element type; it is
# defined outside this section.
Base.eltype(::Type{EachFailNull{T}}) where {T} = Nulls.T(eltype(T))
Base.eltype(itr::EachFailNull) = Nulls.T(eltype(itr.x))
# Yield the next element of the wrapped iterable, throwing a `NullException` if that
# element is null.
@inline function Base.next(itr::EachFailNull, state)
    v, s = next(itr.x, state)
    # NOTE: v isa Null currently gives incorrect code, cf. JuliaLang/julia#24177
    isnull(v) && throw(NullException())
    return (v::eltype(itr), s)
end

"""
coalesce(x, y...)
Expand Down
71 changes: 68 additions & 3 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -123,10 +123,75 @@ using Base.Test, Nulls

# `null` is shown as the literal text "null".
@test sprint(show, null) == "null"

# One-line round trips through `collect` for each of the three wrappers.
@test collect(Nulls.replace([1, 2, null, 4], 3)) == collect(1:4)
@test collect(Nulls.skip([1, 2, null, 4])) == [1, 2, 4]
@test collect(Nulls.fail([1, 2, 3, 4])) == [1, 2, 3, 4]
# Nulls.replace on a vector, with a replacement of the same type as the elements.
x = Nulls.replace([1, 2, null, 4], 3)
@test eltype(x) === Int
@test length(x) == 4
@test size(x) == (4,)
@test collect(x) == collect(1:4)
@test collect(x) isa Vector{Int}
# Replacement given as a Float64: the element type and collected result stay Int.
x = Nulls.replace([1, 2, null, 4], 3.0)
@test eltype(x) === Int
@test length(x) == 4
@test size(x) == (4,)
@test collect(x) == collect(1:4)
@test collect(x) isa Vector{Int}
# Matrix input: the 2×2 shape is preserved by the wrapper and by collect.
x = Nulls.replace([1 2; null 4], 3)
@test eltype(x) === Int
@test length(x) == 4
@test size(x) == (2, 2)
@test collect(x) == [1 2; 3 4]
@test collect(x) isa Matrix{Int}
# Generator input: the wrapper reports eltype Any, yet collect still yields Vector{Int}.
x = Nulls.replace((v for v in [null, 1, null, 2, 4]), 0)
@test length(x) == 5
@test size(x) == (5,)
@test eltype(x) === Any
@test collect(x) == [0, 1, 0, 2, 4]
@test collect(x) isa Vector{Int}

# Nulls.fail on a vector without nulls: values, length and size pass through.
x = Nulls.fail([1, 2, 3, 4])
@test eltype(x) === Int
@test length(x) == 4
@test size(x) == (4,)
@test collect(x) == [1, 2, 3, 4]
@test collect(x) isa Vector{Int}
# Matrix input: the 2×2 shape is preserved.
x = Nulls.fail([1 2; 3 4])
@test eltype(x) === Int
@test length(x) == 4
@test size(x) == (2, 2)
@test collect(x) == [1 2; 3 4]
@test collect(x) isa Matrix{Int}
# Collecting an input that contains a null throws.
@test_throws NullException collect(Nulls.fail([1, 2, null, 4]))
# Generator input: the wrapper reports eltype Any, yet collect still yields Vector{Int}.
x = Nulls.fail(v for v in [1, 2, 4])
@test eltype(x) === Any
@test length(x) == 3
@test size(x) == (3,)
@test collect(x) == [1, 2, 4]
@test collect(x) isa Vector{Int}

# Nulls.skip on a vector: nulls are dropped and the element type is Int.
x = Nulls.skip([1, 2, null, 4])
@test eltype(x) === Int
@test collect(x) == [1, 2, 4]
@test collect(x) isa Vector{Int}
# Matrix input: the collected result is a flat Vector.
x = Nulls.skip([1 2; null 4])
@test eltype(x) === Int
@test collect(x) == [1, 2, 4]
@test collect(x) isa Vector{Int}
# Input holding only a null. Note that `x` here is the collected array, not the iterator.
x = collect(Nulls.skip([null]))
@test eltype(x) === Union{}
@test isempty(collect(x))
@test collect(x) isa Vector{Union{}}
# Empty input with a Union{Int, Null} element type. `x` is again the collected array.
x = collect(Nulls.skip(Union{Int, Null}[]))
@test eltype(x) === Int
@test isempty(collect(x))
@test collect(x) isa Vector{Int}
# Leading, trailing and consecutive nulls are all skipped.
x = Nulls.skip([null, null, 1, 2, null, 4, null, null])
@test eltype(x) === Int
@test collect(x) == [1, 2, 4]
@test collect(x) isa Vector{Int}
# Generator input (generic fallback path): eltype is Any, collect yields Vector{Int}.
x = Nulls.skip(v for v in [null, 1, null, 2, 4])
@test eltype(x) === Any
@test collect(x) == [1, 2, 4]
@test collect(x) isa Vector{Int}

# coalesce returns the non-null argument whichever position it is in.
@test Nulls.coalesce(null, 1) === 1
@test Nulls.coalesce(1, null) === 1
Expand Down