Skip to content

Commit

Permalink
Use custom iterators for replace(), skip() and fail()
Browse files Browse the repository at this point in the history
This dramatically improves the performance of skip() on arrays by getting
rid of the type instability which is currently not well handled.
This optimization cannot be applied to non-array iterables since it relies
on passing indices and accessing an entry several times in some cases.
However, forcing inlining makes the code somewhat faster even for non-arrays.
Performance improvements are smaller but still significant for replace()
and skip(). There is a 2× regression when passing a generator to fail(),
but the gain for the array case is worth it.

The second advantage of using custom iterators is that eltype() returns
Nulls.T(eltype(x)) when x is an array, while when using plain generators
it returned Any.
  • Loading branch information
nalimilan committed Oct 15, 2017
1 parent e6b3163 commit 2060fe5
Show file tree
Hide file tree
Showing 2 changed files with 226 additions and 6 deletions.
167 changes: 164 additions & 3 deletions src/Nulls.jl
Original file line number Diff line number Diff line change
Expand Up @@ -115,9 +115,170 @@ xor(::Integer, ::Null) = null
# `*` between a `Null` and an `AbstractString`, in either order, returns `null`.
*(d::Null, x::AbstractString) = null
*(d::AbstractString, x::Null) = null

# Generator-based `replace`, `skip` and `fail`. Methods with the same names and arities
# are defined again further down in this listing, backed by dedicated iterator structs.
# Yield `x` in place of every `null` produced by `itr`.
replace(itr, x) = (ifelse(v !== null, v, x) for v in itr)
# Yield only the values of `itr` that are not `null`.
skip(itr) = (v for v in itr if v !== null)
# Yield the values of `itr` unchanged, throwing `NullException` when a `null` is reached.
fail(itr) = (v !== null ? v : throw(NullException()) for v in itr)
# Iterators
"""
    Nulls.replace(itr, replacement)

Return an iterator wrapping iterable `itr` which replaces [`null`](@ref) values with
`replacement`. When applicable, the size of `itr` is preserved.

See also: [`Nulls.skip`](@ref), [`Nulls.fail`](@ref)

# Examples
```jldoctest
julia> collect(Nulls.replace([1, null, 2], 0))
3-element Array{Int64,1}:
 1
 0
 2

julia> collect(Nulls.replace([1 null; 2 null], 0))
2×2 Array{Int64,2}:
 1  0
 2  0
```
"""
replace(itr, replacement) = EachReplaceNull(itr, replacement)

# Lazy wrapper returned by `replace`: iterates `x`, yielding `replacement` in place of
# every `null` element.
struct EachReplaceNull{T, U}
    x::T
    replacement::U
end

# Size/eltype traits, length and size are forwarded from the wrapped iterable.
Base.iteratorsize(::Type{<:EachReplaceNull{T}}) where {T} = Base.iteratorsize(T)
Base.iteratoreltype(::Type{<:EachReplaceNull{T}}) where {T} = Base.iteratoreltype(T)
Base.length(itr::EachReplaceNull) = length(itr.x)
Base.size(itr::EachReplaceNull) = size(itr.x)

# `eltype` is defined on the iterator *type*, as the iteration interface expects, so
# that `eltype(typeof(itr))` is as precise as `eltype(itr)` (which falls back to this
# method). The result is the null-stripped element type of the wrapped iterable, widened
# by the type of the replacement value.
Base.eltype(::Type{EachReplaceNull{T, U}}) where {T, U} = Union{Nulls.T(eltype(T)), U}

# The iteration state is exactly the wrapped iterable's state.
Base.start(itr::EachReplaceNull) = start(itr.x)
Base.done(itr::EachReplaceNull, state) = done(itr.x, state)
@inline function Base.next(itr::EachReplaceNull, state)
    v, s = next(itr.x, state)
    # The type assertion narrows the yielded value to the declared element type.
    return ((isnull(v) ? itr.replacement : v)::eltype(itr), s)
end

"""
    Nulls.skip(itr)

Return an iterator wrapping iterable `itr` which skips [`null`](@ref) values.

Use [`collect`](@ref) to obtain an `Array` containing the non-`null` values in
`itr`. Note that even if `itr` is a multidimensional array, the result will always
be a `Vector` since it is not possible to remove nulls while preserving dimensions
of the input.

See also: [`Nulls.replace`](@ref), [`Nulls.fail`](@ref)

# Examples
```jldoctest
julia> collect(Nulls.skip([1, null, 2]))
2-element Array{Int64,1}:
 1
 2

julia> collect(Nulls.skip([1 null; 2 null]))
2-element Array{Int64,1}:
 1
 2
```
"""
skip(itr) = EachSkipNull(itr)

# Lazy wrapper returned by `skip`: iterates `x`, omitting `null` elements.
struct EachSkipNull{T}
    x::T
end

# The number of elements is reported as unknown; the eltype trait is forwarded from the
# wrapped iterable's type.
Base.iteratorsize(::Type{<:EachSkipNull}) = Base.SizeUnknown()
Base.iteratoreltype(::Type{EachSkipNull{T}}) where {T} = Base.iteratoreltype(T)

# `eltype` is defined on the iterator *type*, as the iteration interface expects, so
# that `eltype(typeof(itr))` is as precise as `eltype(itr)` (which falls back to this
# method). The result is the element type of the wrapped iterable with `Null` removed.
Base.eltype(::Type{EachSkipNull{T}}) where {T} = Nulls.T(eltype(T))
# Generic iteration for arbitrary iterables. A value cannot be read twice, so the next
# non-null element is fetched ahead of time and carried inside the iterator state as
# `(pending_value, inner_state)`. The pending value is `null` when the input has no
# non-null element left, which makes the state type-unstable.
# As of Julia 0.6 and early 0.7, this instability kills performance.

# Advance `x` from state `s` until a non-null value is produced or `x` is exhausted.
# Returns `(value, state)`; `value` is `null` if no non-null element was found.
@inline function _skip_to_nonnull(x, s)
    v = null
    @inbounds while !done(x, s) && isnull(v)
        v, s = next(x, s)
    end
    return (v, s)
end

@inline Base.start(itr::EachSkipNull) = _skip_to_nonnull(itr.x, start(itr.x))

@inline function Base.done(itr::EachSkipNull, state)
    pending, inner = state
    # Finished only when nothing is pending and the wrapped iterable is exhausted.
    return isnull(pending) && done(itr.x, inner)
end

@inline function Base.next(itr::EachSkipNull, state)
    current, inner = state
    # Look ahead for the following non-null value before handing out the current one.
    upcoming = _skip_to_nonnull(itr.x, inner)
    return (current::eltype(itr), upcoming)
end
# Specialisation for AbstractArray. Because `x[i]` can be read more than once, the state
# is just an `eachindex(x)` state positioned on the next non-null entry: the entry is
# inspected while searching for it and read again in next() to return it. This avoids
# carrying a possibly-null value in the state, as the generic fallback has to.

# Starting from index state `s`, return the first state whose next index refers to a
# non-null entry of `x`, or the exhausted state if there is none.
@inline function _next_nonnull_ind(x::AbstractArray, s)
    inds = eachindex(x)
    state = s
    @inbounds while !done(inds, state)
        i, following = next(inds, state)
        if isnull(x[i])
            state = following
        else
            break
        end
    end
    return state
end

@inline function Base.start(itr::EachSkipNull{<:AbstractArray})
    x = itr.x
    return _next_nonnull_ind(x, start(eachindex(x)))
end

@inline function Base.done(itr::EachSkipNull{<:AbstractArray}, state)
    return done(eachindex(itr.x), state)
end

@inline function Base.next(itr::EachSkipNull{<:AbstractArray}, state)
    x = itr.x
    i, after = next(eachindex(x), state)
    @inbounds v = x[i]::eltype(itr)
    # Position the returned state on the following non-null entry.
    return (v, _next_nonnull_ind(x, after))
end

"""
    Nulls.fail(itr)

Return an iterator wrapping iterable `itr` which will throw a [`NullException`](@ref)
if a [`null`](@ref) value is found.

Use [`collect`](@ref) to obtain an `Array` containing the resulting values.
If `itr` is an array, the resulting array will have the same dimensions.

See also: [`Nulls.skip`](@ref), [`Nulls.replace`](@ref)

# Examples
```jldoctest
julia> collect(Nulls.fail([1 2; 3 4]))
2×2 Array{Int64,2}:
 1  2
 3  4

julia> collect(Nulls.fail([1, null, 2]))
ERROR: NullException()
[...]
```
"""
fail(itr) = EachFailNull(itr)

# Lazy wrapper returned by `fail`: iterates `x`, throwing `NullException` when a `null`
# element is reached.
struct EachFailNull{T}
    x::T
end

# Size/eltype traits, length and size are forwarded from the wrapped iterable.
Base.iteratorsize(::Type{EachFailNull{T}}) where {T} = Base.iteratorsize(T)
Base.iteratoreltype(::Type{EachFailNull{T}}) where {T} = Base.iteratoreltype(T)
Base.length(itr::EachFailNull) = length(itr.x)
Base.size(itr::EachFailNull) = size(itr.x)

# `eltype` is defined on the iterator *type*, as the iteration interface expects, so
# that `eltype(typeof(itr))` is as precise as `eltype(itr)` (which falls back to this
# method). The result is the element type of the wrapped iterable with `Null` removed.
Base.eltype(::Type{EachFailNull{T}}) where {T} = Nulls.T(eltype(T))

# The iteration state is exactly the wrapped iterable's state.
Base.start(itr::EachFailNull) = start(itr.x)
Base.done(itr::EachFailNull, state) = done(itr.x, state)
@inline function Base.next(itr::EachFailNull, state)
    v, s = next(itr.x, state)
    isnull(v) && throw(NullException())
    # The type assertion narrows the yielded value to the declared element type.
    return (v::eltype(itr), s)
end

"""
coalesce(x, y...)
Expand Down
65 changes: 62 additions & 3 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -123,10 +123,69 @@ using Base.Test, Nulls

# `null` is shown as the text "null".
@test sprint(show, null) == "null"

# One-line checks: collect the result of each wrapper applied to a vector.
@test collect(Nulls.replace([1, 2, null, 4], 3)) == collect(1:4)
@test collect(Nulls.skip([1, 2, null, 4])) == [1, 2, 4]
@test collect(Nulls.fail([1, 2, 3, 4])) == [1, 2, 3, 4]
# Nulls.replace on a vector: eltype, length and size are reported, and collect gives
# a Vector{Int}.
replaced_vec = Nulls.replace([1, 2, null, 4], 3)
@test eltype(replaced_vec) === Int
@test size(replaced_vec) == (4,)
@test length(replaced_vec) == 4
vec_result = collect(replaced_vec)
@test vec_result isa Vector{Int}
@test vec_result == collect(1:4)

# Nulls.replace on a matrix: the 2×2 shape is kept.
replaced_mat = Nulls.replace([1 2; null 4], 3)
@test eltype(replaced_mat) === Int
@test size(replaced_mat) == (2, 2)
@test length(replaced_mat) == 4
mat_result = collect(replaced_mat)
@test mat_result isa Matrix{Int}
@test mat_result == [1 2; 3 4]

# Nulls.replace on a generator: eltype is Any, yet collect still gives a Vector{Int}.
replaced_gen = Nulls.replace((v for v in [null, 1, null, 2, 4]), 0)
@test size(replaced_gen) == (5,)
@test length(replaced_gen) == 5
@test eltype(replaced_gen) === Any
gen_result = collect(replaced_gen)
@test gen_result isa Vector{Int}
@test gen_result == [0, 1, 0, 2, 4]

# Nulls.fail on a null-free vector: values pass through unchanged.
checked_vec = Nulls.fail([1, 2, 3, 4])
@test eltype(checked_vec) === Int
@test size(checked_vec) == (4,)
@test length(checked_vec) == 4
vec_result = collect(checked_vec)
@test vec_result isa Vector{Int}
@test vec_result == [1, 2, 3, 4]

# Nulls.fail on a null-free matrix: the 2×2 shape is kept.
checked_mat = Nulls.fail([1 2; 3 4])
@test eltype(checked_mat) === Int
@test size(checked_mat) == (2, 2)
@test length(checked_mat) == 4
mat_result = collect(checked_mat)
@test mat_result isa Matrix{Int}
@test mat_result == [1 2; 3 4]

# Collecting over an input that contains null throws.
@test_throws NullException collect(Nulls.fail([1, 2, null, 4]))

# Nulls.fail on a generator: eltype is Any, yet collect still gives a Vector{Int}.
checked_gen = Nulls.fail(v for v in [1, 2, 4])
@test eltype(checked_gen) === Any
@test size(checked_gen) == (3,)
@test length(checked_gen) == 3
gen_result = collect(checked_gen)
@test gen_result isa Vector{Int}
@test gen_result == [1, 2, 4]

# Nulls.skip on a vector.
x = Nulls.skip([1, 2, null, 4])
@test eltype(x) === Int
@test collect(x) == [1, 2, 4]
@test collect(x) isa Vector{Int}
# Nulls.skip on a matrix: the result is flattened to a Vector.
x = Nulls.skip([1 2; null 4])
@test eltype(x) === Int
@test collect(x) == [1, 2, 4]
@test collect(x) isa Vector{Int}
# Input containing only null. `x` is bound to the iterator itself (not to its collected
# result) so that `eltype(x)` checks the iterator, as in the other cases.
x = Nulls.skip([null])
@test eltype(x) === Union{}
@test isempty(collect(x))
@test collect(x) isa Vector{Union{}}
# Empty input whose element type allows null; again `x` is the iterator.
x = Nulls.skip(Union{Int, Null}[])
@test eltype(x) === Int
@test isempty(collect(x))
@test collect(x) isa Vector{Int}
# Leading, trailing and consecutive nulls.
x = Nulls.skip([null, null, 1, 2, null, 4, null, null])
@test eltype(x) === Int
@test collect(x) == [1, 2, 4]
@test collect(x) isa Vector{Int}
# Nulls.skip on a generator: eltype is Any, yet collect still gives a Vector{Int}.
x = Nulls.skip(v for v in [null, 1, null, 2, 4])
@test eltype(x) === Any
@test collect(x) == [1, 2, 4]
@test collect(x) isa Vector{Int}

# With one null and one non-null argument, coalesce returns the non-null one
# regardless of argument order.
@test Nulls.coalesce(null, 1) === 1
@test Nulls.coalesce(1, null) === 1
Expand Down

0 comments on commit 2060fe5

Please sign in to comment.