Skip to content
This repository has been archived by the owner on May 4, 2019. It is now read-only.

Commit

Permalink
Restrict iterators to DataArrays
Browse files Browse the repository at this point in the history
Optimizations need to rely on the particular fields of DataArray.
This requires moving the code to dataarray.jl, since the DataArray type
is not yet defined in abstractdataarray.jl.

Rather than adding specialized iterators for PooledDataArray, which
is going to be deprecated, rely on the generic iterators provided
by Nulls, which are already faster than the current ones.
  • Loading branch information
nalimilan committed Oct 19, 2017
1 parent 5a438e1 commit d7ab848
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 55 deletions.
54 changes: 0 additions & 54 deletions src/abstractdataarray.jl
Original file line number Diff line number Diff line change
Expand Up @@ -47,57 +47,3 @@ true
```
"""
Base.isnull(a::AbstractArray{T}, i::Real) where {T} = (Null <: T) && isa(a[i], Null) # -> Bool

# Iterators
# TODO: Use values()
# Use DataValueIterator type?

# Iterator wrapper around an `AbstractDataArray` that throws a `NullException`
# as soon as iteration reaches an entry whose `na` flag is set.
# NOTE(review): `next` reads `da.na` and `da.data` directly; the
# `AbstractDataArray` bound alone does not show that every subtype has those
# fields — confirm.
struct EachFailNull{T<:AbstractDataArray}
    da::T
end

# Wrapping is lazy: nothing is checked until the iterator is advanced.
Nulls.fail(da::AbstractDataArray) = EachFailNull(da)

Base.length(itr::EachFailNull) = length(itr.da)
# presumably `Nulls.T` strips `Null` from the element type — confirm against Nulls
Base.eltype(itr::EachFailNull) = Nulls.T(eltype(itr.da))

# Iteration state is the linear index of the next entry to visit.
Base.start(itr::EachFailNull) = 1
Base.done(itr::EachFailNull, ind::Integer) = ind > length(itr)
function Base.next(itr::EachFailNull, ind::Integer)
    # Consult the `na` flag before the data slot is read.
    itr.da.na[ind] && throw(NullException())
    return (itr.da.data[ind], ind + 1)
end

# Iterator wrapper around an `AbstractDataArray` that only visits entries whose
# `na` flag is unset.
# NOTE(review): the methods below read `da.na` and `da.data` directly; the
# `AbstractDataArray` bound alone does not show that every subtype has those
# fields — confirm.
struct EachDropNull{T<:AbstractDataArray}
    da::T
end

Nulls.skip(da::AbstractDataArray) = EachDropNull(da)

# Starting just after `ind`, advance past every index whose `na` flag is set.
# The result is either the index of an unflagged entry or a value greater
# than `length(da)`.
function _next_nonna_ind(da::AbstractDataArray, ind::Int)
    n = length(da)
    candidate = ind + 1
    # `candidate <= n` is tested first, so `da.na[candidate]` is only read
    # for indices that do not exceed the array length.
    @inbounds while candidate <= n && da.na[candidate]
        candidate += 1
    end
    return candidate
end

# Total number of entries minus the number of set `na` flags.
Base.length(itr::EachDropNull) = length(itr.da) - sum(itr.da.na)
# presumably `Nulls.T` strips `Null` from the element type — confirm against Nulls
Base.eltype(itr::EachDropNull) = Nulls.T(eltype(itr.da))

# The state is always the index of the next unflagged entry (or past the end).
Base.start(itr::EachDropNull) = _next_nonna_ind(itr.da, 0)
Base.done(itr::EachDropNull, ind::Int) = ind > length(itr.da)
function Base.next(itr::EachDropNull, ind::Int)
    value = itr.da.data[ind]
    return (value, _next_nonna_ind(itr.da, ind))
end

# Iterator wrapper around an `AbstractDataArray` that yields `replacement` in
# place of every entry whose `na` flag is set.
# NOTE(review): `next` reads `da.na` and `da.data` directly; the
# `AbstractDataArray` bound alone does not show that every subtype has those
# fields — confirm.
struct EachReplaceNull{S<:AbstractDataArray, T}
    da::S
    replacement::T
end

# `replacement` is stored as given; no conversion is applied here.
Nulls.replace(da::AbstractDataArray, replacement::Any) = EachReplaceNull(da, replacement)

Base.length(itr::EachReplaceNull) = length(itr.da)
# NOTE(review): the reported element type is derived from the array only and
# does not take `typeof(itr.replacement)` into account — confirm this is intended.
Base.eltype(itr::EachReplaceNull) = Nulls.T(eltype(itr.da))

# Iteration state is the linear index of the next entry to visit.
Base.start(itr::EachReplaceNull) = 1
Base.done(itr::EachReplaceNull, ind::Integer) = ind > length(itr)
function Base.next(itr::EachReplaceNull, ind::Integer)
    if itr.da.na[ind]
        return (itr.replacement, ind + 1)
    end
    return (itr.da.data[ind], ind + 1)
end
53 changes: 52 additions & 1 deletion src/dataarray.jl
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,57 @@ function Base.convert{T, N}(::Type{Array}, da::DataArray{T, N}, replacement::Any
return convert(Array{T, N}, da, replacement)
end


# Iterator wrapper around a `DataArray` that throws a `NullException` as soon
# as iteration reaches an entry whose `na` flag is set.
struct EachFailNull{T<:DataArray}
    da::T
end

# Wrapping is lazy: nothing is checked until the iterator is advanced.
Nulls.fail(da::DataArray) = EachFailNull(da)

Base.length(itr::EachFailNull) = length(itr.da)
# presumably `Nulls.T` strips `Null` from the element type — confirm against Nulls
Base.eltype(itr::EachFailNull) = Nulls.T(eltype(itr.da))

# Iteration state is the linear index of the next entry to visit.
Base.start(itr::EachFailNull) = 1
Base.done(itr::EachFailNull, ind::Integer) = ind > length(itr)
function Base.next(itr::EachFailNull, ind::Integer)
    # Consult the `na` flag before the data slot is read.
    itr.da.na[ind] && throw(NullException())
    return (itr.da.data[ind], ind + 1)
end

# Iterator wrapper around a `DataArray` that only visits entries whose `na`
# flag is unset.
struct EachDropNull{T<:DataArray}
    da::T
end

Nulls.skip(da::DataArray) = EachDropNull(da)

# Starting just after `ind`, advance past every index whose `na` flag is set.
# The result is either the index of an unflagged entry or a value greater
# than `length(da)`.
function _next_nonna_ind(da::DataArray, ind::Int)
    n = length(da)
    candidate = ind + 1
    # `candidate <= n` is tested first, so `da.na[candidate]` is only read
    # for indices that do not exceed the array length.
    @inbounds while candidate <= n && da.na[candidate]
        candidate += 1
    end
    return candidate
end

# Total number of entries minus the number of set `na` flags.
Base.length(itr::EachDropNull) = length(itr.da) - sum(itr.da.na)
# presumably `Nulls.T` strips `Null` from the element type — confirm against Nulls
Base.eltype(itr::EachDropNull) = Nulls.T(eltype(itr.da))

# The state is always the index of the next unflagged entry (or past the end).
Base.start(itr::EachDropNull) = _next_nonna_ind(itr.da, 0)
Base.done(itr::EachDropNull, ind::Int) = ind > length(itr.da)
function Base.next(itr::EachDropNull, ind::Int)
    value = itr.da.data[ind]
    return (value, _next_nonna_ind(itr.da, ind))
end

# Iterator wrapper around a `DataArray` that yields `replacement` in place of
# every entry whose `na` flag is set.
struct EachReplaceNull{S<:DataArray, T}
    da::S
    replacement::T
end

# `replacement` is stored as given; no conversion is applied here.
Nulls.replace(da::DataArray, replacement::Any) = EachReplaceNull(da, replacement)

Base.length(itr::EachReplaceNull) = length(itr.da)
# NOTE(review): the reported element type is derived from the array only and
# does not take `typeof(itr.replacement)` into account — confirm this is intended.
Base.eltype(itr::EachReplaceNull) = Nulls.T(eltype(itr.da))

# Iteration state is the linear index of the next entry to visit.
Base.start(itr::EachReplaceNull) = 1
Base.done(itr::EachReplaceNull, ind::Integer) = ind > length(itr)
function Base.next(itr::EachReplaceNull, ind::Integer)
    if itr.da.na[ind]
        return (itr.replacement, ind + 1)
    end
    return (itr.da.data[ind], ind + 1)
end

# Specialised `collect` for the drop-null iterator over a `DataVector`: select
# the `data` entries whose `na` flag is unset with a single logical-index
# operation instead of stepping through the iterator.
function Base.collect(itr::EachDropNull{<:DataVector}) # -> Vector
    da = itr.da
    keep = .!da.na
    return da.data[keep]
end
# Specialised `collect` for the fail-on-null iterator over a `DataVector`.
#
# Iterating an `EachFailNull` throws `NullException` at the first entry whose
# `na` flag is set, so collecting must fail the same way. The `na` mask is
# checked up front; only when no flag is set is the underlying `data` vector
# copied and returned.
#
# Throws `NullException` if any entry of `itr.da` is flagged in `na`.
function Base.collect(itr::EachFailNull{<:DataVector}) # -> Vector
    da = itr.da
    any(da.na) && throw(NullException())
    return copy(da.data)
end

Expand Down Expand Up @@ -396,4 +447,4 @@ end
function Nulls.levels(da::DataArray) # -> DataVector{T}
    # `finduniques` hands back two values; only the first one is used to
    # build the result, the second is deliberately ignored here.
    uniques, _firstnull = finduniques(da)
    return DataArray(uniques)
end
end

0 comments on commit d7ab848

Please sign in to comment.