Skip to content

Commit

Permalink
Merge 1e43837 into d7ba717
Browse files Browse the repository at this point in the history
  • Loading branch information
matthieugomez committed Oct 4, 2020
2 parents d7ba717 + 1e43837 commit 984640c
Show file tree
Hide file tree
Showing 2 changed files with 157 additions and 0 deletions.
2 changes: 2 additions & 0 deletions src/DataFrames.jl
Original file line number Diff line number Diff line change
Expand Up @@ -117,5 +117,7 @@ include("dataframe/sort.jl")
include("deprecated.jl")

include("other/tables.jl")
include("wheredataframe/wheredataframe.jl")
export where

end # module DataFrames
155 changes: 155 additions & 0 deletions src/wheredataframe/wheredataframe.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
"""
    WhereDataFrame{D<:AbstractDataFrame, T<:AbstractVector{Int}}

The result of a [`where`](@ref) operation on an `AbstractDataFrame`: a
subset of the rows of an `AbstractDataFrame`.

Not meant to be constructed directly, see `where`.
"""
struct WhereDataFrame{D<:AbstractDataFrame, T<:AbstractVector{Int}}
# the data frame the `where` clause applies to
parent::D
# indices of the selected rows of `parent`
rows::T
end

# Build a `WhereDataFrame` from a Boolean mask holding one entry per row of `parent`.
# The mask is converted to the indices of its `true` entries.
Base.@propagate_inbounds function WhereDataFrame(parent::AbstractDataFrame,
                                                 rows::AbstractVector{Bool})
    nmask, nparent = length(rows), nrow(parent)
    nmask == nparent ||
        throw(ArgumentError("invalid length of `AbstractVector{Bool}` row index" *
                            " (got $(nmask), expected $(nparent))"))
    return WhereDataFrame(parent, findall(rows))
end

# Return the row indices stored in `wdf` (read with `getfield`, bypassing `getproperty`).
function rows(wdf::WhereDataFrame)
    return getfield(wdf, :rows)
end
# Return the data frame wrapped by `wdf` (read with `getfield`, bypassing `getproperty`).
function Base.parent(wdf::WhereDataFrame)
    return getfield(wdf, :parent)
end




"""
    where(df::AbstractDataFrame, args...)

Introduce a `where` clause on `df`, which will be applied by the next function
called on the result.

`args...` obey the same syntax as in `select(df, args...)`; every condition must
evaluate to a column of `Bool` (or `Bool`/`missing`) values.
Rows for which the condition is `missing` are treated as `false`.

Functions that can be applied to the result:
- `filter`/`filter!` (resp. `delete!`) return an `AbstractDataFrame` after filtering (resp. deleting) the specified rows
- `select`/`select!` and `transform`/`transform!` return an `AbstractDataFrame` with as many rows as the original `AbstractDataFrame` after applying the transformation on the specified rows (planned; not implemented yet)
- `view`, `combine`, `describe` return the same thing as the function applied to a view of the `AbstractDataFrame`

# Examples
```julia
julia> df = DataFrame(a = repeat([1, 2, 3, missing], outer=[2]),
b = repeat([2, 1], outer=[4]),
c = randn(8))
julia> wdf = where(df, :a => x -> x .> 1)
julia> wdf = where(df, :a => x -> x .> 1, :b => x -> x .< 2)
julia> filter(wdf)
julia> delete!(wdf)
julia> view(wdf)
julia> describe(wdf)
julia> combine(wdf, :a => sum)
```
"""
function where(df::AbstractDataFrame, args...)
    # Evaluate the conditions through `select`: one resulting column per condition.
    conditions = select(df, args...)
    # With no condition column there is nothing to combine; fail with a clear
    # message instead of the opaque `MethodError` raised by `.&()` on no arguments.
    if size(conditions, 2) == 0
        throw(ArgumentError("`where` requires at least one condition"))
    end
    # Only Bool (possibly with missing) columns can be used as a row mask.
    # Throw a proper exception type rather than a bare `String`.
    if any(col -> !(eltype(col) <: Union{Bool, Missing}), eachcol(conditions))
        throw(ArgumentError("Conditions do not evaluate to bool or missing"))
    end
    # Combine the conditions with `&` (three-valued logic), then treat `missing`
    # as `false`. The single-column case is handled apart because unary `&` is
    # not applied to it.
    mask = if size(conditions, 2) == 1
        coalesce.(conditions[!, 1], false)
    else
        coalesce.(.&(eachcol(conditions)...), false)
    end
    return WhereDataFrame(df, mask)
end

##############################################################################
##
## Show: show the rows of the parent that satisfy the `where` condition,
## with their original row numbers
##
##############################################################################

# Print the one-line header used when displaying a `WhereDataFrame`.
Base.summary(io::IO, wdf::WhereDataFrame) = print(io, "Where DataFrame")

# Display a `WhereDataFrame`: print its summary, then delegate the table rendering
# to `_show`. The `truncate` keyword is forwarded to `_show` as `truncstring`.
function Base.show(io::IO,
                   wdf::WhereDataFrame;
                   allrows::Bool = !get(io, :limit, false),
                   allcols::Bool = !get(io, :limit, false),
                   splitcols = get(io, :limit, false),
                   rowlabel::Symbol = :Row,
                   eltypes::Bool = true,
                   truncate::Int = 32)
    summary(io, wdf)
    return _show(io, wdf;
                 truncstring=truncate,
                 eltypes=eltypes,
                 rowlabel=rowlabel,
                 splitcols=splitcols,
                 allcols=allcols,
                 allrows=allrows)
end

# Render the rows of `parent(wdf)` selected by `wdf`, labelled with their row
# numbers in the parent.
#
# When `allrows` is false and the selection does not fit in the available display
# height, only a leading and a trailing chunk of the selected rows are shown.
# `truncstring` is required and is forwarded to `getmaxwidths` and `showrows`.
function _show(io::IO,
               wdf::WhereDataFrame;
               allrows::Bool = !get(io, :limit, false),
               allcols::Bool = !get(io, :limit, false),
               splitcols = get(io, :limit, false),
               rowlabel::Symbol = :Row,
               eltypes::Bool = true,
               truncstring::Int)

    df = parent(wdf)
    _check_consistency(df)

    # we will pass around this buffer to avoid its reallocation in ourstrwidth
    buffer = IOBuffer(Vector{UInt8}(undef, 80), read=true, write=true)

    selected = rows(wdf)
    nrows = length(selected)

    dsize = displaysize(io)
    # NOTE(review): the 7 presumably reserves lines for the header/footer/prompt,
    # mirroring the regular data frame `_show` -- confirm.
    availableheight = dsize[1] - 7
    nrowssubset = fld(availableheight, 2)
    bound = min(nrowssubset - 1, nrows)
    if allrows || nrows <= availableheight
        rowindices1 = selected[1:nrows]
        rowindices2 = 1:0
    else
        rowindices1 = selected[1:bound]
        # index the selected row numbers, not the `rows` accessor function itself
        rowindices2 = selected[max(bound + 1, nrows - nrowssubset + 1):nrows]
    end
    maxwidths = getmaxwidths(df, io, rowindices1, rowindices2, rowlabel, nothing,
                             eltypes, buffer, truncstring)
    showrows(io, df, rowindices1, rowindices2, maxwidths, splitcols, allcols,
             rowlabel, false, eltypes, nothing, buffer, truncstring)
    return
end


##############################################################################
##
## Extend some methods defined on AbstractDataFrame
##
##############################################################################

# Return the selected rows of the parent, obtained by indexing it with `rows(wdf)`.
Base.filter(wdf::WhereDataFrame) = parent(wdf)[rows(wdf), :]
# Keep only the selected rows of the parent, in place, by deleting every row whose
# index is not in `rows(wdf)`.
function Base.filter!(wdf::WhereDataFrame)
    df = parent(wdf)
    dropped = setdiff(1:nrow(df), rows(wdf))
    return delete!(df, dropped)
end
# Delete the selected rows from the parent, in place.
function Base.delete!(wdf::WhereDataFrame)
    return delete!(parent(wdf), rows(wdf))
end
# TODO:
# Base.select
# Base.select!
# Base.transform
# Base.transform!



# these operations return the same thing as a view
# View of the parent restricted to the selected rows, with all columns.
function Base.view(wdf::WhereDataFrame)
    return view(parent(wdf), rows(wdf), :)
end
# Materialize the selected rows as a `DataFrame`, built from the view of `wdf`;
# `copycols` is forwarded unchanged.
function DataFrame(wdf::WhereDataFrame; copycols::Bool=true)
    return DataFrame(view(wdf); copycols=copycols)
end
# `first` is applied to the view of the selected rows.
function Base.first(df::WhereDataFrame)
    return first(view(df))
end
function Base.first(df::WhereDataFrame, n::Integer)
    return first(view(df), n)
end
# `last` is applied to the view of the selected rows.
function Base.last(df::WhereDataFrame)
    return last(view(df))
end
function Base.last(df::WhereDataFrame, n::Integer)
    return last(view(df), n)
end
# `describe` is applied to the view of the selected rows; all positional and
# keyword arguments are forwarded.
function DataAPI.describe(wdf::WhereDataFrame, args...; kwargs...)
    return describe(view(wdf), args...; kwargs...)
end
# `combine` is applied to the view of the selected rows; all positional and
# keyword arguments are forwarded.
function combine(wdf::WhereDataFrame, args...; kwargs...)
    return combine(view(wdf), args...; kwargs...)
end

0 comments on commit 984640c

Please sign in to comment.