Skip to content

Commit

Permalink
Merge pull request #33 from eliascarv/colspec-dropmissing
Browse files Browse the repository at this point in the history
Implementing ColSpec interface in DropMissing
  • Loading branch information
juliohm committed Apr 3, 2022
2 parents 3c8e494 + 74c95f5 commit cb72d48
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 10 deletions.
36 changes: 26 additions & 10 deletions src/transforms/filter.jl
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ const VecOrTuple{T} = Union{Vector{T}, NTuple{N, T}} where {T, N}

"""
DropMissing()
DropMissing(:)
Drop all rows with missing values in the table.
Expand All @@ -55,20 +56,35 @@ Drop all rows with missing values in selects columns `col₁`, `col₂`, ..., `c
Drop all rows with missing values in columns that match `regex`.
"""
DropMissing() = Filter(row -> !any(ismissing, row))  # keep rows that hold no missing value
struct DropMissing{S<:ColSpec} <: Stateless
# column specification; resolved against the table's column names when the
# transform is applied, to pick the columns checked for missing values
colspec::S
end

# Build a row filter from explicit column names or indices.
function DropMissing(cols::VecOrTuple{T}) where {T<:Union{Symbol, Integer}}
  # a row is kept only when every selected entry is non-missing
  Filter(row -> all(!ismissing, map(col -> row[col], cols)))
end
# An empty tuple selects nothing, so it is rejected at construction time.
function DropMissing(::Tuple{})
  throw(ArgumentError("Cannot create a DropMissing object with empty tuple."))
end

# Column names given as strings are converted to symbols and forwarded.
function DropMissing(cols::VecOrTuple{T}) where {T<:AbstractString}
  colsyms = Symbol.(cols)
  DropMissing(colsyms)
end
DropMissing() = DropMissing(Colon())  # no arguments means "all columns"

# Varargs convenience method: the selectors are forwarded as a single tuple.
function DropMissing(cols::T...) where {T<:ColSelector}
  DropMissing(cols)
end

function DropMissing(regex::Regex)
Filter() do row
cols = _filter(regex, propertynames(row))
all(!ismissing, getindex.(Ref(row), cols))
end
# Every parametrization of DropMissing is declared revertible.
isrevertible(::Type{T}) where {T<:DropMissing} = true

# Row filter for the `:` specification: the whole row is scanned, so the
# table itself is not consulted.
function _ftrans(::DropMissing{Colon}, table)
  Filter() do row
    all(!ismissing, row)
  end
end

# Build the row filter for a generic column specification: resolve the
# specification against the table's column names, then keep only the rows
# whose entries in the selected columns are all non-missing.
function _ftrans(transform::DropMissing, table)
  # `Tables.columnnames` belongs to the columns/row interface, so it is
  # called on the result of `Tables.columns` rather than on the raw table;
  # a table that is not itself column-accessible (e.g. a row table) is not
  # guaranteed to report its column names otherwise.
  allcols = Tables.columnnames(Tables.columns(table))
  cols = _filter(transform.colspec, allcols)
  Filter(row -> all(!ismissing, getindex.(Ref(row), cols)))
end

# Apply the transform by delegating to the row filter built for this table.
# The filter is stored in the cache together with its own cache so that
# `revert` can hand both back to it.
function apply(transform::DropMissing, table)
  filt = _ftrans(transform, table)
  filtered, filtcache = apply(filt, table)
  return filtered, (filt, filtcache)
end

# Undo the transform by delegating to the filter recorded in the cache.
function revert(::DropMissing, newtable, cache)
  filt, filtcache = cache
  return revert(filt, newtable, filtcache)
end
16 changes: 16 additions & 0 deletions test/transforms.jl
Original file line number Diff line number Diff line change
Expand Up @@ -603,6 +603,22 @@
n1, c1 = apply(T, t)
n2 = reapply(T, t, c1)
@test n1 == n2

# throws: empty tuple
@test_throws ArgumentError DropMissing(())

# throws: empty selection
@test_throws AssertionError apply(DropMissing(r"g"), t)
@test_throws AssertionError apply(DropMissing(Symbol[]), t)
@test_throws AssertionError apply(DropMissing(String[]), t)

# throws: columns that do not exist in the original table
@test_throws AssertionError apply(DropMissing(:g, :h), t)
@test_throws AssertionError apply(DropMissing([:g, :h]), t)
@test_throws AssertionError apply(DropMissing((:g, :h)), t)
@test_throws AssertionError apply(DropMissing("g", "h"), t)
@test_throws AssertionError apply(DropMissing(["g", "h"]), t)
@test_throws AssertionError apply(DropMissing(("g", "h")), t)
end

@testset "Rename" begin
Expand Down

0 comments on commit cb72d48

Please sign in to comment.