Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions src/transforms/colspec.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@
# types used to select a column
const ColSelector = Union{Symbol,Integer,AbstractString}

# union of types used to filter columns
const ColSpec = Union{Vector{T},NTuple{N,T},Regex,Colon} where {N,T<:ColSelector}

# filter table columns using colspec
function _filter(colspec::Vector{Symbol}, cols)
# validate columns
Expand Down
2 changes: 1 addition & 1 deletion src/transforms/filter.jl
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ DropMissing(cols::T...) where {T<:ColSelector} =

function DropMissing(regex::Regex)
Filter() do row
cols = _select(regex, propertynames(row))
cols = _filter(regex, propertynames(row))
all(!ismissing, getindex.(Ref(row), cols))
end
end
49 changes: 13 additions & 36 deletions src/transforms/select.jl
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,6 @@ end
Tables.materializer(t::TableSelection) =
Tables.materializer(t.table)

const ColSpec = Union{Vector{Symbol}, Regex}

"""
Select(col₁, col₂, ..., colₙ)
Select([col₁, col₂, ..., colₙ])
Expand All @@ -56,39 +54,24 @@ The transform that selects columns `col₁`, `col₂`, ..., `colₙ`.
Selects the columns that match with `regex`.
"""
struct Select{S<:ColSpec} <: Stateless
cols::S
colspec::S
end

# to avoid StackOverflowError in Select() and Select(())
Select(::Tuple{}) = throw(ArgumentError("Cannot create a Select object without arguments."))
# argument errors
Select(::Tuple{}) = throw(ArgumentError("Cannot create a Select object with empty tuple."))
Select() = throw(ArgumentError("Cannot create a Select object without arguments."))

Select(cols::T...) where {T<:Union{AbstractString, Symbol}} =
Select(cols::T...) where {T<:ColSelector} =
Select(cols)

Select(cols::NTuple{N, T}) where {N, T<:Union{AbstractString, Symbol}} =
Select(collect(cols))

Select(cols::Vector{T}) where {T<:AbstractString} =
Select(Symbol.(cols))

Base.:(==)(a::Select, b::Select) = a.cols == b.cols

isrevertible(::Type{<:Select}) = true

_select(cols::Vector{Symbol}, allcols) = cols
_select(cols::Regex, allcols) =
filter(col -> occursin(cols, String(col)), allcols)

function apply(transform::Select, table)
# retrieve relevant column names
allcols = collect(Tables.columnnames(table))
select = _select(transform.cols, allcols)
select = _filter(transform.colspec, allcols)
reject = setdiff(allcols, select)

# validate selections
@assert !isempty(select) "Invalid selection"
@assert select ⊆ Tables.columnnames(table) "Invalid selection"

# keep track of indices to revert later
sinds = indexin(select, allcols)
rinds = indexin(reject, allcols)
Expand Down Expand Up @@ -143,28 +126,22 @@ The transform that discards columns `col₁`, `col₂`, ..., `colₙ`.
Discards the columns that match with `regex`.
"""
struct Reject{S<:ColSpec} <: Stateless
cols::S
colspec::S
end

# to avoid StackOverflowError in Reject() and Reject(())
Reject(::Tuple{}) = throw(ArgumentError("Cannot create a Reject object with no arguments."))
# argument errors
Reject(::Tuple{}) = throw(ArgumentError("Cannot create a Reject object with empty tuple."))
Reject(::Colon) = throw(ArgumentError("Cannot reject all columns."))
Reject() = throw(ArgumentError("Cannot create a Reject object without arguments."))

Reject(cols::T...) where {T<:Union{AbstractString, Symbol}} =
Reject(cols::T...) where {T<:ColSelector} =
Reject(cols)

Reject(cols::NTuple{N, T}) where {N, T<:Union{AbstractString, Symbol}} =
Reject(collect(cols))

Reject(cols::Vector{T}) where {T<:AbstractString} =
Reject(Symbol.(cols))

Base.:(==)(a::Reject, b::Reject) = a.cols == b.cols

isrevertible(::Type{<:Reject}) = true

function apply(transform::Reject, table)
allcols = Tables.columnnames(table)
reject = _select(transform.cols, allcols)
reject = _filter(transform.colspec, allcols)
select = setdiff(allcols, reject)
strans = Select(select)
newtable, scache = apply(strans, table)
Expand Down
48 changes: 40 additions & 8 deletions test/transforms.jl
Original file line number Diff line number Diff line change
Expand Up @@ -155,10 +155,26 @@
tₒ = revert(T, n, c)
@test t == tₒ

# selection with single column
@test (Select(:a) == Select("a") ==
Select((:a,)) == Select(("a",)) ==
Select([:a]) == Select(["a"]))
# selection with integers
T = Select(4, 3, 2)
n, c = apply(T, t)
@test Tables.columnnames(n) == [:d, :c, :b]
tₒ = revert(T, n, c)
@test t == tₒ

# selection with tuple of integers
T = Select((4, 3, 2))
n, c = apply(T, t)
@test Tables.columnnames(n) == [:d, :c, :b]
tₒ = revert(T, n, c)
@test t == tₒ

# selection with vector of integers
T = Select([4, 3, 2])
n, c = apply(T, t)
@test Tables.columnnames(n) == [:d, :c, :b]
tₒ = revert(T, n, c)
@test t == tₒ

# reapply test
T = Select(:b, :c, :d)
Expand Down Expand Up @@ -279,10 +295,26 @@
tₒ = revert(T, n, c)
@test t == tₒ

# rejection with single column
@test (Reject(:a) == Reject("a") ==
Reject((:a,)) == Reject(("a",)) ==
Reject([:a]) == Reject(["a"]))
# rejection with integers
T = Reject(4, 3, 2)
n, c = apply(T, t)
@test Tables.columnnames(n) == [:a, :e, :f]
tₒ = revert(T, n, c)
@test t == tₒ

# rejection with tuple of integers
T = Reject((4, 3, 2))
n, c = apply(T, t)
@test Tables.columnnames(n) == [:a, :e, :f]
tₒ = revert(T, n, c)
@test t == tₒ

# rejection with vector of integers
T = Reject([4, 3, 2])
n, c = apply(T, t)
@test Tables.columnnames(n) == [:a, :e, :f]
tₒ = revert(T, n, c)
@test t == tₒ

# reapply test
T = Reject(:b, :c, :d)
Expand Down