diff --git a/src/transforms/colspec.jl b/src/transforms/colspec.jl index c01fa892..9edc1eef 100644 --- a/src/transforms/colspec.jl +++ b/src/transforms/colspec.jl @@ -5,6 +5,9 @@ # types used to select a column const ColSelector = Union{Symbol,Integer,AbstractString} +# union of types used to filter columns +const ColSpec = Union{Vector{T},NTuple{N,T},Regex,Colon} where {N,T<:ColSelector} + # filter table columns using colspec function _filter(colspec::Vector{Symbol}, cols) # validate columns diff --git a/src/transforms/filter.jl b/src/transforms/filter.jl index 0ef48934..e44849c5 100644 --- a/src/transforms/filter.jl +++ b/src/transforms/filter.jl @@ -68,7 +68,7 @@ DropMissing(cols::T...) where {T<:ColSelector} = function DropMissing(regex::Regex) Filter() do row - cols = _select(regex, propertynames(row)) + cols = _filter(regex, propertynames(row)) all(!ismissing, getindex.(Ref(row), cols)) end end diff --git a/src/transforms/select.jl b/src/transforms/select.jl index de5d9529..db389e51 100644 --- a/src/transforms/select.jl +++ b/src/transforms/select.jl @@ -42,8 +42,6 @@ end Tables.materializer(t::TableSelection) = Tables.materializer(t.table) -const ColSpec = Union{Vector{Symbol}, Regex} - """ Select(col₁, col₂, ..., colₙ) Select([col₁, col₂, ..., colₙ]) @@ -56,39 +54,24 @@ The transform that selects columns `col₁`, `col₂`, ..., `colₙ`. Selects the columns that match with `regex`. """ struct Select{S<:ColSpec} <: Stateless - cols::S + colspec::S end -# to avoid StackOverflowError in Select() and Select(()) -Select(::Tuple{}) = throw(ArgumentError("Cannot create a Select object without arguments.")) +# argument errors +Select(::Tuple{}) = throw(ArgumentError("Cannot create a Select object with empty tuple.")) +Select() = throw(ArgumentError("Cannot create a Select object without arguments.")) -Select(cols::T...) where {T<:Union{AbstractString, Symbol}} = +Select(cols::T...) 
where {T<:ColSelector} = Select(cols) -Select(cols::NTuple{N, T}) where {N, T<:Union{AbstractString, Symbol}} = - Select(collect(cols)) - -Select(cols::Vector{T}) where {T<:AbstractString} = - Select(Symbol.(cols)) - -Base.:(==)(a::Select, b::Select) = a.cols == b.cols - isrevertible(::Type{<:Select}) = true -_select(cols::Vector{Symbol}, allcols) = cols -_select(cols::Regex, allcols) = - filter(col -> occursin(cols, String(col)), allcols) - function apply(transform::Select, table) # retrieve relevant column names allcols = collect(Tables.columnnames(table)) - select = _select(transform.cols, allcols) + select = _filter(transform.colspec, allcols) reject = setdiff(allcols, select) - # validate selections - @assert !isempty(select) "Invalid selection" - @assert select ⊆ Tables.columnnames(table) "Invalid selection" - # keep track of indices to revert later sinds = indexin(select, allcols) rinds = indexin(reject, allcols) @@ -143,28 +126,22 @@ The transform that discards columns `col₁`, `col₂`, ..., `colₙ`. Discards the columns that match with `regex`. """ struct Reject{S<:ColSpec} <: Stateless - cols::S + colspec::S end -# to avoid StackOverflowError in Reject() and Reject(()) -Reject(::Tuple{}) = throw(ArgumentError("Cannot create a Reject object with no arguments.")) +# argument errors +Reject(::Tuple{}) = throw(ArgumentError("Cannot create a Reject object with empty tuple.")) +Reject(::Colon) = throw(ArgumentError("Cannot reject all columns.")) +Reject() = throw(ArgumentError("Cannot create a Reject object without arguments.")) -Reject(cols::T...) where {T<:Union{AbstractString, Symbol}} = +Reject(cols::T...) 
where {T<:ColSelector} = Reject(cols) -Reject(cols::NTuple{N, T}) where {N, T<:Union{AbstractString, Symbol}} = - Reject(collect(cols)) - -Reject(cols::Vector{T}) where {T<:AbstractString} = - Reject(Symbol.(cols)) - -Base.:(==)(a::Reject, b::Reject) = a.cols == b.cols - isrevertible(::Type{<:Reject}) = true function apply(transform::Reject, table) allcols = Tables.columnnames(table) - reject = _select(transform.cols, allcols) + reject = _filter(transform.colspec, allcols) select = setdiff(allcols, reject) strans = Select(select) newtable, scache = apply(strans, table) diff --git a/test/transforms.jl b/test/transforms.jl index 12f3dcb1..7450a279 100644 --- a/test/transforms.jl +++ b/test/transforms.jl @@ -155,10 +155,26 @@ tₒ = revert(T, n, c) @test t == tₒ - # selection with single column - @test (Select(:a) == Select("a") == - Select((:a,)) == Select(("a",)) == - Select([:a]) == Select(["a"])) + # selection with integers + T = Select(4, 3, 2) + n, c = apply(T, t) + @test Tables.columnnames(n) == [:d, :c, :b] + tₒ = revert(T, n, c) + @test t == tₒ + + # selection with tuple of integers + T = Select((4, 3, 2)) + n, c = apply(T, t) + @test Tables.columnnames(n) == [:d, :c, :b] + tₒ = revert(T, n, c) + @test t == tₒ + + # selection with vector of integers + T = Select([4, 3, 2]) + n, c = apply(T, t) + @test Tables.columnnames(n) == [:d, :c, :b] + tₒ = revert(T, n, c) + @test t == tₒ # reapply test T = Select(:b, :c, :d) @@ -279,10 +295,26 @@ tₒ = revert(T, n, c) @test t == tₒ - # rejection with single column - @test (Reject(:a) == Reject("a") == - Reject((:a,)) == Reject(("a",)) == - Reject([:a]) == Reject(["a"])) + # rejection with integers + T = Reject(4, 3, 2) + n, c = apply(T, t) + @test Tables.columnnames(n) == [:a, :e, :f] + tₒ = revert(T, n, c) + @test t == tₒ + + # rejection with tuple of integers + T = Reject((4, 3, 2)) + n, c = apply(T, t) + @test Tables.columnnames(n) == [:a, :e, :f] + tₒ = revert(T, n, c) + @test t == tₒ + + # rejection with vector of 
integers + T = Reject([4, 3, 2]) + n, c = apply(T, t) + @test Tables.columnnames(n) == [:a, :e, :f] + tₒ = revert(T, n, c) + @test t == tₒ # reapply test T = Reject(:b, :c, :d)