Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New Select and Reject #23

Merged
merged 4 commits into from
Jan 30, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 41 additions & 17 deletions src/transforms/select.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,29 +2,44 @@
# Licensed under the MIT License. See LICENSE in the project root.
# ------------------------------------------------------------------

# Column specification stored by `Select` and `Reject`: either an explicit
# vector of column names or a regex that `_select` matches against the names.
const ColSpec = Union{Vector{Symbol}, Regex}

"""
    Select(col₁, col₂, ..., colₙ)
    Select([col₁, col₂, ..., colₙ])

    Select((col₁, col₂, ..., colₙ))

The transform that selects columns `col₁`, `col₂`, ..., `colₙ`.
Column names can be given as symbols or as strings; strings are
converted to symbols by the outer constructors.

    Select(regex)

The transform that selects the columns whose names match `regex`.
"""
# NOTE(review): the two struct headers below look like the pre- and post-change
# lines of a diff rendered without +/- markers — confirm which one is current.
struct Select{N} <: Stateless
cols::NTuple{N,Symbol}
struct Select{S<:ColSpec} <: Stateless
# column specification: explicit column names or a regex (see `ColSpec`)
cols::S
end

# NOTE(review): this span appears to interleave removed and added diff lines;
# confirm which of the constructors below belong to the current file.

# Tuple of strings: convert every name to a `Symbol`.
Select(cols::NTuple{N,AbstractString}) where {N} =
Select(Symbol.(cols))
# Varargs of strings or symbols: forward them as a tuple.
Select(cols::T...) where {T<:Union{AbstractString, Symbol}} =
Select(cols)

# Any vector: forward its elements as a tuple.
Select(cols::AbstractVector) = Select(Tuple(cols))
# Tuple of strings or symbols: forward its elements as a vector.
Select(cols::NTuple{N, T}) where {N, T<:Union{AbstractString, Symbol}} =
Select(collect(cols))

# Arbitrary varargs: forward them as a tuple.
Select(cols...) = Select(cols)
# Vector of strings: convert every name to a `Symbol`.
Select(cols::Vector{T}) where {T<:AbstractString} =
Select(Symbol.(cols))

# Two `Select` transforms are equal when their column specifications are equal.
Base.:(==)(a::Select, b::Select) = a.cols == b.cols

# Keep `hash` consistent with `==`: transforms that compare equal must also hash
# equally, otherwise they misbehave as `Dict` keys or `Set` elements. The `:Select`
# tag is mixed in so the hash is not simply that of the bare column specification.
Base.hash(a::Select, h::UInt) = hash(a.cols, hash(:Select, h))

# Trait: `Select` transforms are declared revertible.
function isrevertible(::Type{<:Select})
    return true
end

# Resolve a column specification against the column names `allcols`.
# An explicit vector of names is returned as is, without consulting `allcols`.
function _select(cols::Vector{Symbol}, allcols)
    return cols
end

# A regex keeps the entries of `allcols` whose name contains a match,
# in the order in which they appear in `allcols`.
function _select(cols::Regex, allcols)
    return filter(allcols) do name
        occursin(cols, String(name))
    end
end

function apply(transform::Select, table)
# retrieve relevant column names
allcols = collect(Tables.columnnames(table))
select = collect(transform.cols)
select = _select(transform.cols, allcols)
reject = setdiff(allcols, select)

# keep track of indices to revert later
Expand Down Expand Up @@ -76,26 +91,35 @@ end
"""
    Reject(col₁, col₂, ..., colₙ)
    Reject([col₁, col₂, ..., colₙ])
    Reject((col₁, col₂, ..., colₙ))

The transform that discards columns `col₁`, `col₂`, ..., `colₙ`.
Column names can be given as symbols or as strings; strings are
converted to symbols by the outer constructors.

    Reject(regex)

The transform that discards the columns whose names match `regex`.
"""
# NOTE(review): the two struct headers below look like the pre- and post-change
# lines of a diff rendered without +/- markers — confirm which one is current.
struct Reject{N} <: Stateless
cols::NTuple{N,Symbol}
struct Reject{S<:ColSpec} <: Stateless
# column specification: explicit column names or a regex (see `ColSpec`)
cols::S
end

# NOTE(review): this span appears to interleave removed and added diff lines;
# confirm which of the constructors below belong to the current file.

# Tuple of strings: convert every name to a `Symbol`.
Reject(cols::NTuple{N,AbstractString}) where {N} =
Reject(Symbol.(cols))
# Varargs of strings or symbols: forward them as a tuple.
Reject(cols::T...) where {T<:Union{AbstractString, Symbol}} =
Reject(cols)

# Any vector: forward its elements as a tuple.
Reject(cols::AbstractVector) = Reject(Tuple(cols))
# Tuple of strings or symbols: forward its elements as a vector.
Reject(cols::NTuple{N, T}) where {N, T<:Union{AbstractString, Symbol}} =
Reject(collect(cols))

# Arbitrary varargs: forward them as a tuple.
Reject(cols...) = Reject(cols)
# Vector of strings: convert every name to a `Symbol`.
Reject(cols::Vector{T}) where {T<:AbstractString} =
Reject(Symbol.(cols))

# Two `Reject` transforms are equal when their column specifications are equal.
Base.:(==)(a::Reject, b::Reject) = a.cols == b.cols

# Keep `hash` consistent with `==`: transforms that compare equal must also hash
# equally, otherwise they misbehave as `Dict` keys or `Set` elements. The `:Reject`
# tag is mixed in so the hash is not simply that of the bare column specification.
Base.hash(a::Reject, h::UInt) = hash(a.cols, hash(:Reject, h))

# Trait: `Reject` transforms are declared revertible.
function isrevertible(::Type{<:Reject})
    return true
end

function apply(transform::Reject, table)
allcols = Tables.columnnames(table)
reject = collect(transform.cols)
select = Tuple(setdiff(allcols, reject))
reject = _select(transform.cols, allcols)
select = setdiff(allcols, reject)
strans = Select(select)
newtable, scache = apply(strans, table)
newtable, (strans, scache)
Expand All @@ -104,4 +128,4 @@ end
# Revert a `Reject` by unpacking the cached pair and delegating to `revert`
# on its first element, passing the second element as that transform's cache.
function revert(::Reject, newtable, cache)
    (selector, selectorcache) = cache
    return revert(selector, newtable, selectorcache)
end
end
82 changes: 82 additions & 0 deletions test/transforms.jl
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,20 @@
tₒ = revert(T, n, c)
@test t == tₒ

# selection with tuple of strings
T = Select(("d", "c", "b"))
n, c = apply(T, t)
@test Tables.columnnames(n) == (:d, :c, :b)
tₒ = revert(T, n, c)
@test t == tₒ

# selection with vector of strings
T = Select(["d", "c", "b"])
n, c = apply(T, t)
@test Tables.columnnames(n) == (:d, :c, :b)
tₒ = revert(T, n, c)
@test t == tₒ

# selection with single column
@test (Select(:a) == Select("a") ==
Select((:a,)) == Select(("a",)) ==
Expand All @@ -65,6 +79,33 @@
n1, c1 = apply(T, t)
n2 = reapply(T, t, c1)
@test n1 == n2

# selection with Regex
T = Select(r"[dcb]")
n, c = apply(T, t)
@test Tables.columnnames(n) == (:b, :c, :d) # the order of columns is preserved
tₒ = revert(T, n, c)
@test t == tₒ

x1 = rand(4000)
x2 = rand(4000)
y1 = rand(4000)
y2 = rand(4000)
t = Table(; x1, x2, y1, y2)

# select columns whose names contain the character x
T = Select(r"x")
n, c = apply(T, t)
@test Tables.columnnames(n) == (:x1, :x2)
tₒ = revert(T, n, c)
@test t == tₒ

# select columns whose names contain the character y
T = Select(r"y")
n, c = apply(T, t)
@test Tables.columnnames(n) == (:y1, :y2)
tₒ = revert(T, n, c)
@test t == tₒ
end

@testset "Reject" begin
Expand Down Expand Up @@ -121,6 +162,20 @@
tₒ = revert(T, n, c)
@test t == tₒ

# rejection with tuple of strings
T = Reject(("d", "c", "b"))
n, c = apply(T, t)
@test Tables.columnnames(n) == (:a, :e, :f)
tₒ = revert(T, n, c)
@test t == tₒ

# rejection with vector of strings
T = Reject(["d", "c", "b"])
n, c = apply(T, t)
@test Tables.columnnames(n) == (:a, :e, :f)
tₒ = revert(T, n, c)
@test t == tₒ

# rejection with single column
@test (Reject(:a) == Reject("a") ==
Reject((:a,)) == Reject(("a",)) ==
Expand All @@ -131,6 +186,33 @@
n1, c1 = apply(T, t)
n2 = reapply(T, t, c1)
@test n1 == n2

# rejection with Regex
T = Reject(r"[dcb]")
n, c = apply(T, t)
@test Tables.columnnames(n) == (:a, :e, :f) # the order of columns is preserved
tₒ = revert(T, n, c)
@test t == tₒ

x1 = rand(4000)
x2 = rand(4000)
y1 = rand(4000)
y2 = rand(4000)
t = Table(; x1, x2, y1, y2)

# reject columns whose names contain the character x
T = Reject(r"x")
n, c = apply(T, t)
@test Tables.columnnames(n) == (:y1, :y2)
tₒ = revert(T, n, c)
@test t == tₒ

# reject columns whose names contain the character y
T = Reject(r"y")
n, c = apply(T, t)
@test Tables.columnnames(n) == (:x1, :x2)
tₒ = revert(T, n, c)
@test t == tₒ
end

@testset "Rename" begin
Expand Down