diff --git a/src/TableTransforms.jl b/src/TableTransforms.jl
index a5e70d57..483136c7 100644
--- a/src/TableTransforms.jl
+++ b/src/TableTransforms.jl
@@ -30,6 +30,7 @@ include("assertions.jl")
 include("tabletraits.jl")
 include("distributions.jl")
 include("tableselection.jl")
+include("tablerows.jl")
 include("transforms.jl")
 
 export
diff --git a/src/tablerows.jl b/src/tablerows.jl
new file mode 100644
index 00000000..5fa4a12a
--- /dev/null
+++ b/src/tablerows.jl
@@ -0,0 +1,148 @@
+"""
+    tablerows(table)
+
+Return a lazy iterator over the rows of `table`.
+
+Tables with row access are iterated with `Tables.rows` and all other
+tables with `Tables.columns`. An `ArgumentError` is thrown if the
+argument is not a table.
+
+The rows are iterable, implement the `Tables.AbstractRow` interface
+and the following ways of column access:
+
+```julia
+row.colname
+row."colname"
+row[colindex]
+row[:colname]
+row["colname"]
+```
+"""
+function tablerows(table)
+  if !Tables.istable(table)
+    throw(ArgumentError("the argument is not a table"))
+  end
+
+  # use row access when available, column access otherwise
+  if Tables.rowaccess(table)
+    RTableRows(table)
+  else
+    CTableRows(table)
+  end
+end
+
+#------------------
+# COMMON INTERFACE
+#------------------
+
+# supertype of the rows; subtypes implement `Tables.columnnames`
+# and `Tables.getcolumn` with `Int` and `Symbol` arguments
+abstract type TableRow end
+
+# column access
+Base.getproperty(row::TableRow, nm::Symbol) = Tables.getcolumn(row, nm)
+Base.getproperty(row::TableRow, nm::AbstractString) = Tables.getcolumn(row, Symbol(nm))
+Base.getindex(row::TableRow, i::Int) = Tables.getcolumn(row, i)
+Base.getindex(row::TableRow, nm::Symbol) = Tables.getcolumn(row, nm)
+Base.getindex(row::TableRow, nm::AbstractString) = Tables.getcolumn(row, Symbol(nm))
+
+# the properties of a row are its columns, not the fields of the wrapper
+Base.propertynames(row::TableRow) = Tables.columnnames(row)
+
+# iterator interface (iterates over the values of the row)
+Base.length(row::TableRow) = length(Tables.columnnames(row))
+Base.iterate(row::TableRow, state=1) =
+  state > length(row) ? nothing : (Tables.getcolumn(row, state), state + 1)
+
+#--------------
+# COLUMN TABLE
+#--------------
+
+# lazy iterator over the rows of a table with column access
+struct CTableRows{T}
+  cols::T
+  nrows::Int
+
+  function CTableRows(table)
+    cols = Tables.columns(table)
+    nrows = _nrows(cols)
+    new{typeof(cols)}(cols, nrows)
+  end
+end
+
+# iterator interface
+Base.length(rows::CTableRows) = rows.nrows
+Base.iterate(rows::CTableRows, state::Int=1) =
+  state > length(rows) ? nothing : (CTableRow(rows.cols, state), state + 1)
+
+# row of index `ind` in the columns `cols`
+struct CTableRow{T} <: TableRow
+  cols::T
+  ind::Int
+end
+
+# getters (`getfield` is used because `getproperty` returns columns)
+getcols(row::CTableRow) = getfield(row, :cols)
+getind(row::CTableRow) = getfield(row, :ind)
+
+# AbstractRow interface
+Tables.columnnames(row::CTableRow) = Tables.columnnames(getcols(row))
+Tables.getcolumn(row::CTableRow, i::Int) = Tables.getcolumn(getcols(row), i)[getind(row)]
+Tables.getcolumn(row::CTableRow, nm::Symbol) = Tables.getcolumn(getcols(row), nm)[getind(row)]
+
+#-----------
+# ROW TABLE
+#-----------
+
+# lazy iterator over the rows of a table with row access
+struct RTableRows{T}
+  rows::T
+
+  function RTableRows(table)
+    rows = Tables.rows(table)
+    new{typeof(rows)}(rows)
+  end
+end
+
+# iterator interface
+# the size trait of the wrapped iterator is forwarded because row iterators
+# are not required to have a length; a shape is reported as a length
+# given that `size` is not defined for `RTableRows`
+_itersize(::Base.HasShape) = Base.HasLength()
+_itersize(trait::Base.IteratorSize) = trait
+Base.IteratorSize(::Type{RTableRows{T}}) where {T} = _itersize(Base.IteratorSize(T))
+Base.length(rows::RTableRows) = length(rows.rows)
+function Base.iterate(rows::RTableRows, args...)
+  next = iterate(rows.rows, args...)
+  if isnothing(next)
+    nothing
+  else
+    row, state = next
+    (RTableRow(row), state)
+  end
+end
+
+# wrapper of a row produced by the row iterator
+struct RTableRow{T} <: TableRow
+  row::T
+end
+
+# getters (`getfield` is used because `getproperty` returns columns)
+getrow(row::RTableRow) = getfield(row, :row)
+
+# AbstractRow interface
+Tables.columnnames(row::RTableRow) = Tables.columnnames(getrow(row))
+Tables.getcolumn(row::RTableRow, i::Int) = Tables.getcolumn(getrow(row), i)
+Tables.getcolumn(row::RTableRow, nm::Symbol) = Tables.getcolumn(getrow(row), nm)
+
+#-------
+# UTILS
+#-------
+
+# number of rows, given by the length of the first column (0 without columns)
+function _nrows(cols)
+  names = Tables.columnnames(cols)
+  isempty(names) && return 0
+  column = Tables.getcolumn(cols, first(names))
+  length(column)
+end
diff --git a/src/transforms/filter.jl b/src/transforms/filter.jl
index 680c306c..239dd794 100644
--- a/src/transforms/filter.jl
+++ b/src/transforms/filter.jl
@@ -12,6 +12,10 @@ Filters the table returning only the rows where the `func` returns true.
 ```julia
 Filter(row -> sum(row) > 10)
 Filter(row -> row.a == true && row.b < 30)
+Filter(row -> row."a" == true && row."b" < 30)
+Filter(row -> row[1] == true && row[2] < 30)
+Filter(row -> row[:a] == true && row[:b] < 30)
+Filter(row -> row["a"] == true && row["b"] < 30)
 ```
 
 ## Notes
@@ -26,7 +30,7 @@ isrevertible(::Type{<:Filter}) = true
 
 function preprocess(transform::Filter, table)
   # lazy row iterator
-  rows = Tables.rows(table)
+  rows = tablerows(table)
 
   # selected indices
   sinds, nrows = Int[], 0
diff --git a/test/colspec.jl b/test/colspec.jl
index 11598bfd..5249aa2f 100644
--- a/test/colspec.jl
+++ b/test/colspec.jl
@@ -3,104 +3,104 @@
   tupnames = (:a, :b, :c, :d, :e, :f)
 
   # vector of symbols
-  colspec = TableTransforms.colspec([:a, :c, :e])
-  snames = TableTransforms.choose(colspec, vecnames)
+  colspec = TT.colspec([:a, :c, :e])
+  snames = TT.choose(colspec, vecnames)
   @test snames == [:a, :c, :e]
-  snames = TableTransforms.choose(colspec, tupnames)
+  snames = TT.choose(colspec, tupnames)
   @test snames == [:a, :c, :e]
 
   # tuple of symbols
-  colspec = TableTransforms.colspec((:a, :c, :e))
-  snames = TableTransforms.choose(colspec, vecnames)
+  colspec = TT.colspec((:a, :c, :e))
+  snames = TT.choose(colspec, vecnames)
   @test snames == [:a, :c, :e]
-  snames = TableTransforms.choose(colspec, tupnames)
+  snames = TT.choose(colspec, tupnames)
   @test snames == [:a, :c, :e]
 
   # vector of strings
-  colspec = TableTransforms.colspec(["a", "c", "e"])
-  snames = TableTransforms.choose(colspec, vecnames)
+  colspec = TT.colspec(["a", "c", "e"])
+  snames = TT.choose(colspec, vecnames)
   @test snames == [:a, :c, :e]
-  snames = TableTransforms.choose(colspec, tupnames)
+  snames = TT.choose(colspec, tupnames)
   @test snames == [:a, :c, :e]
 
   # tuple of strings
-  colspec = TableTransforms.colspec(("a", "c", "e"))
-  snames = TableTransforms.choose(colspec, vecnames)
+  colspec = TT.colspec(("a", "c", "e"))
+  snames = TT.choose(colspec, vecnames)
   @test snames == [:a, :c, :e]
-  snames = TableTransforms.choose(colspec, tupnames)
+  snames = TT.choose(colspec, tupnames)
   @test snames == [:a, :c, :e]
 
   # vector of integers
-  colspec = TableTransforms.colspec([1, 3, 5])
-  snames = TableTransforms.choose(colspec, vecnames)
+  colspec = TT.colspec([1, 3, 5])
+  snames = TT.choose(colspec, vecnames)
   @test snames == [:a, :c, :e]
-  snames = TableTransforms.choose(colspec, tupnames)
+  snames = TT.choose(colspec, tupnames)
   @test snames == [:a, :c, :e]
 
   # tuple of integers
-  colspec = TableTransforms.colspec((1, 3, 5))
-  snames = TableTransforms.choose(colspec, vecnames)
+  colspec = TT.colspec((1, 3, 5))
+  snames = TT.choose(colspec, vecnames)
   @test snames == [:a, :c, :e]
-  snames = TableTransforms.choose(colspec, tupnames)
+  snames = TT.choose(colspec, tupnames)
   @test snames == [:a, :c, :e]
 
   # regex
-  colspec = TableTransforms.colspec(r"[ace]")
-  snames = TableTransforms.choose(colspec, vecnames)
+  colspec = TT.colspec(r"[ace]")
+  snames = TT.choose(colspec, vecnames)
   @test snames == [:a, :c, :e]
-  snames = TableTransforms.choose(colspec, tupnames)
+  snames = TT.choose(colspec, tupnames)
   @test snames == [:a, :c, :e]
 
   # colon
-  colspec = TableTransforms.colspec(:)
-  snames = TableTransforms.choose(colspec, vecnames)
+  colspec = TT.colspec(:)
+  snames = TT.choose(colspec, vecnames)
   @test snames == [:a, :b, :c, :d, :e, :f]
-  snames = TableTransforms.choose(colspec, tupnames)
+  snames = TT.choose(colspec, tupnames)
   @test snames == [:a, :b, :c, :d, :e, :f]
 
   # nothing
-  colspec = TableTransforms.colspec(nothing)
-  snames = TableTransforms.choose(colspec, vecnames)
+  colspec = TT.colspec(nothing)
+  snames = TT.choose(colspec, vecnames)
   @test snames == Symbol[]
-  snames = TableTransforms.choose(colspec, tupnames)
+  snames = TT.choose(colspec, tupnames)
   @test snames == Symbol[]
 
   # throws
-  colspec = TableTransforms.colspec(r"x")
-  @test_throws AssertionError TableTransforms.choose(colspec, vecnames)
-  @test_throws AssertionError TableTransforms.choose(colspec, tupnames)
-  @test_throws AssertionError TableTransforms.colspec(Symbol[])
-  @test_throws AssertionError TableTransforms.colspec(String[])
-  @test_throws AssertionError TableTransforms.colspec(Int[])
-  @test_throws ArgumentError TableTransforms.colspec(())
-  @test_throws ArgumentError TableTransforms.colspec(missing)
+  colspec = TT.colspec(r"x")
+  @test_throws AssertionError TT.choose(colspec, vecnames)
+  @test_throws AssertionError TT.choose(colspec, tupnames)
+  @test_throws AssertionError TT.colspec(Symbol[])
+  @test_throws AssertionError TT.colspec(String[])
+  @test_throws AssertionError TT.colspec(Int[])
+  @test_throws ArgumentError TT.colspec(())
+  @test_throws ArgumentError TT.colspec(missing)
 
   # type stability
-  colspec = TableTransforms.colspec([:a, :b])
-  @inferred TableTransforms.choose(colspec, vecnames)
-  @inferred TableTransforms.choose(colspec, tupnames)
-  colspec = TableTransforms.colspec((:a, :b))
-  @inferred TableTransforms.choose(colspec, vecnames)
-  @inferred TableTransforms.choose(colspec, tupnames)
-  colspec = TableTransforms.colspec(["a", "b"])
-  @inferred TableTransforms.choose(colspec, vecnames)
-  @inferred TableTransforms.choose(colspec, tupnames)
-  colspec = TableTransforms.colspec(("a", "b"))
-  @inferred TableTransforms.choose(colspec, vecnames)
-  @inferred TableTransforms.choose(colspec, tupnames)
-  colspec = TableTransforms.colspec([1, 2])
-  @inferred TableTransforms.choose(colspec, vecnames)
-  @inferred TableTransforms.choose(colspec, tupnames)
-  colspec = TableTransforms.colspec((1, 2))
-  @inferred TableTransforms.choose(colspec, vecnames)
-  @inferred TableTransforms.choose(colspec, tupnames)
-  colspec = TableTransforms.colspec(r"[ab]")
-  @inferred TableTransforms.choose(colspec, vecnames)
-  @inferred TableTransforms.choose(colspec, tupnames)
-  colspec = TableTransforms.colspec(:)
-  @inferred TableTransforms.choose(colspec, vecnames)
-  @inferred TableTransforms.choose(colspec, tupnames)
-  colspec = TableTransforms.colspec(nothing)
-  @inferred TableTransforms.choose(colspec, vecnames)
-  @inferred TableTransforms.choose(colspec, tupnames)
+  colspec = TT.colspec([:a, :b])
+  @inferred TT.choose(colspec, vecnames)
+  @inferred TT.choose(colspec, tupnames)
+  colspec = TT.colspec((:a, :b))
+  @inferred TT.choose(colspec, vecnames)
+  @inferred TT.choose(colspec, tupnames)
+  colspec = TT.colspec(["a", "b"])
+  @inferred TT.choose(colspec, vecnames)
+  @inferred TT.choose(colspec, tupnames)
+  colspec = TT.colspec(("a", "b"))
+  @inferred TT.choose(colspec, vecnames)
+  @inferred TT.choose(colspec, tupnames)
+  colspec = TT.colspec([1, 2])
+  @inferred TT.choose(colspec, vecnames)
+  @inferred TT.choose(colspec, tupnames)
+  colspec = TT.colspec((1, 2))
+  @inferred TT.choose(colspec, vecnames)
+  @inferred TT.choose(colspec, tupnames)
+  colspec = TT.colspec(r"[ab]")
+  @inferred TT.choose(colspec, vecnames)
+  @inferred TT.choose(colspec, tupnames)
+  colspec = TT.colspec(:)
+  @inferred TT.choose(colspec, vecnames)
+  @inferred TT.choose(colspec, tupnames)
+  colspec = TT.colspec(nothing)
+  @inferred TT.choose(colspec, vecnames)
+  @inferred TT.choose(colspec, tupnames)
 end
diff --git a/test/distributions.jl b/test/distributions.jl
index 815cfbd1..b77c1c6e 100644
--- a/test/distributions.jl
+++ b/test/distributions.jl
@@ -1,6 +1,6 @@
 @testset "Distributions" begin
   values = randn(1000)
-  d = TableTransforms.EmpiricalDistribution(values)
+  d = TT.EmpiricalDistribution(values)
   @test 0.0 ≤ cdf(d, rand()) ≤ 1.0
   @test minimum(values) ≤ quantile(d, 0.5) ≤ maximum(values)
 end
diff --git a/test/runtests.jl b/test/runtests.jl
index 0e31044e..14798cad 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -39,8 +39,16 @@ Polynomial(args::T...) where {T<:Real} = Polynomial(collect(args))
 include("metatable.jl")
 
 # list of tests
-testfiles =
-  ["distributions.jl", "colspec.jl", "assertions.jl", "transforms.jl", "metadata.jl", "tableselection.jl", "shows.jl"]
+testfiles = [
+  "distributions.jl",
+  "colspec.jl",
+  "assertions.jl",
+  "transforms.jl",
+  "metadata.jl",
+  "tableselection.jl",
+  "tablerows.jl",
+  "shows.jl"
+]
 
 @testset "TableTransforms.jl" begin
   for testfile in testfiles
diff --git a/test/tablerows.jl b/test/tablerows.jl
new file mode 100644
index 00000000..6a75a66f
--- /dev/null
+++ b/test/tablerows.jl
@@ -0,0 +1,84 @@
+@testset "tablerows" begin
+  #--------------
+  # COLUMN TABLE
+  #--------------
+
+  # table rows
+  tb = (a=[1, 2, 3], b=[4, 5, 6])
+  rows = TT.tablerows(tb)
+  @test rows isa TT.CTableRows
+  # iterator interface
+  @test length(rows) == 3
+  row, state = iterate(rows)
+  @test row.a == 1
+  @test row.b == 4
+  row, state = iterate(rows, state)
+  @test row.a == 2
+  @test row.b == 5
+  row, state = iterate(rows, state)
+  @test row.a == 3
+  @test row.b == 6
+  @test isnothing(iterate(rows, state))
+
+  # table row
+  row = first(rows)
+  @test row isa TT.CTableRow
+  # AbstractRow interface
+  @test Tables.columnnames(row) == (:a, :b)
+  @test propertynames(row) == (:a, :b)
+  @test Tables.getcolumn(row, 1) == 1
+  @test Tables.getcolumn(row, :a) == 1
+  # column access
+  @test row."a" == 1
+  @test row[1] == 1
+  @test row[:a] == 1
+  @test row["a"] == 1
+  # iterator interface
+  item, state = iterate(row)
+  @test item == 1
+  item, state = iterate(row, state)
+  @test item == 4
+  @test isnothing(iterate(row, state))
+
+  #-----------
+  # ROW TABLE
+  #-----------
+
+  # table rows
+  tb = [(a=1, b=4), (a=2, b=5), (a=3, b=6)]
+  rows = TT.tablerows(tb)
+  @test rows isa TT.RTableRows
+  # iterator interface
+  @test Base.IteratorSize(rows) isa Base.HasLength
+  @test length(rows) == 3
+  row, state = iterate(rows)
+  @test row.a == 1
+  @test row.b == 4
+  row, state = iterate(rows, state)
+  @test row.a == 2
+  @test row.b == 5
+  row, state = iterate(rows, state)
+  @test row.a == 3
+  @test row.b == 6
+  @test isnothing(iterate(rows, state))
+
+  # table row
+  row = first(rows)
+  @test row isa TT.RTableRow
+  # AbstractRow interface
+  @test Tables.columnnames(row) == (:a, :b)
+  @test propertynames(row) == (:a, :b)
+  @test Tables.getcolumn(row, 2) == 4
+  @test Tables.getcolumn(row, :b) == 4
+  # column access
+  @test row."b" == 4
+  @test row[2] == 4
+  @test row[:b] == 4
+  @test row["b"] == 4
+  # iterator interface
+  item, state = iterate(row)
+  @test item == 1
+  item, state = iterate(row, state)
+  @test item == 4
+  @test isnothing(iterate(row, state))
+end
diff --git a/test/tableselection.jl b/test/tableselection.jl
index 11821b0c..84ed42c5 100644
--- a/test/tableselection.jl
+++ b/test/tableselection.jl
@@ -10,7 +10,7 @@
   # Tables.jl interface
   select = [:a, :b, :e]
   newnames = select
-  s = TableTransforms.TableSelection(t, newnames, select)
+  s = TT.TableSelection(t, newnames, select)
   @test Tables.istable(s) == true
   @test Tables.columnaccess(s) == true
   @test Tables.rowaccess(s) == false
@@ -29,7 +29,7 @@
   # selectin with renaming
   select = [:c, :d, :f]
   newnames = [:x, :y, :z]
-  s = TableTransforms.TableSelection(t, newnames, select)
+  s = TT.TableSelection(t, newnames, select)
   @test Tables.columnnames(s) == [:x, :y, :z]
   @test Tables.getcolumn(s, :x) == t.c
   @test Tables.getcolumn(s, :y) == t.d
@@ -42,19 +42,19 @@
   select = [:a, :b, :e]
   newnames = select
   rt = Tables.rowtable(t)
-  s = TableTransforms.TableSelection(rt, newnames, select)
+  s = TT.TableSelection(rt, newnames, select)
   cols = Tables.columns(rt)
   @test Tables.getcolumn(s, :a) == Tables.getcolumn(cols, :a)
   @test Tables.getcolumn(s, 1) == Tables.getcolumn(cols, 1)
   @test Tables.getcolumn(s, 3) == Tables.getcolumn(cols, :e)
 
   # throws
-  @test_throws AssertionError TableTransforms.TableSelection(t, [:a, :b, :z], [:a, :b, :z])
-  @test_throws AssertionError TableTransforms.TableSelection(t, [:x, :y, :z], [:c, :d, :k])
-  s = TableTransforms.TableSelection(t, [:a, :b, :e], [:a, :b, :e])
+  @test_throws AssertionError TT.TableSelection(t, [:a, :b, :z], [:a, :b, :z])
+  @test_throws AssertionError TT.TableSelection(t, [:x, :y, :z], [:c, :d, :k])
+  s = TT.TableSelection(t, [:a, :b, :e], [:a, :b, :e])
   @test_throws ErrorException Tables.getcolumn(s, :f)
   @test_throws ErrorException Tables.getcolumn(s, 4)
-  s = TableTransforms.TableSelection(t, [:x, :y, :z], [:c, :d, :f])
+  s = TT.TableSelection(t, [:x, :y, :z], [:c, :d, :f])
   @test_throws ErrorException Tables.getcolumn(s, :c)
   @test_throws ErrorException Tables.getcolumn(s, 4)
   @test_throws ErrorException Tables.getcolumn(s, -2)
diff --git a/test/transforms/filter.jl b/test/transforms/filter.jl
index 0f25a2c9..b5e21570 100644
--- a/test/transforms/filter.jl
+++ b/test/transforms/filter.jl
@@ -65,6 +65,51 @@
   tₒ = revert(T, n, c)
   @test t == tₒ
 
+  # column access
+  T = Filter(row -> row."b" == 4 || row."f" == 4)
+  n, c = apply(T, t)
+  @test n.a == [3, 2, 1, 4]
+  @test n.b == [2, 4, 4, 5]
+  @test n.c == [1, 1, 6, 2]
+  @test n.d == [4, 3, 7, 5]
+  @test n.e == [5, 5, 2, 6]
+  @test n.f == [4, 4, 3, 4]
+  tₒ = revert(T, n, c)
+  @test t == tₒ
+
+  T = Filter(row -> row[2] == 4 || row[6] == 4)
+  n, c = apply(T, t)
+  @test n.a == [3, 2, 1, 4]
+  @test n.b == [2, 4, 4, 5]
+  @test n.c == [1, 1, 6, 2]
+  @test n.d == [4, 3, 7, 5]
+  @test n.e == [5, 5, 2, 6]
+  @test n.f == [4, 4, 3, 4]
+  tₒ = revert(T, n, c)
+  @test t == tₒ
+
+  T = Filter(row -> row[:b] == 4 || row[:f] == 4)
+  n, c = apply(T, t)
+  @test n.a == [3, 2, 1, 4]
+  @test n.b == [2, 4, 4, 5]
+  @test n.c == [1, 1, 6, 2]
+  @test n.d == [4, 3, 7, 5]
+  @test n.e == [5, 5, 2, 6]
+  @test n.f == [4, 4, 3, 4]
+  tₒ = revert(T, n, c)
+  @test t == tₒ
+
+  T = Filter(row -> row["b"] == 4 || row["f"] == 4)
+  n, c = apply(T, t)
+  @test n.a == [3, 2, 1, 4]
+  @test n.b == [2, 4, 4, 5]
+  @test n.c == [1, 1, 6, 2]
+  @test n.d == [4, 3, 7, 5]
+  @test n.e == [5, 5, 2, 6]
+  @test n.f == [4, 4, 3, 4]
+  tₒ = revert(T, n, c)
+  @test t == tₒ
+
   # reapply test
   T = Filter(row -> all(≤(5), row))
   n1, c1 = apply(T, t)
diff --git a/test/transforms/stdnames.jl b/test/transforms/stdnames.jl
index a204349f..e9153a0e 100644
--- a/test/transforms/stdnames.jl
+++ b/test/transforms/stdnames.jl
@@ -1,42 +1,42 @@
 @testset "StdNames" begin
   names = ["apple banana", "apple\tbanana", "apple_banana", "apple-banana", "apple_Banana"]
   for name in names
-    @test TableTransforms._camel(name) == "AppleBanana"
-    @test TableTransforms._snake(name) == "apple_banana"
-    @test TableTransforms._upper(name) == "APPLEBANANA"
+    @test TT._camel(name) == "AppleBanana"
+    @test TT._snake(name) == "apple_banana"
+    @test TT._upper(name) == "APPLEBANANA"
   end
 
   names = ["a", "A", "_a", "_A", "a ", "A "]
   for name in names
-    @test TableTransforms._camel(name) == "A"
-    @test TableTransforms._snake(name) == "a"
-    @test TableTransforms._upper(name) == "A"
+    @test TT._camel(name) == "A"
+    @test TT._snake(name) == "a"
+    @test TT._upper(name) == "A"
   end
 
   # special characters
   name = "a&B"
-  @test TableTransforms._clean(name) == "aB"
+  @test TT._clean(name) == "aB"
   name = "apple#"
-  @test TableTransforms._clean(name) == "apple"
+  @test TT._clean(name) == "apple"
   name = "apple-tree"
-  @test TableTransforms._clean(name) == "apple-tree"
+  @test TT._clean(name) == "apple-tree"
 
   # invariance test
   names = ["AppleTree", "BananaFruit", "PearSeed"]
   for name in names
-    @test TableTransforms._camel(name) == name
+    @test TT._camel(name) == name
   end
 
   names = ["apple_tree", "banana_fruit", "pear_seed"]
   for name in names
-    @test TableTransforms._snake(name) == name
+    @test TT._snake(name) == name
   end
 
   names = ["APPLETREE", "BANANAFRUIT", "PEARSEED"]
   for name in names
-    @test TableTransforms._upper(name) == name
+    @test TT._upper(name) == name
   end
 
   # uniqueness test