From ac9f1ac589ade69ce18d581db5c2277b53935053 Mon Sep 17 00:00:00 2001 From: Omar Elrefaei Date: Sun, 14 Nov 2021 01:34:44 -0500 Subject: [PATCH 01/15] wip: implement Rename transform --- src/TableTransforms.jl | 5 +++-- src/transforms.jl | 1 + src/transforms/rename.jl | 43 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 47 insertions(+), 2 deletions(-) create mode 100644 src/transforms/rename.jl diff --git a/src/TableTransforms.jl b/src/TableTransforms.jl index 060ff79b..12fd1079 100644 --- a/src/TableTransforms.jl +++ b/src/TableTransforms.jl @@ -7,7 +7,7 @@ module TableTransforms using Tables using ScientificTypes using Distributions: Normal -using Transducers: tcollect +using Transducers: tcollect, push!! using LinearAlgebra using Statistics @@ -29,9 +29,10 @@ export colapply, colrevert, # built-in + Identity, Select, Reject, - Identity, + Rename3, Center, Scale, MinMax, diff --git a/src/transforms.jl b/src/transforms.jl index e035b13a..82613ca0 100644 --- a/src/transforms.jl +++ b/src/transforms.jl @@ -213,6 +213,7 @@ end include("transforms/identity.jl") include("transforms/select.jl") +include("transforms/rename.jl") include("transforms/center.jl") include("transforms/scale.jl") include("transforms/zscore.jl") diff --git a/src/transforms/rename.jl b/src/transforms/rename.jl new file mode 100644 index 00000000..3478efd7 --- /dev/null +++ b/src/transforms/rename.jl @@ -0,0 +1,43 @@ +# ------------------------------------------------------------------ +# Licensed under the MIT License. See LICENSE in the project root. +# ------------------------------------------------------------------ + +""" + Rename(Dict(:col₁ => :newcol₁, :col₂ => :newcol₂, ..., :col₁ => :newcolₙ)) + +Tha transform that renames `col₁` to `newcol₁`, `col₂` to `newcol₂`, ... 
+""" +struct Rename3 <: Stateless + names::Dict{Symbol,Symbol} +end + +function apply(transform::Rename3, table) + new_table = (;) + + for col_name in Tables.columnnames(table) + col_value = Tables.getcolumn(table, col_name) + + # if the current name is to be changed, retrive the new name + #and push a col with it, else push the col with the old name + if col_name in keys(transform.names) + new_name = transform.names[col_name] + new_table = push!!(new_table, new_name => col_value) + else + new_table = push!!(new_table, col_name => col_value) + end + end + + 𝒯 = table |> Tables.materializer(table) + 𝒯, nothing +end + +function revert(transform::Rename3, table) + # reversing the key-value pairs + new_names = Dict() + for (new, old) in transform.names + new_names[old] = new + end + # normal apply operation, but on a revered Dict + reversed_transform = Rename3(new_names) + apply(reversed_transform, table) |> first +end From 8b77cba9d7cea558a6762b2ad55b02be6ac9092b Mon Sep 17 00:00:00 2001 From: Omar Elrefaei Date: Sun, 14 Nov 2021 01:34:44 -0500 Subject: [PATCH 02/15] wip: implement Rename transform --- src/TableTransforms.jl | 3 ++- src/transforms.jl | 1 + src/transforms/rename.jl | 40 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 43 insertions(+), 1 deletion(-) create mode 100644 src/transforms/rename.jl diff --git a/src/TableTransforms.jl b/src/TableTransforms.jl index 060ff79b..d429c462 100644 --- a/src/TableTransforms.jl +++ b/src/TableTransforms.jl @@ -7,7 +7,7 @@ module TableTransforms using Tables using ScientificTypes using Distributions: Normal -using Transducers: tcollect +using Transducers: tcollect, push!! 
using LinearAlgebra using Statistics @@ -31,6 +31,7 @@ export # built-in Select, Reject, + Rename, Identity, Center, Scale, diff --git a/src/transforms.jl b/src/transforms.jl index e035b13a..82613ca0 100644 --- a/src/transforms.jl +++ b/src/transforms.jl @@ -213,6 +213,7 @@ end include("transforms/identity.jl") include("transforms/select.jl") +include("transforms/rename.jl") include("transforms/center.jl") include("transforms/scale.jl") include("transforms/zscore.jl") diff --git a/src/transforms/rename.jl b/src/transforms/rename.jl new file mode 100644 index 00000000..d7e205bc --- /dev/null +++ b/src/transforms/rename.jl @@ -0,0 +1,40 @@ +# ------------------------------------------------------------------ +# Licensed under the MIT License. See LICENSE in the project root. +# ------------------------------------------------------------------ + +""" + Rename(:col₁ => :newcol₁, :col₂ => :newcol₂, ..., :col₁ => :newcolₙ)) + +Tha transform that renames `col₁` to `newcol₁`, `col₂` to `newcol₂`, ... +""" +struct Rename <: Stateless + names::Dict{Symbol,Symbol} +end + +function apply(transform::Rename, table) + oldnames = Tables.columnnames(table) + newnames = map(oldnames) do oldname + if oldname in keys(transform.names) + return transform.names[oldname] + else + return oldname + end + end + acols = [i for i in Tables.columns(table)] + 𝒯 = (; zip(newnames, acols)...) 
+ 𝒯 |> Tables.materializer(table) + 𝒯, nothing +end + + + +function revert(transform::Rename, table) + # reversing the key-value pairs of the Dict + new_names = Dict() + for (new, old) in transform.names + new_names[old] = new + end + + reversed_transform = Rename(new_names) + apply(reversed_transform, table) |> first +end From 1013e40d030ce7d6cbcc592ca0b81d97b3165f1d Mon Sep 17 00:00:00 2001 From: Omar Elrefaei Date: Sun, 14 Nov 2021 17:23:37 -0500 Subject: [PATCH 03/15] fix --- src/transforms/rename.jl | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/transforms/rename.jl b/src/transforms/rename.jl index d7e205bc..7f43ff03 100644 --- a/src/transforms/rename.jl +++ b/src/transforms/rename.jl @@ -3,9 +3,9 @@ # ------------------------------------------------------------------ """ - Rename(:col₁ => :newcol₁, :col₂ => :newcol₂, ..., :col₁ => :newcolₙ)) + Rename(:col₁ => :newcol₁, :col₂ => :newcol₂, ..., :col₁ => :newcolₙ) -Tha transform that renames `col₁` to `newcol₁`, `col₂` to `newcol₂`, ... +The transform that renames `col₁` to `newcol₁`, `col₂` to `newcol₂`, ... """ struct Rename <: Stateless names::Dict{Symbol,Symbol} @@ -21,13 +21,10 @@ function apply(transform::Rename, table) end end acols = [i for i in Tables.columns(table)] - 𝒯 = (; zip(newnames, acols)...) - 𝒯 |> Tables.materializer(table) + 𝒯 = (; zip(newnames, acols)...) 
|> Tables.materializer(table) 𝒯, nothing end - - function revert(transform::Rename, table) # reversing the key-value pairs of the Dict new_names = Dict() From 8d3207ecbd5adf0f48e7a42c923b284e828b315e Mon Sep 17 00:00:00 2001 From: Omar Elrefaei Date: Sun, 14 Nov 2021 21:13:47 -0500 Subject: [PATCH 04/15] Actually address brought up issues --- src/TableTransforms.jl | 6 +++--- src/transforms/rename.jl | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/TableTransforms.jl b/src/TableTransforms.jl index 12fd1079..9e24008d 100644 --- a/src/TableTransforms.jl +++ b/src/TableTransforms.jl @@ -7,7 +7,7 @@ module TableTransforms using Tables using ScientificTypes using Distributions: Normal -using Transducers: tcollect, push!! +using Transducers: tcollect using LinearAlgebra using Statistics @@ -29,10 +29,10 @@ export colapply, colrevert, # built-in - Identity, Select, Reject, - Rename3, + Rename, + Identity, Center, Scale, MinMax, diff --git a/src/transforms/rename.jl b/src/transforms/rename.jl index 7f43ff03..388effef 100644 --- a/src/transforms/rename.jl +++ b/src/transforms/rename.jl @@ -2,7 +2,7 @@ # Licensed under the MIT License. See LICENSE in the project root. # ------------------------------------------------------------------ -""" +""" Rename(:col₁ => :newcol₁, :col₂ => :newcol₂, ..., :col₁ => :newcolₙ) The transform that renames `col₁` to `newcol₁`, `col₂` to `newcol₂`, ... 
@@ -14,10 +14,10 @@ end function apply(transform::Rename, table) oldnames = Tables.columnnames(table) newnames = map(oldnames) do oldname - if oldname in keys(transform.names) - return transform.names[oldname] + if oldname in keys(transform.names) + transform.names[oldname] else - return oldname + oldname end end acols = [i for i in Tables.columns(table)] From 078e64914e9518b711cfff34999696556e19c06b Mon Sep 17 00:00:00 2001 From: Omar Elrefaei Date: Sun, 21 Nov 2021 18:34:01 -0500 Subject: [PATCH 05/15] refactor and add a pairs of strings constructor --- src/transforms/rename.jl | 39 +++++++++++++++++++++++---------------- 1 file changed, 23 insertions(+), 16 deletions(-) diff --git a/src/transforms/rename.jl b/src/transforms/rename.jl index 388effef..dd2e920e 100644 --- a/src/transforms/rename.jl +++ b/src/transforms/rename.jl @@ -11,27 +11,34 @@ struct Rename <: Stateless names::Dict{Symbol,Symbol} end +function Rename(names...) + sympair(x) = Symbol(first(x)) => Symbol(last(x)) + map(sympair , names) |> Dict |> Rename +end + function apply(transform::Rename, table) - oldnames = Tables.columnnames(table) - newnames = map(oldnames) do oldname - if oldname in keys(transform.names) - transform.names[oldname] - else - oldname - end - end - acols = [i for i in Tables.columns(table)] - 𝒯 = (; zip(newnames, acols)...) |> Tables.materializer(table) - 𝒯, nothing + _rename(transform.names, table) end -function revert(transform::Rename, table) +function revert(transform::Rename, table, cache) # reversing the key-value pairs of the Dict new_names = Dict() - for (new, old) in transform.names - new_names[old] = new + for (old, new) in transform.names + new_names[new] = old end - reversed_transform = Rename(new_names) - apply(reversed_transform, table) |> first + _rename(new_names, table) |> first end + + +function _rename(names, table) + oldnames = Tables.columnnames(table) + + newnames = map(oldnames) do oldname + oldname in keys(names) ? 
names[oldname] : oldname + end + + acols = [i for i in Tables.columns(table)] + 𝒯 = (; zip(newnames, acols)...) |> Tables.materializer(table) + 𝒯, nothing +end \ No newline at end of file From cb6e7fb1f05e5172e93689f90ce7123df3c0b958 Mon Sep 17 00:00:00 2001 From: Omar Elrefaei Date: Sun, 21 Nov 2021 18:34:16 -0500 Subject: [PATCH 06/15] add tests --- test/transforms.jl | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/test/transforms.jl b/test/transforms.jl index 16c86cf6..e344937a 100644 --- a/test/transforms.jl +++ b/test/transforms.jl @@ -131,6 +131,47 @@ @test n1 == n2 end + @testset "Rename" begin + a = rand(4000) + b = rand(4000) + c = rand(4000) + d = rand(4000) + t = Table(; a, b, c, d) + + T = Rename(Dict(:a => :x, :c => :y)) + n, c = apply(T, t) + @test Tables.columnnames(n) == (:x, :b, :y, :d) + tₒ = revert(T, n, c) + @test t == tₒ + + # rename with string pairs + T = Rename("a" => "x", "c" => "y") + n, c = apply(T, t) + @test Tables.columnnames(n) == (:x, :b, :y, :d) + tₒ = revert(T, n, c) + @test t == tₒ + + # rename with symbol pairs + T = Rename(:a => :x, :c => :y) + n, c = apply(T, t) + @test Tables.columnnames(n) == (:x, :b, :y, :d) + tₒ = revert(T, n, c) + @test t == tₒ + + # rename with mixed pairs + T = Rename("a" => :x, :c => "y") + n, c = apply(T, t) + @test Tables.columnnames(n) == (:x, :b, :y, :d) + tₒ = revert(T, n, c) + @test t == tₒ + + # reapply test + T = Rename(:b => :x, :d => :y) + n1, c1 = apply(T, t) + n2 = reapply(T, t, c1) + @test n1 == n2 + end + @testset "Identity" begin x = rand(4000) y = rand(4000) From 650ddc29c2225cbdab9b75b79fc8e7e705fe9025 Mon Sep 17 00:00:00 2001 From: Omar Elrefaei Date: Sun, 21 Nov 2021 19:21:33 -0500 Subject: [PATCH 07/15] confirm to Tables.jl spec --- src/transforms/rename.jl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/transforms/rename.jl b/src/transforms/rename.jl index dd2e920e..8652e89d 100644 --- a/src/transforms/rename.jl 
+++ b/src/transforms/rename.jl @@ -37,8 +37,9 @@ function _rename(names, table) newnames = map(oldnames) do oldname oldname in keys(names) ? names[oldname] : oldname end - - acols = [i for i in Tables.columns(table)] + + cols = Tables.columns(table) + acols = [Tables.getcolumn(cols, name) for name in oldnames] 𝒯 = (; zip(newnames, acols)...) |> Tables.materializer(table) 𝒯, nothing end \ No newline at end of file From 5f8e0dde55d17f42971e9d636a0844d32852d15b Mon Sep 17 00:00:00 2001 From: Omar Elrefaei Date: Sun, 21 Nov 2021 20:19:34 -0500 Subject: [PATCH 08/15] assert that all requested renames exist --- src/transforms/rename.jl | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/transforms/rename.jl b/src/transforms/rename.jl index 8652e89d..3aaa616c 100644 --- a/src/transforms/rename.jl +++ b/src/transforms/rename.jl @@ -13,7 +13,7 @@ end function Rename(names...) sympair(x) = Symbol(first(x)) => Symbol(last(x)) - map(sympair , names) |> Dict |> Rename + map(sympair, names) |> Dict |> Rename end function apply(transform::Rename, table) @@ -33,6 +33,10 @@ end function _rename(names, table) oldnames = Tables.columnnames(table) + @show oldnames, names + + dif = setdiff(keys(names), oldnames) |> Tuple + @assert length(dif) == 0 "The following column[s] were not found in the source table $dif" newnames = map(oldnames) do oldname oldname in keys(names) ? names[oldname] : oldname From 4bb76892d8e252ef96283fe1091bcecba1b0b25f Mon Sep 17 00:00:00 2001 From: Omar Elrefaei Date: Sun, 21 Nov 2021 20:23:31 -0500 Subject: [PATCH 09/15] update --- src/transforms/rename.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/transforms/rename.jl b/src/transforms/rename.jl index 3aaa616c..da783e70 100644 --- a/src/transforms/rename.jl +++ b/src/transforms/rename.jl @@ -16,6 +16,7 @@ function Rename(names...) 
map(sympair, names) |> Dict |> Rename end + function apply(transform::Rename, table) _rename(transform.names, table) end @@ -33,10 +34,9 @@ end function _rename(names, table) oldnames = Tables.columnnames(table) - @show oldnames, names - dif = setdiff(keys(names), oldnames) |> Tuple - @assert length(dif) == 0 "The following column[s] were not found in the source table $dif" + dif = setdiff(keys(names), oldnames) .|> String |> Tuple + @assert length(dif) == 0 "The following column[s] were not found in the source table: $dif" newnames = map(oldnames) do oldname oldname in keys(names) ? names[oldname] : oldname From aec00fbf65c04cfa04539ebc1b6eb01e2c18a4fc Mon Sep 17 00:00:00 2001 From: Omar Elrefaei Date: Mon, 22 Nov 2021 00:07:42 -0500 Subject: [PATCH 10/15] make it work with a single Pair, and add test --- src/transforms/rename.jl | 18 +++++++++--------- test/transforms.jl | 12 ++++++++++++ 2 files changed, 21 insertions(+), 9 deletions(-) diff --git a/src/transforms/rename.jl b/src/transforms/rename.jl index da783e70..5400888c 100644 --- a/src/transforms/rename.jl +++ b/src/transforms/rename.jl @@ -11,11 +11,10 @@ struct Rename <: Stateless names::Dict{Symbol,Symbol} end -function Rename(names...) - sympair(x) = Symbol(first(x)) => Symbol(last(x)) - map(sympair, names) |> Dict |> Rename -end +pairsyms(x::Pair) = Symbol(first(x)) => Symbol(last(x)) +Rename(names::Pair) = pairsyms(names) |> Dict |> Rename +Rename(names...) 
= pairsyms.(names) |> Dict |> Rename function apply(transform::Rename, table) _rename(transform.names, table) @@ -23,20 +22,21 @@ end function revert(transform::Rename, table, cache) # reversing the key-value pairs of the Dict - new_names = Dict() + newnames = Dict() for (old, new) in transform.names - new_names[new] = old + newnames[new] = old end - - _rename(new_names, table) |> first + _rename(newnames, table) |> first end function _rename(names, table) oldnames = Tables.columnnames(table) + # check if requested renames exist in the table dif = setdiff(keys(names), oldnames) .|> String |> Tuple - @assert length(dif) == 0 "The following column[s] were not found in the source table: $dif" + @assert length(dif) == 0 + "The following column[s] were not found in the source table: $dif" newnames = map(oldnames) do oldname oldname in keys(names) ? names[oldname] : oldname diff --git a/test/transforms.jl b/test/transforms.jl index e344937a..6c5d57e7 100644 --- a/test/transforms.jl +++ b/test/transforms.jl @@ -138,6 +138,12 @@ d = rand(4000) t = Table(; a, b, c, d) + T = Rename(Dict(:a => :x)) + n, c = apply(T, t) + @test Tables.columnnames(n) == (:x, :b, :c, :d) + tₒ = revert(T, n, c) + @test t == tₒ + T = Rename(Dict(:a => :x, :c => :y)) n, c = apply(T, t) @test Tables.columnnames(n) == (:x, :b, :y, :d) @@ -159,6 +165,12 @@ @test t == tₒ # rename with mixed pairs + T = Rename("a" => :x) + n, c = apply(T, t) + @test Tables.columnnames(n) == (:x, :b, :c, :d) + tₒ = revert(T, n, c) + @test t == tₒ + T = Rename("a" => :x, :c => "y") n, c = apply(T, t) @test Tables.columnnames(n) == (:x, :b, :y, :d) From 6ca788806d15b3ddb1d1f7ba2cbcb6f962c99cba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BAlio=20Hoffimann?= Date: Tue, 23 Nov 2021 14:34:30 -0300 Subject: [PATCH 11/15] Update src/transforms/rename.jl --- src/transforms/rename.jl | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/transforms/rename.jl b/src/transforms/rename.jl index 
5400888c..160d639e 100644 --- a/src/transforms/rename.jl +++ b/src/transforms/rename.jl @@ -34,16 +34,17 @@ function _rename(names, table) oldnames = Tables.columnnames(table) # check if requested renames exist in the table - dif = setdiff(keys(names), oldnames) .|> String |> Tuple - @assert length(dif) == 0 - "The following column[s] were not found in the source table: $dif" + diff = setdiff(keys(names), oldnames) .|> String |> Tuple + @assert length(diff) == 0 + "The following column[s] were not found in the source table: $diff" newnames = map(oldnames) do oldname oldname in keys(names) ? names[oldname] : oldname end cols = Tables.columns(table) - acols = [Tables.getcolumn(cols, name) for name in oldnames] - 𝒯 = (; zip(newnames, acols)...) |> Tables.materializer(table) + vals = [Tables.getcolumn(cols, name) for name in oldnames] + 𝒯 = (; zip(newnames, vals)...) |> Tables.materializer(table) + 𝒯, nothing end \ No newline at end of file From a34fed30ddaf41815b5a6d6b4089ba6ce1863376 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BAlio=20Hoffimann?= Date: Tue, 23 Nov 2021 14:37:17 -0300 Subject: [PATCH 12/15] Update src/transforms/rename.jl --- src/transforms/rename.jl | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/transforms/rename.jl b/src/transforms/rename.jl index 160d639e..3092c5d6 100644 --- a/src/transforms/rename.jl +++ b/src/transforms/rename.jl @@ -22,10 +22,7 @@ end function revert(transform::Rename, table, cache) # reversing the key-value pairs of the Dict - newnames = Dict() - for (old, new) in transform.names - newnames[new] = old - end + newnames = Dict(new => old for (old, new) in transform.names) _rename(newnames, table) |> first end From 6787a012ea93f34e4193eea5b3cf38d0d23da632 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BAlio=20Hoffimann?= Date: Tue, 23 Nov 2021 14:39:52 -0300 Subject: [PATCH 13/15] Update src/transforms/rename.jl --- src/transforms/rename.jl | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff 
--git a/src/transforms/rename.jl b/src/transforms/rename.jl index 3092c5d6..8a3ac00f 100644 --- a/src/transforms/rename.jl +++ b/src/transforms/rename.jl @@ -31,9 +31,7 @@ function _rename(names, table) oldnames = Tables.columnnames(table) # check if requested renames exist in the table - diff = setdiff(keys(names), oldnames) .|> String |> Tuple - @assert length(diff) == 0 - "The following column[s] were not found in the source table: $diff" + @assert keys(names) ⊆ oldnames "invalid column names" newnames = map(oldnames) do oldname oldname in keys(names) ? names[oldname] : oldname From b72f999cc869239666f84ba05d7a4a0e6f20d01e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BAlio=20Hoffimann?= Date: Tue, 23 Nov 2021 14:41:25 -0300 Subject: [PATCH 14/15] Update src/transforms/rename.jl --- src/transforms/rename.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/transforms/rename.jl b/src/transforms/rename.jl index 8a3ac00f..8a2b4b80 100644 --- a/src/transforms/rename.jl +++ b/src/transforms/rename.jl @@ -33,6 +33,7 @@ function _rename(names, table) # check if requested renames exist in the table @assert keys(names) ⊆ oldnames "invalid column names" + # use new names if necessary newnames = map(oldnames) do oldname oldname in keys(names) ? 
names[oldname] : oldname end From 8c330eb6473228b61c9977d05480afb88d4f5c79 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BAlio=20Hoffimann?= Date: Tue, 23 Nov 2021 14:47:11 -0300 Subject: [PATCH 15/15] Update src/transforms/rename.jl --- src/transforms/rename.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/transforms/rename.jl b/src/transforms/rename.jl index 8a2b4b80..db80f8d0 100644 --- a/src/transforms/rename.jl +++ b/src/transforms/rename.jl @@ -3,7 +3,7 @@ # ------------------------------------------------------------------ """ - Rename(:col₁ => :newcol₁, :col₂ => :newcol₂, ..., :col₁ => :newcolₙ) + Rename(:col₁ => :newcol₁, :col₂ => :newcol₂, ..., :colₙ => :newcolₙ) The transform that renames `col₁` to `newcol₁`, `col₂` to `newcol₂`, ... """