From 63ec8e72a8349ed97408ca41c3468e13c6c5484c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BAlio=20Hoffimann?= Date: Fri, 3 Oct 2025 09:47:30 -0300 Subject: [PATCH 1/4] Add more arg types in Map transform --- src/transforms/map.jl | 48 ++++++++++++++++++++++++------------------ test/shows.jl | 4 ++-- test/transforms/map.jl | 30 ++++++++++++++++++++++++++ 3 files changed, 59 insertions(+), 23 deletions(-) diff --git a/src/transforms/map.jl b/src/transforms/map.jl index dea9986b..e2a1c6b2 100644 --- a/src/transforms/map.jl +++ b/src/transforms/map.jl @@ -2,10 +2,13 @@ # Licensed under the MIT License. See LICENSE in the project root. # ------------------------------------------------------------------ -const TargetName = Union{Symbol,AbstractString} -const PairWithTarget = Pair{<:Any,<:Pair{<:Function,<:TargetName}} -const PairWithoutTarget = Pair{<:Any,<:Function} -const MapPair = Union{PairWithTarget,PairWithoutTarget} +# supported argument types +const Callable = Union{Function,Type} +const Target = Union{Symbol,AbstractString} +const ColsCallableTarget = Pair{<:Any,<:Pair{<:Callable,<:Target}} +const ColsCallable = Pair{<:Any,<:Callable} +const CallableTarget = Pair{<:Callable,<:Target} +const MapArg = Union{ColsCallableTarget,ColsCallable,CallableTarget,Callable} """ Map(cols₁ => fun₁ => target₁, cols₂ => fun₂, ..., colsₙ => funₙ => targetₙ) @@ -32,7 +35,8 @@ Map(r"[abc]" => ((a, b, c) -> a^2 - 2b + c) => "col1") ## Notes -* Anonymous functions must be passed with parentheses as in the examples above; +* Anonymous functions must be passed with parentheses as in the examples above + * Some function names are treated in a special way, they are: * Anonymous functions: `#1` -> `f1`; * Composed functions: `outer ∘ inner` -> `outer_inner`; @@ -41,20 +45,20 @@ Map(r"[abc]" => ((a, b, c) -> a^2 - 2b + c) => "col1") """ struct Map <: StatelessFeatureTransform selectors::Vector{ColumnSelector} - funs::Vector{Function} + funs::Vector{Callable} 
targets::Vector{Union{Nothing,Symbol}} end -Map() = throw(ArgumentError("cannot create Map transform without arguments")) - -function Map(pairs::MapPair...) - tuples = map(_extract, pairs) - selectors = [t[1] for t in tuples] - funs = [t[2] for t in tuples] - targets = [t[3] for t in tuples] - Map(selectors, funs, targets) +function Map(args::MapArg...) + tups = map(_extract, args) + sels = [t[1] for t in tups] + funs = [t[2] for t in tups] + tars = [t[3] for t in tups] + Map(sels, funs, tars) end +Map() = throw(ArgumentError("cannot create Map transform without arguments")) + function applyfeat(transform::Map, feat, prep) cols = Tables.columns(feat) names = Tables.columnnames(cols) @@ -76,13 +80,10 @@ function applyfeat(transform::Map, feat, prep) newfeat, nothing end -_extract(p::PairWithTarget) = selector(first(p)), first(last(p)), Symbol(last(last(p))) -_extract(p::PairWithoutTarget) = selector(first(p)), last(p), nothing - -_funname(fun::Base.Fix1) = "fix1_" * _funname(fun.f) -_funname(fun::Base.Fix2) = "fix2_" * _funname(fun.f) -_funname(fun::ComposedFunction) = _funname(fun.outer) * "_" * _funname(fun.inner) -_funname(fun) = string(fun) +_extract(arg::ColsCallableTarget) = selector(first(arg)), first(last(arg)), Symbol(last(last(arg))) +_extract(arg::ColsCallable) = selector(first(arg)), last(arg), nothing +_extract(arg::CallableTarget) = AllSelector(), first(arg), Symbol(last(arg)) +_extract(arg::Callable) = AllSelector(), arg, nothing function _makename(snames, fun) funname = _funname(fun) @@ -91,3 +92,8 @@ function _makename(snames, fun) end Symbol(funname, :_, join(snames, "_")) end + +_funname(fun::Base.Fix1) = "fix1_" * _funname(fun.f) +_funname(fun::Base.Fix2) = "fix2_" * _funname(fun.f) +_funname(fun::ComposedFunction) = _funname(fun.outer) * "_" * _funname(fun.inner) +_funname(fun) = string(fun) diff --git a/test/shows.jl b/test/shows.jl index b3e2582f..01592a4e 100644 --- a/test/shows.jl +++ b/test/shows.jl @@ -279,14 +279,14 @@ # compact mode 
iostr = sprint(show, T) @test iostr == - "Map(selectors: ColumnSelector[:a, [:a, :b]], funs: Function[sin, $(nameof(fun))], targets: Union{Nothing, Symbol}[nothing, :c])" + "Map(selectors: ColumnSelector[:a, [:a, :b]], funs: Union{Function, Type}[sin, #13], targets: Union{Nothing, Symbol}[nothing, :c])" # full mode iostr = sprint(show, MIME("text/plain"), T) @test iostr == """ Map transform ├─ selectors: ColumnSelectors.ColumnSelector[:a, [:a, :b]] - ├─ funs: Function[sin, $(typeof(fun))()] + ├─ funs: Union{Function, Type}[sin, $(typeof(fun))()] └─ targets: Union{Nothing, Symbol}[nothing, :c]""" end diff --git a/test/transforms/map.jl b/test/transforms/map.jl index 7e7ef516..8ef12456 100644 --- a/test/transforms/map.jl +++ b/test/transforms/map.jl @@ -96,6 +96,36 @@ @test Tables.schema(n).names == (:fix2_hypot_a,) @test n.fix2_hypot_a == f.(t.a) + # function and target + f = (a, b, c, d) -> a + b + c + d + T = Map(f => "target") + n, c = apply(T, t) + @test Tables.schema(n).names == (:target,) + @test n.target == f.(t.a, t.b, t.c, t.d) + + # function alone + f = (a, b, c, d) -> a + b + c + d + fname = replace(string(f), "#" => "f") + colname = Symbol(fname, :_a, :_b, :_c, :_d) + T = Map(f) + n, c = apply(T, t) + @test Tables.schema(n).names == (colname,) + @test Tables.getcolumn(n, colname) == f.(t.a, t.b, t.c, t.d) + + # type and target + struct Foo a; b; c; d end + T = Map(Foo => "target") + n, c = apply(T, t) + @test Tables.schema(n).names == (:target,) + @test n.target == Foo.(t.a, t.b, t.c, t.d) + + # type alone + struct Bar a; b; c; d end + T = Map(Bar) + n, c = apply(T, t) + @test Tables.schema(n).names == (:Bar_a_b_c_d,) + @test n.Bar_a_b_c_d == Bar.(t.a, t.b, t.c, t.d) + # error: cannot create Map transform without arguments @test_throws ArgumentError Map() end From e6ddb63533e1bd6e1c5d2da89899b2e16a631b64 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BAlio=20Hoffimann?= Date: Fri, 3 Oct 2025 09:54:42 -0300 Subject: [PATCH 2/4] Fix show test --- 
test/shows.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/shows.jl b/test/shows.jl index 01592a4e..c88d40f6 100644 --- a/test/shows.jl +++ b/test/shows.jl @@ -279,7 +279,7 @@ # compact mode iostr = sprint(show, T) @test iostr == - "Map(selectors: ColumnSelector[:a, [:a, :b]], funs: Union{Function, Type}[sin, #13], targets: Union{Nothing, Symbol}[nothing, :c])" + "Map(selectors: ColumnSelector[:a, [:a, :b]], funs: Union{Function, Type}[sin, $(nameof(fun))], targets: Union{Nothing, Symbol}[nothing, :c])" # full mode iostr = sprint(show, MIME("text/plain"), T) From 7df141abd7fd815deb285a8ba79a7487f3581062 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BAlio=20Hoffimann?= Date: Fri, 3 Oct 2025 12:09:46 -0300 Subject: [PATCH 3/4] Improve docstring --- src/transforms/map.jl | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/transforms/map.jl b/src/transforms/map.jl index e2a1c6b2..4d7f57b0 100644 --- a/src/transforms/map.jl +++ b/src/transforms/map.jl @@ -17,7 +17,8 @@ Applies the `funᵢ` function to the columns selected by `colsᵢ` using the `map` function and saves the result in a new column named `targetᵢ`. The column selection can be a single column identifier (index or name), -a collection of identifiers or a regular expression (regex). +a collection of identifiers or a regular expression (regex). It can also +be omitted to apply the function to all columns. Passing a target column name is optional and when omitted a new name is generated by joining the function name with the selected column names. 
@@ -31,6 +32,8 @@ Map([2, 3] => ((b, c) -> 2b + c)) Map([:a, :c] => ((a, c) -> 2a * 3c) => :col1) Map(["c", "a"] => ((c, a) -> 3c / a) => :col1, "c" => tan) Map(r"[abc]" => ((a, b, c) -> a^2 - 2b + c) => "col1") +Map(sin => "seno") +Map(cos) ``` ## Notes From 72c7415a1cb4f38359f8bececa8b32356cb8e9e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BAlio=20Hoffimann?= Date: Fri, 3 Oct 2025 12:11:40 -0300 Subject: [PATCH 4/4] Improve docstring --- src/transforms/map.jl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/transforms/map.jl b/src/transforms/map.jl index 4d7f57b0..1c9cff23 100644 --- a/src/transforms/map.jl +++ b/src/transforms/map.jl @@ -15,6 +15,8 @@ const MapArg = Union{ColsCallableTarget,ColsCallable,CallableTarget,Callable} Applies the `funᵢ` function to the columns selected by `colsᵢ` using the `map` function and saves the result in a new column named `targetᵢ`. +Types are also allowed in place of functions to construct objects with +arguments from the columns. The column selection can be a single column identifier (index or name), a collection of identifiers or a regular expression (regex). It can also