Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 33 additions & 22 deletions src/transforms/map.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,25 @@
# Licensed under the MIT License. See LICENSE in the project root.
# ------------------------------------------------------------------

# name accepted for a target column
const TargetName = Union{Symbol,AbstractString}
# `cols => fun => target`
const PairWithTarget = Pair{<:Any,<:Pair{<:Function,<:TargetName}}
# `cols => fun`
const PairWithoutTarget = Pair{<:Any,<:Function}
# any pair form accepted by the constructor
const MapPair = Union{PairWithTarget,PairWithoutTarget}
# supported argument types
# anything that can be called on column values: a function or a type (constructor)
const Callable = Union{Function,Type}
# name accepted for a target column
const Target = Union{Symbol,AbstractString}
# `cols => callable => target`
const ColsCallableTarget = Pair{<:Any,<:Pair{<:Callable,<:Target}}
# `cols => callable`
const ColsCallable = Pair{<:Any,<:Callable}
# `callable => target` (no column selection)
const CallableTarget = Pair{<:Callable,<:Target}
# union of every argument form accepted by the constructor
const MapArg = Union{ColsCallableTarget,ColsCallable,CallableTarget,Callable}

"""
Map(cols₁ => fun₁ => target₁, cols₂ => fun₂, ..., colsₙ => funₙ => targetₙ)

Applies the `funᵢ` function to the columns selected by `colsᵢ` using
the `map` function and saves the result in a new column named `targetᵢ`.
Types are also allowed in place of functions to construct objects with
arguments from the columns.

The column selection can be a single column identifier (index or name),
a collection of identifiers or a regular expression (regex).
a collection of identifiers or a regular expression (regex). It can also
be omitted to apply the function to all columns.

Passing a target column name is optional and when omitted a new name
is generated by joining the function name with the selected column names.
Expand All @@ -28,11 +34,14 @@ Map([2, 3] => ((b, c) -> 2b + c))
Map([:a, :c] => ((a, c) -> 2a * 3c) => :col1)
Map(["c", "a"] => ((c, a) -> 3c / a) => :col1, "c" => tan)
Map(r"[abc]" => ((a, b, c) -> a^2 - 2b + c) => "col1")
Map(sin => "seno")
Map(cos)
```

## Notes

* Anonymous functions must be passed with parentheses as in the examples above;
* Anonymous functions must be passed with parentheses as in the examples above

* Some function names are treated in a special way:
* Anonymous functions: `#1` -> `f1`;
* Composed functions: `outer ∘ inner` -> `outer_inner`;
Expand All @@ -41,20 +50,20 @@ Map(r"[abc]" => ((a, b, c) -> a^2 - 2b + c) => "col1")
"""
struct Map <: StatelessFeatureTransform
# the three vectors are parallel: entry `i` of each describes the i-th mapping
# column selection of each mapping
selectors::Vector{ColumnSelector}
funs::Vector{Function}
# callable (function or type) of each mapping
funs::Vector{Callable}
# output column name of each mapping, or `nothing` when no target was given
targets::Vector{Union{Nothing,Symbol}}
end

# A `Map` needs at least one mapping, so the zero-argument call is rejected.
function Map()
  throw(ArgumentError("cannot create Map transform without arguments"))
end

function Map(pairs::MapPair...)
tuples = map(_extract, pairs)
selectors = [t[1] for t in tuples]
funs = [t[2] for t in tuples]
targets = [t[3] for t in tuples]
Map(selectors, funs, targets)
# Build a `Map` from any mix of the supported argument forms. Each argument is
# normalized by `_extract` into a (selector, callable, target) tuple, and the
# tuples are then split into the three parallel vectors stored in the struct.
function Map(args::MapArg...)
  triples = collect(map(_extract, args))
  selectors = map(t -> t[1], triples)
  callables = map(t -> t[2], triples)
  targets = map(t -> t[3], triples)
  Map(selectors, callables, targets)
end

# A `Map` needs at least one mapping, so the zero-argument call is rejected.
function Map()
  throw(ArgumentError("cannot create Map transform without arguments"))
end

function applyfeat(transform::Map, feat, prep)
cols = Tables.columns(feat)
names = Tables.columnnames(cols)
Expand All @@ -76,13 +85,10 @@ function applyfeat(transform::Map, feat, prep)
newfeat, nothing
end

# `cols => fun => target`: selector, function and the target name as a Symbol
function _extract(p::PairWithTarget)
  cols, (fun, target) = p
  selector(cols), fun, Symbol(target)
end

# `cols => fun`: no target given, so the third slot is `nothing`
function _extract(p::PairWithoutTarget)
  cols, fun = p
  selector(cols), fun, nothing
end

# Textual name of a callable, used when generating column names.
# Fallback: whatever `string` prints for the object.
_funname(fun) = string(fun)

# Partially applied functions are prefixed with the kind of fixing.
_funname(fun::Base.Fix1) = string("fix1_", _funname(fun.f))
_funname(fun::Base.Fix2) = string("fix2_", _funname(fun.f))

# Compositions join the outer and inner names with an underscore.
function _funname(fun::ComposedFunction)
  outer = _funname(fun.outer)
  inner = _funname(fun.inner)
  string(outer, "_", inner)
end
# Normalize each supported argument form into a (selector, callable, target) tuple.

# `cols => callable => target`
function _extract(arg::ColsCallableTarget)
  cols, (fun, target) = arg
  selector(cols), fun, Symbol(target)
end

# `cols => callable`: no target given, so the third slot is `nothing`
function _extract(arg::ColsCallable)
  cols, fun = arg
  selector(cols), fun, nothing
end

# `callable => target`: no column selection, so all columns are selected
function _extract(arg::CallableTarget)
  fun, target = arg
  AllSelector(), fun, Symbol(target)
end

# bare callable: all columns, no target
_extract(arg::Callable) = (AllSelector(), arg, nothing)

function _makename(snames, fun)
funname = _funname(fun)
Expand All @@ -91,3 +97,8 @@ function _makename(snames, fun)
end
Symbol(funname, :_, join(snames, "_"))
end

# Textual name of a callable, used when generating column names.
# Fallback: whatever `string` prints for the object.
_funname(fun) = string(fun)

# Partially applied functions are prefixed with the kind of fixing.
_funname(fun::Base.Fix1) = string("fix1_", _funname(fun.f))
_funname(fun::Base.Fix2) = string("fix2_", _funname(fun.f))

# Compositions join the outer and inner names with an underscore.
function _funname(fun::ComposedFunction)
  outer = _funname(fun.outer)
  inner = _funname(fun.inner)
  string(outer, "_", inner)
end
4 changes: 2 additions & 2 deletions test/shows.jl
Original file line number Diff line number Diff line change
Expand Up @@ -279,14 +279,14 @@
# compact mode
iostr = sprint(show, T)
@test iostr ==
"Map(selectors: ColumnSelector[:a, [:a, :b]], funs: Function[sin, $(nameof(fun))], targets: Union{Nothing, Symbol}[nothing, :c])"
"Map(selectors: ColumnSelector[:a, [:a, :b]], funs: Union{Function, Type}[sin, $(nameof(fun))], targets: Union{Nothing, Symbol}[nothing, :c])"

# full mode
iostr = sprint(show, MIME("text/plain"), T)
@test iostr == """
Map transform
├─ selectors: ColumnSelectors.ColumnSelector[:a, [:a, :b]]
├─ funs: Function[sin, $(typeof(fun))()]
├─ funs: Union{Function, Type}[sin, $(typeof(fun))()]
└─ targets: Union{Nothing, Symbol}[nothing, :c]"""
end

Expand Down
30 changes: 30 additions & 0 deletions test/transforms/map.jl
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,36 @@
@test Tables.schema(n).names == (:fix2_hypot_a,)
@test n.fix2_hypot_a == f.(t.a)

# function and target
f = (a, b, c, d) -> a + b + c + d
T = Map(f => "target")
n, c = apply(T, t)
@test Tables.schema(n).names == (:target,)
@test n.target == f.(t.a, t.b, t.c, t.d)

# function alone
f = (a, b, c, d) -> a + b + c + d
fname = replace(string(f), "#" => "f")
colname = Symbol(fname, :_a, :_b, :_c, :_d)
T = Map(f)
n, c = apply(T, t)
@test Tables.schema(n).names == (colname,)
@test Tables.getcolumn(n, colname) == f.(t.a, t.b, t.c, t.d)

# type and target
struct Foo a; b; c; d end
T = Map(Foo => "target")
n, c = apply(T, t)
@test Tables.schema(n).names == (:target,)
@test n.target == Foo.(t.a, t.b, t.c, t.d)

# type alone
struct Bar a; b; c; d end
T = Map(Bar)
n, c = apply(T, t)
@test Tables.schema(n).names == (:Bar_a_b_c_d,)
@test n.Bar_a_b_c_d == Bar.(t.a, t.b, t.c, t.d)

# error: cannot create Map transform without arguments
@test_throws ArgumentError Map()
end
Loading