From 4e80fee9b621b750a7bf8d9236e3d5d23f6687d2 Mon Sep 17 00:00:00 2001 From: Elias Carvalho Date: Thu, 28 Sep 2023 13:49:15 -0300 Subject: [PATCH 1/4] Add 'AbsoluteUnits' transform --- docs/src/transforms.md | 6 ++ src/TableTransforms.jl | 3 +- src/transforms.jl | 1 + src/transforms/absoluteunits.jl | 83 +++++++++++++++ src/transforms/dropunits.jl | 4 +- test/transforms.jl | 1 + test/transforms/absoluteunits.jl | 176 +++++++++++++++++++++++++++++++ 7 files changed, 271 insertions(+), 3 deletions(-) create mode 100644 src/transforms/absoluteunits.jl create mode 100644 test/transforms/absoluteunits.jl diff --git a/docs/src/transforms.md b/docs/src/transforms.md index 91733de7..94783eae 100644 --- a/docs/src/transforms.md +++ b/docs/src/transforms.md @@ -62,6 +62,12 @@ DropExtrema DropUnits ``` +## AbsoluteUnits + +```@docs +AbsoluteUnits +``` + ## Map ```@docs diff --git a/src/TableTransforms.jl b/src/TableTransforms.jl index f90f539e..c46f68c3 100644 --- a/src/TableTransforms.jl +++ b/src/TableTransforms.jl @@ -19,7 +19,7 @@ using CategoricalArrays using Random using NelderMead: optimise -using Unitful: AbstractQuantity +using Unitful: AbstractQuantity, AffineQuantity, Units, AffineUnits import Distributions: ContinuousUnivariateDistribution import Distributions: quantile, cdf @@ -58,6 +58,7 @@ export DropMissing, DropExtrema, DropUnits, + AbsoluteUnits, Map, Replace, Coalesce, diff --git a/src/transforms.jl b/src/transforms.jl index 2ddb9041..98c673a1 100644 --- a/src/transforms.jl +++ b/src/transforms.jl @@ -273,6 +273,7 @@ include("transforms/filter.jl") include("transforms/dropmissing.jl") include("transforms/dropextrema.jl") include("transforms/dropunits.jl") +include("transforms/absoluteunits.jl") include("transforms/map.jl") include("transforms/replace.jl") include("transforms/coalesce.jl") diff --git a/src/transforms/absoluteunits.jl b/src/transforms/absoluteunits.jl new file mode 100644 index 00000000..cae56adb --- /dev/null +++ b/src/transforms/absoluteunits.jl @@ -0,0 +1,83 @@ +# ------------------------------------------------------------------ +# Licensed under the MIT License. See LICENSE in the project root. +# ------------------------------------------------------------------ + +""" + AbsoluteUnits() + AbsoluteUnits(:) + +Converts the units of all columns in the table to absolute units. + + AbsoluteUnits(col₁, col₂, ..., colₙ) + AbsoluteUnits([col₁, col₂, ..., colₙ]) + AbsoluteUnits((col₁, col₂, ..., colₙ)) + +Converts the units of selected columns `col₁`, `col₂`, ..., `colₙ` to absolute units. + + AbsoluteUnits(regex) + +Converts the units of columns that match with `regex` to absolute units. + +# Examples + +```julia +AbsoluteUnits() +AbsoluteUnits([2, 3, 5]) +AbsoluteUnits([:b, :c, :e]) +AbsoluteUnits(("b", "c", "e")) +AbsoluteUnits(r"[bce]") +``` +""" +struct AbsoluteUnits{S<:ColSpec} <: StatelessFeatureTransform + colspec::S +end + +AbsoluteUnits() = AbsoluteUnits(AllSpec()) +AbsoluteUnits(spec) = AbsoluteUnits(colspec(spec)) +AbsoluteUnits(cols::T...) where {T<:Col} = AbsoluteUnits(colspec(cols)) + +isrevertible(::Type{<:AbsoluteUnits}) = true + +_absunit(x) = _absunit(x, nonmissingtype(eltype(x))) +_absunit(x, ::Type) = (x, NoUnits) +_absunit(x, ::Type{Q}) where {Q<:AbstractQuantity} = (x, unit(Q)) +function _absunit(x, ::Type{Q}) where {Q<:AffineQuantity} + u = absoluteunit(unit(Q)) + y = map(v -> uconvert(u, v), x) + (y, u) +end + +function applyfeat(transform::AbsoluteUnits, feat, prep) + cols = Tables.columns(feat) + names = Tables.columnnames(cols) + snames = choose(transform.colspec, names) + + tuples = map(names) do name + x = Tables.getcolumn(cols, name) + name ∈ snames ? _absunit(x) : (x, NoUnits) + end + + columns = first.(tuples) + units = last.(tuples) + + 𝒯 = (; zip(names, columns)...) + newfeat = 𝒯 |> Tables.materializer(feat) + newfeat, (snames, units) +end + +_revunit(x, ::Units) = x +_revunit(x, u::AbsoluteUnits) = map(v -> uconvert(u, v), x) + +function revertfeat(::AbsoluteUnits, newfeat, fcache) + cols = Tables.columns(newfeat) + names = Tables.columnnames(cols) + + snames, units = fcache + columns = map(names, units) do name, unit + x = Tables.getcolumn(cols, name) + name ∈ snames ? _revunit(x, unit) : x + end + + 𝒯 = (; zip(names, columns)...) + 𝒯 |> Tables.materializer(newfeat) +end diff --git a/src/transforms/dropunits.jl b/src/transforms/dropunits.jl index 966e2b28..59b2ac6d 100644 --- a/src/transforms/dropunits.jl +++ b/src/transforms/dropunits.jl @@ -6,7 +6,7 @@ DropUnits() DropUnits(:) -Drop units from all column in the table. +Drop units from all columns in the table. DropUnits(col₁, col₂, ..., colₙ) DropUnits([col₁, col₂, ..., colₙ]) @@ -61,7 +61,7 @@ function applyfeat(transform::DropUnits, feat, prep) end _addunit(x, ::typeof(NoUnits)) = x -_addunit(x, unit) = [v * unit for v in x] +_addunit(x, u::Units) = map(v -> v * u, x) function revertfeat(::DropUnits, newfeat, fcache) cols = Tables.columns(newfeat) diff --git a/test/transforms.jl b/test/transforms.jl index e8fe8171..dc87bf2f 100644 --- a/test/transforms.jl +++ b/test/transforms.jl @@ -8,6 +8,7 @@ transformfiles = [ "dropmissing.jl", "dropextrema.jl", "dropunits.jl", + "absoluteunits.jl", "map.jl", "replace.jl", "coalesce.jl", diff --git a/test/transforms/absoluteunits.jl b/test/transforms/absoluteunits.jl new file mode 100644 index 00000000..fd2c0631 --- /dev/null +++ b/test/transforms/absoluteunits.jl @@ -0,0 +1,176 @@ +@testset "AbsoluteUnits" begin + @test isrevertible(AbsoluteUnits()) + + a = [7, 4, 4, 7, 4, 1, 1, 6, 4, 7] * u"°C" + b = [4, 5, 4, missing, 6, 6, missing, 4, 4, 1] * u"K" + c = [3.9, 3.8, 3.5, 6.5, 7.7, 1.5, 0.6, 5.7, 4.7, 4.8] * u"K" + d = [6.3, 4.7, 7.6, missing, 1.2, missing, 5.9, 0.2, 1.9, 4.2] * u"°C" + e = ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j"] + t = Table(; a, b, c, d, e) + + T = AbsoluteUnits() + n, c = apply(T, t) + @test unit(eltype(n.a)) === u"K" + @test unit(nonmissingtype(eltype(n.b))) === u"K" + @test unit(eltype(n.c)) === u"K" + @test unit(nonmissingtype(eltype(n.d))) === u"K" + @test eltype(n.e) === String + @test n.e == t.e + tₒ = revert(T, n, c) + @test t.a == tₒ.a + @test isequal(t.b, tₒ.b) + @test t.c == tₒ.c + @test isequal(t.d, tₒ.d) + @test t.e == tₒ.e + + # args... + # integers + T = AbsoluteUnits(1, 2) + n, c = apply(T, t) + @test unit(eltype(n.a)) === u"K" + @test unit(nonmissingtype(eltype(n.b))) === u"K" + @test unit(eltype(n.c)) === u"K" + @test unit(nonmissingtype(eltype(n.d))) === u"°C" + tₒ = revert(T, n, c) + @test t.a == tₒ.a + @test isequal(t.b, tₒ.b) + @test t.c == tₒ.c + @test isequal(t.d, tₒ.d) + @test t.e == tₒ.e + + # symbols + T = AbsoluteUnits(:a, :b) + n, c = apply(T, t) + @test unit(eltype(n.a)) === u"K" + @test unit(nonmissingtype(eltype(n.b))) === u"K" + @test unit(eltype(n.c)) === u"K" + @test unit(nonmissingtype(eltype(n.d))) === u"°C" + tₒ = revert(T, n, c) + @test t.a == tₒ.a + @test isequal(t.b, tₒ.b) + @test t.c == tₒ.c + @test isequal(t.d, tₒ.d) + @test t.e == tₒ.e + + # strings + T = AbsoluteUnits("a", "b") + n, c = apply(T, t) + @test unit(eltype(n.a)) === u"K" + @test unit(nonmissingtype(eltype(n.b))) === u"K" + @test unit(eltype(n.c)) === u"K" + @test unit(nonmissingtype(eltype(n.d))) === u"°C" + tₒ = revert(T, n, c) + @test t.a == tₒ.a + @test isequal(t.b, tₒ.b) + @test t.c == tₒ.c + @test isequal(t.d, tₒ.d) + @test t.e == tₒ.e + + # vector + # integers + T = AbsoluteUnits([3, 4]) + n, c = apply(T, t) + @test unit(eltype(n.a)) === u"°C" + @test unit(nonmissingtype(eltype(n.b))) === u"K" + @test unit(eltype(n.c)) === u"K" + @test unit(nonmissingtype(eltype(n.d))) === u"K" + tₒ = revert(T, n, c) + @test t.a == tₒ.a + @test isequal(t.b, tₒ.b) + @test t.c == tₒ.c + @test isequal(t.d, tₒ.d) + @test t.e == tₒ.e + + # symbols + T = AbsoluteUnits([:c, :d]) + n, c = apply(T, t) + @test unit(eltype(n.a)) === u"°C" + @test unit(nonmissingtype(eltype(n.b))) === u"K" + @test unit(eltype(n.c)) === u"K" + @test unit(nonmissingtype(eltype(n.d))) === u"K" + tₒ = revert(T, n, c) + @test t.a == tₒ.a + @test isequal(t.b, tₒ.b) + @test t.c == tₒ.c + @test isequal(t.d, tₒ.d) + @test t.e == tₒ.e + + # strings + T = AbsoluteUnits(["c", "d"]) + n, c = apply(T, t) + @test unit(eltype(n.a)) === u"°C" + @test unit(nonmissingtype(eltype(n.b))) === u"K" + @test unit(eltype(n.c)) === u"K" + @test unit(nonmissingtype(eltype(n.d))) === u"K" + tₒ = revert(T, n, c) + @test t.a == tₒ.a + @test isequal(t.b, tₒ.b) + @test t.c == tₒ.c + @test isequal(t.d, tₒ.d) + @test t.e == tₒ.e + + # tuple + # integers + T = AbsoluteUnits((1, 4, 5)) + n, c = apply(T, t) + @test unit(eltype(n.a)) === u"K" + @test unit(nonmissingtype(eltype(n.b))) === u"K" + @test unit(eltype(n.c)) === u"K" + @test unit(nonmissingtype(eltype(n.d))) === u"K" + @test eltype(n.e) === String + @test n.e == t.e + tₒ = revert(T, n, c) + @test t.a == tₒ.a + @test isequal(t.b, tₒ.b) + @test t.c == tₒ.c + @test isequal(t.d, tₒ.d) + @test t.e == tₒ.e + + # symbols + T = AbsoluteUnits((:a, :d, :e)) + n, c = apply(T, t) + @test unit(eltype(n.a)) === u"K" + @test unit(nonmissingtype(eltype(n.b))) === u"K" + @test unit(eltype(n.c)) === u"K" + @test unit(nonmissingtype(eltype(n.d))) === u"K" + @test eltype(n.e) === String + @test n.e == t.e + tₒ = revert(T, n, c) + @test t.a == tₒ.a + @test isequal(t.b, tₒ.b) + @test t.c == tₒ.c + @test isequal(t.d, tₒ.d) + @test t.e == tₒ.e + + # strings + T = AbsoluteUnits(("a", "d", "e")) + n, c = apply(T, t) + @test unit(eltype(n.a)) === u"K" + @test unit(nonmissingtype(eltype(n.b))) === u"K" + @test unit(eltype(n.c)) === u"K" + @test unit(nonmissingtype(eltype(n.d))) === u"K" + @test eltype(n.e) === String + @test n.e == t.e + tₒ = revert(T, n, c) + @test t.a == tₒ.a + @test isequal(t.b, tₒ.b) + @test t.c == tₒ.c + @test isequal(t.d, tₒ.d) + @test t.e == tₒ.e + + # regex + T = AbsoluteUnits(r"[ade]") + n, c = apply(T, t) + @test unit(eltype(n.a)) === u"K" + @test unit(nonmissingtype(eltype(n.b))) === u"K" + @test unit(eltype(n.c)) === u"K" + @test unit(nonmissingtype(eltype(n.d))) === u"K" + @test eltype(n.e) === String + @test n.e == t.e + tₒ = revert(T, n, c) + @test t.a == tₒ.a + @test isequal(t.b, tₒ.b) + @test t.c == tₒ.c + @test isequal(t.d, tₒ.d) + @test t.e == tₒ.e +end From 420003c9d6ae002bd6d350816a416e9f46666218 Mon Sep 17 00:00:00 2001 From: Elias Carvalho Date: Thu, 28 Sep 2023 13:50:39 -0300 Subject: [PATCH 2/4] Fix typo --- src/TableTransforms.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/TableTransforms.jl b/src/TableTransforms.jl index c46f68c3..e317e386 100644 --- a/src/TableTransforms.jl +++ b/src/TableTransforms.jl @@ -19,7 +19,7 @@ using CategoricalArrays using Random using NelderMead: optimise -using Unitful: AbstractQuantity, AffineQuantity, Units, AffineUnits +using Unitful: AbstractQuantity, AffineQuantity, AffineUnits, Units import Distributions: ContinuousUnivariateDistribution import Distributions: quantile, cdf From e2cae82d01b41f8e8153daf03b6d8bf9b4bb1b44 Mon Sep 17 00:00:00 2001 From: Elias Carvalho Date: Thu, 28 Sep 2023 14:05:18 -0300 Subject: [PATCH 3/4] Fix code --- src/transforms/absoluteunits.jl | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/transforms/absoluteunits.jl b/src/transforms/absoluteunits.jl index cae56adb..aa023c2e 100644 --- a/src/transforms/absoluteunits.jl +++ b/src/transforms/absoluteunits.jl @@ -42,8 +42,9 @@ _absunit(x) = _absunit(x, nonmissingtype(eltype(x))) _absunit(x, ::Type) = (x, NoUnits) _absunit(x, ::Type{Q}) where {Q<:AbstractQuantity} = (x, unit(Q)) function _absunit(x, ::Type{Q}) where {Q<:AffineQuantity} - u = absoluteunit(unit(Q)) - y = map(v -> uconvert(u, v), x) + u = unit(Q) + a = absoluteunit(u) + y = map(v -> uconvert(a, v), x) (y, u) end @@ -66,7 +67,7 @@ function applyfeat(transform::AbsoluteUnits, feat, prep) end _revunit(x, ::Units) = x -_revunit(x, u::AbsoluteUnits) = map(v -> uconvert(u, v), x) +_revunit(x, u::AffineUnits) = map(v -> uconvert(u, v), x) function revertfeat(::AbsoluteUnits, newfeat, fcache) cols = Tables.columns(newfeat) From 63a81921fc4807a018f9aa2920c6fca6aa5d329e Mon Sep 17 00:00:00 2001 From: Elias Carvalho Date: Thu, 28 Sep 2023 14:10:22 -0300 Subject: [PATCH 4/4] Fix tests --- test/transforms/absoluteunits.jl | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/test/transforms/absoluteunits.jl b/test/transforms/absoluteunits.jl index fd2c0631..479321ca 100644 --- a/test/transforms/absoluteunits.jl +++ b/test/transforms/absoluteunits.jl @@ -20,7 +20,7 @@ @test t.a == tₒ.a @test isequal(t.b, tₒ.b) @test t.c == tₒ.c - @test isequal(t.d, tₒ.d) + @test all(isapprox.(skipmissing(t.d), skipmissing(tₒ.d))) @test t.e == tₒ.e # args... @@ -78,7 +78,7 @@ @test t.a == tₒ.a @test isequal(t.b, tₒ.b) @test t.c == tₒ.c - @test isequal(t.d, tₒ.d) + @test all(isapprox.(skipmissing(t.d), skipmissing(tₒ.d))) @test t.e == tₒ.e # symbols @@ -92,7 +92,7 @@ @test t.a == tₒ.a @test isequal(t.b, tₒ.b) @test t.c == tₒ.c - @test isequal(t.d, tₒ.d) + @test all(isapprox.(skipmissing(t.d), skipmissing(tₒ.d))) @test t.e == tₒ.e # strings @@ -106,7 +106,7 @@ @test t.a == tₒ.a @test isequal(t.b, tₒ.b) @test t.c == tₒ.c - @test isequal(t.d, tₒ.d) + @test all(isapprox.(skipmissing(t.d), skipmissing(tₒ.d))) @test t.e == tₒ.e # tuple @@ -123,7 +123,7 @@ @test t.a == tₒ.a @test isequal(t.b, tₒ.b) @test t.c == tₒ.c - @test isequal(t.d, tₒ.d) + @test all(isapprox.(skipmissing(t.d), skipmissing(tₒ.d))) @test t.e == tₒ.e # symbols @@ -139,7 +139,7 @@ @test t.a == tₒ.a @test isequal(t.b, tₒ.b) @test t.c == tₒ.c - @test isequal(t.d, tₒ.d) + @test all(isapprox.(skipmissing(t.d), skipmissing(tₒ.d))) @test t.e == tₒ.e # strings @@ -155,7 +155,7 @@ @test t.a == tₒ.a @test isequal(t.b, tₒ.b) @test t.c == tₒ.c - @test isequal(t.d, tₒ.d) + @test all(isapprox.(skipmissing(t.d), skipmissing(tₒ.d))) @test t.e == tₒ.e # regex @@ -171,6 +171,6 @@ @test t.a == tₒ.a @test isequal(t.b, tₒ.b) @test t.c == tₒ.c - @test isequal(t.d, tₒ.d) + @test all(isapprox.(skipmissing(t.d), skipmissing(tₒ.d))) @test t.e == tₒ.e end