JuliaML · juliohm · Sep 28, 2023 · Sep 28, 2023 · Sep 28, 2023 · Sep 28, 2023
diff --git a/docs/src/transforms.md b/docs/src/transforms.md
@@ -62,6 +62,12 @@ DropExtrema
 DropUnits
 ```
 
+## AbsoluteUnits
+
+```@docs
+AbsoluteUnits
+```
+
 ## Map
 
 ```@docs

diff --git a/src/TableTransforms.jl b/src/TableTransforms.jl
@@ -19,7 +19,7 @@ using CategoricalArrays
 using Random
 using NelderMead: optimise
 
-using Unitful: AbstractQuantity
+using Unitful: AbstractQuantity, AffineQuantity, AffineUnits, Units
 
 import Distributions: ContinuousUnivariateDistribution
 import Distributions: quantile, cdf
@@ -58,6 +58,7 @@ export
   DropMissing,
   DropExtrema,
   DropUnits,
+  AbsoluteUnits,
   Map,
   Replace,
   Coalesce,

diff --git a/src/transforms.jl b/src/transforms.jl
@@ -273,6 +273,7 @@ include("transforms/filter.jl")
 include("transforms/dropmissing.jl")
 include("transforms/dropextrema.jl")
 include("transforms/dropunits.jl")
+include("transforms/absoluteunits.jl")
 include("transforms/map.jl")
 include("transforms/replace.jl")
 include("transforms/coalesce.jl")

diff --git a/src/transforms/absoluteunits.jl b/src/transforms/absoluteunits.jl
@@ -0,0 +1,84 @@
+# ------------------------------------------------------------------
+# Licensed under the MIT License. See LICENSE in the project root.
+# ------------------------------------------------------------------
+
+"""
+    AbsoluteUnits()
+    AbsoluteUnits(:)
+
+Convert the units of all columns in the table to absolute units (e.g. `°C` to `K`).
+
+    AbsoluteUnits(col₁, col₂, ..., colₙ)
+    AbsoluteUnits([col₁, col₂, ..., colₙ])
+    AbsoluteUnits((col₁, col₂, ..., colₙ))
+
+Convert the units of the selected columns `col₁`, `col₂`, ..., `colₙ` to absolute units.
+
+    AbsoluteUnits(regex)
+
+Convert the units of the columns whose names match `regex` to absolute units.
+
+# Examples
+
+```julia
+AbsoluteUnits()
+AbsoluteUnits([2, 3, 5])
+AbsoluteUnits([:b, :c, :e])
+AbsoluteUnits(("b", "c", "e"))
+AbsoluteUnits(r"[bce]")
+```
+"""
+struct AbsoluteUnits{S<:ColSpec} <: StatelessFeatureTransform
+  colspec::S # column selection; resolved against the table's column names in `applyfeat`
+end
+
+AbsoluteUnits() = AbsoluteUnits(AllSpec()) # no selector: all columns
+AbsoluteUnits(spec) = AbsoluteUnits(colspec(spec)) # one selector object, normalized by `colspec`
+AbsoluteUnits(cols::T...) where {T<:Col} = AbsoluteUnits(colspec(cols)) # several selectors of one type `T`
+
+isrevertible(::Type{<:AbsoluteUnits}) = true # `revertfeat` converts affine columns back
+
+# pair a column with the unit to cache for reverting; only affine quantities
+# (e.g. °C) are actually converted, everything else is returned unchanged
+_absunit(col) = _absunit(col, nonmissingtype(eltype(col)))
+_absunit(col, ::Type) = (col, NoUnits)
+_absunit(col, ::Type{Q}) where {Q<:AbstractQuantity} = (col, unit(Q))
+function _absunit(col, ::Type{Q}) where {Q<:AffineQuantity}
+  target = absoluteunit(unit(Q))
+  (map(Base.Fix1(uconvert, target), col), unit(Q))
+end
+
+# convert the selected columns of `feat` to absolute units (`prep` is not used); the cache
+# holds the selected names and one unit per column, as returned by `_absunit` or `NoUnits`
+function applyfeat(transform::AbsoluteUnits, feat, prep)
+  table = Tables.columns(feat)
+  allnames = Tables.columnnames(table)
+  selected = choose(transform.colspec, allnames)
+
+  results = map(allnames) do nm
+    col = Tables.getcolumn(table, nm)
+    nm ∈ selected ? _absunit(col) : (col, NoUnits)
+  end
+  newcols = first.(results)
+  origunits = last.(results)
+
+  nt = (; zip(allnames, newcols)...)
+  Tables.materializer(feat)(nt), (selected, origunits)
+end
+
+# affine units are the only ones changed by the forward pass, so only they are converted back
+_revunit(col, ::Units) = col
+_revunit(col, orig::AffineUnits) = map(Base.Fix1(uconvert, orig), col)
+
+# undo `applyfeat` using the cached `(selected names, per-column units)` pair
+function revertfeat(::AbsoluteUnits, newfeat, fcache)
+  table = Tables.columns(newfeat)
+  allnames = Tables.columnnames(table)
+  selected, origunits = fcache
+  restored = map(allnames, origunits) do nm, u
+    col = Tables.getcolumn(table, nm)
+    nm ∈ selected ? _revunit(col, u) : col
+  end
+  nt = (; zip(allnames, restored)...)
+  Tables.materializer(newfeat)(nt)
+end
diff --git a/src/transforms/dropunits.jl b/src/transforms/dropunits.jl
@@ -6,7 +6,7 @@
     DropUnits()
     DropUnits(:)
 
-Drop units from all column in the table.
+Drop units from all columns in the table.
 
     DropUnits(col₁, col₂, ..., colₙ)
     DropUnits([col₁, col₂, ..., colₙ])
@@ -61,7 +61,7 @@ function applyfeat(transform::DropUnits, feat, prep)
 end
 
 _addunit(x, ::typeof(NoUnits)) = x
-_addunit(x, unit) = [v * unit for v in x]
+_addunit(x, u::Units) = map(v -> v * u, x)
 
 function revertfeat(::DropUnits, newfeat, fcache)
   cols = Tables.columns(newfeat)

diff --git a/test/transforms.jl b/test/transforms.jl
@@ -8,6 +8,7 @@ transformfiles = [
   "dropmissing.jl",
   "dropextrema.jl",
   "dropunits.jl",
+  "absoluteunits.jl",
   "map.jl",
   "replace.jl",
   "coalesce.jl",

diff --git a/test/transforms/absoluteunits.jl b/test/transforms/absoluteunits.jl
@@ -0,0 +1,176 @@
+@testset "AbsoluteUnits" begin
+  @test isrevertible(AbsoluteUnits())
+
+  a = [7, 4, 4, 7, 4, 1, 1, 6, 4, 7] * u"°C"
+  b = [4, 5, 4, missing, 6, 6, missing, 4, 4, 1] * u"K"
+  c = [3.9, 3.8, 3.5, 6.5, 7.7, 1.5, 0.6, 5.7, 4.7, 4.8] * u"K"
+  d = [6.3, 4.7, 7.6, missing, 1.2, missing, 5.9, 0.2, 1.9, 4.2] * u"°C"
+  e = ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j"]
+  t = Table(; a, b, c, d, e)
+
+  T = AbsoluteUnits() # default: all columns
+  n, c = apply(T, t) # note: `c` now names the cache, not the column (already stored in `t`)
+  @test unit(eltype(n.a)) === u"K"
+  @test unit(nonmissingtype(eltype(n.b))) === u"K"
+  @test unit(eltype(n.c)) === u"K"
+  @test unit(nonmissingtype(eltype(n.d))) === u"K"
+  @test eltype(n.e) === String
+  @test n.e == t.e
+  tₒ = revert(T, n, c)
+  @test t.a == tₒ.a
+  @test isequal(t.b, tₒ.b)
+  @test t.c == tₒ.c
+  @test all(isapprox.(skipmissing(t.d), skipmissing(tₒ.d))) # approximate: presumably float round-off in the °C ↔ K round trip
+  @test t.e == tₒ.e
+
+  # selectors passed as separate arguments (varargs)
+  # column indices
+  T = AbsoluteUnits(1, 2)
+  n, c = apply(T, t)
+  @test unit(eltype(n.a)) === u"K"
+  @test unit(nonmissingtype(eltype(n.b))) === u"K"
+  @test unit(eltype(n.c)) === u"K"
+  @test unit(nonmissingtype(eltype(n.d))) === u"°C"
+  tₒ = revert(T, n, c)
+  @test t.a == tₒ.a
+  @test isequal(t.b, tₒ.b)
+  @test t.c == tₒ.c
+  @test isequal(t.d, tₒ.d)
+  @test t.e == tₒ.e
+
+  # column names as symbols
+  T = AbsoluteUnits(:a, :b)
+  n, c = apply(T, t)
+  @test unit(eltype(n.a)) === u"K"
+  @test unit(nonmissingtype(eltype(n.b))) === u"K"
+  @test unit(eltype(n.c)) === u"K"
+  @test unit(nonmissingtype(eltype(n.d))) === u"°C"
+  tₒ = revert(T, n, c)
+  @test t.a == tₒ.a
+  @test isequal(t.b, tₒ.b)
+  @test t.c == tₒ.c
+  @test isequal(t.d, tₒ.d)
+  @test t.e == tₒ.e
+
+  # column names as strings
+  T = AbsoluteUnits("a", "b")
+  n, c = apply(T, t)
+  @test unit(eltype(n.a)) === u"K"
+  @test unit(nonmissingtype(eltype(n.b))) === u"K"
+  @test unit(eltype(n.c)) === u"K"
+  @test unit(nonmissingtype(eltype(n.d))) === u"°C"
+  tₒ = revert(T, n, c)
+  @test t.a == tₒ.a
+  @test isequal(t.b, tₒ.b)
+  @test t.c == tₒ.c
+  @test isequal(t.d, tₒ.d)
+  @test t.e == tₒ.e
+
+  # selectors passed as a vector
+  # column indices
+  T = AbsoluteUnits([3, 4])
+  n, c = apply(T, t)
+  @test unit(eltype(n.a)) === u"°C"
+  @test unit(nonmissingtype(eltype(n.b))) === u"K"
+  @test unit(eltype(n.c)) === u"K"
+  @test unit(nonmissingtype(eltype(n.d))) === u"K"
+  tₒ = revert(T, n, c)
+  @test t.a == tₒ.a
+  @test isequal(t.b, tₒ.b)
+  @test t.c == tₒ.c
+  @test all(isapprox.(skipmissing(t.d), skipmissing(tₒ.d)))
+  @test t.e == tₒ.e
+
+  # column names as symbols
+  T = AbsoluteUnits([:c, :d])
+  n, c = apply(T, t)
+  @test unit(eltype(n.a)) === u"°C"
+  @test unit(nonmissingtype(eltype(n.b))) === u"K"
+  @test unit(eltype(n.c)) === u"K"
+  @test unit(nonmissingtype(eltype(n.d))) === u"K"
+  tₒ = revert(T, n, c)
+  @test t.a == tₒ.a
+  @test isequal(t.b, tₒ.b)
+  @test t.c == tₒ.c
+  @test all(isapprox.(skipmissing(t.d), skipmissing(tₒ.d)))
+  @test t.e == tₒ.e
+
+  # column names as strings
+  T = AbsoluteUnits(["c", "d"])
+  n, c = apply(T, t)
+  @test unit(eltype(n.a)) === u"°C"
+  @test unit(nonmissingtype(eltype(n.b))) === u"K"
+  @test unit(eltype(n.c)) === u"K"
+  @test unit(nonmissingtype(eltype(n.d))) === u"K"
+  tₒ = revert(T, n, c)
+  @test t.a == tₒ.a
+  @test isequal(t.b, tₒ.b)
+  @test t.c == tₒ.c
+  @test all(isapprox.(skipmissing(t.d), skipmissing(tₒ.d)))
+  @test t.e == tₒ.e
+
+  # selectors passed as a tuple
+  # column indices
+  T = AbsoluteUnits((1, 4, 5))
+  n, c = apply(T, t)
+  @test unit(eltype(n.a)) === u"K"
+  @test unit(nonmissingtype(eltype(n.b))) === u"K"
+  @test unit(eltype(n.c)) === u"K"
+  @test unit(nonmissingtype(eltype(n.d))) === u"K"
+  @test eltype(n.e) === String
+  @test n.e == t.e
+  tₒ = revert(T, n, c)
+  @test t.a == tₒ.a
+  @test isequal(t.b, tₒ.b)
+  @test t.c == tₒ.c
+  @test all(isapprox.(skipmissing(t.d), skipmissing(tₒ.d)))
+  @test t.e == tₒ.e
+
+  # column names as symbols
+  T = AbsoluteUnits((:a, :d, :e))
+  n, c = apply(T, t)
+  @test unit(eltype(n.a)) === u"K"
+  @test unit(nonmissingtype(eltype(n.b))) === u"K"
+  @test unit(eltype(n.c)) === u"K"
+  @test unit(nonmissingtype(eltype(n.d))) === u"K"
+  @test eltype(n.e) === String
+  @test n.e == t.e
+  tₒ = revert(T, n, c)
+  @test t.a == tₒ.a
+  @test isequal(t.b, tₒ.b)
+  @test t.c == tₒ.c
+  @test all(isapprox.(skipmissing(t.d), skipmissing(tₒ.d)))
+  @test t.e == tₒ.e
+
+  # column names as strings
+  T = AbsoluteUnits(("a", "d", "e"))
+  n, c = apply(T, t)
+  @test unit(eltype(n.a)) === u"K"
+  @test unit(nonmissingtype(eltype(n.b))) === u"K"
+  @test unit(eltype(n.c)) === u"K"
+  @test unit(nonmissingtype(eltype(n.d))) === u"K"
+  @test eltype(n.e) === String
+  @test n.e == t.e
+  tₒ = revert(T, n, c)
+  @test t.a == tₒ.a
+  @test isequal(t.b, tₒ.b)
+  @test t.c == tₒ.c
+  @test all(isapprox.(skipmissing(t.d), skipmissing(tₒ.d)))
+  @test t.e == tₒ.e
+
+  # columns selected by a regular expression
+  T = AbsoluteUnits(r"[ade]")
+  n, c = apply(T, t)
+  @test unit(eltype(n.a)) === u"K"
+  @test unit(nonmissingtype(eltype(n.b))) === u"K"
+  @test unit(eltype(n.c)) === u"K"
+  @test unit(nonmissingtype(eltype(n.d))) === u"K"
+  @test eltype(n.e) === String
+  @test n.e == t.e
+  tₒ = revert(T, n, c)
+  @test t.a == tₒ.a
+  @test isequal(t.b, tₒ.b)
+  @test t.c == tₒ.c
+  @test all(isapprox.(skipmissing(t.d), skipmissing(tₒ.d)))
+  @test t.e == tₒ.e
+end