Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add UnivariateTimeTypeToContinuous transformer to builtins #245

Closed
wants to merge 15 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions src/MLJModels.jl
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
module MLJModels
module MLJModels

import MLJModelInterface
import MLJModelInterface: MODEL_TRAITS
Expand All @@ -9,7 +9,7 @@ import MLJBase: @load
import MLJBase: Table, Continuous, Count, Finite, OrderedFactor, Multiclass

using Requires, Pkg, Pkg.TOML, OrderedCollections, Parameters
using Tables, CategoricalArrays, StatsBase, Statistics
using Tables, CategoricalArrays, StatsBase, Statistics, Dates
import Distributions

# for administrators to update Metadata.toml:
Expand All @@ -28,7 +28,7 @@ export ConstantRegressor, ConstantClassifier,
# from model/Transformers
export FeatureSelector, StaticTransformer, UnivariateDiscretizer,
UnivariateStandardizer, Standardizer, UnivariateBoxCoxTransformer,
OneHotEncoder, ContinuousEncoder, FillImputer
OneHotEncoder, ContinuousEncoder, FillImputer, UnivariateTimeTypeToContinuous

const srcdir = dirname(@__FILE__) # the directory containing this file

Expand Down
142 changes: 140 additions & 2 deletions src/builtins/Transformers.jl
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,145 @@ end
# Vector form of `inverse_transform`: apply the single-value method to every element of
# `w` and collect the results.
function MLJBase.inverse_transform(transformer::UnivariateStandardizer, fitresult, w)
    return [inverse_transform(transformer, fitresult, y) for y in w]
end

##########################################################################################
## CONTINUOUS TRANSFORM OF TIME TYPE FEATURES

"""
    UnivariateTimeTypeToContinuous(; zero_time=nothing, step=Hour(24))

Convert a vector of `Date`, `DateTime`, or `Time` values to `Float64`, where `0.0`
corresponds to `zero_time` and `step` is the time increment needed to reach `1.0`.

# Keywords
- `zero_time`: the time mapped to `0.0`. If provided, its type should match the element
  type of the vector being transformed. If `nothing`, it is inferred as the minimum time
  found in the data when `fit` is called.
- `step`: the `Period` that corresponds to a change of `1.0` in the output.

"""
mutable struct UnivariateTimeTypeToContinuous <: Unsupervised
# Time mapped to `0.0`; `nothing` means "infer from the data in `fit`".
zero_time::Union{Nothing, TimeType}
# Time increment that corresponds to `1.0` in the transformed output.
step::Period
end

# Keyword constructor: build the transformer, run `MLJBase.clean!` on it and emit any
# message `clean!` returns as a warning.
function UnivariateTimeTypeToContinuous(; zero_time=nothing, step=Dates.Hour(24))
    transformer = UnivariateTimeTypeToContinuous(zero_time, step)
    warning = MLJBase.clean!(transformer)
    if !isempty(warning)
        @warn warning
    end
    return transformer
end
ablaom marked this conversation as resolved.
Show resolved Hide resolved

# Validate the hyperparameters of `model`, repairing them in place where possible.
#
# When `zero_time` is provided, `zero_time + step` must be a valid operation. If that
# addition raises a `MethodError`, `_fix_zero_time_step` is asked to convert one of the
# two fields; on success the model is updated with the converted values and the
# explanatory message is returned. If no conversion is available, or the addition failed
# with any other exception, the original exception is rethrown.
#
# Returns the warning message, which is empty when nothing had to be changed.
function MLJBase.clean!(model::UnivariateTimeTypeToContinuous)
    msg = ""
    # Nothing to validate until `fit` infers `zero_time` from the data.
    model.zero_time === nothing && return msg
    try
        # Only used as a probe: can `step` be added to `zero_time` at all?
        model.zero_time + model.step
    catch err
        # `rethrow()` (rather than `throw(err)`) keeps the original backtrace.
        err isa MethodError || rethrow()
        zero_time, step, status, msg = _fix_zero_time_step(
            model.zero_time, model.step)
        # Unable to resolve, rethrow original error.
        status === :error && rethrow()
        model.zero_time, model.step = zero_time, step
    end
    return msg
end

# Try to reconcile a `zero_time` and a `step` that cannot be added to each other.
#
# Time parts cannot be added to dates, nor date parts to times. Date parts can be
# converted to time parts, but not the reverse, because fractional date parts cannot be
# represented.
#
# Every method returns `(zero_time, step, status, msg)`, where `status` is `:success` if a
# conversion was applied (with `msg` describing it) and `:error` if the pair is returned
# unchanged.

# Fallback: no known conversion for this combination.
_fix_zero_time_step(zero_time, step) = (zero_time, step, :error, "")

# `Date` + `TimePeriod`: convert `step` to whole days when it is an exact multiple of
# 24 hours, otherwise promote `zero_time` to a `DateTime` so that it accepts the step.
function _fix_zero_time_step(zero_time::Dates.Date, step::Dates.TimePeriod)
    if step % Hour(24) === Hour(0)
        note = "Cannot add `TimePeriod` `step` to `Date` `zero_time`. Converting `step` to `Day`."
        return zero_time, convert(Day, step), :success, note
    end
    note = "Cannot add `TimePeriod` `step` to `Date` `zero_time`. Converting `zero_time` to `DateTime`."
    return convert(DateTime, zero_time), step, :success, note
end

# `Time` + `DatePeriod`: express `step` in hours. The conversion itself fails for
# periods such as `Month`.
function _fix_zero_time_step(zero_time::Dates.Time, step::Dates.DatePeriod)
    note = "Cannot add `DatePeriod` `step` to `Time` `zero_time`. Converting `step` to `Hour`."
    return zero_time, convert(Hour, step), :success, note
end

# Determine the `(zero_time, step)` pair used by `transform`.
#
# - If `model.zero_time` is provided, it is used as the zero point. When it is not an
#   instance of `eltype(X)`, a warning is logged and it is converted to `eltype(X)`
#   (`convert` throws if no conversion exists).
# - Otherwise the zero point is `minimum(X)`. If `model.step` cannot be added to it
#   (`MethodError`), `_fix_zero_time_step` is used to convert one of the two and the
#   resulting message is logged as a warning; if no fix exists, or another exception was
#   raised, the original exception is rethrown.
#
# Returns `(fitresult, cache, report)` with `fitresult = (zero_time, step)` and `cache`
# and `report` both `nothing`.
function MLJBase.fit(model::UnivariateTimeTypeToContinuous, verbosity::Int, X)
    step = model.step
    if model.zero_time !== nothing
        min_dt = model.zero_time
        # Check zero_time is compatible with X. This is an explicit type check: probing
        # with `X - min_dt` (vector minus scalar, no broadcast) fails for every vector
        # and therefore warned even when the types already matched.
        if !(min_dt isa eltype(X))
            @warn "`$(typeof(min_dt))` `zero_time` is not compatible with `$(eltype(X))` vector. Attempting to convert `zero_time`."
            min_dt = convert(eltype(X), min_dt)
        end
    else
        min_dt = minimum(X)
        message = ""
        try
            # Probe only: can `step` be added to the inferred zero point?
            min_dt + step
        catch err
            # `rethrow()` keeps the original backtrace.
            err isa MethodError || rethrow()
            min_dt, step, status, message = _fix_zero_time_step(min_dt, step)
            # Unable to resolve, rethrow original error.
            status === :error && rethrow()
        end
        isempty(message) || @warn message
    end
    fitresult = (min_dt, step)
    cache = nothing
    report = nothing
    return fitresult, cache, report
end

# Size of one `step` expressed in the units of `Dates.value(x - zero_time)`.
#
# Generic case: measure the step by adding it to `zero_time`.
function _timetype_step_size(zero_time, step)
    return Float64(Dates.value(zero_time + step) - Dates.value(zero_time))
end

# `Time` arithmetic wraps around modulo 24 hours, so `zero_time + step` can land on or
# before `zero_time` (e.g. `Time(13) + Hour(12) == Time(1)`), which would give a zero or
# negative step size. Use the duration of `step` itself instead; `Time` values are
# measured in nanoseconds.
function _timetype_step_size(::Dates.Time, step)
    return Float64(Dates.value(convert(Dates.Nanosecond, step)))
end

# Map each element of `X` to `(x - zero_time) / step` as a `Float64`, where
# `(zero_time, step)` is the `fitresult` returned by `fit`.
#
# Throws an `ArgumentError` if `eltype(X)` differs from the type of `zero_time` stored in
# `fitresult`.
function MLJBase.transform(model::UnivariateTimeTypeToContinuous, fitresult, X)
    min_dt, step = fitresult
    if typeof(min_dt) ≠ eltype(X)
        # Cannot run if eltype in transform differs from zero_time from fit.
        throw(ArgumentError("Different `TimeType` encountered during `transform` than expected from `fit`. Found `$(eltype(X))`, expected `$(typeof(min_dt))`"))
    end
    # Set the size of a single step.
    delta = _timetype_step_size(min_dt, step)
    return @. Float64(Dates.value(X - min_dt)) / delta
end


## STANDARDIZATION OF ORDINAL FEATURES OF TABULAR DATA

"""
Expand Down Expand Up @@ -831,7 +970,7 @@ the last class indicator column.
`Multiclass` or `OrderedFactor` column is the same in new data being
transformed as it is in the data used to fit the transformer.

### Example
### Example

```julia
X = (name=categorical(["Danesh", "Lee", "Mary", "John"]),
Expand Down Expand Up @@ -1005,4 +1144,3 @@ metadata_model(ContinuousEncoder,
weights = false,
descr = CONTINUOUS_ENCODER_DESCR,
path = "MLJModels.ContinuousEncoder")

108 changes: 108 additions & 0 deletions test/builtins/Transformers.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ using Test, MLJModels
using Tables, CategoricalArrays, Random
using ScientificTypes
using StatsBase
using Dates: DateTime, Date, Time, Day, Hour

import MLJBase

Expand Down Expand Up @@ -89,6 +90,113 @@ end
infos = MLJBase.info_dict(stand)
end

### TIMETYPE TO CONTINUOUS

# Tests for `UnivariateTimeTypeToContinuous`: fit results, transformed values and the
# warnings logged when `zero_time`, `step` and the data have mismatched types.
@testset "TimeTypeToContinuous" begin
# `Date` vector with an explicit `Day(1)` step; `zero_time` is inferred as the earliest
# date, so consecutive days map to 0.0, 1.0, ..., 10.0.
let dt = [Date(2018, 6, 15) + Day(i) for i=0:10],
transformer = UnivariateTimeTypeToContinuous(; step=Day(1))
fr, _, _ = MLJBase.fit(transformer, 1, dt)
@test fr == (Date(2018, 6, 15), Day(1))
dt_continuous = MLJBase.transform(transformer, fr, dt)
@test all(dt_continuous .== Float64.(0:10))
end

# `Date` vector with the default `Hour(24)` step: `fit` must warn and convert the step
# to `Day`, giving the same result as the explicit `Day(1)` case.
let dt = [Date(2018, 6, 15) + Day(i) for i=0:10],
transformer = UnivariateTimeTypeToContinuous()
@test_logs(
(:warn, "Cannot add `TimePeriod` `step` to `Date` `zero_time`. Converting `step` to `Day`."),
MLJBase.fit(transformer, 1, dt)
)
fr, _, _ = MLJBase.fit(transformer, 1, dt)
@test fr == (Date(2018, 6, 15), Day(1))
dt_continuous = MLJBase.transform(transformer, fr, dt)
@test all(dt_continuous .== Float64.(0:10))
end

# `Time` vector with an explicit `zero_time` of 07:00 and an hourly step. The expected
# values are the hour of day (taken modulo 24) minus 7.
let dt = [Time(0, 0, 0) + Hour(i) for i=0:3:30],
transformer = UnivariateTimeTypeToContinuous(;
step = Hour(1),
zero_time = Time(7, 0, 0),
)
fr, _, _ = MLJBase.fit(transformer, 1, dt)
@test fr == (Time(7, 0, 0), Hour(1))
dt_continuous = MLJBase.transform(transformer, fr, dt)
ex = collect(0:3:30) .% 24 .- 7.0
@test all(dt_continuous .== ex)
end

# `Time` vector with all defaults: `zero_time` is inferred as 00:00 and the step stays
# `Hour(24)`, so the expected values are fractions of a day.
let dt = [Time(0, 0, 0) + Hour(i) for i=0:3:30],
transformer = UnivariateTimeTypeToContinuous()
fr, _, _ = MLJBase.fit(transformer, 1, dt)
@test fr == (Time(0, 0, 0), Hour(24))
dt_continuous = MLJBase.transform(transformer, fr, dt)
ex = collect(0:3:30) .% 24 ./ 24
@test all(dt_continuous .== ex)
end

# test log messages
# `Date` `zero_time` with an `Hour(1)` step: the constructor must warn that `zero_time`
# is converted to `DateTime`; daily `DateTime` data then maps to multiples of 24.
let dt = [DateTime(2018, 6, 15) + Day(i) for i=0:10],
step=Hour(1),
zero_time=Date(2018, 6, 15),
transformer = UnivariateTimeTypeToContinuous(;
step=step,
zero_time=zero_time,
)
@test_logs(
(:warn, "Cannot add `TimePeriod` `step` to `Date` `zero_time`. Converting `zero_time` to `DateTime`."),
UnivariateTimeTypeToContinuous(;
step=step,
zero_time=zero_time,
)
)
fr, _, _ = MLJBase.fit(transformer, 1, dt)

@test fr == (zero_time, step)
dt_continuous = MLJBase.transform(transformer, fr, dt)
@test all(dt_continuous .== Float64.(0:10).*24)
end

# `Time` `zero_time` with a `Day(1)` step: the constructor must warn that the step is
# converted to `Hour`.
let dt = [Time(0, 0, 0) + Hour(i) for i=0:3:30],
zero_time=Time(0, 0, 0),
step=Day(1),
transformer = UnivariateTimeTypeToContinuous(;
step=step,
zero_time=zero_time,
)
@test_logs(
(:warn, "Cannot add `DatePeriod` `step` to `Time` `zero_time`. Converting `step` to `Hour`."),
UnivariateTimeTypeToContinuous(;
step=step,
zero_time=zero_time,
)
)
fr, _, _ = MLJBase.fit(transformer, 1, dt)

@test fr == (zero_time, convert(Hour, step))
dt_continuous = MLJBase.transform(transformer, fr, dt)
ex = Float64.((0:3:30) .% 24)./24
@test all(dt_continuous .== ex)
end

# `Date` `zero_time` used with a `DateTime` vector: `fit` must warn that `zero_time` is
# not compatible with the vector and convert it.
let dt = [DateTime(2018, 6, 15) + Day(i) for i=0:10],
step=Day(1),
zero_time=Date(2018, 6, 15),
transformer = UnivariateTimeTypeToContinuous(;
step=step,
zero_time=zero_time,
)
@test_logs(
(:warn, "`Dates.Date` `zero_time` is not compatible with `Dates.DateTime` vector. Attempting to convert `zero_time`."),
MLJBase.fit(transformer, 1, dt)
)
fr, _, _ = MLJBase.fit(transformer, 1, dt)

@test fr == (zero_time, step)
dt_continuous = MLJBase.transform(transformer, fr, dt)
@test all(dt_continuous .== Float64.(0:10))
end
end

#### STANDARDIZER ####

@testset "Standardizer" begin
Expand Down