/
balanced_model.jl
247 lines (206 loc) · 9.84 KB
/
balanced_model.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
#=
This is how the struct and the constructor for the model balancer
would look if it were to support only the probabilistic model type:
struct BalancedModel <:ProbabilisticNetworkComposite
balancer # oversampler or undersampler
model::Probabilistic # get rid of abstract types
end
BalancedModel(;model=nothing, balancer=nothing) = BalancedModel(model, balancer)
BalancedModel(model; kwargs...) = BalancedModel(; model, kwargs...)
In the following, we use macros to automate code generation of these for all model
types
=#
### 1. Define model structs
# The atomic model supertypes that `BalancedModel` knows how to wrap
const SUPPORTED_MODEL_TYPES = (:Probabilistic, :Deterministic, :Interval)
# Maps a model type to its composite name, e.g., :Probabilistic => :BalancedModelProbabilistic
const MODELTYPE_TO_COMPOSITETYPE = Dict(atom => Symbol(:BalancedModel, atom) for atom in SUPPORTED_MODEL_TYPES)
# Maps a model type to its supertype, e.g., :Probabilistic => :ProbabilisticNetworkComposite
const MODELTYPE_TO_SUPERTYPE = Dict(atom => Symbol(atom, :NetworkComposite) for atom in SUPPORTED_MODEL_TYPES)
# Define a struct for each model type (corresponds to a composite type and supertype used in struct)
# e.g., for :Probabilistic this evaluates a definition of
# `BalancedModelProbabilistic{balancernames, M} <: ProbabilisticNetworkComposite`.
# The balancer keyword names are carried in the `balancernames` type parameter so the
# `getproperty`/`setproperty!` overloads further below can expose each balancer as if it
# were a named field of the struct.
for model_type in SUPPORTED_MODEL_TYPES
    struct_name = MODELTYPE_TO_COMPOSITETYPE[model_type]
    super_type = MODELTYPE_TO_SUPERTYPE[model_type]
    ex = quote
        # `balancers` is left untyped: it holds a vector of arbitrary balancer models
        mutable struct $struct_name{balancernames, M <: $model_type} <: $super_type
            balancers
            model::M
            function $struct_name(balancernames, balancers, model::M) where M <: $model_type
                # generate an instance and use balancernames as type parameter
                return new{balancernames, M}(balancers, model)
            end
        end
    end
    eval(ex)
end
### 2. Define one keyword constructor for model structs
# A version of MODELTYPE_TO_COMPOSITETYPE whose keys and values are the evaluated
# objects rather than symbols (used by the keyword constructor to pick the composite)
const MODELTYPE_TO_COMPOSITETYPE_EVAL = Dict()
for MODELTYPE in SUPPORTED_MODEL_TYPES
    composite_name = MODELTYPE_TO_COMPOSITETYPE[MODELTYPE]
    # both `$MODELTYPE` and `$composite_name` are spliced as symbols, which `@eval`
    # then resolves to the abstract model type and the struct generated above
    @eval MODELTYPE_TO_COMPOSITETYPE_EVAL[$MODELTYPE] = $composite_name
end
# To represent any model type (to check input model type is one of them in keyword constructor)
const UNION_MODEL_TYPES = Union{keys(MODELTYPE_TO_COMPOSITETYPE_EVAL)...}
# Possible errors (also used by the keyword constructor)
const ERR_MODEL_UNSPECIFIED = ErrorException("Expected an atomic model as argument. None specified. ")
const WRN_BALANCER_UNSPECIFIED = "No balancer was provided. Data will be directly passed to the model. "
# e.g., "`Probabilistic`, `Deterministic`, and `Interval`"
const PRETTY_SUPPORTED_MODEL_TYPES = join(("`$opt`" for opt in SUPPORTED_MODEL_TYPES), ", ", ", and ")
const ERR_UNSUPPORTED_MODEL(model) = ErrorException(
    string(
        "Only these model supertypes support wrapping: ",
        "$PRETTY_SUPPORTED_MODEL_TYPES.\n",
        "Model provided has type `$(typeof(model))`. ",
    )
)
# NOTE(review): this is a `String`, not an `Exception`, yet the constructor does
# `throw(ERR_NUM_ARGS_BM)`; consider wrapping in `ArgumentError` for consistency
const ERR_NUM_ARGS_BM = "`BalancedModel` can at most have one non-keyword argument where the model is passed."
"""
BalancedModel(; model=nothing, balancer1=balancer_model1, balancer2=balancer_model2, ...)
BalancedModel(model; balancer1=balancer_model1, balancer2=balancer_model2, ...)
Given a classification model, and one or more balancer models that all implement the `MLJModelInterface`,
`BalancedModel` allows constructing a sequential pipeline that wraps an arbitrary number of balancing models
and a classifier together in a sequential pipeline.
# Operation
- During training, data is first passed to `balancer1` and the result is passed to `balancer2` and so on, the result from the final balancer
is then passed to the classifier for training.
- During prediction, the balancers have no effect.
# Arguments
- `model::Supervised`: A classification model that implements the `MLJModelInterface`.
- `balancer1::Static=...`: The first balancer model to pass the data to. This keyword argument can have any name.
- `balancer2::Static=...`: The second balancer model to pass the data to. This keyword argument can have any name.
- and so on for an arbitrary number of balancers.
# Returns
- An instance of type ProbabilisticBalancedModel or DeterministicBalancedModel, depending on the prediction type of model.
# Example
```julia
using MLJ
using Imbalance
# generate data
X, y = Imbalance.generate_imbalanced_data(1000, 5; class_probs=[0.2, 0.3, 0.5])
# prepare classification and balancing models
SMOTENC = @load SMOTENC pkg=Imbalance verbosity=0
TomekUndersampler = @load TomekUndersampler pkg=Imbalance verbosity=0
LogisticClassifier = @load LogisticClassifier pkg=MLJLinearModels verbosity=0
oversampler = SMOTENC(k=5, ratios=1.0, rng=42)
undersampler = TomekUndersampler(min_ratios=0.5, rng=42)
logistic_model = LogisticClassifier()
# wrap them in a BalancedModel
balanced_model = BalancedModel(model=logistic_model, balancer1=oversampler, balancer2=undersampler)
# now this behaves as a unified model that can be trained, validated, fine-tuned, etc.
mach = machine(balanced_model, X, y)
fit!(mach)
```
"""
function BalancedModel(args...; model=nothing, named_balancers...)
# check model and balancer are given
length(args) <= 1 || throw(ERR_NUM_ARGS_BM)
if length(args) === 1
atom = first(args)
model === nothing ||
@warn WRN_MODEL_GIVEN
model = atom
else
model === nothing && throw(ERR_MODEL_UNSPECIFIED)
end
# check model is supported
model isa UNION_MODEL_TYPES || throw(ERR_UNSUPPORTED_MODEL(model))
nt = NamedTuple(named_balancers)
balancernames = keys(nt)
balancers = collect(nt)
# warn if balancer is not given
isempty(balancers) && @warn WRN_BALANCER_UNSPECIFIED
# call the appropriate constructor
return MODELTYPE_TO_COMPOSITETYPE_EVAL[MMI.abstract_type(model)](balancernames, balancers, model)
end
### 3. Make a property for each balancer given via keyword arguments
# set the property names to include the keyword arguments
# (fix: `propertynames` was overloaded only for `BalancedModelProbabilistic`; it is now
# defined for every composite type so the `Deterministic` and `Interval` variants also
# report their balancer names as properties, consistent with the `getproperty` and
# `setproperty!` overloads below)
for model_type in SUPPORTED_MODEL_TYPES
    struct_name = MODELTYPE_TO_COMPOSITETYPE[model_type]
    ex = quote
        Base.propertynames(::$struct_name{balancernames}) where balancernames =
            tuple(:model, balancernames...)
    end
    eval(ex)
end
# overload getproperty to return the balancer from the vector in the struct
for mtype in SUPPORTED_MODEL_TYPES
    composite = MODELTYPE_TO_COMPOSITETYPE[mtype]
    defn = quote
        function Base.getproperty(b::$composite{balancernames}, name::Symbol) where balancernames
            # balancer keyword names live in the type parameter; their models in the field
            stored = getfield(b, :balancers)
            position = findfirst(==(name), balancernames)
            position === nothing || return stored[position]
            # anything else (e.g., `:model`, `:balancers`) is an actual struct field
            return getfield(b, name)
        end
    end
    eval(defn)
end
# Error thrown by `setproperty!` (below) when the requested property is neither a
# balancer keyword name nor `:model`.
# (fix: the original interpolated `$name` into the message at top level, where no
# `name` variable is in scope — loading the file would raise `UndefVarError`; the
# constant takes no arguments, so the message must be static)
const ERR_NO_PROP = ArgumentError("trying to access property which does not exist")
# overload setproperty! to set the balancer via the vector in the struct
for model_type in SUPPORTED_MODEL_TYPES
    struct_name = MODELTYPE_TO_COMPOSITETYPE[model_type]
    ex = quote
        function Base.setproperty!(b::$struct_name{balancernames}, name::Symbol, val) where balancernames
            # find the balancer model with the given balancer name
            idx = findfirst(==(name), balancernames)
            # get it from the vector in the struct and set it with the value
            !isnothing(idx) && return getfield(b, :balancers)[idx] = val
            # the only other settable property is the model field
            # (fix: was `setfield`, which does not exist in Base; the mutating
            # function is `setfield!`, so setting `:model` previously raised
            # `UndefVarError`)
            name === :model && return setfield!(b, :model, val)
            throw(ERR_NO_PROP)
        end
    end
    eval(ex)
end
### 4. Define the prefit method
# used below, represents any composite model type offered by our package
# (e.g., BalancedModelProbabilistic); parameterized by the balancer keyword names
const UNION_COMPOSITE_TYPES{balancernames} = Union{[type{balancernames} for type in values(MODELTYPE_TO_COMPOSITETYPE_EVAL)]...}
"""
Overload the prefit method to export a learning network composed of a sequential pipeline of balancers
followed by a final model.
"""
function MLJBase.prefit(balanced_model::UNION_COMPOSITE_TYPES{balancernames}, verbosity, _X, _y) where balancernames
# the learning network:
X = source(_X)
y = source(_y)
X_over, y_over = X, y
# Let's transform the data through :balancer1, :balancer2,...
for symbolic_balancer in balancernames
balancer = getproperty(balanced_model, symbolic_balancer)
mach1 = machine(balancer)
data = MLJBase.transform(mach1, X_over, y_over)
X_over, y_over= first(data), last(data)
end
# we use the oversampled data for training:
mach2 = machine(:model, X_over, y_over) # wrap with the data to be trained
# but consume new prodution data from the source:
yhat = MLJBase.predict(mach2, X)
# return the learning network interface:
return (; predict=yhat)
end
### 5. Provide package information and pass up model traits
# These traits describe the wrapper itself (not the wrapped atomic model)
MMI.package_name(::Type{<:UNION_COMPOSITE_TYPES}) = "MLJBalancing"
MMI.package_license(::Type{<:UNION_COMPOSITE_TYPES}) = "MIT"
MMI.package_uuid(::Type{<:UNION_COMPOSITE_TYPES}) = "45f359ea-796d-4f51-95a5-deb1a414c586"
MMI.is_wrapper(::Type{<:UNION_COMPOSITE_TYPES}) = true
MMI.package_url(::Type{<:UNION_COMPOSITE_TYPES}) = "https://github.com/JuliaAI/MLJBalancing.jl"
# All the composite types BalancedModelProbabilistic, BalancedModelDeterministic, etc.
const COMPOSITE_TYPES = values(MODELTYPE_TO_COMPOSITETYPE)
# Forward the relevant model traits from the wrapped atomic model type `M` to each
# composite type, so MLJ's machinery sees the wrapper as equivalent to its atom
for composite_type in COMPOSITE_TYPES
    # the composite's iteration parameter is the atom's, prefixed by `:model`
    # NOTE(review): `iteration_parameter(M)` is unqualified while other traits use
    # `MMI.`; this relies on the name being in scope (e.g., exported by MLJBase) —
    # confirm, or qualify as `MMI.iteration_parameter` for consistency
    quote
        MMI.iteration_parameter(::Type{<:$composite_type{balancernames, M}}) where {balancernames, M} =
            MLJBase.prepend(:model, iteration_parameter(M))
    end |> eval
    # delegate data- and prediction-related traits directly to the atomic model type
    for trait in [
        :input_scitype,
        :output_scitype,
        :target_scitype,
        :fit_data_scitype,
        :predict_scitype,
        :transform_scitype,
        :inverse_transform_scitype,
        :is_pure_julia,
        :supports_weights,
        :supports_class_weights,
        :supports_online,
        :supports_training_losses,
        :is_supervised,
        :prediction_type]
        quote
            MMI.$trait(::Type{<:$composite_type{balancernames, M}}) where {balancernames, M} = MMI.$trait(M)
        end |> eval
    end
end