10 changes: 5 additions & 5 deletions Project.toml
@@ -1,7 +1,7 @@
name = "LearnTestAPI"
uuid = "3111ed91-c4f2-40e7-bb19-7f6c618409b8"
authors = ["Anthony D. Blaom <anthony.blaom@gmail.com>"]
version = "0.2.4"
version = "0.3.0"

[deps]
CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597"
@@ -25,13 +25,13 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
UnPack = "3a884ed6-31ef-47d7-9d2a-63182c4928ed"

[compat]
CategoricalArrays = "0.10.8"
CategoricalDistributions = "0.1.15"
CategoricalArrays = "1"
CategoricalDistributions = "0.2"
Distributions = "0.25"
InteractiveUtils = "<0.0.1, 1"
IsURL = "0.2.0"
LearnAPI = "0.2.0,1"
LearnDataFrontEnds = "0.1"
LearnAPI = "2"
LearnDataFrontEnds = "0.2"
LinearAlgebra = "<0.0.1, 1"
MLCore = "1.0.0"
MacroTools = "0.5"
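The `[compat]` changes above track breaking releases: CategoricalArrays 0.10 → 1, CategoricalDistributions 0.1 → 0.2, LearnAPI 0.2 → 2, and LearnDataFrontEnds 0.1 → 0.2. As a hedged sketch, a downstream project wanting to follow suit could bump its own compat entries with `Pkg.compat` (assumes Julia ≥ 1.8, where that function is available):

```julia
using Pkg
Pkg.compat("CategoricalArrays", "1")
Pkg.compat("CategoricalDistributions", "0.2")
Pkg.compat("LearnAPI", "2")
Pkg.resolve()  # verify the new bounds are jointly satisfiable
```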
15 changes: 10 additions & 5 deletions src/learners/classification.jl
@@ -24,7 +24,7 @@ struct ConstantClassifierFitted
learner::ConstantClassifier
probabilities
names::Vector{Symbol}
classes_seen
levels_seen
codes_seen
decoder
end
@@ -44,10 +44,15 @@ LearnAPI.features(learner::ConstantClassifier, data) =
LearnAPI.target(learner::ConstantClassifier, data) =
LearnAPI.target(learner, data, front_end)

function LearnAPI.fit(learner::ConstantClassifier, observations::FrontEnds.Obs; verbosity=1)
function LearnAPI.fit(
learner::ConstantClassifier,
observations::FrontEnds.Obs;
verbosity=LearnAPI.default_verbosity(),
)

y = observations.target # integer "codes"
names = observations.names
classes_seen = observations.classes_seen
levels_seen = observations.levels_seen
codes_seen = sort(unique(y))
decoder = observations.decoder

@@ -59,7 +64,7 @@ function LearnAPI.fit(learner::ConstantClassifier, observations::FrontEnds.Obs;
learner,
probabilities,
names,
classes_seen,
levels_seen,
codes_seen,
decoder,
)
@@ -89,7 +94,7 @@ function LearnAPI.predict(
probs = model.probabilities
# repeat vertically to get rows of a matrix:
probs_matrix = reshape(repeat(probs, n), (length(probs), n))'
return CategoricalDistributions.UnivariateFinite(model.classes_seen, probs_matrix)
return CategoricalDistributions.UnivariateFinite(model.levels_seen, probs_matrix)
end
LearnAPI.predict(model::ConstantClassifierFitted, ::Distribution, data) =
predict(model, Distribution(), obs(model, data))
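The `classes_seen` → `levels_seen` rename mirrors CategoricalArrays 1.0, where `levels` returns a `CategoricalVector` whose elements (`CategoricalValue`s) carry the pool that `UnivariateFinite` needs. A minimal sketch of the construction used in `predict` above, with hypothetical data (not part of this PR):

```julia
using CategoricalArrays
import CategoricalDistributions: UnivariateFinite, pdf

y = categorical(["no", "yes", "yes", "no"])
levels_seen = levels(y)    # CategoricalVector (CategoricalArrays ≥ 1)
probs = [0.25, 0.75]       # one probability per level
n = 3                      # number of observations to predict for

# repeat vertically to get rows of a matrix, as in the diff:
probs_matrix = reshape(repeat(probs, n), (length(probs), n))'
d = UnivariateFinite(levels_seen, probs_matrix)  # n distributions
pdf.(d, "yes")             # → [0.75, 0.75, 0.75]
```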
6 changes: 5 additions & 1 deletion src/learners/dimension_reduction.jl
@@ -65,7 +65,11 @@ LearnAPI.obs(model::TruncatedSVDFitted, data) =
LearnAPI.features(learner::TruncatedSVD, data) =
LearnAPI.features(learner, data, FrontEnds.Tarragon())

function LearnAPI.fit(learner::TruncatedSVD, observations::FrontEnds.Obs; verbosity=1)
function LearnAPI.fit(
learner::TruncatedSVD,
observations::FrontEnds.Obs;
verbosity=LearnAPI.default_verbosity(),
)

# unpack hyperparameters:
codim = learner.codim
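The only behavioral change in this file is the `verbosity` default, now deferred to the global setting. A hedged sketch of how that plays out, assuming LearnAPI 2's `default_verbosity` getter/setter (the data and the `codim` keyword constructor are hypothetical, though `codim` is a `TruncatedSVD` field in this diff):

```julia
import LearnAPI

LearnAPI.default_verbosity()   # 1, unless previously overridden
LearnAPI.default_verbosity(0)  # silence subsequent `fit`/`update` calls

X = rand(5, 100)                                 # hypothetical feature matrix
model = LearnAPI.fit(TruncatedSVD(codim=1), X)   # now runs quietly by default
```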
15 changes: 8 additions & 7 deletions src/learners/ensembling.jl
@@ -69,7 +69,7 @@ LearnAPI.obs(model::EnsembleFitted, data) = LearnAPI.obs(first(model.models), da
LearnAPI.target(learner::Ensemble, data) = LearnAPI.target(learner.atom, data)
LearnAPI.features(learner::Ensemble, data) = LearnAPI.features(learner.atom, data)

function LearnAPI.fit(learner::Ensemble, data; verbosity=1)
function LearnAPI.fit(learner::Ensemble, data; verbosity=LearnAPI.default_verbosity())

# unpack hyperparameters:
atom = learner.atom
@@ -112,7 +112,7 @@ function LearnAPI.update(
model::EnsembleFitted,
data,
replacements::Pair{Symbol}...;
verbosity=1,
verbosity=LearnAPI.default_verbosity(),
)
learner_old = LearnAPI.learner(model)
learner = LearnAPI.clone(learner_old, replacements...)
@@ -205,7 +205,7 @@ LearnAPI.components(model::EnsembleFitted) = [:atom => model.models,]
# - `update`
# - `predict` (`Point` predictions)
# - `predictions` (returns predictions on all supplied data)
# - `out_of_sample_indices` (articluates which data is the internal validation data)
# - `out_of_sample_indices` (articulates which data is the internal validation data)
# - `trees`
# - `training_losses`
# - `out_of_sample_losses`
@@ -361,7 +361,7 @@ struct StumpRegressorFitted
rng
end

function LearnAPI.fit(learner::StumpRegressor, data; verbosity=1)
function LearnAPI.fit(learner::StumpRegressor, data; verbosity=LearnAPI.default_verbosity())

x, y = data
rng = deepcopy(learner.rng)
@@ -426,7 +426,7 @@ function LearnAPI.update(
model::StumpRegressorFitted,
data, # ignored as cached
replacements::Pair{Symbol}...;
verbosity=1,
verbosity=LearnAPI.default_verbosity(),
)

learner_old = LearnAPI.learner(model)
@@ -490,8 +490,9 @@ function LearnAPI.update(

end

# needed, because model is supervised:
LearnAPI.target(learner::StumpRegressor, observations) = last(observations)
# training data deconstructors:
LearnAPI.features(learner::StumpRegressor, data) = first(data)
LearnAPI.target(learner::StumpRegressor, data) = last(data)

LearnAPI.predict(model::StumpRegressorFitted, ::Point, x) =
_predict(model.forest, x)
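Besides the `verbosity` defaults, this file replaces the lone `target` method for `StumpRegressor` with a matching `features`/`target` pair. A minimal sketch of the contract these deconstructors satisfy (data hypothetical, zero-argument constructor assumed):

```julia
import LearnAPI

x = rand(100)
y = sin.(2x) .+ 0.1 .* randn(100)
data = (x, y)

learner = StumpRegressor()
LearnAPI.features(learner, data) === x  # true: first component
LearnAPI.target(learner, data) === y    # true: last component
```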
61 changes: 31 additions & 30 deletions src/learners/gradient_descent.jl
@@ -10,6 +10,7 @@ using StableRNGs
import Optimisers
import Zygote
import NNlib
import CategoricalArrays
import CategoricalDistributions
import CategoricalDistributions: pdf, mode
import ComponentArrays
@@ -55,7 +56,7 @@ for the specified number of `epochs`.
- `perceptron`: component array with components `weights` and `bias`
- `optimiser`: optimiser from Optimisers.jl
- `X`: feature matrix, of size `(p, n)`
- `y_hot`: one-hot encoded target, of size `(nclasses, n)`
- `y_hot`: one-hot encoded target, of size `(nlevels, n)`
- `epochs`: number of epochs
- `state`: optimiser state

@@ -108,7 +109,7 @@ point predictions with `predict(model, Point(), Xnew)`.

# Warm restart options

update(model, newdata, :epochs=>n, other_replacements...; verbosity=1)
update(model, newdata, :epochs=>n, other_replacements...)

If `Δepochs = n - perceptron.epochs` is non-negative, then return an updated model, with
the weights and bias of the previously learned perceptron used as the starting state in
@@ -117,7 +118,7 @@ instead of the previous training data. Any other hyperparameter `replacements` are
adopted. If `Δepochs` is negative or not specified, instead return `fit(learner,
newdata)`, where `learner=LearnAPI.clone(learner; epochs=n, replacements...)`.

update_observations(model, newdata, replacements...; verbosity=1)
update_observations(model, newdata, replacements...)

Return an updated model, with the weights and bias of the previously learned perceptron
used as the starting state in new gradient descent updates. Adopt any specified
@@ -132,38 +133,38 @@ PerceptronClassifier(; epochs=50, optimiser=Optimisers.Adam(), rng=Random.defaul
struct PerceptronClassifierObs
X::Matrix{Float32}
y_hot::BitMatrix # one-hot encoded target
classes # the (ordered) pool of `y`, as `CategoricalValue`s
levels # the (ordered) pool of `y`, as `CategoricalValue`s
end

# For pre-processing the training data:
function LearnAPI.obs(::PerceptronClassifier, data::Tuple)
X, y = data
classes = CategoricalDistributions.classes(y)
y_hot = classes .== permutedims(y) # one-hot encoding
return PerceptronClassifierObs(X, y_hot, classes)
levels = CategoricalArrays.levels(y)
y_hot = levels .== permutedims(y) # one-hot encoding
return PerceptronClassifierObs(X, y_hot, levels)
end
LearnAPI.obs(::PerceptronClassifier, observations::PerceptronClassifierObs) =
observations # involutivity

# helper:
function decode(y_hot, classes)
function decode(y_hot, levels)
n = size(y_hot, 2)
[only(classes[y_hot[:,i]]) for i in 1:n]
[only(levels[y_hot[:,i]]) for i in 1:n]
end

# implement `RandomAccess()` interface for output of `obs`:
Base.length(observations::PerceptronClassifierObs) = size(observations.y_hot, 2)
Base.getindex(observations::PerceptronClassifierObs, I) = PerceptronClassifierObs(
observations.X[:, I],
observations.y_hot[:, I],
observations.classes,
observations.levels,
)

# training data deconstructors:
LearnAPI.target(
learner::PerceptronClassifier,
observations::PerceptronClassifierObs,
) = decode(observations.y_hot, observations.classes)
) = decode(observations.y_hot, observations.levels)
LearnAPI.target(learner::PerceptronClassifier, data) =
LearnAPI.target(learner, obs(learner, data))
LearnAPI.features(
@@ -184,7 +185,7 @@ struct PerceptronClassifierFitted
learner::PerceptronClassifier
perceptron # component array storing weights and bias
state # optimiser state
classes # target classes
levels # target levels
losses
end

@@ -194,7 +195,7 @@ LearnAPI.learner(model::PerceptronClassifierFitted) = model.learner
function LearnAPI.fit(
learner::PerceptronClassifier,
observations::PerceptronClassifierObs;
verbosity=1,
verbosity=LearnAPI.default_verbosity(),
)

# unpack hyperparameters:
@@ -205,20 +206,20 @@ function LearnAPI.fit(
# unpack data:
X = observations.X
y_hot = observations.y_hot
classes = observations.classes
nclasses = length(classes)
levels = observations.levels
nlevels = length(levels)

# initialize bias and weights:
weights = randn(rng, Float32, nclasses, p)
bias = zeros(Float32, nclasses)
weights = randn(rng, Float32, nlevels, p)
bias = zeros(Float32, nlevels)
perceptron = (; weights, bias) |> ComponentArrays.ComponentArray

# initialize optimiser:
state = Optimisers.setup(optimiser, perceptron)

perceptron, state, losses = corefit(perceptron, X, y_hot, epochs, state, verbosity)

return PerceptronClassifierFitted(learner, perceptron, state, classes, losses)
return PerceptronClassifierFitted(learner, perceptron, state, levels, losses)
end

# `fit` for unprocessed data:
@@ -230,16 +231,16 @@ function LearnAPI.update_observations(
model::PerceptronClassifierFitted,
observations_new::PerceptronClassifierObs,
replacements...;
verbosity=1,
verbosity=LearnAPI.default_verbosity(),
)

# unpack data:
X = observations_new.X
y_hot = observations_new.y_hot
classes = observations_new.classes
nclasses = length(classes)
levels = observations_new.levels
nlevels = length(levels)

classes == model.classes || error("New training target has incompatible classes.")
levels == model.levels || error("New training target has incompatible levels.")

learner_old = LearnAPI.learner(model)
learner = LearnAPI.clone(learner_old, replacements...)
@@ -252,7 +253,7 @@ function LearnAPI.update_observations(
perceptron, state, losses_new = corefit(perceptron, X, y_hot, epochs, state, verbosity)
losses = vcat(losses, losses_new)

return PerceptronClassifierFitted(learner, perceptron, state, classes, losses)
return PerceptronClassifierFitted(learner, perceptron, state, levels, losses)
end
LearnAPI.update_observations(model::PerceptronClassifierFitted, data, args...; kwargs...) =
update_observations(model, obs(LearnAPI.learner(model), data), args...; kwargs...)
@@ -262,16 +263,16 @@ function LearnAPI.update(
model::PerceptronClassifierFitted,
observations::PerceptronClassifierObs,
replacements...;
verbosity=1,
verbosity=LearnAPI.default_verbosity(),
)

# unpack data:
X = observations.X
y_hot = observations.y_hot
classes = observations.classes
nclasses = length(classes)
levels = observations.levels
nlevels = length(levels)

classes == model.classes || error("New training target has incompatible classes.")
levels == model.levels || error("New training target has incompatible levels.")

learner_old = LearnAPI.learner(model)
learner = LearnAPI.clone(learner_old, replacements...)
@@ -289,7 +290,7 @@ function LearnAPI.update(
corefit(perceptron, X, y_hot, Δepochs, state, verbosity)
losses = vcat(losses, losses_new)

return PerceptronClassifierFitted(learner, perceptron, state, classes, losses)
return PerceptronClassifierFitted(learner, perceptron, state, levels, losses)
end
LearnAPI.update(model::PerceptronClassifierFitted, data, args...; kwargs...) =
update(model, obs(LearnAPI.learner(model), data), args...; kwargs...)
@@ -299,9 +300,9 @@ LearnAPI.update(model::PerceptronClassifierFitted, data, args...; kwargs...) =

function LearnAPI.predict(model::PerceptronClassifierFitted, ::Distribution, Xnew)
perceptron = model.perceptron
classes = model.classes
levels = model.levels
probs = perceptron.weights*Xnew .+ perceptron.bias |> NNlib.softmax
return CategoricalDistributions.UnivariateFinite(classes, probs')
return CategoricalDistributions.UnivariateFinite(levels, probs')
end

LearnAPI.predict(model::PerceptronClassifierFitted, ::Point, Xnew) =
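To tie the perceptron changes together, here is a hedged end-to-end sketch of the warm-restart workflow documented above, using only names appearing in this diff (data hypothetical, CategoricalArrays ≥ 1 assumed):

```julia
import LearnAPI
using CategoricalArrays

X = rand(Float32, 3, 150)                    # (p, n) feature matrix
y = categorical(rand(["a", "b", "c"], 150))  # target with three levels

model = LearnAPI.fit(PerceptronClassifier(epochs=50), (X, y))

# Δepochs = 100 - 50 is non-negative, so this continues gradient descent
# from the learned weights and bias instead of retraining from scratch:
model = LearnAPI.update(model, (X, y), :epochs => 100)

# probabilistic and point predictions on the training features:
LearnAPI.predict(model, LearnAPI.Distribution(), X)
LearnAPI.predict(model, LearnAPI.Point(), X)
```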