From b60fc22fa24c17b816de9ae3f744baf69eb39107 Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Sun, 19 May 2024 16:20:08 +1200 Subject: [PATCH 01/27] simplify, removing in particular, obsfit, obspredict, obstransform --- Project.toml | 2 +- docs/make.jl | 14 +- docs/src/accessor_functions.md | 5 +- docs/src/anatomy_of_an_implementation.md | 516 +++++++++++++++-------- docs/src/fit.md | 16 +- docs/src/index.md | 25 +- docs/src/kinds_of_target_proxy.md | 45 +- docs/src/minimize.md | 6 +- docs/src/obs.md | 96 ++--- docs/src/predict_transform.md | 70 +-- docs/src/reference.md | 109 ++--- docs/src/traits.md | 86 ++-- src/LearnAPI.jl | 3 +- src/accessor_functions.jl | 19 + src/fit.jl | 170 +------- src/minimize.jl | 2 +- src/obs.jl | 117 ++--- src/predict_transform.jl | 219 +++------- src/tools.jl | 43 +- src/traits.jl | 319 +++++++------- src/types.jl | 93 ++-- test/integration/regression.jl | 118 ++++-- test/integration/static_algorithms.jl | 64 +-- test/runtests.jl | 4 + test/tools.jl | 8 - test/traits.jl | 16 + 26 files changed, 1062 insertions(+), 1123 deletions(-) create mode 100644 test/traits.jl diff --git a/Project.toml b/Project.toml index f8431fdd..206a4038 100644 --- a/Project.toml +++ b/Project.toml @@ -19,4 +19,4 @@ Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] -test = ["DataFrames", "LinearAlgebra", "MLUtils", "Serialization", "SparseArrays", "Tables", "Test"] +test = ["DataFrames", "LinearAlgebra", "MLUtils", "Serialization", "Tables", "Test"] diff --git a/docs/make.jl b/docs/make.jl index fd54ce70..b0705cda 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -11,13 +11,13 @@ makedocs( "Home" => "index.md", "Anatomy of an Implementation" => "anatomy_of_an_implementation.md", "Reference" => "reference.md", - "Kinds of Target Proxy" => "kinds_of_target_proxy.md", - "fit" => "fit.md", - "predict, transform, and relatives" => "predict_transform.md", - "minimize" => "minimize.md", - "obs" => "obs.md", - "Accessor Functions" => "accessor_functions.md", - "Algorithm Traits" => "traits.md", + "... fit" => "fit.md", + "... predict/transform" => "predict_transform.md", + "... Kinds of Target Proxy" => "kinds_of_target_proxy.md", + "... minimize" => "minimize.md", + "... obs" => "obs.md", + "... Accessor Functions" => "accessor_functions.md", + "... Algorithm Traits" => "traits.md", "Common Implementation Patterns" => "common_implementation_patterns.md", "Testing an Implementation" => "testing_an_implementation.md", ], diff --git a/docs/src/accessor_functions.md b/docs/src/accessor_functions.md index 07c30f1f..f35adc54 100644 --- a/docs/src/accessor_functions.md +++ b/docs/src/accessor_functions.md @@ -1,7 +1,6 @@ # [Accessor Functions](@id accessor_functions) -The sole argument of an accessor function is the output, `model`, of [`fit`](@ref) or -[`obsfit`](@ref). +The sole argument of an accessor function is the output, `model`, of [`fit`](@ref). 
- [`LearnAPI.algorithm(model)`](@ref)
- [`LearnAPI.extras(model)`](@ref)
@@ -12,6 +11,7 @@ The sole argument of an accessor function is the output, `model`, of [`fit`](@re
- [`LearnAPI.feature_importances(model)`](@ref)
- [`LearnAPI.training_labels(model)`](@ref)
- [`LearnAPI.training_losses(model)`](@ref)
+- [`LearnAPI.training_predictions(model)`](@ref)
- [`LearnAPI.training_scores(model)`](@ref)
- [`LearnAPI.components(model)`](@ref)

@@ -33,6 +33,7 @@ LearnAPI.tree
LearnAPI.trees
LearnAPI.feature_importances
LearnAPI.training_losses
+LearnAPI.training_predictions
LearnAPI.training_scores
LearnAPI.training_labels
LearnAPI.components
diff --git a/docs/src/anatomy_of_an_implementation.md b/docs/src/anatomy_of_an_implementation.md
index 2bc24a39..1c011d21 100644
--- a/docs/src/anatomy_of_an_implementation.md
+++ b/docs/src/anatomy_of_an_implementation.md
@@ -1,11 +1,28 @@
# Anatomy of an Implementation

This section explains a detailed implementation of the LearnAPI for naive [ridge
-regression](https://en.wikipedia.org/wiki/Ridge_regression). Most readers will want to
-scan the [demonstration](@ref workflow) of the implementation before studying the
-implementation itself.
+regression](https://en.wikipedia.org/wiki/Ridge_regression) with no intercept. The kind of
+workflow we want to enable has been previewed in [Sample workflow](@ref). Readers can also
+refer to the [demonstration](@ref workflow) of the implementation given later.

-## Defining an algorithm type
+For a transformer, implementations ordinarily implement `transform` instead of
+`predict`. For more on `predict` versus `transform`, see [Predict or transform?](@ref)

!!! important

    The core implementations of `fit`, `predict`, etc.,
    always have a *single* `data` argument, as in `fit(algorithm, data; verbosity=1)`.
    Calls like `fit(algorithm, X, y)` are provided as additional convenience methods.

!!! note

    If the `data` object consumed by `fit`, `predict`, or `transform` is
    not a suitable table¹, array³, tuple of tables and arrays, or some
    other object implementing
    the MLUtils.jl `getobs`/`numobs` interface,
    then an implementation must: (i) suitably overload the trait
    [`LearnAPI.data_interface`](@ref); and/or (ii) overload [`obs`](@ref), as
    illustrated below under [Providing an advanced data interface](@ref).

The first line below imports the lightweight package LearnAPI.jl whose methods we will be
extending. The second imports libraries needed for the core algorithm.

```@example anatomy
using LearnAPI
using LinearAlgebra, Tables
nothing # hide
```

-A struct stores the regularization hyperparameter `lambda` of our ridge regressor:
+## Defining algorithms
+
+Here's a new type whose instances specify ridge regression parameters:

```@example anatomy
-struct Ridge
-    lambda::Float64
+struct Ridge{T<:Real}
+    lambda::T
end
nothing # hide
```

-Instances of `Ridge` are [algorithms](@ref algorithms), in LearnAPI.jl parlance.
+Instances of `Ridge` will be [algorithms](@ref algorithms), in LearnAPI.jl parlance.

-A keyword argument constructor provides defaults for all hyperparameters:
+To [qualify](@ref algorithms) as a LearnAPI algorithm, an object must come with a
+mechanism for creating new versions of itself, with modified property (field) values. 
To +this end, we implement `LearnAPI.constructor`, which must return a keyword constructor: ```@example anatomy Ridge(; lambda=0.1) = Ridge(lambda) +LearnAPI.constructor(::Ridge) = Ridge nothing # hide ``` -## Implementing `fit` - -A ridge regressor requires two types of data for training: *input features* `X`, which -here we suppose are tabular, and a [target](@ref proxy) `y`, which we suppose is a -vector. Users will accordingly call [`fit`](@ref) like this: - -```julia -algorithm = Ridge(lambda=0.05) -fit(algorithm, X, y; verbosity=1) -``` - -However, a new implementation does not overload `fit`. Rather it -implements - -```julia -obsfit(algorithm::Ridge, obsdata; verbosity=1) -``` - -for each `obsdata` returned by a data-preprocessing call `obs(fit, algorithm, X, y)`. You -can read "obs" as "observation-accessible", for reasons explained shortly. The -LearnAPI.jl definition - -```julia -fit(algorithm, data...; verbosity=1) = - obsfit(algorithm, obs(fit, algorithm, data...), verbosity) -``` -then takes care of `fit`. - -The `obs` and `obsfit` method are public, and the user can call them like this: - -```julia -obsdata = obs(fit, algorithm, X, y) -model = obsfit(algorithm, obsdata) -``` - -We begin by defining a struct¹ for the output of our data-preprocessing operation, `obs`, -which will store `y` and the matrix representation of `X`, together with it's column names -(needed for recording named coefficients for user inspection): - -```@example anatomy -struct RidgeFitData{T} - A::Matrix{T} # p x n - names::Vector{Symbol} - y::Vector{T} -end -nothing # hide -``` - -And we overload [`obs`](@ref) like this +So, if `algorithm = Ridge(lambda=0.1)` then `LearnAPI.constructor(algorithm)(lambda=0.05)` +is another algorithm with the same properties, except that the value of `lambda` has been +changed to `0.05`. -```@example anatomy -function LearnAPI.obs(::typeof(fit), ::Ridge, X, y) - table = Tables.columntable(X) - names = Tables.columnnames(table) |> collect - return RidgeFitData(Tables.matrix(table, transpose=true), names, y) -end -nothing # hide -``` - -so that `obs(fit, Ridge(), X, y)` returns a combined `RidgeFitData` object with everything -the core algorithm will need. -Since `obs` is public, the user will have access to this object, but to make it useful to -her (and to fulfill the [`obs`](@ref) contract) this object must implement the -[MLUtils.jl](https://github.com/JuliaML/MLUtils.jl) `getobs`/`numobs` interface, to enable -observation-resampling (which will be efficient, because observations are now columns). It -usually suffices to overload `Base.getindex` and `Base.length` (which are the -`getobs`/`numobs` fallbacks) so we won't actually need to depend on MLUtils.jl: +## Implementing `fit` -```@example anatomy -Base.getindex(data::RidgeFitData, I) = - RidgeFitData(data.A[:,I], data.names, y[I]) -Base.length(data::RidgeFitData, I) = length(data.y) -nothing # hide -``` +A ridge regressor requires two types of data for training: *input features* `X`, which +here we suppose are tabular¹, and a [target](@ref proxy) `y`, which we suppose is a +vector. 
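For concreteness, here's a minimal sketch of training data of the kind just described
(the feature names and values below are hypothetical, not part of the implementation):

```julia
X = (age = [25.0, 40.0, 61.0], height = [170.0, 165.0, 182.0])  # a Tables.jl column table
y = [64.0, 71.0, 87.0]  # target vector, one observation per row of `X`
```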
-Next, we define a second struct for storing the outcomes of training, including named -versions of the learned coefficients: +It is convenient to define a new type for the `fit` output, which will include +coefficients labelled by feature name for inspection after training: ```@example anatomy struct RidgeFitted{T,F} - algorithm::Ridge - coefficients::Vector{T} - named_coefficients::F + algorithm::Ridge + coefficients::Vector{T} + named_coefficients::F end nothing # hide ``` -We include `algorithm`, which must be recoverable from the output of `fit`/`obsfit` (see -[Accessor functions](@ref) below). +Note that we also include `algorithm` in the struct, for it must be possible to recover +`algorithm` from the output of `fit`; see [Accessor functions](@ref) below. -We are now ready to implement a suitable `obsfit` method to execute the core training: +The core implementation of `fit` looks like this: ```@example anatomy -function LearnAPI.obsfit(algorithm::Ridge, obsdata::RidgeFitData, verbosity) +function LearnAPI.fit(algorithm::Ridge, data; verbosity=1) - lambda = algorithm.lambda - A = obsdata.A - names = obsdata.names - y = obsdata.y + X, y = data - # apply core algorithm: - coefficients = (A*A' + algorithm.lambda*I)\(A*y) # 1 x p matrix + # data preprocessing: + table = Tables.columntable(X) + names = Tables.columnnames(table) |> collect + A = Tables.matrix(table, transpose=true) - # determine named coefficients: - named_coefficients = [names[j] => coefficients[j] for j in eachindex(names)] + lambda = algorithm.lambda - # make some noise, if allowed: - verbosity > 0 && @info "Coefficients: $named_coefficients" + # apply core algorithm: + coefficients = (A*A' + algorithm.lambda*I)\(A*y) # vector - return RidgeFitted(algorithm, coefficients, named_coefficients) + # determine named coefficients: + named_coefficients = [names[j] => coefficients[j] for j in eachindex(names)] + # make some noise, if allowed: + verbosity > 0 && @info "Coefficients: $named_coefficients" + + return RidgeFitted(algorithm, coefficients, named_coefficients) end -nothing # hide ``` -Users set `verbosity=0` for warnings only, and `verbosity=-1` for silence. - ## Implementing `predict` @@ -163,45 +120,29 @@ predict(model, LiteralTarget(), Xnew) where `Xnew` is a table (of the same form as `X` above). The argument `LiteralTarget()` signals that we want literal predictions of the target variable, as opposed to a proxy for the target, such as probability density functions. `LiteralTarget` is an example of a -[`LearnAPI.KindOfProxy`](@ref proxy_types) type. Targets and target proxies are defined +[`LearnAPI.KindOfProxy`](@ref proxy_types) type. Targets and target proxies are discussed [here](@ref proxy). -Rather than overload the primary signature above, however, we overload for -"observation-accessible" input, as we did for `fit`, - -```@example anatomy -LearnAPI.obspredict(model::RidgeFitted, ::LiteralTarget, Anew::Matrix) = - ((model.coefficients)'*Anew)' -nothing # hide -``` - -and overload `obs` to make the table-to-matrix conversion: +Here's the implementation for our ridge regressor: ```@example anatomy -LearnAPI.obs(::typeof(predict), ::Ridge, Xnew) = Tables.matrix(Xnew, transpose=true) +LearnAPI.predict(model::RidgeFitted, ::LiteralTarget, Xnew) = + Tables.matrix(Xnew)*model.coefficients ``` -As matrices (with observations as columns) already implement the MLUtils.jl -`getobs`/`numobs` interface, we already satisfy the [`obs`](@ref) contract, and there was -no need to create a wrapper for `obs` output. 
The primary `predict` method, handling tabular input, is provided by a
-LearnAPI.jl fallback similar to the `fit` fallback.

Here's the implementation for our ridge regressor:

```@example anatomy
LearnAPI.predict(model::RidgeFitted, ::LiteralTarget, Xnew) =
    Tables.matrix(Xnew)*model.coefficients
```

## Accessor functions

An [accessor function](@ref accessor_functions) has the output of [`fit`](@ref) as its
sole argument. Every new implementation must implement the accessor function
[`LearnAPI.algorithm`](@ref) for recovering an algorithm from a fitted object:

```@example anatomy
LearnAPI.algorithm(model::RidgeFitted) = model.algorithm
```

Other accessor functions extract learned parameters or some standard byproducts of
training, such as feature importances or training losses.² Here we implement an accessor
function to extract the linear coefficients:

```@example anatomy
LearnAPI.coefficients(model::RidgeFitted) = model.named_coefficients
nothing # hide
```

The [`minimize`](@ref) method falls back to the identity; since the named coefficients
are not needed for prediction, we
overload it to dump the named version of the coefficients:

```@example anatomy
LearnAPI.minimize(model::RidgeFitted) =
    RidgeFitted(model.algorithm, model.coefficients, nothing)
```

Crucially, we can still use `LearnAPI.minimize(model)` in place of `model` to make new
predictions.


## Algorithm traits

Algorithm [traits](@ref traits) record extra generic information about an algorithm, or
make specific promises of behavior. They usually have an algorithm as the single
argument. We regard [`LearnAPI.constructor`](@ref) defined above as a trait.

In LearnAPI.jl `predict` always outputs a [target or target proxy](@ref proxy), where
"target" is understood very broadly. We overload a trait to record here the fact that the
target variable explicitly appears in training (i.e., the algorithm is supervised):

```julia
LearnAPI.target(::Ridge) = true
```

or, using a shortcut:

```julia
@trait Ridge target = true
```

The macro can be used to specify multiple traits simultaneously:

```@example anatomy
@trait(
    Ridge,
    constructor = Ridge,
    target = true,
    kinds_of_proxy = (LiteralTarget(),),
    descriptors = (:regression,),
    functions = (
        fit,
        minimize,
        predict,
        obs,
        LearnAPI.algorithm,
        LearnAPI.coefficients,
    )
)
nothing # hide
```

-Implementing the last trait, [`LearnAPI.functions`](@ref), which must include all
-non-trait functions overloaded for `Ridge`, is compulsory. This is the only universally
-compulsory trait. 
It is worthwhile studying the [list of all traits](@ref traits_list) to
-see which might apply to a new implementation, to enable maximum buy into functionality
-provided by third party packages, and to assist third party algorithms that match machine
-learning algorithms to user defined tasks.

The trait `kinds_of_proxy` is required here, because we implemented `predict`.

The last trait, `functions`, returns a list of all LearnAPI.jl methods that can be
meaningfully applied to the algorithm or associated model. See [`LearnAPI.functions`](@ref)
for a checklist. This, and [`LearnAPI.constructor`](@ref), are the only universally
compulsory traits. However, it is worthwhile studying the [list of all traits](@ref
traits_list) to see which might apply to a new implementation, to enable maximum buy-in to
functionality provided by third party packages, and to assist third party algorithms that
match machine learning algorithms to user-defined tasks.

Having set `LearnAPI.target(::Ridge) == true`, we are obliged to overload a multi-argument
version of `LearnAPI.target` to extract the target from the `data` that gets supplied to
`fit`:

```@example anatomy
LearnAPI.target(::Ridge, data) = last(data)
```

## Convenience methods

Finally, we extend `fit` and `predict` with signatures convenient for user interaction,
enabling the kind of workflow previewed in [Sample workflow](@ref):

```@example anatomy
LearnAPI.fit(algorithm::Ridge, X, y; kwargs...) =
    fit(algorithm, (X, y); kwargs...)

LearnAPI.predict(model::RidgeFitted, Xnew) =
    predict(model, LiteralTarget(), Xnew)
```

## [Demonstration](@id workflow)

```@example anatomy
n = 10 # number of observations
train = 1:6
test = 7:10
a, b, c = rand(n), rand(n), rand(n)
X = (; a, b, c)
y = 2a - b + 3c + 0.05*rand(n)
nothing # hide
```

```@example anatomy
algorithm = Ridge(lambda=0.5)
-LearnAPI.functions(algorithm)
+foreach(println, LearnAPI.functions(algorithm))
```

-### Naive user workflow
-
-Training and predicting with external resampling:
+Training and predicting:

```@example anatomy
-using Tables
model = fit(algorithm, Tables.subset(X, train), y[train])
ŷ = predict(model, LiteralTarget(), Tables.subset(X, test))
```

-### Advanced workflow
-
-We now train and predict using internal data representations, resampled using the generic
-MLUtils.jl interface.
- -```@example anatomy -import MLUtils -fit_data = obs(fit, algorithm, X, y) -predict_data = obs(predict, algorithm, X) -model = obsfit(algorithm, MLUtils.getobs(fit_data, train)) -ẑ = obspredict(model, LiteralTarget(), MLUtils.getobs(predict_data, test)) -@assert ẑ == ŷ -nothing # hide -``` - -### Applying an accessor function and serialization - Extracting coefficients: ```@example anatomy @@ -319,21 +271,213 @@ LearnAPI.coefficients(model) Serialization/deserialization: -```julia +```@example anatomy using Serialization small_model = minimize(model) -serialize("my_ridge.jls", small_model) +filename = tempname() +serialize(filename, small_model) +``` -recovered_model = deserialize("my_ridge.jls") +```julia +recovered_model = deserialize(filename) @assert LearnAPI.algorithm(recovered_model) == algorithm -predict(recovered_model, LiteralTarget(), X) == predict(model, LiteralTarget(), X) +@assert predict(recovered_model, X) == predict(model, X) +``` + +## Providing an advanced data interface + +```@setup anatomy2 +using LearnAPI +using LinearAlgebra, Tables + +struct Ridge{T<:Real} + lambda::T +end + +Ridge(; lambda=0.1) = Ridge(lambda) + +struct RidgeFitted{T,F} + algorithm::Ridge + coefficients::Vector{T} + named_coefficients::F +end + +LearnAPI.algorithm(model::RidgeFitted) = model.algorithm +LearnAPI.coefficients(model::RidgeFitted) = model.named_coefficients +LearnAPI.minimize(model::RidgeFitted) = + RidgeFitted(model.algorithm, model.coefficients, nothing) + +LearnAPI.fit(algorithm::Ridge, X, y; kwargs...) = + fit(algorithm, (X, y); kwargs...) +LearnAPI.predict(model::RidgeFitted, Xnew) = predict(model, LiteralTarget(), Xnew) + +@trait( + Ridge, + constructor = Ridge, + target = true, + kinds_of_proxy=(LiteralTarget(),), + descriptors = (:regression,), + functions = ( + fit, + minimize, + predict, + obs, + LearnAPI.algorithm, + LearnAPI.coefficients, + ) +) + +n = 10 # number of observations +train = 1:6 +test = 7:10 +a, b, c = rand(n), rand(n), rand(n) +X = (; a, b, c) +y = 2a - b + 3c + 0.05*rand(n) +``` + +An implementation may optionally implement [`obs`](@ref), to expose to the user (or some +meta-algorithm like cross-validation) the representation of input data internal to `fit` +or `predict`, such as the matrix version `A` of `X` in the ridge example. Here we +specifically wrap all the pre-processed data into single object, for which we introduce a +new type: + +```@example anatomy2 +struct RidgeFitObs{T,M<:AbstractMatrix{T}} + A::M # p x n + names::Vector{Symbol} # features + y::Vector{T} # target +end +``` + +Now we overload `obs` to carry out the data pre-processing previously in `fit`, like this: + +```@example anatomy2 +function LearnAPI.obs(::Ridge, data) + X, y = data + table = Tables.columntable(X) + names = Tables.columnnames(table) |> collect + return RidgeFitObs(Tables.matrix(table)', names, y) +end +``` + +We informally refer to the output of `obs` as "observations" (see [The `obs` +contract](@ref) below). 
The previous core `fit` signature is now replaced with two methods: one to handle
"regular" input, and one to handle the pre-processed data (observations), which appears
first below:

```@example anatomy2
function LearnAPI.fit(algorithm::Ridge, observations::RidgeFitObs; verbosity=1)

    lambda = algorithm.lambda

    A = observations.A
    names = observations.names
    y = observations.y

    # apply core algorithm:
    coefficients = (A*A' + lambda*I)\(A*y) # vector

    # determine named coefficients:
    named_coefficients = [names[j] => coefficients[j] for j in eachindex(names)]

    # make some noise, if allowed:
    verbosity > 0 && @info "Coefficients: $named_coefficients"

    return RidgeFitted(algorithm, coefficients, named_coefficients)

end

LearnAPI.fit(algorithm::Ridge, data; kwargs...) =
    fit(algorithm, obs(algorithm, data); kwargs...)
```

We provide an overloading of `LearnAPI.target` to handle the additional supported data
argument of `fit`:

```@example anatomy2
LearnAPI.target(::Ridge, observations::RidgeFitObs) = observations.y
```

### The `obs` contract

Providing `fit` signatures matching the output of `obs` is the first part of the `obs`
contract. The second part is this: *The output of `obs` must implement the*
[MLUtils.jl](https://juliaml.github.io/MLUtils.jl/dev/) `getobs/numobs` *interface for
accessing individual observations*. It usually suffices to overload `Base.getindex` and
`Base.length` (which are the `getobs/numobs` fallbacks):

```@example anatomy2
Base.getindex(data::RidgeFitObs, I) =
    RidgeFitObs(data.A[:,I], data.names, data.y[I])
Base.length(data::RidgeFitObs) = length(data.y)
```

We can do something similar for `predict`, but there's no need for a new type in this
case:

```@example anatomy2
LearnAPI.obs(::RidgeFitted, Xnew) = Tables.matrix(Xnew)'

LearnAPI.predict(model::RidgeFitted, ::LiteralTarget, observations::AbstractMatrix) =
    observations'*model.coefficients

LearnAPI.predict(model::RidgeFitted, ::LiteralTarget, Xnew) =
    predict(model, LiteralTarget(), obs(model, Xnew))
```

### Important notes:

- The observations to be consumed by `fit` are returned by `obs(algorithm::Ridge, ...)`,
  while those consumed by `predict` are returned by `obs(model::RidgeFitted, ...)`. We
  need the different signatures because the form of data consumed by `fit` and `predict`
  is generally different.

- We need the adjoint operator, `'`, because the last dimension in arrays is the
  observation dimension, according to the MLUtils.jl convention. Remember, `Xnew` is a
  table here.

Since LearnAPI.jl provides fallbacks for `obs` that simply return the unadulterated data
input, overloading `obs` is optional. This is provided that the data appearing in
publicized `fit`/`predict` signatures consists of objects implementing the
`getobs/numobs` interface (such as tables¹ and arrays³).

To buy out of supporting the MLUtils.jl interface altogether, an implementation must
overload the trait [`LearnAPI.data_interface(algorithm)`](@ref).

For more on data interfaces, see [`obs`](@ref) and
[`LearnAPI.data_interface(algorithm)`](@ref).
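For example, an algorithm buying out of the MLUtils.jl interface as just described, whose
`obs` output supports only iteration (because, say, observations are streamed from disk),
might make a declaration along the following lines (a hypothetical sketch;
`LazyImageClassifier` is an invented algorithm type, not part of this documentation):

```julia
# `obs` output supports `iterate`, but not indexed `getobs`/`numobs` access:
@trait LazyImageClassifier data_interface = Base.SizeUnknown()
```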
## Demonstration of an advanced `obs` workflow

We can now train and predict using internal data representations, resampled using the
generic MLUtils.jl interface:

```@example anatomy2
import MLUtils
algorithm = Ridge()
observations_for_fit = obs(algorithm, (X, y))
model = fit(algorithm, MLUtils.getobs(observations_for_fit, train))
observations_for_predict = obs(model, X)
ẑ = predict(model, MLUtils.getobs(observations_for_predict, test))
```

```julia
@assert ẑ == ŷ
```

---

¹ In LearnAPI.jl a *table* is any object `X` implementing the
[Tables.jl](https://tables.juliadata.org/dev/) interface, additionally satisfying
`Tables.istable(X) == true` and implementing `DataAPI.nrow` (and whence
`MLUtils.numobs`). Tables that are also (unnamed) tuples are disallowed.

² An implementation can provide further accessor functions, if necessary, but like the
native ones, they must be included in the [`LearnAPI.functions`](@ref) declaration.

³ The last index must be the observation index.

⁴ Guaranteed assuming
`LearnAPI.data_interface(algorithm) == Base.HasLength()`, the default.
diff --git a/docs/src/fit.md b/docs/src/fit.md
index f2709611..c3727110 100644
--- a/docs/src/fit.md
+++ b/docs/src/fit.md
@@ -1,10 +1,13 @@
# [`fit`](@ref fit)

```julia
-fit(algorithm, data...; verbosity=1) -> model
-fit(model, data...; verbosity=1) -> updated_model
+fit(algorithm, data; verbosity=1) -> model
+fit(model, data; verbosity=1) -> updated_model
```

When `fit` expects a tuple form of argument, `data = (X1, ..., Xn)`, the signature
`fit(algorithm, X1, ..., Xn)` is also provided.

## Typical workflow

```julia
@@ -20,17 +23,14 @@ LearnAPI.feature_importances(model)

## Implementation guide

-The `fit` method is not implemented directly. Instead, implement [`obsfit`](@ref).
+| method                    | fallback | compulsory? |
+|:--------------------------|:---------|-------------|
+| [`fit`](@ref)`(alg, ...)` | none     | yes         |

-| method                       | fallback | compulsory? | requires                    |
-|:-----------------------------|:---------|-------------|-----------------------------|
-| [`obsfit`](@ref)`(alg, ...)` | none     | yes         | [`obs`](@ref) in some cases |
-|                              |          |             |                             |

## Reference

```@docs
LearnAPI.fit
-LearnAPI.obsfit
```
diff --git a/docs/src/index.md b/docs/src/index.md
index f5c793f7..4f979070 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -22,11 +22,11 @@ promising specific behavior.

!!! warning

    The API described here is under active development and not ready for adoption.
    Join an ongoing design discussion at
    [this](https://discourse.julialang.org/t/ann-learnapi-jl-proposal-for-a-basement-level-machine-learning-api/93048)
    Julia Discourse thread.

## Sample workflow

@@ -69,12 +69,17 @@ on the usual supervised/unsupervised learning dichotomy. From this point of view a
supervised algorithm is simply one in which a target variable exists, and happens to
appear as an input to training but not to prediction.
-In LearnAPI.jl, a method called [`obs`](@ref data_interface) gives users access to an -"internal", algorithm-specific, representation of input data, which is always -"observation-accessible", in the sense that it can be resampled using -[MLUtils.jl](https://github.com/JuliaML/MLUtils.jl) `getobs/numobs` interface. The -implementation can arrange for this resampling to be efficient, and workflows based on -`obs` can have performance benefits. +Algorithms are free to consume data in any format. However, a method called [`obs`](@ref +data_interface) (read as "observations") gives users and meta-algorithms access to an +algorithm-specific representation of input data, which is also guaranteed to implement a +standard interface for accessing individual observations, unless an algorithm explicitly +opts out. The `fit` and `predict` methods consume these alternative representations of data. + +The fallback data interface is the [MLUtils.jl](https://github.com/JuliaML/MLUtils.jl) +`getobs/numobs` interface, and if the input consumed by the algorithm already implements +that interface (tables, arrays, etc.) then overloading `obs` is completely optional. A +plain iteration interface (to support, e.g., data loaders reading images from disk files) +can also be specified. ## Learning more diff --git a/docs/src/kinds_of_target_proxy.md b/docs/src/kinds_of_target_proxy.md index 03c7e032..a34e1f42 100644 --- a/docs/src/kinds_of_target_proxy.md +++ b/docs/src/kinds_of_target_proxy.md @@ -1,17 +1,19 @@ # [Kinds of Target Proxy](@id proxy_types) -The available kinds of [target proxy](@ref proxy) are classified by subtypes of -`LearnAPI.KindOfProxy`. These types are intended for dispatch only and have no fields. +The available kinds of [target proxy](@ref proxy) (used for `predict` dispatch) are +classified by subtypes of `LearnAPI.KindOfProxy`. These types are intended for dispatch +only and have no fields. 
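To illustrate, a model supporting both point and probabilistic predictions of a target
could be queried as follows (a sketch, assuming `model` is the output of some `fit` call
and that both kinds of proxy are supported):

```julia
ŷ = predict(model, LiteralTarget(), Xnew)  # point predictions of the target
d̂ = predict(model, Distribution(), Xnew)   # probability distributions, one per observation
```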
```@docs
LearnAPI.KindOfProxy
```

## Simple target proxies

```@docs
LearnAPI.IID
```

-## Simple target proxies (subtypes of `LearnAPI.IID`)
-
| type                                  | form of an observation                                                                                                                                                             |
|:-------------------------------------:|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| `LearnAPI.LiteralTarget`              | same as target observations                                                                                                                                                          |
| `LearnAPI.Sampleable`                 | object that can be sampled to obtain object of the same form as target observation                                                                                                  |
| `LearnAPI.Distribution`               | explicit probability density/mass function whose sample space is all possible target observations                                                                                   |
| `LearnAPI.LogDistribution`            | explicit log-probability density/mass function whose sample space is possible target observations                                                                                   |
| `LearnAPI.Probability`                | numerical probability or probability vector                                                                                                                                         |
| `LearnAPI.LogProbability`             | log-probability or log-probability vector                                                                                                                                           |
| `LearnAPI.Parametric`                 | a list of parameters (e.g., mean and variance) describing some distribution                                                                                                         |
| `LearnAPI.LabelAmbiguous`             | collections of labels (in case of multi-class target) but without a known correspondence to the original target labels (and of possibly different number) as in, e.g., clustering   |
| `LearnAPI.LabelAmbiguousSampleable`   | sampleable version of `LabelAmbiguous`; see `Sampleable` above                                                                                                                      |
| `LearnAPI.LabelAmbiguousDistribution` | pdf/pmf version of `LabelAmbiguous`; see `Distribution` above                                                                                                                       |
| `LearnAPI.LabelAmbiguousFuzzy`        | same as `LabelAmbiguous` but with multiple values of indeterminate number                                                                                                           |
| `LearnAPI.ConfidenceInterval`         | confidence interval                                                                                                                                                                  |
| `LearnAPI.Fuzzy`                      | finite but possibly varying number of target observations                                                                                                                           |
| `LearnAPI.ProbabilisticFuzzy`         | as for `Fuzzy` but labeled with probabilities (not necessarily summing to one)                                                                                                      |
| `LearnAPI.SurvivalFunction`           | survival function                                                                                                                                                                    |
| `LearnAPI.SurvivalDistribution`       | probability distribution for survival time                                                                                                                                          |
| `LearnAPI.SurvivalHazardFunction`     | hazard function for survival time                                                                                                                                                    |
| `LearnAPI.OutlierScore`               | numerical score reflecting degree of outlierness (not necessarily normalized)                                                                                                       |
| `LearnAPI.Continuous`                 | real-valued approximation/interpolation of a discrete-valued target, such as a count (e.g., number of phone calls)                                                                  |

representation](https://github.com/alan-turing-institute/MLJ.jl/blob/dev/paper/paper.md)

> Table of concrete subtypes of `LearnAPI.IID <: LearnAPI.KindOfProxy`.


## Proxies for distribution-fitting algorithms

```@docs
LearnAPI.Single
```

| type `T`                         | form of output of `predict(model, ::T)`                                 |
|:--------------------------------:|:------------------------------------------------------------------------|
| `LearnAPI.SingleSampleable`      | object that can be sampled to obtain a single target observation         |
| `LearnAPI.SingleDistribution`    | explicit probability density/mass function for sampling the target       |
| `LearnAPI.SingleLogDistribution` | explicit log-probability density/mass function for sampling the target   |

> Table of `LearnAPI.KindOfProxy` subtypes subtyping `LearnAPI.Single`


## Joint probability distributions

```@docs
LearnAPI.Joint
```

-## When the proxy for the target is a single object
-
-In the following table of subtypes `T <: LearnAPI.KindOfProxy` not falling under the `IID`
-umbrella, it is understood that the output of `predict(model, ::T, ...)` is
-not divided into individual observations, but represents a *single* probability
-distribution for the sample space ``Y^n``, where ``Y`` is the space in which the target
-variable takes its values, and `n` is the number of observations in `data`. 
+## Joint probability distributions + +```@docs +LearnAPI.Joint +``` -| type `T` | form of output of `predict(model, ::T, data...)` | +| type `T` | form of output of `predict(model, ::T, data)` | |:-------------------------------:|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------| | `LearnAPI.JointSampleable` | object that can be sampled to obtain a *vector* whose elements have the form of target observations; the vector length matches the number of observations in `data`. | | `LearnAPI.JointDistribution` | explicit probability density/mass function whose sample space is vectors of target observations; the vector length matches the number of observations in `data` | | `LearnAPI.JointLogDistribution` | explicit log-probability density/mass function whose sample space is vectors of target observations; the vector length matches the number of observations in `data` | -> Table of `LearnAPI.KindOfProxy` subtypes not subtyping `LearnAPI.IID` +> Table of `LearnAPI.KindOfProxy` subtypes subtyping `LearnAPI.Joint` diff --git a/docs/src/minimize.md b/docs/src/minimize.md index a9423780..6fad919a 100644 --- a/docs/src/minimize.md +++ b/docs/src/minimize.md @@ -23,9 +23,9 @@ LearnAPI.feature_importances(recovered_model) # Implementation guide -| method | compulsory? | fallback | requires | -|:-----------------------------|:-----------:|:--------:|:-------------:| -| [`minimize`](@ref) | no | identity | [`fit`](@ref) | +| method | compulsory? | fallback | +|:-----------------------------|:-----------:|:--------:| +| [`minimize`](@ref) | no | identity | # Reference diff --git a/docs/src/obs.md b/docs/src/obs.md index bfb35a69..fe198a85 100644 --- a/docs/src/obs.md +++ b/docs/src/obs.md @@ -1,45 +1,40 @@ # [`obs`](@id data_interface) -The [MLUtils.jl](https://github.com/JuliaML/MLUtils.jl) package provides two methods -`getobs` and `numobs` for resampling data divided into multiple observations, including -arrays and tables. The data objects returned below are guaranteed to implement this -interface and can be passed to the relevant method (`obsfit`, `obspredict` or -`obstransform`) possibly after resampling using `MLUtils.getobs`. This may provide -performance advantages over naive workflows. +The `obs` method takes data intended as input to `fit`, `predict` or `transform`, and +transforms it to an algorithm-specific form guaranteed to implement a form of observation +access designated by the algorithm. The transformed data can then be resampled and passed +on to the relevant method in place of the original input. Using `obs` may provide +performance advantages over naive workflows in some cases (e.g., cross-validation). ```julia -obs(fit, algorithm, data...) -> -obs(predict, algorithm, data...) -> -obs(transform, algorithm, data...) -> +obs(algorithm, data) # can be passed to `fit` instead of `data` +obs(model, data) # can be passed to `predict` or `tranform` instead of `data` ``` ## Typical workflows -LearnAPI.jl makes no assumptions about the form of data `X` and `y` in a call like -`fit(algorithm, X, y)`. The particular `algorithm` is free to articulate it's own -requirements. However, in this example, the definition +LearnAPI.jl makes no explicit assumptions about the form of data `X` and `y` in a call +like `fit(algorithm, (X, y))`. 
However, if we define

```julia
observations = obs(algorithm, (X, y))
```

then, assuming the typical case that `LearnAPI.data_interface(algorithm) ==
Base.HasLength()`, `observations` implements the
[MLUtils.jl](https://juliaml.github.io/MLUtils.jl/dev/) `getobs`/`numobs` interface.
Moreover, we can pass `observations` to `fit` in place of
the original data, or first resample it using `MLUtils.getobs`:

```julia
# equivalent to `model = fit(algorithm, (X, y))` (or `fit(algorithm, X, y)`):
model = fit(algorithm, observations)

# with resampling:
resampled_observations = MLUtils.getobs(observations, 1:10)
model = fit(algorithm, resampled_observations)
```

In some implementations, the alternative pattern above can be used to avoid repeating
unnecessary internal data preprocessing, or inefficient resampling. For example, here's
how a user might call `obs` and `MLUtils.getobs` to perform efficient cross-validation:

```julia
using LearnAPI
import MLUtils

X = 
y = 
algorithm = 

train_test_folds = map([1:10, 11:20, 21:30]) do test
    (setdiff(1:30, test), test)
end

fitobs = obs(algorithm, (X, y))
never_trained = true

scores = map(train_test_folds) do (train, test)

    # train using model-specific representation of data:
    trainobs = MLUtils.getobs(fitobs, train)
    model = fit(algorithm, trainobs)

    # predict on the fold complement:
    if never_trained
        global predictobs = obs(model, X)
        global never_trained = false
    end
    testobs = MLUtils.getobs(predictobs, test)
    ŷ = predict(model, LiteralTarget(), testobs)
    return 

end
```

Note here that the output of `predict` will match the representation of `y`, i.e.,
there is no concept of an algorithm-specific representation of *outputs*, only inputs.

## Implementation guide

-| method        | compulsory? | fallback               |
-|:--------------|:-----------:|:----------------------:|
-| [`obs`](@ref) | depends     | slurps `data` argument |
-|               |             |                        |
+| method                                  | compulsory? | fallback       |
+|:----------------------------------------|:-----------:|:--------------:|
+| [`obs(algorithm_or_model, data)`](@ref) | depends     | returns `data` |
+|                                         |             |                |

-If the `data` consumed by `fit`, `predict` or `transform` consists only of tables and
-arrays (with last dimension the observation dimension) then overloading `obs` is
-optional. 
However, if an implementation overloads `obs` to return a (thinly wrapped)
-representation of user data that is closer to what the core algorithm actually uses, and
-overloads `MLUtils.getobs` (or, more typically, `Base.getindex`) to make resampling of that
-representation efficient, then those optimizations become available to the user, without
-the user concerning herself with the details of the representation.
-
-A sample implementation is given in the [`obs`](@ref) document-string below.
+A sample implementation is given in [Providing an advanced data interface](@ref).

## Reference

```@docs
obs
```
diff --git a/docs/src/predict_transform.md b/docs/src/predict_transform.md
index 382216b8..35fb52d7 100644
--- a/docs/src/predict_transform.md
+++ b/docs/src/predict_transform.md
@@ -1,72 +1,74 @@
-# [`predict`, `transform`, and relatives](@id operations)
-
-Standard methods:
+# [`predict`, `transform` and `inverse_transform`](@id operations)

```julia
-predict(model, kind_of_proxy, data...) -> prediction
-transform(model, data...) -> transformed_data
-inverse_transform(model, data...) -> inverted_data
+predict(model, kind_of_proxy, data)
+transform(model, data)
+inverse_transform(model, data)
```

-Methods consuming output, `obsdata`, of data-preprocessor [`obs`](@ref):
-
-```julia
-obspredict(model, kind_of_proxy, obsdata) -> prediction
-obstransform(model, obsdata) -> transformed_data
-```
+When a method expects a tuple form of argument, `data = (X1, ..., Xn)`, then a slurping
+signature is also provided, as in `transform(model, X1, ..., Xn)`.

## Typical workflows

Train some supervised `algorithm`:

```julia
-# Train some supervised `algorithm`:
model = fit(algorithm, X, y)
```

Predict probability distributions:

```julia
-# Predict probability distributions:
ŷ = predict(model, Distribution(), Xnew)
```

Generate point predictions:

```julia
-# Generate point predictions:
ŷ = predict(model, LiteralTarget(), Xnew)
```

Train a dimension-reducing `algorithm`:

```julia
-# Training a dimension-reducing `algorithm`:
model = fit(algorithm, X)
Xnew_reduced = transform(model, Xnew)
```

Apply an approximate right inverse:

```julia
-# Apply an approximate right inverse:
inverse_transform(model, Xnew_reduced)
```

### An advanced workflow

```julia
-fitdata = obs(fit, algorithm, X, y)
-predictdata = obs(predict, algorithm, Xnew)
-model = obsfit(algorithm, obsdata)
-ŷ = obspredict(model, LiteralTarget(), predictdata)
+fitobs = obs(algorithm, (X, y)) # algorithm-specific repr. of data
+model = fit(algorithm, MLUtils.getobs(fitobs, 1:100))
+predictobs = obs(model, MLUtils.getobs(X, 101:150))
+ŷ = predict(model, LiteralTarget(), predictobs)
```


## Implementation guide

-The methods `predict` and `transform` are not directly overloaded. Implement `obspredict`
-and `obstransform` instead:
-
-| method                      | compulsory? | fallback | requires                               |
-|:----------------------------|:-----------:|:--------:|:--------------------------------------:|
-| [`obspredict`](@ref)        | no          | none     | [`fit`](@ref)                          |
-| [`obstransform`](@ref)      | no          | none     | [`fit`](@ref)                          |
-| [`inverse_transform`](@ref) | no          | none     | [`fit`](@ref), [`obstransform`](@ref)  |
+| method                      | compulsory? | fallback |
+|:----------------------------|:-----------:|:--------:|
+| [`predict`](@ref)           | no          | none     |
+| [`transform`](@ref)         | no          | none     |
+| [`inverse_transform`](@ref) | no          | none     |


### Predict or transform? 
-If the algorithm has a notion of [target variable](@ref proxy), then arrange for -[`obspredict`](@ref) to output each supported [kind of target proxy](@ref +If the algorithm has a notion of [target variable](@ref proxy), then use +[`predict`](@ref) to output each supported [kind of target proxy](@ref proxy_types) (`LiteralTarget()`, `Distribution()`, etc). -For output not associated with a target variable, implement [`obstransform`](@ref) +For output not associated with a target variable, implement [`transform`](@ref) instead, which does not dispatch on [`LearnAPI.KindOfProxy`](@ref), but can be optionally -paired with an implementation of [`inverse_transform`](@ref) for returning (approximate) +paired with an implementation of [`inverse_transform`](@ref), for returning (approximate) right inverses to `transform`. @@ -74,8 +76,6 @@ right inverses to `transform`. ```@docs predict -obspredict transform -obstransform inverse_transform ``` diff --git a/docs/src/reference.md b/docs/src/reference.md index 5a46c6ab..5b15e03e 100644 --- a/docs/src/reference.md +++ b/docs/src/reference.md @@ -21,12 +21,13 @@ undertood that individual objects share the same number of observations, and tha resampling of one component implies synchronized resampling of the others. A `DataFrame` instance, from [DataFrames.jl](https://dataframes.juliadata.org/stable/), is -an example of data, the observations being the rows. LearnAPI.jl makes no assumptions -about how observations can be accessed, except in the case of the output of [`obs`](@ref -data_interface), which must implement the MLUtils.jl `getobs`/`numobs` interface. For -example, it is generally ambiguous whether the rows or columns of a matrix are considered -observations, but if a matrix is returned by [`obs`](@ref data_interface) the observations -must be the columns. +an example of data, the observations being the rows. Typically, data provided to +LearnAPI.jl algorithms, will implement the +[MLUtils.jl](https://juliaml.github.io/MLUtils.jl/stable) `getobs/numobs` interface for +accessing individual observations, but implementations can opt out of this requirement; +see [`obs`](@ref) and [`LearnAPI.data_interface`](@ref) for details. In the MLUtils.jl +convention, observations in tables are the rows but observations in a matrix are the +columns. ### [Hyperparameters](@id hyperparameters) @@ -69,38 +70,31 @@ by the package ### [Algorithms](@id algorithms) An object implementing the LearnAPI.jl interface is called an *algorithm*, although it is -more accurately "the configuration of some algorithm".¹ It will have a type name -reflecting the name of some ML/statistics algorithm (e.g., `RandomForestRegressor`) and it -will encapsulate a particular set of user-specified [hyperparameters](@ref). +more accurately "the configuration of some algorithm".¹ An algorithm encapsulates a +particular set of user-specified [hyperparameters](@ref) as the object's properties. It +does not store learned parameters. -Additionally, for `alg::Alg` to be a LearnAPI algorithm, we require: +For `algorithm` to be a valid LearnAPI.jl algorithm, +[`LearnAPI.constructor(algorithm)`](@ref) must be defined and return a keyword constructor +enabling recovery of `algorithm` from its properties: -- `Base.propertynames(alg)` returns the hyperparameter names; values can be accessed using - `Base.getproperty` - -- If `alg` is an algorithm, then so are all instances of the same type. 
- If `_alg` is another algorithm, then `alg == _alg` if and only if `typeof(alg) ==
  typeof(_alg)` and corresponding properties are `==`. This includes properties that are
  random number generators (which should be copied in training to avoid mutation).

- If an algorithm has other algorithms as hyperparameters, then
  [`LearnAPI.is_composite`](@ref)`(alg)` must be `true` (fallback is `false`).

- A keyword constructor for `Alg` exists, providing default values for *all* non-algorithm
  hyperparameters.

- At least one non-trait LearnAPI.jl function must be overloaded for instances of `Alg`,
  and accordingly `LearnAPI.functions(algorithm)` must be non-empty.

```julia
properties = propertynames(algorithm)
named_properties = NamedTuple{properties}(getproperty.(Ref(algorithm), properties))
@assert algorithm == LearnAPI.constructor(algorithm)(; named_properties...)
```

Note that if `algorithm` is an instance of a *mutable* struct, this requirement
generally requires overloading `Base.==` for the struct.

A *composite algorithm* is one with a property that can take other algorithms as values;
for such algorithms [`LearnAPI.is_composite`](@ref)`(algorithm)` must be `true` (fallback
is `false`). Generally, the keyword constructor provided by [`LearnAPI.constructor`](@ref)
must provide default values for all non-algorithm properties.

### Example

Any instance of `GradientRidgeRegressor` defined below is a valid algorithm.

```julia
struct GradientRidgeRegressor{T<:Real}
    learning_rate::T
    epochs::Int
    l2_regularization::T
end
GradientRidgeRegressor(; learning_rate=0.01, epochs=10, l2_regularization=0.01) =
    GradientRidgeRegressor(learning_rate, epochs, l2_regularization)
LearnAPI.constructor(::GradientRidgeRegressor) = GradientRidgeRegressor
```

-The same is not true if we make this a `mutable struct`. In that case we will need to
-appropriately overload `Base.==` for `GradientRidgeRegressor`.

Any object `algorithm` for which [`LearnAPI.functions`](@ref)`(algorithm)` is non-empty is
understood to have a valid implementation of the LearnAPI.jl interface.


## Methods

-Only these method names are exported: `fit`, `obsfit`, `predict`, `obspredict`,
-`transform`, `obstransform`, `inverse_transform`, `minimize`, and `obs`. All new
-implementations must implement [`obsfit`](@ref), the accessor function
-[`LearnAPI.algorithm`](@ref algorithm_minimize) and the trait
-[`LearnAPI.functions`](@ref).
+Only these method names are exported by LearnAPI: `fit`, `predict`, `transform`,
+`inverse_transform`, `minimize`, and `obs`. All new implementations must implement
+[`fit`](@ref), [`LearnAPI.algorithm`](@ref algorithm_minimize),
+[`LearnAPI.constructor`](@ref) and [`LearnAPI.functions`](@ref). The last two are
+algorithm traits, which can be set with the [`@trait`](@ref) macro.

### List of methods

- [`fit`](@ref fit): for training or updating algorithms that generalize to new data. 
For
  non-generalizing ("static") algorithms, `fit(algorithm)` generally wraps `algorithm` in
  a mutable struct that can be mutated by `predict`/`transform` to record byproducts of
  those operations.

- [`predict`](@ref operations): for outputting [targets](@ref proxy) or [target
  proxies](@ref proxy) (such as probability density functions)

- [`transform`](@ref operations): similar to `predict`, but for arbitrary kinds of output,
  and which can be paired with an `inverse_transform` method

- [`inverse_transform`](@ref operations): for inverting the output of `transform`
  ("inverting" broadly understood)

- [`minimize`](@ref algorithm_minimize): for stripping the `model` output by `fit` of
  inessential content, for purposes of serialization.

- [`obs`](@ref data_interface): a method for exposing to the user algorithm-specific
  representations of data guaranteed to implement observation access according to the
  value of the [`LearnAPI.data_interface`](@ref) trait

- [Accessor functions](@ref accessor_functions): include things like `feature_importances`
  and `training_losses`, for extracting, from training outcomes, information common to
  many algorithms.

- [Algorithm traits](@ref traits): special methods that promise specific algorithm
  behavior or record general information about the algorithm. Only
  [`LearnAPI.constructor`](@ref) and [`LearnAPI.functions`](@ref) are universally
  compulsory.

- [`LearnAPI.target`](@ref) and [`LearnAPI.weights`](@ref) are both traits and methods to
  extract, from `fit` input data, the target and per-observation weights, when available.

---

¹ We acknowledge users may not like this terminology, and may know "algorithm" by some
diff --git a/docs/src/traits.md b/docs/src/traits.md
index 3a263595..9ff63967 100644
--- a/docs/src/traits.md
+++ b/docs/src/traits.md
@@ -1,9 +1,8 @@
# [Algorithm Traits](@id traits)

-Traits generally promise specific algorithm behavior, such as: *This algorithm supports
-per-observation weights, which must appear as the third argument of `fit`*, or *This
-algorithm's `transform` method predicts `Real` vectors*. They also record more mundane
-information, such as a package license.
+Traits generally promise specific algorithm behavior, such as: *This algorithm supports
+per-observation weights*, or *This algorithm's `transform` method predicts `Real`
+vectors*. They also record more mundane information, such as a package license.

Algorithm traits are functions whose first (and usually only) argument is an algorithm.
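For example, traits may be queried on any algorithm instance. A sketch, assuming the
`Ridge` implementation from [Anatomy of an Implementation](@ref):

```julia
algorithm = Ridge(lambda=0.1)
LearnAPI.is_pure_julia(algorithm)               # false, the fallback, as not overloaded
:regression in LearnAPI.descriptors(algorithm)  # true, as declared with `@trait`
```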
In the examples column of the table below, `Table`, `Continuous`, `Sampleable` are names owned by the package [ScientificTypesBase.jl](https://github.com/JuliaAI/ScientificTypesBase.jl/). -| trait | return value | fallback value | example | -|:----------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------|:----------------------|:---------------------------------------------------------| -| [`LearnAPI.functions`](@ref)`(algorithm)` | functions you can apply to `algorithm` or associated model (traits excluded) | `()` | `(LearnAPI.fit, LearnAPI.predict, LearnAPI.algorithm)` | -| [`LearnAPI.kinds_of_proxy`](@ref)`(algorithm)` | instances `kop` of `KindOfProxy` for which an implementation of `LearnAPI.predict(algorithm, kop, ...)` is guaranteed. | `()` | `(Distribution(), Interval())` | -| [`LearnAPI.position_of_target`](@ref)`(algorithm)` | the positional index¹ of the **target** in `data` in `fit(algorithm, data...)` calls | `0` | 2 | -| [`LearnAPI.position_of_weights`](@ref)`(algorithm)` | the positional index¹ of **per-observation weights** in `data` in `fit(algorithm, data...)` | `0` | 3 | -| [`LearnAPI.descriptors`](@ref)`(algorithm)` | lists one or more suggestive algorithm descriptors from `LearnAPI.descriptors()` | `()` | (:regression, :probabilistic) | -| [`LearnAPI.is_pure_julia`](@ref)`(algorithm)` | `true` if implementation is 100% Julia code | `false` | `true` | -| [`LearnAPI.pkg_name`](@ref)`(algorithm)` | name of package providing core code (may be different from package providing LearnAPI.jl implementation) | `"unknown"` | `"DecisionTree"` | -| [`LearnAPI.pkg_license`](@ref)`(algorithm)` | name of license of package providing core code | `"unknown"` | `"MIT"` | -| [`LearnAPI.doc_url`](@ref)`(algorithm)` | url providing documentation of the core code | `"unknown"` | `"https://en.wikipedia.org/wiki/Decision_tree_learning"` | -| [`LearnAPI.load_path`](@ref)`(algorithm)` | a string indicating where the struct for `typeof(algorithm)` is defined, beginning with name of package providing implementation | `"unknown"` | `FastTrees.LearnAPI.DecisionTreeClassifier` | -| [`LearnAPI.is_composite`](@ref)`(algorithm)` | `true` if one or more properties (fields) of `algorithm` may be an algorithm | `false` | `true` | -| [`LearnAPI.human_name`](@ref)`(algorithm)` | human name for the algorithm; should be a noun | type name with spaces | "elastic net regressor" | -| [`LearnAPI.iteration_parameter`](@ref)`(algorithm)` | symbolic name of an iteration parameter | `nothing` | :epochs | -| [`LearnAPI.fit_scitype`](@ref)`(algorithm)` | upper bound on `scitype(data)` ensuring `fit(algorithm, data...)` works | `Union{}` | `Tuple{Table(Continuous), AbstractVector{Continuous}}` | -| [`LearnAPI.fit_observation_scitype`](@ref)`(algorithm)` | upper bound on `scitype(observation)` for `observation` in `data` ensuring `fit(algorithm, data...)` works | `Union{}` | `Tuple{AbstractVector{Continuous}, Continuous}` | -| [`LearnAPI.fit_type`](@ref)`(algorithm)` | upper bound on `typeof(data)` ensuring `fit(algorithm, data...)` works | `Union{}` | `Tuple{AbstractMatrix{<:Real}, AbstractVector{<:Real}}` | -| [`LearnAPI.fit_observation_type`](@ref)`(algorithm)` | upper bound on `typeof(observation)` for `observation` in `data` ensuring `fit(algorithm, data...)` works | `Union{}` | `Tuple{AbstractVector{<:Real}, Real}` | -| [`LearnAPI.predict_input_scitype`](@ref)`(algorithm)` | 
upper bound on `scitype(data)` ensuring `predict(model, kop, data...)` works | `Union{}` | `Table(Continuous)` | -| [`LearnAPI.predict_input_observation_scitype`](@ref)`(algorithm)` | upper bound on `scitype(observation)` for `observation` in `data` ensuring `predict(model, kop, data...)` works | `Union{}` | `Vector{Continuous}` | -| [`LearnAPI.predict_input_type`](@ref)`(algorithm)` | upper bound on `typeof(data)` ensuring `predict(model, kop, data...)` works | `Union{}` | `AbstractMatrix{<:Real}` | -| [`LearnAPI.predict_input_observation_type`](@ref)`(algorithm)` | upper bound on `typeof(observation)` for `observation` in `data` ensuring `predict(model, kop, data...)` works | `Union{}` | `Vector{<:Real}` | -| [`LearnAPI.predict_output_scitype`](@ref)`(algorithm, kind_of_proxy)` | upper bound on `scitype(predict(model, ...))` | `Any` | `AbstractVector{Continuous}` | -| [`LearnAPI.predict_output_type`](@ref)`(algorithm, kind_of_proxy)` | upper bound on `typeof(predict(model, ...))` | `Any` | `AbstractVector{<:Real}` | -| [`LearnAPI.transform_input_scitype`](@ref)`(algorithm)` | upper bound on `scitype(data)` ensuring `transform(model, data...)` works | `Union{}` | `Table(Continuous)` | -| [`LearnAPI.transform_input_observation_scitype`](@ref)`(algorithm)` | upper bound on `scitype(observation)` for `observation` in `data` ensuring `transform(model, data...)` works | `Union{}` | `Vector{Continuous}` | -| [`LearnAPI.transform_input_type`](@ref)`(algorithm)` | upper bound on `typeof(data)`ensuring `transform(model, data...)` works | `Union{}` | `AbstractMatrix{<:Real}}` | -| [`LearnAPI.transform_input_observation_type`](@ref)`(algorithm)` | upper bound on `typeof(observation)` for `observation` in `data` ensuring `transform(model, data...)` works | `Union{}` | `Vector{Continuous}` | -| [`LearnAPI.transform_output_scitype`](@ref)`(algorithm)` | upper bound on `scitype(transform(model, ...))` | `Any` | `Table(Continuous)` | -| [`LearnAPI.transform_output_type`](@ref)`(algorithm)` | upper bound on `typeof(transform(model, ...))` | `Any` | `AbstractMatrix{<:Real}` | -| [`LearnAPI.predict_or_transform_mutates`](@ref)`(algorithm)` | `true` if `predict` or `transform` mutates first argument | `false` | `true` | - -¹ If the value is `0`, then the variable in boldface type is not supported and not -expected to appear in `data`. If `length(data)` is less than the trait value, then `data` -is understood to exclude the variable, but note that `fit` can have multiple signatures of -varying lengths, as in `fit(algorithm, X, y)` and `fit(algorithm, X, y, w)`. A non-zero -value is a promise that `fit` includes a signature of sufficient length to include the -variable. 
- +| trait | return value | fallback value | example | +|:----------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------|:------------------------------------------------------|:---------------------------------------------------------| +| [`LearnAPI.constructor`](@ref)`(algorithm)` | constructor for generating new or modified versions of `algorithm` | (no fallback) | `RidgeRegressor` | +| [`LearnAPI.functions`](@ref)`(algorithm)` | functions you can apply to `algorithm` or associated model (traits excluded) | `()` | `(fit, predict, minimize, LearnAPI.algorithm, obs)` | +| [`LearnAPI.kinds_of_proxy`](@ref)`(algorithm)` | instances `kind` of `KindOfProxy` for which an implementation of `LearnAPI.predict(algorithm, kind, ...)` is guaranteed. | `()` | `(Distribution(), Interval())` | +| [`LearnAPI.target`](@ref)`(algorithm)` | `true` if target can appear in `fit` data | `false` | `true` | +| [`LearnAPI.weights`](@ref)`(algorithm)` | `true` if per-observation weights can appear in `fit` data | `false` | `true` | +| [`LearnAPI.descriptors`](@ref)`(algorithm)` | lists one or more suggestive algorithm descriptors from `LearnAPI.descriptors()` | `()` | (:regression, :probabilistic) | +| [`LearnAPI.is_pure_julia`](@ref)`(algorithm)` | `true` if implementation is 100% Julia code | `false` | `true` | +| [`LearnAPI.pkg_name`](@ref)`(algorithm)` | name of package providing core code (may be different from package providing LearnAPI.jl implementation) | `"unknown"` | `"DecisionTree"` | +| [`LearnAPI.pkg_license`](@ref)`(algorithm)` | name of license of package providing core code | `"unknown"` | `"MIT"` | +| [`LearnAPI.doc_url`](@ref)`(algorithm)` | url providing documentation of the core code | `"unknown"` | `"https://en.wikipedia.org/wiki/Decision_tree_learning"` | +| [`LearnAPI.load_path`](@ref)`(algorithm)` | a string indicating where the struct for `typeof(algorithm)` is defined, beginning with name of package providing implementation | `"unknown"` | `FastTrees.LearnAPI.DecisionTreeClassifier` | +| [`LearnAPI.is_composite`](@ref)`(algorithm)` | `true` if one or more properties (fields) of `algorithm` may be an algorithm | `false` | `true` | +| [`LearnAPI.human_name`](@ref)`(algorithm)` | human name for the algorithm; should be a noun | type name with spaces | "elastic net regressor" | +| [`LearnAPI.data_interface`](@ref)`(algorithm)` | Interface implemented by objects returned by [`obs`](@ref) | `Base.HasLength()` (supports `MLUtils.getobs/numobs`) | `Base.SizeUnknown()` (supports `iterate`) | +| [`LearnAPI.iteration_parameter`](@ref)`(algorithm)` | symbolic name of an iteration parameter | `nothing` | :epochs | +| [`LearnAPI.fit_scitype`](@ref)`(algorithm)` | upper bound on `scitype(data)` ensuring `fit(algorithm, data...)` works | `Union{}` | `Tuple{Table(Continuous), AbstractVector{Continuous}}` | +| [`LearnAPI.fit_observation_scitype`](@ref)`(algorithm)` | upper bound on `scitype(observation)` for `observation` in `data` ensuring `fit(algorithm, data...)` works | `Union{}` | `Tuple{AbstractVector{Continuous}, Continuous}` | +| [`LearnAPI.fit_type`](@ref)`(algorithm)` | upper bound on `typeof(data)` ensuring `fit(algorithm, data...)` works | `Union{}` | `Tuple{AbstractMatrix{<:Real}, AbstractVector{<:Real}}` | +| [`LearnAPI.fit_observation_type`](@ref)`(algorithm)` | upper bound on `typeof(observation)` for `observation` in `data` ensuring `fit(algorithm, 
data...)` works | `Union{}` | `Tuple{AbstractVector{<:Real}, Real}` |
+| [`LearnAPI.predict_input_scitype`](@ref)`(algorithm)` | upper bound on `scitype(data)` ensuring `predict(model, kind, data...)` works | `Union{}` | `Table(Continuous)` |
+| [`LearnAPI.predict_input_observation_scitype`](@ref)`(algorithm)` | upper bound on `scitype(observation)` for `observation` in `data` ensuring `predict(model, kind, data...)` works | `Union{}` | `Vector{Continuous}` |
+| [`LearnAPI.predict_input_type`](@ref)`(algorithm)` | upper bound on `typeof(data)` ensuring `predict(model, kind, data...)` works | `Union{}` | `AbstractMatrix{<:Real}` |
+| [`LearnAPI.predict_input_observation_type`](@ref)`(algorithm)` | upper bound on `typeof(observation)` for `observation` in `data` ensuring `predict(model, kind, data...)` works | `Union{}` | `Vector{<:Real}` |
+| [`LearnAPI.predict_output_scitype`](@ref)`(algorithm, kind_of_proxy)` | upper bound on `scitype(predict(model, ...))` | `Any` | `AbstractVector{Continuous}` |
+| [`LearnAPI.predict_output_type`](@ref)`(algorithm, kind_of_proxy)` | upper bound on `typeof(predict(model, ...))` | `Any` | `AbstractVector{<:Real}` |
+| [`LearnAPI.transform_input_scitype`](@ref)`(algorithm)` | upper bound on `scitype(data)` ensuring `transform(model, data...)` works | `Union{}` | `Table(Continuous)` |
+| [`LearnAPI.transform_input_observation_scitype`](@ref)`(algorithm)` | upper bound on `scitype(observation)` for `observation` in `data` ensuring `transform(model, data...)` works | `Union{}` | `Vector{Continuous}` |
+| [`LearnAPI.transform_input_type`](@ref)`(algorithm)` | upper bound on `typeof(data)` ensuring `transform(model, data...)` works | `Union{}` | `AbstractMatrix{<:Real}` |
+| [`LearnAPI.transform_input_observation_type`](@ref)`(algorithm)` | upper bound on `typeof(observation)` for `observation` in `data` ensuring `transform(model, data...)` works | `Union{}` | `Vector{<:Real}` |
+| [`LearnAPI.transform_output_scitype`](@ref)`(algorithm)` | upper bound on `scitype(transform(model, ...))` | `Any` | `Table(Continuous)` |
+| [`LearnAPI.transform_output_type`](@ref)`(algorithm)` | upper bound on `typeof(transform(model, ...))` | `Any` | `AbstractMatrix{<:Real}` |
+| [`LearnAPI.predict_or_transform_mutates`](@ref)`(algorithm)` | `true` if `predict` or `transform` mutates first argument | `false` | `true` |
 
 ### Derived Traits
 
@@ -117,10 +110,11 @@ informative (as in `LearnAPI.predict_type(algorithm) = Any`).
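Several traits can be declared at once with the `@trait` convenience macro, documented under Reference below. A sketch, for a hypothetical `MyRidge` type (all names and values are illustrative):

```julia
using LearnAPI

struct MyRidge
    lambda::Float64
end
MyRidge(; lambda=0.1) = MyRidge(lambda)  # keyword constructor, as returned by the trait

# each keyword below expands to a one-line trait method for `MyRidge`:
@trait(
    MyRidge,
    constructor = MyRidge,
    descriptors = (:regression,),
    is_pure_julia = true,
)

@assert LearnAPI.is_pure_julia(MyRidge())
```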
## Reference
 
```@docs
+LearnAPI.constructor
 LearnAPI.functions
 LearnAPI.kinds_of_proxy
-LearnAPI.position_of_target
-LearnAPI.position_of_weights
+LearnAPI.target
+LearnAPI.weights
 LearnAPI.descriptors
 LearnAPI.is_pure_julia
 LearnAPI.pkg_name
@@ -129,6 +123,7 @@ LearnAPI.doc_url
 LearnAPI.load_path
 LearnAPI.is_composite
 LearnAPI.human_name
+LearnAPI.data_interface
 LearnAPI.iteration_parameter
 LearnAPI.fit_scitype
 LearnAPI.fit_type
@@ -147,4 +142,5 @@ LearnAPI.transform_input_observation_type
 LearnAPI.predict_or_transform_mutates
 LearnAPI.transform_output_scitype
 LearnAPI.transform_output_type
+LearnAPI.@trait
```
diff --git a/src/LearnAPI.jl b/src/LearnAPI.jl
index 24626bcd..9ba6b54e 100644
--- a/src/LearnAPI.jl
+++ b/src/LearnAPI.jl
@@ -12,8 +12,7 @@ include("accessor_functions.jl")
 include("traits.jl")
 
 export @trait
-export fit, predict, transform, inverse_transform, fit_transform, minimize
-export obs, obsfit, obspredict, obstransform
+export fit, predict, transform, inverse_transform, minimize, obs
 
 for name in Symbol.(CONCRETE_TARGET_PROXY_TYPES_SYMBOLS)
     @eval export $name
diff --git a/src/accessor_functions.jl b/src/accessor_functions.jl
index d20f1da2..b87a3ab1 100644
--- a/src/accessor_functions.jl
+++ b/src/accessor_functions.jl
@@ -160,6 +160,24 @@ $(DOC_IMPLEMENTED_METHODS(:training_losses)).
 """
 function training_losses end
 
+"""
+    LearnAPI.training_predictions(model)
+
+Return internally computed training predictions when running `model = fit(algorithm, ...)`
+for some `algorithm`.
+
+See also [`fit`](@ref).
+
+# New implementations
+
+Implement for iterative algorithms that compute and record training predictions as part of
+training (e.g., neural networks).
+
+$(DOC_IMPLEMENTED_METHODS(:training_predictions)).
+
+"""
+function training_predictions end
+
 """
     LearnAPI.training_scores(model)
 
@@ -227,6 +245,7 @@ const ACCESSOR_FUNCTIONS_WITHOUT_EXTRAS = (
     feature_importances,
     training_labels,
     training_losses,
+    training_predictions,
     training_scores,
     components,
 )
diff --git a/src/fit.jl b/src/fit.jl
index 010e53e0..316d0eab 100644
--- a/src/fit.jl
+++ b/src/fit.jl
@@ -6,179 +6,35 @@ const TRAINING_FUNCTIONS = (:fit,)
 # # FIT
 
 """
-    LearnAPI.fit(algorithm, data...; verbosity=1)
+    LearnAPI.fit(algorithm, data; verbosity=1)
+    LearnAPI.fit(algorithm; verbosity=1)
 
 Execute the algorithm with configuration `algorithm` using the provided training `data`,
 returning an object, `model`, on which other methods, such as [`predict`](@ref) or
 [`transform`](@ref), can be dispatched. [`LearnAPI.functions(algorithm)`](@ref) returns a
 list of methods that can be applied to either `algorithm` or `model`.
 
-# Arguments
+The second signature applies to algorithms which do not generalize to new observations. In
+that case `predict` or `transform` actually executes the algorithm, but may also write to
+the (mutable) object returned by `fit`.
 
-- `algorithm`: property-accessible object whose properties are the hyperparameters of
-  some ML/statistical algorithm
-
-$(DOC_ARGUMENTS(:fit))
-
-- `verbosity=1`: logging level; set to `0` for warnings only, and `-1` for silent training
-
-See also [`obsfit`](@ref), [`predict`](@ref), [`transform`](@ref),
-[`inverse_transform`](@ref), [`LearnAPI.functions`](@ref), [`obs`](@ref).
- -# Extended help - -# New implementations - -LearnAPI.jl provides the following definition of `fit`, which is never directly overloaded: - -```julia -fit(algorithm, data...; verbosity=1) = - obsfit(algorithm, Obs(), obs(fit, algorithm, data...); verbosity) -``` - -Rather, new algorithms should overload [`obsfit`](@ref). See also [`obs`](@ref). - -""" -fit(algorithm, data...; verbosity=1) = - obsfit(algorithm, obs(fit, algorithm, data...), verbosity) - -""" - obsfit(algorithm, obsdata; verbosity=1) - -A lower-level alternative to [`fit`](@ref), this method consumes a pre-processed form of -user data. Specifically, the following two code snippets are equivalent: +When `data` is a tuple, a data slurping form of `fit` is typically provided. ```julia -model = fit(algorithm, data...) +model = fit(algorithm, (X, y)) # or `fit(algorithm, X, y)` +ŷ = predict(model, X) ``` -and -```julia -obsdata = obs(fit, algorithm, data...) -model = obsfit(algorithm, obsdata) -``` - -Here `obsdata` is algorithm-specific, "observation-accessible" data, meaning it implements -the MLUtils.jl `getobs`/`numobs` interface for observation resampling (even if `data` does -not). Moreover, resampled versions of `obsdata` may be passed to `obsfit` in its place. +Use `verbosity=0` for warnings only, and `-1` for silent training. -The use of `obsfit` may offer performance advantages. See more at [`obs`](@ref). - -See also [`fit`](@ref), [`obs`](@ref). +See also [`predict`](@ref), [`transform`](@ref), [`inverse_transform`](@ref), +[`LearnAPI.functions`](@ref), [`obs`](@ref). # Extended help # New implementations -Implementation of the following method signature is compulsory for all new algorithms: - -```julia -LearnAPI.obsfit(algorithm, obsdata, verbosity) -``` - -Here `obsdata` has the form explained above. If [`obs`](@ref)`(fit, ...)` is not being -overloaded, then a fallback gives `obsdata = data` (always a tuple!). Note that -`verbosity` is a positional argument, not a keyword argument in the overloaded signature. - -New implementations must also implement [`LearnAPI.algorithm`](@ref). - -If overloaded, then the functions `LearnAPI.obsfit` and `LearnAPI.fit` must be included in -the tuple returned by the [`LearnAPI.functions(algorithm)`](@ref) trait. - -## Non-generalizing algorithms - -If the algorithm does not generalize to new data (e.g, DBSCAN clustering) then `data = ()` -and `obsfit` carries out no computation, as this happen entirely in a `transform` and/or -`predict` call. In such cases, `obsfit(algorithm, ...)` may return `algorithm`, but -another possibility is allowed: To provide a mechanism for `transform`/`predict` to report -byproducts of the computation (e.g., a list of boundary points in DBSCAN clustering) they -are allowed to *mutate* the `model` object returned by `obsfit`, which is then arranged to -be a mutable struct wrapping `algorithm` and fields to store the byproducts. In that case, -[`LearnAPI.predict_or_transform_mutates(algorithm)`](@ref) must be overloaded to return -`true`. - -""" -obsfit(algorithm, obsdata; verbosity=1) = - obsfit(algorithm, obsdata, verbosity) - - -# # UPDATE - -""" - LearnAPI.update!(algorithm, verbosity, fitted_params, state, data...) - -Based on the values of `state`, and `fitted_params` returned by a preceding call to -[`LearnAPI.fit`](@ref), [`LearnAPI.ingest!`](@ref), or [`LearnAPI.update!`](@ref), update a -algorithm's fitted parameters, returning new (or mutated) `state` and `fitted_params`. 
-
-Intended for retraining when the training data has not changed, but `algorithm`
-properties (hyperparameters) may have changed, e.g., when increasing an iteration
-parameter. Specifically, the assumption is that `data` have the same values
-seen in the most recent call to `fit/update!/ingest!`.
-
-For incremental training (same algorithm, new data) see instead [`LearnAPI.ingest!`](@ref).
-
-# Return value
-
-Same as [`LearnAPI.fit`](@ref), namely a tuple (`fitted_params`, `state`, `report`). See
-[`LearnAPI.fit`](@ref) for details.
-
-
-# New implementations
-
-Overloading this method is optional. A fallback calls `LearnAPI.fit`:
-
-```julia
-LearnAPI.update!(algorithm, verbosity, fitted_params, state, data...) =
-    fit(algorithm, verbosity, data)
-```
-$(DOC_IMPLEMENTED_METHODS(:fit))
-
-The most common use case is continuing training of an iterative algorithm: `state` is
-simply a copy of the algorithm used in the last training call (`fit`, `update!` or
-`ingest!`) and this will include the current number of iterations as a property. If
-`algorithm` and `state` differ only in the number of iterations (e.g., epochs in a neural
-network), which has increased, then the fitted parameters (network weights and biases) are
-updated, rather than computed from scratch. Otherwise, `update!` simply calls `fit`, to
-force retraining from scratch.
-
-It is permitted to return mutated versions of `state` and `fitted_params`.
-
-See also [`LearnAPI.fit`](@ref), [`LearnAPI.ingest!`](@ref).
-
-"""
-
-
-# # INGEST
-
-"""
-    LearnAPI.ingest!(algorithm, verbosity, fitted_params, state, data...)
-
-For an algorithm that supports incremental learning, update the fitted parameters using
-`data`, which has typically not been seen before. The arguments `state` and
-`fitted_params` are the output of a preceding call to [`LearnAPI.fit`](@ref),
-[`LearnAPI.ingest!`](@ref), or [`LearnAPI.update!`](@ref), of which mutated or new
-versions are returned.
-
-For updating fitted parameters using the *same* data but new hyperparameters, see instead
-[`LearnAPI.update!`](@ref).
-
-For training an algorithm with new hyperparameters but *unchanged* data, see instead
-[`LearnAPI.update!`](@ref).
-
-
-# Return value
-
-Same as [`LearnAPI.fit`](@ref), namely a tuple (`fitted_params`, `state`, `report`). See
-[`LearnAPI.fit`](@ref) for details.
-
-
-# New implementations
-
-Implementing this method is optional. It has no fallback.
-
-$(DOC_IMPLEMENTED_METHODS(:fit))
-
-See also [`LearnAPI.fit`](@ref), [`LearnAPI.update!`](@ref).
+Implementation is compulsory. The signature must include `verbosity`.
 
 """
+fit(algorithm, data...; kwargs...) = nothing
diff --git a/src/minimize.jl b/src/minimize.jl
index 173ee24f..f37b9d0a 100644
--- a/src/minimize.jl
+++ b/src/minimize.jl
@@ -5,7 +5,7 @@ Return a version of `model` that will generally have a smaller memory allocation
 `model`, suitable for serialization. Here `model` is any object returned by
 [`fit`](@ref). Accessor functions that can be called on `model` may not work on
 `minimize(model)`, but [`predict`](@ref), [`transform`](@ref) and
-[`inverse_transform`](@ref) will work, if implemented for `model`. Check
+[`inverse_transform`](@ref) will work, if implemented. Check
 `LearnAPI.functions(LearnAPI.algorithm(model))` to see what the original `model`
 implements.
 
diff --git a/src/obs.jl b/src/obs.jl
index 75da42f4..0348d3da 100644
--- a/src/obs.jl
+++ b/src/obs.jl
@@ -1,17 +1,20 @@
 """
-    obs(func, algorithm, data...)
+    obs(algorithm, data)
+    obs(model, data)
 
-Where `func` is `fit`, `predict` or `transform`, return a combined, algorithm-specific,
-representation of `data...`, which can be passed directly to `obsfit`, `obspredict` or
-`obstransform`, as shown in the example below.
+Return an algorithm-specific representation of `data`, suitable for passing to `fit`
+(first signature) or to `predict` and `transform` (second signature), in place of
+`data`. Here `model` is the return value of `fit(algorithm, ...)` for some LearnAPI.jl
+algorithm, `algorithm`.
 
-The returned object implements the `getobs`/`numobs` observation-resampling interface
-provided by MLUtils.jl, even if `data` does not.
+The returned object is guaranteed to implement observation access as indicated
+by [`LearnAPI.data_interface(algorithm)`](@ref) (typically the
+[MLUtils.jl](https://juliaml.github.io/MLUtils.jl/dev/) `getobs`/`numobs` interface).
 
-Calling `func` on the returned object may be cheaper than calling `func` directly on
-`data...`. And resampling the returned object using `MLUtils.getobs` may be cheaper than
-directly resampling the components of `data` (an operation not provided by the LearnAPI.jl
-interface).
+Calling `fit`/`predict`/`transform` on the returned objects may have performance
+advantages over calling directly on `data` in some contexts. And resampling the returned
+object using `MLUtils.getobs` may be cheaper than directly resampling the components of
+`data`.
 
 # Example
 
@@ -23,100 +26,52 @@ y = 
 Xtrain = Tables.select(X, 1:100)
 ytrain = y[1:100]
 
-model = fit(algorithm, Xtrain, ytrain)
+model = fit(algorithm, (Xtrain, ytrain))
 ŷ = predict(model, LiteralTarget(), Tables.select(X, 101:150))
 ```
 
-Alternative workflow using `obs`:
+Alternative workflow using `obs` and the MLUtils.jl API:
 
 ```julia
 import MLUtils
-fitdata = obs(fit, algorithm, X, y)
-predictdata = obs(predict, algorithm, X)
+fit_observations = obs(algorithm, (X, y))
+model = fit(algorithm, MLUtils.getobs(fit_observations, 1:100))
 
-model = obsfit(algorithm, MLUtils.getobs(fitdata, 1:100))
-ẑ = obspredict(model, LiteralTarget(), MLUtils.getobs(predictdata, 101:150))
+predict_observations = obs(model, X)
+ẑ = predict(model, LiteralTarget(), MLUtils.getobs(predict_observations, 101:150))
 @assert ẑ == ŷ
 ```
 
-See also [`obsfit`](@ref), [`obspredict`](@ref), [`obstransform`](@ref).
+See also [`LearnAPI.data_interface`](@ref).
 
# Extended help
 
 # New implementations
 
-If the `data` to be consumed in standard user calls to `fit`, `predict` or `transform`
-consists only of tables and arrays (with last dimension the observation dimension) then
-overloading `obs` is optional, but the user will get no performance benefits by using
-it. The implementation of `obs` is optional under more general circumstances stated at the
-end.
+Implementation is typically optional.
 
-The fallback for `obs` just slurps the provided data:
+For each supported form of `data` in `fit(algorithm, data)`, `predict(model, data)`, and
+`transform(model, data)`, it must be true that `model = fit(algorithm, observations)` is
+supported, whenever `observations = obs(algorithm, data)`, and that `predict(model,
+observations)` and `transform(model, observations)` are supported, whenever `observations
+= obs(model, data)`.
 
-```julia
-obs(func, alg, data...) = data
-```
+The fallback for `obs` is `obs(model_or_algorithm, data) = data`, and the fallback for
+`LearnAPI.data_interface(algorithm)` indicates MLUtils.jl as the adopted interface.
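For example, this contract is what makes the following generic pattern work for any compliant algorithm; a sketch, in which `algorithm`, `X` and `y` are placeholders:

```julia
import MLUtils

observations = obs(algorithm, (X, y))       # algorithm-specific representation
train = MLUtils.getobs(observations, 1:80)  # resampling, assuming the default
                                            # `LearnAPI.data_interface` value
model = fit(algorithm, train)               # `fit` must accept the representation
```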
+For details refer to the [`LearnAPI.data_interface`](@ref) document string.
 
-The only contractual obligation of `obs` is to return an object implementing the
-`getobs`/`numobs` interface. Generally it suffices to overload `Base.getindex` and
-`Base.length`. However, note that implementations of [`obsfit`](@ref),
-[`obspredict`](@ref), and [`obstransform`](@ref) depend on the form of output of `obs`.
-
-$(DOC_IMPLEMENTED_METHODS(:(obs), overloaded=true))
+In particular, if the `data` to be consumed by `fit`, `predict` or `transform` consists
+only of suitable tables and arrays, then `obs` and `LearnAPI.data_interface` do not need
+to be overloaded. However, the user will get no performance benefits by using `obs` in
+that case.
 
 ## Sample implementation
 
-Suppose that `fit`, for an algorithm of type `Alg`, is to have the primary signature
-
-```julia
-fit(algorithm::Alg, X, y)
-```
-
-where `X` is a table, `y` a vector. Internally, the algorithm is to call a lower level
-function
-
-`train(A, names, y)`
-
-where `A = Tables.matrix(X)'` and `names` are the column names of `X`. Then relevant parts
-of an implementation might look like this:
-
-```julia
-# thin wrapper for algorithm-specific representation of data:
-struct ObsData{T}
-    A::Matrix{T}
-    names::Vector{Symbol}
-    y::Vector{T}
-end
-
-# (indirect) implementation of `getobs/numobs`:
-Base.getindex(data::ObsData, I) =
-    ObsData(data.A[:,I], data.names, y[I])
-Base.length(data::ObsData, I) = length(data.y)
-
-# implementation of `obs`:
-function LearnAPI.obs(::typeof(fit), ::Alg, X, y)
-    table = Tables.columntable(X)
-    names = Tables.columnnames(table) |> collect
-    return ObsData(Tables.matrix(table)', names, y)
-end
-
-# implementation of `obsfit`:
-function LearnAPI.obsfit(algorithm::Alg, data::ObsData; verbosity=1)
-    coremodel = train(data.A, data.names, data.y)
-    data.verbosity > 0 && @info "Training using these features: $names."
-
-    return model
-end
-```
-
-## When is overloading `obs` optional?
+Refer to the "Anatomy of an Implementation" section of the LearnAPI
+[manual](https://juliaai.github.io/LearnAPI.jl/dev/).
 
-Overloading `obs` is optional, for a given `typeof(algorithm)` and `typeof(fun)`, if the
-components of `data` in the standard call `func(algorithm_or_model, data...)` are already
-expected to separately implement the `getobs`/`numbobs` interface. This is true for arrays
-whose last dimension is the observation dimension, and for suitable tables.
 
 """
-obs(func, alg, data...) = data
+obs(algorithm_or_model, data) = data
diff --git a/src/predict_transform.jl b/src/predict_transform.jl
index 71e4e730..a20598f8 100644
--- a/src/predict_transform.jl
+++ b/src/predict_transform.jl
@@ -8,20 +8,12 @@ const OPERATIONS = (:predict, :transform, :inverse_transform)
 const DOC_OPERATIONS_LIST_SYMBOL = join(map(op -> "`:$op`", OPERATIONS), ", ")
 const DOC_OPERATIONS_LIST_FUNCTION = join(map(op -> "`LearnAPI.$op`", OPERATIONS), ", ")
 
-DOC_ARGUMENTS(func) =
-"""
- -""" - DOC_MUTATION(op) = """ If [`LearnAPI.predict_or_transform_mutates(algorithm)`](@ref) is overloaded to return `true`, then `$op` may mutate it's first argument, but not in a way that alters the - result of a subsequent call to `obspredict`, `obstransform` or + result of a subsequent call to `predict`, `transform` or `inverse_transform`. This is necessary for some non-generalizing algorithms but is otherwise discouraged. See more at [`fit`](@ref). @@ -43,25 +35,20 @@ DOC_MINIMIZE(func) = # # METHOD STUBS/FALLBACKS """ - predict(model, kind_of_proxy::LearnAPI.KindOfProxy, data...) - predict(model, data...) - -The first signature returns target or target proxy predictions for input features `data`, -according to some `model` returned by [`fit`](@ref) or [`obsfit`](@ref). Where supported, -these are literally target predictions if `kind_of_proxy = LiteralTarget()`, and -probability density/mass functions if `kind_of_proxy = Distribution()`. List all options -with [`LearnAPI.kinds_of_proxy(algorithm)`](@ref), where `algorithm = + predict(model, kind_of_proxy::LearnAPI.KindOfProxy, data) + predict(model, data) + +The first signature returns target predictions, or proxies for target predictions, for +input features `data`, according to some `model` returned by [`fit`](@ref). Where +supported, these are literally target predictions if `kind_of_proxy = LiteralTarget()`, +and probability density/mass functions if `kind_of_proxy = Distribution()`. List all +options with [`LearnAPI.kinds_of_proxy(algorithm)`](@ref), where `algorithm = LearnAPI.algorithm(model)`. -The shortcut `predict(model, data...) = predict(model, LiteralTarget(), data...)` is also -provided. - -# Arguments +The shortcut `predict(model, data)` calls the first method with an algorithm-specific +`kind_of_proxy`. -- `model` is anything returned by a call of the form `fit(algorithm, ...)`, for some - LearnAPI-complaint `algorithm`. - -$(DOC_ARGUMENTS(:predict)) +The argument `model` is anything returned by a call of the form `fit(algorithm, ...)`. # Example @@ -76,181 +63,84 @@ predict(model, LiteralTarget(), Xnew) Note `predict ` does not mutate any argument, except in the special case `LearnAPI.predict_or_transform_mutates(algorithm) = true`. -See also [`obspredict`](@ref), [`fit`](@ref), [`transform`](@ref), -[`inverse_transform`](@ref). - -# Extended help - -# New implementations - -LearnAPI.jl provides the following definition of `predict` which is never to be directly -overloaded: - -```julia -predict(model, kop::LearnAPI.KindOfProxy, data...) = - obspredict(model, kop, obs(predict, LearnAPI.algorithm(model), data...)) -``` - -Rather, new algorithms overload [`obspredict`](@ref). - -""" -predict(model, kind_of_proxy::KindOfProxy, data...) = - obspredict(model, kind_of_proxy, obs(predict, algorithm(model), data...)) -predict(model, data...) = predict(model, LiteralTarget(), data...) - -""" - obspredict(model, kind_of_proxy::LearnAPI.KindOfProxy, obsdata) - -Similar to `predict` but consumes algorithm-specific representations of input data, -`obsdata`, as returned by `obs(predict, algorithm, data...)`. Here `data...` is the form of -data expected in the main [`predict`](@ref) method. Alternatively, such `obsdata` may be -replaced by a resampled version, where resampling is performed using `MLUtils.getobs` -(always supported). - -For some algorithms and workflows, `obspredict` will have a performance benefit over -[`predict`](@ref). See more at [`obs`](@ref). 
- -# Example - -In the following, `algorithm` is some supervised learning algorithm with -training features `X`, training target `y`, and test features `Xnew`: - -```julia -model = fit(algorithm, X, y) -obsdata = obs(predict, algorithm, Xnew) -ŷ = obspredict(model, LiteralTarget(), obsdata) -@assert ŷ == predict(model, LiteralTarget(), Xnew) -``` - -See also [`predict`](@ref), [`fit`](@ref), [`transform`](@ref), -[`inverse_transform`](@ref), [`obs`](@ref). +See also [`fit`](@ref), [`transform`](@ref), [`inverse_transform`](@ref). # Extended help # New implementations -Implementation of `obspredict` is optional, but required to enable `predict`. The method -must also handle `obsdata` in the case it is replaced by `MLUtils.getobs(obsdata, I)` for -some collection `I` of indices. If [`obs`](@ref) is not overloaded, then `obsdata = data`, -where `data...` is what the standard [`predict`](@ref) call expects, as in the call -`predict(model, kind_of_proxy, data...)`. Note `data` is always a tuple, even if `predict` -has only one data argument. See more at [`obs`](@ref). - +If there is no notion of a "target" variable in the LearnAPI.jl sense, or you need an +operation with an inverse, implement [`transform`](@ref) instead. -$(DOC_MUTATION(:obspredict)) +Implementation is optional. If the first signature is implemented for some +`kind_of_proxy`, then the implementation should provide an implementation of the second +convenience form, but it is free to choose the fallback `kind_of_proxy`. Each +`kind_of_proxy` that gets an implementation must be added to the list returned by +[`LearnAPI.kinds_of_proxy`](@ref). -If overloaded, you must include both `LearnAPI.obspredict` and `LearnAPI.predict` in the -list of methods returned by the [`LearnAPI.functions`](@ref) trait. +$(DOC_IMPLEMENTED_METHODS(:predict)) -An implementation is provided for each kind of target proxy you wish to support. See the -LearnAPI.jl documentation for options. Each supported `kind_of_proxy` instance should be -listed in the return value of the [`LearnAPI.kinds_of_proxy(algorithm)`](@ref) trait. +$(DOC_MINIMIZE(:predict)) -$(DOC_MINIMIZE(:obspredict)) - -""" -function obspredict end +$(DOC_MUTATION(:predict)) """ - transform(model, data...) +function predict end -Return a transformation of some `data`, using some `model`, as returned by [`fit`](@ref). -# Arguments +""" + transform(model, data) -- `model` is anything returned by a call of the form `fit(algorithm, ...)`, for some - LearnAPI-complaint `algorithm`. +Return a transformation of some `data`, using some `model`, as returned by +[`fit`](@ref). -$(DOC_ARGUMENTS(:transform)) +For `data` that consists of a tuple, a slurping version is typically provided, i.e., +`transform(model, X1, X2, X3)` in place of `transform(model, (X1, X2, X3))`. # Example -Here `X` and `Xnew` are data of the same form: +Below, `X` and `Xnew` are data of the same form. + +For an `algorithm` that generalizes to new data ("learns"): ```julia -# For an algorithm that generalizes to new data ("learns"): model = fit(algorithm, X; verbosity=0) transform(model, Xnew) - -# For a static (non-generalizing) transformer: -model = fit(algorithm) -transform(model, X) ``` -Note `transform` does not mutate any argument, except in the special case -`LearnAPI.predict_or_transform_mutates(algorithm) = true`. - -See also [`obstransform`](@ref), [`fit`](@ref), [`predict`](@ref), -[`inverse_transform`](@ref). 
- -# Extended help - -# New implementations - -LearnAPI.jl provides the following definition of `transform` which is never to be directly -overloaded: - +For a static (non-generalizing) transformer: ```julia -transform(model, data...) = - obstransform(model, obs(predict, LearnAPI.algorithm(model), data...)) +model = fit(algorithm) +W = transform(model, X) ``` -Rather, new algorithms overload [`obstransform`](@ref). - -""" -transform(model, data...) = - obstransform(model, obs(transform, LearnAPI.algorithm(model), data...)) - -""" - obstransform(model, kind_of_proxy::LearnAPI.KindOfProxy, obsdata) - -Similar to `transform` but consumes algorithm-specific representations of input data, -`obsdata`, as returned by `obs(transform, algorithm, data...)`. Here `data...` is the -form of data expected in the main [`transform`](@ref) method. Alternatively, such -`obsdata` may be replaced by a resampled version, where resampling is performed using -`MLUtils.getobs` (always supported). - -For some algorithms and workflows, `obstransform` will have a performance benefit over -[`transform`](@ref). See more at [`obs`](@ref). - -# Example - -In the following, `algorithm` is some unsupervised learning algorithm with -training features `X`, and test features `Xnew`: +or, in one step: ```julia -model = fit(algorithm, X, y) -obsdata = obs(transform, algorithm, Xnew) -W = obstransform(model, obsdata) -@assert W == transform(model, Xnew) +W = transform(algorithm, X) ``` -See also [`transform`](@ref), [`fit`](@ref), [`predict`](@ref), -[`inverse_transform`](@ref), [`obs`](@ref). +Note `transform` does not mutate any argument, except in the special case +`LearnAPI.predict_or_transform_mutates(algorithm) = true`. + +See also [`fit`](@ref), [`predict`](@ref), +[`inverse_transform`](@ref). # Extended help # New implementations -Implementation of `obstransform` is optional, but required to enable `transform`. The -method must also handle `obsdata` in the case it is replaced by `MLUtils.getobs(obsdata, -I)` for some collection `I` of indices. If [`obs`](@ref) is not overloaded, then `obsdata -= data`, where `data...` is what the standard [`transform`](@ref) call expects, as in the -call `transform(model, data...)`. Note `data` is always a tuple, even if `transform` has -only one data argument. See more at [`obs`](@ref). - -$(DOC_MUTATION(:obstransform)) +Implementation for new LearnAPI.jl algorithms is optional. +$(DOC_IMPLEMENTED_METHODS(:transform)) -If overloaded, you must include both `LearnAPI.obstransform` and `LearnAPI.transform` in -the list of methods returned by the [`LearnAPI.functions`](@ref) trait. +$(DOC_MINIMIZE(:transform)) -Each supported `kind_of_proxy` should be listed in the return value of the -[`LearnAPI.kinds_of_proxy(algorithm)`](@ref) trait. +$(DOC_MUTATION(:transform)) -$(DOC_MINIMIZE(:obstransform)) """ -function obstransform end +function transform end + """ inverse_transform(model, data) @@ -259,20 +149,13 @@ Inverse transform `data` according to some `model` returned by [`fit`](@ref). He "inverse" is to be understood broadly, e.g, an approximate right inverse for [`transform`](@ref). -# Arguments - -- `model`: anything returned by a call of the form `fit(algorithm, ...)`, for some - LearnAPI-complaint `algorithm`. 
-
-- `data`: something having the same form as the output of `transform(model, inputs...)`
-
 # Example
 
 In the following, `algorithm` is some dimension-reducing algorithm that generalizes to new
 data (such as PCA); `Xtrain` is the training input and `Xnew` the input to be reduced:
 
 ```julia
-model = fit(algorithm, Xtrain; verbosity=0)
+model = fit(algorithm, Xtrain)
 W = transform(model, Xnew)       # reduced version of `Xnew`
 Ŵ = inverse_transform(model, W)  # embedding of `W` in original space
 ```
 
 See also [`fit`](@ref), [`transform`](@ref), [`predict`](@ref).
 
@@ -283,7 +166,7 @@ See also [`fit`](@ref), [`transform`](@ref), [`predict`](@ref).
 
 # New implementations
 
-Implementation is optional. $(DOC_IMPLEMENTED_METHODS(:inverse_transform, ))
+Implementation is optional. $(DOC_IMPLEMENTED_METHODS(:inverse_transform))
 
 $(DOC_MINIMIZE(:inverse_transform))
 
diff --git a/src/tools.jl b/src/tools.jl
index 7a211729..d86e3d8d 100644
--- a/src/tools.jl
+++ b/src/tools.jl
@@ -8,6 +8,27 @@ function name_value_pair(ex)
     return (ex.args[1], ex.args[2])
 end
 
+"""
+    @trait(TypeEx, trait1=value1, trait2=value2, ...)
+
+Overload a number of traits for algorithms of type `TypeEx`. For example, the code
+
+```julia
+@trait(
+    RidgeRegressor,
+    descriptors = ("regression", ),
+    doc_url = "https://some.cool.documentation",
+)
+```
+
+is equivalent to
+
+```julia
+LearnAPI.descriptors(::RidgeRegressor) = ("regression", )
+LearnAPI.doc_url(::RidgeRegressor) = "https://some.cool.documentation"
+```
+
+"""
 macro trait(algorithm_ex, exs...)
     program = quote end
     for ex in exs
@@ -20,28 +41,6 @@ macro trait(algorithm_ex, exs...)
     return esc(program)
 end
 
-# """
-#     typename(x)
-
-# Return a symbolic representation of the name of `type(x)`, stripped of any type-parameters
-# and module qualifications. For example, if
-
-#     typeof(x) = MLJBase.Machine{MLJAlgorithms.ConstantRegressor,true}
-
-# Then `typename(x)` returns `:Machine`.
-
-# """
function typename(x)
    M = typeof(x)
    if isdefined(M, :name)
        return M.name.name
    elseif isdefined(M, :body)
        return typename(M.body)
    else
        return Symbol(string(M))
    end
end

function is_uppercase(char::Char)
    i = Int(char)
    i > 64 && i < 91
diff --git a/src/traits.jl b/src/traits.jl
index 73c3b03a..f5709206 100644
--- a/src/traits.jl
+++ b/src/traits.jl
@@ -13,12 +13,22 @@ DOC_ONLY_ONE(func) =
     "`LearnAPI.$(func)_observation_scitype`, "*
     "`LearnAPI.$(func)_observation_type`."
 
+const DOC_EXPLAIN_EACHOBS =
+    """
+
+    Here, "for each `o` in `observations`" is understood in the sense of
+    [`LearnAPI.data_interface(algorithm)`](@ref). For example, if
+    `LearnAPI.data_interface(algorithm) == Base.HasLength()`, then this means "for `o` in
+    `MLUtils.eachobs(observations)`".
+
+    """
 
 const TRAITS = [
+    :constructor,
     :functions,
     :kinds_of_proxy,
-    :position_of_target,
-    :position_of_weights,
+    :target,
+    :weights,
     :descriptors,
     :is_pure_julia,
     :pkg_name,
@@ -28,6 +38,7 @@ const TRAITS = [
     :is_composite,
     :human_name,
     :iteration_parameter,
+    :data_interface,
     :predict_or_transform_mutates,
     :fit_scitype,
     :fit_observation_scitype,
@@ -48,18 +59,51 @@ const TRAITS = [
 
 # # OVERLOADABLE TRAITS
 
+"""
+    LearnAPI.constructor(algorithm)
+
+Return a keyword constructor that can be used to clone `algorithm` or make copies with
+selectively altered property values:
+
+```julia-repl
+julia> algorithm.lambda
+0.1
+julia> C = LearnAPI.constructor(algorithm)
+julia> algorithm2 = C(lambda=0.2)
+julia> algorithm2.lambda
+0.2
+```
+
+# New implementations
+
+All new implementations must overload this trait.
It must be possible to recover an
+algorithm from the constructor returned as follows:
+
+```julia
+properties = propertynames(algorithm)
+named_properties = NamedTuple{properties}(getproperty.(Ref(algorithm), properties))
+@assert algorithm == LearnAPI.constructor(algorithm)(; named_properties...)
+```
+
+The keyword constructor provided by `LearnAPI.constructor` must provide default values for
+all properties, with the exception of those that can take other LearnAPI.jl algorithms as
+values.
+
+"""
+function constructor end
+
 """
     LearnAPI.functions(algorithm)
 
-Return a tuple of functions that can be sensibly applied to `algorithm`, or to objects
-having the same type as `algorithm`, or to associated models (objects returned by
-`fit(algorithm, ...)`. Algorithm traits are excluded.
+Return a tuple of functions that can be meaningfully applied with `algorithm`, or an
+associated model (an object returned by `fit(algorithm, ...)`), as the first
+argument. Algorithm traits (`algorithm` is the *only* argument) are excluded.
 
 In addition to functions, the returned tuple may include expressions, like
 `:(DecisionTree.print_tree)`, which reference functions not owned by LearnAPI.jl.
 
-The understanding is that `algorithm` is a LearnAPI-compliant object whenever this is
-non-empty.
+The understanding is that `algorithm` is a LearnAPI-compliant object whenever the return
+value is non-empty.
 
 # Extended help
 
@@ -68,18 +112,15 @@ non-empty.
 All new implementations must overload this trait. Here's a checklist for elements in the
 return value:
 
-| function             | needs explicit implementation?  | include in returned tuple?       |
-|----------------------|---------------------------------|----------------------------------|
-| `fit`                | no                              | yes                              |
-| `obsfit`             | yes                             | yes                              |
-| `minimize`           | optional                        | yes                              |
-| `predict`            | no                              | if `obspredict` is implemented   |
-| `obspredict`         | optional                        | if implemented                   |
-| `transform`          | no                              | if `obstransform` is implemented |
-| `obstransform`       | optional                        | if implemented                   |
-| `obs`                | optional                        | yes                              |
-| `inverse_transform`  | optional                        | if implemented                   |
-| `LearnAPI.algorithm` | yes                             | yes                              |
+| function             | implementation/overloading compulsory? | include in returned tuple? |
+|----------------------|----------------------------------------|----------------------------|
+| `fit`                | yes                                     | yes                        |
+| `minimize`           | no                                      | yes                        |
+| `obs`                | no                                      | yes                        |
+| `LearnAPI.algorithm` | yes                                     | yes                        |
+| `inverse_transform`  | no                                      | only if implemented        |
+| `predict`            | no                                      | only if implemented        |
+| `transform`          | no                                      | only if implemented        |
 
 Also include any implemented accessor functions. The LearnAPI.jl accessor functions are:
 $ACCESSOR_FUNCTIONS_LIST.
 
@@ -125,29 +166,40 @@ For more on target variables and target proxies, refer to the LearnAPI documenta
 kinds_of_proxy(::Any) = ()
 
 """
-    LearnAPI.position_of_target(algorithm)
+    LearnAPI.target(algorithm)::Bool
+    LearnAPI.target(algorithm, data) -> target
 
-Return the expected position of the target variable within `data` in calls of the form
-[`LearnAPI.fit`](@ref)`(algorithm, verbosity, data...)`.
+The first method (an algorithm trait) returns `true` if the second method returns a target
+variable for some value(s) of `data`, where `data` is a supported argument in
+[`fit(algorithm, data)`](@ref).
 
-If this number is `0`, then no target is expected. If this number exceeds `length(data)`,
-then `data` is understood to exclude the target variable.
+# New implementations
+
+The trait fallback returns `false`. A fallback for the second method returns `nothing`.
 
 """
-position_of_target(::Any) = 0
+target(::Any) = false
+target(::Any, data) = nothing
 
 """
-    LearnAPI.position_of_weights(algorithm)
+    LearnAPI.weights(algorithm)::Bool
+    LearnAPI.weights(algorithm, data) -> weights
+
+The first method (an algorithm trait) returns `true` if the second method returns
+per-observation weights, for some value(s) of `data`, where `data` is a supported argument
+in [`fit(algorithm, data)`](@ref).
 
-Return the expected position of per-observation weights within `data` in
-calls of the form [`LearnAPI.fit`](@ref)`(algorithm, data...)`.
+Otherwise, weights, if they apply, are assumed uniform.
 
-If this number is `0`, then no weights are expected. If this number exceeds
-`length(data)`, then `data` is understood to exclude weights, which are assumed to be
-uniform.
+# New implementations
+
+The trait fallback returns `false`. A fallback for the second method returns `nothing`,
+which is interpreted as uniform weights.
 
 """
-position_of_weights(::Any) = 0
+weights(::Any) = false
+weights(::Any, data) = nothing
+
 
 descriptors() = [
     :regression,
@@ -289,8 +341,8 @@ is_composite(::Any) = false
 """
     LearnAPI.human_name(algorithm)
 
-A human-readable string representation of `typeof(algorithm)`. Primarily intended for
-auto-generation of documentation.
+Return a human-readable string representation of `typeof(algorithm)`. Primarily intended
+for auto-generation of documentation.
 
 # New implementations
 
@@ -302,6 +354,32 @@ to return `"K-nearest neighbors regressor"`. Ideally, this is a "concrete" noun
 """
 human_name(M) = snakecase(name(M), delim=' ') # `name` defined below
 
+"""
+    LearnAPI.data_interface(algorithm)
+
+Return the data interface supported by `algorithm` for accessing individual observations in
+representations of input data returned by [`obs(algorithm, data)`](@ref) or [`obs(model,
+data)`](@ref). Here `data` is `fit`, `predict`, or `transform`-consumable data.
+
+Options for the return value:
+
+- `Base.HasLength()`: Data returned by `obs` implements the
+  [MLUtils.jl](https://juliaml.github.io/MLUtils.jl/dev/) `getobs/numobs` interface; it
+  usually suffices to overload `Base.getindex` and `Base.length` (which are the
+  `getobs/numobs` fallbacks).
+
+- `Base.SizeUnknown()`: Data returned by `obs` implements Julia's `iterate`
+  interface.
+
+See also [`obs`](@ref).
+
+# New implementations
+
+The fallback returns `Base.HasLength()`.
+
+"""
+data_interface(::Any) = Base.HasLength()
+
 """
     LearnAPI.predict_or_transform_mutates(algorithm)
 
@@ -334,17 +412,9 @@ iteration_parameter(::Any) = nothing
 """
     LearnAPI.fit_scitype(algorithm)
 
-Return an upper bound on the scitype of `data` guaranteed to work when calling
-`fit(algorithm, data...)`.
-
-Specifically, if the return value is `S` and `ScientificTypes.scitype(data) <: S`, then
-all the following calls are guaranteed to work:
-
-```julia
-fit(algorithm, data...)
-obsdata = obs(fit, algorithm, data...)
-fit(algorithm, Obs(), obsdata)
-```
+Return an upper bound `S` on the scitype of `data` guaranteed to work when calling
+`fit(algorithm, data)`: if `ScientificTypes.scitype(data) <: S`, then `fit(algorithm,
+data)` is supported.
 
 See also [`LearnAPI.fit_type`](@ref), [`LearnAPI.fit_observation_scitype`](@ref),
 [`LearnAPI.fit_observation_type`](@ref).
 
@@ -359,27 +429,12 @@ fit_scitype(::Any) = Union{}
 """
     LearnAPI.fit_observation_scitype(algorithm)
 
-Return an upper bound on the scitype of observations guaranteed to work when calling
-`fit(algorithm, data...)`, independent of the type/scitype of the data container
-itself.
Here "observations" is in the sense of MLUtils.jl. Assuming this trait has
-value different from `Union{}` the understanding is that `data` implements the MLUtils.jl
-`getobs`/`numobs` interface.
-
-Specifically, denoting the type returned above by `S`, supposing `S != Union{}`, and that
-user supplies `data` satisfying
-
-```julia
-ScientificTypes.scitype(MLUtils.getobs(data, i)) <: S
-```
-
-for any valid index `i`, then all the following are guaranteed to work:
+Return an upper bound `S` on the scitype of individual observations guaranteed to work
+when calling `fit`: if `observations = obs(algorithm, data)` and
+`ScientificTypes.scitype(o) <: S` for each `o` in `observations`, then the call
+`fit(algorithm, data)` is supported.
 
-
-```julia
-fit(algorithm, data....)
-obsdata = obs(fit, algorithm, data...)
-fit(algorithm, Obs(), obsdata)
-```
+$DOC_EXPLAIN_EACHOBS
 
 See also [`LearnAPI.fit_type`](@ref), [`LearnAPI.fit_scitype`](@ref),
 [`LearnAPI.fit_observation_type`](@ref).
 
@@ -394,17 +449,8 @@ fit_observation_scitype(::Any) = Union{}
 """
     LearnAPI.fit_type(algorithm)
 
-Return an upper bound on the type of `data` guaranteed to work when calling
-`fit(algorithm, data...)`.
-
-Specifically, if the return value is `T` and `typeof(data) <: T`, then
-all the following calls are guaranteed to work:
-
-```julia
-fit(algorithm, data...)
-obsdata = obs(fit, algorithm, data...)
-fit(algorithm, Obs(), obsdata)
-```
+Return an upper bound `T` on the type of `data` guaranteed to work when calling
+`fit(algorithm, data)`: if `typeof(data) <: T`, then `fit(algorithm, data)` is supported.
 
 See also [`LearnAPI.fit_scitype`](@ref), [`LearnAPI.fit_observation_type`](@ref),
 [`LearnAPI.fit_observation_scitype`](@ref).
 
@@ -419,26 +465,12 @@ fit_type(::Any) = Union{}
 """
     LearnAPI.fit_observation_type(algorithm)
 
-Return an upper bound on the type of observations guaranteed to work when calling
-`fit(algorithm, data...)`, independent of the type/scitype of the data container
-itself. Here "observations" is in the sense of MLUtils.jl. Assuming this trait has value
-different from `Union{}` the understanding is that `data` implements the MLUtils.jl
-`getobs`/`numobs` interface.
+Return an upper bound `T` on the type of individual observations guaranteed to work
+when calling `fit`: if `observations = obs(algorithm, data)` and
+`typeof(o) <: T` for each `o` in `observations`, then the call
+`fit(algorithm, data)` is supported.
 
-Specifically, denoting the type returned above by `T`, supposing `T != Union{}`, and that
-user supplies `data` satisfying
-
-```julia
-typeof(MLUtils.getobs(data, i)) <: T
-```
-
-for any valid index `i`, then the following is guaranteed to work:
-
-```julia
-fit(algorithm, data....)
-obsdata = obs(fit, algorithm, data...)
-fit(algorithm, Obs(), obsdata)
-```
+$DOC_EXPLAIN_EACHOBS
 
 See also [`LearnAPI.fit_type`](@ref), [`LearnAPI.fit_scitype`](@ref),
 [`LearnAPI.fit_observation_scitype`](@ref).
 
@@ -456,18 +488,9 @@ function DOC_INPUT_SCITYPE(op)
     """
         LearnAPI.$(op)_input_scitype(algorithm)
 
-    Return an upper bound on the scitype of `data` guaranteed to work in the call
-    `$op(algorithm,$extra data...)`.
-
-    Specifically, if `S` is the value returned and `ScientificTypes.scitype(data) <: S`,
-    then the following is guaranteed to work:
-
-    ```julia
-    $op(model,$extra data...)
-    obsdata = obs($op, algorithm, data...)
-    $op(model,$extra Obs(), obsdata)
-    ```
-    whenever `algorithm = LearnAPI.algorithm(model)`.
+    Return an upper bound `S` on the scitype of `data` guaranteed to work in the call
+    `$op(algorithm,$extra data)`: if `ScientificTypes.scitype(data) <: S`,
+    then `$op(algorithm,$extra data)` is supported.
 
     See also [`LearnAPI.$(op)_input_type`](@ref).
 
@@ -484,27 +507,12 @@ function DOC_INPUT_OBSERVATION_SCITYPE(op)
     """
        LearnAPI.$(op)_observation_scitype(algorithm)
 
-    Return an upper bound on the scitype of observations guaranteed to work when calling
-    `$op(model,$extra data...)`, independent of the type/scitype of the data container
-    itself. Here "observations" is in the sense of MLUtils.jl. Assuming this trait has
-    value different from `Union{}` the understanding is that `data` implements the
-    MLUtils.jl `getobs`/`numobs` interface.
+    Return an upper bound `S` on the scitype of individual observations guaranteed to work
+    when calling `$op`: if `observations = obs(model, data)`, for some `model` returned by
+    `fit(algorithm, ...)`, and `ScientificTypes.scitype(o) <: S` for each `o` in
+    `observations`, then the call `$(op)(model,$extra data)` is supported.
 
-    Specifically, denoting the type returned above by `S`, supposing `S != Union{}`, and
-    that user supplies `data` satisfying
-
-    ```julia
-    ScientificTypes.scitype(MLUtils.getobs(data, i)) <: S
-    ```
-
-    for any valid index `i`, then all the following are guaranteed to work:
-
-    ```julia
-    $op(model,$extra data...)
-    obsdata = obs($op, algorithm, data...)
-    $op(model,$extra Obs(), obsdata)
-    ```
-    whenever `algorithm = LearnAPI.algorithm(model)`.
+    $DOC_EXPLAIN_EACHOBS
 
    See also [`LearnAPI.fit_type`](@ref), [`LearnAPI.fit_scitype`](@ref),
    [`LearnAPI.fit_observation_type`](@ref).
 
@@ -522,19 +530,11 @@ function DOC_INPUT_TYPE(op)
     """
         LearnAPI.$(op)_input_type(algorithm)
 
-    Return an upper bound on the type of `data` guaranteed to work in the call
-    `$op(algorithm,$extra data...)`.
-
-    Specifically, if `T` is the value returned and `typeof(data) <: T`, then the following
-    is guaranteed to work:
-
-    ```julia
-    $op(model,$extra data...)
-    obsdata = obs($op, model, data...)
-    $op(model,$extra Obs(), obsdata)
-    ```
+    Return an upper bound `T` on the type of `data` guaranteed to work in the call
+    `$op(algorithm,$extra data)`: if `typeof(data) <: T`,
+    then `$op(algorithm,$extra data)` is supported.
 
-    See also [`LearnAPI.$(op)_input_scitype`](@ref).
+    See also [`LearnAPI.$(op)_input_scitype`](@ref).
 
     # New implementations
 
@@ -550,27 +550,12 @@ function DOC_INPUT_OBSERVATION_TYPE(op)
     """
         LearnAPI.$(op)_observation_type(algorithm)
 
-    Return an upper bound on the type of observations guaranteed to work when calling
-    `$op(model,$extra data...)`, independent of the type/scitype of the data container
-    itself. Here "observations" is in the sense of MLUtils.jl. Assuming this trait has
-    value different from `Union{}` the understanding is that `data` implements the
-    MLUtils.jl `getobs`/`numobs` interface.
-
-    Specifically, denoting the type returned above by `T`, supposing `T != Union{}`, and
-    that user supplies `data` satisfying
-
-    ```julia
-    typeof(MLUtils.getobs(data, i)) <: T
-    ```
-
-    for any valid index `i`, then all the following are guaranteed to work:
-
-    ```julia
-    $op(model,$extra data...)
- obsdata = obs($op, algorithm, data...) - $op(model,$extra Obs(), obsdata) - ``` - whenever `algorithm = LearnAPI.algorithm(model)`. + $DOC_EXPLAIN_EACHOBS See also See also [`LearnAPI.fit_type`](@ref), [`LearnAPI.fit_scitype`](@ref), [`LearnAPI.fit_observation_type`](@ref). @@ -649,19 +634,19 @@ const DOC_PREDICT_OUTPUT(s) = Return an upper bound for the $(s)s of predictions of the specified form where supported, and otherwise return `Any`. For example, if - ŷ = LearnAPI.predict(model, LearnAPI.Distribution(), data...) + ŷ = predict(model, Distribution(), data) successfully returns (i.e., `algorithm` supports predictions of target probability distributions) then the following is guaranteed to hold: - $(s)(ŷ) <: LearnAPI.predict_output_$(s)(algorithm, LearnAPI.Distribution()) + $(s)(ŷ) <: predict_output_$(s)(algorithm, Distribution()) **Note.** This trait has a single-argument "convenience" version `LearnAPI.predict_output_$(s)(algorithm)` derived from this one, which returns a dictionary keyed on target proxy types. - See also [`LearnAPI.KindOfProxy`](@ref), [`LearnAPI.predict`](@ref), - [`LearnAPI.predict_input_$(s)`](@ref). + See also [`LearnAPI.KindOfProxy`](@ref), [`predict`](@ref), + [`predict_input_$(s)`](@ref). # New implementations @@ -685,7 +670,7 @@ predict_output_type(algorithm, kind_of_proxy) = Any # # DERIVED TRAITS -name(A) = string(typename(A)) +name(A) = split(string(constructor(A)), ".") |> last is_algorithm(A) = !isempty(functions(A)) @@ -703,14 +688,14 @@ const DOC_PREDICT_OUTPUT2(s) = As an example, if - ŷ = LearnAPI.predict(model, LearnAPI.Distribution(), data...) + ŷ = predict(model, Distribution(), data...) successfully returns (i.e., `algorithm` supports predictions of target probability distributions) then the following is guaranteed to hold: - $(s)(ŷ) <: LearnAPI.predict_output_$(s)s(algorithm)[LearnAPI.Distribution] + $(s)(ŷ) <: LearnAPI.predict_output_$(s)s(algorithm)[Distribution] - See also [`LearnAPI.KindOfProxy`](@ref), [`LearnAPI.predict`](@ref), + See also [`LearnAPI.KindOfProxy`](@ref), [`predict`](@ref), [`LearnAPI.predict_input_$(s)`](@ref). # New implementations diff --git a/src/types.jl b/src/types.jl index e72c159e..e77c4cb7 100644 --- a/src/types.jl +++ b/src/types.jl @@ -1,28 +1,6 @@ # # TARGET PROXIES -const DOC_HOW_TO_LIST_PROXIES = - "Run `LearnAPI.CONCRETE_TARGET_PROXY_TYPES` "* - " to list all options. " - - -""" - - LearnAPI.KindOfProxy - -Abstract type whose concrete subtypes `T` each represent a different kind of proxy for -some target variable, associated with some algorithm. Instances `T()` are used to request -the form of target predictions in [`predict`](@ref) calls. - -See LearnAPI.jl documentation for an explanation of "targets" and "target proxies". - -For example, `Distribution` is a concrete subtype of `LearnAPI.KindOfProxy` and a call -like `predict(model, Distribution(), Xnew)` returns a data object whose observations are -probability density/mass functions, assuming `algorithm` supports predictions of that -form. - -$DOC_HOW_TO_LIST_PROXIES - -""" +# see later for doc string: abstract type KindOfProxy end """ @@ -32,7 +10,7 @@ Abstract subtype of [`LearnAPI.KindOfProxy`](@ref). If `kind_of_proxy` is an ins `LearnAPI.IID` then, given `data` constisting of ``n`` observations, the following must hold: -- `ŷ = LearnAPI.predict(model, kind_of_proxy, data...)` is +- `ŷ = LearnAPI.predict(model, kind_of_proxy, data)` is data also consisting of ``n`` observations. 
- The ``j``th observation of `ŷ`, for any ``j``, depends only on the ``j``th
@@ -53,22 +31,50 @@ struct Parametric <: IID end
 struct LabelAmbiguous <: IID end
 struct LabelAmbiguousSampleable <: IID end
 struct LabelAmbiguousDistribution <: IID end
+struct LabelAmbiguousFuzzy <: IID end
 struct ConfidenceInterval <: IID end
-struct Set <: IID end
-struct ProbabilisticSet <: IID end
+struct Fuzzy <: IID end
+struct ProbabilisticFuzzy <: IID end
 struct SurvivalFunction <: IID end
 struct SurvivalDistribution <: IID end
+struct HazardFunction <: IID end
 struct OutlierScore <: IID end
 struct Continuous <: IID end

-# struct None <: KindOfProxy end
-struct JointSampleable <: KindOfProxy end
-struct JointDistribution <: KindOfProxy end
-struct JointLogDistribution <: KindOfProxy end
+"""
+    Joint <: KindOfProxy
+
+Abstract subtype of [`LearnAPI.KindOfProxy`](@ref). If `kind_of_proxy` is an instance of
+`LearnAPI.Joint` then, given `data` consisting of ``n`` observations, `predict(model,
+kind_of_proxy, data)` represents a *single* probability distribution for the sample
+space ``Y^n``, where ``Y`` is the space from which the target variable takes its values.
+
+"""
+abstract type Joint <: KindOfProxy end
+struct JointSampleable <: Joint end
+struct JointDistribution <: Joint end
+struct JointLogDistribution <: Joint end
+
+"""
+    Single <: KindOfProxy
+
+Abstract subtype of [`LearnAPI.KindOfProxy`](@ref). It applies only to algorithms for
+which `predict` has no data argument, i.e., is of the form `predict(model,
+kind_of_proxy)`. An example is an algorithm learning a probability distribution from
+samples, where the samples are regarded as drawn from the "target" variable. If, in this
+case, `kind_of_proxy` is an instance of `LearnAPI.Single`, then `predict(model,
+kind_of_proxy)` returns a single object representing a probability distribution.
+
+"""
+abstract type Single <: KindOfProxy end
+struct SingleSampleable <: Single end
+struct SingleDistribution <: Single end
+struct SingleLogDistribution <: Single end

 const CONCRETE_TARGET_PROXY_TYPES = [
     subtypes(IID)...,
-    setdiff(subtypes(KindOfProxy), subtypes(IID))...,
+    subtypes(Single)...,
+    subtypes(Joint)...,
 ]

 const CONCRETE_TARGET_PROXY_TYPES_SYMBOLS = map(CONCRETE_TARGET_PROXY_TYPES) do T
@@ -82,3 +88,28 @@ const CONCRETE_TARGET_PROXY_TYPES_LIST = join(
     ", ",
     " and ",
 )
+
+const DOC_HOW_TO_LIST_PROXIES =
+    "The instances of [`LearnAPI.KindOfProxy`](@ref) are: "*
+    "$(LearnAPI.CONCRETE_TARGET_PROXY_TYPES_LIST). "
+
+
+"""
+
+    LearnAPI.KindOfProxy
+
+Abstract type whose concrete subtypes `T` each represent a different kind of proxy for
+some target variable, associated with some algorithm. Instances `T()` are used to request
+the form of target predictions in [`predict`](@ref) calls.
+
+See LearnAPI.jl documentation for an explanation of "targets" and "target proxies".
+
+For example, `Distribution` is a concrete subtype of `LearnAPI.KindOfProxy` and a call
+like `predict(model, Distribution(), Xnew)` returns a data object whose observations are
+probability density/mass functions, assuming `algorithm` supports predictions of that
+form.
+
+$DOC_HOW_TO_LIST_PROXIES
+
+"""
+KindOfProxy
diff --git a/test/integration/regression.jl b/test/integration/regression.jl
index 2c5d9d70..ee419d21 100644
--- a/test/integration/regression.jl
+++ b/test/integration/regression.jl
@@ -13,10 +13,10 @@ import DataFrames
 struct Ridge
     lambda::Float64
 end
-Ridge(; lambda=0.1) = Ridge(lambda)
+Ridge(; lambda=0.1) = Ridge(lambda) # LearnAPI.constructor defined later

-struct RidgeFitObs{T}
-    A::Matrix{T}  # p x n
+struct RidgeFitObs{T,M<:AbstractMatrix{T}}
+    A::M                  # p x n
     names::Vector{Symbol}
     y::Vector{T}
 end
@@ -27,23 +27,28 @@ struct RidgeFitted{T,F}
     feature_importances::F
 end

+LearnAPI.algorithm(model::RidgeFitted) = model.algorithm
+
 Base.getindex(data::RidgeFitObs, I) =
     RidgeFitObs(data.A[:,I], data.names, data.y[I])
 Base.length(data::RidgeFitObs) = length(data.y)

-function LearnAPI.obs(::typeof(fit), ::Ridge, X, y)
+# observations for consumption by `fit`:
+function LearnAPI.obs(::Ridge, data)
+    X, y = data
     table = Tables.columntable(X)
     names = Tables.columnnames(table) |> collect
-    RidgeFitObs(Tables.matrix(table, transpose=true), names, y)
+    RidgeFitObs(Tables.matrix(table)', names, y)
 end

-function LearnAPI.obsfit(algorithm::Ridge, fitdata::RidgeFitObs, verbosity)
+# for pre-processed observations:
+function LearnAPI.fit(algorithm::Ridge, observations::RidgeFitObs; verbosity=1)

     # unpack hyperparameters and data:
     lambda = algorithm.lambda
-    A = fitdata.A
-    names = fitdata.names
-    y = fitdata.y
+    A = observations.A
+    names = observations.names
+    y = observations.y

     # apply core algorithm:
     coefficients = (A*A' + algorithm.lambda*I)\(A*y) # vector
@@ -61,12 +66,31 @@ function LearnAPI.obsfit(algorithm::Ridge, fitdata::RidgeFitObs, verbosity)
 end

-LearnAPI.algorithm(model::RidgeFitted) = model.algorithm
+# for unprocessed `data = (X, y)`:
+LearnAPI.fit(algorithm::Ridge, data; kwargs...) =
+    fit(algorithm, obs(algorithm, data); kwargs...)
+
+# for convenience:
+LearnAPI.fit(algorithm::Ridge, X, y; kwargs...) =
+    fit(algorithm, (X, y); kwargs...)
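By way of illustration, the three `fit` entry points just defined compose as follows; the
data below is hypothetical and not part of the test suite:

```julia
# `X` can be any Tables.jl-compatible table; `y` is a plain vector:
X = (x1 = rand(10), x2 = rand(10))
y = rand(10)
algorithm = Ridge(lambda=0.5)

model = fit(algorithm, X, y)                      # convenience signature
model = fit(algorithm, (X, y))                    # single `data` argument
model = fit(algorithm, obs(algorithm, (X, y)))    # pre-processed observations
```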
-LearnAPI.obspredict(model::RidgeFitted, ::LiteralTarget, Anew::Matrix) = - ((model.coefficients)'*Anew)' +# to extract the target: +LearnAPI.target(::Ridge, data) = last(data) +LearnAPI.target(::Ridge, observations::RidgeFitObs) = observations.y -LearnAPI.obs(::typeof(predict), ::Ridge, X) = Tables.matrix(X, transpose=true) +# observations for consumption by `predict`: +LearnAPI.obs(::RidgeFitted, X) = Tables.matrix(X)' + +# matrix input: +LearnAPI.predict(model::RidgeFitted, ::LiteralTarget, observations::AbstractMatrix) = + observations'*model.coefficients + +# tabular input: +LearnAPI.predict(model::RidgeFitted, ::LiteralTarget, Xnew) = + predict(model, LiteralTarget(), obs(model, Xnew)) + +# convenience method: +LearnAPI.predict(model::RidgeFitted, data) = predict(model, LiteralTarget(), data) LearnAPI.feature_importances(model::RidgeFitted) = model.feature_importances @@ -75,21 +99,20 @@ LearnAPI.minimize(model::RidgeFitted) = @trait( Ridge, - position_of_target=2, + constructor = Ridge, + target=true, kinds_of_proxy = (LiteralTarget(),), functions = ( fit, - obsfit, minimize, predict, - obspredict, obs, LearnAPI.algorithm, LearnAPI.feature_importances, ) ) -n = 10 # number of observations +n = 30 # number of observations train = 1:6 test = 7:10 a, b, c = rand(n), rand(n), rand(n) @@ -112,7 +135,7 @@ y = 2a - b + 3c + 0.05*rand(n) ), ) - # quite fitting: + # quiet fitting: model = @test_logs( fit( algorithm, @@ -126,10 +149,10 @@ y = 2a - b + 3c + 0.05*rand(n) @test ŷ isa Vector{Float64} @test predict(model, Tables.subset(X, test)) == ŷ - fitdata = LearnAPI.obs(fit, algorithm, X, y) - predictdata = LearnAPI.obs(predict, algorithm, X) - model = obsfit(algorithm, MLUtils.getobs(fitdata, train); verbosity=1) - @test obspredict(model, LiteralTarget(), MLUtils.getobs(predictdata, test)) == ŷ + fitobs = LearnAPI.obs(algorithm, (X, y)) + predictobs = LearnAPI.obs(model, X) + model = fit(algorithm, MLUtils.getobs(fitobs, train); verbosity=0) + @test predict(model, LiteralTarget(), MLUtils.getobs(predictobs, test)) ≈ ŷ @test LearnAPI.feature_importances(model) isa Vector{<:Pair{Symbol}} @@ -140,11 +163,15 @@ y = 2a - b + 3c + 0.05*rand(n) recovered_model = deserialize(filename) @test LearnAPI.algorithm(recovered_model) == algorithm - @test obspredict( + @test predict( recovered_model, LiteralTarget(), - MLUtils.getobs(predictdata, test) - ) == ŷ + MLUtils.getobs(predictobs, test) + ) ≈ ŷ + + @test LearnAPI.target(algorithm, (X, y)) == y + @test LearnAPI.target(algorithm, fitobs) == y + end # # VARIATION OF RIDGE REGRESSION THAT USES FALLBACK OF LearnAPI.obs @@ -152,7 +179,7 @@ end struct BabyRidge lambda::Float64 end -BabyRidge(; lambda=0.1) = BabyRidge(lambda) +BabyRidge(; lambda=0.1) = BabyRidge(lambda) # LearnAPI.constructor defined later struct BabyRidgeFitted{T,F} algorithm::BabyRidge @@ -160,18 +187,17 @@ struct BabyRidgeFitted{T,F} feature_importances::F end -function LearnAPI.obsfit(algorithm::BabyRidge, data, verbosity) +function LearnAPI.fit(algorithm::BabyRidge, data; verbosity=1) X, y = data lambda = algorithm.lambda - table = Tables.columntable(X) names = Tables.columnnames(table) |> collect - A = Tables.matrix(table, transpose=true) + A = Tables.matrix(table)' # apply core algorithm: - coefficients = (A*A' + algorithm.lambda*I)\(A*y) # 1 x p matrix + coefficients = (A*A' + algorithm.lambda*I)\(A*y) # vector feature_importances = nothing @@ -179,25 +205,29 @@ function LearnAPI.obsfit(algorithm::BabyRidge, data, verbosity) end +LearnAPI.target(::BabyRidge, data) = last(data) + +# 
convenience form:
+LearnAPI.fit(algorithm::BabyRidge, X, y; kwargs...) =
+    fit(algorithm, (X, y); kwargs...)
+
 LearnAPI.algorithm(model::BabyRidgeFitted) = model.algorithm

-function LearnAPI.obspredict(model::BabyRidgeFitted, ::LiteralTarget, data)
-    X = only(data)
-    Anew = Tables.matrix(X, transpose=true)
-    return ((model.coefficients)'*Anew)'
-end
+LearnAPI.predict(model::BabyRidgeFitted, ::LiteralTarget, Xnew) =
+    Tables.matrix(Xnew)*model.coefficients
+
+LearnAPI.minimize(model::BabyRidgeFitted) =
+    BabyRidgeFitted(model.algorithm, model.coefficients, nothing)

 @trait(
     BabyRidge,
-    position_of_target=2,
+    constructor = BabyRidge,
+    target=true,
     kinds_of_proxy = (LiteralTarget(),),
     functions = (
         fit,
-        obsfit,
         minimize,
         predict,
-        obspredict,
-        obs,
         LearnAPI.algorithm,
         LearnAPI.feature_importances,
     )
@@ -210,10 +240,12 @@ end
     ŷ = predict(model, LiteralTarget(), Tables.subset(X, test))
     @test ŷ isa Vector{Float64}

-    fitdata = obs(fit, algorithm, X, y)
-    predictdata = LearnAPI.obs(predict, algorithm, X)
-    model = obsfit(algorithm, MLUtils.getobs(fitdata, train); verbosity=0)
-    @test obspredict(model, LiteralTarget(), MLUtils.getobs(predictdata, test)) == ŷ
+    fitobs = obs(algorithm, (X, y))
+    predictobs = LearnAPI.obs(model, X)
+    model = fit(algorithm, MLUtils.getobs(fitobs, train); verbosity=0)
+    @test predict(model, LiteralTarget(), MLUtils.getobs(predictobs, test)) == ŷ
+
+    @test LearnAPI.target(algorithm, (X, y)) == y
 end

 true
diff --git a/test/integration/static_algorithms.jl b/test/integration/static_algorithms.jl
index e5295ddc..3991dbf4 100644
--- a/test/integration/static_algorithms.jl
+++ b/test/integration/static_algorithms.jl
@@ -13,13 +13,15 @@ import DataFrames
 struct Selector
     names::Vector{Symbol}
 end
-Selector(; names=Symbol[]) = Selector(names)
+Selector(; names=Symbol[]) = Selector(names) # LearnAPI.constructor defined later

-LearnAPI.obsfit(algorithm::Selector, obsdata, verbosity) = algorithm
-LearnAPI.algorithm(model) = model # i.e., the algorithm
+# `fit` has no input data, does no "learning", and just returns thinly wrapped `algorithm`
+# (to distinguish it from the algorithm in dispatch):
+LearnAPI.fit(algorithm::Selector; verbosity=1) = Ref(algorithm)
+LearnAPI.algorithm(model::Base.RefValue{Selector}) = model[]

-function LearnAPI.obstransform(algorithm::Selector, obsdata)
-    X = only(obsdata)
+function LearnAPI.transform(model::Base.RefValue{Selector}, X)
+    algorithm = LearnAPI.algorithm(model)
     table = Tables.columntable(X)
     names = Tables.columnnames(table)
     filtered_names = filter(in(algorithm.names), names)
@@ -28,23 +30,31 @@ function LearnAPI.obstransform(algorithm::Selector, obsdata)
     return Tables.materializer(X)(filtered_table)
 end

-@trait Selector functions = (
-    fit,
-    obsfit,
-    minimize,
-    transform,
-    obstransform,
-    obs,
-    Learn.algorithm,
+# fit and transform in one go:
+function LearnAPI.transform(algorithm::Selector, X)
+    model = fit(algorithm)
+    transform(model, X)
+end
+
+@trait(
+    Selector,
+    constructor = Selector,
+    functions = (
+        fit,
+        minimize,
+        transform,
+        LearnAPI.algorithm,
+    ),
 )

 @testset "test a static transformer" begin
     algorithm = Selector(names=[:x, :w])
     X = DataFrames.DataFrame(rand(3, 4), [:x, :y, :z, :w])
     model = fit(algorithm) # no data arguments!
-    @test model == algorithm
-    @test transform(model, X) ==
-        DataFrames.DataFrame(Tables.matrix(X)[:,[1,4]], [:x, :w])
+    @test LearnAPI.algorithm(model) == algorithm
+    W = transform(model, X)
+    @test W == DataFrames.DataFrame(Tables.matrix(X)[:,[1,4]], [:x, :w])
+    @test W == transform(algorithm, X)
 end

@@ -56,7 +66,7 @@ end
 struct Selector2
     names::Vector{Symbol}
 end
-Selector2(; names=Symbol[]) = Selector2(names)
+Selector2(; names=Symbol[]) = Selector2(names) # LearnAPI.constructor defined later

 mutable struct Selector2Fit
     algorithm::Selector2
@@ -66,13 +76,11 @@ end
 LearnAPI.algorithm(model::Selector2Fit) = model.algorithm
 rejected(model::Selector2Fit) = model.rejected

-# Here `obsdata=()` and we are just wrapping `algorithm` with a place-holder for
-# the `rejected` feature names.
-LearnAPI.obsfit(algorithm::Selector2, obsdata, verbosity) = Selector2Fit(algorithm)
+# Here we are wrapping `algorithm` with a place-holder for the `rejected` feature names.
+LearnAPI.fit(algorithm::Selector2; verbosity=1) = Selector2Fit(algorithm)

-# output the filtered table and add `rejected` field to model (mutated)
-function LearnAPI.obstransform(model::Selector2Fit, obsdata)
-    X = only(obsdata)
+# output the filtered table and add `rejected` field to model (mutated!)
+function LearnAPI.transform(model::Selector2Fit, X)
     table = Tables.columntable(X)
     names = Tables.columnnames(table)
     keep = LearnAPI.algorithm(model).names
@@ -83,16 +91,20 @@ function LearnAPI.obstransform(model::Selector2Fit, obsdata)
     return Tables.materializer(X)(filtered_table)
 end

+# fit and transform in one step:
+function LearnAPI.transform(algorithm::Selector2, X)
+    model = fit(algorithm)
+    transform(model, X)
+end
+
 @trait(
     Selector2,
+    constructor = Selector2,
     predict_or_transform_mutates = true,
     functions = (
         fit,
-        obsfit,
         minimize,
         transform,
-        obstransform,
-        obs,
-        Learn.algorithm,
+        LearnAPI.algorithm,
         :(MyPkg.rejected), # accessor function not owned by LearnAPI.jl
     )
@@ -106,6 +119,7 @@ end
     @test LearnAPI.algorithm(model) == algorithm
     filtered = DataFrames.DataFrame(Tables.matrix(X)[:,[1,4]], [:x, :w])
     @test transform(model, X) == filtered
+    @test transform(algorithm, X) == filtered
     @test rejected(model) == [:y, :z]
 end
diff --git a/test/runtests.jl b/test/runtests.jl
index 8697a248..93788bc4 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -4,6 +4,10 @@ using Test
     include("tools.jl")
 end

+@testset "traits.jl" begin
+    include("traits.jl")
+end
+
 # # INTEGRATION TESTS

 @testset "regression" begin
diff --git a/test/tools.jl b/test/tools.jl
index 1b2e942f..523f40e1 100644
--- a/test/tools.jl
+++ b/test/tools.jl
@@ -1,6 +1,5 @@
 using LearnAPI
 using Test
-using SparseArrays

 module Fruit
 using LearnAPI
@@ -22,13 +21,6 @@ import .Fruit

 ## HELPERS

-@testset "typename" begin
-    @test LearnAPI.typename(Fruit.RedApple(1)) == :RedApple
-    @test LearnAPI.typename(nothing) == :Nothing
-    m = SparseArrays.sparse([1,2], [1,3], [0.5, 0.6])
-    @test LearnAPI.typename(m) == :SparseMatrixCSC
-end
-
 @testset "snakecase" begin
     @test LearnAPI.snakecase("AnthonyBlaomsPetElk") ==
         "anthony_blaoms_pet_elk"
diff --git a/test/traits.jl b/test/traits.jl
new file mode 100644
index 00000000..3000d016
--- /dev/null
+++ b/test/traits.jl
@@ -0,0 +1,16 @@
+module FruitSalad
+using LearnAPI
+
+struct RedApple{T}
+    x::T
+end
+
+LearnAPI.constructor(::RedApple) = RedApple
+
+end
+
+import .FruitSalad
+
+@testset "name" begin
+    @test LearnAPI.name(FruitSalad.RedApple(1)) == "RedApple"
+end
From d47cabe03a50f236ae86c2e1a2aa46dd8b0ae149 Mon Sep 17 00:00:00 2001
From: "Anthony D. 
Blaom" Date: Sun, 19 May 2024 16:23:49 +1200 Subject: [PATCH 02/27] rm redundant pkg from [extras] --- Project.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/Project.toml b/Project.toml index 206a4038..ee543d1a 100644 --- a/Project.toml +++ b/Project.toml @@ -14,7 +14,6 @@ DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" MLUtils = "f1d291b0-491e-4a28-83b9-f70985020b54" Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b" -SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" From f0c68d53fc5355b92a7833ce697f6e30a358cf37 Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Sun, 19 May 2024 17:05:37 +1200 Subject: [PATCH 03/27] fix typos --- docs/src/anatomy_of_an_implementation.md | 2 +- docs/src/obs.md | 2 +- src/obs.jl | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/src/anatomy_of_an_implementation.md b/docs/src/anatomy_of_an_implementation.md index 1c011d21..b136a8ba 100644 --- a/docs/src/anatomy_of_an_implementation.md +++ b/docs/src/anatomy_of_an_implementation.md @@ -401,7 +401,7 @@ LearnAPI.target(::Ridge, observations::RidgeFitObs) = observations.y ### The `obs` contract Providing `fit` signatures matching the output of `obs`, is the first part of the `obs` -contract. The second part is this: *The outupt of `obs` must implement the* +contract. The second part is this: *The output of `obs` must implement the* [MLUtils.jl](https://juliaml.github.io/MLUtils.jl/dev/) `getobs/numobs` *interface for accessing individual observations*. It usually suffices to overload `Base.getindex` and `Base.length` (which are the `getobs/numobs` fallbacks): diff --git a/docs/src/obs.md b/docs/src/obs.md index fe198a85..3e40e3f9 100644 --- a/docs/src/obs.md +++ b/docs/src/obs.md @@ -8,7 +8,7 @@ performance advantages over naive workflows in some cases (e.g., cross-validatio ```julia obs(algorithm, data) # can be passed to `fit` instead of `data` -obs(model, data) # can be passed to `predict` or `tranform` instead of `data` +obs(model, data) # can be passed to `predict` or `transform` instead of `data` ``` ## Typical workflows diff --git a/src/obs.jl b/src/obs.jl index 0348d3da..f67b19c9 100644 --- a/src/obs.jl +++ b/src/obs.jl @@ -69,7 +69,7 @@ that case. ## Sample implementation -Refer to the "Anatomy of an Implemetation" section of the LearnAPI +Refer to the "Anatomy of an Implementation" section of the LearnAPI [manual](https://juliaai.github.io/LearnAPI.jl/dev/). From 3252e09899055371946700bd074df1b6f0f86d40 Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Tue, 21 May 2024 09:29:26 +1200 Subject: [PATCH 04/27] more doc tweaks --- docs/src/anatomy_of_an_implementation.md | 11 ++++++----- docs/src/index.md | 8 ++++---- docs/src/reference.md | 18 +++++++++--------- 3 files changed, 19 insertions(+), 18 deletions(-) diff --git a/docs/src/anatomy_of_an_implementation.md b/docs/src/anatomy_of_an_implementation.md index b136a8ba..93a82d1a 100644 --- a/docs/src/anatomy_of_an_implementation.md +++ b/docs/src/anatomy_of_an_implementation.md @@ -166,8 +166,8 @@ predictions. ## Algorithm traits Algorithm [traits](@ref traits) record extra generic information about an algorithm, or -make specific promises of behavior. They usually have an algorithm as the single -argument. We regard [`LearnAPI.constructor`](@ref) defined above as a trait. +make specific promises of behavior. 
They usually have an algorithm as the single argument, +and so we also regard [`LearnAPI.constructor`](@ref) defined above as a trait. In LearnAPI.jl `predict` always outputs a [target or target proxy](@ref proxy), where "target" is understood very broadly. We overload a trait to record the fact here that the @@ -214,9 +214,10 @@ traits_list) to see which might apply to a new implementation, to enable maximum functionality provided by third party packages, and to assist third party algorithms that match machine learning algorithms to user-defined tasks. -Having set `LearnAPI.target(::Ridge) == true` we are obliged to overload a multi-argument -version of `LearnAPI.target` to extract the target from the `data` that gets supplied to -`fit`: +According to the contract articulated in its document string, having set +[`LearnAPI.target(::Ridge)`](@ref) equal to `true`, we are obliged to overload a +multi-argument version of `LearnAPI.target` to extract the target from the `data` that +gets supplied to `fit`: ```@example anatomy LearnAPI.target(::Ridge, data) = last(data) diff --git a/docs/src/index.md b/docs/src/index.md index 4f979070..2e4afb74 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -12,9 +12,9 @@ A base Julia interface for machine learning and statistics LearnAPI.jl is a lightweight, functional-style interface, providing a collection of [methods](@ref Methods), such as `fit` and `predict`, to be implemented by algorithms from machine learning and statistics. Through such implementations, these algorithms buy into -functionality, such as hyperparameter optimization, as provided by ML/statistics toolboxes -and other packages. LearnAPI.jl also provides a number of Julia [traits](@ref traits) for -promising specific behavior. +functionality, such as hyperparameter optimization and model composition, as provided by +ML/statistics toolboxes and other packages. LearnAPI.jl also provides a number of Julia +[traits](@ref traits) for promising specific behavior. ```@raw html 🚧 @@ -78,7 +78,7 @@ opts out. The `fit` and `predict` methods consume these alternative representati The fallback data interface is the [MLUtils.jl](https://github.com/JuliaML/MLUtils.jl) `getobs/numobs` interface, and if the input consumed by the algorithm already implements that interface (tables, arrays, etc.) then overloading `obs` is completely optional. A -plain iteration interface (to support, e.g., data loaders reading images from disk files) +plain iteration interface (to support, e.g., data loaders reading images from disk) can also be specified. ## Learning more diff --git a/docs/src/reference.md b/docs/src/reference.md index 5b15e03e..f5be0824 100644 --- a/docs/src/reference.md +++ b/docs/src/reference.md @@ -54,16 +54,15 @@ compared with censored ground truth survival times. #### Definitions -More generally, whenever we have a variable (e.g., a class label) that can (in principle) -can be paired with a predicted value, or some predicted "proxy" for that variable (such as -a class probability), then we call the variable a *target* variable, and the predicted -output a *target proxy*. 
In this definition, it is immaterial whether or not the target -appears in training (is supervised) or whether or not the model generalizes to new +More generally, whenever we have a variable (e.g., a class label) that can, at least in +principle, be paired with a predicted value, or some predicted "proxy" for that variable +(such as a class probability), then we call the variable a *target* variable, and the +predicted output a *target proxy*. In this definition, it is immaterial whether or not the +target appears in training (is supervised) or whether or not the model generalizes to new observations ("learns"). LearnAPI.jl provides singleton [target proxy types](@ref proxy_types) for prediction -dispatch in LearnAPI.jl. These are also used to distinguish performance metrics provided -by the package +dispatch. These are also used to distinguish performance metrics provided by the package [StatisticalMeasures.jl](https://juliaai.github.io/StatisticalMeasures.jl/dev/). @@ -151,8 +150,9 @@ Only these method names are exported by LearnAPI: `fit`, `transform`, `inverse_t [`LearnAPI.constructor`](@ref) and [`LearnAPI.functions`](@ref) are universally compulsory. -- [`LearnAPI.target`](@ref) and [`LearnAPI.weights`](@ref) are both traits and methods to - extract, from `fit` input data, the target and per-observation weights, when available. +- [`LearnAPI.target`](@ref) and [`LearnAPI.weights`](@ref) are traits which also include + extended signatures for extracting, from `fit` input data, the target and + per-observation weights, when available. --- From 69bd859b9e3be74d2f232761d6e2d933fafafeb5 Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Tue, 28 May 2024 17:30:35 +1200 Subject: [PATCH 05/27] fix table of contents for the docs --- docs/Project.toml | 1 + docs/make.jl | 18 ++++++++++-------- docs/src/anatomy_of_an_implementation.md | 2 +- 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/docs/Project.toml b/docs/Project.toml index caa42f70..47eb52e6 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -1,5 +1,6 @@ [deps] Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" +LearnAPI = "92ad9a40-7767-427a-9ee6-6e577f1266cb" MLUtils = "f1d291b0-491e-4a28-83b9-f70985020b54" ScientificTypesBase = "30f210dd-8aff-4c5f-94ba-8e64358c1161" Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" diff --git a/docs/make.jl b/docs/make.jl index b0705cda..d9695614 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -10,14 +10,16 @@ makedocs( pages=[ "Home" => "index.md", "Anatomy of an Implementation" => "anatomy_of_an_implementation.md", - "Reference" => "reference.md", - "... fit" => "fit.md", - "... predict/transform" => "predict_transform.md", - "... Kinds of Target Proxy" => "kinds_of_target_proxy.md", - "... minimize" => "minimize.md", - "... obs" => "obs.md", - "... Accessor Functions" => "accessor_functions.md", - "... 
Algorithm Traits" => "traits.md",
+        "Reference" => [
+            "Summary" => "reference.md",
+            "fit" => "fit.md",
+            "predict/transform" => "predict_transform.md",
+            "Kinds of Target Proxy" => "kinds_of_target_proxy.md",
+            "minimize" => "minimize.md",
+            "obs" => "obs.md",
+            "Accessor Functions" => "accessor_functions.md",
+            "Algorithm Traits" => "traits.md",
+        ],
         "Common Implementation Patterns" => "common_implementation_patterns.md",
         "Testing an Implementation" => "testing_an_implementation.md",
     ],
diff --git a/docs/src/anatomy_of_an_implementation.md b/docs/src/anatomy_of_an_implementation.md
index 93a82d1a..1ee22b2e 100644
--- a/docs/src/anatomy_of_an_implementation.md
+++ b/docs/src/anatomy_of_an_implementation.md
@@ -215,7 +215,7 @@ functionality provided by third party packages, and to assist third party algori
 match machine learning algorithms to user-defined tasks.

 According to the contract articulated in its document string, having set
-[`LearnAPI.target(::Ridge)`](@ref) equal to `true`, we are obliged to overload a
+[`LearnAPI.target`](@ref)`(::Ridge)` equal to `true`, we are obliged to overload a
 multi-argument version of `LearnAPI.target` to extract the target from the `data` that
 gets supplied to `fit`:

From f4b0fdda52879df2b76ea1004cdaefbe3670dd62 Mon Sep 17 00:00:00 2001
From: "Anthony D. Blaom"
Date: Tue, 28 May 2024 17:32:40 +1200
Subject: [PATCH 06/27] tweak

---
 docs/src/index.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/docs/src/index.md b/docs/src/index.md
index 2e4afb74..1b6cc500 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -73,7 +73,8 @@ Algorithms are free to consume data in any format. However, a method called [`ob
 data_interface) (read as "observations") gives users and meta-algorithms access to an
 algorithm-specific representation of input data, which is also guaranteed to implement a
 standard interface for accessing individual observations, unless an algorithm explicitly
-opts out. The `fit` and `predict` methods consume these alternative representations of data.
+opts out. The `fit` and `predict` methods also consume these alternative representations
+of data.

 The fallback data interface is the [MLUtils.jl](https://github.com/JuliaML/MLUtils.jl)
 `getobs/numobs` interface, and if the input consumed by the algorithm already implements
From acac24f93fb39e8c424915338fce8d4a1d83322a Mon Sep 17 00:00:00 2001
From: "Anthony D. Blaom"
Date: Tue, 28 May 2024 17:35:53 +1200
Subject: [PATCH 07/27] doc tweak

---
 docs/src/reference.md | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/docs/src/reference.md b/docs/src/reference.md
index f5be0824..7a2a196b 100644
--- a/docs/src/reference.md
+++ b/docs/src/reference.md
@@ -120,10 +120,9 @@ Only these method names are exported by LearnAPI: `fit`, `transform`, `inverse_t

 ### List of methods

-- [`fit`](@ref fit): for training or updating algorithms that generalize to new data. For
-  non-generalizing ("static") algorithms, `fit(algorithm)` generally wraps algorithm in a
-  mutable struct that can be mutated by `predict`/`transform` to record byproducts of
-  those operations.
+- [`fit`](@ref fit): for training or updating algorithms that generalize to new data. Or,
+  for non-generalizing ("static") algorithms, wrap `algorithm` in a mutable struct that
+  can be mutated by `predict`/`transform` to record byproducts of those operations.
- [`predict`](@ref operations): for outputting [targets](@ref proxy) or [target proxies](@ref proxy) (such as probability density functions) From 3b289f5dd1ab1a586b7c36f6ee974b77335b2e79 Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Tue, 28 May 2024 17:37:16 +1200 Subject: [PATCH 08/27] tweak --- docs/make.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/make.jl b/docs/make.jl index d9695614..1ed6928f 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -11,7 +11,7 @@ makedocs( "Home" => "index.md", "Anatomy of an Implementation" => "anatomy_of_an_implementation.md", "Reference" => [ - "Summary" => "reference.md", + "Overview" => "reference.md", "fit" => "fit.md", "predict/transform" => "predict_transform.md", "Kinds of Target Proxy" => "kinds_of_target_proxy.md", From d6c320fdbea5186a99d0e7d3d73c8ec138c7d7cf Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Tue, 28 May 2024 17:50:17 +1200 Subject: [PATCH 09/27] whitespace fixes --- docs/src/anatomy_of_an_implementation.md | 172 +++++++++++------------ docs/src/reference.md | 10 +- 2 files changed, 93 insertions(+), 89 deletions(-) diff --git a/docs/src/anatomy_of_an_implementation.md b/docs/src/anatomy_of_an_implementation.md index 1ee22b2e..85eca73f 100644 --- a/docs/src/anatomy_of_an_implementation.md +++ b/docs/src/anatomy_of_an_implementation.md @@ -10,19 +10,19 @@ For a transformer, implementations ordinarily implement `transform` instead of !!! important - The core implementations of `fit`, `predict`, etc, - always have a *single* `data` argument, as in `fit(algorithm, data; verbosity=1)`. - Calls like `fit(algorithm, X, y)` are provided as additional convenience methods. + The core implementations of `fit`, `predict`, etc, + always have a *single* `data` argument, as in `fit(algorithm, data; verbosity=1)`. + Calls like `fit(algorithm, X, y)` are provided as additional convenience methods. !!! note - If the `data` object consumed by `fit`, `predict`, or `transform` is not - not a suitable table¹, array³, tuple of tables and arrays, or some - other object implementing - the MLUtils.jl `getobs`/`numobs` interface, - then an implementation must: (i) suitably overload the trait - [`LearnAPI.data_interface`](@ref); and/or (ii) overload [`obs`](@ref), as - illustrated below under [Providing an advanced data interface](@ref). + If the `data` object consumed by `fit`, `predict`, or `transform` is not + not a suitable table¹, array³, tuple of tables and arrays, or some + other object implementing + the MLUtils.jl `getobs`/`numobs` interface, + then an implementation must: (i) suitably overload the trait + [`LearnAPI.data_interface`](@ref); and/or (ii) overload [`obs`](@ref), as + illustrated below under [Providing an advanced data interface](@ref). The first line below imports the lightweight package LearnAPI.jl whose methods we will be extending. The second imports libraries needed for the core algorithm. @@ -39,7 +39,7 @@ Here's a new type whose instances specify ridge regression parameters: ```@example anatomy struct Ridge{T<:Real} - lambda::T + lambda::T end nothing # hide ``` @@ -63,7 +63,7 @@ changed to `0.05`. ## Implementing `fit` -A ridge regressor requires two types of data for training: *input features* `X`, which +A ridge regressor requires two types of data for training: input features `X`, which here we suppose are tabular¹, and a [target](@ref proxy) `y`, which we suppose is a vector. 
@@ -72,9 +72,9 @@ coefficients labelled by feature name for inspection after training: ```@example anatomy struct RidgeFitted{T,F} - algorithm::Ridge - coefficients::Vector{T} - named_coefficients::F + algorithm::Ridge + coefficients::Vector{T} + named_coefficients::F end nothing # hide ``` @@ -87,25 +87,25 @@ The core implementation of `fit` looks like this: ```@example anatomy function LearnAPI.fit(algorithm::Ridge, data; verbosity=1) - X, y = data + X, y = data - # data preprocessing: - table = Tables.columntable(X) - names = Tables.columnnames(table) |> collect - A = Tables.matrix(table, transpose=true) + # data preprocessing: + table = Tables.columntable(X) + names = Tables.columnnames(table) |> collect + A = Tables.matrix(table, transpose=true) - lambda = algorithm.lambda + lambda = algorithm.lambda - # apply core algorithm: - coefficients = (A*A' + algorithm.lambda*I)\(A*y) # vector + # apply core algorithm: + coefficients = (A*A' + algorithm.lambda*I)\(A*y) # vector - # determine named coefficients: - named_coefficients = [names[j] => coefficients[j] for j in eachindex(names)] + # determine named coefficients: + named_coefficients = [names[j] => coefficients[j] for j in eachindex(names)] - # make some noise, if allowed: - verbosity > 0 && @info "Coefficients: $named_coefficients" + # make some noise, if allowed: + verbosity > 0 && @info "Coefficients: $named_coefficients" - return RidgeFitted(algorithm, coefficients, named_coefficients) + return RidgeFitted(algorithm, coefficients, named_coefficients) end ``` @@ -127,7 +127,7 @@ Here's the implementation for our ridge regressor: ```@example anatomy LearnAPI.predict(model::RidgeFitted, ::LiteralTarget, Xnew) = - Tables.matrix(Xnew)*model.coefficients + Tables.matrix(Xnew)*model.coefficients ``` ## Accessor functions @@ -156,7 +156,7 @@ overload it to dump the named version of the coefficients: ```@example anatomy LearnAPI.minimize(model::RidgeFitted) = - RidgeFitted(model.algorithm, model.coefficients, nothing) + RidgeFitted(model.algorithm, model.coefficients, nothing) ``` Crucially, we can still use `LearnAPI.minimize(model)` in place of `model` to make new @@ -187,19 +187,19 @@ The macro can be used to specify multiple traits simultaneously: ```@example anatomy @trait( - Ridge, - constructor = Ridge, - target = true, - kinds_of_proxy=(LiteralTarget(),), - descriptors = (:regression,), - functions = ( - fit, - minimize, - predict, - obs, - LearnAPI.algorithm, - LearnAPI.coefficients, - ) + Ridge, + constructor = Ridge, + target = true, + kinds_of_proxy=(LiteralTarget(),), + descriptors = (:regression,), + functions = ( + fit, + minimize, + predict, + obs, + LearnAPI.algorithm, + LearnAPI.coefficients, + ) ) nothing # hide ``` @@ -230,10 +230,10 @@ enabling the kind of workflow previewed in [Sample workflow](@ref): ```@example anatomy LearnAPI.fit(algorithm::Ridge, X, y; kwargs...) = - fit(algorithm, (X, y); kwargs...) + fit(algorithm, (X, y); kwargs...) 
LearnAPI.predict(model::RidgeFitted, Xnew) = - predict(model, LiteralTarget(), Xnew) + predict(model, LiteralTarget(), Xnew) ``` ## [Demonstration](@id workflow) @@ -292,40 +292,40 @@ using LearnAPI using LinearAlgebra, Tables struct Ridge{T<:Real} - lambda::T + lambda::T end Ridge(; lambda=0.1) = Ridge(lambda) struct RidgeFitted{T,F} - algorithm::Ridge - coefficients::Vector{T} - named_coefficients::F + algorithm::Ridge + coefficients::Vector{T} + named_coefficients::F end LearnAPI.algorithm(model::RidgeFitted) = model.algorithm LearnAPI.coefficients(model::RidgeFitted) = model.named_coefficients LearnAPI.minimize(model::RidgeFitted) = - RidgeFitted(model.algorithm, model.coefficients, nothing) + RidgeFitted(model.algorithm, model.coefficients, nothing) LearnAPI.fit(algorithm::Ridge, X, y; kwargs...) = - fit(algorithm, (X, y); kwargs...) + fit(algorithm, (X, y); kwargs...) LearnAPI.predict(model::RidgeFitted, Xnew) = predict(model, LiteralTarget(), Xnew) @trait( - Ridge, - constructor = Ridge, - target = true, - kinds_of_proxy=(LiteralTarget(),), - descriptors = (:regression,), - functions = ( - fit, - minimize, - predict, - obs, - LearnAPI.algorithm, - LearnAPI.coefficients, - ) + Ridge, + constructor = Ridge, + target = true, + kinds_of_proxy=(LiteralTarget(),), + descriptors = (:regression,), + functions = ( + fit, + minimize, + predict, + obs, + LearnAPI.algorithm, + LearnAPI.coefficients, + ) ) n = 10 # number of observations @@ -344,9 +344,9 @@ new type: ```@example anatomy2 struct RidgeFitObs{T,M<:AbstractMatrix{T}} - A::M # p x n - names::Vector{Symbol} # features - y::Vector{T} # target + A::M # p x n + names::Vector{Symbol} # features + y::Vector{T} # target end ``` @@ -354,10 +354,10 @@ Now we overload `obs` to carry out the data pre-processing previously in `fit`, ```@example anatomy2 function LearnAPI.obs(::Ridge, data) - X, y = data - table = Tables.columntable(X) - names = Tables.columnnames(table) |> collect - return RidgeFitObs(Tables.matrix(table)', names, y) + X, y = data + table = Tables.columntable(X) + names = Tables.columnnames(table) |> collect + return RidgeFitObs(Tables.matrix(table)', names, y) end ``` @@ -369,27 +369,27 @@ methods - one to handle "regular" input, and one to handle the pre-processed dat ```@example anatomy2 function LearnAPI.fit(algorithm::Ridge, observations::RidgeFitObs; verbosity=1) - lambda = algorithm.lambda + lambda = algorithm.lambda - A = observations.A - names = observations.names - y = observations.y + A = observations.A + names = observations.names + y = observations.y - # apply core algorithm: - coefficients = (A*A' + algorithm.lambda*I)\(A*y) # 1 x p matrix + # apply core algorithm: + coefficients = (A*A' + algorithm.lambda*I)\(A*y) # 1 x p matrix - # determine named coefficients: - named_coefficients = [names[j] => coefficients[j] for j in eachindex(names)] + # determine named coefficients: + named_coefficients = [names[j] => coefficients[j] for j in eachindex(names)] - # make some noise, if allowed: - verbosity > 0 && @info "Coefficients: $named_coefficients" + # make some noise, if allowed: + verbosity > 0 && @info "Coefficients: $named_coefficients" - return RidgeFitted(algorithm, coefficients, named_coefficients) + return RidgeFitted(algorithm, coefficients, named_coefficients) end LearnAPI.fit(algorithm::Ridge, data; kwargs...) = - fit(algorithm, obs(algorithm, data); kwargs...) + fit(algorithm, obs(algorithm, data); kwargs...) 
```

We provide an overloading of `LearnAPI.target` to handle the additional supported data
@@ -401,7 +401,7 @@ LearnAPI.target(::Ridge, observations::RidgeFitObs) = observations.y
 ### The `obs` contract

 Providing `fit` signatures matching the output of `obs`, is the first part of the `obs`
 contract. The second part is this: *The output of `obs` must implement the*
 [MLUtils.jl](https://juliaml.github.io/MLUtils.jl/dev/) `getobs/numobs` *interface for
 accessing individual observations*. It usually suffices to overload `Base.getindex
@@ -409,7 +409,7 @@ accessing individual observations*. It usually suffices to overload `Base.getind

 ```@example anatomy2
 Base.getindex(data::RidgeFitObs, I) =
-    RidgeFitObs(data.A[:,I], data.names, y[I])
+    RidgeFitObs(data.A[:,I], data.names, data.y[I])

 Base.length(data::RidgeFitObs, I) = length(data.y)
@@ -420,10 +420,10 @@
 LearnAPI.obs(::RidgeFitted, Xnew) = Tables.matrix(Xnew)'

 LearnAPI.predict(model::RidgeFitted, ::LiteralTarget, observations::AbstractMatrix) =
-    observations'*model.coefficients
+    observations'*model.coefficients

 LearnAPI.predict(model::RidgeFitted, ::LiteralTarget, Xnew) =
-    predict(model, LiteralTarget(), obs(model, Xnew))
+    predict(model, LiteralTarget(), obs(model, Xnew))
```

### Important notes:
diff --git a/docs/src/reference.md b/docs/src/reference.md
index 7a2a196b..41f57005 100644
--- a/docs/src/reference.md
+++ b/docs/src/reference.md
@@ -25,9 +25,13 @@ an example of data, the observations being the rows.
 Typically, data provided to LearnAPI.jl algorithms, will implement the
 [MLUtils.jl](https://juliaml.github.io/MLUtils.jl/stable) `getobs/numobs` interface for
 accessing individual observations, but implementations can opt out of this requirement;
-see [`obs`](@ref) and [`LearnAPI.data_interface`](@ref) for details. In the MLUtils.jl
-convention, observations in tables are the rows but observations in a matrix are the
-columns.
+see [`obs`](@ref) and [`LearnAPI.data_interface`](@ref) for details.
+
+!!! note
+
+    In the MLUtils.jl
+    convention, observations in tables are the rows but observations in a matrix are the
+    columns.

 ### [Hyperparameters](@id hyperparameters)
From 54a5f9b38130f11c028f56748f1a760a841b4eae Mon Sep 17 00:00:00 2001
From: "Anthony D. Blaom"
Date: Tue, 28 May 2024 17:55:25 +1200
Subject: [PATCH 10/27] fix whitespace

---
 docs/src/anatomy_of_an_implementation.md | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/docs/src/anatomy_of_an_implementation.md b/docs/src/anatomy_of_an_implementation.md
index 85eca73f..81c81d22 100644
--- a/docs/src/anatomy_of_an_implementation.md
+++ b/docs/src/anatomy_of_an_implementation.md
@@ -10,19 +10,19 @@ For a transformer, implementations ordinarily implement `transform` instead of

 !!! important

-    The core implementations of `fit`, `predict`, etc, 
-    always have a *single* `data` argument, as in `fit(algorithm, data; verbosity=1)`. 
-    Calls like `fit(algorithm, X, y)` are provided as additional convenience methods. 
+    The core implementations of `fit`, `predict`, etc.,
+    always have a *single* `data` argument, as in `fit(algorithm, data; verbosity=1)`.
+    Calls like `fit(algorithm, X, y)` are provided as additional convenience methods.

 !!! note

-    If the `data` object consumed by `fit`, `predict`, or `transform` is not 
-    not a suitable table¹, array³, tuple of tables and arrays, or some 
-    other object implementing 
-    the MLUtils.jl `getobs`/`numobs` interface, 
-    then an implementation must: (i) suitably overload the trait 
-    [`LearnAPI.data_interface`](@ref); and/or (ii) overload [`obs`](@ref), as 
-    illustrated below under [Providing an advanced data interface](@ref).
+    If the `data` object consumed by `fit`, `predict`, or `transform` is not
+    a suitable table¹, array³, tuple of tables and arrays, or some
+    other object implementing
+    the MLUtils.jl `getobs`/`numobs` interface,
+    then an implementation must: (i) suitably overload the trait
+    [`LearnAPI.data_interface`](@ref); and/or (ii) overload [`obs`](@ref), as
+    illustrated below under [Providing an advanced data interface](@ref).

 The first line below imports the lightweight package LearnAPI.jl whose methods we will be
 extending. The second imports libraries needed for the core algorithm.
From 0af5476db879c8c07824571d2f3f3e4e4499bdeb Mon Sep 17 00:00:00 2001
From: "Anthony D. Blaom"
Date: Thu, 30 May 2024 13:31:55 +1200
Subject: [PATCH 11/27] clarify importance of constructor over type in traits
 and docstrings

---
 docs/src/anatomy_of_an_implementation.md |  7 +++++++
 docs/src/reference.md                    | 15 +++++++++++----
 docs/src/traits.md                       | 11 ++++++-----
 src/traits.jl                            |  8 ++++++--
 src/types.jl                             |  2 +-
 test/integration/regression.jl           | 16 ++++++++++++++++
 6 files changed, 47 insertions(+), 12 deletions(-)

diff --git a/docs/src/anatomy_of_an_implementation.md b/docs/src/anatomy_of_an_implementation.md
index 81c81d22..9779403a 100644
--- a/docs/src/anatomy_of_an_implementation.md
+++ b/docs/src/anatomy_of_an_implementation.md
@@ -51,6 +51,11 @@ mechanism for creating new versions of itself, with modified property (field) va
 this end, we implement `LearnAPI.constructor`, which must return a keyword constructor:

 ```@example anatomy
+"""
+    Ridge(; lambda=0.1)
+
+Instantiate a ridge regression algorithm, with regularization of `lambda`.
+"""
 Ridge(; lambda=0.1) = Ridge(lambda)
 LearnAPI.constructor(::Ridge) = Ridge
 nothing # hide
@@ -60,6 +65,8 @@ So, if `algorithm = Ridge(lambda=0.1)` then `LearnAPI.constructor(algorithm)(lam
 is another algorithm with the same properties, except that the value of `lambda` has been
 changed to `0.05`.

+Note that we attach the docstring to the constructor, not the struct.
+

 ## Implementing `fit`

diff --git a/docs/src/reference.md b/docs/src/reference.md
index 41f57005..11157384 100644
--- a/docs/src/reference.md
+++ b/docs/src/reference.md
@@ -95,6 +95,9 @@ for such algorithms [`LearnAPI.is_composite`](@ref)`(algorithm)` must be `true`
 is `false`). Generally, the keyword constructor provided by [`LearnAPI.constructor`](@ref)
 must provide default values for all non-algorithm properties.

+Any object `algorithm` for which [`LearnAPI.functions`](@ref)`(algorithm)` is non-empty is
+understood to have a valid implementation of the LearnAPI.jl interface.
+
 ### Example

 Any instance of `GradientRidgeRegressor` defined below is a valid algorithm.
@@ -110,8 +113,11 @@ GradientRidgeRegressor(; learning_rate=0.01, epochs=10, l2_regularization=0.01)
 LearnAPI.constructor(::GradientRidgeRegressor) = GradientRidgeRegressor
 ```

-Any object `algorithm` for which [`LearnAPI.functions`](@ref)`(algorithm)` is non-empty is
-understood have a valid implementation of the LearnAPI.jl interface.
+### Documentation
+
+Attach public LearnAPI.jl-related documentation for an algorithm to its *constructor*,
+rather than to the struct defining its type. In this way, an algorithm can implement
+non-LearnAPI interfaces (such as a native interface) with separate document strings.

 ## Methods

@@ -125,8 +131,9 @@ Only these method names are exported by LearnAPI: `fit`, `transform`, `inverse_t

 ### List of methods

 - [`fit`](@ref fit): for training or updating algorithms that generalize to new data. 
Or, - for non-generalizing ("static") algorithms, wrap `algorithm` in a mutable struct that - can be mutated by `predict`/`transform` to record byproducts of those operations. + for non-generalizing algorithms (see [Static Algorithms](@ref)), wrap `algorithm` in a + mutable struct that can be mutated by `predict`/`transform` to record byproducts of + those operations. - [`predict`](@ref operations): for outputting [targets](@ref proxy) or [target proxies](@ref proxy) (such as probability density functions) diff --git a/docs/src/traits.md b/docs/src/traits.md index 9ff63967..7862d680 100644 --- a/docs/src/traits.md +++ b/docs/src/traits.md @@ -95,12 +95,13 @@ Multiple traits can be declared like this: To ensure that trait metadata can be stored in an external algorithm registry, LearnAPI.jl requires: -1. *Finiteness:* The value of a trait is the same for all algorithms with same - underlying `UnionAll` type. That is, even if the type parameters are different, the - trait should be the same. There is an exception if `is_composite(algorithm) = true`. +1. *Finiteness:* The value of a trait is the same for all `algorithm`s with same value of + [`LearnAPI.constructor(algorithm)`](@ref). This typically means trait values do not + depend on type parameters! There is an exception if `is_composite(algorithm) = true`. -2. *Serializability:* The value of any trait can be evaluated without installing any - third party package; `using LearnAPI` should suffice. +2. *Immediate serializability:* It should be possible to call a trait without first + installing any third party package. Importing the package that defines the algorithm, + together with `import LearnAPI` should suffice. Because of 1, combining a lot of functionality into one algorithm (e.g. the algorithm can perform both classification or regression) can mean traits are necessarily less diff --git a/src/traits.jl b/src/traits.jl index f5709206..2af32b60 100644 --- a/src/traits.jl +++ b/src/traits.jl @@ -76,8 +76,12 @@ julia> algorithm2.lambda # New implementations -All new implementations must overload this trait. It must be possible to recover an -algorithm from the constructor returned as follows: +All new implementations must overload this trait. + +Attach public LearnAPI.jl-related documentation for an algorithm to the constructor, not +the algorithm struct. + +It must be possible to recover an algorithm from the constructor returned as follows: ```julia properties = propertynames(algorithm) diff --git a/src/types.jl b/src/types.jl index e77c4cb7..3d09c93a 100644 --- a/src/types.jl +++ b/src/types.jl @@ -83,7 +83,7 @@ end const CONCRETE_TARGET_PROXY_TYPES_LIST = join( map(CONCRETE_TARGET_PROXY_TYPES_SYMBOLS) do s - "`$s`" + "`$s()`" end, ", ", " and ", diff --git a/test/integration/regression.jl b/test/integration/regression.jl index ee419d21..0ff394e4 100644 --- a/test/integration/regression.jl +++ b/test/integration/regression.jl @@ -10,9 +10,17 @@ import DataFrames # We overload `obs` to expose internal representation of input data. See later for a # simpler variation using the `obs` fallback. +# no docstring here - that goes with the constructor struct Ridge lambda::Float64 end + +""" + Ridge(; lambda=0.1) + +Instantiate a ridge regression algorithm, with regularization of `lambda`. 
+ +""" Ridge(; lambda=0.1) = Ridge(lambda) # LearnAPI.constructor defined later struct RidgeFitObs{T,M<:AbstractMatrix{T}} @@ -176,9 +184,17 @@ end # # VARIATION OF RIDGE REGRESSION THAT USES FALLBACK OF LearnAPI.obs +# no docstring here - that goes with the constructor struct BabyRidge lambda::Float64 end + +""" + BabyRidge(; lambda=0.1) + +Instantiate a ridge regression algorithm, with regularization of `lambda`. + +""" BabyRidge(; lambda=0.1) = BabyRidge(lambda) # LearnAPI.constructor defined later struct BabyRidgeFitted{T,F} From 4b7c09c648560396103db82b6f1ea444235cdc3a Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Thu, 13 Jun 2024 15:33:43 +1200 Subject: [PATCH 12/27] add Expectile and Quantile target proxy types --- docs/src/kinds_of_target_proxy.md | 17 +++++++++++------ src/traits.jl | 15 ++++++++++----- src/types.jl | 3 +++ 3 files changed, 24 insertions(+), 11 deletions(-) diff --git a/docs/src/kinds_of_target_proxy.md b/docs/src/kinds_of_target_proxy.md index a34e1f42..35d51e4c 100644 --- a/docs/src/kinds_of_target_proxy.md +++ b/docs/src/kinds_of_target_proxy.md @@ -16,18 +16,20 @@ LearnAPI.IID | type | form of an observation | |:-------------------------------------:|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `LearnAPI.LiteralTarget` | same as target observations | +| `LearnAPI.LiteralTarget` | same as target observations; may have the interpretation of a 50% quantile, 50% expectile or mode | | `LearnAPI.Sampleable` | object that can be sampled to obtain object of the same form as target observation | | `LearnAPI.Distribution` | explicit probability density/mass function whose sample space is all possible target observations | | `LearnAPI.LogDistribution` | explicit log-probability density/mass function whose sample space is possible target observations | -| † `LearnAPI.Probability` | numerical probability or probability vector | -| † `LearnAPI.LogProbability` | log-probability or log-probability vector | -| † `LearnAPI.Parametric` | a list of parameters (e.g., mean and variance) describing some distribution | +| `LearnAPI.Probability`¹ | numerical probability or probability vector | +| `LearnAPI.LogProbability`¹ | log-probability or log-probability vector | +| `LearnAPI.Parametric`¹ | a list of parameters (e.g., mean and variance) describing some distribution | | `LearnAPI.LabelAmbiguous` | collections of labels (in case of multi-class target) but without a known correspondence to the original target labels (and of possibly different number) as in, e.g., clustering | | `LearnAPI.LabelAmbiguousSampleable` | sampleable version of `LabelAmbiguous`; see `Sampleable` above | | `LearnAPI.LabelAmbiguousDistribution` | pdf/pmf version of `LabelAmbiguous`; see `Distribution` above | | `LearnAPI.LabelAmbiguousFuzzy` | same as `LabelAmbiguous` but with multiple values of indeterminant number | -| `LearnAPI.ConfidenceInterval` | confidence interval | +| `LearnAPI.Quantile`² | same as target but with quantile interpretation | +| `LearnAPI.Expectile`² | same as target but with expectile interpretation | +| `LearnAPI.ConfidenceInterval`² | confidence interval | | `LearnAPI.Fuzzy` | finite but possibly varying number of target observations | | `LearnAPI.ProbabilisticFuzzy` | as for `Fuzzy` but labeled with probabilities (not necessarily summing to one) | | `LearnAPI.SurvivalFunction` | survival function | @@ -36,9 +38,12 @@ LearnAPI.IID | 
`LearnAPI.OutlierScore` | numerical score reflecting degree of outlierness (not necessarily normalized) |
| `LearnAPI.Continuous` | real-valued approximation/interpolation of a discrete-valued target, such as a count (e.g., number of phone calls) |

-† Provided for completeness but discouraged to avoid [ambiguities in
+¹Provided for completeness but discouraged to avoid [ambiguities in
 representation](https://github.com/alan-turing-institute/MLJ.jl/blob/dev/paper/paper.md#a-unified-approach-to-probabilistic-predictions-and-their-evaluation).

+²The level will be controlled by a hyperparameter; models providing only quantiles or
+expectiles at 50% will provide `LiteralTarget` instead.
+

 > Table of concrete subtypes of `LearnAPI.IID <: LearnAPI.KindOfProxy`.
diff --git a/src/traits.jl b/src/traits.jl
index 2af32b60..26d12597 100644
--- a/src/traits.jl
+++ b/src/traits.jl
@@ -304,14 +304,19 @@ doc_url(::Any) = "unknown"
 """
     LearnAPI.load_path(algorithm)

-Return a string indicating where the `struct` for `typeof(algorithm)` can be found, beginning
-with the name of the package module defining it. For example, a return value of
-`"FastTrees.LearnAPI.DecisionTreeClassifier"` means the following julia code will return the
-algorithm type:
+Return a string indicating where in code the definition of the algorithm's constructor can
+be found, beginning with the name of the package module defining it. By "constructor" we
+mean the return value of [`LearnAPI.constructor(algorithm)`](@ref).
+
+# New implementations
+
+For example, a return value of `"FastTrees.LearnAPI.DecisionTreeClassifier"` means the
+following Julia code will not error:

 ```julia
 import FastTrees
-FastTrees.LearnAPI.DecisionTreeClassifier
+import LearnAPI
+@assert FastTrees.LearnAPI.DecisionTreeClassifier == LearnAPI.constructor(algorithm)
 ```

 $DOC_UNKNOWN
diff --git a/src/types.jl b/src/types.jl
index 3d09c93a..02218bd3 100644
--- a/src/types.jl
+++ b/src/types.jl
@@ -40,6 +40,9 @@ struct SurvivalDistribution <: IID end
 struct HazardFunction <: IID end
 struct OutlierScore <: IID end
 struct Continuous <: IID end
+struct Quantile <: IID end
+struct Expectile <: IID end
+

 """
     Joint <: KindOfProxy
From 7a781a086493c601097941bdc75f2a5cae64cfdd Mon Sep 17 00:00:00 2001
From: "Anthony D. Blaom"
Date: Mon, 9 Sep 2024 09:34:21 +1200
Subject: [PATCH 13/27] add target_observation_scitype

---
 docs/make.jl       |  5 ++++-
 docs/src/traits.md | 26 ++++++++++++++------------
 src/traits.jl      | 30 ++++++++++++++++++++++++++++++
 3 files changed, 48 insertions(+), 13 deletions(-)

diff --git a/docs/make.jl b/docs/make.jl
index 1ed6928f..1e3a6277 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -6,7 +6,10 @@ const REPO = Remotes.GitHub("JuliaAI", "LearnAPI.jl")

 makedocs(
     modules=[LearnAPI,],
-    format=Documenter.HTML(prettyurls = get(ENV, "CI", nothing) == "true"),
+    format=Documenter.HTML(
+        prettyurls = get(ENV, "CI", nothing) == "true",
+        collapselevel = 1,
+    ),
     pages=[
         "Home" => "index.md",
         "Anatomy of an Implementation" => "anatomy_of_an_implementation.md",
diff --git a/docs/src/traits.md b/docs/src/traits.md
index 7862d680..84aedeef 100644
--- a/docs/src/traits.md
+++ b/docs/src/traits.md
@@ -36,20 +36,21 @@ package [ScientificTypesBase.jl](https://github.com/JuliaAI/ScientificTypesBase.
| [`LearnAPI.human_name`](@ref)`(algorithm)` | human name for the algorithm; should be a noun | type name with spaces | "elastic net regressor" | | [`LearnAPI.data_interface`](@ref)`(algorithm)` | Interface implemented by objects returned by [`obs`](@ref) | `Base.HasLength()` (supports `MLUtils.getobs/numobs`) | `Base.SizeUnknown()` (supports `iterate`) | | [`LearnAPI.iteration_parameter`](@ref)`(algorithm)` | symbolic name of an iteration parameter | `nothing` | :epochs | -| [`LearnAPI.fit_scitype`](@ref)`(algorithm)` | upper bound on `scitype(data)` ensuring `fit(algorithm, data...)` works | `Union{}` | `Tuple{Table(Continuous), AbstractVector{Continuous}}` | -| [`LearnAPI.fit_observation_scitype`](@ref)`(algorithm)` | upper bound on `scitype(observation)` for `observation` in `data` ensuring `fit(algorithm, data...)` works | `Union{}` | `Tuple{AbstractVector{Continuous}, Continuous}` | -| [`LearnAPI.fit_type`](@ref)`(algorithm)` | upper bound on `typeof(data)` ensuring `fit(algorithm, data...)` works | `Union{}` | `Tuple{AbstractMatrix{<:Real}, AbstractVector{<:Real}}` | -| [`LearnAPI.fit_observation_type`](@ref)`(algorithm)` | upper bound on `typeof(observation)` for `observation` in `data` ensuring `fit(algorithm, data...)` works | `Union{}` | `Tuple{AbstractVector{<:Real}, Real}` | -| [`LearnAPI.predict_input_scitype`](@ref)`(algorithm)` | upper bound on `scitype(data)` ensuring `predict(model, kind, data...)` works | `Union{}` | `Table(Continuous)` | -| [`LearnAPI.predict_input_observation_scitype`](@ref)`(algorithm)` | upper bound on `scitype(observation)` for `observation` in `data` ensuring `predict(model, kind, data...)` works | `Union{}` | `Vector{Continuous}` | -| [`LearnAPI.predict_input_type`](@ref)`(algorithm)` | upper bound on `typeof(data)` ensuring `predict(model, kind, data...)` works | `Union{}` | `AbstractMatrix{<:Real}` | -| [`LearnAPI.predict_input_observation_type`](@ref)`(algorithm)` | upper bound on `typeof(observation)` for `observation` in `data` ensuring `predict(model, kind, data...)` works | `Union{}` | `Vector{<:Real}` | +| [`LearnAPI.fit_scitype`](@ref)`(algorithm)` | upper bound on `scitype(data)` ensuring `fit(algorithm, data)` works | `Union{}` | `Tuple{Table(Continuous), AbstractVector{Continuous}}` | +| [`LearnAPI.fit_observation_scitype`](@ref)`(algorithm)` | upper bound on `scitype(observation)` for `observation` in `data` ensuring `fit(algorithm, data)` works | `Union{}` | `Tuple{AbstractVector{Continuous}, Continuous}` | +| [`LearnAPI.fit_type`](@ref)`(algorithm)` | upper bound on `typeof(data)` ensuring `fit(algorithm, data)` works | `Union{}` | `Tuple{AbstractMatrix{<:Real}, AbstractVector{<:Real}}` | +| [`LearnAPI.fit_observation_type`](@ref)`(algorithm)` | upper bound on `typeof(observation)` for `observation` in `data` ensuring `fit(algorithm, data)` works | `Union{}` | `Tuple{AbstractVector{<:Real}, Real}` | +| [`LearnAPI.target_observation scitype`](@ref)`(algorithm)` | upper bound on the scitype of each observation of the targget | `Any` | `Continuous` | +| [`LearnAPI.predict_input_scitype`](@ref)`(algorithm)` | upper bound on `scitype(data)` ensuring `predict(model, kind, data)` works | `Union{}` | `Table(Continuous)` | +| [`LearnAPI.predict_input_observation_scitype`](@ref)`(algorithm)` | upper bound on `scitype(observation)` for `observation` in `data` ensuring `predict(model, kind, data)` works | `Union{}` | `Vector{Continuous}` | +| [`LearnAPI.predict_input_type`](@ref)`(algorithm)` | upper bound on `typeof(data)` ensuring 
`predict(model, kind, data)` works | `Union{}` | `AbstractMatrix{<:Real}` |
+| [`LearnAPI.predict_input_observation_type`](@ref)`(algorithm)` | upper bound on `typeof(observation)` for `observation` in `data` ensuring `predict(model, kind, data)` works | `Union{}` | `Vector{<:Real}` |
| [`LearnAPI.predict_output_scitype`](@ref)`(algorithm, kind_of_proxy)` | upper bound on `scitype(predict(model, ...))` | `Any` | `AbstractVector{Continuous}` |
| [`LearnAPI.predict_output_type`](@ref)`(algorithm, kind_of_proxy)` | upper bound on `typeof(predict(model, ...))` | `Any` | `AbstractVector{<:Real}` |
-| [`LearnAPI.transform_input_scitype`](@ref)`(algorithm)` | upper bound on `scitype(data)` ensuring `transform(model, data...)` works | `Union{}` | `Table(Continuous)` |
-| [`LearnAPI.transform_input_observation_scitype`](@ref)`(algorithm)` | upper bound on `scitype(observation)` for `observation` in `data` ensuring `transform(model, data...)` works | `Union{}` | `Vector{Continuous}` |
-| [`LearnAPI.transform_input_type`](@ref)`(algorithm)` | upper bound on `typeof(data)`ensuring `transform(model, data...)` works | `Union{}` | `AbstractMatrix{<:Real}}` |
-| [`LearnAPI.transform_input_observation_type`](@ref)`(algorithm)` | upper bound on `typeof(observation)` for `observation` in `data` ensuring `transform(model, data...)` works | `Union{}` | `Vector{Continuous}` |
+| [`LearnAPI.transform_input_scitype`](@ref)`(algorithm)` | upper bound on `scitype(data)` ensuring `transform(model, data)` works | `Union{}` | `Table(Continuous)` |
+| [`LearnAPI.transform_input_observation_scitype`](@ref)`(algorithm)` | upper bound on `scitype(observation)` for `observation` in `data` ensuring `transform(model, data)` works | `Union{}` | `Vector{Continuous}` |
+| [`LearnAPI.transform_input_type`](@ref)`(algorithm)` | upper bound on `typeof(data)` ensuring `transform(model, data)` works | `Union{}` | `AbstractMatrix{<:Real}` |
+| [`LearnAPI.transform_input_observation_type`](@ref)`(algorithm)` | upper bound on `typeof(observation)` for `observation` in `data` ensuring `transform(model, data)` works | `Union{}` | `Vector{Continuous}` |
| [`LearnAPI.transform_output_scitype`](@ref)`(algorithm)` | upper bound on `scitype(transform(model, ...))` | `Any` | `Table(Continuous)` |
| [`LearnAPI.transform_output_type`](@ref)`(algorithm)` | upper bound on `typeof(transform(model, ...))` | `Any` | `AbstractMatrix{<:Real}` |
| [`LearnAPI.predict_or_transform_mutates`](@ref)`(algorithm)` | `true` if `predict` or `transform` mutates first argument | `false` | `true` |
@@ -130,6 +131,7 @@ LearnAPI.fit_scitype
 LearnAPI.fit_type
 LearnAPI.fit_observation_scitype
 LearnAPI.fit_observation_type
+LearnAPI.target_observation_scitype
 LearnAPI.predict_input_scitype
 LearnAPI.predict_input_observation_scitype
 LearnAPI.predict_input_type
diff --git a/src/traits.jl b/src/traits.jl
index 26d12597..79fd3453 100644
--- a/src/traits.jl
+++ b/src/traits.jl
@@ -44,6 +44,7 @@ const TRAITS = [
     :fit_observation_scitype,
     :fit_type,
     :fit_observation_type,
+    :target_observation_scitype,
    :predict_input_scitype,
    :predict_output_scitype,
    :predict_input_type,
@@ -491,6 +492,35 @@ Optional. The fallback return value is `Union{}`. $(DOC_ONLY_ONE(:fit))

 """
 fit_observation_type(::Any) = Union{}

+"""
+    LearnAPI.target_observation_scitype(algorithm)
+
+Return an upper bound `S` on the scitype of each observation of `LearnAPI.target(data)`,
+where `data` is an admissible argument in the call `fit(algorithm, data)`. 

+
+This interpretation only holds if `LearnAPI.target(algorithm)` is `true`. In any case,
+however, if `algorithm` implements `predict`, then `S` will always be an
+upper bound on the scitype of observations that could be conceivably extracted from the
+output of [`predict`](@ref). For example, suppose we have

+```julia
+model = fit(algorithm, data)
+ŷ = predict(model, Sampleable(), data_new)
+```
+
+Then each sample generated by each "observation" of `ŷ` (a vector of sampleable objects,
+say) will be bound in scitype by `S`.
+
+See also [`LearnAPI.fit_observation_scitype`](@ref).
+
+# New implementations
+
+Optional. The fallback return value is `Any`.
+
+"""
+target_observation_scitype(::Any) = Any
+
+
 function DOC_INPUT_SCITYPE(op)
     extra = op == :predict ? " kind_of_proxy," : ""
     ONLY = DOC_ONLY_ONE(op)

From 7a781a086493c601097941bdc75f2a5cae64cfdd Mon Sep 17 00:00:00 2001
From: "Anthony D. Blaom"
Date: Tue, 24 Sep 2024 13:30:13 +1200
Subject: [PATCH 14/27] more doc updates

---
 docs/make.jl                  |  1 +
 docs/src/fit.md               | 54 ++++++++++++++++++++-----
 docs/src/index.md             | 14 ++++---
 docs/src/minimize.md          |  2 +-
 docs/src/obs.md               | 57 +++++++++++++++++---------
 docs/src/predict_transform.md | 29 +++++++++++--
 docs/src/reference.md         | 75 +++++++++++++++++++---------------
 docs/src/traits.md            |  4 +-
 src/LearnAPI.jl               |  1 +
 src/accessor_functions.jl     | 33 ++++++++-------
 src/fit.jl                    | 27 +++++++++----
 src/minimize.jl               |  2 +-
 src/obs.jl                    | 37 +++++++++--------
 src/predict_transform.jl      | 40 +++++++++++++-----
 src/traits.jl                 | 57 +++++++++++++------------
 src/types.jl                  | 76 +++++++++++++++++++++++++++++++++++
 16 files changed, 353 insertions(+), 156 deletions(-)

diff --git a/docs/make.jl b/docs/make.jl
index 1e3a6277..ecfc1dd0 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -19,6 +19,7 @@ makedocs(
         "predict/transform" => "predict_transform.md",
         "Kinds of Target Proxy" => "kinds_of_target_proxy.md",
         "minimize" => "minimize.md",
+        "input" => "input.md",
         "obs" => "obs.md",
         "Accessor Functions" => "accessor_functions.md",
         "Algorithm Traits" => "traits.md",
diff --git a/docs/src/fit.md b/docs/src/fit.md
index c3727110..1687c686 100644
--- a/docs/src/fit.md
+++ b/docs/src/fit.md
@@ -1,33 +1,69 @@
 # [`fit`](@ref fit)
 
+Training for the first time:
+
 ```julia
 fit(algorithm, data; verbosity=1) -> model
-fit(model, data; verbosity=1) -> updated_model
+fit(algorithm; verbosity=1) -> static_model
 ```
 
-When `fit` expects an tuple form of argument, `data = (X1, ..., Xn)`, then the signature
-`fit(algorithm, X1, ..., Xn)` is also provided.
+Updating:
+
+```
+fit(model, data; verbosity=1, param1=new_value1, param2=new_value2, ...) -> updated_model
+fit(model, NewObservations(), new_data; verbosity=1, param1=new_value1, ...) -> updated_model
+fit(model, NewFeatures(), new_data; verbosity=1, param1=new_value1, ...) -> updated_model
+```
 
-## Typical workflow
+When `fit` expects a tuple form of argument, `data = (X1, ..., Xn)`, then the signature
+`fit(algorithm, X1, ..., Xn)` is also provided. 
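+
+For example, assuming some supervised `algorithm` trains on a feature table `X` and a
+target vector `y` (names here are for illustration only), the following two calls are
+equivalent, a sketch:
+
+```julia
+model = fit(algorithm, (X, y)) # single `data` argument
+model = fit(algorithm, X, y)   # slurping form of the same call
+```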
+ +## Typical workflows + +Supposing `Algorithm` is some supervised classifier type, with an iteration parameter `n`: ```julia -# Train some supervised `algorithm`: -model = fit(algorithm, X, y) +algorithm = Algorithm(n=100) +model = fit(algorithm, (X, y)) # or `fit(algorithm, X, y)` # Predict probability distributions: ŷ = predict(model, Distribution(), Xnew) # Inspect some byproducts of training: LearnAPI.feature_importances(model) + +# Add 50 iterations and predict again: +model = fit(model; n=150) +predict(model, Distribution(), X) +``` + +### A static algorithm (no "learning") + +```julia +# Apply some clustering algorithm which cannot be generalized to new data: +model = fit(algorithm) +labels = predict(model, LabelAmbiguous(), X) # mutates `model` + +# inspect byproducts of the clustering algorithm (e.g., outliers): +LearnAPI.extras(model) ``` ## Implementation guide -| method | fallback | compulsory? | -|:--------------------------|:---------|-------------| -| [`fit`](@ref)`(alg, ...)` | none | yes | +Initial training: + +| method | fallback | compulsory? | +|:-------------------------------------------------------------------------------|:-----------------------------------------------------------------|--------------------| +| [`fit`](@ref)`(algorithm, data; verbosity=1)` | ignores `data` and applies signature below | yes, unless static | +| [`fit`](@ref)`(algorithm; verbosity=1)` | none | no, unless static | +Updating: +| method | fallback | compulsory? | +|:-------------------------------------------------------------------------------|:---------------------------------------------------------------------------|-------------| +| [`fit`](@ref)`(model, data; verbosity=1, param_updates...)` | retrains from scratch on `data` with specified hyperparameter replacements | no | +| [`fit`](@ref)`(model, ::NewObservations, data; verbosity=1, param_updates...)` | none | no | +| [`fit`](@ref)`(model, ::NewFeatures, data; verbosity=1, param_updates...)` | none | no | ## Reference diff --git a/docs/src/index.md b/docs/src/index.md index 1b6cc500..b66a6d74 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -69,18 +69,20 @@ on the usual supervised/unsupervised learning dichotomy. From this point of view supervised algorithm is simply one in which a target variable exists, and happens to appear as an input to training but not to prediction. +## Data interfaces + Algorithms are free to consume data in any format. However, a method called [`obs`](@ref data_interface) (read as "observations") gives users and meta-algorithms access to an algorithm-specific representation of input data, which is also guaranteed to implement a -standard interface for accessing individual observations, unless an algorithm explicitly -opts out. The `fit` and `predict` methods also consume these alternative representations -of data. +standard interface for accessing individual observations, unless the algorithm explicitly +opts out. Moreover, the `fit` and `predict` methods will also be able to consume these +alternative data representations. The fallback data interface is the [MLUtils.jl](https://github.com/JuliaML/MLUtils.jl) `getobs/numobs` interface, and if the input consumed by the algorithm already implements -that interface (tables, arrays, etc.) then overloading `obs` is completely optional. A -plain iteration interface (to support, e.g., data loaders reading images from disk) -can also be specified. +that interface (tables, arrays, etc.) then overloading `obs` is completely optional. 
Plain +iteration interfaces, with or without knowledge of the number of observations, can also be +specified (to support, e.g., data loaders reading images from disk). ## Learning more diff --git a/docs/src/minimize.md b/docs/src/minimize.md index 6fad919a..8e7a4efb 100644 --- a/docs/src/minimize.md +++ b/docs/src/minimize.md @@ -7,7 +7,7 @@ minimize(model) -> # Typical workflow ```julia -model = fit(algorithm, X, y) +model = fit(algorithm, (X, y)) # or `fit(algorithm, X, y)` ŷ = predict(model, LiteralTarget(), Xnew) LearnAPI.feature_importances(model) diff --git a/docs/src/obs.md b/docs/src/obs.md index 3e40e3f9..bae83427 100644 --- a/docs/src/obs.md +++ b/docs/src/obs.md @@ -1,4 +1,4 @@ -# [`obs`](@id data_interface) +# [`obs` and Data Interfaces](@id data_interface) The `obs` method takes data intended as input to `fit`, `predict` or `transform`, and transforms it to an algorithm-specific form guaranteed to implement a form of observation @@ -13,18 +13,21 @@ obs(model, data) # can be passed to `predict` or `transform` instead of `dat ## Typical workflows -LearnAPI.jl makes no explicit assumptions about the form of data `X` and `y` in a call -like `fit(algorithm, (X, y))`. However, if we define +LearnAPI.jl makes no universal assumptions about the form of `data` in a call +like `fit(algorithm, data)`. However, if we define ```julia -observations = obs(algorithm, (X, y)) +observations = obs(algorithm, data) ``` -then, assuming the typical case that `LearnAPI.data_interface(algorithm) == Base.HasLength()`, `observations` implements the [MLUtils.jl](https://juliaml.github.io/MLUtils.jl/dev/) `getobs`/`numobs` interface. Moreover, we can pass `observations` to `fit` in place of -the original data, or first resample it using `MLUtils.getobs`: +then, assuming the typical case that `LearnAPI.data_interface(algorithm) == +LearnAPI.RandomAccess()`, `observations` implements the +[MLUtils.jl](https://juliaml.github.io/MLUtils.jl/dev/) `getobs`/`numobs` interface, for +grabbing and counting observations. Moreover, we can pass `observations` to `fit` in place +of the original data, or first resample it using `MLUtils.getobs`: ```julia -# equivalent to `model = fit(algorithm, (X, y))` (or `fit(algorithm, X, y))`: +# equivalent to `model = fit(algorithm, data)` model = fit(algorithm, observations) # with resampling: @@ -40,40 +43,38 @@ how a user might call `obs` and `MLUtils.getobs` to perform efficient cross-vali using LearnAPI import MLUtils -X = -y = -algorithm = +algorithm = + +data = +X = LearnAPI.input(algorithm, data) +y = LearnAPI.target(algorithm, data) train_test_folds = map([1:10, 11:20, 21:30]) do test (setdiff(1:30, test), test) end -fitobs = obs(algorithm, (X, y)) +fitobs = obs(algorithm, data) never_trained = true scores = map(train_test_folds) do (train, test) # train using model-specific representation of data: - trainobs = MLUtils.getobs(fitobs, train) - model = fit(algorithm, trainobs) + fitobs_subset = MLUtils.getobs(fitobs, train) + model = fit(algorithm, fitobs_subset) # predict on the fold complement: if never_trained global predictobs = obs(model, X) global never_trained = false end - testobs = MLUtils.getobs(predictobs, test) - ŷ = predict(model, LiteralTarget(), testobs) + predictobs_subset = MLUtils.getobs(predictobs, test) + ŷ = predict(model, LiteralTarget(), predictobs_subset) return end ``` -Note here that the output of `predict` will match the representation of `y` , i.e., -there is no concept of an algorithm-specific representation of *outputs*, only inputs. 

 end
```

-Note here that the output of `predict` will match the representation of `y` , i.e.,
-there is no concept of an algorithm-specific representation of *outputs*, only inputs.
-
-
 ## Implementation guide

| method                      | compulsory? | fallback |
@@ -89,3 +90,21 @@ A sample implementation is given in [Providing an advanced data interface](@ref)
```@docs
obs
```
+
+### Data interfaces
+
+New implementations must overload [`LearnAPI.data_interface(algorithm)`](@ref) if the
+output of [`obs`](@ref) does not implement [`LearnAPI.RandomAccess`](@ref). (Arrays, most
+tables, and all tuples thereof, implement `RandomAccess`.)
+
+- [`LearnAPI.RandomAccess`](@ref) (default)
+- [`LearnAPI.FiniteIterable`](@ref)
+- [`LearnAPI.Iterable`](@ref)
+
+
+```@docs
+LearnAPI.RandomAccess
+LearnAPI.FiniteIterable
+LearnAPI.Iterable
+```
+
diff --git a/docs/src/predict_transform.md b/docs/src/predict_transform.md
index 35fb52d7..2ec378ef 100644
--- a/docs/src/predict_transform.md
+++ b/docs/src/predict_transform.md
@@ -9,12 +9,13 @@ inverse_transform(model, data)
 When a method expects a tuple form of argument, `data = (X1, ..., Xn)`, then a slurping
 signature is also provided, as in `transform(model, X1, ..., Xn)`.
 
-## Typical worklows
+
+## [Typical workflows](@id predict_workflow)
 
 Train some supervised `algorithm`:
 
 ```julia
-model = fit(algorithm, X, y)
+model = fit(algorithm, (X, y)) # or `fit(algorithm, X, y)`
 ```
 
 Predict probability distributions:
@@ -52,7 +53,7 @@ ŷ = predict(model, LiteralTarget(), predictobs)
 ```
 
-## Implementation guide
+## [Implementation guide](@id predict_guide)
 
 | method                       | compulsory? | fallback |
 |:----------------------------|:-----------:|:--------:|
@@ -72,7 +73,27 @@ paired with an implementation of [`inverse_transform`](@ref), for returning (app
 right inverses to `transform`.
 
-## Reference
+### [One-liners combining fit and transform/predict](@id one_liners)
+
+Algorithms may optionally overload `transform` to apply `fit` first, using the supplied
+data if required, and then immediately `transform` the same data. The same applies to
+`predict`. In that case the first argument of `transform`/`predict` is an *algorithm*
+instead of the output of `fit`:
+
+```julia
+predict(algorithm, kind_of_proxy, data) # `fit` implied
+transform(algorithm, data) # `fit` implied
+```
+
+For example, if `fit(algorithm, X)` is defined, then `predict(algorithm, X)` will be
+shorthand for
+
+```julia
+model = fit(algorithm, X)
+predict(model, X)
+```
+
+## [Reference](@id predict_ref)
 
 ```@docs
 predict
diff --git a/docs/src/reference.md b/docs/src/reference.md
index 11157384..de0bb3d6 100644
--- a/docs/src/reference.md
+++ b/docs/src/reference.md
@@ -25,21 +25,21 @@ an example of data, the observations being the rows.
 
 Typically, data provided to LearnAPI.jl algorithms will implement the
 [MLUtils.jl](https://juliaml.github.io/MLUtils.jl/stable) `getobs/numobs` interface for
 accessing individual observations, but implementations can opt out of this requirement;
-see [`obs`](@ref) and [`LearnAPI.data_interface`](@ref) for details.
+see [`obs`](@ref) and [`LearnAPI.data_interface`](@ref) for details. 

-!!! note
+!!! note 

-    In the MLUtils.jl
-    convention, observations in tables are the rows but observations in a matrix are the
-    columns.
+    In the MLUtils.jl
+    convention, observations in tables are the rows but observations in a matrix are the
+    columns. 

 ### [Hyperparameters](@id hyperparameters)

 Besides the data it consumes, a machine learning algorithm's behavior is governed by a
 number of user-specified *hyperparameters*, such as the number of trees in a random
-forest. In LearnAPI.jl, one is allowed to have hyperparematers that are not data-generic. 

-For example, a class weight dictionary will only make sense for a target taking values in
-the set of dictionary keys.
+forest. In LearnAPI.jl, one is allowed to have hyperparameters that are not data-generic.
+For example, a class weight dictionary, which will only make sense for a target taking
+values in the set of dictionary keys, can be specified as a hyperparameter.
 
 ### [Targets and target proxies](@id proxy)
 
@@ -54,7 +54,7 @@ detection, "outlier"/"inlier" predictions, or probability-like scores, are simil
 compared with ground truth labels. In clustering, integer labels assigned to observations
 by the clustering algorithm can be paired with human labels using, say, the Rand
 index. In survival analysis, predicted survival functions or probability distributions are
-compared with censored ground truth survival times.
+compared with censored ground truth survival times. And so on ...
 
 #### Definitions
 
@@ -74,8 +74,12 @@ dispatch. These are also used to distinguish performance metrics provided by the
 An object implementing the LearnAPI.jl interface is called an *algorithm*, although it is
 more accurately "the configuration of some algorithm".¹ An algorithm encapsulates a
-particular set of user-specified [hyperparameters](@ref) as the object's properties. It
-does not store learned parameters.
+particular set of user-specified [hyperparameters](@ref) as the object's *properties*
+(which conceivably differ from its fields). It does not store learned parameters.
+
+Informally, we will sometimes use the word "model" to refer to the output of
+`fit(algorithm, ...)` (see below), something which typically does store learned
+parameters.
 
 For `algorithm` to be a valid LearnAPI.jl algorithm,
 [`LearnAPI.constructor(algorithm)`](@ref) must be defined and return a keyword constructor
@@ -90,13 +94,16 @@ named_properties = NamedTuple{properties}(getproperty.(Ref(algorithm), propertie
 Note that if `algorithm` is an instance of a *mutable* struct, this requirement
 generally requires overloading `Base.==` for the struct.
 
-A *composite algorithm* is one with a property that can take other algorithms as values;
-for such algorithms [`LearnAPI.is_composite`](@ref)`(algorithm)` must be `true` (fallback
-is `false`). Generally, the keyword constructor provided by [`LearnAPI.constructor`](@ref)
-must provide default values for all non-algorithm properties.
+#### Composite algorithms (wrappers)
+
+A *composite algorithm* is one with at least one property that can take other algorithms
+as values; for such algorithms [`LearnAPI.is_composite`](@ref)`(algorithm)` must be `true`
+(fallback is `false`). Generally, the keyword constructor provided by
+[`LearnAPI.constructor`](@ref) must provide default values for all fields that are not
+algorithm-valued.
 
 Any object `algorithm` for which [`LearnAPI.functions`](@ref)`(algorithm)` is non-empty is
-understood have a valid implementation of the LearnAPI.jl interface.
+understood to have a valid implementation of the LearnAPI.jl interface. 
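+
+The following is a sketch of the keyword-constructor contract just described, using the
+`GradientRidgeRegressor` defined in the example below:
+
+```julia
+algorithm = GradientRidgeRegressor(learning_rate=0.1)
+properties = propertynames(algorithm)
+named_properties = NamedTuple{properties}(getproperty.(Ref(algorithm), properties))
+@assert LearnAPI.constructor(algorithm)(; named_properties...) == algorithm
+```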

 ### Example
 
@@ -109,7 +116,7 @@ struct GradientRidgeRegressor{T<:Real}
     l2_regularization::T
 end
 GradientRidgeRegressor(; learning_rate=0.01, epochs=10, l2_regularization=0.01) =
-  GradientRidgeRegressor(learning_rate, epochs, l2_regularization)
+    GradientRidgeRegressor(learning_rate, epochs, l2_regularization)
 LearnAPI.constructor(::GradientRidgeRegressor) = GradientRidgeRegressor
 ```
 
@@ -117,16 +124,22 @@ LearnAPI.constructor(::GradientRidgeRegressor) = GradientRidgeRegressor
 
 Attach public LearnAPI.jl-related documentation for an algorithm to its *constructor*,
 rather than to the struct defining its type. In this way, an algorithm can implement
-non-LearnAPI interfaces (such as a native interface) with separate document strings.
+multiple interfaces, in addition to the LearnAPI interface, with separate document strings
+for each.
 
 ## Methods
 
 Only these method names are exported by LearnAPI: `fit`, `transform`, `inverse_transform`,
-`minimize`, and `obs`. All new implementations must implement [`fit`](@ref),
-[`LearnAPI.algorithm`](@ref algorithm_minimize), [`LearnAPI.constructor`](@ref) and
-[`LearnAPI.functions`](@ref). The last two are algorithm traits, which can be set with the
-[`@trait`](@ref) macro.
+`minimize`, and `obs`.
+
+!!! note
+
+    All new implementations must implement [`fit`](@ref),
+    [`LearnAPI.algorithm`](@ref algorithm_minimize), [`LearnAPI.constructor`](@ref) and
+    [`LearnAPI.functions`](@ref). The last two are algorithm traits, which can be set
+    with the [`@trait`](@ref) macro.
+
 
 ### List of methods
 
@@ -147,23 +160,21 @@ Only these method names are exported by LearnAPI: `fit`, `transform`, `inverse_t
 
 - [`minimize`](@ref algorithm_minimize): for stripping the `model` output by `fit` of
   inessential content, for purposes of serialization.
 
+- [`LearnAPI.input`](@ref input): for extracting inputs from training data.
+
 - [`obs`](@ref data_interface): a method for exposing to the user algorithm-specific
-  representations of data guaranteed to implement observation access according to the
-  value of the [`LearnAPI.data_interface`](@ref) trait
-
-- [Accessor functions](@ref accessor_functions): include things like `feature_importances`
-  and `training_losses`, for extracting, from training outcomes, information common to
-  many algorithms.
+  representations of data that are guaranteed to implement observation access, as
+  specified by [`LearnAPI.data_interface(algorithm)`](@ref).
+
+- [Accessor functions](@ref accessor_functions): these include functions like
+  `feature_importances` and `training_losses`, for extracting, from training outcomes,
+  information common to many algorithms.
 
 - [Algorithm traits](@ref traits): special methods that promise specific algorithm
   behavior or record general information about the algorithm. Only
   [`LearnAPI.constructor`](@ref) and [`LearnAPI.functions`](@ref) are universally
   compulsory.
 
-- [`LearnAPI.target`](@ref) and [`LearnAPI.weights`](@ref) are traits which also include
-  extended signatures for extracting, from `fit` input data, the target and
-  per-observation weights, when available.
-
 ---
 
 ¹ We acknowledge users may not like this terminology, and may know "algorithm" by some
diff --git a/docs/src/traits.md b/docs/src/traits.md
index 84aedeef..b04b494d 100644
--- a/docs/src/traits.md
+++ b/docs/src/traits.md
@@ -22,7 +22,7 @@ package [ScientificTypesBase.jl](https://github.com/JuliaAI/ScientificTypesBase. 
| trait                                                                   | return value                                                                                                                       | fallback value                                         | example                                                   |
|:----------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------|:------------------------------------------------------|:---------------------------------------------------------|
| [`LearnAPI.constructor`](@ref)`(algorithm)` | constructor for generating new or modified versions of `algorithm` | (no fallback) | `RidgeRegressor` |
-| [`LearnAPI.functions`](@ref)`(algorithm)` | functions you can apply to `algorithm` or associated model (traits excluded) | `()` | `(fit, predict, minimize, LearnAPI.algorithm, obs)` |
+| [`LearnAPI.functions`](@ref)`(algorithm)` | functions you can apply to `algorithm` or associated model (traits excluded) | `()` | `(:fit, :predict, :minimize, :(LearnAPI.algorithm), :obs)` |
| [`LearnAPI.kinds_of_proxy`](@ref)`(algorithm)` | instances `kind` of `KindOfProxy` for which an implementation of `LearnAPI.predict(algorithm, kind, ...)` is guaranteed. | `()` | `(Distribution(), Interval())` |
| [`LearnAPI.target`](@ref)`(algorithm)` | `true` if target can appear in `fit` data | `false` | `true` |
| [`LearnAPI.weights`](@ref)`(algorithm)` | `true` if per-observation weights can appear in `fit` data | `false` | `true` |
@@ -40,7 +40,7 @@ package [ScientificTypesBase.jl](https://github.com/JuliaAI/ScientificTypesBase.
 | [`LearnAPI.fit_observation_scitype`](@ref)`(algorithm)` | upper bound on `scitype(observation)` for `observation` in `data` ensuring `fit(algorithm, data)` works | `Union{}` | `Tuple{AbstractVector{Continuous}, Continuous}` |
 | [`LearnAPI.fit_type`](@ref)`(algorithm)` | upper bound on `typeof(data)` ensuring `fit(algorithm, data)` works | `Union{}` | `Tuple{AbstractMatrix{<:Real}, AbstractVector{<:Real}}` |
 | [`LearnAPI.fit_observation_type`](@ref)`(algorithm)` | upper bound on `typeof(observation)` for `observation` in `data` ensuring `fit(algorithm, data)` works | `Union{}` | `Tuple{AbstractVector{<:Real}, Real}` |
-| [`LearnAPI.target_observation scitype`](@ref)`(algorithm)` | upper bound on the scitype of each observation of the targget | `Any` | `Continuous` |
+| [`LearnAPI.target_observation_scitype`](@ref)`(algorithm)` | upper bound on the scitype of each observation of the target | `Any` | `Continuous` |
| [`LearnAPI.predict_input_scitype`](@ref)`(algorithm)` | upper bound on `scitype(data)` ensuring `predict(model, kind, data)` works | `Union{}` | `Table(Continuous)` |
| [`LearnAPI.predict_input_observation_scitype`](@ref)`(algorithm)` | upper bound on `scitype(observation)` for `observation` in `data` ensuring `predict(model, kind, data)` works | `Union{}` | `Vector{Continuous}` |
| [`LearnAPI.predict_input_type`](@ref)`(algorithm)` | upper bound on `typeof(data)` ensuring `predict(model, kind, data)` works | `Union{}` | `AbstractMatrix{<:Real}` |
diff --git a/src/LearnAPI.jl b/src/LearnAPI.jl
index 9ba6b54e..66c9aa9e 100644
--- a/src/LearnAPI.jl
+++ b/src/LearnAPI.jl
@@ -7,6 +7,7 @@ include("types.jl")
 include("predict_transform.jl")
 include("fit.jl")
 include("minimize.jl")
+include("input.jl")
 include("obs.jl")
 include("accessor_functions.jl")
 include("traits.jl")
diff --git a/src/accessor_functions.jl b/src/accessor_functions.jl
index b87a3ab1..854bfdb7 100644
--- a/src/accessor_functions.jl
+++ b/src/accessor_functions.jl
@@ -31,7 +31,7 @@ is `true`.
 
 # New implementations
 
 Implementation is compulsory for new algorithm types. 
The behaviour described above is the -only contract. $(DOC_IMPLEMENTED_METHODS(:algorithm)) +only contract. $(DOC_IMPLEMENTED_METHODS(":(LearnAPI.algorithm)")) """ function algorithm end @@ -44,7 +44,7 @@ Return the algorithm-specific feature importances of a `model` output by an abstract vector of `feature::Symbol => importance::Real` pairs (e.g `[:gender => 0.23, :height => 0.7, :weight => 0.1]`). -The `algorithm` supports feature importances if `LearnAPI.feature_importances in +The `algorithm` supports feature importances if `:(LearnAPI.feature_importances) in LearnAPI.functions(algorithm)`. If an algorithm is sometimes unable to report feature importances then @@ -55,7 +55,7 @@ If an algorithm is sometimes unable to report feature importances then Implementation is optional. -$(DOC_IMPLEMENTED_METHODS(:feature_importances)). +$(DOC_IMPLEMENTED_METHODS(":(LearnAPI.feature_importances)")). """ function feature_importances end @@ -68,7 +68,7 @@ an abstract vector of `feature_or_class::Symbol => coefficient::Real` pairs (e.g => 0.23, :height => 0.7, :weight => 0.1]`) or, in the case of multi-targets, `feature::Symbol => coefficients::AbstractVector{<:Real}` pairs. -The `model` reports coefficients if `LearnAPI.coefficients in +The `model` reports coefficients if `:(LearnAPI.coefficients) in LearnAPI.functions(Learn.algorithm(model))`. See also [`LearnAPI.intercept`](@ref). @@ -77,7 +77,7 @@ See also [`LearnAPI.intercept`](@ref). Implementation is optional. -$(DOC_IMPLEMENTED_METHODS(:coefficients)). +$(DOC_IMPLEMENTED_METHODS(":(LearnAPI.coefficients)")). """ function coefficients end @@ -88,7 +88,7 @@ function coefficients end For a linear model, return the learned intercept. The value returned is `Real` (single target) or an `AbstractVector{<:Real}` (multi-target). -The `model` reports intercept if `LearnAPI.intercept in +The `model` reports intercept if `:(LearnAPI.intercept) in LearnAPI.functions(Learn.algorithm(model))`. See also [`LearnAPI.coefficients`](@ref). @@ -97,7 +97,7 @@ See also [`LearnAPI.coefficients`](@ref). Implementation is optional. -$(DOC_IMPLEMENTED_METHODS(:intercept)). +$(DOC_IMPLEMENTED_METHODS(":(LearnAPI.intercept)")). """ function intercept end @@ -120,7 +120,7 @@ See also [`LearnAPI.trees`](@ref). Implementation is optional. -$(DOC_IMPLEMENTED_METHODS(:tree)). +$(DOC_IMPLEMENTED_METHODS(":(LearnAPI.tree)")). """ function tree end @@ -137,7 +137,7 @@ See also [`LearnAPI.tree`](@ref). Implementation is optional. -$(DOC_IMPLEMENTED_METHODS(:trees)). +$(DOC_IMPLEMENTED_METHODS(":(LearnAPI.trees)")). """ function trees end @@ -155,7 +155,7 @@ See also [`fit`](@ref). Implement for iterative algorithms that compute and record training losses as part of training (e.g. neural networks). -$(DOC_IMPLEMENTED_METHODS(:training_losses)). +$(DOC_IMPLEMENTED_METHODS(":(LearnAPI.training_losses)")). """ function training_losses end @@ -173,7 +173,7 @@ See also [`fit`](@ref). Implement for iterative algorithms that compute and record training losses as part of training (e.g. neural networks). -$(DOC_IMPLEMENTED_METHODS(:training_predictions)). +$(DOC_IMPLEMENTED_METHODS(":(LearnAPI.training_predictions)")). """ function training_predictions end @@ -192,7 +192,7 @@ Implement for algorithms, such as outlier detection algorithms, which associate with each observation during training, where these scores are of interest in later processes (e.g, in defining normalized scores for new data). -$(DOC_IMPLEMENTED_METHODS(:training_scores)). 
+$(DOC_IMPLEMENTED_METHODS(":(LearnAPI.training_scores)")).
 
 """
 function training_scores end
@@ -212,9 +212,9 @@ See also [`is_composite`](@ref).
 
 # New implementations
 
-Implementent if and only if `model` is a composite model.
+Implement if and only if `model` is a composite model. 
 
-$(DOC_IMPLEMENTED_METHODS(:components)).
+$(DOC_IMPLEMENTED_METHODS(":(LearnAPI.components)")).
 
 """
 function components end
@@ -229,7 +229,7 @@ See also [`fit`](@ref).
 
 # New implementations
 
-$(DOC_IMPLEMENTED_METHODS(:training_labels)).
+$(DOC_IMPLEMENTED_METHODS(":(LearnAPI.training_labels)")).
 
 """
 function training_labels end
@@ -273,7 +273,7 @@ See also [`fit`](@ref).
 Implementation is discouraged for byproducts already covered by other LearnAPI.jl accessor
 functions: $ACCESSOR_FUNCTIONS_WITHOUT_EXTRAS_LIST.
 
-$(DOC_IMPLEMENTED_METHODS(:training_labels)).
+$(DOC_IMPLEMENTED_METHODS(":(LearnAPI.extras)")).
 
 """
 function extras end
@@ -287,4 +287,3 @@ const ACCESSOR_FUNCTIONS_LIST = join(
     ", ",
     " and ",
 )
-
diff --git a/src/fit.jl b/src/fit.jl
index 316d0eab..56087fd3 100644
--- a/src/fit.jl
+++ b/src/fit.jl
@@ -14,15 +14,19 @@ returning an object, `model`, on which other methods, such as [`predict`](@ref)
 [`transform`](@ref), can be dispatched. [`LearnAPI.functions(algorithm)`](@ref) returns
 a list of methods that can be applied to either `algorithm` or `model`.
 
-The second signature applies to algorithms which do not generalize to new observations. In
-that case `predict` or `transform` actually execute the algorithm, but may also write to
-the (mutable) object returned by `fit`.
+The second signature is provided by algorithms that do not generalize to new observations
+("static" algorithms). In that case, `transform(model, data)` or `predict(model, ...,
+data)` carries out the actual algorithm execution, writing any byproducts of that
+operation to the mutable object `model` returned by `fit`.
 
-When `data` is a tuple, a data slurping form of `fit` is typically provided.
+Whenever `fit` expects a tuple form of argument, `data = (X1, ..., Xn)`, then the
+signature `fit(algorithm, X1, ..., Xn)` is also provided.
+
+For example, a supervised classifier will typically admit this workflow:
 
 ```julia
-model = fit(algorithm, (X, y)) # or `fit(algorithm, X, y)`
-ŷ = predict(model, X)
+model = fit(algorithm, (X, y)) # or `fit(algorithm, X, y)`
+ŷ = predict(model, Xnew)
 ```
 
 Use `verbosity=0` for warnings only, and `-1` for silent training.
@@ -34,7 +38,14 @@ See also [`predict`](@ref), [`transform`](@ref), [`inverse_transform`](@ref),
 
 # New implementations
 
-Implementation is compulsory. The signature must include `verbosity`.
+Implementation is compulsory. The signature must include `verbosity`. Note the requirement
+on providing slurping signatures. A fallback for the first signature calls the second,
+ignoring `data`:
+
+```julia
+fit(algorithm, data...; kwargs...) = fit(algorithm; kwargs...)
+```
+$(DOC_DATA_INTERFACE(:fit))
 
 """
-fit(algorithm, data...; kwargs...) = nothing
+fit(algorithm, data...; kwargs...) = fit(algorithm; kwargs...)
diff --git a/src/minimize.jl b/src/minimize.jl
index f37b9d0a..653d3fdf 100644
--- a/src/minimize.jl
+++ b/src/minimize.jl
@@ -17,7 +17,7 @@ functionality is preserved by `minimize`.
 # New implementations
 
 Overloading `minimize` for new algorithms is optional. The fallback is the
-identity. $(DOC_IMPLEMENTED_METHODS(:minimize, overloaded=true))
+identity. 
$(DOC_IMPLEMENTED_METHODS(":minimize", overloaded=true))
 
 New implementations must enforce the following identities, whenever the right-hand side is
 defined:
diff --git a/src/obs.jl b/src/obs.jl
index f67b19c9..2a874d6a 100644
--- a/src/obs.jl
+++ b/src/obs.jl
@@ -7,9 +7,9 @@
 Return an algorithm-specific representation of `data`, suitable for passing to `fit`
 (first signature) or to `predict` and `transform` (second signature), in place of
 `data`. Here `model` is the return value of `fit(algorithm, ...)` for some LearnAPI.jl
 algorithm, `algorithm`.
 
-The returned object is guaranteed to implement observation access as indicated
-by [`LearnAPI.data_interface(algorithm)`](@ref) (typically the
-[MLUtils.jl](https://juliaml.github.io/MLUtils.jl/dev/) `getobs`/`numobs` interface).
+The returned object is guaranteed to implement observation access as indicated by
+[`LearnAPI.data_interface(algorithm)`](@ref) (typically
+[`LearnAPI.RandomAccess()`](@ref)).
 
 Calling `fit`/`predict`/`transform` on the returned objects may have performance
 advantages over calling directly on `data` in some contexts. And resampling the returned
@@ -21,21 +21,19 @@ object using `MLUtils.getobs` may be cheaper than directly resampling the compon
 Usual workflow, using data-specific resampling methods:
 
 ```julia
-X = 
-y = 
-
-Xtrain = Tables.select(X, 1:100)
-ytrain = y[1:100]
-model = fit(algorithm, (Xtrain, ytrain))
-ŷ = predict(model, LiteralTarget(), y[101:150])
+data = (X, y) # a DataFrame and a vector
+data_train = (Tables.subset(X, 1:100), y[1:100])
+model = fit(algorithm, data_train)
+ŷ = predict(model, LiteralTarget(), Tables.subset(X, 101:150))
 ```
 
-Alternative workflow using `obs` and the MLUtils.jl API:
+Alternative workflow using `obs` and the MLUtils.jl method `getobs` (assumes
+`LearnAPI.data_interface(algorithm) == RandomAccess()`):
 
 ```julia
 import MLUtils
 
-fit_obsevations = obs(algorithm, (X, y))
+fit_observations = obs(algorithm, data)
 model = fit(algorithm, MLUtils.getobs(fit_observations, 1:100))
 
 predict_observations = obs(model, X)
@@ -52,15 +50,16 @@ See also [`LearnAPI.data_interface`](@ref).
 
 Implementation is typically optional.
 
-For each supported form of `data` in `fit(algorithm, data)`, `predict(model, data)`, and
-`transform(model, data)`, it must be true that `model = fit(algorithm, observations)` is
-supported, whenever `observations = obs(algorithm, data)`, and that `predict(model,
-observations)` and `transform(model, observations)` are supported, whenever `observations
-= obs(model, data)`.
+For each supported form of `data` in `fit(algorithm, data)`, it must be true that `model =
+fit(algorithm, observations)` is equivalent to `model = fit(algorithm, data)`, whenever
+`observations = obs(algorithm, data)`. For each supported form of `data` in calls
+`predict(model, ..., data)` and `transform(model, data)`, where implemented, the calls
+`predict(model, ..., observations)` and `transform(model, observations)` are supported
+alternatives, whenever `observations = obs(model, data)`.
 
 The fallback for `obs` is `obs(model_or_algorithm, data) = data`, and the fallback for
-`LearnAPI.data_interface(algorithm)` indicates MLUtils.jl as the adopted interface. For
-details refer to the [`LearnAPI.data_interface`](@ref) document string.
+`LearnAPI.data_interface(algorithm)` is `LearnAPI.RandomAccess()`. For details refer to
+the [`LearnAPI.data_interface`](@ref) document string. 

 In particular, if the `data` to be consumed by `fit`, `predict` or `transform` consists
 only of suitable tables and arrays, then `obs` and `LearnAPI.data_interface` do not need
 
diff --git a/src/predict_transform.jl b/src/predict_transform.jl
index a20598f8..97385a78 100644
--- a/src/predict_transform.jl
+++ b/src/predict_transform.jl
@@ -32,6 +32,21 @@ DOC_MINIMIZE(func) =
 
     """
 
+DOC_DATA_INTERFACE(method) =
+    """
+
+    ## Assumptions about data
+
+    By default, it is assumed that `data` supports the [`LearnAPI.RandomAccess`](@ref)
+    interface (all matrices, with observations-as-columns, most tables, and tuples
+    thereof). See [`LearnAPI.RandomAccess`](@ref) for details. If this is not the case
+    then an implementation must suitably: (i) overload the trait
+    [`LearnAPI.data_interface`](@ref); and/or (ii) overload [`obs`](@ref). Refer to these
+    methods' document strings for details.
+
+    """
+
+
 # # METHOD STUBS/FALLBACKS
 
 """
@@ -56,17 +71,20 @@ In the following, `algorithm` is some supervised learning algorithm with training
 features `X`, training target `y`, and test features `Xnew`:
 
 ```julia
-model = fit(algorithm, X, y; verbosity=0)
+model = fit(algorithm, (X, y)) # or `fit(algorithm, X, y)`
 predict(model, LiteralTarget(), Xnew)
 ```
 
-Note `predict ` does not mutate any argument, except in the special case
-`LearnAPI.predict_or_transform_mutates(algorithm) = true`.
-
 See also [`fit`](@ref), [`transform`](@ref), [`inverse_transform`](@ref).
 
 # Extended help
 
+If `predict` supports data in the form of a tuple `data = (X1, ..., Xn)`, then a slurping
+signature is also provided, as in `predict(model, X1, ..., Xn)`.
+
+Note `predict` does not mutate any argument, except in the special case
+`LearnAPI.predict_or_transform_mutates(algorithm) = true`.
+
 # New implementations
 
 If there is no notion of a "target" variable in the LearnAPI.jl sense, or you need an
@@ -78,12 +96,14 @@ convenience form, but it is free to choose the fallback `kind_of_proxy`.
 Each `kind_of_proxy` that gets an implementation must be added to the list returned by
 [`LearnAPI.kinds_of_proxy`](@ref).
 
-$(DOC_IMPLEMENTED_METHODS(:predict))
+$(DOC_IMPLEMENTED_METHODS(":predict"))
 
 $(DOC_MINIMIZE(:predict))
 
 $(DOC_MUTATION(:predict))
 
+$(DOC_DATA_INTERFACE(:predict))
+
 """
 function predict end
 
@@ -94,7 +114,7 @@ function predict end
 
 Return a transformation of some `data`, using some `model`, as returned by [`fit`](@ref).
 
-For `data` that consists of a tuple, a slurping version is typically provided, i.e.,
+For `data` that consists of a tuple, a slurping version is also provided, i.e., 
 you can do `transform(model, X1, X2, X3)` in place of `transform(model, (X1, X2, X3))`.
 
 # Example
@@ -115,7 +135,7 @@ model = fit(algorithm)
 W = transform(model, X)
 ```
 
-or, in one step:
+or, in one step (where supported):
 
 ```julia
 W = transform(algorithm, X)
@@ -132,12 +152,14 @@ See also [`fit`](@ref), [`predict`](@ref),
 # New implementations
 
 Implementation for new LearnAPI.jl algorithms is optional.
-$(DOC_IMPLEMENTED_METHODS(:transform))
+$(DOC_IMPLEMENTED_METHODS(":transform"))
 
 $(DOC_MINIMIZE(:transform))
 
 $(DOC_MUTATION(:transform))
 
+$(DOC_DATA_INTERFACE(:transform))
+
 """
 function transform end
 
@@ -166,7 +188,7 @@ See also [`fit`](@ref), [`transform`](@ref), [`predict`](@ref).
 
 # New implementations
 
-Implementation is optional. $(DOC_IMPLEMENTED_METHODS(:inverse_transform))
+Implementation is optional. 
$(DOC_IMPLEMENTED_METHODS(":inverse_transform"))
 
 $(DOC_MINIMIZE(:inverse_transform))
 
diff --git a/src/traits.jl b/src/traits.jl
index 79fd3453..a3ccceb8 100644
--- a/src/traits.jl
+++ b/src/traits.jl
@@ -100,11 +100,11 @@ function constructor end
 """
     LearnAPI.functions(algorithm)
 
-Return a tuple of functions that can be meaningfully applied with `algorithm`, or an
-associate model (object returned by `fit(algorithm, ...)`, as the first
-argument. Algorithm traits (`algorithm` is the *only* argument) are excluded.
+Return a tuple of symbols respresenting functions that can be meaningfully applied with
+`algorithm`, or an associate model (object returned by `fit(algorithm, ...)`, as the first
+argument. Algorithm traits (`algorithm` is the *only* argument) are excluded.
 
-In addition to functions, the returned tuple may include expressions, like
+In addition to symbols, the returned tuple may include expressions, like
 `:(DecisionTree.print_tree)`, which reference functions not owned by LearnAPI.jl.
 
 The understanding is that `algorithm` is a LearnAPI-compliant object whenever the return
@@ -117,15 +117,15 @@ value is non-empty.
 
 All new implementations must overload this trait. Here's a checklist for elements in the
 return value:
 
-| function             | implementation/overloading compulsory? | include in returned tuple? |
-|----------------------|----------------------------------------|----------------------------|
-| `fit`                | yes                                    | yes                        |
-| `minimize`           | no                                     | yes                        |
-| `obs`                | no                                     | yes                        |
-| `LearnAPI.algorithm` | yes                                    | yes                        |
-| `inverse_transform`  | no                                     | only if implemented        |
-| `predict`            | no                                     | only if implemented        |
-| `transform`          | no                                     | only if implemented        |
+| symbol                | implementation/overloading compulsory? | include in returned tuple? |
+|-----------------------|----------------------------------------|----------------------------|
+| `:fit`                | yes                                    | yes                        |
+| `:minimize`           | no                                     | yes                        |
+| `:obs`                | no                                     | yes                        |
+| `:LearnAPI.algorithm` | yes                                    | yes                        |
+| `:inverse_transform`  | no                                     | only if implemented        |
+| `:predict`            | no                                     | only if implemented        |
+| `:transform`          | no                                     | only if implemented        |
 
 Also include any implemented accessor functions. The LearnAPI.jl accessor functions are:
 $ACCESSOR_FUNCTIONS_LIST.
@@ -137,11 +137,15 @@ functions(::Any) = ()
 """
     LearnAPI.kinds_of_proxy(algorithm)
 
-Returns an tuple of all instances, `kind`, for which for which `predict(algorithm, kind,
+Returns a tuple of all instances, `kind`, for which `predict(algorithm, kind,
 data...)` has a guaranteed implementation. Each such `kind` subtypes
 [`LearnAPI.KindOfProxy`](@ref). Examples are `LiteralTarget()` (for predicting actual
 target values) and `Distributions()` (for predicting probability mass/density functions).
 
+If `predict(model, data)` is overloaded to return predictions for a specific kind of
+proxy (e.g., `predict(model::MyModel, data) = predict(model, Distribution(), data)`) then
+that kind appears first in the returned tuple.
+
 See also [`LearnAPI.predict`](@ref), [`LearnAPI.KindOfProxy`](@ref).
 
 # Extended help
@@ -188,7 +192,7 @@ target(::Any, data) = nothing
 
 """
     LearnAPI.weights(algorithm)::Bool
-    LearnAPI.target(algorithm, data) -> weights
+    LearnAPI.weights(algorithm, data) -> weights
 
 First method (an algorithm trait) returns `true` if the second method returns
 per-observation weights, for some value(s) of `data`, where `data` is a supported argument
@@ -333,7 +337,7 @@ load_path(::Any) = "unknown"
 
 Returns `true` if one or more properties (fields) of `algorithm` may themselves be
 algorithms, and `false` otherwise.
 
-See also `[LearnAPI.components]`(@ref). 

+See also [`LearnAPI.components`](@ref).
 
 # New implementations
 
@@ -367,28 +371,23 @@ human_name(M) = snakecase(name(M), delim=' ') # `name` defined below
 """
     LearnAPI.data_interface(algorithm)
 
-Return the data interface supported by `algorithm` for accessing individual observations in
-representations of input data returned by [`obs(algorithm, data)`](@ref) or [`obs(model,
-data)`](@ref). Here `data` is `fit`, `predict`, or `transform`-consumable data.
-
-Options for the return value:
-
-- `Base.HasLength()`: Data returned by `obs` implements the
-  [MLUtils.jl](https://juliaml.github.io/MLUtils.jl/dev/) `getobs/numobs` interface; it
-  usually suffices to overload `Base.getindex` and `Base.length` (which are the
-  `getobs/numobs` fallbacks).
+Return the data interface supported by `algorithm` for accessing individual observations
+in representations of input data returned by [`obs(algorithm, data)`](@ref) or
+[`obs(model, data)`](@ref), whenever `algorithm == LearnAPI.algorithm(model)`. Here `data`
+is `fit`, `predict`, or `transform`-consumable data.
 
-- `Base.SizeUnknown()`: Data returned by `obs` implements Julia's `iterate`
-  interface.
+Possible return values are [`LearnAPI.RandomAccess`](@ref),
+[`LearnAPI.FiniteIterable`](@ref), and [`LearnAPI.Iterable`](@ref).
 
 See also [`obs`](@ref).
 
 # New implementations
 
-The fallback returns `Base.HasLength`.
+The fallback returns [`LearnAPI.RandomAccess`](@ref), which applies to arrays, most
+tables, and tuples of these. See the doc-string for details.
 
 """
-data_interface(::Any) = Base.HasLength()
+data_interface(::Any) = LearnAPI.RandomAccess()
 
 """
     LearnAPI.predict_or_transform_mutates(algorithm)
diff --git a/src/types.jl b/src/types.jl
index 02218bd3..8d755fdb 100644
--- a/src/types.jl
+++ b/src/types.jl
@@ -116,3 +116,79 @@
 
 """
 KindOfProxy
+
+
+# # DATA INTERFACES
+
+abstract type DataInterface end
+abstract type Finite <: DataInterface end
+
+"""
+    LearnAPI.RandomAccess
+
+A data interface type. We say that `data` implements the `RandomAccess` interface if
+`data` implements the methods `getobs` and `numobs` from MLUtils.jl. The first method
+allows one to grab observations specified by an arbitrary index set, as in
+`MLUtils.getobs(data, [2, 3, 5])`, while the second method returns the total number of
+available observations, which is assumed to be known and finite.
+
+All arrays implement `RandomAccess`, with the last index being the observation index
+(observations-as-columns in matrices).
+
+A Tables.jl compatible table `data` implements `RandomAccess` if `Tables.istable(data)` is
+true and if `data` implements `DataAPI.nrows`. This includes many tables, and in
+particular, `DataFrame`s. Tables that are also tuples are excluded.
+
+Any tuple of objects implementing `RandomAccess` also implements `RandomAccess`.
+
+If [`LearnAPI.data_interface(algorithm)`](@ref) takes the value `RandomAccess()`, then
+[`obs`](@ref)`(algorithm, ...)` is guaranteed to return objects implementing the
+`RandomAccess` interface, and the same holds for `obs(model, ...)`, whenever
+`LearnAPI.algorithm(model) == algorithm`.
+
+# Implementing `RandomAccess` for new data types
+
+Typically, implementing `RandomAccess` for a new data type requires only implementing
+`Base.getindex` and `Base.length`, which are the fallbacks for `MLUtils.getobs` and
+`MLUtils.numobs`, and this avoids making MLUtils.jl a package dependency.
+
+See also [`LearnAPI.FiniteIterable`](@ref), [`LearnAPI.Iterable`](@ref). 
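+
+For example, here is a minimal sketch for a hypothetical container type (`Corpus` below is
+not part of LearnAPI.jl or MLUtils.jl and is introduced for illustration only):
+
+```julia
+struct Corpus
+    docs::Vector{String}
+end
+
+# `MLUtils.getobs` and `MLUtils.numobs` fall back to `Base.getindex` and `Base.length`:
+Base.getindex(corpus::Corpus, I) = corpus.docs[I]
+Base.length(corpus::Corpus) = length(corpus.docs)
+```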

+"""
+struct RandomAccess <: Finite end
+
+"""
+    LearnAPI.FiniteIterable
+
+A data interface type. We say that `data` implements the `FiniteIterable` interface if
+it implements Julia's `iterate` interface, including `Base.length`, and if
+`Base.IteratorSize(typeof(data)) == Base.HasLength()`. For example, this is true if:
+
+- `data` implements the [`LearnAPI.RandomAccess`](@ref) interface (arrays and most tables)
+
+- `data isa MLUtils.DataLoader`, which includes output from `MLUtils.eachobs`.
+
+If [`LearnAPI.data_interface(algorithm)`](@ref) takes the value `FiniteIterable()`, then
+[`obs`](@ref)`(algorithm, ...)` is guaranteed to return objects implementing the
+`FiniteIterable` interface, and the same holds for `obs(model, ...)`, whenever
+`LearnAPI.algorithm(model) == algorithm`.
+
+See also [`LearnAPI.RandomAccess`](@ref), [`LearnAPI.Iterable`](@ref).
+"""
+struct FiniteIterable <: Finite end
+
+"""
+    LearnAPI.Iterable
+
+A data interface type. We say that `data` implements the `Iterable` interface if it
+implements Julia's basic `iterate` interface. (Such objects may not implement
+`MLUtils.numobs` or `Base.length`.)
+
+If [`LearnAPI.data_interface(algorithm)`](@ref) takes the value `Iterable()`, then
+[`obs`](@ref)`(algorithm, ...)` is guaranteed to return objects implementing `Iterable`,
+and the same holds for `obs(model, ...)`, whenever `LearnAPI.algorithm(model) ==
+algorithm`.
+
+See also [`LearnAPI.FiniteIterable`](@ref), [`LearnAPI.RandomAccess`](@ref).
+
+"""
+struct Iterable <: DataInterface end

From 31c42c60892502660bc1bc7ca40b637a53a5eb51 Mon Sep 17 00:00:00 2001
From: "Anthony D. Blaom"
Date: Thu, 26 Sep 2024 16:55:03 +1200
Subject: [PATCH 15/27] add fallbacks to reduce need to overload some
 convenience methods

---
 docs/src/anatomy_of_an_implementation.md | 222 ++++++++++++-----------
 src/fit.jl                               |   7 +-
 src/predict_transform.jl                 |   9 +-
 src/traits.jl                            |  39 ++--
 test/integration/regression.jl           |  79 ++++----
 test/integration/static_algorithms.jl    |  31 ++--
 6 files changed, 207 insertions(+), 180 deletions(-)

diff --git a/docs/src/anatomy_of_an_implementation.md b/docs/src/anatomy_of_an_implementation.md
index 9779403a..37c73c94 100644
--- a/docs/src/anatomy_of_an_implementation.md
+++ b/docs/src/anatomy_of_an_implementation.md
@@ -10,19 +10,19 @@ For a transformer, implementations ordinarily implement `transform` instead of
 
 !!! important
 
-    The core implementations of `fit`, `predict`, etc,
-    always have a *single* `data` argument, as in `fit(algorithm, data; verbosity=1)`.
-    Calls like `fit(algorithm, X, y)` are provided as additional convenience methods.
+    Implementations of `fit`, `predict`, etc,
+    always have a *single* `data` argument, as in `fit(algorithm, data; verbosity=1)`.
+    For user convenience, calls like `fit(algorithm, X, y)` automatically fall back to `fit(algorithm, (X, y))`.
 
 !!! note
 
-    If the `data` object consumed by `fit`, `predict`, or `transform` is not
-    not a suitable table¹, array³, tuple of tables and arrays, or some
-    other object implementing
-    the MLUtils.jl `getobs`/`numobs` interface,
-    then an implementation must: (i) suitably overload the trait
-    [`LearnAPI.data_interface`](@ref); and/or (ii) overload [`obs`](@ref), as
-    illustrated below under [Providing an advanced data interface](@ref). 
+
    If the `data` object consumed by `fit`, `predict`, or `transform` is
+    not a suitable table¹, array³, tuple of tables and arrays, or some
+    other object implementing
+    the MLUtils.jl `getobs`/`numobs` interface,
+    then an implementation must: (i) suitably overload the trait
+    [`LearnAPI.data_interface`](@ref); and/or (ii) overload [`obs`](@ref), as
+    illustrated below under [Providing an advanced data interface](@ref).
 
 The first line below imports the lightweight package LearnAPI.jl whose methods we will be
 extending. The second imports libraries needed for the core algorithm.
@@ -46,9 +46,10 @@ nothing # hide
 
 Instances of `Ridge` will be [algorithms](@ref algorithms), in LearnAPI.jl parlance.
 
-To [qualify](@ref algorithms) as a LearnAPI algorithm, an object must be come with a
-mechanism for creating new versions of itself, with modified property (field) values. To
-this end, we implement `LearnAPI.constructor`, which must return a keyword constructor:
+Associated with each new type of LearnAPI [algorithm](@ref algorithms) will be a keyword
+argument constructor, providing default values for all properties (struct fields) that are
+not other algorithms, and we must implement `LearnAPI.constructor(algorithm)`, for
+recovering the constructor from an instance:
 
 ```@example anatomy
 """
@@ -61,18 +62,15 @@ LearnAPI.constructor(::Ridge) = Ridge
 nothing # hide
 ```
 
-So, if `algorithm = Ridge(lambda=0.1)` then `LearnAPI.constructor(algorithm)(lambda=0.05)`
-is another algorithm with the same properties, except that the value of `lambda` has been
-changed to `0.05`.
-
-Note that we attach the docstring to the constructor, not the struct.
+So, in this case, if `algorithm = Ridge(0.2)`, then
+`LearnAPI.constructor(algorithm)(lambda=0.2) == algorithm` is true. Note that we attach
+the docstring to the *constructor*, not the struct.
 
 ## Implementing `fit`
 
-A ridge regressor requires two types of data for training: input features `X`, which
-here we suppose are tabular¹, and a [target](@ref proxy) `y`, which we suppose is a
-vector.
+A ridge regressor requires two types of data for training: input features `X`, which here
+we suppose are tabular¹, and a [target](@ref proxy) `y`, which we suppose is a vector.
 
 It is convenient to define a new type for the `fit` output, which will include
 coefficients labelled by feature name for inspection after training:
@@ -134,9 +132,32 @@ Here's the implementation for our ridge regressor:
 
 ```@example anatomy
 LearnAPI.predict(model::RidgeFitted, ::LiteralTarget, Xnew) =
-    Tables.matrix(Xnew)*model.coefficients
+    Tables.matrix(Xnew)*model.coefficients
 ```
 
+Since we can make no other kind of prediction in this case, we may overload the following
+for user convenience:
+
+```@example anatomy
+LearnAPI.predict(model::RidgeFitted, Xnew) = predict(model, LiteralTarget(), Xnew)
+```
+
+## Extracting the target from training data
+
+The `fit` method consumes data which includes a [target variable](@ref proxy), i.e., the
+algorithm is a supervised learner. We must therefore declare how the target variable can
+be extracted from training data by implementing [`LearnAPI.target`](@ref):
+
+```@example anatomy
+LearnAPI.target(::Ridge, data) = last(data)
+```
+
+There is a similar method, [`LearnAPI.input`](@ref), for declaring how input data can be
+extracted (for passing to `predict`, for example) but this method has a fallback which
+typically suffices: return `first(data)` if `data` is a tuple, and otherwise return
+`data`. 
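+
+So, with `data = (X, y)`, as in the present case, one then expects the following to hold,
+a sketch:
+
+```julia
+LearnAPI.target(algorithm, (X, y)) == y # true, by the method just given
+LearnAPI.input(algorithm, (X, y)) == X  # true, by the tuple fallback
+```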

+
 ## Accessor functions
 
 An [accessor function](@ref accessor_functions) has the output of [`fit`](@ref) as its
@@ -174,74 +195,46 @@ predictions.
 
 Algorithm [traits](@ref traits) record extra generic information about an algorithm, or
 make specific promises of behavior. They usually have an algorithm as the single argument,
-and so we also regard [`LearnAPI.constructor`](@ref) defined above as a trait.
-
-In LearnAPI.jl `predict` always outputs a [target or target proxy](@ref proxy), where
-"target" is understood very broadly. We overload a trait to record the fact here that the
-target variable explicitly appears in training (i.e, the algorithm is supervised):
-
-```julia
-LearnAPI.target(::Ridge) = true
-```
-
-or, using a shortcut:
+and so we regard [`LearnAPI.constructor`](@ref) defined above as a trait.
 
-```julia
-@trait Ridge target = true
-```
+Because we have implemented `predict`, we are required to overload the
+[`LearnAPI.kinds_of_proxy`](@ref) trait. Because we can only make point predictions of the
+target, we do so like this:
 
-The macro can be used to specify multiple traits simultaneously:
+```julia
+LearnAPI.kinds_of_proxy(::Ridge) = (LiteralTarget(),)
+```
+
+A macro provides a shortcut, convenient when multiple traits are to be defined:
 
 ```@example anatomy
 @trait(
-    Ridge,
-    constructor = Ridge,
-    target = true,
-    kinds_of_proxy=(LiteralTarget(),),
-    descriptors = (:regression,),
-    functions = (
-        fit,
-        minimize,
-        predict,
-        obs,
-        LearnAPI.algorithm,
-        LearnAPI.coefficients,
-    )
+    Ridge,
+    constructor = Ridge,
+    kinds_of_proxy=(LiteralTarget(),),
+    descriptors = (:regression,),
+    functions = (
+        :(LearnAPI.fit),
+        :(LearnAPI.algorithm),
+        :(LearnAPI.minimize),
+        :(LearnAPI.obs),
+        :(LearnAPI.input),
+        :(LearnAPI.target),
+        :(LearnAPI.predict),
+        :(LearnAPI.coefficients),
+    )
 )
 nothing # hide
 ```
 
-The trait `kinds_of_proxy` is required here, because we implemented `predict`.
-
-The last trait `functions` returns a list of all LearnAPI.jl methods that can be
+The last trait, `functions`, returns a list of all LearnAPI.jl methods that can be
 meaningfully applied to the algorithm or associated model. See [`LearnAPI.functions`](@ref)
-for a checklist. This, and [`LearnAPI.constructor`](@ref), are the only universally
-compulsory traits. However, it is worthwhile studying the [list of all traits](@ref
-traits_list) to see which might apply to a new implementation, to enable maximum buy into
-functionality provided by third party packages, and to assist third party algorithms that
-match machine learning algorithms to user-defined tasks.
-
-According to the contract articulated in its document string, having set
-[`LearnAPI.target`](@ref)`(::Ridge)`](@ref) equal to `true`, we are obliged to overload a
-multi-argument version of `LearnAPI.target` to extract the target from the `data` that
-gets supplied to `fit`:
-
-```@example anatomy
-LearnAPI.target(::Ridge, data) = last(data)
-```
-
-## Convenience methods
-
-Finally, we extend `fit` and `predict` with signatures convenient for user interaction,
-enabling the kind of workflow previewed in [Sample workflow](@ref):
+for a checklist. [`LearnAPI.functions`](@ref) and [`LearnAPI.constructor`](@ref) are the
+only universally compulsory traits. However, it is worthwhile studying the [list of all
+traits](@ref traits_list) to see which might apply to a new implementation, to enable
+maximum buy-in to functionality provided by third party packages, and to assist third party
+algorithms that match machine learning algorithms to user-defined tasks. 
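+
+With the above declarations in place, one can expect checks like the following to pass, a
+sketch:
+
+```julia
+algorithm = Ridge(lambda=0.5)
+@assert LearnAPI.constructor(algorithm) == Ridge
+@assert :(LearnAPI.predict) in LearnAPI.functions(algorithm)
+@assert LiteralTarget() in LearnAPI.kinds_of_proxy(algorithm)
+```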
-```@example anatomy -LearnAPI.fit(algorithm::Ridge, X, y; kwargs...) = - fit(algorithm, (X, y); kwargs...) - -LearnAPI.predict(model::RidgeFitted, Xnew) = - predict(model, LiteralTarget(), Xnew) -``` ## [Demonstration](@id workflow) @@ -267,7 +260,9 @@ foreach(println, LearnAPI.functions(algorithm)) Training and predicting: ```@example anatomy -model = fit(algorithm, Tables.subset(X, train), y[train]) +Xtrain = Tables.subset(X, train) +ytrain = y[train] +model = fit(algorithm, (Xtrain, ytrain)) # `fit(algorithm, Xtrain, ytrain)` will also work ŷ = predict(model, LiteralTarget(), Tables.subset(X, test)) ``` @@ -299,7 +294,7 @@ using LearnAPI using LinearAlgebra, Tables struct Ridge{T<:Real} - lambda::T + lambda::T end Ridge(; lambda=0.1) = Ridge(lambda) @@ -320,19 +315,20 @@ LearnAPI.fit(algorithm::Ridge, X, y; kwargs...) = LearnAPI.predict(model::RidgeFitted, Xnew) = predict(model, LiteralTarget(), Xnew) @trait( - Ridge, - constructor = Ridge, - target = true, - kinds_of_proxy=(LiteralTarget(),), - descriptors = (:regression,), - functions = ( - fit, - minimize, - predict, - obs, - LearnAPI.algorithm, - LearnAPI.coefficients, - ) + Ridge, + constructor = Ridge, + kinds_of_proxy=(LiteralTarget(),), + descriptors = (:regression,), + functions = ( + :(LearnAPI.fit), + :(LearnAPI.algorithm), + :(LearnAPI.minimize), + :(LearnAPI.obs), + :(LearnAPI.input), + :(LearnAPI.target), + :(LearnAPI.predict), + :(LearnAPI.coefficients), + ) ) n = 10 # number of observations @@ -351,7 +347,7 @@ new type: ```@example anatomy2 struct RidgeFitObs{T,M<:AbstractMatrix{T}} - A::M # p x n + A::M # `p` x `n` matrix names::Vector{Symbol} # features y::Vector{T} # target end @@ -399,20 +395,13 @@ LearnAPI.fit(algorithm::Ridge, data; kwargs...) = fit(algorithm, obs(algorithm, data); kwargs...) ``` -We provide an overloading of `LearnAPI.target` to handle the additional supported data -argument of `fit`: - -```@example anatomy2 -LearnAPI.target(::Ridge, observations::RidgeFitObs) = observations.y -``` - ### The `obs` contract Providing `fit` signatures matching the output of `obs`, is the first part of the `obs` -contract. The second part is this: *The output of `obs` must implement the* -[MLUtils.jl](https://juliaml.github.io/MLUtils.jl/dev/) `getobs/numobs` *interface for -accessing individual observations*. It usually suffices to overload `Base.getindex` and -`Base.length` (which are the `getobs/numobs` fallbacks): +contract. The second part is this: *The output of `obs` must implement the interface +specified by the trait* [`LearnAPI.data_interface(algorithm)`](@ref). Assuming this is +[`LearnAPI.RandomAccess()`](@ref) (the default) it usually suffices to overload +`Base.getindex` and `Base.length`: ```@example anatomy2 Base.getindex(data::RidgeFitObs, I) = @@ -433,6 +422,24 @@ LearnAPI.predict(model::RidgeFitted, ::LiteralTarget, Xnew) = predict(model, LiteralTarget(), obs(model, Xnew)) ``` +### `target` and `input` methods + +We provide an additional overloading of [`LearnAPI.target`](@ref) to handle the additional supported data +argument of `fit`: + +```@example anatomy2 +LearnAPI.target(::Ridge, observations::RidgeFitObs) = observations.y +``` + +Similarly, we must overload [`LearnAPI.input`](@ref), which extracts inputs from training +data (objects that can be passed to `predict`) like this + +```@example anatomy2 +LearnAPI.input(::Ridge, observations::RidgeFitObs) = observations.A +``` +as the fallback mentioned above is no longer adequate. 
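+
+To illustrate (assuming the definitions above):
+
+```julia
+observations = obs(algorithm, (X, y))     # a `RidgeFitObs`
+LearnAPI.target(algorithm, observations)  # returns `observations.y`
+LearnAPI.input(algorithm, observations)   # returns `observations.A`
+```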
+
+
 ### Important notes:
 
 - The observations to be consumed by `fit` are returned by `obs(algorithm::Ridge, ...)`,
@@ -445,9 +452,9 @@ LearnAPI.predict(model::RidgeFitted, ::LiteralTarget, Xnew) =
   table here.
 
 Since LearnAPI.jl provides fallbacks for `obs` that simply return the unadulterated data
-input, overloading `obs` is optional. This is provided data in publicized `fit`/`predict`
-signatures consists of objects implementing the `getobs/numobs` interface (such as tables¹
-and arrays³).
+argument, overloading `obs` is optional. This holds provided data in publicized
+`fit`/`predict` signatures consists only of objects implementing the
+[`LearnAPI.RandomAccess`](@ref) interface (most tables¹, arrays³, and tuples thereof).
 
 To buy out of supporting the MLUtils.jl interface altogether, an implementation must
 overload the trait, [`LearnAPI.data_interface(algorithm)`](@ref).
@@ -486,6 +493,3 @@ like the native ones, they must be included in the [`LearnAPI.functions`](@ref)
 declaration.
 
 ³ The last index must be the observation index.
-
-⁴ Guaranteed assuming
-`LearnAPI.data_interface(algorithm) == Base.HasLength()`, the default.
diff --git a/src/fit.jl b/src/fit.jl
index 56087fd3..ddc1dd9b 100644
--- a/src/fit.jl
+++ b/src/fit.jl
@@ -43,9 +43,12 @@ on providing slurping signatures. A fallback for the first signature calls the s
 ignoring `data`:
 
 ```julia
-fit(algorithm, data...; kwargs...) = fit(algorithm; kwargs...)
+fit(algorithm, data; kwargs...) = fit(algorithm; kwargs...)
 ```
 
 $(DOC_DATA_INTERFACE(:fit))
 
 """
-fit(algorithm, data...; kwargs...) = fit(algorithm; kwargs...)
+fit(algorithm, data; kwargs...) =
+    fit(algorithm; kwargs...)
+fit(algorithm, data1, datas...; kwargs...) =
+    fit(algorithm, (data1, datas...); kwargs...)
diff --git a/src/predict_transform.jl b/src/predict_transform.jl
index 97385a78..932ecc2f 100644
--- a/src/predict_transform.jl
+++ b/src/predict_transform.jl
@@ -105,7 +105,10 @@ $(DOC_MUTATION(:predict))
 
 $(DOC_DATA_INTERFACE(:predict))
 
 """
-function predict end
+predict(model, k::KindOfProxy, data1, data2, datas...; kwargs...) =
+    predict(model, k, (data1, data2, datas...); kwargs...)
+predict(model, data1, data2, datas...; kwargs...) =
+    predict(model, (data1, data2, datas...); kwargs...)
 
 
 """
@@ -161,7 +164,9 @@ $(DOC_MUTATION(:transform))
 
 $(DOC_DATA_INTERFACE(:transform))
 
 """
-function transform end
+transform(model, data1, data2...; kwargs...) =
+    transform(model, (data1, data2...); kwargs...)
 
 
 """
diff --git a/src/traits.jl b/src/traits.jl
index a3ccceb8..090716b4 100644
--- a/src/traits.jl
+++ b/src/traits.jl
@@ -100,12 +100,13 @@ function constructor end
 """
     LearnAPI.functions(algorithm)
 
-Return a tuple of symbols respresenting functions that can be meaningfully applied with
-`algorithm`, or an associate model (object returned by `fit(algorithm, ...)`, as the first
-argument. Algorithm traits (`algorithm` is the *only* argument) are excluded.
+Return a tuple of expressions representing functions that can be meaningfully applied
+with `algorithm`, or an associated model (object returned by `fit(algorithm, ...)`), as
+the first argument. Algorithm traits (methods for which `algorithm` is the *only*
+argument) are excluded.
 
-In addition to symbols, the returned tuple may include expressions, like
-`:(DecisionTree.print_tree)`, which reference functions not owned by LearnAPI.jl.
+The returned tuple may include expressions like `:(DecisionTree.print_tree)`, which
+reference functions not owned by LearnAPI.jl.
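+
+For example, for a supervised algorithm supporting only `fit` and `predict`, the return
+value might look like this (a hypothetical value, for illustration only):
+
+```julia
+(:(LearnAPI.fit), :(LearnAPI.algorithm), :(LearnAPI.minimize), :(LearnAPI.obs),
+ :(LearnAPI.target), :(LearnAPI.predict))
+```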
The understanding is that `algorithm` is a LearnAPI-compliant object whenever the return
value is non-empty.
@@ -117,18 +118,22 @@ value is non-empty.
 
 All new implementations must overload this trait. Here's a checklist for elements in the
 return value:
 
-| symbol                | implementation/overloading compulsory? | include in returned tuple? |
-|-----------------------|----------------------------------------|----------------------------|
-| `:fit`                | yes                                    | yes                        |
-| `:minimize`           | no                                     | yes                        |
-| `:obs`                | no                                     | yes                        |
-| `:LearnAPI.algorithm` | yes                                    | yes                        |
-| `:inverse_transform`  | no                                     | only if implemented        |
-| `:predict`            | no                                     | only if implemented        |
-| `:transform`          | no                                     | only if implemented        |
-
-Also include any implemented accessor functions. The LearnAPI.jl accessor functions are:
-$ACCESSOR_FUNCTIONS_LIST.
+| symbol                          | implementation/overloading compulsory? | include in returned tuple?         |
+|---------------------------------|----------------------------------------|------------------------------------|
+| `:(LearnAPI.fit)`               | yes                                    | yes                                |
+| `:(LearnAPI.algorithm)`         | yes                                    | yes                                |
+| `:(LearnAPI.minimize)`          | no                                     | yes                                |
+| `:(LearnAPI.obs)`               | no                                     | yes                                |
+| `:(LearnAPI.input)`             | no                                     | yes, unless `fit` consumes no data |
+| `:(LearnAPI.target)`            | no                                     | only if implemented                |
+| `:(LearnAPI.weights)`           | no                                     | only if implemented                |
+| `:(LearnAPI.predict)`           | no                                     | only if implemented                |
+| `:(LearnAPI.transform)`         | no                                     | only if implemented                |
+| `:(LearnAPI.inverse_transform)` | no                                     | only if implemented                |
+
+Also include any implemented accessor functions, both those owned by LearnAPI.jl, and any
+algorithm-specific ones. The LearnAPI.jl accessor functions are: $ACCESSOR_FUNCTIONS_LIST.
 """
 functions(::Any) = ()
diff --git a/test/integration/regression.jl b/test/integration/regression.jl
index 0ff394e4..38b0e8f3 100644
--- a/test/integration/regression.jl
+++ b/test/integration/regression.jl
@@ -7,8 +7,8 @@ import DataFrames
 
 # # NAIVE RIDGE REGRESSION WITH NO INTERCEPTS
 
-# We overload `obs` to expose internal representation of input data. See later for a
-# simpler variation using the `obs` fallback.
+# We overload `obs` to expose internal representation of data. See later for a simpler
+# variation using the `obs` fallback.
 
 # no docstring here - that goes with the constructor
 struct Ridge
@@ -78,13 +78,10 @@ end
 LearnAPI.fit(algorithm::Ridge, data; kwargs...) =
     fit(algorithm, obs(algorithm, data); kwargs...)
 
-# for convenience:
-LearnAPI.fit(algorithm::Ridge, X, y; kwargs...) =
-    fit(algorithm, (X, y); kwargs...)
- -# to extract the target: +# extracting stuff from training data: LearnAPI.target(::Ridge, data) = last(data) LearnAPI.target(::Ridge, observations::RidgeFitObs) = observations.y +LearnAPI.input(::Ridge, observations::RidgeFitObs) = observations.A # observations for consumption by `predict`: LearnAPI.obs(::RidgeFitted, X) = Tables.matrix(X)' @@ -100,6 +97,7 @@ LearnAPI.predict(model::RidgeFitted, ::LiteralTarget, Xnew) = # convenience method: LearnAPI.predict(model::RidgeFitted, data) = predict(model, LiteralTarget(), data) +# accessor function: LearnAPI.feature_importances(model::RidgeFitted) = model.feature_importances LearnAPI.minimize(model::RidgeFitted) = @@ -108,18 +106,20 @@ LearnAPI.minimize(model::RidgeFitted) = @trait( Ridge, constructor = Ridge, - target=true, kinds_of_proxy = (LiteralTarget(),), functions = ( - fit, - minimize, - predict, - obs, - LearnAPI.algorithm, - LearnAPI.feature_importances, - ) + :(LearnAPI.fit), + :(LearnAPI.algorithm), + :(LearnAPI.minimize), + :(LearnAPI.obs), + :(LearnAPI.input), + :(LearnAPI.target), + :(LearnAPI.predict), + :(LearnAPI.feature_importances), + ) ) +# synthetic test data: n = 30 # number of observations train = 1:6 test = 7:10 @@ -127,10 +127,14 @@ a, b, c = rand(n), rand(n), rand(n) X = (; a, b, c) X = DataFrames.DataFrame(X) y = 2a - b + 3c + 0.05*rand(n) +data = (X, y) @testset "test an implementation of ridge regression" begin algorithm = Ridge(lambda=0.5) - @test LearnAPI.obs in LearnAPI.functions(algorithm) + @test :(LearnAPI.obs) in LearnAPI.functions(algorithm) + + @test LearnAPI.target(algorithm, data) == y + @test LearnAPI.input(algorithm, data) == X # verbose fitting: @test_logs( @@ -157,10 +161,12 @@ y = 2a - b + 3c + 0.05*rand(n) @test ŷ isa Vector{Float64} @test predict(model, Tables.subset(X, test)) == ŷ - fitobs = LearnAPI.obs(algorithm, (X, y)) + fitobs = LearnAPI.obs(algorithm, data) predictobs = LearnAPI.obs(model, X) model = fit(algorithm, MLUtils.getobs(fitobs, train); verbosity=0) + @test LearnAPI.target(algorithm, fitobs) == y @test predict(model, LiteralTarget(), MLUtils.getobs(predictobs, test)) ≈ ŷ + @test predict(model, LearnAPI.input(algorithm, fitobs)) ≈ predict(model, X) @test LearnAPI.feature_importances(model) isa Vector{<:Pair{Symbol}} @@ -177,9 +183,6 @@ y = 2a - b + 3c + 0.05*rand(n) MLUtils.getobs(predictobs, test) ) ≈ ŷ - @test LearnAPI.target(algorithm, (X, y)) == y - @test LearnAPI.target(algorithm, fitobs) == y - end # # VARIATION OF RIDGE REGRESSION THAT USES FALLBACK OF LearnAPI.obs @@ -221,32 +224,34 @@ function LearnAPI.fit(algorithm::BabyRidge, data; verbosity=1) end +# extracting stuff from training data: LearnAPI.target(::BabyRidge, data) = last(data) -# convenience form: -LearnAPI.fit(algorithm::BabyRidge, X, y; kwargs...) = - fit(algorithm, (X, y); kwargs...) 
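+# Note: no explicit convenience form is needed here; the generic slurping fallback
+# `fit(algorithm, X, y) = fit(algorithm, (X, y))`, added in src/fit.jl, covers it.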
- LearnAPI.algorithm(model::BabyRidgeFitted) = model.algorithm LearnAPI.predict(model::BabyRidgeFitted, ::LiteralTarget, Xnew) = Tables.matrix(Xnew)*model.coefficients +# convenience method: +LearnAPI.predict(model::BabyRidgeFitted, data) = predict(model, LiteralTarget(), data) + LearnAPI.minimize(model::BabyRidgeFitted) = BabyRidgeFitted(model.algorithm, model.coefficients, nothing) @trait( BabyRidge, - constructor = Ridge, - target=true, + constructor = BabyRidge, kinds_of_proxy = (LiteralTarget(),), functions = ( - fit, - minimize, - predict, - LearnAPI.algorithm, - LearnAPI.feature_importances, - ) + :(LearnAPI.fit), + :(LearnAPI.algorithm), + :(LearnAPI.minimize), + :(LearnAPI.obs), + :(LearnAPI.input), + :(LearnAPI.target), + :(LearnAPI.predict), + :(LearnAPI.feature_importances), + ) ) @testset "test a variation which does not overload LearnAPI.obs" begin @@ -256,12 +261,14 @@ LearnAPI.minimize(model::BabyRidgeFitted) = ŷ = predict(model, LiteralTarget(), Tables.subset(X, test)) @test ŷ isa Vector{Float64} - fitobs = obs(algorithm, (X, y)) + fitobs = obs(algorithm, data) predictobs = LearnAPI.obs(model, X) model = fit(algorithm, MLUtils.getobs(fitobs, train); verbosity=0) - @test predict(model, LiteralTarget(), MLUtils.getobs(predictobs, test)) == ŷ - - @test LearnAPI.target(algorithm, (X, y)) == y + @test predict(model, LiteralTarget(), MLUtils.getobs(predictobs, test)) == ŷ == + predict(model, MLUtils.getobs(predictobs, test)) + @test LearnAPI.target(algorithm, data) == y + @test LearnAPI.predict(model, X) ≈ + LearnAPI.predict(model, LearnAPI.input(algorithm, data)) end true diff --git a/test/integration/static_algorithms.jl b/test/integration/static_algorithms.jl index 3991dbf4..1d6a2ad6 100644 --- a/test/integration/static_algorithms.jl +++ b/test/integration/static_algorithms.jl @@ -15,8 +15,8 @@ struct Selector end Selector(; names=Symbol[]) = Selector(names) # LearnAPI.constructor defined later -# `fit` has no input data, does no "learning", and just returns thinly wrapped `algorithm` -# (to distinguish it from the algorithm in dispatch): +# `fit` consumes no observational data, does no "learning", and just returns a thinly +# wrapped `algorithm` (to distinguish it from the algorithm in dispatch): LearnAPI.fit(algorithm::Selector; verbosity=1) = Ref(algorithm) LearnAPI.algorithm(model) = model[] @@ -40,10 +40,11 @@ end Selector, constructor = Selector, functions = ( - fit, - minimize, - transform, - Learn.algorithm, + :(LearnAPI.fit), + :(LearnAPI.algorithm), + :(LearnAPI.minimize), + :(LearnAPI.obs), + :(LearnAPI.transform), ), ) @@ -61,7 +62,9 @@ end # # FEATURE SELECTOR THAT REPORTS BYPRODUCTS OF SELECTION PROCESS # This a variation of `Selector` above that stores the names of rejected features in the -# model object, for inspection by an accessor function called `rejected`. +# model object, for inspection by an accessor function called `rejected`. Since +# `transform(model, X)` mutates `model` in this case, we must overload the +# `predict_or_transform_mutates` trait. 
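+#
+# A sketch of the intended workflow (assuming the definitions below):
+#
+#     algorithm = Selector2(names=[:x, :w])
+#     model = fit(algorithm)          # no training data consumed
+#     filtered = transform(model, X)  # mutates `model` to record rejected names
+#     rejected(model)                 # the `MyPkg.rejected` of the trait list below
+#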
struct Selector2 names::Vector{Symbol} @@ -99,15 +102,15 @@ end @trait( Selector2, - constructor = Selector, + constructor = Selector2, predict_or_transform_mutates = true, functions = ( - fit, - obsfit, - minimize, - transform, - Learn.algorithm, - :(MyPkg.rejected), # accessor function not owned by LearnAPI.jl + :(LearnAPI.fit), + :(LearnAPI.algorithm), + :(LearnAPI.minimize), + :(LearnAPI.obs), + :(LearnAPI.transform), + :(MyPkg.rejected), # accessor function not owned by LearnAPI.jl, ) ) From 79c67e320277423d39682116d41bb56c556525f4 Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Thu, 26 Sep 2024 17:33:01 +1200 Subject: [PATCH 16/27] add fallbacks to rm need for overloading predict convenience fn --- docs/src/anatomy_of_an_implementation.md | 19 ++++++++----------- docs/src/obs.md | 2 +- src/fit.jl | 8 +++++--- src/obs.jl | 7 ++++++- src/predict_transform.jl | 13 +++++++------ test/integration/regression.jl | 6 ------ 6 files changed, 27 insertions(+), 28 deletions(-) diff --git a/docs/src/anatomy_of_an_implementation.md b/docs/src/anatomy_of_an_implementation.md index 37c73c94..716ef514 100644 --- a/docs/src/anatomy_of_an_implementation.md +++ b/docs/src/anatomy_of_an_implementation.md @@ -116,7 +116,7 @@ end ## Implementing `predict` -The primary `predict` call will look like this: +Users will be able to call `predict` like this: ```julia predict(model, LiteralTarget(), Xnew) @@ -128,19 +128,17 @@ the target, such as probability density functions. `LiteralTarget` is an exampl [`LearnAPI.KindOfProxy`](@ref proxy_types) type. Targets and target proxies are discussed [here](@ref proxy). -Here's the implementation for our ridge regressor: +So, we provide this implementation for our ridge regressor: ```@example anatomy LearnAPI.predict(model::RidgeFitted, ::LiteralTarget, Xnew) = Tables.matrix(Xnew)*model.coefficients ``` -Since we can make no other kind of prediction in this case, we may overload the following -for user convenience: +If the kind of proxy is omitted, as in `predict(model, Xnew)`, then a fallback grabs the +first element of the tuple returned by [`LearnAPI.kinds_of_proxy(algorithm)`](@ref), which +we overload appropriately below. -```@example anatomy -LearnAPI.predict(model::RidgeFitted, Xnew) = predict(model, LiteralTarget(), Xnew) -``` ## Extracting the target from training data @@ -263,7 +261,7 @@ Training and predicting: Xtrain = Tables.subset(X, train) ytrain = y[train] model = fit(algorithm, (Xtrain, ytrain)) # `fit(algorithm, Xtrain, ytrain)` will also work -ŷ = predict(model, LiteralTarget(), Tables.subset(X, test)) +ŷ = predict(model, Tables.subset(X, test)) ``` Extracting coefficients: @@ -312,7 +310,6 @@ LearnAPI.minimize(model::RidgeFitted) = LearnAPI.fit(algorithm::Ridge, X, y; kwargs...) = fit(algorithm, (X, y); kwargs...) 
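+# Note: no `predict(model, Xnew)` method is needed below: a new fallback dispatches on
+# the first element of `LearnAPI.kinds_of_proxy(algorithm)` (see discussion above).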
-LearnAPI.predict(model::RidgeFitted, Xnew) = predict(model, LiteralTarget(), Xnew) @trait( Ridge, @@ -424,8 +421,8 @@ LearnAPI.predict(model::RidgeFitted, ::LiteralTarget, Xnew) = ### `target` and `input` methods -We provide an additional overloading of [`LearnAPI.target`](@ref) to handle the additional supported data -argument of `fit`: +We provide an additional overloading of [`LearnAPI.target`](@ref) to handle the additional +supported data argument of `fit`: ```@example anatomy2 LearnAPI.target(::Ridge, observations::RidgeFitObs) = observations.y diff --git a/docs/src/obs.md b/docs/src/obs.md index bae83427..ed44668b 100644 --- a/docs/src/obs.md +++ b/docs/src/obs.md @@ -82,7 +82,7 @@ end | [`obs(algorithm_or_model, data)`](@ref) | depends | returns `data` | | | | | -A sample implementation is given in [Providing an advanced data interface](@ref). +A sample implementation is given in [Providing an advanced data interface](@ref). ## Reference diff --git a/src/fit.jl b/src/fit.jl index ddc1dd9b..2a5e0cbf 100644 --- a/src/fit.jl +++ b/src/fit.jl @@ -38,13 +38,15 @@ See also [`predict`](@ref), [`transform`](@ref), [`inverse_transform`](@ref), # New implementations -Implementation is compulsory. The signature must include `verbosity`. Note the requirement -on providing slurping signatures. A fallback for the first signature calls the second, -ignoring `data`: +Implementation is compulsory. The signature must include `verbosity`. A fallback for the +first signature calls the second, ignoring `data`: ```julia fit(algorithm, data; kwargs...) = fit(algorithm; kwargs...) ``` + +Fallbacks also provide the data slurping versions. + $(DOC_DATA_INTERFACE(:fit)) """ diff --git a/src/obs.jl b/src/obs.jl index 2a874d6a..2d784a89 100644 --- a/src/obs.jl +++ b/src/obs.jl @@ -66,9 +66,14 @@ only of suitable tables and arrays, then `obs` and `LearnAPI.data_interface` do to be overloaded. However, the user will get no performance benefits by using `obs` in that case. +When overloading `obs(algorithm, data)` to output new model-specific representations of +data, it may be necessary to also overload [`LearnAPI.input`](@ref), +[`LearnAPI.target`](@ref) (supervised algorithms), and/or [`LearnAPI.weights`](@ref) (if +weights are supported), for extracting relevant parts of the representation. + ## Sample implementation -Refer to the "Anatomy of an Implementation" section of the LearnAPI +Refer to the "Anatomy of an Implementation" section of the LearnAPI.jl [manual](https://juliaai.github.io/LearnAPI.jl/dev/). diff --git a/src/predict_transform.jl b/src/predict_transform.jl index 932ecc2f..c30cd7a5 100644 --- a/src/predict_transform.jl +++ b/src/predict_transform.jl @@ -61,7 +61,8 @@ options with [`LearnAPI.kinds_of_proxy(algorithm)`](@ref), where `algorithm = LearnAPI.algorithm(model)`. The shortcut `predict(model, data)` calls the first method with an algorithm-specific -`kind_of_proxy`. +`kind_of_proxy`, namely the first element of [`LearnAPI.kinds_of_proxy(algorithm)`](@ref), +which lists all supported target proxies. The argument `model` is anything returned by a call of the form `fit(algorithm, ...)`. @@ -90,9 +91,7 @@ Note `predict ` does not mutate any argument, except in the special case If there is no notion of a "target" variable in the LearnAPI.jl sense, or you need an operation with an inverse, implement [`transform`](@ref) instead. -Implementation is optional. 
If the first signature is implemented for some -`kind_of_proxy`, then the implementation should provide an implementation of the second -convenience form, but it is free to choose the fallback `kind_of_proxy`. Each +Implementation is optional. Only the first signature is implemented, but each `kind_of_proxy` that gets an implementation must be added to the list returned by [`LearnAPI.kinds_of_proxy`](@ref). @@ -105,12 +104,14 @@ $(DOC_MUTATION(:predict)) $(DOC_DATA_INTERFACE(:predict)) """ +predict(model, data) = predict(model, kinds_of_proxy(algorithm(model)) |> first, data) predict(model, k::KindOfProxy, data1, data2, datas...; kwargs...) = predict(model, k, (data1, data2, datas...); kwargs...) predict(model, data1, data2, datas...; kwargs...) = predict(model, (data1, data2, datas...); kwargs...) + """ transform(model, data) @@ -154,8 +155,8 @@ See also [`fit`](@ref), [`predict`](@ref), # New implementations -Implementation for new LearnAPI.jl algorithms is optional. -$(DOC_IMPLEMENTED_METHODS(":transform")) +Implementation for new LearnAPI.jl algorithms is optional. A fallback provides the +slurping version. $(DOC_IMPLEMENTED_METHODS(":transform")) $(DOC_MINIMIZE(:transform)) diff --git a/test/integration/regression.jl b/test/integration/regression.jl index 38b0e8f3..d8118a72 100644 --- a/test/integration/regression.jl +++ b/test/integration/regression.jl @@ -94,9 +94,6 @@ LearnAPI.predict(model::RidgeFitted, ::LiteralTarget, observations::AbstractMatr LearnAPI.predict(model::RidgeFitted, ::LiteralTarget, Xnew) = predict(model, LiteralTarget(), obs(model, Xnew)) -# convenience method: -LearnAPI.predict(model::RidgeFitted, data) = predict(model, LiteralTarget(), data) - # accessor function: LearnAPI.feature_importances(model::RidgeFitted) = model.feature_importances @@ -232,9 +229,6 @@ LearnAPI.algorithm(model::BabyRidgeFitted) = model.algorithm LearnAPI.predict(model::BabyRidgeFitted, ::LiteralTarget, Xnew) = Tables.matrix(Xnew)*model.coefficients -# convenience method: -LearnAPI.predict(model::BabyRidgeFitted, data) = predict(model, LiteralTarget(), data) - LearnAPI.minimize(model::BabyRidgeFitted) = BabyRidgeFitted(model.algorithm, model.coefficients, nothing) From d27022916f92f8771b7a3de927870b848fe66d95 Mon Sep 17 00:00:00 2001 From: "Anthony D. 
Blaom" Date: Thu, 26 Sep 2024 17:40:35 +1200 Subject: [PATCH 17/27] add some forgotten files --- docs/make.jl | 2 +- docs/src/target_weights_input.md | 35 ++++++++++++++++++++++++++++++++ src/LearnAPI.jl | 2 +- src/target_weights_input.jl | 30 +++++++++++++++++++++++++++ 4 files changed, 67 insertions(+), 2 deletions(-) create mode 100644 docs/src/target_weights_input.md create mode 100644 src/target_weights_input.jl diff --git a/docs/make.jl b/docs/make.jl index ecfc1dd0..2514acce 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -19,7 +19,7 @@ makedocs( "predict/transform" => "predict_transform.md", "Kinds of Target Proxy" => "kinds_of_target_proxy.md", "minimize" => "minimize.md", - "input" => "input.md", + "target/weights/input" => "target_weights_input.md", "obs" => "obs.md", "Accessor Functions" => "accessor_functions.md", "Algorithm Traits" => "traits.md", diff --git a/docs/src/target_weights_input.md b/docs/src/target_weights_input.md new file mode 100644 index 00000000..8dfaa9f3 --- /dev/null +++ b/docs/src/target_weights_input.md @@ -0,0 +1,35 @@ +# [`input`](@id input) + +```julia +LearnAPI.input(algorithm, data) -> +``` + +# Typical workflow + +Not typically appearing in a general user's workflow but useful in meta-alagorithms, such +as cross-validation (see the example in [`obs` and Data Interfaces](@ref data_interface)). + +Supposing `algorithm` is a supervised classifier predicting a one-dimensional vector +target: + +```julia +model = fit(algorithm, data) +X = LearnAPI.input(algorithm, data) +y = LearnAPI.target(algorithm, data) +ŷ = predict(model, LiteralTarget(), X) +training_loss = sum(ŷ .!= y) +``` + +# Implementation guide + +The fallback returns `first(data)`, assuming `data` is a tuple, and `data` otherwise. + +| method | compulsory? | +|:-------------------------|:-----------:| +| [`LearnAPI.input`](@ref) | no | + +# Reference + +```@docs +LearnAPI.input +``` diff --git a/src/LearnAPI.jl b/src/LearnAPI.jl index 66c9aa9e..0de8c026 100644 --- a/src/LearnAPI.jl +++ b/src/LearnAPI.jl @@ -7,7 +7,7 @@ include("types.jl") include("predict_transform.jl") include("fit.jl") include("minimize.jl") -include("input.jl") +include("target_weights_input.jl") include("obs.jl") include("accessor_functions.jl") include("traits.jl") diff --git a/src/target_weights_input.jl b/src/target_weights_input.jl new file mode 100644 index 00000000..35a3e3df --- /dev/null +++ b/src/target_weights_input.jl @@ -0,0 +1,30 @@ +""" + LearnAPI.input(algorithm, data) + +Where `data` is a supported data argument for `fit`, extract from `data` something +suitable for passing as the third argument to `predict`, as in the following sample +workflow: + +```julia +model = fit(algorithm, data) +X = input(data) +ŷ = predict(algorithm, kind_of_proxy, X) # eg, `kind_of_proxy = LiteralTarget()` +``` + +The return value has the same number of observations as `data` does. Where +`LearnAPI.target(algorithm)` is `true` (supervised learning) one expects `ŷ` above to be +an approximate proxy for `target(algorithm, data)`, the training target. + + +# New implementations + +The following fallbacks typically make overloading `LearnAPI.input` unnecessary: + +```julia +LearnAPI.input(algorithm, data) = data +LearnAPI.input(algorithm, data::Tuple) = first(data) +``` + +""" +input(algorithm, data) = data +input(algorithm, data::Tuple) = first(data) From 6e721c824dd46290233ce0cc74ef3c7fc143f933 Mon Sep 17 00:00:00 2001 From: "Anthony D. 
Blaom" Date: Fri, 27 Sep 2024 08:14:09 +1200 Subject: [PATCH 18/27] doc updates and some small re-organziation of code --- docs/src/anatomy_of_an_implementation.md | 48 ++++++++++---------- docs/src/index.md | 7 +-- docs/src/reference.md | 9 ++-- docs/src/target_weights_input.md | 21 ++++++--- docs/src/traits.md | 13 +++--- src/predict_transform.jl | 8 ++-- src/target_weights_input.jl | 56 +++++++++++++++++++++--- src/traits.jl | 41 +---------------- 8 files changed, 111 insertions(+), 92 deletions(-) diff --git a/docs/src/anatomy_of_an_implementation.md b/docs/src/anatomy_of_an_implementation.md index 716ef514..58b3ba7b 100644 --- a/docs/src/anatomy_of_an_implementation.md +++ b/docs/src/anatomy_of_an_implementation.md @@ -8,21 +8,23 @@ refer to the [demonstration](@ref workflow) of the implementation given later. For a transformer, implementations ordinarily implement `transform` instead of `predict`. For more on `predict` versus `transform`, see [Predict or transform?](@ref) -!!! important +!!! note - Implementations of `fit`, `predict`, etc, - always have a *single* `data` argument, as in `fit(algorithm, data; verbosity=1)`. - For user convenience, calls like `fit(algorithm, X, y)` automatically fallback to `fit(algorithm, (X, y))`. + New implementations of `fit`, `predict`, etc, + always have a *single* `data` argument, as in + `LearnAPI.fit(algorithm, data; verbosity=1) = ...`. + For convenience, user calls like `fit(algorithm, X, y)` automatically fallback + to `fit(algorithm, (X, y))`. !!! note - If the `data` object consumed by `fit`, `predict`, or `transform` is not - not a suitable table¹, array³, tuple of tables and arrays, or some - other object implementing - the MLUtils.jl `getobs`/`numobs` interface, - then an implementation must: (i) suitably overload the trait - [`LearnAPI.data_interface`](@ref); and/or (ii) overload [`obs`](@ref), as - illustrated below under [Providing an advanced data interface](@ref). + If the `data` object consumed by `fit`, `predict`, or `transform` is not + not a suitable table¹, array³, tuple of tables and arrays, or some + other object implementing + the MLUtils.jl `getobs`/`numobs` interface, + then an implementation must: (i) suitably overload the trait + [`LearnAPI.data_interface`](@ref); and/or (ii) overload [`obs`](@ref), as + illustrated below under [Providing an advanced data interface](@ref). The first line below imports the lightweight package LearnAPI.jl whose methods we will be extending. The second imports libraries needed for the core algorithm. @@ -48,7 +50,7 @@ Instances of `Ridge` will be [algorithms](@ref algorithms), in LearnAPI.jl parla Associated with each new type of LearnAPI [algorithm](@ref algorithms) will be a keyword argument constructor, providing default values for all properties (struct fields) that are -not other algorithms, and we must implement `LearnAPI.constructor(algorithm)`, for +not other algorithms, and we must implement [`LearnAPI.constructor(algorithm)`](@ref), for recovering the constructor from an instance: ```@example anatomy @@ -62,7 +64,7 @@ LearnAPI.constructor(::Ridge) = Ridge nothing # hide ``` -So, in this case, if `algorithm = Ridge(0.2)`, then +For example, in this case, if `algorithm = Ridge(0.2)`, then `LearnAPI.constructor(algorithm)(lambda=0.2) == algorithm` is true. Note that we attach the docstring to the *constructor*, not the struct. @@ -123,12 +125,12 @@ predict(model, LiteralTarget(), Xnew) ``` where `Xnew` is a table (of the same form as `X` above). 
The argument `LiteralTarget()` -signals that we want literal predictions of the target variable, as opposed to a proxy for -the target, such as probability density functions. `LiteralTarget` is an example of a -[`LearnAPI.KindOfProxy`](@ref proxy_types) type. Targets and target proxies are discussed -[here](@ref proxy). +signals that literal predictions of the target variable are sought, as opposed to some +proxy for the target, such as probability density functions. `LiteralTarget` is an +example of a [`LearnAPI.KindOfProxy`](@ref proxy_types) type. Targets and target proxies +are discussed [here](@ref proxy). -So, we provide this implementation for our ridge regressor: +We provide this implementation for our ridge regressor: ```@example anatomy LearnAPI.predict(model::RidgeFitted, ::LiteralTarget, Xnew) = @@ -197,7 +199,7 @@ and so we regard [`LearnAPI.constructor`](@ref) defined above as a trait. Because we have implemented `predict`, we are required to overload the [`LearnAPI.kinds_of_proxy`](@ref) trait. Because we can only make point predictions of the -target, we do this like this: +target, we make this definition: ```julia LearnAPI.kinds_of_proxy(::Ridge) = (LiteralTarget(),) @@ -233,6 +235,11 @@ traits](@ref traits_list) to see which might apply to a new implementation, to e maximum buy into functionality provided by third party packages, and to assist third party algorithms that match machine learning algorithms to user-defined tasks. +Note that we know `Ridge` instances are supervised algorithms because `:(LearnAPI.target) +in LearnAPI.functions(algorithm)`, for every instance `algorithm`. With [some +exceptions](@ref trait_contract), the value of a trait should depend only on the *type* of +the argument. + ## [Demonstration](@id workflow) @@ -308,9 +315,6 @@ LearnAPI.coefficients(model::RidgeFitted) = model.named_coefficients LearnAPI.minimize(model::RidgeFitted) = RidgeFitted(model.algorithm, model.coefficients, nothing) -LearnAPI.fit(algorithm::Ridge, X, y; kwargs...) = - fit(algorithm, (X, y); kwargs...) - @trait( Ridge, constructor = Ridge, diff --git a/docs/src/index.md b/docs/src/index.md index b66a6d74..3bb96562 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -31,9 +31,10 @@ ML/statistics toolboxes and other packages. LearnAPI.jl also provides a number o ## Sample workflow Suppose `forest` is some object encapsulating the hyperparameters of the [random forest -algorithm](https://en.wikipedia.org/wiki/Random_forest) (the number of trees, -etc.). Then, a LearnAPI.jl interface can be implemented, for objects with the type of -`forest`, to enable the following basic workflow: +algorithm](https://en.wikipedia.org/wiki/Random_forest) (the number of trees, etc.). Then, +a LearnAPI.jl interface can be implemented, for objects with the type of `forest`, to +enable the basic workflow below. In this case data is presented following the +"scikit-learn" `X, y` pattern, although LearnAPI.jl supports other patterns as well. ```julia X = diff --git a/docs/src/reference.md b/docs/src/reference.md index de0bb3d6..3b8d7397 100644 --- a/docs/src/reference.md +++ b/docs/src/reference.md @@ -160,11 +160,12 @@ Only these method names are exported by LearnAPI: `fit`, `transform`, `inverse_t - [`minimize`](@ref algorithm_minimize): for stripping the `model` output by `fit` of inessential content, for purposes of serialization. -- [`LearnAPI.input`](@ref input): for extracting inputs from training data. 
+- [`LearnAPI.target`](@ref input), [`LearnAPI.weights`](@ref input),
+  [`LearnAPI.input`](@ref): for extracting relevant parts of training data, where defined.
 
-- [`obs`](@ref data_interface): a method for exposing to the user algorithm-specific
-  representations of data that are guaranteed to implement observation access, as
-  specified by [`LearnAPI.data_interface(algorithm)`](@ref).
+- [`obs`](@ref data_interface): optional method for exposing to the user
+  algorithm-specific representations of data that are guaranteed to implement observation
+  access, as specified by [`LearnAPI.data_interface(algorithm)`](@ref).
 
 - [Accessor functions](@ref accessor_functions): these include functions like
   `feature_importances` and `training_losses`, for extracting, from training outcomes,
diff --git a/docs/src/target_weights_input.md b/docs/src/target_weights_input.md
index 8dfaa9f3..847dbbec 100644
--- a/docs/src/target_weights_input.md
+++ b/docs/src/target_weights_input.md
@@ -1,9 +1,15 @@
-# [`input`](@id input)
+# [`target`, `weights`, and `input`](@id input)
+
+Methods for extracting parts of training data:
 
 ```julia
+LearnAPI.target(algorithm, data) ->
+LearnAPI.weights(algorithm, data) ->
 LearnAPI.input(algorithm, data) ->
 ```
 
+Here `data` is something supported in a call of the form `fit(algorithm, data)`.
+
 # Typical workflow
 
 Not typically appearing in a general user's workflow but useful in meta-algorithms, such
@@ -24,12 +30,17 @@ training_loss = sum(ŷ .!= y)
 
 The fallback returns `first(data)`, assuming `data` is a tuple, and `data` otherwise.
 
-| method                   | compulsory? |
-|:-------------------------|:-----------:|
-| [`LearnAPI.input`](@ref) | no          |
+| method                     | fallback          | compulsory?            |
+|:---------------------------|:-----------------:|:-----------------------|
+| [`LearnAPI.target`](@ref)  | returns `nothing` | no                     |
+| [`LearnAPI.weights`](@ref) | returns `nothing` | no                     |
+| [`LearnAPI.input`](@ref)   | see docstring     | only if fallback fails |
 
 # Reference
 
 ```@docs
+LearnAPI.target
+LearnAPI.weights
 LearnAPI.input
 ```
diff --git a/docs/src/traits.md b/docs/src/traits.md
index b04b494d..c75145b1 100644
--- a/docs/src/traits.md
+++ b/docs/src/traits.md
@@ -1,8 +1,9 @@
 # [Algorithm Traits](@id traits)
 
-Traits generally promise specific algorithm behavior, such as: *This algorithm supports
-per-observation weights, or *This algorithm's `transform` method predicts `Real`
-vectors*. They also record more mundane information, such as a package license.
+Traits generally promise specific algorithm behavior, such as: *This algorithm can make
+point or probabilistic predictions*, *This algorithm sees a target variable in training*,
+or *This algorithm's `transform` method predicts `Real` vectors*. They also record more
+mundane information, such as a package license.
 
 Algorithm traits are functions whose first (and usually only) argument is an algorithm.
 
@@ -24,8 +25,6 @@ package [ScientificTypesBase.jl](https://github.com/JuliaAI/ScientificTypesBase.
| [`LearnAPI.constructor`](@ref)`(algorithm)` | constructor for generating new or modified versions of `algorithm` | (no fallback) | `RidgeRegressor` |
| [`LearnAPI.functions`](@ref)`(algorithm)` | functions you can apply to `algorithm` or associated model (traits excluded) | `()` | `(:fit, :predict, :minimize, :(LearnAPI.algorithm), :obs)` |
| [`LearnAPI.kinds_of_proxy`](@ref)`(algorithm)` | instances `kind` of `KindOfProxy` for which an implementation of `LearnAPI.predict(algorithm, kind, ...)` is guaranteed. | `()` | `(Distribution(), Interval())` |
-| [`LearnAPI.target`](@ref)`(algorithm)` | `true` if target can appear in `fit` data | `false` | `true` |
-| [`LearnAPI.weights`](@ref)`(algorithm)` | `true` if per-observation weights can appear in `fit` data | `false` | `true` |
| [`LearnAPI.descriptors`](@ref)`(algorithm)` | lists one or more suggestive algorithm descriptors from `LearnAPI.descriptors()` | `()` | (:regression, :probabilistic) |
| [`LearnAPI.is_pure_julia`](@ref)`(algorithm)` | `true` if implementation is 100% Julia code | `false` | `true` |
| [`LearnAPI.pkg_name`](@ref)`(algorithm)` | name of package providing core code (may be different from package providing LearnAPI.jl implementation) | `"unknown"` | `"DecisionTree"` |
@@ -91,7 +90,7 @@ Multiple traits can be declared like this:
 )
 ```
 
-### The global trait contracts
+### [The global trait contract](@id trait_contract)
 
 To ensure that trait metadata can be stored in an external algorithm registry, LearnAPI.jl
 requires:
@@ -115,8 +114,6 @@ informative (as in `LearnAPI.predict_type(algorithm) = Any`).
 LearnAPI.constructor
 LearnAPI.functions
 LearnAPI.kinds_of_proxy
-LearnAPI.target
-LearnAPI.weights
 LearnAPI.descriptors
 LearnAPI.is_pure_julia
 LearnAPI.pkg_name
diff --git a/src/predict_transform.jl b/src/predict_transform.jl
index c30cd7a5..c1c9d9d2 100644
--- a/src/predict_transform.jl
+++ b/src/predict_transform.jl
@@ -1,4 +1,4 @@
- function DOC_IMPLEMENTED_METHODS(name; overloaded=false)
+function DOC_IMPLEMENTED_METHODS(name; overloaded=false)
     word = overloaded ? "overloaded" : "implemented"
     "If $word, you must include `$name` in the tuple returned by the "*
     "[`LearnAPI.functions`](@ref) trait. "
@@ -105,6 +105,8 @@ $(DOC_DATA_INTERFACE(:predict))
 
 """
 predict(model, data) = predict(model, kinds_of_proxy(algorithm(model)) |> first, data)
+
+# automatic slurping of multiple data arguments:
 predict(model, k::KindOfProxy, data1, data2, datas...; kwargs...) =
     predict(model, k, (data1, data2, datas...); kwargs...)
 predict(model, data1, data2, datas...; kwargs...) =
     predict(model, (data1, data2, datas...); kwargs...)
 
+
 """
     transform(model, data)
@@ -166,9 +168,7 @@ $(DOC_DATA_INTERFACE(:transform))
 
 """
 transform(model, data1, data2...; kwargs...) =
-    transform(model, (data1, data2...); kwargs...)
-
-
+    transform(model, (data1, data2...); kwargs...)  # automatic slurping
 
 """
     inverse_transform(model, data)
diff --git a/src/target_weights_input.jl b/src/target_weights_input.jl
index 35a3e3df..b5d486e6 100644
--- a/src/target_weights_input.jl
+++ b/src/target_weights_input.jl
+"""
+    LearnAPI.target(algorithm, data) -> target
+
+Return, for each form of `data` supported in a call of the form [`fit(algorithm,
+data)`](@ref), the target variable part of `data`. If `nothing` is returned, the
+`algorithm` does not see a target variable in training (is unsupervised).
+
+Refer to LearnAPI.jl documentation for the precise meaning of "target".
+
+# New implementations
+
+A fallback returns `nothing`. Must be implemented if `fit` consumes data including a
+target variable.
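+
+For example, in the common case where `data = (X, y)`, with `y` the target, an
+implementation might simply write (a sketch, for a hypothetical `MyAlgorithm`):
+
+```julia
+LearnAPI.target(::MyAlgorithm, data) = last(data)
+```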
+
+$(DOC_IMPLEMENTED_METHODS(":(LearnAPI.target)"; overloaded=true))
+
+"""
+target(::Any, data) = nothing
+
+"""
+    LearnAPI.weights(algorithm, data) -> weights
+
+Return, for each form of `data` supported in a call of the form [`fit(algorithm,
+data)`](@ref), the per-observation weights part of `data`. Where `nothing` is returned, no
+weights are part of `data`, which is to be interpreted as uniform weighting.
+
+# New implementations
+
+Overloading is optional. A fallback returns `nothing`.
+
+$(DOC_IMPLEMENTED_METHODS(":(LearnAPI.weights)"; overloaded=true))
+
+"""
+weights(::Any, data) = nothing
+
 """
     LearnAPI.input(algorithm, data)
 
-Where `data` is a supported data argument for `fit`, extract from `data` something
-suitable for passing as the third argument to `predict`, as in the following sample
-workflow:
+Return, for each form of `data` supported in a call of the form [`fit(algorithm,
+data)`](@ref), the "input" or "features" part of `data` (as opposed to the target
+variable, for example).
+
+The returned object `X` may always be passed to `predict` or `transform`, where
+implemented, as in the following sample workflow:
 
 ```julia
 model = fit(algorithm, data)
 X = input(algorithm, data)
 ŷ = predict(model, kind_of_proxy, X) # eg, `kind_of_proxy = LiteralTarget()`
 ```
 
-The return value has the same number of observations as `data` does. Where
-`LearnAPI.target(algorithm)` is `true` (supervised learning) one expects `ŷ` above to be
-an approximate proxy for `target(algorithm, data)`, the training target.
+The return value has the same number of observations as `data` does. For supervised models
+(i.e., where `:(LearnAPI.target) in LearnAPI.functions(algorithm)`) `ŷ` above is generally
+intended to be an approximate proxy for `LearnAPI.target(algorithm, data)`, the training
+target.
 
 
 # New implementations
 
@@ -25,6 +64,11 @@ LearnAPI.input(algorithm, data) = data
 LearnAPI.input(algorithm, data::Tuple) = first(data)
 ```
 
+Overloading may be necessary if [`obs(algorithm, data)`](@ref) is overloaded to return
+some algorithm-specific representation of training `data`. For density estimators, whose
+`fit` typically consumes *only* a target variable, you should overload this method to
+return `nothing`.
+
 """
 input(algorithm, data) = data
 input(algorithm, data::Tuple) = first(data)
diff --git a/src/traits.jl b/src/traits.jl
index 090716b4..7fcf63d6 100644
--- a/src/traits.jl
+++ b/src/traits.jl
@@ -27,8 +27,6 @@ const TRAITS = [
     :constructor,
     :functions,
     :kinds_of_proxy,
-    :target,
-    :weights,
     :descriptors,
     :is_pure_julia,
     :pkg_name,
@@ -63,8 +61,7 @@ const TRAITS = [
 """
     LearnAPI.constructor(algorithm)
 
-Return a keyword constructor that can be used to clone `algorithm` or make copies with
-selectively altered property values:
+Return a keyword constructor that can be used to clone `algorithm`:
 
 ```julia-repl
 julia> algorithm.lambda
@@ -179,42 +176,6 @@ For more on target variables and target proxies, refer to the LearnAPI documenta
 """
 kinds_of_proxy(::Any) = ()
 
-"""
-    LearnAPI.target(algorithm)::Bool
-    LearnAPI.target(algorithm, data) -> target
-
-First method (an algorithm trait) returns `true` if the second method returns a target
-variable for some value(s) of `data`, where `data` is a supported argument in
-[`fit(algorithm, data)`](@ref).
-
-# New implementations
-
-The trait fallback returns `false`. A fallback for the second method returns `nothing`.
- -""" -target(::Any) = false -target(::Any, data) = nothing - -""" - LearnAPI.weights(algorithm)::Bool - LearnAPI.weights(algorithm, data) -> weights - -First method (an algorithm trait) returns `true` if the second method returns -per-observation weights, for some value(s) of `data`, where `data` is a supported argument -in [`fit(algorithm, data)`](@ref). - -Otherwise, weights, if they apply, are assumed uniform. - -# New implementations - -The trait fallback returns `false`. A fallback for the second method returns `nothing`, -which is interpreted as uniform weights. - -""" -weights(::Any) = false -weights(::Any, data) = nothing - - descriptors() = [ :regression, :classification, From 729e0d790fefd221b0ebeabdb0074254ae99aa91 Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Wed, 2 Oct 2024 10:07:12 +1300 Subject: [PATCH 19/27] complete addition of update methods + other tweaks --- docs/make.jl | 4 +- docs/src/accessor_functions.md | 6 +- docs/src/anatomy_of_an_implementation.md | 39 ++++--- docs/src/fit.md | 57 ++++++---- docs/src/index.md | 34 +++--- docs/src/kinds_of_target_proxy.md | 2 +- docs/src/obs.md | 4 +- docs/src/reference.md | 47 ++++---- ...ts_input.md => target_weights_features.md} | 18 +-- src/LearnAPI.jl | 5 +- src/fit.jl | 107 ++++++++++++++++-- src/obs.jl | 2 +- src/predict_transform.jl | 12 +- ...ts_input.jl => target_weights_features.jl} | 20 ++-- src/traits.jl | 84 +++++++------- test/integration/regression.jl | 14 ++- test/integration/static_algorithms.jl | 2 + 17 files changed, 301 insertions(+), 156 deletions(-) rename docs/src/{target_weights_input.md => target_weights_features.md} (59%) rename src/{target_weights_input.jl => target_weights_features.jl} (78%) diff --git a/docs/make.jl b/docs/make.jl index 2514acce..dafb1c97 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -15,11 +15,11 @@ makedocs( "Anatomy of an Implementation" => "anatomy_of_an_implementation.md", "Reference" => [ "Overview" => "reference.md", - "fit" => "fit.md", + "fit/update" => "fit.md", "predict/transform" => "predict_transform.md", "Kinds of Target Proxy" => "kinds_of_target_proxy.md", "minimize" => "minimize.md", - "target/weights/input" => "target_weights_input.md", + "target/weights/features" => "target_weights_features.md", "obs" => "obs.md", "Accessor Functions" => "accessor_functions.md", "Algorithm Traits" => "traits.md", diff --git a/docs/src/accessor_functions.md b/docs/src/accessor_functions.md index f35adc54..e6e50864 100644 --- a/docs/src/accessor_functions.md +++ b/docs/src/accessor_functions.md @@ -1,6 +1,7 @@ # [Accessor Functions](@id accessor_functions) -The sole argument of an accessor function is the output, `model`, of [`fit`](@ref). +The sole argument of an accessor function is the output, `model`, of +[`fit`](@ref). Algorithms are free to implement any number of these, or none of them. - [`LearnAPI.algorithm(model)`](@ref) - [`LearnAPI.extras(model)`](@ref) @@ -15,6 +16,9 @@ The sole argument of an accessor function is the output, `model`, of [`fit`](@re - [`LearnAPI.training_scores(model)`](@ref) - [`LearnAPI.components(model)`](@ref) +Algorithm-specific accessor functions may also be implemented. The names of all accessor +functions are included in the list returned by [`LearnAPI.functions(algorithm)`](@ref). + ## Implementation guide All new implementations must implement [`LearnAPI.algorithm`](@ref). 
While, all others are diff --git a/docs/src/anatomy_of_an_implementation.md b/docs/src/anatomy_of_an_implementation.md index 58b3ba7b..206e624d 100644 --- a/docs/src/anatomy_of_an_implementation.md +++ b/docs/src/anatomy_of_an_implementation.md @@ -5,7 +5,7 @@ regression](https://en.wikipedia.org/wiki/Ridge_regression) with no intercept. T workflow we want to enable has been previewed in [Sample workflow](@ref). Readers can also refer to the [demonstration](@ref workflow) of the implementation given later. -For a transformer, implementations ordinarily implement `transform` instead of +A transformer ordinarily implements `transform` instead of `predict`. For more on `predict` versus `transform`, see [Predict or transform?](@ref) !!! note @@ -13,18 +13,26 @@ For a transformer, implementations ordinarily implement `transform` instead of New implementations of `fit`, `predict`, etc, always have a *single* `data` argument, as in `LearnAPI.fit(algorithm, data; verbosity=1) = ...`. - For convenience, user calls like `fit(algorithm, X, y)` automatically fallback + For convenience, user-calls, such as `fit(algorithm, X, y)`, automatically fallback to `fit(algorithm, (X, y))`. !!! note + By default, it is assumed that `data` supports the [`LearnAPI.RandomAccess`](@ref) + interface; this includes all matrices, with observations-as-columns, most tables, and + tuples thereof). See [`LearnAPI.RandomAccess`](@ref) for details. If this is not the + case then an implementation must either: + If the `data` object consumed by `fit`, `predict`, or `transform` is not not a suitable table¹, array³, tuple of tables and arrays, or some other object implementing the MLUtils.jl `getobs`/`numobs` interface, - then an implementation must: (i) suitably overload the trait - [`LearnAPI.data_interface`](@ref); and/or (ii) overload [`obs`](@ref), as - illustrated below under [Providing an advanced data interface](@ref). + then an implementation must: (i) overload [`obs`](@ref) to articulate how + provided data can be transformed into a form that does support + it, as illustrated below under + [Providing an advanced data interface](@ref); or (ii) overload the trait + [`LearnAPI.data_interface`](@ref) to specify a more relaxed data + API. The first line below imports the lightweight package LearnAPI.jl whose methods we will be extending. The second imports libraries needed for the core algorithm. @@ -152,9 +160,9 @@ from training data, by implementing [`LearnAPI.target`](@ref): LearnAPI.target(algorithm, data) = last(data) ``` -There is a similar method, [`LearnAPI.input`](@ref) for declaring how input data can be -extracted (for passing to `predict`, for example) but this method has a fallback which -typically suffices: return `first(data)` if `data` is a tuple, and otherwise return +There is a similar method, [`LearnAPI.features`](@ref) for declaring how training features +can be extracted (for passing to `predict`, for example) but this method has a fallback +which typically suffices: return `first(data)` if `data` is a tuple, and otherwise return `data`. 
@@ -218,7 +226,7 @@ A macro provides a shortcut, convenient when multiple traits are to be defined: :(LearnAPI.algorithm), :(LearnAPI.minimize), :(LearnAPI.obs), - :(LearnAPI.input), + :(LearnAPI.features), :(LearnAPI.target), :(LearnAPI.predict), :(LearnAPI.coefficients), @@ -325,7 +333,7 @@ LearnAPI.minimize(model::RidgeFitted) = :(LearnAPI.algorithm), :(LearnAPI.minimize), :(LearnAPI.obs), - :(LearnAPI.input), + :(LearnAPI.features), :(LearnAPI.target), :(LearnAPI.predict), :(LearnAPI.coefficients), @@ -423,7 +431,7 @@ LearnAPI.predict(model::RidgeFitted, ::LiteralTarget, Xnew) = predict(model, LiteralTarget(), obs(model, Xnew)) ``` -### `target` and `input` methods +### `target` and `features` methods We provide an additional overloading of [`LearnAPI.target`](@ref) to handle the additional supported data argument of `fit`: @@ -432,11 +440,11 @@ supported data argument of `fit`: LearnAPI.target(::Ridge, observations::RidgeFitObs) = observations.y ``` -Similarly, we must overload [`LearnAPI.input`](@ref), which extracts inputs from training -data (objects that can be passed to `predict`) like this +Similarly, we must overload [`LearnAPI.features`](@ref), which extracts features from +training data (objects that can be passed to `predict`) like this ```@example anatomy2 -LearnAPI.input(::Ridge, observations::RidgeFitObs) = observations.A +LearnAPI.features(::Ridge, observations::RidgeFitObs) = observations.A ``` as the fallback mentioned above is no longer adequate. @@ -482,6 +490,9 @@ ẑ = predict(model, MLUtils.getobs(observations_for_predict, test)) @assert ẑ == ŷ ``` +For an application of [`obs`](@ref) to efficient cross-validation, see [here](@ref +obs_workflows). + --- ¹ In LearnAPI.jl a *table* is any object `X` implementing the diff --git a/docs/src/fit.md b/docs/src/fit.md index 1687c686..c512be9c 100644 --- a/docs/src/fit.md +++ b/docs/src/fit.md @@ -1,22 +1,28 @@ -# [`fit`](@ref fit) +# [`fit`, `update`, `update_observations`, and `update_features`](@id fit) -Training for the first time: +### Training ```julia fit(algorithm, data; verbosity=1) -> model fit(algorithm; verbosity=1) -> static_model ``` -Updating: +A "static" algorithm is one that does not generalize to new observations (e.g., some +clustering algorithms); there is no trainiing data and the algorithm is executed by +`predict` or `transform` which receive the data. See example below. + +When `fit` expects a tuple form of argument, `data = (X1, ..., Xn)`, then the signature +`fit(algorithm, X1, ..., Xn)` is also provided. + +### Updating ``` -fit(model, data; verbosity=1, param1=new_value1, param2=new_value2, ...) -> updated_model -fit(model, NewObservations(), new_data; verbosity=1, param1=new_value1, ...) -> updated_model -fit(model, NewFeatures(), new_data; verbosity=1, param1=new_value1, ...) -> updated_model +update(model, data; verbosity=1, param1=new_value1, param2=new_value2, ...) -> updated_model +update_observations(model, new_data; verbosity=1, param1=new_value1, ...) -> updated_model +update_features(model, new_data; verbosity=1, param1=new_value1, ...) -> updated_model ``` -When `fit` expects a tuple form of argument, `data = (X1, ..., Xn)`, then the signature -`fit(algorithm, X1, ..., Xn)` is also provided. +Data slurping forms are similarly provided for updating methods. 
## Typical workflows

@@ -27,13 +33,13 @@ algorithm = Algorithm(n=100)
 model = fit(algorithm, (X, y)) # or `fit(algorithm, X, y)`
 
 # Predict probability distributions:
-ŷ = predict(model, Distribution(), Xnew) 
+ŷ = predict(model, Distribution(), Xnew)
 
 # Inspect some byproducts of training:
 LearnAPI.feature_importances(model)
 
 # Add 50 iterations and predict again:
-model = fit(model; n=150)
+model = update(model; n=150)
 predict(model, Distribution(), X)
 ```
 
@@ -41,32 +47,41 @@ predict(model, Distribution(), X)
 
 ```julia
 # Apply some clustering algorithm which cannot be generalized to new data:
-model = fit(algorithm)
-labels = predict(model, LabelAmbiguous(), X) # mutates `model`
+model = fit(algorithm) # no training data
+labels = predict(model, LabelAmbiguous(), X) # may mutate `model`
+
+# Or, in one line:
+labels = predict(algorithm, LabelAmbiguous(), X)
 
-# inspect byproducts of the clustering algorithm (e.g., outliers):
+# But the two-line version exposes byproducts of the clustering algorithm (e.g., outliers):
 LearnAPI.extras(model)
 ```
 
 ## Implementation guide
 
-Initial training:
+### Training
 
| method                                        | fallback                                   | compulsory?        |
|:----------------------------------------------|:-------------------------------------------|--------------------|
| [`fit`](@ref)`(algorithm, data; verbosity=1)` | ignores `data` and applies signature below | yes, unless static |
| [`fit`](@ref)`(algorithm; verbosity=1)`       | none                                       | no, unless static  |
 
-Updating:
+### Updating
 
-| method | fallback | compulsory? |
-|:-------------------------------------------------------------------------------|:---------------------------------------------------------------------------|-------------|
-| [`fit`](@ref)`(model, data; verbosity=1, param_updates...)` | retrains from scratch on `data` with specified hyperparameter replacements | no |
-| [`fit`](@ref)`(model, ::NewObservations, data; verbosity=1, param_updates...)` | none | no |
-| [`fit`](@ref)`(model, ::NewFeatures, data; verbosity=1, param_updates...)` | none | no |
+| method                                                                                | fallback | compulsory? |
+|:--------------------------------------------------------------------------------------|:---------|-------------|
+| [`update`](@ref)`(model, data; verbosity=1, hyperparameter_updates...)`               | none     | no          |
+| [`update_observations`](@ref)`(model, data; verbosity=1, hyperparameter_updates...)`  | none     | no          |
+| [`update_features`](@ref)`(model, data; verbosity=1, hyperparameter_updates...)`      | none     | no          |
+
+There are some contracts regarding the behaviour of the update methods, as they relate to
+a previous `fit` call. Consult the document strings for details.
 
 ## Reference
 
 ```@docs
-LearnAPI.fit
+fit
+update
+update_observations
+update_features
 ```
diff --git a/docs/src/index.md b/docs/src/index.md
index b66a6d74..3bb96562 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -9,12 +9,14 @@
 A base Julia interface for machine learning and statistics
```

-LearnAPI.jl is a lightweight, functional-style interface, providing a collection of
-[methods](@ref Methods), such as `fit` and `predict`, to be implemented by algorithms from
-machine learning and statistics. Through such implementations, these algorithms buy into
-functionality, such as hyperparameter optimization and model composition, as provided by
-ML/statistics toolboxes and other packages. LearnAPI.jl also provides a number of Julia
-[traits](@ref traits) for promising specific behavior.
+LearnAPI.jl is a lightweight, functional-style interface, providing a
+collection of [methods](@ref Methods), such as `fit` and `predict`, to be implemented by
+algorithms from machine learning and statistics. Through such implementations, these
+algorithms buy into functionality, such as hyperparameter optimization and model
+composition, as provided by ML/statistics toolboxes and other packages. LearnAPI.jl also
+provides a number of Julia [traits](@ref traits) for promising specific behavior.
+
+LearnAPI.jl has no package dependencies.

 ```@raw html
 🚧
@@ -41,15 +43,18 @@ X =
 y =
 Xnew =

+# List LearnAPI functions implemented for `forest`:
+LearnAPI.functions(forest)
+
 # Train:
 model = fit(forest, X, y)

+# Generate point predictions:
+ŷ = predict(model, Xnew) # or `predict(model, LiteralTarget(), Xnew)`
+
 # Predict probability distributions:
 predict(model, Distribution(), Xnew)

-# Generate point predictions:
-ŷ = predict(model, LiteralTarget(), Xnew) # or `predict(model, Xnew)`
-
 # Apply an "accessor function" to inspect byproducts of training:
 LearnAPI.feature_importances(model)

@@ -77,13 +82,14 @@ data_interface) (read as "observations") gives users and meta-algorithms access
 algorithm-specific representation of input data, which is also guaranteed to implement a
 standard interface for accessing individual observations, unless the algorithm explicitly
 opts out. Moreover, the `fit` and `predict` methods will also be able to consume these
-alternative data representations.
+alternative data representations, for performance benefits in some situations.

 The fallback data interface is the [MLUtils.jl](https://github.com/JuliaML/MLUtils.jl)
-`getobs/numobs` interface, and if the input consumed by the algorithm already implements
-that interface (tables, arrays, etc.) then overloading `obs` is completely optional. Plain
-iteration interfaces, with or without knowledge of the number of observations, can also be
-specified (to support, e.g., data loaders reading images from disk).
+`getobs/numobs` interface (here tagged as [`LearnAPI.RandomAccess()`](@ref)) and if the
+input consumed by the algorithm already implements that interface (tables, arrays, etc.)
+then overloading `obs` is completely optional. Plain iteration interfaces, with or without
+knowledge of the number of observations, can also be specified (to support, e.g., data
+loaders reading images from disk).

 ## Learning more

diff --git a/docs/src/kinds_of_target_proxy.md b/docs/src/kinds_of_target_proxy.md
index 35d51e4c..218c378a 100644
--- a/docs/src/kinds_of_target_proxy.md
+++ b/docs/src/kinds_of_target_proxy.md
@@ -47,7 +47,7 @@ expectiles at 50% will provide `LiteralTarget` instead.

> Table of concrete subtypes of `LearnAPI.IID <: LearnAPI.KindOfProxy`.
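By way of orientation, a sketch of how two common `KindOfProxy` instances appear in
`predict` calls (`algorithm`, `X`, `y` and `Xnew` are placeholders; which kinds are
actually available depends on the algorithm, as declared by `LearnAPI.kinds_of_proxy`):

```julia
model = fit(algorithm, (X, y))
predict(model, LiteralTarget(), Xnew)  # point predictions of the target
predict(model, Distribution(), Xnew)   # predicted probability distributions
```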
-## Proxies for distribution-fitting algorithms
+## Proxies for density estimation algorithms

 ```@docs
 LearnAPI.Single
diff --git a/docs/src/obs.md b/docs/src/obs.md
index ed44668b..82be98b5 100644
--- a/docs/src/obs.md
+++ b/docs/src/obs.md
@@ -11,7 +11,7 @@ obs(algorithm, data) # can be passed to `fit` instead of `data`
 obs(model, data)     # can be passed to `predict` or `transform` instead of `data`
 ```

-## Typical workflows
+## [Typical workflows](@id obs_workflows)

 LearnAPI.jl makes no universal assumptions about the form of `data` in a call
 like `fit(algorithm, data)`. However, if we define
@@ -46,7 +46,7 @@ import MLUtils
 algorithm =
 data =

-X = LearnAPI.input(algorithm, data)
+X = LearnAPI.features(algorithm, data)
 y = LearnAPI.target(algorithm, data)

 train_test_folds = map([1:10, 11:20, 21:30]) do test
diff --git a/docs/src/reference.md b/docs/src/reference.md
index 3b8d7397..698d0943 100644
--- a/docs/src/reference.md
+++ b/docs/src/reference.md
@@ -105,7 +105,7 @@ algorithm-valued.
 Any object `algorithm` for which [`LearnAPI.functions`](@ref)`(algorithm)` is non-empty is
 understood to have a valid implementation of the LearnAPI.jl interface.

-### Example
+#### Example

 Any instance of `GradientRidgeRegressor` defined below is a valid algorithm.

@@ -120,33 +120,35 @@ GradientRidgeRegressor(; learning_rate=0.01, epochs=10, l2_regularization=0.01)
 LearnAPI.constructor(::GradientRidgeRegressor) = GradientRidgeRegressor
 ```

-### Documentation
+## Documentation

 Attach public LearnAPI.jl-related documentation for an algorithm to its *constructor*,
 rather than to the struct defining its type. In this way, an algorithm can implement
 multiple interfaces, in addition to the LearnAPI interface, with separate document strings
 for each.

-
 ## Methods

-Only these method names are exported by LearnAPI: `fit`, `transform`, `inverse_transform`,
-`minimize`, and `obs`.
-
-!!! note
+!!! note "Compulsory methods"

-    All new implementations must implement [`fit`](@ref),
-    [`LearnAPI.algorithm`](@ref algorithm_minimize), [`LearnAPI.constructor`](@ref) and
-    [`LearnAPI.functions`](@ref). The last two are algorithm traits, which can be set
-    with the [`@trait`](@ref) macro.
+    All new algorithm types must implement [`fit`](@ref),
+    [`LearnAPI.algorithm`](@ref algorithm_minimize), [`LearnAPI.constructor`](@ref) and
+    [`LearnAPI.functions`](@ref).

+Most algorithms will also implement [`predict`](@ref) and/or [`transform`](@ref).

 ### List of methods

 - [`fit`](@ref fit): for training or updating algorithms that generalize to new data. Or,
-  for non-generalizing algorithms (see [Static Algorithms](@ref)), wrap `algorithm` in a
-  mutable struct that can be mutated by `predict`/`transform` to record byproducts of
-  those operations.
+  for non-generalizing algorithms (see [Static Algorithms](@ref)), for wrapping
+  `algorithm` in a mutable struct that can be mutated by `predict`/`transform` to record
+  byproducts of those operations.
+
+- [`update`](@ref fit): for updating learning outcomes after hyperparameter changes, such
+  as increasing an iteration parameter.
+
+- [`update_observations`](@ref fit), [`update_features`](@ref fit): update learning
+  outcomes by presenting additional training data.

 - [`predict`](@ref operations): for outputting [targets](@ref proxy) or [target proxies](@ref
   proxy) (such as probability density functions)
@@ -161,20 +163,21 @@ Only these method names are exported by LearnAPI: `fit`, `transform`, `inverse_t
 inessential content, for purposes of serialization.
- [`LearnAPI.target`](@ref input), [`LearnAPI.weights`](@ref input), - [`LearnAPI.input`](@ref): for extracting relevant parts of training data, where defined. + [`LearnAPI.features`](@ref): for extracting relevant parts of training data, where + defined. -- [`obs`](@ref data_interface): optional method for exposing to the user - algorithm-specific representations of data that are guaranteed to implement observation - access, as specified by [`LearnAPI.data_interface(algorithm)`](@ref). +- [`obs`](@ref data_interface): method for exposing to the user + algorithm-specific representations of data, which are additionally guaranteed to + implement the observation access API specified by + [`LearnAPI.data_interface(algorithm)`](@ref). - [Accessor functions](@ref accessor_functions): these include functions like `feature_importances` and `training_losses`, for extracting, from training outcomes, information common to many algorithms. -- [Algorithm traits](@ref traits): special methods, that promise specific algorithm - behavior or for recording general information about the algorithm. Only - [`LearnAPI.constructor`](@ref) and [`LearnAPI.functions`](@ref) are universally - compulsory. +- [Algorithm traits](@ref traits): methods that promise specific algorithm behavior or + record general information about the algorithm. Only [`LearnAPI.constructor`](@ref) and + [`LearnAPI.functions`](@ref) are universally compulsory. --- diff --git a/docs/src/target_weights_input.md b/docs/src/target_weights_features.md similarity index 59% rename from docs/src/target_weights_input.md rename to docs/src/target_weights_features.md index 847dbbec..78205a44 100644 --- a/docs/src/target_weights_input.md +++ b/docs/src/target_weights_features.md @@ -1,11 +1,11 @@ -# [`target`, `weights`, and `input`](@id input) +# [`target`, `weights`, and `features`](@id input) Methods for extracting parts of training data: ```julia LearnAPI.target(algorithm, data) -> LearnAPI.weights(algorithm, data) -> -LearnAPI.input(algorithm, data) -> +LearnAPI.features(algorithm, data) -> ``` Here `data` is something supported in a call of the form `fit(algorithm, data)`. @@ -20,7 +20,7 @@ target: ```julia model = fit(algorithm, data) -X = LearnAPI.input(algorithm, data) +X = LearnAPI.features(algorithm, data) y = LearnAPI.target(algorithm, data) ŷ = predict(model, LiteralTarget(), X) training_loss = sum(ŷ .!= y) @@ -30,11 +30,11 @@ training_loss = sum(ŷ .!= y) The fallback returns `first(data)`, assuming `data` is a tuple, and `data` otherwise. -| method | fallback | compulsory? | | -|:---------------------------|:-----------------:|------------------------|---| -| [`LearnAPI.target`](@ref) | returns `nothing` | no | | -| [`LearnAPI.weights`](@ref) | returns `nothing` | no | | -| [`LearnAPI.input`](@ref) | see docstring | only if fallback fails | | +| method | fallback | compulsory? 
| +|:----------------------------|:-----------------:|------------------------| +| [`LearnAPI.target`](@ref) | returns `nothing` | no | +| [`LearnAPI.weights`](@ref) | returns `nothing` | no | +| [`LearnAPI.features`](@ref) | see docstring | only if fallback fails | # Reference @@ -42,5 +42,5 @@ The fallback returns `first(data)`, assuming `data` is a tuple, and `data` other ```@docs LearnAPI.target LearnAPI.weights -LearnAPI.input +LearnAPI.features ``` diff --git a/src/LearnAPI.jl b/src/LearnAPI.jl index 0de8c026..e98d6dbc 100644 --- a/src/LearnAPI.jl +++ b/src/LearnAPI.jl @@ -7,13 +7,14 @@ include("types.jl") include("predict_transform.jl") include("fit.jl") include("minimize.jl") -include("target_weights_input.jl") +include("target_weights_features.jl") include("obs.jl") include("accessor_functions.jl") include("traits.jl") export @trait -export fit, predict, transform, inverse_transform, minimize, obs +export fit, update, update_observations, update_features +export predict, transform, inverse_transform, minimize, obs for name in Symbol.(CONCRETE_TARGET_PROXY_TYPES_SYMBOLS) @eval export $name diff --git a/src/fit.jl b/src/fit.jl index 2a5e0cbf..faefd610 100644 --- a/src/fit.jl +++ b/src/fit.jl @@ -1,13 +1,8 @@ -# # DOC STRING HELPERS - -const TRAINING_FUNCTIONS = (:fit,) - - # # FIT """ - LearnAPI.fit(algorithm, data; verbosity=1) - LearnAPI.fit(algorithm; verbosity=1) + fit(algorithm, data; verbosity=1) + fit(algorithm; verbosity=1) Execute the algorithm with configuration `algorithm` using the provided training `data`, returning an object, `model`, on which other methods, such as [`predict`](@ref) or @@ -54,3 +49,101 @@ fit(algorithm, data; kwargs...) = fit(algorithm; kwargs...) fit(algorithm, data1, datas...; kwargs...) = fit(algorithm, (data1, datas...); kwargs...) + +# # UPDATE AND COUSINS + +""" + update(model, data; verbosity=1, hyperparam_replacements...) + +Return an updated version of the `model` object returned by a previous [`fit`](@ref) or +`update` call, but with the specified hyperparameter replacements, in the form `p1=value1, +p2=value2, ...`. + +Provided that `data` is identical with the data presented in a preceding `fit` call, as in +the example below, execution is semantically equivalent to the call `fit(algorithm, +data)`, where `algorithm` is `LearnAPI.algorithm(model)` with the specified +replacements. In some cases (typically, when changing an iteration parameter) there may be +a performance benefit to using `update` instead of retraining ab initio. + +If `data` differs from that in the preceding `fit` or `update` call, then behaviour is +algorithm-specific. + +```julia +algorithm = MyForest(ntrees=100) + +# train with 100 trees: +model = fit(algorithm, data) + +# add 50 more trees: +model = update(model, data; ntrees=150) +``` + +See also [`fit`](@ref), [`update_observations`](@ref), [`update_features`](@ref). + +# New implementations + +Implementation is optional. The signature must include +`verbosity`. $(DOC_IMPLEMENTED_METHODS(":(LearnAPI.update)")) + +""" +update(model, data1, datas...; kwargs...) = update(model, (data1, datas...); kwargs...) + +""" + update_observations(model, new_data; verbosity=1, parameter_replacements...) + +Return an updated version of the `model` object returned by a previous [`fit`](@ref) or +`update` call given the new observations present in `new_data`. One may additionally +specify hyperparameter replacements in the form `p1=value1, p2=value2, ...`. 
+
+When following the call `fit(algorithm, data)`, the `update_observations` call is
+semantically equivalent to retraining ab initio using a concatenation of `data` and
+`new_data`, *provided there are no hyperparameter replacements.* Behaviour is otherwise
+algorithm-specific.
+
+```julia
+algorithm = MyNeuralNetwork(epochs=10, learning_rate=0.01)
+
+# train for ten epochs:
+model = fit(algorithm, data)
+
+# train for two more epochs using new data and new learning rate:
+model = update_observations(model, new_data; epochs=2, learning_rate=0.1)
+```
+
+See also [`fit`](@ref), [`update`](@ref), [`update_features`](@ref).
+
+# Extended help
+
+# New implementations
+
+Implementation is optional. The signature must include
+`verbosity`. $(DOC_IMPLEMENTED_METHODS(":(LearnAPI.update_observations)"))
+
+"""
+update_observations(model, data1, datas...; kwargs...) =
+    update_observations(model, (data1, datas...); kwargs...)
+
+"""
+    update_features(model, new_data; verbosity=1, parameter_replacements...)
+
+Return an updated version of the `model` object returned by a previous [`fit`](@ref) or
+`update` call given the new features encapsulated in `new_data`. One may additionally
+specify hyperparameter replacements in the form `p1=value1, p2=value2, ...`.
+
+When following the call `fit(algorithm, data)`, the `update_features` call is semantically
+equivalent to retraining ab initio using a concatenation of `data` and `new_data`,
+*provided there are no hyperparameter replacements.* Behaviour is otherwise
+algorithm-specific.
+
+See also [`fit`](@ref), [`update`](@ref), [`update_observations`](@ref).
+
+# Extended help
+
+# New implementations
+
+Implementation is optional. The signature must include
+`verbosity`. $(DOC_IMPLEMENTED_METHODS(":(LearnAPI.update_features)"))
+
+"""
+update_features(model, data1, datas...; kwargs...) =
+    update_features(model, (data1, datas...); kwargs...)
diff --git a/src/obs.jl b/src/obs.jl
index 2d784a89..e781351e 100644
--- a/src/obs.jl
+++ b/src/obs.jl
@@ -67,7 +67,7 @@ to be overloaded. However, the user will get no performance benefits by using `o
 that case.

 When overloading `obs(algorithm, data)` to output new model-specific representations of
-data, it may be necessary to also overload [`LearnAPI.input`](@ref),
+data, it may be necessary to also overload [`LearnAPI.features`](@ref),
 [`LearnAPI.target`](@ref) (supervised algorithms), and/or [`LearnAPI.weights`](@ref) (if
 weights are supported), for extracting relevant parts of the representation.

diff --git a/src/predict_transform.jl b/src/predict_transform.jl
index c1c9d9d2..6b62dfd5 100644
--- a/src/predict_transform.jl
+++ b/src/predict_transform.jl
@@ -38,11 +38,13 @@ DOC_DATA_INTERFACE(method) =

    ## Assumptions about data

    By default, it is assumed that `data` supports the [`LearnAPI.RandomAccess`](@ref)
-   interface (all matrices, with observations-as-columns, most tables, and tuples
-   thereof). See [`LearnAPI.RandomAccess`](@ref) for details. If this is not the case
-   then an implementation must suitably: (i) overload the trait
-   [`LearnAPI.data_interface`](@ref); and/or (ii) overload [`obs`](@ref). Refer to these
-   methods' document strings for details.
+   interface; this includes all matrices, with observations-as-columns, most tables, and
+   tuples thereof. See [`LearnAPI.RandomAccess`](@ref) for details. If this is not the
If this is not the + case then an implementation must either: (i) overload [`obs`](@ref) to articulate how + provided data can be transformed into a form that does support + [`LearnAPI.RandomAccess`](@ref); or (ii) overload the trait + [`LearnAPI.data_interface`](@ref) to specify a more relaxed data API. Refer to + document strings for details. """ diff --git a/src/target_weights_input.jl b/src/target_weights_features.jl similarity index 78% rename from src/target_weights_input.jl rename to src/target_weights_features.jl index b5d486e6..e7fd0b63 100644 --- a/src/target_weights_input.jl +++ b/src/target_weights_features.jl @@ -34,18 +34,18 @@ $(DOC_IMPLEMENTED_METHODS(":(LearnAPI.weights)"; overloaded=true)) weights(::Any, data) = nothing """ - LearnAPI.input(algorithm, data) + LearnAPI.features(algorithm, data) Return, for each form of `data` supported in a call of the form `[`fit(algorithm, -data)`](@ref), the "input" or "features" part of `data` (as opposed to the target -variable, for example). +data)`](@ref), the "features" part of `data` (as opposed to the target +variable, for example). The returned object `X` may always be passed to `predict` or `transform`, where implemented, as in the following sample workflow: ```julia model = fit(algorithm, data) -X = input(data) +X = features(data) ŷ = predict(algorithm, kind_of_proxy, X) # eg, `kind_of_proxy = LiteralTarget()` ``` @@ -57,11 +57,13 @@ target. # New implementations -The following fallbacks typically make overloading `LearnAPI.input` unnecessary: +The only contract `features` must satisfy is the one about passability of the output to +`predict` or `transform`, for each supported input `data`. The following fallbacks +typically make overloading `LearnAPI.features` unnecessary: ```julia -LearnAPI.input(algorithm, data) = data -LearnAPI.input(algorithm, data::Tuple) = first(data) +LearnAPI.features(algorithm, data) = data +LearnAPI.features(algorithm, data::Tuple) = first(data) ``` Overloading may be necessary if [`obs(algorithm, data)`](@ref) is overloaded to return @@ -70,5 +72,5 @@ some algorithm-specific representation of training `data`. For density estimator return `nothing`. """ -input(algorithm, data) = data -input(algorithm, data::Tuple) = first(data) +features(algorithm, data) = data +features(algorithm, data::Tuple) = first(data) diff --git a/src/traits.jl b/src/traits.jl index 7fcf63d6..50ddda1d 100644 --- a/src/traits.jl +++ b/src/traits.jl @@ -115,19 +115,22 @@ value is non-empty. All new implementations must overload this trait. Here's a checklist for elements in the return value: -| symbol | implementation/overloading compulsory? | include in returned tuple? | -|---------------------------------|----------------------------------------|------------------------------------| -| `:(LearnAPI.fit)` | yes | yes | -| `:(LearnAPI.algorithm)` | yes | yes | -| `:(LearnAPI.minimize)` | no | yes | -| `:(LearnAPI.obs)` | no | yes | -| `:(LearnAPI.input)` | no | yes, unless `fit` consumes no data | -| `:(LearnAPI.target)` | no | only if implemented | -| `:(LearnAPI.weights)` | no | only if implemented | -| `:(LearnAPI.predict)` | no | only if implemented | -| `:(LearnAPI.transform)` | no | only if implemented | -| `:(LearnAPI.inverse_transform)` | no | only if implemented | -| | no | only if implemented | +| symbol | implementation/overloading compulsory? | include in returned tuple? 
+|-----------------------------------|----------------------------------------|------------------------------------|
+| `:(LearnAPI.fit)`                 | yes                                    | yes                                |
+| `:(LearnAPI.algorithm)`           | yes                                    | yes                                |
+| `:(LearnAPI.minimize)`            | no                                     | yes                                |
+| `:(LearnAPI.obs)`                 | no                                     | yes                                |
+| `:(LearnAPI.features)`            | no                                     | yes, unless `fit` consumes no data |
+| `:(LearnAPI.update)`              | no                                     | only if implemented                |
+| `:(LearnAPI.update_observations)` | no                                     | only if implemented                |
+| `:(LearnAPI.update_features)`     | no                                     | only if implemented                |
+| `:(LearnAPI.target)`              | no                                     | only if implemented                |
+| `:(LearnAPI.weights)`             | no                                     | only if implemented                |
+| `:(LearnAPI.predict)`             | no                                     | only if implemented                |
+| `:(LearnAPI.transform)`           | no                                     | only if implemented                |
+| `:(LearnAPI.inverse_transform)`   | no                                     | only if implemented                |
+|                                   | no                                     | only if implemented                |

 Also include any implemented accessor functions, both those owned by LearnAPI.jl, and any
 algorithm-specific ones. The LearnAPI.jl accessor functions are: $ACCESSOR_FUNCTIONS_LIST.
@@ -177,38 +180,39 @@ For more on target variables and target proxies, refer to the LearnAPI documenta
 kinds_of_proxy(::Any) = ()

 descriptors() = [
-    :regression,
-    :classification,
-    :clustering,
-    :gradient_descent,
-    :iterative_algorithms,
-    :incremental_algorithms,
-    :dimension_reduction,
-    :encoders,
-    :static_algorithms,
-    :missing_value_imputation,
-    :ensemble_algorithms,
-    :wrappers,
-    :time_series_forecasting,
-    :time_series_classification,
-    :survival_analysis,
-    :distribution_fitters,
-    :Bayesian_algorithms,
-    :outlier_detection,
-    :collaborative_filtering,
-    :text_analysis,
-    :audio_analysis,
-    :natural_language_processing,
-    :image_processing,
+    "regression",
+    "classification",
+    "clustering",
+    "gradient descent",
+    "iterative algorithms",
+    "incremental algorithms",
+    "dimension reduction",
+    "encoders",
+    "feature engineering",
+    "static algorithms",
+    "missing value imputation",
+    "ensemble algorithms",
+    "wrappers",
+    "time series forecasting",
+    "time series classification",
+    "survival analysis",
+    "density estimation",
+    "Bayesian algorithms",
+    "outlier detection",
+    "collaborative filtering",
+    "text analysis",
+    "audio analysis",
+    "natural language processing",
+    "image processing",
 ]

-const DOC_DESCRIPTORS_LIST = join(map(d -> "`:$d`", descriptors()), ", ")
+const DOC_DESCRIPTORS_LIST = join(map(d -> "`\"$d\"`", descriptors()), ", ")

 """
     LearnAPI.descriptors(algorithm)

-Lists one or more suggestive algorithm descriptors from this list: $DOC_DESCRIPTORS_LIST (do
-`LearnAPI.descriptors()` to reproduce).
+Lists one or more suggestive algorithm descriptors. Do `LearnAPI.descriptors()` to list
+all possible.

 !!! warning
     The value of this trait guarantees no particular behavior. The trait is

@@ -216,7 +220,7 @@ Lists one or more suggestive algorithm descriptors from this list: $DOC_DESCRIPT

 # New implementations

-This trait should return a tuple of symbols, as in `(:classifier, :text_analysis)`.
+This trait should return a tuple of strings, as in `("classifier", "text analysis")`.

 """
 descriptors(::Any) = ()
diff --git a/test/integration/regression.jl b/test/integration/regression.jl
index d8118a72..e34a2993 100644
--- a/test/integration/regression.jl
+++ b/test/integration/regression.jl
@@ -81,7 +81,7 @@ LearnAPI.fit(algorithm::Ridge, data; kwargs...) =

 # extracting stuff from training data:
 LearnAPI.target(::Ridge, data) = last(data)
 LearnAPI.target(::Ridge, observations::RidgeFitObs) = observations.y
-LearnAPI.input(::Ridge, observations::RidgeFitObs) = observations.A
+LearnAPI.features(::Ridge, observations::RidgeFitObs) = observations.A

 # observations for consumption by `predict`:
 LearnAPI.obs(::RidgeFitted, X) = Tables.matrix(X)'
@@ -104,12 +104,13 @@ LearnAPI.minimize(model::RidgeFitted) =
     Ridge,
     constructor = Ridge,
     kinds_of_proxy = (LiteralTarget(),),
+    descriptors = ("regression",),
     functions = (
         :(LearnAPI.fit),
         :(LearnAPI.algorithm),
         :(LearnAPI.minimize),
         :(LearnAPI.obs),
-        :(LearnAPI.input),
+        :(LearnAPI.features),
         :(LearnAPI.target),
         :(LearnAPI.predict),
         :(LearnAPI.feature_importances),
@@ -131,7 +132,7 @@ data = (X, y)
     @test :(LearnAPI.obs) in LearnAPI.functions(algorithm)

     @test LearnAPI.target(algorithm, data) == y
-    @test LearnAPI.input(algorithm, data) == X
+    @test LearnAPI.features(algorithm, data) == X

     # verbose fitting:
     @test_logs(
@@ -163,7 +164,7 @@ data = (X, y)
     model = fit(algorithm, MLUtils.getobs(fitobs, train); verbosity=0)
     @test LearnAPI.target(algorithm, fitobs) == y
     @test predict(model, LiteralTarget(), MLUtils.getobs(predictobs, test)) ≈ ŷ
-    @test predict(model, LearnAPI.input(algorithm, fitobs)) ≈ predict(model, X)
+    @test predict(model, LearnAPI.features(algorithm, fitobs)) ≈ predict(model, X)

     @test LearnAPI.feature_importances(model) isa Vector{<:Pair{Symbol}}

@@ -236,12 +237,13 @@ LearnAPI.minimize(model::BabyRidgeFitted) =
     BabyRidge,
     constructor = BabyRidge,
     kinds_of_proxy = (LiteralTarget(),),
+    descriptors = ("regression",),
     functions = (
         :(LearnAPI.fit),
         :(LearnAPI.algorithm),
         :(LearnAPI.minimize),
         :(LearnAPI.obs),
-        :(LearnAPI.input),
+        :(LearnAPI.features),
         :(LearnAPI.target),
         :(LearnAPI.predict),
         :(LearnAPI.feature_importances),
@@ -262,7 +264,7 @@ LearnAPI.minimize(model::BabyRidgeFitted) =
         predict(model, MLUtils.getobs(predictobs, test))
     @test LearnAPI.target(algorithm, data) == y
     @test LearnAPI.predict(model, X) ≈
-        LearnAPI.predict(model, LearnAPI.input(algorithm, data))
+        LearnAPI.predict(model, LearnAPI.features(algorithm, data))
 end

 true
diff --git a/test/integration/static_algorithms.jl b/test/integration/static_algorithms.jl
index 1d6a2ad6..6a7a72af 100644
--- a/test/integration/static_algorithms.jl
+++ b/test/integration/static_algorithms.jl
@@ -39,6 +39,7 @@
 @trait(
     Selector,
     constructor = Selector,
+    descriptors = ("feature engineering",),
     functions = (
         :(LearnAPI.fit),
         :(LearnAPI.algorithm),
@@ -104,6 +105,7 @@ end
     Selector2,
     constructor = Selector2,
     predict_or_transform_mutates = true,
+    descriptors = ("feature engineering",),
     functions = (
         :(LearnAPI.fit),
         :(LearnAPI.algorithm),
From 20b4bfff434f8424ea641f8af092873fc03273c4 Mon Sep 17 00:00:00 2001
From: "Anthony D. Blaom"
Blaom" Date: Wed, 2 Oct 2024 10:19:42 +1300 Subject: [PATCH 20/27] rename fit.* -> fit_update.* and descriptors -> tags --- docs/make.jl | 2 +- docs/src/anatomy_of_an_implementation.md | 4 ++-- docs/src/{fit.md => fit_update.md} | 0 docs/src/traits.md | 4 ++-- src/LearnAPI.jl | 2 +- src/{fit.jl => fit_update.jl} | 0 src/tools.jl | 4 ++-- src/traits.jl | 12 ++++++------ test/integration/regression.jl | 4 ++-- test/integration/static_algorithms.jl | 4 ++-- 10 files changed, 18 insertions(+), 18 deletions(-) rename docs/src/{fit.md => fit_update.md} (100%) rename src/{fit.jl => fit_update.jl} (100%) diff --git a/docs/make.jl b/docs/make.jl index dafb1c97..a0b0bb37 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -15,7 +15,7 @@ makedocs( "Anatomy of an Implementation" => "anatomy_of_an_implementation.md", "Reference" => [ "Overview" => "reference.md", - "fit/update" => "fit.md", + "fit/update" => "fit_update.md", "predict/transform" => "predict_transform.md", "Kinds of Target Proxy" => "kinds_of_target_proxy.md", "minimize" => "minimize.md", diff --git a/docs/src/anatomy_of_an_implementation.md b/docs/src/anatomy_of_an_implementation.md index 206e624d..13f17da1 100644 --- a/docs/src/anatomy_of_an_implementation.md +++ b/docs/src/anatomy_of_an_implementation.md @@ -220,7 +220,7 @@ A macro provides a shortcut, convenient when multiple traits are to be defined: Ridge, constructor = Ridge, kinds_of_proxy=(LiteralTarget(),), - descriptors = (:regression,), + tags = (:regression,), functions = ( :(LearnAPI.fit), :(LearnAPI.algorithm), @@ -327,7 +327,7 @@ LearnAPI.minimize(model::RidgeFitted) = Ridge, constructor = Ridge, kinds_of_proxy=(LiteralTarget(),), - descriptors = (:regression,), + tags = (:regression,), functions = ( :(LearnAPI.fit), :(LearnAPI.algorithm), diff --git a/docs/src/fit.md b/docs/src/fit_update.md similarity index 100% rename from docs/src/fit.md rename to docs/src/fit_update.md diff --git a/docs/src/traits.md b/docs/src/traits.md index c75145b1..7699bbce 100644 --- a/docs/src/traits.md +++ b/docs/src/traits.md @@ -25,7 +25,7 @@ package [ScientificTypesBase.jl](https://github.com/JuliaAI/ScientificTypesBase. | [`LearnAPI.constructor`](@ref)`(algorithm)` | constructor for generating new or modified versions of `algorithm` | (no fallback) | `RidgeRegressor` | | [`LearnAPI.functions`](@ref)`(algorithm)` | functions you can apply to `algorithm` or associated model (traits excluded) | `()` | `(:fit, :predict, :minimize, :(LearnAPI.algorithm), :obs)` | | [`LearnAPI.kinds_of_proxy`](@ref)`(algorithm)` | instances `kind` of `KindOfProxy` for which an implementation of `LearnAPI.predict(algorithm, kind, ...)` is guaranteed. | `()` | `(Distribution(), Interval())` | -| [`LearnAPI.descriptors`](@ref)`(algorithm)` | lists one or more suggestive algorithm descriptors from `LearnAPI.descriptors()` | `()` | (:regression, :probabilistic) | +| [`LearnAPI.tags`](@ref)`(algorithm)` | lists one or more suggestive algorithm tags from `LearnAPI.tags()` | `()` | (:regression, :probabilistic) | | [`LearnAPI.is_pure_julia`](@ref)`(algorithm)` | `true` if implementation is 100% Julia code | `false` | `true` | | [`LearnAPI.pkg_name`](@ref)`(algorithm)` | name of package providing core code (may be different from package providing LearnAPI.jl implementation) | `"unknown"` | `"DecisionTree"` | | [`LearnAPI.pkg_license`](@ref)`(algorithm)` | name of license of package providing core code | `"unknown"` | `"MIT"` | @@ -114,7 +114,7 @@ informative (as in `LearnAPI.predict_type(algorithm) = Any`). 
LearnAPI.constructor LearnAPI.functions LearnAPI.kinds_of_proxy -LearnAPI.descriptors +LearnAPI.tags LearnAPI.is_pure_julia LearnAPI.pkg_name LearnAPI.pkg_license diff --git a/src/LearnAPI.jl b/src/LearnAPI.jl index e98d6dbc..ffab0130 100644 --- a/src/LearnAPI.jl +++ b/src/LearnAPI.jl @@ -5,7 +5,7 @@ import InteractiveUtils.subtypes include("tools.jl") include("types.jl") include("predict_transform.jl") -include("fit.jl") +include("fit_update.jl") include("minimize.jl") include("target_weights_features.jl") include("obs.jl") diff --git a/src/fit.jl b/src/fit_update.jl similarity index 100% rename from src/fit.jl rename to src/fit_update.jl diff --git a/src/tools.jl b/src/tools.jl index d86e3d8d..1b033f05 100644 --- a/src/tools.jl +++ b/src/tools.jl @@ -16,7 +16,7 @@ Overload a number of traits for algorithms of type `TypeEx`. For example, the co ```julia @trait( RidgeRegressor, - descriptors = ("regression", ), + tags = ("regression", ), doc_url = "https://some.cool.documentation", ) ``` @@ -24,7 +24,7 @@ Overload a number of traits for algorithms of type `TypeEx`. For example, the co is equivalent to ```julia -LearnAPI.descriptors(::RidgeRegressor) = ("regression", ), +LearnAPI.tags(::RidgeRegressor) = ("regression", ), LearnAPI.doc_url(::RidgeRegressor) = "https://some.cool.documentation", ``` diff --git a/src/traits.jl b/src/traits.jl index 50ddda1d..30ad504b 100644 --- a/src/traits.jl +++ b/src/traits.jl @@ -27,7 +27,7 @@ const TRAITS = [ :constructor, :functions, :kinds_of_proxy, - :descriptors, + :tags, :is_pure_julia, :pkg_name, :pkg_license, @@ -179,7 +179,7 @@ For more on target variables and target proxies, refer to the LearnAPI documenta """ kinds_of_proxy(::Any) = () -descriptors() = [ +tags() = [ "regression", "classification", "clustering", @@ -206,12 +206,12 @@ descriptors() = [ "image processing", ] -const DOC_DESCRIPTORS_LIST = join(map(d -> "`\"$d\"`", descriptors()), ", ") +const DOC_TAGS_LIST = join(map(d -> "`\"$d\"`", tags()), ", ") """ - LearnAPI.descriptors(algorithm) + LearnAPI.tags(algorithm) -Lists one or more suggestive algorithm descriptors. Do `LearnAPI.descriptors()` to list +Lists one or more suggestive algorithm tags. Do `LearnAPI.tags()` to list all possible. !!! warning @@ -223,7 +223,7 @@ all possible. This trait should return a tuple of strings, as in `("classifier", "text analysis")`. 
""" -descriptors(::Any) = () +tags(::Any) = () """ LearnAPI.is_pure_julia(algorithm) diff --git a/test/integration/regression.jl b/test/integration/regression.jl index e34a2993..5b91561e 100644 --- a/test/integration/regression.jl +++ b/test/integration/regression.jl @@ -104,7 +104,7 @@ LearnAPI.minimize(model::RidgeFitted) = Ridge, constructor = Ridge, kinds_of_proxy = (LiteralTarget(),), - descriptors = ("regression",) + tags = ("regression",) functions = ( :(LearnAPI.fit), :(LearnAPI.algorithm), @@ -237,7 +237,7 @@ LearnAPI.minimize(model::BabyRidgeFitted) = BabyRidge, constructor = BabyRidge, kinds_of_proxy = (LiteralTarget(),), - descriptors = ("regression",) + tags = ("regression",) functions = ( :(LearnAPI.fit), :(LearnAPI.algorithm), diff --git a/test/integration/static_algorithms.jl b/test/integration/static_algorithms.jl index 6a7a72af..a143416b 100644 --- a/test/integration/static_algorithms.jl +++ b/test/integration/static_algorithms.jl @@ -39,7 +39,7 @@ end @trait( Selector, constructor = Selector, - descriptors = ("feature engineering",) + tags = ("feature engineering",) functions = ( :(LearnAPI.fit), :(LearnAPI.algorithm), @@ -105,7 +105,7 @@ end Selector2, constructor = Selector2, predict_or_transform_mutates = true, - descriptors = ("feature engineering",) + tags = ("feature engineering",) functions = ( :(LearnAPI.fit), :(LearnAPI.algorithm), From 1a92f479e796ff708bbe4d9f31a1b9973d889ace Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Wed, 2 Oct 2024 10:32:01 +1300 Subject: [PATCH 21/27] tweak --- docs/src/traits.md | 66 +++++++++++++++++++++++----------------------- 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/docs/src/traits.md b/docs/src/traits.md index 7699bbce..c20171d7 100644 --- a/docs/src/traits.md +++ b/docs/src/traits.md @@ -20,39 +20,39 @@ one argument. In the examples column of the table below, `Table`, `Continuous`, `Sampleable` are names owned by the package [ScientificTypesBase.jl](https://github.com/JuliaAI/ScientificTypesBase.jl/). -| trait | return value | fallback value | example | -|:----------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------|:------------------------------------------------------|:---------------------------------------------------------| -| [`LearnAPI.constructor`](@ref)`(algorithm)` | constructor for generating new or modified versions of `algorithm` | (no fallback) | `RidgeRegressor` | -| [`LearnAPI.functions`](@ref)`(algorithm)` | functions you can apply to `algorithm` or associated model (traits excluded) | `()` | `(:fit, :predict, :minimize, :(LearnAPI.algorithm), :obs)` | -| [`LearnAPI.kinds_of_proxy`](@ref)`(algorithm)` | instances `kind` of `KindOfProxy` for which an implementation of `LearnAPI.predict(algorithm, kind, ...)` is guaranteed. 
| `()` | `(Distribution(), Interval())` | -| [`LearnAPI.tags`](@ref)`(algorithm)` | lists one or more suggestive algorithm tags from `LearnAPI.tags()` | `()` | (:regression, :probabilistic) | -| [`LearnAPI.is_pure_julia`](@ref)`(algorithm)` | `true` if implementation is 100% Julia code | `false` | `true` | -| [`LearnAPI.pkg_name`](@ref)`(algorithm)` | name of package providing core code (may be different from package providing LearnAPI.jl implementation) | `"unknown"` | `"DecisionTree"` | -| [`LearnAPI.pkg_license`](@ref)`(algorithm)` | name of license of package providing core code | `"unknown"` | `"MIT"` | -| [`LearnAPI.doc_url`](@ref)`(algorithm)` | url providing documentation of the core code | `"unknown"` | `"https://en.wikipedia.org/wiki/Decision_tree_learning"` | -| [`LearnAPI.load_path`](@ref)`(algorithm)` | a string indicating where the struct for `typeof(algorithm)` is defined, beginning with name of package providing implementation | `"unknown"` | `FastTrees.LearnAPI.DecisionTreeClassifier` | -| [`LearnAPI.is_composite`](@ref)`(algorithm)` | `true` if one or more properties (fields) of `algorithm` may be an algorithm | `false` | `true` | -| [`LearnAPI.human_name`](@ref)`(algorithm)` | human name for the algorithm; should be a noun | type name with spaces | "elastic net regressor" | -| [`LearnAPI.data_interface`](@ref)`(algorithm)` | Interface implemented by objects returned by [`obs`](@ref) | `Base.HasLength()` (supports `MLUtils.getobs/numobs`) | `Base.SizeUnknown()` (supports `iterate`) | -| [`LearnAPI.iteration_parameter`](@ref)`(algorithm)` | symbolic name of an iteration parameter | `nothing` | :epochs | -| [`LearnAPI.fit_scitype`](@ref)`(algorithm)` | upper bound on `scitype(data)` ensuring `fit(algorithm, data)` works | `Union{}` | `Tuple{Table(Continuous), AbstractVector{Continuous}}` | -| [`LearnAPI.fit_observation_scitype`](@ref)`(algorithm)` | upper bound on `scitype(observation)` for `observation` in `data` ensuring `fit(algorithm, data)` works | `Union{}` | `Tuple{AbstractVector{Continuous}, Continuous}` | -| [`LearnAPI.fit_type`](@ref)`(algorithm)` | upper bound on `typeof(data)` ensuring `fit(algorithm, data)` works | `Union{}` | `Tuple{AbstractMatrix{<:Real}, AbstractVector{<:Real}}` | -| [`LearnAPI.fit_observation_type`](@ref)`(algorithm)` | upper bound on `typeof(observation)` for `observation` in `data` ensuring `fit(algorithm, data)` works | `Union{}` | `Tuple{AbstractVector{<:Real}, Real}` | -| [`LearnAPI.target_observation_scitype`](@ref)`(algorithm)` | upper bound on the scitype of each observation of the targget | `Any` | `Continuous` | -| [`LearnAPI.predict_input_scitype`](@ref)`(algorithm)` | upper bound on `scitype(data)` ensuring `predict(model, kind, data)` works | `Union{}` | `Table(Continuous)` | -| [`LearnAPI.predict_input_observation_scitype`](@ref)`(algorithm)` | upper bound on `scitype(observation)` for `observation` in `data` ensuring `predict(model, kind, data)` works | `Union{}` | `Vector{Continuous}` | -| [`LearnAPI.predict_input_type`](@ref)`(algorithm)` | upper bound on `typeof(data)` ensuring `predict(model, kind, data)` works | `Union{}` | `AbstractMatrix{<:Real}` | -| [`LearnAPI.predict_input_observation_type`](@ref)`(algorithm)` | upper bound on `typeof(observation)` for `observation` in `data` ensuring `predict(model, kind, data)` works | `Union{}` | `Vector{<:Real}` | -| [`LearnAPI.predict_output_scitype`](@ref)`(algorithm, kind_of_proxy)` | upper bound on `scitype(predict(model, ...))` | `Any` | `AbstractVector{Continuous}` | -| 
[`LearnAPI.predict_output_type`](@ref)`(algorithm, kind_of_proxy)` | upper bound on `typeof(predict(model, ...))` | `Any` | `AbstractVector{<:Real}` | -| [`LearnAPI.transform_input_scitype`](@ref)`(algorithm)` | upper bound on `scitype(data)` ensuring `transform(model, data)` works | `Union{}` | `Table(Continuous)` | -| [`LearnAPI.transform_input_observation_scitype`](@ref)`(algorithm)` | upper bound on `scitype(observation)` for `observation` in `data` ensuring `transform(model, data)` works | `Union{}` | `Vector{Continuous}` | -| [`LearnAPI.transform_input_type`](@ref)`(algorithm)` | upper bound on `typeof(data)`ensuring `transform(model, data)` works | `Union{}` | `AbstractMatrix{<:Real}}` | -| [`LearnAPI.transform_input_observation_type`](@ref)`(algorithm)` | upper bound on `typeof(observation)` for `observation` in `data` ensuring `transform(model, data)` works | `Union{}` | `Vector{Continuous}` | -| [`LearnAPI.transform_output_scitype`](@ref)`(algorithm)` | upper bound on `scitype(transform(model, ...))` | `Any` | `Table(Continuous)` | -| [`LearnAPI.transform_output_type`](@ref)`(algorithm)` | upper bound on `typeof(transform(model, ...))` | `Any` | `AbstractMatrix{<:Real}` | -| [`LearnAPI.predict_or_transform_mutates`](@ref)`(algorithm)` | `true` if `predict` or `transform` mutates first argument | `false` | `true` | +| trait | return value | fallback value | example | +|:----------------------------------------------------------------------|:-------------------------------------------------------------------------------------------------------------------------|:------------------------------------------------------|:-----------------------------------------------------------| +| [`LearnAPI.constructor`](@ref)`(algorithm)` | constructor for generating new or modified versions of `algorithm` | (no fallback) | `RidgeRegressor` | +| [`LearnAPI.functions`](@ref)`(algorithm)` | functions you can apply to `algorithm` or associated model (traits excluded) | `()` | `(:fit, :predict, :minimize, :(LearnAPI.algorithm), :obs)` | +| [`LearnAPI.kinds_of_proxy`](@ref)`(algorithm)` | instances `kind` of `KindOfProxy` for which an implementation of `LearnAPI.predict(algorithm, kind, ...)` is guaranteed. 
| `()` | `(Distribution(), Interval())` | +| [`LearnAPI.tags`](@ref)`(algorithm)` | lists one or more suggestive algorithm tags from `LearnAPI.tags()` | `()` | (:regression, :probabilistic) | +| [`LearnAPI.is_pure_julia`](@ref)`(algorithm)` | `true` if implementation is 100% Julia code | `false` | `true` | +| [`LearnAPI.pkg_name`](@ref)`(algorithm)` | name of package providing core code (may be different from package providing LearnAPI.jl implementation) | `"unknown"` | `"DecisionTree"` | +| [`LearnAPI.pkg_license`](@ref)`(algorithm)` | name of license of package providing core code | `"unknown"` | `"MIT"` | +| [`LearnAPI.doc_url`](@ref)`(algorithm)` | url providing documentation of the core code | `"unknown"` | `"https://en.wikipedia.org/wiki/Decision_tree_learning"` | +| [`LearnAPI.load_path`](@ref)`(algorithm)` | a string locating the name of `LearnAPI.constructor(algorithm)` is defined, beginning with a package name | "unknown"` | `FastTrees.LearnAPI.DecisionTreeClassifier` | +| [`LearnAPI.is_composite`](@ref)`(algorithm)` | `true` if one or more properties of `algorithm` may be an algorithm | `false` | `true` | +| [`LearnAPI.human_name`](@ref)`(algorithm)` | human name for the algorithm; should be a noun | type name with spaces | "elastic net regressor" | +| [`LearnAPI.data_interface`](@ref)`(algorithm)` | Interface implemented by objects returned by [`obs`](@ref) | `Base.HasLength()` (supports `MLUtils.getobs/numobs`) | `Base.SizeUnknown()` (supports `iterate`) | +| [`LearnAPI.iteration_parameter`](@ref)`(algorithm)` | symbolic name of an iteration parameter | `nothing` | :epochs | +| [`LearnAPI.fit_scitype`](@ref)`(algorithm)` | upper bound on `scitype(data)` ensuring `fit(algorithm, data)` works | `Union{}` | `Tuple{Table(Continuous), AbstractVector{Continuous}}` | +| [`LearnAPI.fit_observation_scitype`](@ref)`(algorithm)` | upper bound on `scitype(observation)` for `observation` in `data` ensuring `fit(algorithm, data)` works | `Union{}` | `Tuple{AbstractVector{Continuous}, Continuous}` | +| [`LearnAPI.fit_type`](@ref)`(algorithm)` | upper bound on `typeof(data)` ensuring `fit(algorithm, data)` works | `Union{}` | `Tuple{AbstractMatrix{<:Real}, AbstractVector{<:Real}}` | +| [`LearnAPI.fit_observation_type`](@ref)`(algorithm)` | upper bound on `typeof(observation)` for `observation` in `data` ensuring `fit(algorithm, data)` works | `Union{}` | `Tuple{AbstractVector{<:Real}, Real}` | +| [`LearnAPI.target_observation_scitype`](@ref)`(algorithm)` | upper bound on the scitype of each observation of the targget | `Any` | `Continuous` | +| [`LearnAPI.predict_input_scitype`](@ref)`(algorithm)` | upper bound on `scitype(data)` ensuring `predict(model, kind, data)` works | `Union{}` | `Table(Continuous)` | +| [`LearnAPI.predict_input_observation_scitype`](@ref)`(algorithm)` | upper bound on `scitype(observation)` for `observation` in `data` ensuring `predict(model, kind, data)` works | `Union{}` | `Vector{Continuous}` | +| [`LearnAPI.predict_input_type`](@ref)`(algorithm)` | upper bound on `typeof(data)` ensuring `predict(model, kind, data)` works | `Union{}` | `AbstractMatrix{<:Real}` | +| [`LearnAPI.predict_input_observation_type`](@ref)`(algorithm)` | upper bound on `typeof(observation)` for `observation` in `data` ensuring `predict(model, kind, data)` works | `Union{}` | `Vector{<:Real}` | +| [`LearnAPI.predict_output_scitype`](@ref)`(algorithm, kind_of_proxy)` | upper bound on `scitype(predict(model, ...))` | `Any` | `AbstractVector{Continuous}` | +| 
[`LearnAPI.predict_output_type`](@ref)`(algorithm, kind_of_proxy)` | upper bound on `typeof(predict(model, ...))` | `Any` | `AbstractVector{<:Real}` |
| [`LearnAPI.transform_input_scitype`](@ref)`(algorithm)` | upper bound on `scitype(data)` ensuring `transform(model, data)` works | `Union{}` | `Table(Continuous)` |
| [`LearnAPI.transform_input_observation_scitype`](@ref)`(algorithm)` | upper bound on `scitype(observation)` for `observation` in `data` ensuring `transform(model, data)` works | `Union{}` | `Vector{Continuous}` |
| [`LearnAPI.transform_input_type`](@ref)`(algorithm)` | upper bound on `typeof(data)`ensuring `transform(model, data)` works | `Union{}` | `AbstractMatrix{<:Real}}` |
| [`LearnAPI.transform_input_observation_type`](@ref)`(algorithm)` | upper bound on `typeof(observation)` for `observation` in `data` ensuring `transform(model, data)` works | `Union{}` | `Vector{Continuous}` |
| [`LearnAPI.transform_output_scitype`](@ref)`(algorithm)` | upper bound on `scitype(transform(model, ...))` | `Any` | `Table(Continuous)` |
| [`LearnAPI.transform_output_type`](@ref)`(algorithm)` | upper bound on `typeof(transform(model, ...))` | `Any` | `AbstractMatrix{<:Real}` |
| [`LearnAPI.predict_or_transform_mutates`](@ref)`(algorithm)` | `true` if `predict` or `transform` mutates first argument | `false` | `true` |

### Derived Traits

From d1f32596d01de2e4824e4ca1e05a09145ab56c53 Mon Sep 17 00:00:00 2001
From: "Anthony D. Blaom"
Date: Wed, 2 Oct 2024 10:47:09 +1300
Subject: [PATCH 22/27] tweak target_observation_scitype
---
 src/traits.jl | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/src/traits.jl b/src/traits.jl
index 30ad504b..c6a7889e 100644
--- a/src/traits.jl
+++ b/src/traits.jl
@@ -464,21 +464,25 @@ fit_observation_type(::Any) = Union{}
 """
     LearnAPI.target_observation_scitype(algorithm)

-Return an upper bound `S` on the scitype of each observation of `LearnAPI.target(data)`,
-where `data` is an admissible argument in the call `fit(algorithm, data)`.
+Return an upper bound `S` on the scitype of each observation of an applicable target
+variable. Specifically:

-This interpretation only holds if `LearnAPI.target(algorithm)` is `true`. In any case,
-however, if `algorithm` implements `predict`, then `S` will always be an
-upper bound on the scitype of observations that could be conceivably extracted from the
-output of [`predict`](@ref). For example, suppose we have
+- If `:(LearnAPI.target) in LearnAPI.functions(algorithm)` (i.e., `fit` consumes target
+  variables) then "target" means anything returned by `LearnAPI.target(algorithm, data)`,
+  where `data` is an admissible argument in the call `fit(algorithm, data)`.
+
+- `S` will always be an upper bound on the scitype of observations that could be
+  conceivably extracted from the output of [`predict`](@ref).
+
+To illustrate the second case, suppose we have

 ```julia
 model = fit(algorithm, data)
 ŷ = predict(model, Sampleable(), data_new)
 ```

-Then each sample generated by each "observation" of `ŷ` (a vector of sampleable objects,
-say) will be bound in scitype by `S`.
+Then each individual sample generated by each "observation" of `ŷ` (a vector of sampleable
+objects, say) will be bound in scitype by `S`.

 See also [`LearnAPI.fit_observation_scitype`](@ref).

From d69c5b0fb0167a92a0ae1ef8514e706aad7a116c Mon Sep 17 00:00:00 2001
From: "Anthony D. 
Blaom" Date: Wed, 2 Oct 2024 11:36:12 +1300 Subject: [PATCH 23/27] purge a bunch of traits related to predict/transform input/output --- docs/src/traits.md | 105 +++++---------- src/traits.jl | 317 +++------------------------------------------ 2 files changed, 49 insertions(+), 373 deletions(-) diff --git a/docs/src/traits.md b/docs/src/traits.md index c20171d7..25edaa1c 100644 --- a/docs/src/traits.md +++ b/docs/src/traits.md @@ -1,17 +1,10 @@ # [Algorithm Traits](@id traits) -Traits generally promise specific algorithm behavior, such as: *This algorithm can make -point or probabilistic predictions*, *This algorithm sees a target variable in training*, -or *This algorithm's `transform` method predicts `Real` vectors*. They also record more -mundane information, such as a package license. +Algorithm traits are simply functions whose sole argument is an algorithm. -Algorithm traits are functions whose first (and usually only) argument is an algorithm. - -### Special two-argument traits - -The two-argument version of [`LearnAPI.predict_output_scitype`](@ref) and -[`LearnAPI.predict_output_scitype`](@ref) are the only overloadable traits with more than -one argument. +Traits promise specific algorithm behavior, such as: *This algorithm can make point or +probabilistic predictions* or *This algorithm is supervised* (sees a target in +training). They may also record more mundane information, such as a package license. ## [Trait summary](@id trait_summary) @@ -20,50 +13,35 @@ one argument. In the examples column of the table below, `Table`, `Continuous`, `Sampleable` are names owned by the package [ScientificTypesBase.jl](https://github.com/JuliaAI/ScientificTypesBase.jl/). -| trait | return value | fallback value | example | -|:----------------------------------------------------------------------|:-------------------------------------------------------------------------------------------------------------------------|:------------------------------------------------------|:-----------------------------------------------------------| -| [`LearnAPI.constructor`](@ref)`(algorithm)` | constructor for generating new or modified versions of `algorithm` | (no fallback) | `RidgeRegressor` | -| [`LearnAPI.functions`](@ref)`(algorithm)` | functions you can apply to `algorithm` or associated model (traits excluded) | `()` | `(:fit, :predict, :minimize, :(LearnAPI.algorithm), :obs)` | -| [`LearnAPI.kinds_of_proxy`](@ref)`(algorithm)` | instances `kind` of `KindOfProxy` for which an implementation of `LearnAPI.predict(algorithm, kind, ...)` is guaranteed. 
| `()` | `(Distribution(), Interval())` | -| [`LearnAPI.tags`](@ref)`(algorithm)` | lists one or more suggestive algorithm tags from `LearnAPI.tags()` | `()` | (:regression, :probabilistic) | -| [`LearnAPI.is_pure_julia`](@ref)`(algorithm)` | `true` if implementation is 100% Julia code | `false` | `true` | -| [`LearnAPI.pkg_name`](@ref)`(algorithm)` | name of package providing core code (may be different from package providing LearnAPI.jl implementation) | `"unknown"` | `"DecisionTree"` | -| [`LearnAPI.pkg_license`](@ref)`(algorithm)` | name of license of package providing core code | `"unknown"` | `"MIT"` | -| [`LearnAPI.doc_url`](@ref)`(algorithm)` | url providing documentation of the core code | `"unknown"` | `"https://en.wikipedia.org/wiki/Decision_tree_learning"` | -| [`LearnAPI.load_path`](@ref)`(algorithm)` | a string locating the name of `LearnAPI.constructor(algorithm)` is defined, beginning with a package name | "unknown"` | `FastTrees.LearnAPI.DecisionTreeClassifier` | -| [`LearnAPI.is_composite`](@ref)`(algorithm)` | `true` if one or more properties of `algorithm` may be an algorithm | `false` | `true` | -| [`LearnAPI.human_name`](@ref)`(algorithm)` | human name for the algorithm; should be a noun | type name with spaces | "elastic net regressor" | -| [`LearnAPI.data_interface`](@ref)`(algorithm)` | Interface implemented by objects returned by [`obs`](@ref) | `Base.HasLength()` (supports `MLUtils.getobs/numobs`) | `Base.SizeUnknown()` (supports `iterate`) | -| [`LearnAPI.iteration_parameter`](@ref)`(algorithm)` | symbolic name of an iteration parameter | `nothing` | :epochs | -| [`LearnAPI.fit_scitype`](@ref)`(algorithm)` | upper bound on `scitype(data)` ensuring `fit(algorithm, data)` works | `Union{}` | `Tuple{Table(Continuous), AbstractVector{Continuous}}` | -| [`LearnAPI.fit_observation_scitype`](@ref)`(algorithm)` | upper bound on `scitype(observation)` for `observation` in `data` ensuring `fit(algorithm, data)` works | `Union{}` | `Tuple{AbstractVector{Continuous}, Continuous}` | -| [`LearnAPI.fit_type`](@ref)`(algorithm)` | upper bound on `typeof(data)` ensuring `fit(algorithm, data)` works | `Union{}` | `Tuple{AbstractMatrix{<:Real}, AbstractVector{<:Real}}` | -| [`LearnAPI.fit_observation_type`](@ref)`(algorithm)` | upper bound on `typeof(observation)` for `observation` in `data` ensuring `fit(algorithm, data)` works | `Union{}` | `Tuple{AbstractVector{<:Real}, Real}` | -| [`LearnAPI.target_observation_scitype`](@ref)`(algorithm)` | upper bound on the scitype of each observation of the targget | `Any` | `Continuous` | -| [`LearnAPI.predict_input_scitype`](@ref)`(algorithm)` | upper bound on `scitype(data)` ensuring `predict(model, kind, data)` works | `Union{}` | `Table(Continuous)` | -| [`LearnAPI.predict_input_observation_scitype`](@ref)`(algorithm)` | upper bound on `scitype(observation)` for `observation` in `data` ensuring `predict(model, kind, data)` works | `Union{}` | `Vector{Continuous}` | -| [`LearnAPI.predict_input_type`](@ref)`(algorithm)` | upper bound on `typeof(data)` ensuring `predict(model, kind, data)` works | `Union{}` | `AbstractMatrix{<:Real}` | -| [`LearnAPI.predict_input_observation_type`](@ref)`(algorithm)` | upper bound on `typeof(observation)` for `observation` in `data` ensuring `predict(model, kind, data)` works | `Union{}` | `Vector{<:Real}` | -| [`LearnAPI.predict_output_scitype`](@ref)`(algorithm, kind_of_proxy)` | upper bound on `scitype(predict(model, ...))` | `Any` | `AbstractVector{Continuous}` | -| 
[`LearnAPI.predict_output_type`](@ref)`(algorithm, kind_of_proxy)` | upper bound on `typeof(predict(model, ...))` | `Any` | `AbstractVector{<:Real}` | -| [`LearnAPI.transform_input_scitype`](@ref)`(algorithm)` | upper bound on `scitype(data)` ensuring `transform(model, data)` works | `Union{}` | `Table(Continuous)` | -| [`LearnAPI.transform_input_observation_scitype`](@ref)`(algorithm)` | upper bound on `scitype(observation)` for `observation` in `data` ensuring `transform(model, data)` works | `Union{}` | `Vector{Continuous}` | -| [`LearnAPI.transform_input_type`](@ref)`(algorithm)` | upper bound on `typeof(data)`ensuring `transform(model, data)` works | `Union{}` | `AbstractMatrix{<:Real}}` | -| [`LearnAPI.transform_input_observation_type`](@ref)`(algorithm)` | upper bound on `typeof(observation)` for `observation` in `data` ensuring `transform(model, data)` works | `Union{}` | `Vector{Continuous}` | -| [`LearnAPI.transform_output_scitype`](@ref)`(algorithm)` | upper bound on `scitype(transform(model, ...))` | `Any` | `Table(Continuous)` | -| [`LearnAPI.transform_output_type`](@ref)`(algorithm)` | upper bound on `typeof(transform(model, ...))` | `Any` | `AbstractMatrix{<:Real}` | -| [`LearnAPI.predict_or_transform_mutates`](@ref)`(algorithm)` | `true` if `predict` or `transform` mutates first argument | `false` | `true` | +| trait | return value | fallback value | example | +|:-------------------------------------------------------------|:-------------------------------------------------------------------------------------------------------------------------|:------------------------------------------------------|:-----------------------------------------------------------| +| [`LearnAPI.constructor`](@ref)`(algorithm)` | constructor for generating new or modified versions of `algorithm` | (no fallback) | `RidgeRegressor` | +| [`LearnAPI.functions`](@ref)`(algorithm)` | functions you can apply to `algorithm` or associated model (traits excluded) | `()` | `(:fit, :predict, :minimize, :(LearnAPI.algorithm), :obs)` | +| [`LearnAPI.kinds_of_proxy`](@ref)`(algorithm)` | instances `kind` of `KindOfProxy` for which an implementation of `LearnAPI.predict(algorithm, kind, ...)` is guaranteed. 
| `()` | `(Distribution(), Interval())` |
| [`LearnAPI.tags`](@ref)`(algorithm)` | lists one or more suggestive algorithm tags from `LearnAPI.tags()` | `()` | `("regression", "probabilistic")` |
| [`LearnAPI.is_pure_julia`](@ref)`(algorithm)` | `true` if implementation is 100% Julia code | `false` | `true` |
| [`LearnAPI.pkg_name`](@ref)`(algorithm)` | name of package providing core code (may be different from package providing LearnAPI.jl implementation) | `"unknown"` | `"DecisionTree"` |
| [`LearnAPI.pkg_license`](@ref)`(algorithm)` | name of license of package providing core code | `"unknown"` | `"MIT"` |
| [`LearnAPI.doc_url`](@ref)`(algorithm)` | url providing documentation of the core code | `"unknown"` | `"https://en.wikipedia.org/wiki/Decision_tree_learning"` |
| [`LearnAPI.load_path`](@ref)`(algorithm)` | string locating the name returned by `LearnAPI.constructor(algorithm)`, beginning with a package name | `"unknown"` | `FastTrees.LearnAPI.DecisionTreeClassifier` |
| [`LearnAPI.is_composite`](@ref)`(algorithm)` | `true` if one or more properties of `algorithm` may be an algorithm | `false` | `true` |
| [`LearnAPI.human_name`](@ref)`(algorithm)` | human name for the algorithm; should be a noun | type name with spaces | "elastic net regressor" |
| [`LearnAPI.data_interface`](@ref)`(algorithm)` | Interface implemented by objects returned by [`obs`](@ref) | `Base.HasLength()` (supports `MLUtils.getobs/numobs`) | `Base.SizeUnknown()` (supports `iterate`) |
| [`LearnAPI.iteration_parameter`](@ref)`(algorithm)` | symbolic name of an iteration parameter | `nothing` | `:epochs` |
| [`LearnAPI.fit_observation_scitype`](@ref)`(algorithm)` | upper bound on `scitype(observation)` for `observation` in `data` ensuring `fit(algorithm, data)` works | `Union{}` | `Tuple{AbstractVector{Continuous}, Continuous}` |
| [`LearnAPI.target_observation_scitype`](@ref)`(algorithm)` | upper bound on the scitype of each observation of the target | `Any` | `Continuous` |
| [`LearnAPI.predict_or_transform_mutates`](@ref)`(algorithm)` | `true` if `predict` or `transform` mutates first argument | `false` | `true` |

### Derived Traits

-The following convenience methods are provided but not overloadable by new implementations.
+The following are provided for convenience but should not be overloaded by new algorithms: -| trait | return value | example | -|:-----------------------------------------------------|:--------------------------------------------------------------------------------------------------------------|:--------| -| `LearnAPI.name(algorithm)` | algorithm type name as string | "PCA" | -| `LearnAPI.is_algorithm(algorithm)` | `true` if `LearnAPI.functions(algorithm)` is not empty | `true` | -| [`LearnAPI.predict_output_scitype(algorithm)`](@ref) | dictionary of upper bounds on the scitype of predictions, keyed on subtypes of [`LearnAPI.KindOfProxy`](@ref) | | -| [`LearnAPI.predict_output_type(algorithm)`](@ref) | dictionary of upper bounds on the type of predictions, keyed on subtypes of [`LearnAPI.KindOfProxy`](@ref) | | +| trait | return value | example | +|:-----------------------------------|:---------------------------------------------------------------------|:--------| +| `LearnAPI.name(algorithm)` | algorithm type name as string | "PCA" | +| `LearnAPI.is_algorithm(algorithm)` | `true` if `algorithm` is LearnAPI.jl-compliant | `true` | +| `LearnAPI.target(algorithm)` | `true` if [`LearnAPI.target(algorithm, data)`](@ref) is implemented | `false` | +| `LearnAPI.weights(algorithm)` | `true` if [`LearnAPI.weights(algorithm, data)`](@ref) is implemented | `false` | ## Implementation guide @@ -97,15 +75,15 @@ requires: 1. *Finiteness:* The value of a trait is the same for all `algorithm`s with same value of [`LearnAPI.constructor(algorithm)`](@ref). This typically means trait values do not - depend on type parameters! There is an exception if `is_composite(algorithm) = true`. + depend on type parameters! If `is_composite(algorithm) = true`, this requirement is + dropped. -2. *Immediate serializability:* It should be possible to call a trait without first - installing any third party package. Importing the package that defines the algorithm, - together with `import LearnAPI` should suffice. +2. *Low level deserializability:* It should be possible to evaluate the trait *value* when + `LearnAPI` is the only imported module. Because of 1, combining a lot of functionality into one algorithm (e.g. the algorithm can perform both classification or regression) can mean traits are necessarily less -informative (as in `LearnAPI.predict_type(algorithm) = Any`). +informative (as in `LearnAPI.target_observation_scitype(algorithm) = Any`). ## Reference @@ -124,23 +102,8 @@ LearnAPI.is_composite LearnAPI.human_name LearnAPI.data_interface LearnAPI.iteration_parameter -LearnAPI.fit_scitype -LearnAPI.fit_type LearnAPI.fit_observation_scitype -LearnAPI.fit_observation_type LearnAPI.target_observation_scitype -LearnAPI.predict_input_scitype -LearnAPI.predict_input_observation_scitype -LearnAPI.predict_input_type -LearnAPI.predict_input_observation_type -LearnAPI.predict_output_scitype -LearnAPI.predict_output_type -LearnAPI.transform_input_scitype -LearnAPI.transform_input_observation_scitype -LearnAPI.transform_input_type -LearnAPI.transform_input_observation_type LearnAPI.predict_or_transform_mutates -LearnAPI.transform_output_scitype -LearnAPI.transform_output_type LearnAPI.@trait ``` diff --git a/src/traits.jl b/src/traits.jl index c6a7889e..dfdd5c21 100644 --- a/src/traits.jl +++ b/src/traits.jl @@ -3,7 +3,7 @@ const DOC_UNKNOWN = "Returns `\"unknown\"` if the algorithm implementation has "* - "failed to overload the trait. " + "not overloaded the trait. 
" const DOC_ON_TYPE = "The value of the trait must depend only on the type of `algorithm`. " DOC_ONLY_ONE(func) = @@ -38,21 +38,11 @@ const TRAITS = [ :iteration_parameter, :data_interface, :predict_or_transform_mutates, - :fit_scitype, :fit_observation_scitype, - :fit_type, - :fit_observation_type, :target_observation_scitype, - :predict_input_scitype, - :predict_output_scitype, - :predict_input_type, - :predict_output_type, - :transform_input_scitype, - :transform_output_scitype, - :transform_input_type, - :transform_output_type, :name, :is_algorithm, + :target, ] @@ -147,9 +137,8 @@ data...)` has a guaranteed implementation. Each such `kind` subtypes [`LearnAPI.KindOfProxy`](@ref). Examples are `LiteralTarget()` (for predicting actual target values) and `Distributions()` (for predicting probability mass/density functions). -If a `predict(model, data)` is overloaded to return predictions for a specific kind of -proxy (e.g., `predict(model::MyModel, data) = predict(model, Distribution(), data)`) then -that kind appears first in the returned tuple. +The call `predict(model, data)` always returns `predict(model, kind, data)`, where `kind` +is the first element of the trait's return value. See also [`LearnAPI.predict`](@ref), [`LearnAPI.KindOfProxy`](@ref). @@ -157,9 +146,10 @@ See also [`LearnAPI.predict`](@ref), [`LearnAPI.KindOfProxy`](@ref). # New implementations -Implementation is optional but recommended whenever `predict` is overloaded. +Must be overloaded whenever `predict` is implemented. -Elements of the returned tuple must be one of these: $CONCRETE_TARGET_PROXY_TYPES_LIST. +Elements of the returned tuple must be one of the following, described further in +LearnAPI.jl documentation: $CONCRETE_TARGET_PROXY_TYPES_LIST. Suppose, for example, we have the following implementation of a supervised learner returning only probabilistic predictions: @@ -174,6 +164,8 @@ Then we can declare @trait MyNewAlgorithmType kinds_of_proxy = (LearnaAPI.Distribution(),) ``` +LearnAPI.jl provides the fallback for `predict(model, data)`. + For more on target variables and target proxies, refer to the LearnAPI documentation. """ @@ -336,7 +328,7 @@ to return `"K-nearest neighbors regressor"`. Ideally, this is a "concrete" noun `"ridge regressor"` rather than an "abstract" noun like `"ridge regression"`. """ -human_name(M) = snakecase(name(M), delim=' ') # `name` defined below +human_name(algorithm) = snakecase(name(alogorithm), delim=' ') # `name` defined below """ LearnAPI.data_interface(algorithm) @@ -388,23 +380,6 @@ Implement if algorithm is iterative. Returns a symbol or `nothing`. iteration_parameter(::Any) = nothing -""" - LearnAPI.fit_scitype(algorithm) - -Return an upper bound `S` on the scitype of `data` guaranteed to work when calling -`fit(algorithm, data)`: if `ScientificTypes.scitype(data) <: S`, then is `fit(algorithm, -data)` is supported. - -See also [`LearnAPI.fit_type`](@ref), [`LearnAPI.fit_observation_scitype`](@ref), -[`LearnAPI.fit_observation_type`](@ref). - -# New implementations - -Optional. The fallback return value is `Union{}`. $(DOC_ONLY_ONE(:fit)) - -""" -fit_scitype(::Any) = Union{} - """ LearnAPI.fit_observation_scitype(algorithm) @@ -415,8 +390,7 @@ when calling `fit`: if `observations = obs(algorithm, data)` and $DOC_EXPLAIN_EACHOBS -See also See also [`LearnAPI.fit_type`](@ref), [`LearnAPI.fit_scitype`](@ref), -[`LearnAPI.fit_observation_type`](@ref). +See also [`LearnAPI.target_observation_scitype`](@ref). # New implementations @@ -425,42 +399,6 @@ Optional. 
The fallback return value is `Union{}`. $(DOC_ONLY_ONE(:fit)) """ fit_observation_scitype(::Any) = Union{} -""" - LearnAPI.fit_type(algorithm) - -Return an upper bound `T` on the type of `data` guaranteed to work when calling -`fit(algorithm, data)`: if `typeof(data) <: T`, then `fit(algorithm, data)` is supported. - -See also [`LearnAPI.fit_scitype`](@ref), [`LearnAPI.fit_observation_type`](@ref). -[`LearnAPI.fit_observation_scitype`](@ref) - -# New implementations - -Optional. The fallback return value is `Union{}`. $(DOC_ONLY_ONE(:fit)) - -""" -fit_type(::Any) = Union{} - -""" - LearnAPI.fit_observation_type(algorithm) - -Return an upper bound `T` on the type of individual observations guaranteed to work -when calling `fit`: if `observations = obs(algorithm, data)` and -`typeof(o) <:S` for each `o` in `observations`, then the call -`fit(algorithm, data)` is supported. - -$DOC_EXPLAIN_EACHOBS - -See also See also [`LearnAPI.fit_type`](@ref), [`LearnAPI.fit_scitype`](@ref), -[`LearnAPI.fit_observation_scitype`](@ref). - -# New implementations - -Optional. The fallback return value is `Union{}`. $(DOC_ONLY_ONE(:fit)) - -""" -fit_observation_type(::Any) = Union{} - """ LearnAPI.target_observation_scitype(algorithm) @@ -494,235 +432,10 @@ Optional. The fallback return value is `Any`. target_observation_scitype(::Any) = Any -function DOC_INPUT_SCITYPE(op) - extra = op == :predict ? " kind_of_proxy," : "" - ONLY = DOC_ONLY_ONE(op) - """ - LearnAPI.$(op)_input_scitype(algorithm) - - Return an upper bound `S` on the scitype of `data` guaranteed to work in the call - `$op(algorithm,$extra data)`: if `ScientificTypes.scitype(data) <: S`, - then `$op(algorithm,$extra data)` is supported. - - See also [`LearnAPI.$(op)_input_type`](@ref). - - # New implementations - - Implementation is optional. The fallback return value is `Union{}`. $ONLY - - """ -end - -function DOC_INPUT_OBSERVATION_SCITYPE(op) - extra = op == :predict ? " kind_of_proxy," : "" - ONLY = DOC_ONLY_ONE(op) - """ - LearnAPI.$(op)_observation_scitype(algorithm) - - Return an upper bound `S` on the scitype of individual observations guaranteed to work - when calling `$op`: if `observations = obs(model, data)`, for some `model` returned by - `fit(algorithm, ...)`, and `ScientificTypes.scitype(o) <: S` for each `o` in - `observations`, then the call `$(op)(model,$extra data)` is supported. - - $DOC_EXPLAIN_EACHOBS - - See also See also [`LearnAPI.fit_type`](@ref), [`LearnAPI.fit_scitype`](@ref), - [`LearnAPI.fit_observation_type`](@ref). - - # New implementations - - Optional. The fallback return value is `Union{}`. $ONLY - - """ -end - -function DOC_INPUT_TYPE(op) - extra = op == :predict ? " kind_of_proxy," : "" - ONLY = DOC_ONLY_ONE(op) - """ - LearnAPI.$(op)_input_type(algorithm) - - Return an upper bound `T` on the scitype of `data` guaranteed to work in the call - `$op(algorithm,$extra data)`: if `typeof(data) <: T`, - then `$op(algorithm,$extra data)` is supported. - - See also [`LearnAPI.$(op)_input_type`](@ref). - - # New implementations - - Implementation is optional. The fallback return value is `Union{}`. Should not be - overloaded if `LearnAPI.$(op)_input_scitype` is overloaded. - - """ -end - -function DOC_INPUT_OBSERVATION_TYPE(op) - extra = op == :predict ? 
" kind_of_proxy," : "" - ONLY = DOC_ONLY_ONE(op) - """ - LearnAPI.$(op)_observation_type(algorithm) - - Return an upper bound `T` on the scitype of individual observations guaranteed to work - when calling `$op`: if `observations = obs(model, data)`, for some `model` returned by - `fit(algorithm, ...)`, and `typeof(o) <: T` for each `o` in - `observations`, then the call `$(op)(model,$extra data)` is supported. - - $DOC_EXPLAIN_EACHOBS - - See also See also [`LearnAPI.fit_type`](@ref), [`LearnAPI.fit_scitype`](@ref), - [`LearnAPI.fit_observation_type`](@ref). - - # New implementations - - Optional. The fallback return value is `Union{}`. $ONLY - - """ -end - -DOC_OUTPUT_SCITYPE(op) = - """ - LearnAPI.$(op)_output_scitype(algorithm) - - Return an upper bound on the scitype of the output of the `$op` operation. - - See also [`LearnAPI.$(op)_input_scitype`](@ref). - - # New implementations - - Implementation is optional. The fallback return value is `Any`. - - """ - -DOC_OUTPUT_TYPE(op) = - """ - LearnAPI.$(op)_output_type(algorithm) - - Return an upper bound on the type of the output of the `$op` operation. - - # New implementations - - Implementation is optional. The fallback return value is `Any`. - - """ - -"$(DOC_INPUT_SCITYPE(:predict))" -predict_input_scitype(::Any) = Union{} - -"$(DOC_INPUT_OBSERVATION_SCITYPE(:predict))" -predict_input_observation_scitype(::Any) = Union{} - -"$(DOC_INPUT_TYPE(:predict))" -predict_input_type(::Any) = Union{} - -"$(DOC_INPUT_OBSERVATION_TYPE(:predict))" -predict_input_observation_type(::Any) = Union{} - -"$(DOC_INPUT_SCITYPE(:transform))" -transform_input_scitype(::Any) = Union{} - -"$(DOC_INPUT_OBSERVATION_SCITYPE(:transform))" -transform_input_observation_scitype(::Any) = Union{} - -"$(DOC_INPUT_TYPE(:transform))" -transform_input_type(::Any) = Union{} - -"$(DOC_INPUT_OBSERVATION_TYPE(:transform))" -transform_input_observation_type(::Any) = Union{} - -"$(DOC_OUTPUT_SCITYPE(:transform))" -transform_output_scitype(::Any) = Any - -"$(DOC_OUTPUT_TYPE(:transform))" -transform_output_type(::Any) = Any - - -# # TWO-ARGUMENT TRAITS - -# Here `s` is `:type` or `:scitype`: -const DOC_PREDICT_OUTPUT(s) = - """ - LearnAPI.predict_output_$s(algorithm, kind_of_proxy::KindOfProxy) - - Return an upper bound for the $(s)s of predictions of the specified form where - supported, and otherwise return `Any`. For example, if - - ŷ = predict(model, Distribution(), data) - - successfully returns (i.e., `algorithm` supports predictions of target probability - distributions) then the following is guaranteed to hold: - - $(s)(ŷ) <: predict_output_$(s)(algorithm, Distribution()) - - **Note.** This trait has a single-argument "convenience" version - `LearnAPI.predict_output_$(s)(algorithm)` derived from this one, which returns a - dictionary keyed on target proxy types. - - See also [`LearnAPI.KindOfProxy`](@ref), [`predict`](@ref), - [`predict_input_$(s)`](@ref). - - # New implementations - - Overloading the trait is optional. Here's a sample implementation for a supervised - regressor type `MyRgs` that only predicts actual values of the target: - - ```julia - @trait MyRgs predict_output_$(s) = AbstractVector{ScientificTypesBase.Continuous} - ``` - - The fallback method returns `Any`. 
- - """ - -"$(DOC_PREDICT_OUTPUT(:scitype))" -predict_output_scitype(algorithm, kind_of_proxy) = Any - -"$(DOC_PREDICT_OUTPUT(:type))" -predict_output_type(algorithm, kind_of_proxy) = Any - - # # DERIVED TRAITS -name(A) = split(string(constructor(A)), ".") |> last - -is_algorithm(A) = !isempty(functions(A)) - +name(algorithm) = split(string(constructor(algorithm)), ".") |> last +is_algorithm(algorithm) = !isempty(functions(algorithm)) preferred_kind_of_proxy(algorithm) = first(kinds_of_proxy(algorithm)) - -const DOC_PREDICT_OUTPUT2(s) = - """ - LearnAPI.predict_output_$(s)(algorithm) - - Return a dictionary of upper bounds on the $(s) of predictions, keyed on concrete - subtypes of [`LearnAPI.KindOfProxy`](@ref). Each of these subtypes represents a - different form of target prediction (`LiteralTarget`, `Distribution`, - `SurvivalFunction`, etc) possibly supported by `algorithm`, but the existence of a key - does not guarantee that form is supported. - - As an example, if - - ŷ = predict(model, Distribution(), data...) - - successfully returns (i.e., `algorithm` supports predictions of target probability - distributions) then the following is guaranteed to hold: - - $(s)(ŷ) <: LearnAPI.predict_output_$(s)s(algorithm)[Distribution] - - See also [`LearnAPI.KindOfProxy`](@ref), [`predict`](@ref), - [`LearnAPI.predict_input_$(s)`](@ref). - - # New implementations - - This single argument trait should not be overloaded. Instead, overload - [`LearnAPI.predict_output_$(s)`](@ref)(algorithm, kind_of_proxy). - - """ - -"$(DOC_PREDICT_OUTPUT2(:scitype))" -predict_output_scitype(algorithm) = - Dict(T => predict_output_scitype(algorithm, T()) - for T in CONCRETE_TARGET_PROXY_TYPES) - -"$(DOC_PREDICT_OUTPUT2(:type))" -predict_output_type(algorithm) = - Dict(T => predict_output_type(algorithm, T()) - for T in CONCRETE_TARGET_PROXY_TYPES) +target(algorithm) = :(LearnAPI.target) in functions(algorithm) +weights(algorithm) = :(LearnAPI.weights) in functions(algorithm) From 11b38cf9696de43e3f4a004b06b862ea06cc174b Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Wed, 2 Oct 2024 11:47:27 +1300 Subject: [PATCH 24/27] rename LiteralTarget -> Point --- docs/src/anatomy_of_an_implementation.md | 20 +++++----- docs/src/index.md | 6 +-- docs/src/kinds_of_target_proxy.md | 50 ------------------------ docs/src/minimize.md | 4 +- docs/src/obs.md | 2 +- docs/src/predict_transform.md | 6 +-- docs/src/target_weights_features.md | 2 +- src/obs.jl | 4 +- src/predict_transform.jl | 4 +- src/target_weights_features.jl | 2 +- src/traits.jl | 2 +- src/types.jl | 46 +++++++++++++++++++++- test/integration/regression.jl | 22 +++++------ 13 files changed, 82 insertions(+), 88 deletions(-) diff --git a/docs/src/anatomy_of_an_implementation.md b/docs/src/anatomy_of_an_implementation.md index 13f17da1..3c2a7d5f 100644 --- a/docs/src/anatomy_of_an_implementation.md +++ b/docs/src/anatomy_of_an_implementation.md @@ -129,19 +129,19 @@ end Users will be able to call `predict` like this: ```julia -predict(model, LiteralTarget(), Xnew) +predict(model, Point(), Xnew) ``` -where `Xnew` is a table (of the same form as `X` above). The argument `LiteralTarget()` +where `Xnew` is a table (of the same form as `X` above). The argument `Point()` signals that literal predictions of the target variable are sought, as opposed to some -proxy for the target, such as probability density functions. `LiteralTarget` is an +proxy for the target, such as probability density functions. 
`Point` is an example of a [`LearnAPI.KindOfProxy`](@ref proxy_types) type. Targets and target proxies are discussed [here](@ref proxy). We provide this implementation for our ridge regressor: ```@example anatomy -LearnAPI.predict(model::RidgeFitted, ::LiteralTarget, Xnew) = +LearnAPI.predict(model::RidgeFitted, ::Point, Xnew) = Tables.matrix(Xnew)*model.coefficients ``` @@ -210,7 +210,7 @@ Because we have implemented `predict`, we are required to overload the target, we make this definition: ```julia -LearnAPI.kinds_of_proxy(::Ridge) = (LiteralTarget(),) +LearnAPI.kinds_of_proxy(::Ridge) = (Point(),) ``` A macro provides a shortcut, convenient when multiple traits are to be defined: @@ -219,7 +219,7 @@ A macro provides a shortcut, convenient when multiple traits are to be defined: @trait( Ridge, constructor = Ridge, - kinds_of_proxy=(LiteralTarget(),), + kinds_of_proxy=(Point(),), tags = (:regression,), functions = ( :(LearnAPI.fit), @@ -326,7 +326,7 @@ LearnAPI.minimize(model::RidgeFitted) = @trait( Ridge, constructor = Ridge, - kinds_of_proxy=(LiteralTarget(),), + kinds_of_proxy=(Point(),), tags = (:regression,), functions = ( :(LearnAPI.fit), @@ -424,11 +424,11 @@ case: ```@example anatomy2 LearnAPI.obs(::RidgeFitted, Xnew) = Tables.matrix(Xnew)' -LearnAPI.predict(model::RidgeFitted, ::LiteralTarget, observations::AbstractMatrix) = +LearnAPI.predict(model::RidgeFitted, ::Point, observations::AbstractMatrix) = observations'*model.coefficients -LearnAPI.predict(model::RidgeFitted, ::LiteralTarget, Xnew) = - predict(model, LiteralTarget(), obs(model, Xnew)) +LearnAPI.predict(model::RidgeFitted, ::Point, Xnew) = + predict(model, Point(), obs(model, Xnew)) ``` ### `target` and `features` methods diff --git a/docs/src/index.md b/docs/src/index.md index cf11d259..7b638aed 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -50,7 +50,7 @@ LearnAPI.functions(forest) model = fit(forest, X, y) # Generate point predictions: -ŷ = predict(model, Xnew) # or `predict(model, LiteralTarget(), Xnew)` +ŷ = predict(model, Xnew) # or `predict(model, Point(), Xnew)` # Predict probability distributions: predict(model, Distribution(), Xnew) @@ -65,10 +65,10 @@ serialize("my_random_forest.jls", small_model) # Recover saved model and algorithm configuration: recovered_model = deserialize("my_random_forest.jls") @assert LearnAPI.algorithm(recovered_model) == forest -@assert predict(recovered_model, LiteralTarget(), Xnew) == ŷ +@assert predict(recovered_model, Point(), Xnew) == ŷ ``` -`Distribution` and `LiteralTarget` are singleton types owned by LearnAPI.jl. They allow +`Distribution` and `Point` are singleton types owned by LearnAPI.jl. They allow dispatch based on the [kind of target proxy](@ref proxy), a key LearnAPI.jl concept. LearnAPI.jl places more emphasis on the notion of target variables and target proxies than on the usual supervised/unsupervised learning dichotomy. 
From this point of view, a diff --git a/docs/src/kinds_of_target_proxy.md b/docs/src/kinds_of_target_proxy.md index 218c378a..da150f96 100644 --- a/docs/src/kinds_of_target_proxy.md +++ b/docs/src/kinds_of_target_proxy.md @@ -14,64 +14,14 @@ LearnAPI.KindOfProxy LearnAPI.IID ``` -| type | form of an observation | -|:-------------------------------------:|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `LearnAPI.LiteralTarget` | same as target observations; may have the interpretation of a 50% quantile, 50% expectile or mode | -| `LearnAPI.Sampleable` | object that can be sampled to obtain object of the same form as target observation | -| `LearnAPI.Distribution` | explicit probability density/mass function whose sample space is all possible target observations | -| `LearnAPI.LogDistribution` | explicit log-probability density/mass function whose sample space is possible target observations | -| `LearnAPI.Probability`¹ | numerical probability or probability vector | -| `LearnAPI.LogProbability`¹ | log-probability or log-probability vector | -| `LearnAPI.Parametric`¹ | a list of parameters (e.g., mean and variance) describing some distribution | -| `LearnAPI.LabelAmbiguous` | collections of labels (in case of multi-class target) but without a known correspondence to the original target labels (and of possibly different number) as in, e.g., clustering | -| `LearnAPI.LabelAmbiguousSampleable` | sampleable version of `LabelAmbiguous`; see `Sampleable` above | -| `LearnAPI.LabelAmbiguousDistribution` | pdf/pmf version of `LabelAmbiguous`; see `Distribution` above | -| `LearnAPI.LabelAmbiguousFuzzy` | same as `LabelAmbiguous` but with multiple values of indeterminant number | -| `LearnAPI.Quantile`² | same as target but with quantile interpretation | -| `LearnAPI.Expectile`² | same as target but with expectile interpretation | -| `LearnAPI.ConfidenceInterval`² | confidence interval | -| `LearnAPI.Fuzzy` | finite but possibly varying number of target observations | -| `LearnAPI.ProbabilisticFuzzy` | as for `Fuzzy` but labeled with probabilities (not necessarily summing to one) | -| `LearnAPI.SurvivalFunction` | survival function | -| `LearnAPI.SurvivalDistribution` | probability distribution for survival time | -| `LearnAPI.SurvivalHazardFunction` | hazard function for survival time | -| `LearnAPI.OutlierScore` | numerical score reflecting degree of outlierness (not necessarily normalized) | -| `LearnAPI.Continuous` | real-valued approximation/interpolation of a discrete-valued target, such as a count (e.g., number of phone calls) | - -¹Provided for completeness but discouraged to avoid [ambiguities in -representation](https://github.com/alan-turing-institute/MLJ.jl/blob/dev/paper/paper.md#a-unified-approach-to-probabilistic-predictions-and-their-evaluation). - -²The level will be controlled by a hyper-parameter; models providing only quantiles or -expectiles at 50% will provide `LiteralTarget` instead. - -> Table of concrete subtypes of `LearnAPI.IID <: LearnAPI.KindOfProxy`. 
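For orientation: at call sites, these proxy types select the form of `predict`'s output. A sketch, assuming `model` is the output of `fit` for some compliant algorithm and `Xnew` is new input data:

```julia
ŷ = predict(model, Point(), Xnew)        # literal target predictions
predict(model, Distribution(), Xnew)     # probability density/mass functions
```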
- - ## Proxies for density estimation lgorithms ```@docs LearnAPI.Single ``` -| type `T` | form of output of `predict(model, ::T)` | -|:--------------------------------:|:-----------------------------------------------------------------------| -| `LearnAPI.SingleSampleable` | object that can be sampled to obtain a single target observation | -| `LearnAPI.SingleDistribution` | explicit probability density/mass function for sampling the target | -| `LearnAPI.SingleLogDistribution` | explicit log-probability density/mass function for sampling the target | - -> Table of `LearnAPI.KindOfProxy` subtypes subtyping `LearnAPI.Single` - - ## Joint probability distributions ```@docs LearnAPI.Joint ``` - -| type `T` | form of output of `predict(model, ::T, data)` | -|:-------------------------------:|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `LearnAPI.JointSampleable` | object that can be sampled to obtain a *vector* whose elements have the form of target observations; the vector length matches the number of observations in `data`. | -| `LearnAPI.JointDistribution` | explicit probability density/mass function whose sample space is vectors of target observations; the vector length matches the number of observations in `data` | -| `LearnAPI.JointLogDistribution` | explicit log-probability density/mass function whose sample space is vectors of target observations; the vector length matches the number of observations in `data` | - -> Table of `LearnAPI.KindOfProxy` subtypes subtyping `LearnAPI.Joint` diff --git a/docs/src/minimize.md b/docs/src/minimize.md index 8e7a4efb..03bc028e 100644 --- a/docs/src/minimize.md +++ b/docs/src/minimize.md @@ -8,14 +8,14 @@ minimize(model) -> ```julia model = fit(algorithm, (X, y)) # or `fit(algorithm, X, y)` -ŷ = predict(model, LiteralTarget(), Xnew) +ŷ = predict(model, Point(), Xnew) LearnAPI.feature_importances(model) small_model = minimize(model) serialize("my_model.jls", small_model) recovered_model = deserialize("my_random_forest.jls") -@assert predict(recovered_model, LiteralTarget(), Xnew) == ŷ +@assert predict(recovered_model, Point(), Xnew) == ŷ # throws MethodError: LearnAPI.feature_importances(recovered_model) diff --git a/docs/src/obs.md b/docs/src/obs.md index 82be98b5..cf794d87 100644 --- a/docs/src/obs.md +++ b/docs/src/obs.md @@ -68,7 +68,7 @@ scores = map(train_test_folds) do (train, test) global never_trained = false end predictobs_subset = MLUtils.getobs(predictobs, test) - ŷ = predict(model, LiteralTarget(), predictobs_subset) + ŷ = predict(model, Point(), predictobs_subset) return diff --git a/docs/src/predict_transform.md b/docs/src/predict_transform.md index 2ec378ef..df961719 100644 --- a/docs/src/predict_transform.md +++ b/docs/src/predict_transform.md @@ -27,7 +27,7 @@ ŷ = predict(model, Distribution(), Xnew) Generate point predictions: ```julia -ŷ = predict(model, LiteralTarget(), Xnew) +ŷ = predict(model, Point(), Xnew) ``` Train a dimension-reducing `algorithm`: @@ -49,7 +49,7 @@ inverse_transform(model, Xnew_reduced) fitobs = obs(algorithm, (X, y)) # algorithm-specific repr. 
of data model = fit(algorithm, MLUtils.getobs(fitobs, 1:100)) predictobs = obs(model, MLUtils.getobs(X, 101:150)) -ŷ = predict(model, LiteralTarget(), predictobs) +ŷ = predict(model, Point(), predictobs) ``` @@ -65,7 +65,7 @@ ŷ = predict(model, LiteralTarget(), predictobs) If the algorithm has a notion of [target variable](@ref proxy), then use [`predict`](@ref) to output each supported [kind of target proxy](@ref -proxy_types) (`LiteralTarget()`, `Distribution()`, etc). +proxy_types) (`Point()`, `Distribution()`, etc). For output not associated with a target variable, implement [`transform`](@ref) instead, which does not dispatch on [`LearnAPI.KindOfProxy`](@ref), but can be optionally diff --git a/docs/src/target_weights_features.md b/docs/src/target_weights_features.md index 78205a44..df4f76b7 100644 --- a/docs/src/target_weights_features.md +++ b/docs/src/target_weights_features.md @@ -22,7 +22,7 @@ target: model = fit(algorithm, data) X = LearnAPI.features(algorithm, data) y = LearnAPI.target(algorithm, data) -ŷ = predict(model, LiteralTarget(), X) +ŷ = predict(model, Point(), X) training_loss = sum(ŷ .!= y) ``` diff --git a/src/obs.jl b/src/obs.jl index e781351e..47fd8b79 100644 --- a/src/obs.jl +++ b/src/obs.jl @@ -24,7 +24,7 @@ Usual workflow, using data-specific resampling methods: data = (X, y) # a DataFrame and a vector data_train = (Tables.select(X, 1:100), y[1:100]) model = fit(algorithm, data_train) -ŷ = predict(model, LiteralTarget(), X[101:150]) +ŷ = predict(model, Point(), X[101:150]) ``` Alternative workflow using `obs` and the MLUtils.jl method `getobs` (assumes @@ -37,7 +37,7 @@ fit_observations = obs(algorithm, data) model = fit(algorithm, MLUtils.getobs(fit_observations, 1:100)) predict_observations = obs(model, X) -ẑ = predict(model, LiteralTarget(), MLUtils.getobs(predict_observations, 101:150)) +ẑ = predict(model, Point(), MLUtils.getobs(predict_observations, 101:150)) @assert ẑ == ŷ ``` diff --git a/src/predict_transform.jl b/src/predict_transform.jl index 6b62dfd5..a87cf07b 100644 --- a/src/predict_transform.jl +++ b/src/predict_transform.jl @@ -57,7 +57,7 @@ DOC_DATA_INTERFACE(method) = The first signature returns target predictions, or proxies for target predictions, for input features `data`, according to some `model` returned by [`fit`](@ref). Where -supported, these are literally target predictions if `kind_of_proxy = LiteralTarget()`, +supported, these are literally target predictions if `kind_of_proxy = Point()`, and probability density/mass functions if `kind_of_proxy = Distribution()`. List all options with [`LearnAPI.kinds_of_proxy(algorithm)`](@ref), where `algorithm = LearnAPI.algorithm(model)`. @@ -75,7 +75,7 @@ training features `X`, training target `y`, and test features `Xnew`: ```julia model = fit(algorithm, (X, y)) # or `fit(algorithm, X, y)` -predict(model, LiteralTarget(), Xnew) +predict(model, Point(), Xnew) ``` See also [`fit`](@ref), [`transform`](@ref), [`inverse_transform`](@ref). diff --git a/src/target_weights_features.jl b/src/target_weights_features.jl index e7fd0b63..69fab433 100644 --- a/src/target_weights_features.jl +++ b/src/target_weights_features.jl @@ -46,7 +46,7 @@ implemented, as in the following sample workflow: ```julia model = fit(algorithm, data) X = features(data) -ŷ = predict(algorithm, kind_of_proxy, X) # eg, `kind_of_proxy = LiteralTarget()` +ŷ = predict(algorithm, kind_of_proxy, X) # eg, `kind_of_proxy = Point()` ``` The return value has the same number of observations as `data` does. 
For supervised models
diff --git a/src/traits.jl b/src/traits.jl
index dfdd5c21..97adf49c 100644
--- a/src/traits.jl
+++ b/src/traits.jl
@@ -134,7 +134,7 @@ functions(::Any) = ()
 
 Returns a tuple of all instances, `kind`, for which `predict(algorithm, kind,
 data...)` has a guaranteed implementation. Each such `kind` subtypes
-[`LearnAPI.KindOfProxy`](@ref). Examples are `LiteralTarget()` (for predicting actual
+[`LearnAPI.KindOfProxy`](@ref). Examples are `Point()` (for predicting actual
 target values) and `Distribution()` (for predicting probability mass/density functions).
 
 The call `predict(model, data)` always returns `predict(model, kind, data)`, where `kind`
diff --git a/src/types.jl b/src/types.jl
index 8d755fdb..e046384d 100644
--- a/src/types.jl
+++ b/src/types.jl
@@ -18,10 +18,42 @@ following must hold:
 
 See also [`LearnAPI.KindOfProxy`](@ref).
 
+# Extended help
+
+| type | form of an observation |
+|:-------------------------------------:|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| `LearnAPI.Point` | same as target observations; may have the interpretation of a 50% quantile, 50% expectile or mode |
+| `LearnAPI.Sampleable` | object that can be sampled to obtain object of the same form as target observation |
+| `LearnAPI.Distribution` | explicit probability density/mass function whose sample space is all possible target observations |
+| `LearnAPI.LogDistribution` | explicit log-probability density/mass function whose sample space is possible target observations |
+| `LearnAPI.Probability`¹ | numerical probability or probability vector |
+| `LearnAPI.LogProbability`¹ | log-probability or log-probability vector |
+| `LearnAPI.Parametric`¹ | a list of parameters (e.g., mean and variance) describing some distribution |
+| `LearnAPI.LabelAmbiguous` | collections of labels (in case of multi-class target) but without a known correspondence to the original target labels (and of possibly different number) as in, e.g., clustering |
+| `LearnAPI.LabelAmbiguousSampleable` | sampleable version of `LabelAmbiguous`; see `Sampleable` above |
+| `LearnAPI.LabelAmbiguousDistribution` | pdf/pmf version of `LabelAmbiguous`; see `Distribution` above |
+| `LearnAPI.LabelAmbiguousFuzzy` | same as `LabelAmbiguous` but with multiple values of indeterminate number |
+| `LearnAPI.Quantile`² | same as target but with quantile interpretation |
+| `LearnAPI.Expectile`² | same as target but with expectile interpretation |
+| `LearnAPI.ConfidenceInterval`² | confidence interval |
+| `LearnAPI.Fuzzy` | finite but possibly varying number of target observations |
+| `LearnAPI.ProbabilisticFuzzy` | as for `Fuzzy` but labeled with probabilities (not necessarily summing to one) |
+| `LearnAPI.SurvivalFunction` | survival function |
+| `LearnAPI.SurvivalDistribution` | probability distribution for survival time |
+| `LearnAPI.SurvivalHazardFunction` | hazard function for survival time |
+| `LearnAPI.OutlierScore` | numerical score reflecting degree of outlierness (not necessarily normalized) |
+| `LearnAPI.Continuous` | real-valued approximation/interpolation of a discrete-valued target, such as a count (e.g., number of phone calls) |
+
+¹Provided for completeness but discouraged to avoid [ambiguities in
+representation](https://github.com/alan-turing-institute/MLJ.jl/blob/dev/paper/paper.md#a-unified-approach-to-probabilistic-predictions-and-their-evaluation).
+ +²The level will be controlled by a hyper-parameter; models providing only quantiles or +expectiles at 50% will provide `Point` instead. + """ abstract type IID <: KindOfProxy end -struct LiteralTarget <: IID end +struct Point <: IID end struct Sampleable <: IID end struct Distribution <: IID end struct LogDistribution <: IID end @@ -52,6 +84,12 @@ Abstract subtype of [`LearnAPI.KindOfProxy`](@ref). If `kind_of_proxy` is an in kind_of_proxy, data)` represents a *single* probability distribution for the sample space ``Y^n``, where ``Y`` is the space from which the target variable takes its values. +| type `T` | form of output of `predict(model, ::T, data)` | +|:-------------------------------:|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `LearnAPI.JointSampleable` | object that can be sampled to obtain a *vector* whose elements have the form of target observations; the vector length matches the number of observations in `data`. | +| `LearnAPI.JointDistribution` | explicit probability density/mass function whose sample space is vectors of target observations; the vector length matches the number of observations in `data` | +| `LearnAPI.JointLogDistribution` | explicit log-probability density/mass function whose sample space is vectors of target observations; the vector length matches the number of observations in `data` | + """ abstract type Joint <: KindOfProxy end struct JointSampleable <: Joint end @@ -68,6 +106,12 @@ samples, and we regard the samples as drawn from the "target" variable. If in th `kind_of_proxy` is an instance of `LearnAPI.Single` then, `predict(algorithm)` returns a single object representing a probability distribution. 
+| type `T` | form of output of `predict(model, ::T)` | +|:--------------------------------:|:-----------------------------------------------------------------------| +| `LearnAPI.SingleSampleable` | object that can be sampled to obtain a single target observation | +| `LearnAPI.SingleDistribution` | explicit probability density/mass function for sampling the target | +| `LearnAPI.SingleLogDistribution` | explicit log-probability density/mass function for sampling the target | + """ abstract type Single <: KindOfProxy end struct SingleSampeable <: Single end diff --git a/test/integration/regression.jl b/test/integration/regression.jl index 5b91561e..c61aa72e 100644 --- a/test/integration/regression.jl +++ b/test/integration/regression.jl @@ -87,12 +87,12 @@ LearnAPI.features(::Ridge, observations::RidgeFitObs) = observations.A LearnAPI.obs(::RidgeFitted, X) = Tables.matrix(X)' # matrix input: -LearnAPI.predict(model::RidgeFitted, ::LiteralTarget, observations::AbstractMatrix) = +LearnAPI.predict(model::RidgeFitted, ::Point, observations::AbstractMatrix) = observations'*model.coefficients # tabular input: -LearnAPI.predict(model::RidgeFitted, ::LiteralTarget, Xnew) = - predict(model, LiteralTarget(), obs(model, Xnew)) +LearnAPI.predict(model::RidgeFitted, ::Point, Xnew) = + predict(model, Point(), obs(model, Xnew)) # accessor function: LearnAPI.feature_importances(model::RidgeFitted) = model.feature_importances @@ -103,7 +103,7 @@ LearnAPI.minimize(model::RidgeFitted) = @trait( Ridge, constructor = Ridge, - kinds_of_proxy = (LiteralTarget(),), + kinds_of_proxy = (Point(),), tags = ("regression",) functions = ( :(LearnAPI.fit), @@ -155,7 +155,7 @@ data = (X, y) ), ) - ŷ = predict(model, LiteralTarget(), Tables.subset(X, test)) + ŷ = predict(model, Point(), Tables.subset(X, test)) @test ŷ isa Vector{Float64} @test predict(model, Tables.subset(X, test)) == ŷ @@ -163,7 +163,7 @@ data = (X, y) predictobs = LearnAPI.obs(model, X) model = fit(algorithm, MLUtils.getobs(fitobs, train); verbosity=0) @test LearnAPI.target(algorithm, fitobs) == y - @test predict(model, LiteralTarget(), MLUtils.getobs(predictobs, test)) ≈ ŷ + @test predict(model, Point(), MLUtils.getobs(predictobs, test)) ≈ ŷ @test predict(model, LearnAPI.features(algorithm, fitobs)) ≈ predict(model, X) @test LearnAPI.feature_importances(model) isa Vector{<:Pair{Symbol}} @@ -177,7 +177,7 @@ data = (X, y) @test LearnAPI.algorithm(recovered_model) == algorithm @test predict( recovered_model, - LiteralTarget(), + Point(), MLUtils.getobs(predictobs, test) ) ≈ ŷ @@ -227,7 +227,7 @@ LearnAPI.target(::BabyRidge, data) = last(data) LearnAPI.algorithm(model::BabyRidgeFitted) = model.algorithm -LearnAPI.predict(model::BabyRidgeFitted, ::LiteralTarget, Xnew) = +LearnAPI.predict(model::BabyRidgeFitted, ::Point, Xnew) = Tables.matrix(Xnew)*model.coefficients LearnAPI.minimize(model::BabyRidgeFitted) = @@ -236,7 +236,7 @@ LearnAPI.minimize(model::BabyRidgeFitted) = @trait( BabyRidge, constructor = BabyRidge, - kinds_of_proxy = (LiteralTarget(),), + kinds_of_proxy = (Point(),), tags = ("regression",) functions = ( :(LearnAPI.fit), @@ -254,13 +254,13 @@ LearnAPI.minimize(model::BabyRidgeFitted) = algorithm = BabyRidge(lambda=0.5) model = fit(algorithm, Tables.subset(X, train), y[train]; verbosity=0) - ŷ = predict(model, LiteralTarget(), Tables.subset(X, test)) + ŷ = predict(model, Point(), Tables.subset(X, test)) @test ŷ isa Vector{Float64} fitobs = obs(algorithm, data) predictobs = LearnAPI.obs(model, X) model = fit(algorithm, MLUtils.getobs(fitobs, 
train); verbosity=0) - @test predict(model, LiteralTarget(), MLUtils.getobs(predictobs, test)) == ŷ == + @test predict(model, Point(), MLUtils.getobs(predictobs, test)) == ŷ == predict(model, MLUtils.getobs(predictobs, test)) @test LearnAPI.target(algorithm, data) == y @test LearnAPI.predict(model, X) ≈ From 8fd02c96496b817e03fbc52a76ae8e46c9c82e14 Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Wed, 2 Oct 2024 13:39:59 +1300 Subject: [PATCH 25/27] fix typos --- src/fit_update.jl | 4 ++-- src/target_weights_features.jl | 6 +++--- src/traits.jl | 4 ++-- typos.toml | 6 ++++++ 4 files changed, 13 insertions(+), 7 deletions(-) create mode 100644 typos.toml diff --git a/src/fit_update.jl b/src/fit_update.jl index faefd610..44b427b2 100644 --- a/src/fit_update.jl +++ b/src/fit_update.jl @@ -96,7 +96,7 @@ Return an updated version of the `model` object returned by a previous [`fit`](@ specify hyperparameter replacements in the form `p1=value1, p2=value2, ...`. When following the call `fit(algorithm, data)`, the `update` call is semantically -equivalent to retraining ab initio using a concatentation of `data` and `new_data`, +equivalent to retraining ab initio using a concatenation of `data` and `new_data`, *provided there are no hyperparameter replacements.* Behaviour is otherwise algorithm-specific. @@ -131,7 +131,7 @@ Return an updated version of the `model` object returned by a previous [`fit`](@ specify hyperparameter replacements in the form `p1=value1, p2=value2, ...`. When following the call `fit(algorithm, data)`, the `update` call is semantically -equivalent to retraining ab initio using a concatentation of `data` and `new_data`, +equivalent to retraining ab initio using a concatenation of `data` and `new_data`, *provided there are no hyperparameter replacements.* Behaviour is otherwise algorithm-specific. diff --git a/src/target_weights_features.jl b/src/target_weights_features.jl index 69fab433..7df72646 100644 --- a/src/target_weights_features.jl +++ b/src/target_weights_features.jl @@ -5,7 +5,7 @@ Return, for each form of `data` supported in a call of the form [`fit(algorithm, data)`](@ref), the target variable part of `data`. If `nothing` is returned, the `algorithm` does not see a target variable in training (is unsupervised). -Refer to LearnAPI.jl documenation for the precise meaning of "target". +Refer to LearnAPI.jl documentation for the precise meaning of "target". # New implementations @@ -22,7 +22,7 @@ target(::Any, data) = nothing Return, for each form of `data` supported in a call of the form `[`fit(algorithm, data)`](@ref), the per-observation weights part of `data`. Where `nothing` is returned, no -weights are part of `data`, which is to be interpretted as uniform weighting. +weights are part of `data`, which is to be interpreted as uniform weighting. # New implementations @@ -51,7 +51,7 @@ ŷ = predict(algorithm, kind_of_proxy, X) # eg, `kind_of_proxy = Point()` The return value has the same number of observations as `data` does. For supervised models (i.e., where `:(LearnAPI.target) in LearnAPI.functions(algorithm)`) `ŷ` above is generally -inteneded to be an approximate proxy for `LearnAPI.target(algorithm, data)`, the training +intended to be an approximate proxy for `LearnAPI.target(algorithm, data)`, the training target. 
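The docstring corrected above is worth pinning down with a sketch. Here `algorithm` is assumed to be some LearnAPI-compliant supervised learner consuming `(X, y)` tuples, as in the ridge examples elsewhere in this series, and the squared-error loss is an arbitrary choice:

```julia
using LearnAPI

data = (X, y)        # feature table `X` and target vector `y`, assumed given
model = fit(algorithm, data)

# Recover the parts of `data` seen by the algorithm:
Xtrain = LearnAPI.features(algorithm, data)  # for tuple data, `first(data)`
ytrain = LearnAPI.target(algorithm, data)    # for tuple data, `last(data)`

# Predictions are intended as an approximate proxy for the training target:
ŷ = predict(model, Point(), Xtrain)
training_loss = sum(abs2, ŷ - ytrain)
```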
diff --git a/src/traits.jl b/src/traits.jl index 97adf49c..93938007 100644 --- a/src/traits.jl +++ b/src/traits.jl @@ -87,7 +87,7 @@ function constructor end """ LearnAPI.functions(algorithm) -Return a tuple of expressions respresenting functions that can be meaningfully applied +Return a tuple of expressions representing functions that can be meaningfully applied with `algorithm`, or an associated model (object returned by `fit(algorithm, ...)`, as the first argument. Algorithm traits (methods for which `algorithm` is the *only* argument) are excluded. @@ -328,7 +328,7 @@ to return `"K-nearest neighbors regressor"`. Ideally, this is a "concrete" noun `"ridge regressor"` rather than an "abstract" noun like `"ridge regression"`. """ -human_name(algorithm) = snakecase(name(alogorithm), delim=' ') # `name` defined below +human_name(algorithm) = snakecase(name(algorithm), delim=' ') # `name` defined below """ LearnAPI.data_interface(algorithm) diff --git a/typos.toml b/typos.toml new file mode 100644 index 00000000..8f5d6f5a --- /dev/null +++ b/typos.toml @@ -0,0 +1,6 @@ +[default.extend-words] +# Don't correct "mape" to "map" +mape = "mape" +yhat = "yhat" +LSO ="LSO" +datas = "datas" From 60f8b6c0066097a615f45e98aff0c09ee85f00ad Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Wed, 2 Oct 2024 13:44:10 +1300 Subject: [PATCH 26/27] fix syntax error in test --- test/integration/regression.jl | 4 ++-- test/integration/static_algorithms.jl | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/test/integration/regression.jl b/test/integration/regression.jl index c61aa72e..ba68cef1 100644 --- a/test/integration/regression.jl +++ b/test/integration/regression.jl @@ -104,7 +104,7 @@ LearnAPI.minimize(model::RidgeFitted) = Ridge, constructor = Ridge, kinds_of_proxy = (Point(),), - tags = ("regression",) + tags = ("regression",), functions = ( :(LearnAPI.fit), :(LearnAPI.algorithm), @@ -237,7 +237,7 @@ LearnAPI.minimize(model::BabyRidgeFitted) = BabyRidge, constructor = BabyRidge, kinds_of_proxy = (Point(),), - tags = ("regression",) + tags = ("regression",), functions = ( :(LearnAPI.fit), :(LearnAPI.algorithm), diff --git a/test/integration/static_algorithms.jl b/test/integration/static_algorithms.jl index a143416b..3812fbc6 100644 --- a/test/integration/static_algorithms.jl +++ b/test/integration/static_algorithms.jl @@ -39,7 +39,7 @@ end @trait( Selector, constructor = Selector, - tags = ("feature engineering",) + tags = ("feature engineering",), functions = ( :(LearnAPI.fit), :(LearnAPI.algorithm), @@ -105,7 +105,7 @@ end Selector2, constructor = Selector2, predict_or_transform_mutates = true, - tags = ("feature engineering",) + tags = ("feature engineering",), functions = ( :(LearnAPI.fit), :(LearnAPI.algorithm), From e25e4e738942e63bd8f823c9fc46d33ed8dbca1d Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Wed, 2 Oct 2024 13:48:20 +1300 Subject: [PATCH 27/27] add julia 1.10 testing to matrix Acked-by: Anthony D. Blaom --- .github/workflows/ci.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 37ef5474..ca263a9a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -17,7 +17,8 @@ jobs: fail-fast: false matrix: version: - - '1.6' + - '1.6' # previous LTS release + - '1.10' # new LTS release - '1' # automatically expands to the latest stable 1.x release of Julia. 
os: - ubuntu-latest @@ -65,4 +66,4 @@ jobs: using Documenter: DocMeta, doctest using LearnAPI DocMeta.setdocmeta!(LearnAPI, :DocTestSetup, :(using LearnAPI); recursive=true) - doctest(LearnAPI)' \ No newline at end of file + doctest(LearnAPI)'
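For contributors who want to reproduce the doctest step above without waiting on CI, the equivalent local REPL session is the following (assuming Documenter.jl is installed in the active test environment):

```julia
using Documenter: DocMeta, doctest
using LearnAPI

# Mirror the CI step: attach the doctest setup expression to all
# docstrings, then run every doctest in the package:
DocMeta.setdocmeta!(LearnAPI, :DocTestSetup, :(using LearnAPI); recursive=true)
doctest(LearnAPI)
```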