diff --git a/src/MLJModels.jl b/src/MLJModels.jl index 12ac3789..639df9f3 100755 --- a/src/MLJModels.jl +++ b/src/MLJModels.jl @@ -37,7 +37,7 @@ export info # from model/Constant export ConstantRegressor, ConstantClassifier, - DeterministicConstantRegressor, DeterministicConstantClassifier + DeterministicConstantRegressor, DeterministicConstantClassifier # from model/ThresholdPredictors export BinaryThresholdPredictor diff --git a/src/registry/Metadata.toml b/src/registry/Metadata.toml index 531604cb..88fc96fd 100644 --- a/src/registry/Metadata.toml +++ b/src/registry/Metadata.toml @@ -3433,6 +3433,142 @@ ":deep_properties" = "`()`" ":reporting_operations" = "`()`" +[SIRUS.StableRulesClassifier] +":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}, AbstractVector{<:ScientificTypesBase.Finite}}`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{_s25} where _s25<:ScientificTypesBase.Finite}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_pure_julia" = "`true`" +":package_name" = "SIRUS" +":package_license" = "MIT" +":load_path" = "SIRUS.StableRulesClassifier" +":package_uuid" = "9113e207-2504-4b06-8eee-d78e288bee65" +":package_url" = "https://github.com/rikhuijzer/SIRUS.jl" +":is_wrapper" = "`false`" +":supports_weights" = "`false`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":docstring" = "```\nStableRulesClassifier\n```\n\nA model type for constructing a stable rules classifier, based on [SIRUS.jl](https://github.com/rikhuijzer/SIRUS.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nStableRulesClassifier = @load StableRulesClassifier pkg=SIRUS\n```\n\nDo `model = StableRulesClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `StableRulesClassifier(rng=...)`.\n\n`StableRulesClassifier` implements an explainable rule-based model derived from a random forest.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n * `y`: the target, which can be any `AbstractVector` whose element scitype is `<:OrderedFactor` or `<:Multiclass`; check the scitype with `scitype(y)`\n\nTrain the machine with `fit!(mach, rows=...)`.\n\n# Hyperparameters\n\n * `rng::AbstractRNG=default_rng()`: Random number generator. Using a `StableRNG` from `StableRNGs.jl` is advised.\n * `partial_sampling::Float64=0.7`: Ratio of samples to use in each subset of the data. The default should be fine for most cases.\n * `n_trees::Int=1000`: The number of trees to use. It is advisable to use at least a thousand trees for better rule selection and, in turn, better predictive performance.\n * `max_depth::Int=2`: The depth of the tree.
A lower depth decreases model complexity and can therefore improve accuracy when the sample size is small (reducing overfitting).\n * `q::Int=10`: Number of cutpoints to use per feature. The default value should be fine for most situations.\n * `min_data_in_leaf::Int=5`: Minimum number of data points per leaf.\n * `max_rules::Int=10`: This is the most important hyperparameter. In general, the more rules, the more accurate the model. However, more rules will also decrease model interpretability. So, it is important to find a good balance here. In most cases, 10 to 40 rules should provide reasonable accuracy while remaining interpretable.\n * `lambda::Float64=1.0`: The weights of the final rules are determined via a regularized regression over each rule as a binary feature. This hyperparameter specifies the strength of the ridge (L2) regularizer.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `fitresult`: A `StableRules` object.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return a vector of predictions for each row of `Xnew`.\n" +":name" = "StableRulesClassifier" +":human_name" = "stable rules classifier" +":is_supervised" = "`true`" +":prediction_type" = ":probabilistic" +":abstract_type" = "`MLJModelInterface.Probabilistic`" +":implemented_methods" = [":clean!", ":fit", ":predict"] +":hyperparameters" = "`(:rng, :partial_sampling, :n_trees, :max_depth, :q, :min_data_in_leaf, :max_rules, :lambda)`" +":hyperparameter_types" = "`(\"Random.AbstractRNG\", \"Real\", \"Int64\", \"Int64\", \"Int64\", \"Int64\", \"Int64\", \"Float64\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":iteration_parameter" = "`nothing`" +":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" +":deep_properties" = "`()`" +":reporting_operations" = "`()`" + +[SIRUS.StableRulesRegressor] +":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}, AbstractVector{<:ScientificTypesBase.Continuous}}`" +":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_pure_julia" = "`true`" +":package_name" = "SIRUS" +":package_license" = "MIT" +":load_path" = "SIRUS.StableRulesRegressor" +":package_uuid" = "9113e207-2504-4b06-8eee-d78e288bee65" +":package_url" = "https://github.com/rikhuijzer/SIRUS.jl" +":is_wrapper" = "`false`" +":supports_weights" = "`false`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":docstring" = "```\nStableRulesRegressor\n```\n\nA model type for constructing a stable rules regressor, based on [SIRUS.jl](https://github.com/rikhuijzer/SIRUS.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nStableRulesRegressor = @load StableRulesRegressor pkg=SIRUS\n```\n\nDo `model = StableRulesRegressor()` to construct an instance with default hyper-parameters.
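For orientation, a minimal MLJ usage sketch for the `StableRulesClassifier` entry added above; the data, hyper-parameter values, and variable names are illustrative assumptions, not taken from the registry:

```julia
using MLJ, StableRNGs

# Load the registered model type (assumes SIRUS.jl is installed).
StableRulesClassifier = @load StableRulesClassifier pkg=SIRUS

# Illustrative data: Continuous features, Multiclass target.
X = (a = rand(100), b = rand(100))
y = coerce(rand(["yes", "no"], 100), Multiclass)

# A StableRNG is advised for reproducible rule extraction.
model = StableRulesClassifier(rng = StableRNG(1), max_rules = 10)
mach = machine(model, X, y)
fit!(mach)

yhat = predict(mach, X)        # probabilistic predictions, per :prediction_type
ymode = predict_mode(mach, X)  # point predictions
```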
Provide keyword arguments to override hyper-parameter defaults, as in `StableRulesRegressor(rng=...)`.\n\n`StableRulesRegressor` implements an explainable rule-based regression model derived from a random forest.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n * `y`: the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine with `fit!(mach, rows=...)`.\n\n# Hyperparameters\n\n * `rng::AbstractRNG=default_rng()`: Random number generator. Using a `StableRNG` from `StableRNGs.jl` is advised.\n * `partial_sampling::Float64=0.7`: Ratio of samples to use in each subset of the data. The default should be fine for most cases.\n * `n_trees::Int=1000`: The number of trees to use. It is advisable to use at least a thousand trees for better rule selection and, in turn, better predictive performance.\n * `max_depth::Int=2`: The depth of the tree. A lower depth decreases model complexity and can therefore improve accuracy when the sample size is small (reducing overfitting).\n * `q::Int=10`: Number of cutpoints to use per feature. The default value should be fine for most situations.\n * `min_data_in_leaf::Int=5`: Minimum number of data points per leaf.\n * `max_rules::Int=10`: This is the most important hyperparameter. In general, the more rules, the more accurate the model. However, more rules will also decrease model interpretability. So, it is important to find a good balance here. In most cases, 10 to 40 rules should provide reasonable accuracy while remaining interpretable.\n * `lambda::Float64=1.0`: The weights of the final rules are determined via a regularized regression over each rule as a binary feature.
This hyperparameter specifies the strength of the ridge (L2) regularizer.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `fitresult`: A `StableRules` object.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return a vector of predictions for each row of `Xnew`.\n" +":name" = "StableRulesRegressor" +":human_name" = "stable rules regressor" +":is_supervised" = "`true`" +":prediction_type" = ":deterministic" +":abstract_type" = "`MLJModelInterface.Deterministic`" +":implemented_methods" = [":clean!", ":fit", ":predict"] +":hyperparameters" = "`(:rng, :partial_sampling, :n_trees, :max_depth, :q, :min_data_in_leaf, :max_rules, :lambda)`" +":hyperparameter_types" = "`(\"Random.AbstractRNG\", \"Real\", \"Int64\", \"Int64\", \"Int64\", \"Int64\", \"Int64\", \"Float64\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":iteration_parameter" = "`nothing`" +":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" +":deep_properties" = "`()`" +":reporting_operations" = "`()`" + +[SIRUS.StableForestClassifier] +":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Finite}`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}, AbstractVector{<:ScientificTypesBase.Finite}}`" +":predict_scitype" = "`AbstractVector{ScientificTypesBase.Density{_s25} where _s25<:ScientificTypesBase.Finite}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_pure_julia" = "`true`" +":package_name" = "SIRUS" +":package_license" = "MIT" +":load_path" = "SIRUS.StableForestClassifier" +":package_uuid" = "9113e207-2504-4b06-8eee-d78e288bee65" +":package_url" = "https://github.com/rikhuijzer/SIRUS.jl" +":is_wrapper" = "`false`" +":supports_weights" = "`false`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":docstring" = "```\nStableForestClassifier\n```\n\nA model type for constructing a stable forest classifier, based on [SIRUS.jl](https://github.com/rikhuijzer/SIRUS.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nStableForestClassifier = @load StableForestClassifier pkg=SIRUS\n```\n\nDo `model = StableForestClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `StableForestClassifier(rng=...)`.\n\n`StableForestClassifier` implements the random forest classifier with a stabilized forest structure (Bénard et al., [2021](http://proceedings.mlr.press/v130/benard21a.html)). This stabilization increases the stability of rule extraction. The impact on the predictive accuracy compared to standard random forests should be relatively small.\n\n!!! note\n Just like normal random forests, this model is not easily explainable.
If you are interested in an explainable model, use the `StableRulesClassifier` or `StableRulesRegressor`.\n\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n * `y`: the target, which can be any `AbstractVector` whose element scitype is `<:OrderedFactor` or `<:Multiclass`; check the scitype with `scitype(y)`\n\nTrain the machine with `fit!(mach, rows=...)`.\n\n# Hyperparameters\n\n * `rng::AbstractRNG=default_rng()`: Random number generator. Using a `StableRNG` from `StableRNGs.jl` is advised.\n * `partial_sampling::Float64=0.7`: Ratio of samples to use in each subset of the data. The default should be fine for most cases.\n * `n_trees::Int=1000`: The number of trees to use. It is advisable to use at least a thousand trees for better rule selection and, in turn, better predictive performance.\n * `max_depth::Int=2`: The depth of the tree. A lower depth decreases model complexity and can therefore improve accuracy when the sample size is small (reducing overfitting).\n * `q::Int=10`: Number of cutpoints to use per feature. The default value should be fine for most situations.\n * `min_data_in_leaf::Int=5`: Minimum number of data points per leaf.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `fitresult`: A `StableForest` object.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return a vector of predictions for each row of `Xnew`.\n" +":name" = "StableForestClassifier" +":human_name" = "stable forest classifier" +":is_supervised" = "`true`" +":prediction_type" = ":probabilistic" +":abstract_type" = "`MLJModelInterface.Probabilistic`" +":implemented_methods" = [":clean!", ":fit", ":predict"] +":hyperparameters" = "`(:rng, :partial_sampling, :n_trees, :max_depth, :q, :min_data_in_leaf)`" +":hyperparameter_types" = "`(\"Random.AbstractRNG\", \"Real\", \"Int64\", \"Int64\", \"Int64\", \"Int64\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" +":iteration_parameter" = "`nothing`" +":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" +":deep_properties" = "`()`" +":reporting_operations" = "`()`" + +[SIRUS.StableForestRegressor] +":input_scitype" = "`ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}`" +":output_scitype" = "`ScientificTypesBase.Unknown`" +":target_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":fit_data_scitype" = "`Tuple{ScientificTypesBase.Table{<:Union{AbstractVector{<:ScientificTypesBase.Continuous}, AbstractVector{<:ScientificTypesBase.Count}}}, AbstractVector{<:ScientificTypesBase.Continuous}}`" +":predict_scitype" = "`AbstractVector{<:ScientificTypesBase.Continuous}`" +":transform_scitype" = "`ScientificTypesBase.Unknown`" +":inverse_transform_scitype" = "`ScientificTypesBase.Unknown`" +":is_pure_julia" = "`true`" +":package_name" = "SIRUS" +":package_license" = "MIT" +":load_path" = "SIRUS.StableForestRegressor" +":package_uuid" = "9113e207-2504-4b06-8eee-d78e288bee65" +":package_url" = "https://github.com/rikhuijzer/SIRUS.jl" +":is_wrapper" = "`false`" +":supports_weights" = "`false`" +":supports_class_weights" = "`false`" +":supports_online" = "`false`" +":docstring" =
"```\nStableForestRegressor\n```\n\nA model type for constructing a stable forest regressor, based on [SIRUS.jl](https://github.com/rikhuijzer/SIRUS.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nStableForestRegressor = @load StableForestRegressor pkg=SIRUS\n```\n\nDo `model = StableForestRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `StableForestRegressor(rng=...)`.\n\n`StableForestRegressor` implements the random forest regressor with a stabilized forest structure (Bénard et al., [2021](http://proceedings.mlr.press/v130/benard21a.html)).\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n * `y`: the target, which can be any `AbstractVector` whose element scitype is `<:OrderedFactor` or `<:Multiclass`; check the scitype with `scitype(y)`\n\nTrain the machine with `fit!(mach, rows=...)`.\n\n# Hyperparameters\n\n * `rng::AbstractRNG=default_rng()`: Random number generator. Using a `StableRNG` from `StableRNGs.jl` is advised.\n * `partial_sampling::Float64=0.7`: Ratio of samples to use in each subset of the data. The default should be fine for most cases.\n * `n_trees::Int=1000`: The number of trees to use. It is advisable to use at least thousand trees to for a better rule selection, and in turn better predictive performance.\n * `max_depth::Int=2`: The depth of the tree. A lower depth decreases model complexity and can therefore improve accuracy when the sample size is small (reduce overfitting).\n * `q::Int=10`: Number of cutpoints to use per feature. 
The default value should be fine for most situations.\n * `min_data_in_leaf::Int=5`: Minimum number of data points per leaf.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `fitresult`: A `StableForest` object.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return a vector of predictions for each row of `Xnew`.\n" +":name" = "StableForestRegressor" +":human_name" = "stable forest regressor" +":is_supervised" = "`true`" +":prediction_type" = ":deterministic" +":abstract_type" = "`MLJModelInterface.Deterministic`" +":implemented_methods" = [":clean!", ":fit", ":predict"] +":hyperparameters" = "`(:rng, :partial_sampling, :n_trees, :max_depth, :q, :min_data_in_leaf)`" +":hyperparameter_types" = "`(\"Random.AbstractRNG\", \"Real\", \"Int64\", \"Int64\", \"Int64\", \"Int64\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing)`" +":iteration_parameter" = "`nothing`" +":supports_training_losses" = "`false`" +":reports_feature_importances" = "`false`" +":deep_properties" = "`()`" +":reporting_operations" = "`()`" + [PartialLeastSquaresRegressor.KPLSRegressor] ":input_scitype" = "`ScientificTypesBase.Table{<:AbstractVector{<:ScientificTypesBase.Continuous}}`" ":output_scitype" = "`ScientificTypesBase.Unknown`" @@ -4947,16 +5083,16 @@ ":supports_weights" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = "```\nMultitargetSRRegressor\n```\n\nA model type for constructing a Multi-Target Symbolic Regression via Evolutionary Search, based on [SymbolicRegression.jl](https://github.com/MilesCranmer/SymbolicRegression.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nMultitargetSRRegressor = @load MultitargetSRRegressor pkg=SymbolicRegression\n```\n\nDo `model = MultitargetSRRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `MultitargetSRRegressor(binary_operators=...)`.\n\nMulti-target Symbolic Regression regressor (`MultitargetSRRegressor`) conducts several searches for expressions that predict each target variable from a set of input variables. All data is assumed to be `Continuous`. The search is performed using an evolutionary algorithm. This algorithm is described in the paper https://arxiv.org/abs/2305.01582.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nOR\n\n```\nmach = machine(model, X, y, w)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype\n\n`Continuous`; check column scitypes with `schema(X)`. Variable names in discovered expressions will be taken from the column names of `X`, if available.\n\n * `y` is the target, which can be any table of target variables whose element scitype is `Continuous`; check the scitype with `schema(y)`.\n * `w` is the observation weights which can either be `nothing` (default) or an `AbstractVector` whoose element scitype is `Count` or `Continuous`. The same weights are used for all targets.\n\nTrain the machine using `fit!(mach)`, inspect the discovered expressions with `report(mach)`, and predict on new data with `predict(mach, Xnew)`. Note that unlike other regressors, symbolic regression stores a list of lists of trained models. 
The models chosen from each of these lists is defined by the function `selection_method` keyword argument, which by default balances accuracy and complexity.\n\n# Hyper-parameters\n\n * `binary_operators`: Vector of binary operators (functions) to use. Each operator should be defined for two input scalars, and one output scalar. All operators need to be defined over the entire real line (excluding infinity - these are stopped before they are input), or return `NaN` where not defined. For speed, define it so it takes two reals of the same type as input, and outputs the same type. For the SymbolicUtils simplification backend, you will need to define a generic method of the operator so it takes arbitrary types.\n * `unary_operators`: Same, but for unary operators (one input scalar, gives an output scalar).\n * `constraints`: Array of pairs specifying size constraints for each operator. The constraints for a binary operator should be a 2-tuple (e.g., `(-1, -1)`) and the constraints for a unary operator should be an `Int`. A size constraint is a limit to the size of the subtree in each argument of an operator. e.g., `[(^)=>(-1, 3)]` means that the `^` operator can have arbitrary size (`-1`) in its left argument, but a maximum size of `3` in its right argument. Default is no constraints.\n * `batching`: Whether to evolve based on small mini-batches of data, rather than the entire dataset.\n * `batch_size`: What batch size to use if using batching.\n * `elementwise_loss`: What elementwise loss function to use. Can be one of the following losses, or any other loss of type `SupervisedLoss`. You can also pass a function that takes a scalar target (left argument), and scalar predicted (right argument), and returns a scalar. This will be averaged over the predicted data. If weights are supplied, your function should take a third argument for the weight scalar. Included losses: Regression: - `LPDistLoss{P}()`, - `L1DistLoss()`, - `L2DistLoss()` (mean square), - `LogitDistLoss()`, - `HuberLoss(d)`, - `L1EpsilonInsLoss(ϵ)`, - `L2EpsilonInsLoss(ϵ)`, - `PeriodicLoss(c)`, - `QuantileLoss(τ)`, Classification: - `ZeroOneLoss()`, - `PerceptronLoss()`, - `L1HingeLoss()`, - `SmoothedL1HingeLoss(γ)`, - `ModifiedHuberLoss()`, - `L2MarginLoss()`, - `ExpLoss()`, - `SigmoidLoss()`, - `DWDMarginLoss(q)`.\n * `loss_function`: Alternatively, you may redefine the loss used as any function of `tree::Node{T}`, `dataset::Dataset{T}`, and `options::Options`, so long as you output a non-negative scalar of type `T`. This is useful if you want to use a loss that takes into account derivatives, or correlations across the dataset. This also means you could use a custom evaluation for a particular expression. Take a look at `_eval_loss` in the file `src/LossFunctions.jl` for an example.\n * `npopulations`: How many populations of equations to use. By default this is set equal to the number of cores\n * `npop`: How many equations in each population.\n * `ncycles_per_iteration`: How many generations to consider per iteration.\n * `tournament_selection_n`: Number of expressions considered in each tournament.\n * `tournament_selection_p`: The fittest expression in a tournament is to be selected with probability `p`, the next fittest with probability `p*(1-p)`, and so forth.\n * `topn`: Number of equations to return to the host process, and to consider for the hall of fame.\n * `complexity_of_operators`: What complexity should be assigned to each operator, and the occurrence of a constant or variable. 
By default, this is 1 for all operators. Can be a real number as well, in which case the complexity of an expression will be rounded to the nearest integer. Input this in the form of, e.g., [(^) => 3, sin => 2].\n * `complexity_of_constants`: What complexity should be assigned to use of a constant. By default, this is 1.\n * `complexity_of_variables`: What complexity should be assigned to each variable. By default, this is 1.\n * `alpha`: The probability of accepting an equation mutation during regularized evolution is given by exp(-delta_loss/(alpha * T)), where T goes from 1 to 0. Thus, alpha=infinite is the same as no annealing.\n * `maxsize`: Maximum size of equations during the search.\n * `maxdepth`: Maximum depth of equations during the search, by default this is set equal to the maxsize.\n * `parsimony`: A multiplicative factor for how much complexity is punished.\n * `use_frequency`: Whether to use a parsimony that adapts to the relative proportion of equations at each complexity; this will ensure that there are a balanced number of equations considered for every complexity.\n * `use_frequency_in_tournament`: Whether to use the adaptive parsimony described above inside the score, rather than just at the mutation accept/reject stage.\n * `adaptive_parsimony_scaling`: How much to scale the adaptive parsimony term in the loss. Increase this if the search is spending too much time optimizing the most complex equations.\n * `fast_cycle`: Whether to thread over subsamples of equations during regularized evolution. Slightly improves performance, but is a different algorithm.\n * `turbo`: Whether to use `LoopVectorization.@turbo` to evaluate expressions. This can be significantly faster, but is only compatible with certain operators. *Experimental!*\n * `migration`: Whether to migrate equations between processes.\n * `hof_migration`: Whether to migrate equations from the hall of fame to processes.\n * `fraction_replaced`: What fraction of each population to replace with migrated equations at the end of each cycle.\n * `fraction_replaced_hof`: What fraction to replace with hall of fame equations at the end of each cycle.\n * `should_simplify`: Whether to simplify equations. If you pass a custom objective, this will be set to `false`.\n * `should_optimize_constants`: Whether to use an optimization algorithm to periodically optimize constants in equations.\n * `optimizer_nrestarts`: How many different random starting positions to consider for optimization of constants.\n * `optimizer_algorithm`: Select algorithm to use for optimizing constants. Default is \"BFGS\", but \"NelderMead\" is also supported.\n * `optimizer_options`: General options for the constant optimization. For details we refer to the documentation on `Optim.Options` from the `Optim.jl` package. Options can be provided here as `NamedTuple`, e.g. `(iterations=16,)`, as a `Dict`, e.g. Dict(:x_tol => 1.0e-32,), or as an `Optim.Options` instance.\n * `output_file`: What file to store equations to, as a backup.\n * `perturbation_factor`: When mutating a constant, either multiply or divide by (1+perturbation_factor)^(rand()+1).\n * `probability_negate_constant`: Probability of negating a constant in the equation when mutating it.\n * `mutation_weights`: Relative probabilities of the mutations. The struct `MutationWeights` should be passed to these options. 
See its documentation on `MutationWeights` for the different weights.\n * `crossover_probability`: Probability of performing crossover.\n * `annealing`: Whether to use simulated annealing.\n * `warmup_maxsize_by`: Whether to slowly increase the max size from 5 up to `maxsize`. If nonzero, specifies the fraction through the search at which the maxsize should be reached.\n * `verbosity`: Whether to print debugging statements or not.\n * `save_to_file`: Whether to save equations to a file during the search.\n * `bin_constraints`: See `constraints`. This is the same, but specified for binary operators only (for example, if you have an operator that is both a binary and unary operator).\n * `una_constraints`: Likewise, for unary operators.\n * `seed`: What random seed to use. `nothing` uses no seed.\n * `progress`: Whether to use a progress bar output (`verbosity` will have no effect).\n * `early_stop_condition`: Float - whether to stop early if the mean loss gets below this value. Function - a function taking (loss, complexity) as arguments and returning true or false.\n * `timeout_in_seconds`: Float64 - the time in seconds after which to exit (as an alternative to the number of iterations).\n * `max_evals`: Int (or Nothing) - the maximum number of evaluations of expressions to perform.\n * `skip_mutation_failures`: Whether to simply skip over mutations that fail or are rejected, rather than to replace the mutated expression with the original expression and proceed normally.\n * `enable_autodiff`: Whether to enable automatic differentiation functionality. This is turned off by default. If turned on, this will be turned off if one of the operators does not have well-defined gradients.\n * `nested_constraints`: Specifies how many times a combination of operators can be nested. For example, `[sin => [cos => 0], cos => [cos => 2]]` specifies that `cos` may never appear within a `sin`, but `sin` can be nested with itself an unlimited number of times. The second term specifies that `cos` can be nested up to 2 times within a `cos`, so that `cos(cos(cos(x)))` is allowed (as well as any combination of `+` or `-` within it), but `cos(cos(cos(cos(x))))` is not allowed. When an operator is not specified, it is assumed that it can be nested an unlimited number of times. This requires that there is no operator which is used both in the unary operators and the binary operators (e.g., `-` could be both subtract, and negation). For binary operators, both arguments are treated the same way, and the max of each argument is constrained.\n * `deterministic`: Use a global counter for the birth time, rather than calls to `time()`. This gives perfect resolution, and is therefore deterministic. However, it is not thread safe, and must be used in serial mode.\n * `define_helper_functions`: Whether to define helper functions for constructing and evaluating trees.\n * `niterations::Int=10`: The number of iterations to perform the search. More iterations will improve the results.\n * `parallelism=:multithreading`: What parallelism mode to use. The options are `:multithreading`, `:multiprocessing`, and `:serial`. By default, multithreading will be used. Multithreading uses less memory, but multiprocessing can handle multi-node compute. If using `:multithreading` mode, the number of threads available to julia are used. If using `:multiprocessing`, `numprocs` processes will be created dynamically if `procs` is unset. If you have already allocated processes, pass them to the `procs` argument and they will be used. 
You may also pass a string instead of a symbol, like `\"multithreading\"`.\n * `numprocs::Union{Int, Nothing}=nothing`: The number of processes to use, if you want `equation_search` to set this up automatically. By default this will be `4`, but can be any number (you should pick a number <= the number of cores available).\n * `procs::Union{Vector{Int}, Nothing}=nothing`: If you have set up a distributed run manually with `procs = addprocs()` and `@everywhere`, pass the `procs` to this keyword argument.\n * `addprocs_function::Union{Function, Nothing}=nothing`: If using multiprocessing (`parallelism=:multithreading`), and are not passing `procs` manually, then they will be allocated dynamically using `addprocs`. However, you may also pass a custom function to use instead of `addprocs`. This function should take a single positional argument, which is the number of processes to use, as well as the `lazy` keyword argument. For example, if set up on a slurm cluster, you could pass `addprocs_function = addprocs_slurm`, which will set up slurm processes.\n * `runtests::Bool=true`: Whether to run (quick) tests before starting the search, to see if there will be any problems during the equation search related to the host environment.\n * `loss_type::Type=Nothing`: If you would like to use a different type for the loss than for the data you passed, specify the type here. Note that if you pass complex data `::Complex{L}`, then the loss type will automatically be set to `L`.\n * `selection_method::Function`: Function to selection expression from the Pareto frontier for use in `predict`. See `SymbolicRegression.MLJInterfaceModule.choose_best` for an example. This function should return a single integer specifying the index of the expression to use. By default, `choose_best` maximizes the score (a pound-for-pound rating) of expressions reaching the threshold of 1.5x the minimum loss. To fix the index at `5`, you could just write `Returns(5)`.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew`, which should have same scitype as `X` above. The expression used for prediction is defined by the `selection_method` function, which can be seen by viewing `report(mach).best_idx`.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `best_idx::Vector{Int}`: The index of the best expression in each Pareto frontier, as determined by the `selection_method` function.\n * `equations::Vector{Vector{Node{T}}}`: The expressions discovered by the search, represented in a dominating Pareto frontier (i.e., the best expressions found for each complexity). The outer vector is indexed by target variable, and the inner vector is ordered by increasing complexity. `T` is equal to the element type of the passed data.\n * `equation_strings::Vector{Vector{String}}`: The expressions discovered by the search, represented as strings for easy inspection.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `best_idx::Vector{Int}`: The index of the best expression in each Pareto frontier, as determined by the `selection_method` function.\n * `equations::Vector{Vector{Node{T}}}`: The expressions discovered by the search, represented in a dominating Pareto frontier (i.e., the best expressions found for each complexity). 
The outer vector is indexed by target variable, and the inner vector is ordered by increasing complexity.\n * `equation_strings::Vector{Vector{String}}`: The expressions discovered by the search, represented as strings for easy inspection.\n * `complexities::Vector{Vector{Int}}`: The complexity of each expression in each Pareto frontier.\n * `losses::Vector{Vector{L}}`: The loss of each expression in each Pareto frontier, according to the loss function specified in the model. The type `L` is the loss type, which is usually the same as the element type of data passed (i.e., `T`), but can differ if complex data types are passed.\n * `scores::Vector{Vector{L}}`: A metric which considers both the complexity and loss of an expression, equal to the change in the log-loss divided by the change in complexity, relative to the previous expression along the Pareto frontier. A larger score aims to indicate an expression is more likely to be the true expression generating the data, but this is very problem-dependent and generally several other factors should be considered.\n\n# Examples\n\n```julia\nusing MLJ\nMultitargetSRRegressor = @load MultitargetSRRegressor pkg=SymbolicRegression\nX = (a=rand(100), b=rand(100), c=rand(100))\nY = (y1=(@. cos(X.c) * 2.1 - 0.9), y2=(@. X.a * X.b + X.c))\nmodel = MultitargetSRRegressor(binary_operators=[+, -, *], unary_operators=[exp], niterations=100)\nmach = machine(model, X, Y)\nfit!(mach)\ny_hat = predict(mach, X)\n# View the equations used:\nr = report(mach)\nfor (output_index, (eq, i)) in enumerate(zip(r.equation_strings, r.best_idx))\n println(\"Equation used for \", output_index, \": \", eq[i])\nend\n```\n\nSee also [`SRRegressor`](@ref).\n" +":docstring" = "```\nMultitargetSRRegressor\n```\n\nA model type for constructing a Multi-Target Symbolic Regression via Evolutionary Search, based on [SymbolicRegression.jl](https://github.com/MilesCranmer/SymbolicRegression.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nMultitargetSRRegressor = @load MultitargetSRRegressor pkg=SymbolicRegression\n```\n\nDo `model = MultitargetSRRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `MultitargetSRRegressor(binary_operators=...)`.\n\nMulti-target Symbolic Regression regressor (`MultitargetSRRegressor`) conducts several searches for expressions that predict each target variable from a set of input variables. All data is assumed to be `Continuous`. The search is performed using an evolutionary algorithm. This algorithm is described in the paper https://arxiv.org/abs/2305.01582.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nOR\n\n```\nmach = machine(model, X, y, w)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`. Variable names in discovered expressions will be taken from the column names of `X`, if available. Units in columns of `X` (use `DynamicQuantities` for units) will trigger the use of dimensional analysis.\n * `y` is the target, which can be any table of target variables whose element scitype is `Continuous`; check the scitype with `schema(y)`.
Units in columns of `y` (use `DynamicQuantities` for units) will trigger the use of dimensional analysis.\n * `w` is the observation weights, which can either be `nothing` (default) or an `AbstractVector` whose element scitype is `Count` or `Continuous`. The same weights are used for all targets.\n\nTrain the machine using `fit!(mach)`, inspect the discovered expressions with `report(mach)`, and predict on new data with `predict(mach, Xnew)`. Note that unlike other regressors, symbolic regression stores a list of lists of trained models. The model chosen from each of these lists is determined by the `selection_method` keyword argument, which by default balances accuracy and complexity.\n\n# Hyper-parameters\n\n * `binary_operators`: Vector of binary operators (functions) to use. Each operator should be defined for two input scalars, and one output scalar. All operators need to be defined over the entire real line (excluding infinity - these are stopped before they are input), or return `NaN` where not defined. For speed, define it so it takes two reals of the same type as input, and outputs the same type. For the SymbolicUtils simplification backend, you will need to define a generic method of the operator so it takes arbitrary types.\n * `unary_operators`: Same, but for unary operators (one input scalar, gives an output scalar).\n * `constraints`: Array of pairs specifying size constraints for each operator. The constraints for a binary operator should be a 2-tuple (e.g., `(-1, -1)`) and the constraints for a unary operator should be an `Int`. A size constraint is a limit to the size of the subtree in each argument of an operator. e.g., `[(^)=>(-1, 3)]` means that the `^` operator can have arbitrary size (`-1`) in its left argument, but a maximum size of `3` in its right argument. Default is no constraints.\n * `batching`: Whether to evolve based on small mini-batches of data, rather than the entire dataset.\n * `batch_size`: What batch size to use if using batching.\n * `elementwise_loss`: What elementwise loss function to use. Can be one of the following losses, or any other loss of type `SupervisedLoss`. You can also pass a function that takes a scalar target (left argument), and scalar predicted (right argument), and returns a scalar. This will be averaged over the predicted data. If weights are supplied, your function should take a third argument for the weight scalar. Included losses: Regression: - `LPDistLoss{P}()`, - `L1DistLoss()`, - `L2DistLoss()` (mean square), - `LogitDistLoss()`, - `HuberLoss(d)`, - `L1EpsilonInsLoss(ϵ)`, - `L2EpsilonInsLoss(ϵ)`, - `PeriodicLoss(c)`, - `QuantileLoss(τ)`, Classification: - `ZeroOneLoss()`, - `PerceptronLoss()`, - `L1HingeLoss()`, - `SmoothedL1HingeLoss(γ)`, - `ModifiedHuberLoss()`, - `L2MarginLoss()`, - `ExpLoss()`, - `SigmoidLoss()`, - `DWDMarginLoss(q)`.\n * `loss_function`: Alternatively, you may redefine the loss used as any function of `tree::Node{T}`, `dataset::Dataset{T}`, and `options::Options`, so long as you output a non-negative scalar of type `T`. This is useful if you want to use a loss that takes into account derivatives, or correlations across the dataset. This also means you could use a custom evaluation for a particular expression. If you are using `batching=true`, then your function should accept a fourth argument `idx`, which is either `nothing` (indicating that the full dataset should be used), or a vector of indices to use for the batch.
For example,\n\n ```\n function my_loss(tree, dataset::Dataset{T,L}, options)::L where {T,L}\n prediction, flag = eval_tree_array(tree, dataset.X, options)\n if !flag\n return L(Inf)\n end\n return sum((prediction .- dataset.y) .^ 2) / dataset.n\n end\n ```\n * `npopulations`: How many populations of equations to use. By default this is set equal to the number of cores.\n * `npop`: How many equations in each population.\n * `ncycles_per_iteration`: How many generations to consider per iteration.\n * `tournament_selection_n`: Number of expressions considered in each tournament.\n * `tournament_selection_p`: The fittest expression in a tournament is to be selected with probability `p`, the next fittest with probability `p*(1-p)`, and so forth.\n * `topn`: Number of equations to return to the host process, and to consider for the hall of fame.\n * `complexity_of_operators`: What complexity should be assigned to each operator, and the occurrence of a constant or variable. By default, this is 1 for all operators. Can be a real number as well, in which case the complexity of an expression will be rounded to the nearest integer. Input this in the form of, e.g., `[(^) => 3, sin => 2]`.\n * `complexity_of_constants`: What complexity should be assigned to use of a constant. By default, this is 1.\n * `complexity_of_variables`: What complexity should be assigned to each variable. By default, this is 1.\n * `alpha`: The probability of accepting an equation mutation during regularized evolution is given by `exp(-delta_loss/(alpha * T))`, where `T` goes from 1 to 0. Thus, alpha=infinite is the same as no annealing.\n * `maxsize`: Maximum size of equations during the search.\n * `maxdepth`: Maximum depth of equations during the search, by default this is set equal to `maxsize`.\n * `parsimony`: A multiplicative factor for how much complexity is punished.\n * `dimensional_constraint_penalty`: An additive factor if the dimensional constraint is violated.\n * `use_frequency`: Whether to use a parsimony that adapts to the relative proportion of equations at each complexity; this will ensure that there are a balanced number of equations considered for every complexity.\n * `use_frequency_in_tournament`: Whether to use the adaptive parsimony described above inside the score, rather than just at the mutation accept/reject stage.\n * `adaptive_parsimony_scaling`: How much to scale the adaptive parsimony term in the loss. Increase this if the search is spending too much time optimizing the most complex equations.\n * `fast_cycle`: Whether to thread over subsamples of equations during regularized evolution. Slightly improves performance, but is a different algorithm.\n * `turbo`: Whether to use `LoopVectorization.@turbo` to evaluate expressions. This can be significantly faster, but is only compatible with certain operators. *Experimental!*\n * `migration`: Whether to migrate equations between processes.\n * `hof_migration`: Whether to migrate equations from the hall of fame to processes.\n * `fraction_replaced`: What fraction of each population to replace with migrated equations at the end of each cycle.\n * `fraction_replaced_hof`: What fraction to replace with hall of fame equations at the end of each cycle.\n * `should_simplify`: Whether to simplify equations.
If you pass a custom objective, this will be set to `false`.\n * `should_optimize_constants`: Whether to use an optimization algorithm to periodically optimize constants in equations.\n * `optimizer_nrestarts`: How many different random starting positions to consider for optimization of constants.\n * `optimizer_algorithm`: Select algorithm to use for optimizing constants. Default is \"BFGS\", but \"NelderMead\" is also supported.\n * `optimizer_options`: General options for the constant optimization. For details we refer to the documentation on `Optim.Options` from the `Optim.jl` package. Options can be provided here as a `NamedTuple`, e.g. `(iterations=16,)`, as a `Dict`, e.g. `Dict(:x_tol => 1.0e-32)`, or as an `Optim.Options` instance.\n * `output_file`: What file to store equations to, as a backup.\n * `perturbation_factor`: When mutating a constant, either multiply or divide by `(1+perturbation_factor)^(rand()+1)`.\n * `probability_negate_constant`: Probability of negating a constant in the equation when mutating it.\n * `mutation_weights`: Relative probabilities of the mutations. The struct `MutationWeights` should be passed to this option. See the documentation on `MutationWeights` for the different weights.\n * `crossover_probability`: Probability of performing crossover.\n * `annealing`: Whether to use simulated annealing.\n * `warmup_maxsize_by`: Whether to slowly increase the max size from 5 up to `maxsize`. If nonzero, specifies the fraction through the search at which the maxsize should be reached.\n * `verbosity`: Whether to print debugging statements or not.\n * `print_precision`: How many digits to print when printing equations. By default, this is 5.\n * `save_to_file`: Whether to save equations to a file during the search.\n * `bin_constraints`: See `constraints`. This is the same, but specified for binary operators only (for example, if you have an operator that is both a binary and unary operator).\n * `una_constraints`: Likewise, for unary operators.\n * `seed`: What random seed to use. `nothing` uses no seed.\n * `progress`: Whether to use a progress bar output (`verbosity` will have no effect).\n * `early_stop_condition`: Float - stop early if the mean loss gets below this value. Function - a function taking `(loss, complexity)` as arguments and returning true or false.\n * `timeout_in_seconds`: Float64 - the time in seconds after which to exit (as an alternative to the number of iterations).\n * `max_evals`: Int (or Nothing) - the maximum number of evaluations of expressions to perform.\n * `skip_mutation_failures`: Whether to simply skip over mutations that fail or are rejected, rather than to replace the mutated expression with the original expression and proceed normally.\n * `enable_autodiff`: Whether to enable automatic differentiation functionality. This is turned off by default. If turned on, this will be turned off if one of the operators does not have well-defined gradients.\n * `nested_constraints`: Specifies how many times a combination of operators can be nested. For example, `[sin => [cos => 0], cos => [cos => 2]]` specifies that `cos` may never appear within a `sin`, but `sin` can be nested with itself an unlimited number of times. The second term specifies that `cos` can be nested up to 2 times within a `cos`, so that `cos(cos(cos(x)))` is allowed (as well as any combination of `+` or `-` within it), but `cos(cos(cos(cos(x))))` is not allowed. When an operator is not specified, it is assumed that it can be nested an unlimited number of times.
This requires that there is no operator which is used both in the unary operators and the binary operators (e.g., `-` could be both subtract, and negation). For binary operators, both arguments are treated the same way, and the max of each argument is constrained.\n * `deterministic`: Use a global counter for the birth time, rather than calls to `time()`. This gives perfect resolution, and is therefore deterministic. However, it is not thread safe, and must be used in serial mode.\n * `define_helper_functions`: Whether to define helper functions for constructing and evaluating trees.\n * `niterations::Int=10`: The number of iterations to perform the search. More iterations will improve the results.\n * `parallelism=:multithreading`: What parallelism mode to use. The options are `:multithreading`, `:multiprocessing`, and `:serial`. By default, multithreading will be used. Multithreading uses less memory, but multiprocessing can handle multi-node compute. If using `:multithreading` mode, the number of threads available to Julia is used. If using `:multiprocessing`, `numprocs` processes will be created dynamically if `procs` is unset. If you have already allocated processes, pass them to the `procs` argument and they will be used. You may also pass a string instead of a symbol, like `\"multithreading\"`.\n * `numprocs::Union{Int, Nothing}=nothing`: The number of processes to use, if you want `equation_search` to set this up automatically. By default this will be `4`, but can be any number (you should pick a number <= the number of cores available).\n * `procs::Union{Vector{Int}, Nothing}=nothing`: If you have set up a distributed run manually with `procs = addprocs()` and `@everywhere`, pass the `procs` to this keyword argument.\n * `addprocs_function::Union{Function, Nothing}=nothing`: If using multiprocessing (`parallelism=:multiprocessing`), and you are not passing `procs` manually, then they will be allocated dynamically using `addprocs`. However, you may also pass a custom function to use instead of `addprocs`. This function should take a single positional argument, which is the number of processes to use, as well as the `lazy` keyword argument. For example, if set up on a slurm cluster, you could pass `addprocs_function = addprocs_slurm`, which will set up slurm processes.\n * `runtests::Bool=true`: Whether to run (quick) tests before starting the search, to see if there will be any problems during the equation search related to the host environment.\n * `loss_type::Type=Nothing`: If you would like to use a different type for the loss than for the data you passed, specify the type here. Note that if you pass complex data `::Complex{L}`, then the loss type will automatically be set to `L`.\n * `selection_method::Function`: Function to select the expression from the Pareto frontier for use in `predict`. See `SymbolicRegression.MLJInterfaceModule.choose_best` for an example. This function should return a single integer specifying the index of the expression to use. By default, `choose_best` maximizes the score (a pound-for-pound rating) of expressions reaching the threshold of 1.5x the minimum loss. To fix the index at `5`, you could just write `Returns(5)`.\n * `dimensions_type::AbstractDimensions`: The type of dimensions to use when storing the units of the data. By default this is `DynamicQuantities.DEFAULT_DIM_TYPE`.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew`, which should have the same scitype as `X` above.
The expression used for prediction is defined by the `selection_method` function, which can be seen by viewing `report(mach).best_idx`.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `best_idx::Vector{Int}`: The index of the best expression in each Pareto frontier, as determined by the `selection_method` function.\n * `equations::Vector{Vector{Node{T}}}`: The expressions discovered by the search, represented in a dominating Pareto frontier (i.e., the best expressions found for each complexity). The outer vector is indexed by target variable, and the inner vector is ordered by increasing complexity. `T` is equal to the element type of the passed data.\n * `equation_strings::Vector{Vector{String}}`: The expressions discovered by the search, represented as strings for easy inspection.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `best_idx::Vector{Int}`: The index of the best expression in each Pareto frontier, as determined by the `selection_method` function.\n * `equations::Vector{Vector{Node{T}}}`: The expressions discovered by the search, represented in a dominating Pareto frontier (i.e., the best expressions found for each complexity). The outer vector is indexed by target variable, and the inner vector is ordered by increasing complexity.\n * `equation_strings::Vector{Vector{String}}`: The expressions discovered by the search, represented as strings for easy inspection.\n * `complexities::Vector{Vector{Int}}`: The complexity of each expression in each Pareto frontier.\n * `losses::Vector{Vector{L}}`: The loss of each expression in each Pareto frontier, according to the loss function specified in the model. The type `L` is the loss type, which is usually the same as the element type of data passed (i.e., `T`), but can differ if complex data types are passed.\n * `scores::Vector{Vector{L}}`: A metric which considers both the complexity and loss of an expression, equal to the change in the log-loss divided by the change in complexity, relative to the previous expression along the Pareto frontier. A larger score aims to indicate an expression is more likely to be the true expression generating the data, but this is very problem-dependent and generally several other factors should be considered.\n\n# Examples\n\n```julia\nusing MLJ\nMultitargetSRRegressor = @load MultitargetSRRegressor pkg=SymbolicRegression\nX = (a=rand(100), b=rand(100), c=rand(100))\nY = (y1=(@. cos(X.c) * 2.1 - 0.9), y2=(@. 
X.a * X.b + X.c))\nmodel = MultitargetSRRegressor(binary_operators=[+, -, *], unary_operators=[exp], niterations=100)\nmach = machine(model, X, Y)\nfit!(mach)\ny_hat = predict(mach, X)\n# View the equations used:\nr = report(mach)\nfor (output_index, (eq, i)) in enumerate(zip(r.equation_strings, r.best_idx))\n println(\"Equation used for \", output_index, \": \", eq[i])\nend\n```\n\nSee also [`SRRegressor`](@ref).\n" ":name" = "MultitargetSRRegressor" ":human_name" = "Multi-Target Symbolic Regression via Evolutionary Search" ":is_supervised" = "`true`" ":prediction_type" = ":deterministic" ":abstract_type" = "`MLJModelInterface.Deterministic`" ":implemented_methods" = [":predict"] -":hyperparameters" = "`(:binary_operators, :unary_operators, :constraints, :elementwise_loss, :loss_function, :tournament_selection_n, :tournament_selection_p, :topn, :complexity_of_operators, :complexity_of_constants, :complexity_of_variables, :parsimony, :alpha, :maxsize, :maxdepth, :fast_cycle, :turbo, :migration, :hof_migration, :should_simplify, :should_optimize_constants, :output_file, :npopulations, :perturbation_factor, :annealing, :batching, :batch_size, :mutation_weights, :crossover_probability, :warmup_maxsize_by, :use_frequency, :use_frequency_in_tournament, :adaptive_parsimony_scaling, :npop, :ncycles_per_iteration, :fraction_replaced, :fraction_replaced_hof, :verbosity, :save_to_file, :probability_negate_constant, :seed, :bin_constraints, :una_constraints, :progress, :terminal_width, :optimizer_algorithm, :optimizer_nrestarts, :optimizer_probability, :optimizer_iterations, :optimizer_options, :recorder, :recorder_file, :early_stop_condition, :timeout_in_seconds, :max_evals, :skip_mutation_failures, :enable_autodiff, :nested_constraints, :deterministic, :define_helper_functions, :niterations, :parallelism, :numprocs, :procs, :addprocs_function, :runtests, :loss_type, :selection_method)`" -":hyperparameter_types" = "`(\"Any\", \"Any\", \"Any\", \"Union{Nothing, Function, LossFunctions.SupervisedLoss}\", \"Union{Nothing, Function}\", \"Integer\", \"Real\", \"Integer\", \"Any\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Real\", \"Real\", \"Integer\", \"Union{Nothing, Integer}\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, Bool}\", \"Bool\", \"Union{Nothing, AbstractString}\", \"Union{Nothing, Integer}\", \"Real\", \"Bool\", \"Bool\", \"Integer\", \"Union{SymbolicRegression.CoreModule.OptionsStructModule.MutationWeights, NamedTuple, AbstractVector}\", \"Real\", \"Real\", \"Bool\", \"Bool\", \"Real\", \"Integer\", \"Integer\", \"Real\", \"Real\", \"Real\", \"Bool\", \"Real\", \"Any\", \"Any\", \"Any\", \"Bool\", \"Union{Nothing, Integer}\", \"AbstractString\", \"Integer\", \"Real\", \"Union{Nothing, Integer}\", \"Union{Nothing, Dict, NamedTuple, Optim.Options}\", \"Union{Nothing, Bool}\", \"AbstractString\", \"Union{Nothing, Function, Real}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"Bool\", \"Bool\", \"Any\", \"Bool\", \"Bool\", \"Int64\", \"Symbol\", \"Union{Nothing, Int64}\", \"Union{Nothing, Vector{Int64}}\", \"Union{Nothing, Function}\", \"Bool\", \"Type\", \"Function\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, 
nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":hyperparameters" = "`(:binary_operators, :unary_operators, :constraints, :elementwise_loss, :loss_function, :tournament_selection_n, :tournament_selection_p, :topn, :complexity_of_operators, :complexity_of_constants, :complexity_of_variables, :parsimony, :dimensional_constraint_penalty, :alpha, :maxsize, :maxdepth, :fast_cycle, :turbo, :migration, :hof_migration, :should_simplify, :should_optimize_constants, :output_file, :npopulations, :perturbation_factor, :annealing, :batching, :batch_size, :mutation_weights, :crossover_probability, :warmup_maxsize_by, :use_frequency, :use_frequency_in_tournament, :adaptive_parsimony_scaling, :npop, :ncycles_per_iteration, :fraction_replaced, :fraction_replaced_hof, :verbosity, :print_precision, :save_to_file, :probability_negate_constant, :seed, :bin_constraints, :una_constraints, :progress, :terminal_width, :optimizer_algorithm, :optimizer_nrestarts, :optimizer_probability, :optimizer_iterations, :optimizer_options, :val_recorder, :recorder_file, :early_stop_condition, :timeout_in_seconds, :max_evals, :skip_mutation_failures, :enable_autodiff, :nested_constraints, :deterministic, :define_helper_functions, :niterations, :parallelism, :numprocs, :procs, :addprocs_function, :runtests, :loss_type, :selection_method, :dimensions_type)`" +":hyperparameter_types" = "`(\"Any\", \"Any\", \"Any\", \"Union{Nothing, Function, LossFunctions.SupervisedLoss}\", \"Union{Nothing, Function}\", \"Integer\", \"Real\", \"Integer\", \"Any\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Real\", \"Union{Nothing, Real}\", \"Real\", \"Integer\", \"Union{Nothing, Integer}\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, Bool}\", \"Bool\", \"Union{Nothing, AbstractString}\", \"Union{Nothing, Integer}\", \"Real\", \"Bool\", \"Bool\", \"Integer\", \"Union{SymbolicRegression.CoreModule.OptionsStructModule.MutationWeights, NamedTuple, AbstractVector}\", \"Real\", \"Real\", \"Bool\", \"Bool\", \"Real\", \"Integer\", \"Integer\", \"Real\", \"Real\", \"Real\", \"Integer\", \"Bool\", \"Real\", \"Any\", \"Any\", \"Any\", \"Bool\", \"Union{Nothing, Integer}\", \"AbstractString\", \"Integer\", \"Real\", \"Union{Nothing, Integer}\", \"Union{Nothing, Dict, NamedTuple, Optim.Options}\", \"Val\", \"AbstractString\", \"Union{Nothing, Function, Real}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"Bool\", \"Bool\", \"Any\", \"Bool\", \"Bool\", \"Int64\", \"Symbol\", \"Union{Nothing, Int64}\", \"Union{Nothing, Vector{Int64}}\", \"Union{Nothing, Function}\", \"Bool\", \"Any\", \"Function\", \"Type{D} where D<:DynamicQuantities.AbstractDimensions\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" 
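A quick way for reviewers to exercise the `selection_method` behaviour documented in the updated docstring above: the `Returns(...)` idiom is taken from that docstring, while the data, operators, and iteration count below are illustrative only (a minimal sketch, not part of this diff).

```julia
using MLJ
MultitargetSRRegressor = @load MultitargetSRRegressor pkg=SymbolicRegression

# Pin the predicted expression to Pareto index 2 for every target, instead of
# the default `choose_best` trade-off between loss and complexity.
model = MultitargetSRRegressor(
    binary_operators=[+, -, *],
    selection_method=Returns(2),  # must return a single integer index
    niterations=40,
)
X = (a=rand(100), b=rand(100))
Y = (y1=2 .* X.a, y2=X.a .+ X.b)
mach = machine(model, X, Y)
fit!(mach)
report(mach).best_idx  # expected: [2, 2], provided each frontier has >= 2 expressions
```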
":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" ":reports_feature_importances" = "`false`" @@ -4981,16 +5117,16 @@ ":supports_weights" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = "```\nSRRegressor\n```\n\nA model type for constructing a Symbolic Regression via Evolutionary Search, based on [SymbolicRegression.jl](https://github.com/MilesCranmer/SymbolicRegression.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nSRRegressor = @load SRRegressor pkg=SymbolicRegression\n```\n\nDo `model = SRRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `SRRegressor(binary_operators=...)`.\n\nSingle-target Symbolic Regression regressor (`SRRegressor`) searches for symbolic expressions that predict a single target variable from a set of input variables. All data is assumed to be `Continuous`. The search is performed using an evolutionary algorithm. This algorithm is described in the paper https://arxiv.org/abs/2305.01582.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nOR\n\n```\nmach = machine(model, X, y, w)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`. Variable names in discovered expressions will be taken from the column names of `X`, if available.\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`.\n * `w` is the observation weights which can either be `nothing` (default) or an `AbstractVector` whoose element scitype is `Count` or `Continuous`.\n\nTrain the machine using `fit!(mach)`, inspect the discovered expressions with `report(mach)`, and predict on new data with `predict(mach, Xnew)`. Note that unlike other regressors, symbolic regression stores a list of trained models. The model chosen from this list is defined by the function `selection_method` keyword argument, which by default balances accuracy and complexity.\n\n# Hyper-parameters\n\n * `binary_operators`: Vector of binary operators (functions) to use. Each operator should be defined for two input scalars, and one output scalar. All operators need to be defined over the entire real line (excluding infinity - these are stopped before they are input), or return `NaN` where not defined. For speed, define it so it takes two reals of the same type as input, and outputs the same type. For the SymbolicUtils simplification backend, you will need to define a generic method of the operator so it takes arbitrary types.\n * `unary_operators`: Same, but for unary operators (one input scalar, gives an output scalar).\n * `constraints`: Array of pairs specifying size constraints for each operator. The constraints for a binary operator should be a 2-tuple (e.g., `(-1, -1)`) and the constraints for a unary operator should be an `Int`. A size constraint is a limit to the size of the subtree in each argument of an operator. e.g., `[(^)=>(-1, 3)]` means that the `^` operator can have arbitrary size (`-1`) in its left argument, but a maximum size of `3` in its right argument. 
Default is no constraints.\n * `batching`: Whether to evolve based on small mini-batches of data, rather than the entire dataset.\n * `batch_size`: What batch size to use if using batching.\n * `elementwise_loss`: What elementwise loss function to use. Can be one of the following losses, or any other loss of type `SupervisedLoss`. You can also pass a function that takes a scalar target (left argument), and scalar predicted (right argument), and returns a scalar. This will be averaged over the predicted data. If weights are supplied, your function should take a third argument for the weight scalar. Included losses: Regression: - `LPDistLoss{P}()`, - `L1DistLoss()`, - `L2DistLoss()` (mean square), - `LogitDistLoss()`, - `HuberLoss(d)`, - `L1EpsilonInsLoss(ϵ)`, - `L2EpsilonInsLoss(ϵ)`, - `PeriodicLoss(c)`, - `QuantileLoss(τ)`, Classification: - `ZeroOneLoss()`, - `PerceptronLoss()`, - `L1HingeLoss()`, - `SmoothedL1HingeLoss(γ)`, - `ModifiedHuberLoss()`, - `L2MarginLoss()`, - `ExpLoss()`, - `SigmoidLoss()`, - `DWDMarginLoss(q)`.\n * `loss_function`: Alternatively, you may redefine the loss used as any function of `tree::Node{T}`, `dataset::Dataset{T}`, and `options::Options`, so long as you output a non-negative scalar of type `T`. This is useful if you want to use a loss that takes into account derivatives, or correlations across the dataset. This also means you could use a custom evaluation for a particular expression. Take a look at `_eval_loss` in the file `src/LossFunctions.jl` for an example.\n * `npopulations`: How many populations of equations to use. By default this is set equal to the number of cores\n * `npop`: How many equations in each population.\n * `ncycles_per_iteration`: How many generations to consider per iteration.\n * `tournament_selection_n`: Number of expressions considered in each tournament.\n * `tournament_selection_p`: The fittest expression in a tournament is to be selected with probability `p`, the next fittest with probability `p*(1-p)`, and so forth.\n * `topn`: Number of equations to return to the host process, and to consider for the hall of fame.\n * `complexity_of_operators`: What complexity should be assigned to each operator, and the occurrence of a constant or variable. By default, this is 1 for all operators. Can be a real number as well, in which case the complexity of an expression will be rounded to the nearest integer. Input this in the form of, e.g., [(^) => 3, sin => 2].\n * `complexity_of_constants`: What complexity should be assigned to use of a constant. By default, this is 1.\n * `complexity_of_variables`: What complexity should be assigned to each variable. By default, this is 1.\n * `alpha`: The probability of accepting an equation mutation during regularized evolution is given by exp(-delta_loss/(alpha * T)), where T goes from 1 to 0. 
Thus, alpha=infinite is the same as no annealing.\n * `maxsize`: Maximum size of equations during the search.\n * `maxdepth`: Maximum depth of equations during the search, by default this is set equal to the maxsize.\n * `parsimony`: A multiplicative factor for how much complexity is punished.\n * `use_frequency`: Whether to use a parsimony that adapts to the relative proportion of equations at each complexity; this will ensure that there are a balanced number of equations considered for every complexity.\n * `use_frequency_in_tournament`: Whether to use the adaptive parsimony described above inside the score, rather than just at the mutation accept/reject stage.\n * `adaptive_parsimony_scaling`: How much to scale the adaptive parsimony term in the loss. Increase this if the search is spending too much time optimizing the most complex equations.\n * `fast_cycle`: Whether to thread over subsamples of equations during regularized evolution. Slightly improves performance, but is a different algorithm.\n * `turbo`: Whether to use `LoopVectorization.@turbo` to evaluate expressions. This can be significantly faster, but is only compatible with certain operators. *Experimental!*\n * `migration`: Whether to migrate equations between processes.\n * `hof_migration`: Whether to migrate equations from the hall of fame to processes.\n * `fraction_replaced`: What fraction of each population to replace with migrated equations at the end of each cycle.\n * `fraction_replaced_hof`: What fraction to replace with hall of fame equations at the end of each cycle.\n * `should_simplify`: Whether to simplify equations. If you pass a custom objective, this will be set to `false`.\n * `should_optimize_constants`: Whether to use an optimization algorithm to periodically optimize constants in equations.\n * `optimizer_nrestarts`: How many different random starting positions to consider for optimization of constants.\n * `optimizer_algorithm`: Select algorithm to use for optimizing constants. Default is \"BFGS\", but \"NelderMead\" is also supported.\n * `optimizer_options`: General options for the constant optimization. For details we refer to the documentation on `Optim.Options` from the `Optim.jl` package. Options can be provided here as `NamedTuple`, e.g. `(iterations=16,)`, as a `Dict`, e.g. Dict(:x_tol => 1.0e-32,), or as an `Optim.Options` instance.\n * `output_file`: What file to store equations to, as a backup.\n * `perturbation_factor`: When mutating a constant, either multiply or divide by (1+perturbation_factor)^(rand()+1).\n * `probability_negate_constant`: Probability of negating a constant in the equation when mutating it.\n * `mutation_weights`: Relative probabilities of the mutations. The struct `MutationWeights` should be passed to these options. See its documentation on `MutationWeights` for the different weights.\n * `crossover_probability`: Probability of performing crossover.\n * `annealing`: Whether to use simulated annealing.\n * `warmup_maxsize_by`: Whether to slowly increase the max size from 5 up to `maxsize`. If nonzero, specifies the fraction through the search at which the maxsize should be reached.\n * `verbosity`: Whether to print debugging statements or not.\n * `save_to_file`: Whether to save equations to a file during the search.\n * `bin_constraints`: See `constraints`. 
This is the same, but specified for binary operators only (for example, if you have an operator that is both a binary and unary operator).\n * `una_constraints`: Likewise, for unary operators.\n * `seed`: What random seed to use. `nothing` uses no seed.\n * `progress`: Whether to use a progress bar output (`verbosity` will have no effect).\n * `early_stop_condition`: Float - whether to stop early if the mean loss gets below this value. Function - a function taking (loss, complexity) as arguments and returning true or false.\n * `timeout_in_seconds`: Float64 - the time in seconds after which to exit (as an alternative to the number of iterations).\n * `max_evals`: Int (or Nothing) - the maximum number of evaluations of expressions to perform.\n * `skip_mutation_failures`: Whether to simply skip over mutations that fail or are rejected, rather than to replace the mutated expression with the original expression and proceed normally.\n * `enable_autodiff`: Whether to enable automatic differentiation functionality. This is turned off by default. If turned on, this will be turned off if one of the operators does not have well-defined gradients.\n * `nested_constraints`: Specifies how many times a combination of operators can be nested. For example, `[sin => [cos => 0], cos => [cos => 2]]` specifies that `cos` may never appear within a `sin`, but `sin` can be nested with itself an unlimited number of times. The second term specifies that `cos` can be nested up to 2 times within a `cos`, so that `cos(cos(cos(x)))` is allowed (as well as any combination of `+` or `-` within it), but `cos(cos(cos(cos(x))))` is not allowed. When an operator is not specified, it is assumed that it can be nested an unlimited number of times. This requires that there is no operator which is used both in the unary operators and the binary operators (e.g., `-` could be both subtract, and negation). For binary operators, both arguments are treated the same way, and the max of each argument is constrained.\n * `deterministic`: Use a global counter for the birth time, rather than calls to `time()`. This gives perfect resolution, and is therefore deterministic. However, it is not thread safe, and must be used in serial mode.\n * `define_helper_functions`: Whether to define helper functions for constructing and evaluating trees.\n * `niterations::Int=10`: The number of iterations to perform the search. More iterations will improve the results.\n * `parallelism=:multithreading`: What parallelism mode to use. The options are `:multithreading`, `:multiprocessing`, and `:serial`. By default, multithreading will be used. Multithreading uses less memory, but multiprocessing can handle multi-node compute. If using `:multithreading` mode, the number of threads available to julia are used. If using `:multiprocessing`, `numprocs` processes will be created dynamically if `procs` is unset. If you have already allocated processes, pass them to the `procs` argument and they will be used. You may also pass a string instead of a symbol, like `\"multithreading\"`.\n * `numprocs::Union{Int, Nothing}=nothing`: The number of processes to use, if you want `equation_search` to set this up automatically. 
By default this will be `4`, but can be any number (you should pick a number <= the number of cores available).\n * `procs::Union{Vector{Int}, Nothing}=nothing`: If you have set up a distributed run manually with `procs = addprocs()` and `@everywhere`, pass the `procs` to this keyword argument.\n * `addprocs_function::Union{Function, Nothing}=nothing`: If using multiprocessing (`parallelism=:multithreading`), and are not passing `procs` manually, then they will be allocated dynamically using `addprocs`. However, you may also pass a custom function to use instead of `addprocs`. This function should take a single positional argument, which is the number of processes to use, as well as the `lazy` keyword argument. For example, if set up on a slurm cluster, you could pass `addprocs_function = addprocs_slurm`, which will set up slurm processes.\n * `runtests::Bool=true`: Whether to run (quick) tests before starting the search, to see if there will be any problems during the equation search related to the host environment.\n * `loss_type::Type=Nothing`: If you would like to use a different type for the loss than for the data you passed, specify the type here. Note that if you pass complex data `::Complex{L}`, then the loss type will automatically be set to `L`.\n * `selection_method::Function`: Function to selection expression from the Pareto frontier for use in `predict`. See `SymbolicRegression.MLJInterfaceModule.choose_best` for an example. This function should return a single integer specifying the index of the expression to use. By default, `choose_best` maximizes the score (a pound-for-pound rating) of expressions reaching the threshold of 1.5x the minimum loss. To fix the index at `5`, you could just write `Returns(5)`.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew`, which should have same scitype as `X` above. The expression used for prediction is defined by the `selection_method` function, which can be seen by viewing `report(mach).best_idx`.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `best_idx::Int`: The index of the best expression in the Pareto frontier, as determined by the `selection_method` function.\n * `equations::Vector{Node{T}}`: The expressions discovered by the search, represented in a dominating Pareto frontier (i.e., the best expressions found for each complexity). `T` is equal to the element type of the passed data.\n * `equation_strings::Vector{String}`: The expressions discovered by the search, represented as strings for easy inspection.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `best_idx::Int`: The index of the best expression in the Pareto frontier, as determined by the `selection_method` function.\n * `equations::Vector{Node{T}}`: The expressions discovered by the search, represented in a dominating Pareto frontier (i.e., the best expressions found for each complexity).\n * `equation_strings::Vector{String}`: The expressions discovered by the search, represented as strings for easy inspection.\n * `complexities::Vector{Int}`: The complexity of each expression in the Pareto frontier.\n * `losses::Vector{L}`: The loss of each expression in the Pareto frontier, according to the loss function specified in the model. 
The type `L` is the loss type, which is usually the same as the element type of data passed (i.e., `T`), but can differ if complex data types are passed.\n * `scores::Vector{L}`: A metric which considers both the complexity and loss of an expression, equal to the change in the log-loss divided by the change in complexity, relative to the previous expression along the Pareto frontier. A larger score aims to indicate an expression is more likely to be the true expression generating the data, but this is very problem-dependent and generally several other factors should be considered.\n\n# Examples\n\n```julia\nusing MLJ\nSRRegressor = @load SRRegressor pkg=SymbolicRegression\nX, y = @load_boston\nmodel = SRRegressor(binary_operators=[+, -, *], unary_operators=[exp], niterations=100)\nmach = machine(model, X, y)\nfit!(mach)\ny_hat = predict(mach, X)\n# View the equation used:\nr = report(mach)\nprintln(\"Equation used:\", r.equation_strings[r.best_idx])\n```\n\nSee also [`MultitargetSRRegressor`](@ref).\n" +":docstring" = "```\nSRRegressor\n```\n\nA model type for constructing a Symbolic Regression via Evolutionary Search, based on [SymbolicRegression.jl](https://github.com/MilesCranmer/SymbolicRegression.jl), and implementing the MLJ model interface.\n\nFrom MLJ, the type can be imported using\n\n```\nSRRegressor = @load SRRegressor pkg=SymbolicRegression\n```\n\nDo `model = SRRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `SRRegressor(binary_operators=...)`.\n\nSingle-target Symbolic Regression regressor (`SRRegressor`) searches for symbolic expressions that predict a single target variable from a set of input variables. All data is assumed to be `Continuous`. The search is performed using an evolutionary algorithm. This algorithm is described in the paper https://arxiv.org/abs/2305.01582.\n\n# Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nOR\n\n```\nmach = machine(model, X, y, w)\n```\n\nHere:\n\n * `X` is any table of input features (eg, a `DataFrame`) whose columns are of scitype `Continuous`; check column scitypes with `schema(X)`. Variable names in discovered expressions will be taken from the column names of `X`, if available. Units in columns of `X` (use `DynamicQuantities` for units) will trigger dimensional analysis.\n * `y` is the target, which can be any `AbstractVector` whose element scitype is `Continuous`; check the scitype with `scitype(y)`. Units in `y` (use `DynamicQuantities` for units) will trigger dimensional analysis.\n * `w` is the observation weights, which can either be `nothing` (default) or an `AbstractVector` whose element scitype is `Count` or `Continuous`.\n\nTrain the machine using `fit!(mach)`, inspect the discovered expressions with `report(mach)`, and predict on new data with `predict(mach, Xnew)`. Note that unlike other regressors, symbolic regression stores a list of trained models. The model chosen from this list is defined by the function `selection_method` keyword argument, which by default balances accuracy and complexity.\n\n# Hyper-parameters\n\n * `binary_operators`: Vector of binary operators (functions) to use. Each operator should be defined for two input scalars, and one output scalar. All operators need to be defined over the entire real line (excluding infinity - these are stopped before they are input), or return `NaN` where not defined. 
For speed, define it so it takes two reals of the same type as input, and outputs the same type. For the SymbolicUtils simplification backend, you will need to define a generic method of the operator so it takes arbitrary types.\n * `unary_operators`: Same, but for unary operators (one input scalar, gives an output scalar).\n * `constraints`: Array of pairs specifying size constraints for each operator. The constraints for a binary operator should be a 2-tuple (e.g., `(-1, -1)`) and the constraints for a unary operator should be an `Int`. A size constraint is a limit to the size of the subtree in each argument of an operator. e.g., `[(^)=>(-1, 3)]` means that the `^` operator can have arbitrary size (`-1`) in its left argument, but a maximum size of `3` in its right argument. Default is no constraints.\n * `batching`: Whether to evolve based on small mini-batches of data, rather than the entire dataset.\n * `batch_size`: What batch size to use if using batching.\n * `elementwise_loss`: What elementwise loss function to use. Can be one of the following losses, or any other loss of type `SupervisedLoss`. You can also pass a function that takes a scalar target (left argument), and scalar predicted (right argument), and returns a scalar. This will be averaged over the predicted data. If weights are supplied, your function should take a third argument for the weight scalar. Included losses: Regression: - `LPDistLoss{P}()`, - `L1DistLoss()`, - `L2DistLoss()` (mean square), - `LogitDistLoss()`, - `HuberLoss(d)`, - `L1EpsilonInsLoss(ϵ)`, - `L2EpsilonInsLoss(ϵ)`, - `PeriodicLoss(c)`, - `QuantileLoss(τ)`, Classification: - `ZeroOneLoss()`, - `PerceptronLoss()`, - `L1HingeLoss()`, - `SmoothedL1HingeLoss(γ)`, - `ModifiedHuberLoss()`, - `L2MarginLoss()`, - `ExpLoss()`, - `SigmoidLoss()`, - `DWDMarginLoss(q)`.\n * `loss_function`: Alternatively, you may redefine the loss used as any function of `tree::Node{T}`, `dataset::Dataset{T}`, and `options::Options`, so long as you output a non-negative scalar of type `T`. This is useful if you want to use a loss that takes into account derivatives, or correlations across the dataset. This also means you could use a custom evaluation for a particular expression. If you are using `batching=true`, then your function should accept a fourth argument `idx`, which is either `nothing` (indicating that the full dataset should be used), or a vector of indices to use for the batch. For example,\n\n ```\n function my_loss(tree, dataset::Dataset{T,L}, options)::L where {T,L}\n prediction, flag = eval_tree_array(tree, dataset.X, options)\n if !flag\n return L(Inf)\n end\n return sum((prediction .- dataset.y) .^ 2) / dataset.n\n end\n ```\n * `npopulations`: How many populations of equations to use. By default this is set equal to the number of cores\n * `npop`: How many equations in each population.\n * `ncycles_per_iteration`: How many generations to consider per iteration.\n * `tournament_selection_n`: Number of expressions considered in each tournament.\n * `tournament_selection_p`: The fittest expression in a tournament is to be selected with probability `p`, the next fittest with probability `p*(1-p)`, and so forth.\n * `topn`: Number of equations to return to the host process, and to consider for the hall of fame.\n * `complexity_of_operators`: What complexity should be assigned to each operator, and the occurrence of a constant or variable. By default, this is 1 for all operators. 
Can be a real number as well, in which case the complexity of an expression will be rounded to the nearest integer. Input this in the form of, e.g., [(^) => 3, sin => 2].\n * `complexity_of_constants`: What complexity should be assigned to use of a constant. By default, this is 1.\n * `complexity_of_variables`: What complexity should be assigned to each variable. By default, this is 1.\n * `alpha`: The probability of accepting an equation mutation during regularized evolution is given by exp(-delta_loss/(alpha * T)), where T goes from 1 to 0. Thus, alpha=infinite is the same as no annealing.\n * `maxsize`: Maximum size of equations during the search.\n * `maxdepth`: Maximum depth of equations during the search, by default this is set equal to the maxsize.\n * `parsimony`: A multiplicative factor for how much complexity is punished.\n * `dimensional_constraint_penalty`: An additive factor if the dimensional constraint is violated.\n * `use_frequency`: Whether to use a parsimony that adapts to the relative proportion of equations at each complexity; this will ensure that there are a balanced number of equations considered for every complexity.\n * `use_frequency_in_tournament`: Whether to use the adaptive parsimony described above inside the score, rather than just at the mutation accept/reject stage.\n * `adaptive_parsimony_scaling`: How much to scale the adaptive parsimony term in the loss. Increase this if the search is spending too much time optimizing the most complex equations.\n * `fast_cycle`: Whether to thread over subsamples of equations during regularized evolution. Slightly improves performance, but is a different algorithm.\n * `turbo`: Whether to use `LoopVectorization.@turbo` to evaluate expressions. This can be significantly faster, but is only compatible with certain operators. *Experimental!*\n * `migration`: Whether to migrate equations between processes.\n * `hof_migration`: Whether to migrate equations from the hall of fame to processes.\n * `fraction_replaced`: What fraction of each population to replace with migrated equations at the end of each cycle.\n * `fraction_replaced_hof`: What fraction to replace with hall of fame equations at the end of each cycle.\n * `should_simplify`: Whether to simplify equations. If you pass a custom objective, this will be set to `false`.\n * `should_optimize_constants`: Whether to use an optimization algorithm to periodically optimize constants in equations.\n * `optimizer_nrestarts`: How many different random starting positions to consider for optimization of constants.\n * `optimizer_algorithm`: Select algorithm to use for optimizing constants. Default is \"BFGS\", but \"NelderMead\" is also supported.\n * `optimizer_options`: General options for the constant optimization. For details we refer to the documentation on `Optim.Options` from the `Optim.jl` package. Options can be provided here as `NamedTuple`, e.g. `(iterations=16,)`, as a `Dict`, e.g. Dict(:x_tol => 1.0e-32,), or as an `Optim.Options` instance.\n * `output_file`: What file to store equations to, as a backup.\n * `perturbation_factor`: When mutating a constant, either multiply or divide by (1+perturbation_factor)^(rand()+1).\n * `probability_negate_constant`: Probability of negating a constant in the equation when mutating it.\n * `mutation_weights`: Relative probabilities of the mutations. The struct `MutationWeights` should be passed to these options. 
See its documentation on `MutationWeights` for the different weights.\n * `crossover_probability`: Probability of performing crossover.\n * `annealing`: Whether to use simulated annealing.\n * `warmup_maxsize_by`: Whether to slowly increase the max size from 5 up to `maxsize`. If nonzero, specifies the fraction through the search at which the maxsize should be reached.\n * `verbosity`: Whether to print debugging statements or not.\n * `print_precision`: How many digits to print when printing equations. By default, this is 5.\n * `save_to_file`: Whether to save equations to a file during the search.\n * `bin_constraints`: See `constraints`. This is the same, but specified for binary operators only (for example, if you have an operator that is both a binary and unary operator).\n * `una_constraints`: Likewise, for unary operators.\n * `seed`: What random seed to use. `nothing` uses no seed.\n * `progress`: Whether to use a progress bar output (`verbosity` will have no effect).\n * `early_stop_condition`: Float - whether to stop early if the mean loss gets below this value. Function - a function taking (loss, complexity) as arguments and returning true or false.\n * `timeout_in_seconds`: Float64 - the time in seconds after which to exit (as an alternative to the number of iterations).\n * `max_evals`: Int (or Nothing) - the maximum number of evaluations of expressions to perform.\n * `skip_mutation_failures`: Whether to simply skip over mutations that fail or are rejected, rather than to replace the mutated expression with the original expression and proceed normally.\n * `enable_autodiff`: Whether to enable automatic differentiation functionality. This is turned off by default. If turned on, this will be turned off if one of the operators does not have well-defined gradients.\n * `nested_constraints`: Specifies how many times a combination of operators can be nested. For example, `[sin => [cos => 0], cos => [cos => 2]]` specifies that `cos` may never appear within a `sin`, but `sin` can be nested with itself an unlimited number of times. The second term specifies that `cos` can be nested up to 2 times within a `cos`, so that `cos(cos(cos(x)))` is allowed (as well as any combination of `+` or `-` within it), but `cos(cos(cos(cos(x))))` is not allowed. When an operator is not specified, it is assumed that it can be nested an unlimited number of times. This requires that there is no operator which is used both in the unary operators and the binary operators (e.g., `-` could be both subtract, and negation). For binary operators, both arguments are treated the same way, and the max of each argument is constrained.\n * `deterministic`: Use a global counter for the birth time, rather than calls to `time()`. This gives perfect resolution, and is therefore deterministic. However, it is not thread safe, and must be used in serial mode.\n * `define_helper_functions`: Whether to define helper functions for constructing and evaluating trees.\n * `niterations::Int=10`: The number of iterations to perform the search. More iterations will improve the results.\n * `parallelism=:multithreading`: What parallelism mode to use. The options are `:multithreading`, `:multiprocessing`, and `:serial`. By default, multithreading will be used. Multithreading uses less memory, but multiprocessing can handle multi-node compute. If using `:multithreading` mode, the number of threads available to julia are used. If using `:multiprocessing`, `numprocs` processes will be created dynamically if `procs` is unset. 
If you have already allocated processes, pass them to the `procs` argument and they will be used. You may also pass a string instead of a symbol, like `\"multithreading\"`.\n * `numprocs::Union{Int, Nothing}=nothing`: The number of processes to use, if you want `equation_search` to set this up automatically. By default this will be `4`, but can be any number (you should pick a number <= the number of cores available).\n * `procs::Union{Vector{Int}, Nothing}=nothing`: If you have set up a distributed run manually with `procs = addprocs()` and `@everywhere`, pass the `procs` to this keyword argument.\n * `addprocs_function::Union{Function, Nothing}=nothing`: If using multiprocessing (`parallelism=:multiprocessing`), and you are not passing `procs` manually, then they will be allocated dynamically using `addprocs`. However, you may also pass a custom function to use instead of `addprocs`. This function should take a single positional argument, which is the number of processes to use, as well as the `lazy` keyword argument. For example, if set up on a slurm cluster, you could pass `addprocs_function = addprocs_slurm`, which will set up slurm processes.\n * `runtests::Bool=true`: Whether to run (quick) tests before starting the search, to see if there will be any problems during the equation search related to the host environment.\n * `loss_type::Type=Nothing`: If you would like to use a different type for the loss than for the data you passed, specify the type here. Note that if you pass complex data `::Complex{L}`, then the loss type will automatically be set to `L`.\n * `selection_method::Function`: Function to select the expression from the Pareto frontier for use in `predict`. See `SymbolicRegression.MLJInterfaceModule.choose_best` for an example. This function should return a single integer specifying the index of the expression to use. By default, `choose_best` maximizes the score (a pound-for-pound rating) of expressions reaching the threshold of 1.5x the minimum loss. To fix the index at `5`, you could just write `Returns(5)`.\n * `dimensions_type::AbstractDimensions`: The type of dimensions to use when storing the units of the data. By default this is `DynamicQuantities.DEFAULT_DIM_TYPE`.\n\n# Operations\n\n * `predict(mach, Xnew)`: Return predictions of the target given features `Xnew`, which should have the same scitype as `X` above. The expression used for prediction is defined by the `selection_method` function, which can be seen by viewing `report(mach).best_idx`.\n\n# Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `best_idx::Int`: The index of the best expression in the Pareto frontier, as determined by the `selection_method` function.\n * `equations::Vector{Node{T}}`: The expressions discovered by the search, represented in a dominating Pareto frontier (i.e., the best expressions found for each complexity). 
`T` is equal to the element type of the passed data.\n * `equation_strings::Vector{String}`: The expressions discovered by the search, represented as strings for easy inspection.\n\n# Report\n\nThe fields of `report(mach)` are:\n\n * `best_idx::Int`: The index of the best expression in the Pareto frontier, as determined by the `selection_method` function.\n * `equations::Vector{Node{T}}`: The expressions discovered by the search, represented in a dominating Pareto frontier (i.e., the best expressions found for each complexity).\n * `equation_strings::Vector{String}`: The expressions discovered by the search, represented as strings for easy inspection.\n * `complexities::Vector{Int}`: The complexity of each expression in the Pareto frontier.\n * `losses::Vector{L}`: The loss of each expression in the Pareto frontier, according to the loss function specified in the model. The type `L` is the loss type, which is usually the same as the element type of data passed (i.e., `T`), but can differ if complex data types are passed.\n * `scores::Vector{L}`: A metric which considers both the complexity and loss of an expression, equal to the change in the log-loss divided by the change in complexity, relative to the previous expression along the Pareto frontier. A larger score aims to indicate an expression is more likely to be the true expression generating the data, but this is very problem-dependent and generally several other factors should be considered.\n\n# Examples\n\n```julia\nusing MLJ\nSRRegressor = @load SRRegressor pkg=SymbolicRegression\nX, y = @load_boston\nmodel = SRRegressor(binary_operators=[+, -, *], unary_operators=[exp], niterations=100)\nmach = machine(model, X, y)\nfit!(mach)\ny_hat = predict(mach, X)\n# View the equation used:\nr = report(mach)\nprintln(\"Equation used:\", r.equation_strings[r.best_idx])\n```\n\nWith units and variable names:\n\n```julia\nusing MLJ\nusing DynamicQuantities\nSRRegressor = @load SRRegressor pkg=SymbolicRegression\n\nX = (; x1=rand(32) .* us\"km/h\", x2=rand(32) .* us\"km\")\ny = @. 
X.x2 / X.x1 + 0.5us\"h\"\nmodel = SRRegressor(binary_operators=[+, -, *, /])\nmach = machine(model, X, y)\nfit!(mach)\ny_hat = predict(mach, X)\n# View the equation used:\nr = report(mach)\nprintln(\"Equation used:\", r.equation_strings[r.best_idx])\n```\n\nSee also [`MultitargetSRRegressor`](@ref).\n" ":name" = "SRRegressor" ":human_name" = "Symbolic Regression via Evolutionary Search" ":is_supervised" = "`true`" ":prediction_type" = ":deterministic" ":abstract_type" = "`MLJModelInterface.Deterministic`" ":implemented_methods" = [":predict"] -":hyperparameters" = "`(:binary_operators, :unary_operators, :constraints, :elementwise_loss, :loss_function, :tournament_selection_n, :tournament_selection_p, :topn, :complexity_of_operators, :complexity_of_constants, :complexity_of_variables, :parsimony, :alpha, :maxsize, :maxdepth, :fast_cycle, :turbo, :migration, :hof_migration, :should_simplify, :should_optimize_constants, :output_file, :npopulations, :perturbation_factor, :annealing, :batching, :batch_size, :mutation_weights, :crossover_probability, :warmup_maxsize_by, :use_frequency, :use_frequency_in_tournament, :adaptive_parsimony_scaling, :npop, :ncycles_per_iteration, :fraction_replaced, :fraction_replaced_hof, :verbosity, :save_to_file, :probability_negate_constant, :seed, :bin_constraints, :una_constraints, :progress, :terminal_width, :optimizer_algorithm, :optimizer_nrestarts, :optimizer_probability, :optimizer_iterations, :optimizer_options, :recorder, :recorder_file, :early_stop_condition, :timeout_in_seconds, :max_evals, :skip_mutation_failures, :enable_autodiff, :nested_constraints, :deterministic, :define_helper_functions, :niterations, :parallelism, :numprocs, :procs, :addprocs_function, :runtests, :loss_type, :selection_method)`" -":hyperparameter_types" = "`(\"Any\", \"Any\", \"Any\", \"Union{Nothing, Function, LossFunctions.SupervisedLoss}\", \"Union{Nothing, Function}\", \"Integer\", \"Real\", \"Integer\", \"Any\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Real\", \"Real\", \"Integer\", \"Union{Nothing, Integer}\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, Bool}\", \"Bool\", \"Union{Nothing, AbstractString}\", \"Union{Nothing, Integer}\", \"Real\", \"Bool\", \"Bool\", \"Integer\", \"Union{SymbolicRegression.CoreModule.OptionsStructModule.MutationWeights, NamedTuple, AbstractVector}\", \"Real\", \"Real\", \"Bool\", \"Bool\", \"Real\", \"Integer\", \"Integer\", \"Real\", \"Real\", \"Real\", \"Bool\", \"Real\", \"Any\", \"Any\", \"Any\", \"Bool\", \"Union{Nothing, Integer}\", \"AbstractString\", \"Integer\", \"Real\", \"Union{Nothing, Integer}\", \"Union{Nothing, Dict, NamedTuple, Optim.Options}\", \"Union{Nothing, Bool}\", \"AbstractString\", \"Union{Nothing, Function, Real}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"Bool\", \"Bool\", \"Any\", \"Bool\", \"Bool\", \"Int64\", \"Symbol\", \"Union{Nothing, Int64}\", \"Union{Nothing, Vector{Int64}}\", \"Union{Nothing, Function}\", \"Bool\", \"Type\", \"Function\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, 
nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":hyperparameters" = "`(:binary_operators, :unary_operators, :constraints, :elementwise_loss, :loss_function, :tournament_selection_n, :tournament_selection_p, :topn, :complexity_of_operators, :complexity_of_constants, :complexity_of_variables, :parsimony, :dimensional_constraint_penalty, :alpha, :maxsize, :maxdepth, :fast_cycle, :turbo, :migration, :hof_migration, :should_simplify, :should_optimize_constants, :output_file, :npopulations, :perturbation_factor, :annealing, :batching, :batch_size, :mutation_weights, :crossover_probability, :warmup_maxsize_by, :use_frequency, :use_frequency_in_tournament, :adaptive_parsimony_scaling, :npop, :ncycles_per_iteration, :fraction_replaced, :fraction_replaced_hof, :verbosity, :print_precision, :save_to_file, :probability_negate_constant, :seed, :bin_constraints, :una_constraints, :progress, :terminal_width, :optimizer_algorithm, :optimizer_nrestarts, :optimizer_probability, :optimizer_iterations, :optimizer_options, :val_recorder, :recorder_file, :early_stop_condition, :timeout_in_seconds, :max_evals, :skip_mutation_failures, :enable_autodiff, :nested_constraints, :deterministic, :define_helper_functions, :niterations, :parallelism, :numprocs, :procs, :addprocs_function, :runtests, :loss_type, :selection_method, :dimensions_type)`" +":hyperparameter_types" = "`(\"Any\", \"Any\", \"Any\", \"Union{Nothing, Function, LossFunctions.SupervisedLoss}\", \"Union{Nothing, Function}\", \"Integer\", \"Real\", \"Integer\", \"Any\", \"Union{Nothing, Real}\", \"Union{Nothing, Real}\", \"Real\", \"Union{Nothing, Real}\", \"Real\", \"Integer\", \"Union{Nothing, Integer}\", \"Bool\", \"Bool\", \"Bool\", \"Bool\", \"Union{Nothing, Bool}\", \"Bool\", \"Union{Nothing, AbstractString}\", \"Union{Nothing, Integer}\", \"Real\", \"Bool\", \"Bool\", \"Integer\", \"Union{SymbolicRegression.CoreModule.OptionsStructModule.MutationWeights, NamedTuple, AbstractVector}\", \"Real\", \"Real\", \"Bool\", \"Bool\", \"Real\", \"Integer\", \"Integer\", \"Real\", \"Real\", \"Real\", \"Integer\", \"Bool\", \"Real\", \"Any\", \"Any\", \"Any\", \"Bool\", \"Union{Nothing, Integer}\", \"AbstractString\", \"Integer\", \"Real\", \"Union{Nothing, Integer}\", \"Union{Nothing, Dict, NamedTuple, Optim.Options}\", \"Val\", \"AbstractString\", \"Union{Nothing, Function, Real}\", \"Union{Nothing, Real}\", \"Union{Nothing, Integer}\", \"Bool\", \"Bool\", \"Any\", \"Bool\", \"Bool\", \"Int64\", \"Symbol\", \"Union{Nothing, Int64}\", \"Union{Nothing, Vector{Int64}}\", \"Union{Nothing, Function}\", \"Bool\", \"Any\", \"Function\", \"Type{D} where D<:DynamicQuantities.AbstractDimensions\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = "`nothing`" ":supports_training_losses" = "`false`" ":reports_feature_importances" = "`false`" @@ -5117,16 +5253,16 @@ 
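One behavioural change worth flagging before the EvoTrees hunks below: the revised SRRegressor docstring above states that a custom `loss_function` must accept a fourth `idx` argument when `batching=true`. A minimal sketch of such a loss, adapted from the `my_loss` example embedded in that docstring (the batched indexing here is an assumption based on the description, not code from this diff):

```julia
using SymbolicRegression: Dataset, eval_tree_array

# Batched variant of the docstring's `my_loss`: `idx` is `nothing` when the full
# dataset should be used, or a vector of row indices for the current mini-batch.
function my_batched_loss(tree, dataset::Dataset{T,L}, options, idx)::L where {T,L}
    X = idx === nothing ? dataset.X : view(dataset.X, :, idx)
    y = idx === nothing ? dataset.y : view(dataset.y, idx)
    prediction, flag = eval_tree_array(tree, X, options)
    flag || return L(Inf)  # evaluation failed: treat as infinite loss
    return sum((prediction .- y) .^ 2) / length(y)
end
```

This would then be passed as `SRRegressor(loss_function=my_batched_loss, batching=true, ...)`.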
":supports_weights" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = "EvoTreeClassifier(;kwargs...)\n\nA model type for constructing a EvoTreeClassifier, based on [EvoTrees.jl](https://github.com/Evovest/EvoTrees.jl), and implementing both an internal API and the MLJ model interface. EvoTreeClassifier is used to perform multi-class classification, using cross-entropy loss.\n\n# Hyper-parameters\n\n * `nrounds=10`: Number of rounds. It corresponds to the number of trees that will be sequentially stacked. Must be >= 1.\n * `eta=0.1`: Learning rate. Each tree raw predictions are scaled by `eta` prior to be added to the stack of predictions. Must be > 0. A lower `eta` results in slower learning, requiring a higher `nrounds` but typically improves model performance.\n * `lambda::T=0.0`: L2 regularization term on weights. Must be >= 0. Higher lambda can result in a more robust model.\n * `gamma::T=0.0`: Minimum gain improvement needed to perform a node split. Higher gamma can result in a more robust model. Must be >= 0.\n * `max_depth=5`: Maximum depth of a tree. Must be >= 1. A tree of depth 1 is made of a single prediction leaf. A complete tree of depth N contains `2^(N - 1)` terminal leaves and `2^(N - 1) - 1` split nodes. Compute cost is proportional to `2^max_depth`. Typical optimal values are in the 3 to 9 range.\n * `min_weight=0.0`: Minimum weight needed in a node to perform a split. Matches the number of observations by default or the sum of weights as provided by the `weights` vector. Must be > 0.\n * `rowsample=1.0`: Proportion of rows that are sampled at each iteration to build the tree. Should be in `]0, 1]`.\n * `colsample=1.0`: Proportion of columns / features that are sampled at each iteration to build the tree. Should be in `]0, 1]`.\n * `nbins=32`: Number of bins into which each feature is quantized. Buckets are defined based on quantiles, hence resulting in equal weight bins. Should be between 2 and 255.\n * `rng=123`: Either an integer used as a seed to the random number generator or an actual random number generator (`::Random.AbstractRNG`).\n\n# Internal API\n\nDo `config = EvoTreeClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in EvoTreeClassifier(max_depth=...).\n\n## Training model\n\nA model is built using [`fit_evotree`](@ref):\n\n```julia\nmodel = fit_evotree(config; x_train, y_train, kwargs...)\n```\n\n## Inference\n\nPredictions are obtained using [`predict`](@ref) which returns a `Matrix` of size `[nobs, K]` where `K` is the number of classes:\n\n```julia\nEvoTrees.predict(model, X)\n```\n\nAlternatively, models act as a functor, returning predictions when called as a function with features as argument:\n\n```julia\nmodel(X)\n```\n\n# MLJ\n\nFrom MLJ, the type can be imported using:\n\n```julia\nEvoTreeClassifier = @load EvoTreeClassifier pkg=EvoTrees\n```\n\nDo `model = EvoTreeClassifier()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `EvoTreeClassifier(loss=...)`.\n\n## Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:Multiclas` or `<:OrderedFactor`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n## Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given features `Xnew` having the same scitype as `X` above. Predictions are probabilistic.\n * `predict_mode(mach, Xnew)`: returns the mode of each of the prediction above.\n\n## Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `:fitresult`: The `GBTree` object returned by EvoTrees.jl fitting algorithm.\n\n## Report\n\nThe fields of `report(mach)` are:\n\n * `:features`: The names of the features encountered in training.\n\n# Examples\n\n```\n# Internal API\nusing EvoTrees\nconfig = EvoTreeClassifier(max_depth=5, nbins=32, nrounds=100)\nnobs, nfeats = 1_000, 5\nx_train, y_train = randn(nobs, nfeats), rand(1:3, nobs)\nmodel = fit_evotree(config; x_train, y_train)\npreds = EvoTrees.predict(model, x_train)\n```\n\n```\n# MLJ Interface\nusing MLJ\nEvoTreeClassifier = @load EvoTreeClassifier pkg=EvoTrees\nmodel = EvoTreeClassifier(max_depth=5, nbins=32, nrounds=100)\nX, y = @load_iris\nmach = machine(model, X, y) |> fit!\npreds = predict(mach, X)\npreds = predict_mode(mach, X)\n```\n\nSee also [EvoTrees.jl](https://github.com/Evovest/EvoTrees.jl).\n" +":docstring" = "EvoTreeClassifier(;kwargs...)\n\nA model type for constructing an EvoTreeClassifier, based on [EvoTrees.jl](https://github.com/Evovest/EvoTrees.jl), and implementing both an internal API and the MLJ model interface. EvoTreeClassifier is used to perform multi-class classification, using cross-entropy loss.\n\n# Hyper-parameters\n\n * `nrounds=10`: Number of rounds. It corresponds to the number of trees that will be sequentially stacked. Must be >= 1.\n * `eta=0.1`: Learning rate. Each tree's raw predictions are scaled by `eta` prior to being added to the stack of predictions. Must be > 0. A lower `eta` results in slower learning, requiring a higher `nrounds` but typically improves model performance.\n * `lambda::T=0.0`: L2 regularization term on weights. Must be >= 0. Higher lambda can result in a more robust model.\n * `gamma::T=0.0`: Minimum gain improvement needed to perform a node split. Higher gamma can result in a more robust model. Must be >= 0.\n * `max_depth=5`: Maximum depth of a tree. Must be >= 1. A tree of depth 1 is made of a single prediction leaf. A complete tree of depth N contains `2^(N - 1)` terminal leaves and `2^(N - 1) - 1` split nodes. Compute cost is proportional to `2^max_depth`. Typical optimal values are in the 3 to 9 range.\n * `min_weight=1.0`: Minimum weight needed in a node to perform a split. Matches the number of observations by default or the sum of weights as provided by the `weights` vector. Must be > 0.\n * `rowsample=1.0`: Proportion of rows that are sampled at each iteration to build the tree. Should be in `]0, 1]`.\n * `colsample=1.0`: Proportion of columns / features that are sampled at each iteration to build the tree. 
Should be in `]0, 1]`.\n * `nbins=32`: Number of bins into which each feature is quantized. Buckets are defined based on quantiles, hence resulting in equal weight bins. Should be between 2 and 255.\n * `tree_type=\"binary\"`: Tree structure to be used. One of:\n\n * `binary`: Each node of a tree is grown independently. Trees are built depthwise until the max depth is reached, or until the min weight or gain (see `gamma`) stops further node splits.\n * `oblivious`: A common splitting condition is imposed on all nodes of a given depth.\n * `rng=123`: Either an integer used as a seed to the random number generator or an actual random number generator (`::Random.AbstractRNG`).\n\n# Internal API\n\nDo `config = EvoTreeClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in EvoTreeClassifier(max_depth=...).\n\n## Training model\n\nA model is built using [`fit_evotree`](@ref):\n\n```julia\nmodel = fit_evotree(config; x_train, y_train, kwargs...)\n```\n\n## Inference\n\nPredictions are obtained using [`predict`](@ref) which returns a `Matrix` of size `[nobs, K]` where `K` is the number of classes:\n\n```julia\nEvoTrees.predict(model, X)\n```\n\nAlternatively, models act as a functor, returning predictions when called as a function with features as argument:\n\n```julia\nmodel(X)\n```\n\n# MLJ\n\nFrom MLJ, the type can be imported using:\n\n```julia\nEvoTreeClassifier = @load EvoTreeClassifier pkg=EvoTrees\n```\n\nDo `model = EvoTreeClassifier()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `EvoTreeClassifier(loss=...)`.\n\n## Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n * `y`: the target, which can be any `AbstractVector` whose element scitype is `<:Multiclass` or `<:OrderedFactor`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n## Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given features `Xnew` having the same scitype as `X` above. 
Predictions are probabilistic.\n * `predict_mode(mach, Xnew)`: returns the mode of each of the predictions above.\n\n## Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `:fitresult`: The `GBTree` object returned by EvoTrees.jl fitting algorithm.\n\n## Report\n\nThe fields of `report(mach)` are:\n\n * `:features`: The names of the features encountered in training.\n\n# Examples\n\n```\n# Internal API\nusing EvoTrees\nconfig = EvoTreeClassifier(max_depth=5, nbins=32, nrounds=100)\nnobs, nfeats = 1_000, 5\nx_train, y_train = randn(nobs, nfeats), rand(1:3, nobs)\nmodel = fit_evotree(config; x_train, y_train)\npreds = EvoTrees.predict(model, x_train)\n```\n\n```\n# MLJ Interface\nusing MLJ\nEvoTreeClassifier = @load EvoTreeClassifier pkg=EvoTrees\nmodel = EvoTreeClassifier(max_depth=5, nbins=32, nrounds=100)\nX, y = @load_iris\nmach = machine(model, X, y) |> fit!\npreds = predict(mach, X)\npreds = predict_mode(mach, X)\n```\n\nSee also [EvoTrees.jl](https://github.com/Evovest/EvoTrees.jl).\n" ":name" = "EvoTreeClassifier" ":human_name" = "evo tree classifier" ":is_supervised" = "`true`" ":prediction_type" = ":probabilistic" ":abstract_type" = "`MLJModelInterface.Probabilistic`" ":implemented_methods" = [":predict"] -":hyperparameters" = "`(:nrounds, :lambda, :gamma, :eta, :max_depth, :min_weight, :rowsample, :colsample, :nbins, :alpha, :rng)`" -":hyperparameter_types" = "`(\"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Any\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":hyperparameters" = "`(:nrounds, :lambda, :gamma, :eta, :max_depth, :min_weight, :rowsample, :colsample, :nbins, :alpha, :tree_type, :rng)`" +":hyperparameter_types" = "`(\"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"String\", \"Any\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = ":nrounds" ":supports_training_losses" = "`false`" ":reports_feature_importances" = "`true`" @@ -5151,16 +5287,16 @@ ":supports_weights" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = "EvoTreeGaussian(;kwargs...)\n\nA model type for constructing a EvoTreeGaussian, based on [EvoTrees.jl](https://github.com/Evovest/EvoTrees.jl), and implementing both an internal API the MLJ model interface. EvoTreeGaussian is used to perform Gaussian probabilistic regression, fitting μ and σ parameters to maximize likelihood.\n\n# Hyper-parameters\n\n * `nrounds=10`: Number of rounds. It corresponds to the number of trees that will be sequentially stacked. Must be >= 1.\n * `eta=0.1`: Learning rate. Each tree raw predictions are scaled by `eta` prior to be added to the stack of predictions. Must be > 0. A lower `eta` results in slower learning, requiring a higher `nrounds` but typically improves model performance.\n * `lambda::T=0.0`: L2 regularization term on weights. Must be >= 0. Higher lambda can result in a more robust model.\n * `gamma::T=0.0`: Minimum gain imprvement needed to perform a node split. Higher gamma can result in a more robust model. Must be >= 0.\n * `max_depth=5`: Maximum depth of a tree. Must be >= 1. A tree of depth 1 is made of a single prediction leaf. 
A complete tree of depth N contains `2^(N - 1)` terminal leaves and `2^(N - 1) - 1` split nodes. Compute cost is proportional to 2^max_depth. Typical optimal values are in the 3 to 9 range.\n * `min_weight=0.0`: Minimum weight needed in a node to perform a split. Matches the number of observations by default or the sum of weights as provided by the `weights` vector. Must be > 0.\n * `rowsample=1.0`: Proportion of rows that are sampled at each iteration to build the tree. Should be in `]0, 1]`.\n * `colsample=1.0`: Proportion of columns / features that are sampled at each iteration to build the tree. Should be in `]0, 1]`.\n * `nbins=32`: Number of bins into which each feature is quantized. Buckets are defined based on quantiles, hence resulting in equal weight bins. Should be between 2 and 255.\n * `monotone_constraints=Dict{Int, Int}()`: Specify monotonic constraints using a dict where the key is the feature index and the value the applicable constraint (-1=decreasing, 0=none, 1=increasing). !Experimental feature: note that for Gaussian regression, constraints may not be enforce systematically.\n * `rng=123`: Either an integer used as a seed to the random number generator or an actual random number generator (`::Random.AbstractRNG`).\n\n# Internal API\n\nDo `config = EvoTreeGaussian()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in EvoTreeGaussian(max_depth=...).\n\n## Training model\n\nA model is built using [`fit_evotree`](@ref):\n\n```julia\nmodel = fit_evotree(config; x_train, y_train, kwargs...)\n```\n\n## Inference\n\nPredictions are obtained using [`predict`](@ref) which returns a `Matrix` of size `[nobs, 2]` where the second dimensions refer to `μ` and `σ` respectively:\n\n```julia\nEvoTrees.predict(model, X)\n```\n\nAlternatively, models act as a functor, returning predictions when called as a function with features as argument:\n\n```julia\nmodel(X)\n```\n\n# MLJ\n\nFrom MLJ, the type can be imported using:\n\n```julia\nEvoTreeGaussian = @load EvoTreeGaussian pkg=EvoTrees\n```\n\nDo `model = EvoTreeGaussian()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `EvoTreeGaussian(loss=...)`.\n\n## Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n## Operations\n\n * `predict(mach, Xnew)`: returns a vector of Gaussian distributions given features `Xnew` having the same scitype as `X` above.\n\nPredictions are probabilistic.\n\nSpecific metrics can also be predicted using:\n\n * `predict_mean(mach, Xnew)`\n * `predict_mode(mach, Xnew)`\n * `predict_median(mach, Xnew)`\n\n## Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `:fitresult`: The `GBTree` object returned by EvoTrees.jl fitting algorithm.\n\n## Report\n\nThe fields of `report(mach)` are:\n\n * `:features`: The names of the features encountered in training.\n\n# Examples\n\n```\n# Internal API\nusing EvoTrees\nparams = EvoTreeGaussian(max_depth=5, nbins=32, nrounds=100)\nnobs, nfeats = 1_000, 5\nx_train, y_train = randn(nobs, nfeats), rand(nobs)\nmodel = fit_evotree(params; x_train, y_train)\npreds = EvoTrees.predict(model, x_train)\n```\n\n```\n# MLJ Interface\nusing MLJ\nEvoTreeGaussian = @load EvoTreeGaussian pkg=EvoTrees\nmodel = EvoTreeGaussian(max_depth=5, nbins=32, nrounds=100)\nX, y = @load_boston\nmach = machine(model, X, y) |> fit!\npreds = predict(mach, X)\npreds = predict_mean(mach, X)\npreds = predict_mode(mach, X)\npreds = predict_median(mach, X)\n```\n" +":docstring" = "EvoTreeGaussian(;kwargs...)\n\nA model type for constructing an EvoTreeGaussian, based on [EvoTrees.jl](https://github.com/Evovest/EvoTrees.jl), and implementing both an internal API and the MLJ model interface. EvoTreeGaussian is used to perform Gaussian probabilistic regression, fitting μ and σ parameters to maximize likelihood.\n\n# Hyper-parameters\n\n * `nrounds=10`: Number of rounds. It corresponds to the number of trees that will be sequentially stacked. Must be >= 1.\n * `eta=0.1`: Learning rate. Each tree's raw predictions are scaled by `eta` prior to being added to the stack of predictions. Must be > 0. A lower `eta` results in slower learning, requiring a higher `nrounds` but typically improves model performance.\n * `lambda::T=0.0`: L2 regularization term on weights. Must be >= 0. Higher lambda can result in a more robust model.\n * `gamma::T=0.0`: Minimum gain improvement needed to perform a node split. Higher gamma can result in a more robust model. Must be >= 0.\n * `max_depth=5`: Maximum depth of a tree. Must be >= 1. A tree of depth 1 is made of a single prediction leaf. A complete tree of depth N contains `2^(N - 1)` terminal leaves and `2^(N - 1) - 1` split nodes. Compute cost is proportional to `2^max_depth`. Typical optimal values are in the 3 to 9 range.\n * `min_weight=8.0`: Minimum weight needed in a node to perform a split. Matches the number of observations by default or the sum of weights as provided by the `weights` vector. Must be > 0.\n * `rowsample=1.0`: Proportion of rows that are sampled at each iteration to build the tree. 
Should be in `]0, 1]`.\n * `colsample=1.0`: Proportion of columns / features that are sampled at each iteration to build the tree. Should be in `]0, 1]`.\n * `nbins=32`: Number of bins into which each feature is quantized. Buckets are defined based on quantiles, hence resulting in equal weight bins. Should be between 2 and 255.\n * `monotone_constraints=Dict{Int, Int}()`: Specify monotonic constraints using a dict where the key is the feature index and the value the applicable constraint (-1=decreasing, 0=none, 1=increasing). Experimental feature: note that for Gaussian regression, constraints may not be enforced systematically.\n * `tree_type=\"binary\"`: Tree structure to be used. One of:\n\n * `binary`: Each node of a tree is grown independently. Trees are built depthwise until the maximum depth is reached, or until the minimum weight or gain (see `gamma`) stops further node splits.\n * `oblivious`: A common splitting condition is imposed on all nodes of a given depth.\n * `rng=123`: Either an integer used as a seed to the random number generator or an actual random number generator (`::Random.AbstractRNG`).\n\n# Internal API\n\nDo `config = EvoTreeGaussian()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `EvoTreeGaussian(max_depth=...)`.\n\n## Training model\n\nA model is built using [`fit_evotree`](@ref):\n\n```julia\nmodel = fit_evotree(config; x_train, y_train, kwargs...)\n```\n\n## Inference\n\nPredictions are obtained using [`predict`](@ref) which returns a `Matrix` of size `[nobs, 2]` where the second dimension refers to `μ` and `σ`, respectively:\n\n```julia\nEvoTrees.predict(model, X)\n```\n\nAlternatively, models act as functors, returning predictions when called as a function with features as argument:\n\n```julia\nmodel(X)\n```\n\n# MLJ\n\nFrom MLJ, the type can be imported using:\n\n```julia\nEvoTreeGaussian = @load EvoTreeGaussian pkg=EvoTrees\n```\n\nDo `model = EvoTreeGaussian()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `EvoTreeGaussian(loss=...)`.\n\n## Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n * `y`: the target, which can be any `AbstractVector` whose element scitype is `<:Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n## Operations\n\n * `predict(mach, Xnew)`: returns a vector of Gaussian distributions given features `Xnew` having the same scitype as `X` above.\n\nPredictions are probabilistic.\n\nSpecific metrics can also be predicted using:\n\n * `predict_mean(mach, Xnew)`\n * `predict_mode(mach, Xnew)`\n * `predict_median(mach, Xnew)`\n\n## Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `:fitresult`: The `GBTree` object returned by the EvoTrees.jl fitting algorithm.\n\n## Report\n\nThe fields of `report(mach)` are:\n\n * `:features`: The names of the features encountered in training.\n\n# Examples\n\n```\n# Internal API\nusing EvoTrees\nparams = EvoTreeGaussian(max_depth=5, nbins=32, nrounds=100)\nnobs, nfeats = 1_000, 5\nx_train, y_train = randn(nobs, nfeats), rand(nobs)\nmodel = fit_evotree(params; x_train, y_train)\npreds = EvoTrees.predict(model, x_train)\n```\n\n```\n# MLJ Interface\nusing MLJ\nEvoTreeGaussian = @load EvoTreeGaussian pkg=EvoTrees\nmodel = EvoTreeGaussian(max_depth=5, nbins=32, nrounds=100)\nX, y = @load_boston\nmach = machine(model, X, y) |> fit!\npreds = predict(mach, X)\npreds = predict_mean(mach, X)\npreds = predict_mode(mach, X)\npreds = predict_median(mach, X)\n```\n" ":name" = "EvoTreeGaussian" ":human_name" = "evo tree gaussian" ":is_supervised" = "`true`" ":prediction_type" = ":probabilistic" ":abstract_type" = "`MLJModelInterface.Probabilistic`" ":implemented_methods" = [":predict"] -":hyperparameters" = "`(:nrounds, :lambda, :gamma, :eta, :max_depth, :min_weight, :rowsample, :colsample, :nbins, :alpha, :monotone_constraints, :rng)`" -":hyperparameter_types" = "`(\"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Any\", \"Any\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":hyperparameters" = "`(:nrounds, :lambda, :gamma, :eta, :max_depth, :min_weight, :rowsample, :colsample, :nbins, :alpha, :monotone_constraints, :tree_type, :rng)`" +":hyperparameter_types" = "`(\"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Any\", \"String\", \"Any\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = ":nrounds" ":supports_training_losses" = "`false`" ":reports_feature_importances" = "`true`" @@ -5185,16 +5321,16 @@ ":supports_weights" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = "EvoTreeMLE(;kwargs...)\n\nA model type for constructing a EvoTreeMLE, based on [EvoTrees.jl](https://github.com/Evovest/EvoTrees.jl), and implementing both an internal API the MLJ model interface. EvoTreeMLE performs maximum likelihood estimation. 
Assumed distribution is specified through `loss` kwargs. Both Gaussian and Logistic distributions are supported.\n\n# Hyper-parameters\n\n`loss=:gaussian`: Loss to be be minimized during training. One of:\n\n * `:gaussian` / `:gaussian_mle`\n * `:logistic` / `:logistic_mle`\n * `nrounds=10`: Number of rounds. It corresponds to the number of trees that will be sequentially stacked. Must be >= 1.\n * `eta=0.1`: Learning rate. Each tree raw predictions are scaled by `eta` prior to be added to the stack of predictions. Must be > 0.\n\nA lower `eta` results in slower learning, requiring a higher `nrounds` but typically improves model performance. \n\n * `lambda::T=0.0`: L2 regularization term on weights. Must be >= 0. Higher lambda can result in a more robust model.\n * `gamma::T=0.0`: Minimum gain imprvement needed to perform a node split. Higher gamma can result in a more robust model. Must be >= 0.\n * `max_depth=5`: Maximum depth of a tree. Must be >= 1. A tree of depth 1 is made of a single prediction leaf. A complete tree of depth N contains `2^(N - 1)` terminal leaves and `2^(N - 1) - 1` split nodes. Compute cost is proportional to 2^max_depth. Typical optimal values are in the 3 to 9 range.\n * `min_weight=0.0`: Minimum weight needed in a node to perform a split. Matches the number of observations by default or the sum of weights as provided by the `weights` vector. Must be > 0.\n * `rowsample=1.0`: Proportion of rows that are sampled at each iteration to build the tree. Should be in `]0, 1]`.\n * `colsample=1.0`: Proportion of columns / features that are sampled at each iteration to build the tree. Should be in `]0, 1]`.\n * `nbins=32`: Number of bins into which each feature is quantized. Buckets are defined based on quantiles, hence resulting in equal weight bins. Should be between 2 and 255.\n * `monotone_constraints=Dict{Int, Int}()`: Specify monotonic constraints using a dict where the key is the feature index and the value the applicable constraint (-1=decreasing, 0=none, 1=increasing). !Experimental feature: note that for MLE regression, constraints may not be enforced systematically.\n * `rng=123`: Either an integer used as a seed to the random number generator or an actual random number generator (`::Random.AbstractRNG`).\n\n# Internal API\n\nDo `config = EvoTreeMLE()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in EvoTreeMLE(max_depth=...).\n\n## Training model\n\nA model is built using [`fit_evotree`](@ref):\n\n```julia\nmodel = fit_evotree(config; x_train, y_train, kwargs...)\n```\n\n## Inference\n\nPredictions are obtained using [`predict`](@ref) which returns a `Matrix` of size `[nobs, nparams]` where the second dimensions refer to `μ` & `σ` for Normal/Gaussian and `μ` & `s` for Logistic.\n\n```julia\nEvoTrees.predict(model, X)\n```\n\nAlternatively, models act as a functor, returning predictions when called as a function with features as argument:\n\n```julia\nmodel(X)\n```\n\n# MLJ\n\nFrom MLJ, the type can be imported using:\n\n```julia\nEvoTreeMLE = @load EvoTreeMLE pkg=EvoTrees\n```\n\nDo `model = EvoTreeMLE()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `EvoTreeMLE(loss=...)`.\n\n## Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n## Operations\n\n * `predict(mach, Xnew)`: returns a vector of Gaussian or Logistic distributions (according to provided `loss`) given features `Xnew` having the same scitype as `X` above.\n\nPredictions are probabilistic.\n\nSpecific metrics can also be predicted using:\n\n * `predict_mean(mach, Xnew)`\n * `predict_mode(mach, Xnew)`\n * `predict_median(mach, Xnew)`\n\n## Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `:fitresult`: The `GBTree` object returned by EvoTrees.jl fitting algorithm.\n\n## Report\n\nThe fields of `report(mach)` are:\n\n * `:features`: The names of the features encountered in training.\n\n# Examples\n\n```\n# Internal API\nusing EvoTrees\nconfig = EvoTreeMLE(max_depth=5, nbins=32, nrounds=100)\nnobs, nfeats = 1_000, 5\nx_train, y_train = randn(nobs, nfeats), rand(nobs)\nmodel = fit_evotree(config; x_train, y_train)\npreds = EvoTrees.predict(model, x_train)\n```\n\n```\n# MLJ Interface\nusing MLJ\nEvoTreeMLE = @load EvoTreeMLE pkg=EvoTrees\nmodel = EvoTreeMLE(max_depth=5, nbins=32, nrounds=100)\nX, y = @load_boston\nmach = machine(model, X, y) |> fit!\npreds = predict(mach, X)\npreds = predict_mean(mach, X)\npreds = predict_mode(mach, X)\npreds = predict_median(mach, X)\n```\n" +":docstring" = "EvoTreeMLE(;kwargs...)\n\nA model type for constructing an EvoTreeMLE, based on [EvoTrees.jl](https://github.com/Evovest/EvoTrees.jl), and implementing both an internal API and the MLJ model interface. EvoTreeMLE performs maximum likelihood estimation. Assumed distribution is specified through `loss` kwargs. Both Gaussian and Logistic distributions are supported.\n\n# Hyper-parameters\n\n * `loss=:gaussian`: Loss to be minimized during training. One of:\n\n * `:gaussian` / `:gaussian_mle`\n * `:logistic` / `:logistic_mle`\n * `nrounds=10`: Number of rounds. It corresponds to the number of trees that will be sequentially stacked. Must be >= 1.\n * `eta=0.1`: Learning rate. Each tree's raw predictions are scaled by `eta` prior to being added to the stack of predictions. Must be > 0. A lower `eta` results in slower learning, requiring a higher `nrounds` but typically improves model performance.\n * `lambda::T=0.0`: L2 regularization term on weights. Must be >= 0. Higher lambda can result in a more robust model.\n * `gamma::T=0.0`: Minimum gain improvement needed to perform a node split. Higher gamma can result in a more robust model. Must be >= 0.\n * `max_depth=5`: Maximum depth of a tree. Must be >= 1. A tree of depth 1 is made of a single prediction leaf. A complete tree of depth N contains `2^(N - 1)` terminal leaves and `2^(N - 1) - 1` split nodes. Compute cost is proportional to `2^max_depth`. Typical optimal values are in the 3 to 9 range.\n * `min_weight=8.0`: Minimum weight needed in a node to perform a split. Matches the number of observations by default or the sum of weights as provided by the `weights` vector. 
Must be > 0.\n * `rowsample=1.0`: Proportion of rows that are sampled at each iteration to build the tree. Should be in `]0, 1]`.\n * `colsample=1.0`: Proportion of columns / features that are sampled at each iteration to build the tree. Should be in `]0, 1]`.\n * `nbins=32`: Number of bins into which each feature is quantized. Buckets are defined based on quantiles, hence resulting in equal weight bins. Should be between 2 and 255.\n * `monotone_constraints=Dict{Int, Int}()`: Specify monotonic constraints using a dict where the key is the feature index and the value the applicable constraint (-1=decreasing, 0=none, 1=increasing). Experimental feature: note that for MLE regression, constraints may not be enforced systematically.\n * `tree_type=\"binary\"`: Tree structure to be used. One of:\n\n * `binary`: Each node of a tree is grown independently. Trees are built depthwise until the maximum depth is reached, or until the minimum weight or gain (see `gamma`) stops further node splits.\n * `oblivious`: A common splitting condition is imposed on all nodes of a given depth.\n * `rng=123`: Either an integer used as a seed to the random number generator or an actual random number generator (`::Random.AbstractRNG`).\n\n# Internal API\n\nDo `config = EvoTreeMLE()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `EvoTreeMLE(max_depth=...)`.\n\n## Training model\n\nA model is built using [`fit_evotree`](@ref):\n\n```julia\nmodel = fit_evotree(config; x_train, y_train, kwargs...)\n```\n\n## Inference\n\nPredictions are obtained using [`predict`](@ref) which returns a `Matrix` of size `[nobs, nparams]` where the second dimension refers to `μ` & `σ` for Normal/Gaussian and `μ` & `s` for Logistic.\n\n```julia\nEvoTrees.predict(model, X)\n```\n\nAlternatively, models act as functors, returning predictions when called as a function with features as argument:\n\n```julia\nmodel(X)\n```\n\n# MLJ\n\nFrom MLJ, the type can be imported using:\n\n```julia\nEvoTreeMLE = @load EvoTreeMLE pkg=EvoTrees\n```\n\nDo `model = EvoTreeMLE()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `EvoTreeMLE(loss=...)`.\n\n## Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with\n\n```\nmach = machine(model, X, y)\n```\n\nwhere\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n * `y`: the target, which can be any `AbstractVector` whose element scitype is `<:Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n## Operations\n\n * `predict(mach, Xnew)`: returns a vector of Gaussian or Logistic distributions (according to provided `loss`) given features `Xnew` having the same scitype as `X` above.\n\nPredictions are probabilistic.\n\nSpecific metrics can also be predicted using:\n\n * `predict_mean(mach, Xnew)`\n * `predict_mode(mach, Xnew)`\n * `predict_median(mach, Xnew)`\n\n## Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `:fitresult`: The `GBTree` object returned by the EvoTrees.jl fitting algorithm.\n\n## Report\n\nThe fields of `report(mach)` are:\n\n * `:features`: The names of the features encountered in training.\n\n# Examples\n\n```\n# Internal API\nusing EvoTrees\nconfig = EvoTreeMLE(max_depth=5, nbins=32, nrounds=100)\nnobs, nfeats = 1_000, 5\nx_train, y_train = randn(nobs, nfeats), rand(nobs)\nmodel = fit_evotree(config; x_train, y_train)\npreds = EvoTrees.predict(model, x_train)\n```\n\n```\n# MLJ Interface\nusing MLJ\nEvoTreeMLE = @load EvoTreeMLE pkg=EvoTrees\nmodel = EvoTreeMLE(max_depth=5, nbins=32, nrounds=100)\nX, y = @load_boston\nmach = machine(model, X, y) |> fit!\npreds = predict(mach, X)\npreds = predict_mean(mach, X)\npreds = predict_mode(mach, X)\npreds = predict_median(mach, X)\n```\n" ":name" = "EvoTreeMLE" ":human_name" = "evo tree mle" ":is_supervised" = "`true`" ":prediction_type" = ":probabilistic" ":abstract_type" = "`MLJModelInterface.Probabilistic`" ":implemented_methods" = [] -":hyperparameters" = "`(:nrounds, :lambda, :gamma, :eta, :max_depth, :min_weight, :rowsample, :colsample, :nbins, :alpha, :monotone_constraints, :rng)`" -":hyperparameter_types" = "`(\"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Any\", \"Any\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":hyperparameters" = "`(:nrounds, :lambda, :gamma, :eta, :max_depth, :min_weight, :rowsample, :colsample, :nbins, :alpha, :monotone_constraints, :tree_type, :rng)`" +":hyperparameter_types" = "`(\"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Any\", \"String\", \"Any\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = ":nrounds" ":supports_training_losses" = "`false`" ":reports_feature_importances" = "`true`" @@ -5219,16 +5355,16 @@ ":supports_weights" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = "EvoTreeRegressor(;kwargs...)\n\nA model type for constructing a EvoTreeRegressor, based on [EvoTrees.jl](https://github.com/Evovest/EvoTrees.jl), and implementing both an internal API and the MLJ model interface.\n\n# Hyper-parameters\n\n * `loss=:mse`: Loss 
to be be minimized during training. One of:\n\n * `:mse`\n * `:logloss`\n * `:gamma`\n * `:tweedie`\n * `:quantile`\n * `:l1`\n * `nrounds=10`: Number of rounds. It corresponds to the number of trees that will be sequentially stacked. Must be >= 1.\n * `eta=0.1`: Learning rate. Each tree raw predictions are scaled by `eta` prior to be added to the stack of predictions. Must be > 0. A lower `eta` results in slower learning, requiring a higher `nrounds` but typically improves model performance.\n * `lambda::T=0.0`: L2 regularization term on weights. Must be >= 0. Higher lambda can result in a more robust model.\n * `gamma::T=0.0`: Minimum gain improvement needed to perform a node split. Higher gamma can result in a more robust model. Must be >= 0.\n * `alpha::T=0.5`: Loss specific parameter in the [0, 1] range: - `:quantile`: target quantile for the regression. - `:l1`: weighting parameters to positive vs negative residuals. - Positive residual weights = `alpha` - Negative residual weights = `(1 - alpha)`\n * `max_depth=5`: Maximum depth of a tree. Must be >= 1. A tree of depth 1 is made of a single prediction leaf. A complete tree of depth N contains `2^(N - 1)` terminal leaves and `2^(N - 1) - 1` split nodes. Compute cost is proportional to `2^max_depth`. Typical optimal values are in the 3 to 9 range.\n * `min_weight=0.0`: Minimum weight needed in a node to perform a split. Matches the number of observations by default or the sum of weights as provided by the `weights` vector. Must be > 0.\n * `rowsample=1.0`: Proportion of rows that are sampled at each iteration to build the tree. Should be in `]0, 1]`.\n * `colsample=1.0`: Proportion of columns / features that are sampled at each iteration to build the tree. Should be in `]0, 1]`.\n * `nbins=32`: Number of bins into which each feature is quantized. Buckets are defined based on quantiles, hence resulting in equal weight bins. Should be between 2 and 255.\n * `monotone_constraints=Dict{Int, Int}()`: Specify monotonic constraints using a dict where the key is the feature index and the value the applicable constraint (-1=decreasing, 0=none, 1=increasing). Only `:linear`, `:logistic`, `:gamma` and `tweedie` losses are supported at the moment.\n * `rng=123`: Either an integer used as a seed to the random number generator or an actual random number generator (`::Random.AbstractRNG`).\n\n# Internal API\n\nDo `config = EvoTreeRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in EvoTreeRegressor(loss=...).\n\n## Training model\n\nA model is built using [`fit_evotree`](@ref):\n\n```julia\nmodel = fit_evotree(config; x_train, y_train, kwargs...)\n```\n\n## Inference\n\nPredictions are obtained using [`predict`](@ref) which returns a `Matrix` of size `[nobs, 1]`:\n\n```julia\nEvoTrees.predict(model, X)\n```\n\nAlternatively, models act as a functor, returning predictions when called as a function with features as argument:\n\n```julia\nmodel(X)\n```\n\n# MLJ Interface\n\nFrom MLJ, the type can be imported using:\n\n```julia\nEvoTreeRegressor = @load EvoTreeRegressor pkg=EvoTrees\n```\n\nDo `model = EvoTreeRegressor()` to construct an instance with default hyper-parameters. 
Provide keyword arguments to override hyper-parameter defaults, as in `EvoTreeRegressor(loss=...)`.\n\n## Training model\n\nIn MLJ or MLJBase, bind an instance `model` to data with `mach = machine(model, X, y)` where\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n## Operations\n\n * `predict(mach, Xnew)`: return predictions of the target given features `Xnew` having the same scitype as `X` above. Predictions are deterministic.\n\n## Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `:fitresult`: The `GBTree` object returned by EvoTrees.jl fitting algorithm.\n\n## Report\n\nThe fields of `report(mach)` are:\n\n * `:features`: The names of the features encountered in training.\n\n# Examples\n\n```\n# Internal API\nusing EvoTrees\nconfig = EvoTreeRegressor(max_depth=5, nbins=32, nrounds=100)\nnobs, nfeats = 1_000, 5\nx_train, y_train = randn(nobs, nfeats), rand(nobs)\nmodel = fit_evotree(config; x_train, y_train)\npreds = EvoTrees.predict(model, x_train)\n```\n\n```\n# MLJ Interface\nusing MLJ\nEvoTreeRegressor = @load EvoTreeRegressor pkg=EvoTrees\nmodel = EvoTreeRegressor(max_depth=5, nbins=32, nrounds=100)\nX, y = @load_boston\nmach = machine(model, X, y) |> fit!\npreds = predict(mach, X)\n```\n" +":docstring" = "EvoTreeRegressor(;kwargs...)\n\nA model type for constructing an EvoTreeRegressor, based on [EvoTrees.jl](https://github.com/Evovest/EvoTrees.jl), and implementing both an internal API and the MLJ model interface.\n\n# Hyper-parameters\n\n * `loss=:mse`: Loss to be minimized during training. One of:\n\n * `:mse`\n * `:logloss`\n * `:gamma`\n * `:tweedie`\n * `:quantile`\n * `:l1`\n * `nrounds=10`: Number of rounds. It corresponds to the number of trees that will be sequentially stacked. Must be >= 1.\n * `eta=0.1`: Learning rate. Each tree's raw predictions are scaled by `eta` prior to being added to the stack of predictions. Must be > 0. A lower `eta` results in slower learning, requiring a higher `nrounds` but typically improves model performance.\n * `lambda::T=0.0`: L2 regularization term on weights. Must be >= 0. Higher lambda can result in a more robust model.\n * `gamma::T=0.0`: Minimum gain improvement needed to perform a node split. Higher gamma can result in a more robust model. Must be >= 0.\n * `alpha::T=0.5`: Loss-specific parameter in the [0, 1] range: - `:quantile`: target quantile for the regression. - `:l1`: weighting parameter for positive vs negative residuals. - Positive residual weights = `alpha` - Negative residual weights = `(1 - alpha)`\n * `max_depth=5`: Maximum depth of a tree. Must be >= 1. A tree of depth 1 is made of a single prediction leaf. A complete tree of depth N contains `2^(N - 1)` terminal leaves and `2^(N - 1) - 1` split nodes. Compute cost is proportional to `2^max_depth`. Typical optimal values are in the 3 to 9 range.\n * `min_weight=1.0`: Minimum weight needed in a node to perform a split. Matches the number of observations by default or the sum of weights as provided by the `weights` vector. Must be > 0.\n * `rowsample=1.0`: Proportion of rows that are sampled at each iteration to build the tree. 
Should be in `]0, 1]`.\n * `colsample=1.0`: Proportion of columns / features that are sampled at each iteration to build the tree. Should be in `]0, 1]`.\n * `nbins=32`: Number of bins into which each feature is quantized. Buckets are defined based on quantiles, hence resulting in equal weight bins. Should be between 2 and 255.\n * `monotone_constraints=Dict{Int, Int}()`: Specify monotonic constraints using a dict where the key is the feature index and the value the applicable constraint (-1=decreasing, 0=none, 1=increasing). Only `:linear`, `:logistic`, `:gamma` and `:tweedie` losses are supported at the moment.\n * `tree_type=\"binary\"`: Tree structure to be used. One of:\n\n * `binary`: Each node of a tree is grown independently. Trees are built depthwise until the maximum depth is reached, or until the minimum weight or gain (see `gamma`) stops further node splits.\n * `oblivious`: A common splitting condition is imposed on all nodes of a given depth.\n * `rng=123`: Either an integer used as a seed to the random number generator or an actual random number generator (`::Random.AbstractRNG`).\n\n# Internal API\n\nDo `config = EvoTreeRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `EvoTreeRegressor(loss=...)`.\n\n## Training model\n\nA model is built using [`fit_evotree`](@ref):\n\n```julia\nmodel = fit_evotree(config; x_train, y_train, kwargs...)\n```\n\n## Inference\n\nPredictions are obtained using [`predict`](@ref) which returns a `Matrix` of size `[nobs, 1]`:\n\n```julia\nEvoTrees.predict(model, X)\n```\n\nAlternatively, models act as functors, returning predictions when called as a function with features as argument:\n\n```julia\nmodel(X)\n```\n\n# MLJ Interface\n\nFrom MLJ, the type can be imported using:\n\n```julia\nEvoTreeRegressor = @load EvoTreeRegressor pkg=EvoTrees\n```\n\nDo `model = EvoTreeRegressor()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `EvoTreeRegressor(loss=...)`.\n\n## Training model\n\nIn MLJ or MLJBase, bind an instance `model` to data with `mach = machine(model, X, y)` where\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n * `y`: the target, which can be any `AbstractVector` whose element scitype is `<:Continuous`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n## Operations\n\n * `predict(mach, Xnew)`: returns predictions of the target given features `Xnew` having the same scitype as `X` above. 
Predictions are deterministic.\n\n## Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `:fitresult`: The `GBTree` object returned by the EvoTrees.jl fitting algorithm.\n\n## Report\n\nThe fields of `report(mach)` are:\n\n * `:features`: The names of the features encountered in training.\n\n# Examples\n\n```\n# Internal API\nusing EvoTrees\nconfig = EvoTreeRegressor(max_depth=5, nbins=32, nrounds=100)\nnobs, nfeats = 1_000, 5\nx_train, y_train = randn(nobs, nfeats), rand(nobs)\nmodel = fit_evotree(config; x_train, y_train)\npreds = EvoTrees.predict(model, x_train)\n```\n\n```\n# MLJ Interface\nusing MLJ\nEvoTreeRegressor = @load EvoTreeRegressor pkg=EvoTrees\nmodel = EvoTreeRegressor(max_depth=5, nbins=32, nrounds=100)\nX, y = @load_boston\nmach = machine(model, X, y) |> fit!\npreds = predict(mach, X)\n```\n" ":name" = "EvoTreeRegressor" ":human_name" = "evo tree regressor" ":is_supervised" = "`true`" ":prediction_type" = ":deterministic" ":abstract_type" = "`MLJModelInterface.Deterministic`" ":implemented_methods" = [":predict"] -":hyperparameters" = "`(:nrounds, :lambda, :gamma, :eta, :max_depth, :min_weight, :rowsample, :colsample, :nbins, :alpha, :monotone_constraints, :rng)`" -":hyperparameter_types" = "`(\"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Any\", \"Any\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":hyperparameters" = "`(:nrounds, :lambda, :gamma, :eta, :max_depth, :min_weight, :rowsample, :colsample, :nbins, :alpha, :monotone_constraints, :tree_type, :rng)`" +":hyperparameter_types" = "`(\"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Any\", \"String\", \"Any\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = ":nrounds" ":supports_training_losses" = "`false`" ":reports_feature_importances" = "`true`" @@ -5253,16 +5389,16 @@ ":supports_weights" = "`true`" ":supports_class_weights" = "`false`" ":supports_online" = "`false`" -":docstring" = "EvoTreeCount(;kwargs...)\n\nA model type for constructing a EvoTreeCount, based on [EvoTrees.jl](https://github.com/Evovest/EvoTrees.jl), and implementing both an internal API the MLJ model interface. EvoTreeCount is used to perform Poisson probabilistic regression on count target.\n\n# Hyper-parameters\n\n * `nrounds=10`: Number of rounds. It corresponds to the number of trees that will be sequentially stacked. Must be >= 1.\n * `eta=0.1`: Learning rate. Each tree raw predictions are scaled by `eta` prior to be added to the stack of predictions. Must be > 0. A lower `eta` results in slower learning, requiring a higher `nrounds` but typically improves model performance.\n * `lambda::T=0.0`: L2 regularization term on weights. Must be >= 0. Higher lambda can result in a more robust model. Must be >= 0.\n * `gamma::T=0.0`: Minimum gain imprvement needed to perform a node split. Higher gamma can result in a more robust model.\n * `max_depth=5`: Maximum depth of a tree. Must be >= 1. A tree of depth 1 is made of a single prediction leaf. A complete tree of depth N contains `2^(N - 1)` terminal leaves and `2^(N - 1) - 1` split nodes. Compute cost is proportional to 2^max_depth. 
Typical optimal values are in the 3 to 9 range.\n * `min_weight=0.0`: Minimum weight needed in a node to perform a split. Matches the number of observations by default or the sum of weights as provided by the `weights` vector. Must be > 0.\n * `rowsample=1.0`: Proportion of rows that are sampled at each iteration to build the tree. Should be `]0, 1]`.\n * `colsample=1.0`: Proportion of columns / features that are sampled at each iteration to build the tree. Should be `]0, 1]`.\n * `nbins=32`: Number of bins into which each feature is quantized. Buckets are defined based on quantiles, hence resulting in equal weight bins. Should be between 2 and 255.\n * `monotone_constraints=Dict{Int, Int}()`: Specify monotonic constraints using a dict where the key is the feature index and the value the applicable constraint (-1=decreasing, 0=none, 1=increasing).\n * `rng=123`: Either an integer used as a seed to the random number generator or an actual random number generator (`::Random.AbstractRNG`).\n\n# Internal API\n\nDo `config = EvoTreeCount()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in EvoTreeCount(max_depth=...).\n\n## Training model\n\nA model is built using [`fit_evotree`](@ref):\n\n```julia\nmodel = fit_evotree(config; x_train, y_train, kwargs...)\n```\n\n## Inference\n\nPredictions are obtained using [`predict`](@ref) which returns a `Matrix` of size `[nobs, 1]`:\n\n```julia\nEvoTrees.predict(model, X)\n```\n\nAlternatively, models act as a functor, returning predictions when called as a function with features as argument:\n\n```julia\nmodel(X)\n```\n\n# MLJ\n\nFrom MLJ, the type can be imported using:\n\n```julia\nEvoTreeCount = @load EvoTreeCount pkg=EvoTrees\n```\n\nDo `model = EvoTreeCount()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `EvoTreeCount(loss=...)`.\n\n## Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with mach = machine(model, X, y) where\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n * `y`: is the target, which can be any `AbstractVector` whose element scitype is `<:Count`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n# Operations\n\n * `predict(mach, Xnew)`: returns a vector of Poisson distributions given features `Xnew` having the same scitype as `X` above. 
Predictions are probabilistic.\n\nSpecific metrics can also be predicted using:\n\n * `predict_mean(mach, Xnew)`\n * `predict_mode(mach, Xnew)`\n * `predict_median(mach, Xnew)`\n\n## Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `:fitresult`: The `GBTree` object returned by EvoTrees.jl fitting algorithm.\n\n## Report\n\nThe fields of `report(mach)` are:\n\n * `:features`: The names of the features encountered in training.\n\n# Examples\n\n```\n# Internal API\nusing EvoTrees\nconfig = EvoTreeCount(max_depth=5, nbins=32, nrounds=100)\nnobs, nfeats = 1_000, 5\nx_train, y_train = randn(nobs, nfeats), rand(0:2, nobs)\nmodel = fit_evotree(config; x_train, y_train)\npreds = EvoTrees.predict(model, x_train)\n```\n\n```\nusing MLJ\nEvoTreeCount = @load EvoTreeCount pkg=EvoTrees\nmodel = EvoTreeCount(max_depth=5, nbins=32, nrounds=100)\nnobs, nfeats = 1_000, 5\nX, y = randn(nobs, nfeats), rand(0:2, nobs)\nmach = machine(model, X, y) |> fit!\npreds = predict(mach, X)\npreds = predict_mean(mach, X)\npreds = predict_mode(mach, X)\npreds = predict_median(mach, X)\n\n```\n\nSee also [EvoTrees.jl](https://github.com/Evovest/EvoTrees.jl).\n" +":docstring" = "EvoTreeCount(;kwargs...)\n\nA model type for constructing an EvoTreeCount, based on [EvoTrees.jl](https://github.com/Evovest/EvoTrees.jl), and implementing both an internal API and the MLJ model interface. EvoTreeCount is used to perform Poisson probabilistic regression on a count target.\n\n# Hyper-parameters\n\n * `nrounds=10`: Number of rounds. It corresponds to the number of trees that will be sequentially stacked. Must be >= 1.\n * `eta=0.1`: Learning rate. Each tree's raw predictions are scaled by `eta` prior to being added to the stack of predictions. Must be > 0. A lower `eta` results in slower learning, requiring a higher `nrounds` but typically improves model performance.\n * `lambda::T=0.0`: L2 regularization term on weights. Must be >= 0. Higher lambda can result in a more robust model.\n * `gamma::T=0.0`: Minimum gain improvement needed to perform a node split. Higher gamma can result in a more robust model. Must be >= 0.\n * `max_depth=5`: Maximum depth of a tree. Must be >= 1. A tree of depth 1 is made of a single prediction leaf. A complete tree of depth N contains `2^(N - 1)` terminal leaves and `2^(N - 1) - 1` split nodes. Compute cost is proportional to `2^max_depth`. Typical optimal values are in the 3 to 9 range.\n * `min_weight=1.0`: Minimum weight needed in a node to perform a split. Matches the number of observations by default or the sum of weights as provided by the `weights` vector. Must be > 0.\n * `rowsample=1.0`: Proportion of rows that are sampled at each iteration to build the tree. Should be in `]0, 1]`.\n * `colsample=1.0`: Proportion of columns / features that are sampled at each iteration to build the tree. Should be in `]0, 1]`.\n * `nbins=32`: Number of bins into which each feature is quantized. Buckets are defined based on quantiles, hence resulting in equal weight bins. Should be between 2 and 255.\n * `monotone_constraints=Dict{Int, Int}()`: Specify monotonic constraints using a dict where the key is the feature index and the value the applicable constraint (-1=decreasing, 0=none, 1=increasing).\n * `tree_type=\"binary\"`: Tree structure to be used. One of:\n\n * `binary`: Each node of a tree is grown independently. 
Trees are built depthwise until the maximum depth is reached, or until the minimum weight or gain (see `gamma`) stops further node splits.\n * `oblivious`: A common splitting condition is imposed on all nodes of a given depth.\n * `rng=123`: Either an integer used as a seed to the random number generator or an actual random number generator (`::Random.AbstractRNG`).\n\n# Internal API\n\nDo `config = EvoTreeCount()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `EvoTreeCount(max_depth=...)`.\n\n## Training model\n\nA model is built using [`fit_evotree`](@ref):\n\n```julia\nmodel = fit_evotree(config; x_train, y_train, kwargs...)\n```\n\n## Inference\n\nPredictions are obtained using [`predict`](@ref) which returns a `Matrix` of size `[nobs, 1]`:\n\n```julia\nEvoTrees.predict(model, X)\n```\n\nAlternatively, models act as functors, returning predictions when called as a function with features as argument:\n\n```julia\nmodel(X)\n```\n\n# MLJ\n\nFrom MLJ, the type can be imported using:\n\n```julia\nEvoTreeCount = @load EvoTreeCount pkg=EvoTrees\n```\n\nDo `model = EvoTreeCount()` to construct an instance with default hyper-parameters. Provide keyword arguments to override hyper-parameter defaults, as in `EvoTreeCount(loss=...)`.\n\n## Training data\n\nIn MLJ or MLJBase, bind an instance `model` to data with `mach = machine(model, X, y)` where\n\n * `X`: any table of input features (eg, a `DataFrame`) whose columns each have one of the following element scitypes: `Continuous`, `Count`, or `<:OrderedFactor`; check column scitypes with `schema(X)`\n * `y`: the target, which can be any `AbstractVector` whose element scitype is `<:Count`; check the scitype with `scitype(y)`\n\nTrain the machine using `fit!(mach, rows=...)`.\n\n## Operations\n\n * `predict(mach, Xnew)`: returns a vector of Poisson distributions given features `Xnew` having the same scitype as `X` above. 
Predictions are probabilistic.\n\nSpecific metrics can also be predicted using:\n\n * `predict_mean(mach, Xnew)`\n * `predict_mode(mach, Xnew)`\n * `predict_median(mach, Xnew)`\n\n## Fitted parameters\n\nThe fields of `fitted_params(mach)` are:\n\n * `:fitresult`: The `GBTree` object returned by the EvoTrees.jl fitting algorithm.\n\n## Report\n\nThe fields of `report(mach)` are:\n\n * `:features`: The names of the features encountered in training.\n\n# Examples\n\n```\n# Internal API\nusing EvoTrees\nconfig = EvoTreeCount(max_depth=5, nbins=32, nrounds=100)\nnobs, nfeats = 1_000, 5\nx_train, y_train = randn(nobs, nfeats), rand(0:2, nobs)\nmodel = fit_evotree(config; x_train, y_train)\npreds = EvoTrees.predict(model, x_train)\n```\n\n```\n# MLJ Interface\nusing MLJ\nEvoTreeCount = @load EvoTreeCount pkg=EvoTrees\nmodel = EvoTreeCount(max_depth=5, nbins=32, nrounds=100)\nnobs, nfeats = 1_000, 5\nX, y = randn(nobs, nfeats), rand(0:2, nobs)\nmach = machine(model, X, y) |> fit!\npreds = predict(mach, X)\npreds = predict_mean(mach, X)\npreds = predict_mode(mach, X)\npreds = predict_median(mach, X)\n```\n\nSee also [EvoTrees.jl](https://github.com/Evovest/EvoTrees.jl).\n" ":name" = "EvoTreeCount" ":human_name" = "evo tree count" ":is_supervised" = "`true`" ":prediction_type" = ":probabilistic" ":abstract_type" = "`MLJModelInterface.Probabilistic`" ":implemented_methods" = [":predict"] -":hyperparameters" = "`(:nrounds, :lambda, :gamma, :eta, :max_depth, :min_weight, :rowsample, :colsample, :nbins, :alpha, :monotone_constraints, :rng)`" -":hyperparameter_types" = "`(\"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Any\", \"Any\")`" -":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" +":hyperparameters" = "`(:nrounds, :lambda, :gamma, :eta, :max_depth, :min_weight, :rowsample, :colsample, :nbins, :alpha, :monotone_constraints, :tree_type, :rng)`" +":hyperparameter_types" = "`(\"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Float64\", \"Float64\", \"Int64\", \"Float64\", \"Any\", \"String\", \"Any\")`" +":hyperparameter_ranges" = "`(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing)`" ":iteration_parameter" = ":nrounds" ":supports_training_losses" = "`false`" ":reports_feature_importances" = "`true`" diff --git a/src/registry/Models.toml b/src/registry/Models.toml index 930a6354..5bf50b75 100644 --- a/src/registry/Models.toml +++ b/src/registry/Models.toml @@ -3,6 +3,7 @@ CatBoost = ["CatBoostRegressor", "CatBoostClassifier"] NearestNeighborModels = ["KNNClassifier", "MultitargetKNNClassifier", "MultitargetKNNRegressor", "KNNRegressor"] MLJScikitLearnInterface = ["ProbabilisticSGDClassifier", "RidgeCVClassifier", "LogisticClassifier", "RandomForestRegressor", "ElasticNetCVRegressor", "PerceptronClassifier", "MultiTaskLassoRegressor", "LinearRegressor", "DBSCAN", "RidgeRegressor", "LassoLarsICRegressor", "ARDRegressor", "SVMNuRegressor", "RidgeClassifier", "SGDRegressor", "ComplementNBClassifier", "HuberRegressor", "SVMNuClassifier", "GradientBoostingClassifier", "GaussianProcessRegressor", "SVMLinearRegressor", "LarsRegressor", "MeanShift", "AdaBoostRegressor", "AffinityPropagation", "MultiTaskLassoCVRegressor", "OrthogonalMatchingPursuitRegressor", "RidgeCVRegressor", "PassiveAggressiveClassifier", "SVMRegressor", "BernoulliNBClassifier", "GaussianNBClassifier", 
"ExtraTreesClassifier", "KMeans", "MultiTaskElasticNetCVRegressor", "LassoLarsCVRegressor", "OrthogonalMatchingPursuitCVRegressor", "AdaBoostClassifier", "PassiveAggressiveRegressor", "BayesianRidgeRegressor", "RANSACRegressor", "BaggingClassifier", "GaussianProcessClassifier", "OPTICS", "KNeighborsRegressor", "MiniBatchKMeans", "LassoCVRegressor", "DummyRegressor", "LassoLarsRegressor", "LarsCVRegressor", "KNeighborsClassifier", "SVMLinearClassifier", "FeatureAgglomeration", "DummyClassifier", "BaggingRegressor", "BayesianQDA", "BayesianLDA", "SGDClassifier", "TheilSenRegressor", "SpectralClustering", "Birch", "AgglomerativeClustering", "ElasticNetRegressor", "RandomForestClassifier", "LogisticCVClassifier", "MultiTaskElasticNetRegressor", "ExtraTreesRegressor", "LassoRegressor", "MultinomialNBClassifier", "GradientBoostingRegressor", "SVMClassifier"] OutlierDetectionNeighbors = ["ABODDetector", "DNNDetector", "LOFDetector", "KNNDetector", "COFDetector"] +SIRUS = ["StableRulesClassifier", "StableRulesRegressor", "StableForestClassifier", "StableForestRegressor"] PartialLeastSquaresRegressor = ["KPLSRegressor", "PLSRegressor"] MLJLinearModels = ["QuantileRegressor", "LogisticClassifier", "MultinomialClassifier", "LADRegressor", "RidgeRegressor", "RobustRegressor", "ElasticNetRegressor", "LinearRegressor", "LassoRegressor", "HuberRegressor"] ParallelKMeans = ["KMeans"] diff --git a/src/registry/Project.toml b/src/registry/Project.toml index aaa8cf3a..a70b44c8 100644 --- a/src/registry/Project.toml +++ b/src/registry/Project.toml @@ -27,6 +27,7 @@ OutlierDetectionPython = "2449c660-d36c-460e-a68b-92ab3c865b3e" ParallelKMeans = "42b8e9d4-006b-409a-8472-7f34b3fb58af" PartialLeastSquaresRegressor = "f4b1acfe-f311-436c-bb79-8483f53c17d5" Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" +SIRUS = "cdeec39e-fb35-4959-aadb-a1dd5dede958" SelfOrganizingMaps = "ba4b7379-301a-4be0-bee6-171e4e152787" SymbolicRegression = "8254be44-1295-4e6a-a16d-46603ac705cb"