Skip to content

Commit

Permalink
Merge pull request #1127 from JuliaAI/constructor
Browse files Browse the repository at this point in the history
Add model wrappers to the Model Browser
  • Loading branch information
ablaom committed Jun 5, 2024
2 parents b4c6fa4 + 9f6f77e commit 4a23570
Show file tree
Hide file tree
Showing 8 changed files with 82 additions and 23 deletions.
43 changes: 41 additions & 2 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597"
ComputationalResources = "ed09eef8-17a6-5b46-8889-db040fac31e3"
Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
FeatureSelection = "33837fe5-dbff-4c9e-8c2f-c5612fe2b8b6"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
MLJBalancing = "45f359ea-796d-4f51-95a5-deb1a414c586"
MLJBase = "a7f614a8-145f-11e9-1d2a-a57a1082229d"
Expand All @@ -31,12 +32,13 @@ Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
CategoricalArrays = "0.8,0.9, 0.10"
ComputationalResources = "0.3"
Distributions = "0.21,0.22,0.23, 0.24, 0.25"
FeatureSelection = "0.1.1"
MLJBalancing = "0.1"
MLJBase = "1"
MLJEnsembles = "0.4"
MLJFlow = "0.5"
MLJIteration = "0.6"
MLJModels = "0.16"
MLJModels = "0.17"
MLJTestIntegration = "0.5.0"
MLJTuning = "0.8"
OpenML = "0.2,0.3"
Expand Down Expand Up @@ -89,4 +91,41 @@ SymbolicRegression = "8254be44-1295-4e6a-a16d-46603ac705cb"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[targets]
test = ["BetaML", "CatBoost", "EvoLinear", "EvoTrees", "Imbalance", "InteractiveUtils", "LightGBM", "MLJClusteringInterface", "MLJDecisionTreeInterface", "MLJFlux", "MLJGLMInterface", "MLJLIBSVMInterface", "MLJLinearModels", "MLJMultivariateStatsInterface", "MLJNaiveBayesInterface", "MLJScikitLearnInterface", "MLJTSVDInterface", "MLJTestInterface", "MLJTestIntegration", "MLJText", "MLJXGBoostInterface", "Markdown", "NearestNeighborModels", "OneRule", "OutlierDetectionNeighbors", "OutlierDetectionPython", "ParallelKMeans", "PartialLeastSquaresRegressor", "PartitionedLS", "SelfOrganizingMaps", "SIRUS", "SymbolicRegression", "StableRNGs", "Suppressor", "Test"]
test = [
"BetaML",
"CatBoost",
"EvoLinear",
"EvoTrees",
"Imbalance",
"InteractiveUtils",
"LightGBM",
"MLJClusteringInterface",
"MLJDecisionTreeInterface",
"MLJFlux",
"MLJGLMInterface",
"MLJLIBSVMInterface",
"MLJLinearModels",
"MLJMultivariateStatsInterface",
"MLJNaiveBayesInterface",
"MLJScikitLearnInterface",
"MLJTSVDInterface",
"MLJTestInterface",
"MLJTestIntegration",
"MLJText",
"MLJXGBoostInterface",
"Markdown",
"NearestNeighborModels",
"OneRule",
"OutlierDetectionNeighbors",
"OutlierDetectionPython",
"ParallelKMeans",
"PartialLeastSquaresRegressor",
"PartitionedLS",
"SelfOrganizingMaps",
"SIRUS",
"SymbolicRegression",
"StableRNGs",
"Suppressor",
"Test",
]

29 changes: 19 additions & 10 deletions docs/ModelDescriptors.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@ AutoEncoder_BetaML = ["dimension_reduction"]
BM25Transformer_MLJText = ["encoders", "text_analysis"]
BaggingClassifier_MLJScikitLearnInterface = ["classification", "ensemble_models"]
BaggingRegressor_MLJScikitLearnInterface = ["regression", "ensemble_models"]
BalancedBaggingClassifier_MLJBalancing = ["class_imbalance", "classification"]
BalancedBaggingClassifier_MLJBalancing = ["class_imbalance", "classification", "meta_algorithms"]
BinaryThresholdPredictor_MLJModels = ["meta_algorithms", "classification"]
BalancedModel_MLJBalancing = ["class_imbalance", "meta_algorithms"]
BayesianLDA_MultivariateStats = ["dimension_reduction", "classification", "Bayesian_models"]
BayesianLDA_MLJScikitLearnInterface = ["dimension_reduction", "classification", "Bayesian_models"]
BayesianQDA_MLJScikitLearnInterface = ["dimension_reduction", "classification", "Bayesian_models"]
Expand Down Expand Up @@ -52,6 +54,7 @@ ElasticNetCVRegressor_MLJScikitLearnInterface = ["regression"]
ElasticNetRegressor_MLJLinearModels = ["regression"]
ElasticNetRegressor_MLJScikitLearnInterface = ["regression"]
ENNUndersampler_Imbalance = ["class_imbalance"]
EnsembleModel_MLJEnsembles = ["ensemble_models", "meta_algorithms"]
EpsilonSVR_LIBSVM = ["regression"]
EvoLinearRegressor_EvoLinear = ["regression"]
EvoTreeClassifier_EvoTrees = ["classification", "ensemble_models", "iterative_models"]
Expand Down Expand Up @@ -89,6 +92,7 @@ IForestDetector_OutlierDetectionPython = ["outlier_detection"]
ImageClassifier_MLJFlux = ["classification", "image_processing", "iterative_models"]
INNEDetector_OutlierDetectionPython = ["outlier_detection"]
InteractionTransformer_MLJModels = ["static_models"]
IteratedModel_MLJIteration = ["iterative_models", "meta_algorithms"]
KDEDetector_OutlierDetectionPython = ["outlier_detection"]
KMeansClusterer_BetaML = ["clustering"]
KMeans_Clustering = ["clustering", "dimension_reduction", ]
Expand All @@ -104,7 +108,7 @@ KNeighborsClassifier_MLJScikitLearnInterface = ["classification"]
KNeighborsRegressor_MLJScikitLearnInterface = ["regression"]
KPLSRegressor_PartialLeastSquaresRegressor = ["regression"]
KernelPCA_MultivariateStats = ["dimension_reduction", ]
KernelPerceptronClassifier_BetaML = ["classification"]
KernelPerceptronClassifier_BetaML = ["classification", "neural networks"]
LADRegressor_MLJLinearModels = ["regression"]
LDA_MultivariateStats = ["classification", "dimension_reduction", ]
LGBMClassifier_LightGBM = ["classification", "ensemble_models", "iterative_models"]
Expand Down Expand Up @@ -146,14 +150,14 @@ MultitargetGaussianMixtureRegressor_BetaML = ["regression", "distribution_fitter
MultitargetKNNClassifier_NearestNeighborModels = ["classification"]
MultitargetKNNRegressor_NearestNeighborModels = ["regression"]
MultitargetLinearRegressor_MultivariateStats = ["regression"]
MultitargetNeuralNetworkRegressor_BetaML = ["regression"]
MultitargetNeuralNetworkRegressor_MLJFlux = ["regression", "iterative_models"]
MultitargetNeuralNetworkRegressor_BetaML = ["regression", "neural networks"]
MultitargetNeuralNetworkRegressor_MLJFlux = ["regression", "iterative_models", "neural networks"]
MultitargetRidgeRegressor_MultivariateStats = ["regression"]
MultitargetSRRegressor_SymbolicRegression = ["regression"]
NeuralNetworkClassifier_BetaML = ["classification"]
NeuralNetworkClassifier_MLJFlux = ["classification", "iterative_models"]
NeuralNetworkRegressor_BetaML = ["regression"]
NeuralNetworkRegressor_MLJFlux = ["regression", "iterative_models"]
NeuralNetworkClassifier_BetaML = ["classification", "neural networks"]
NeuralNetworkClassifier_MLJFlux = ["classification", "iterative_models", "neural networks"]
NeuralNetworkRegressor_BetaML = ["regression", "neural networks"]
NeuralNetworkRegressor_MLJFlux = ["regression", "iterative_models", "neural networks"]
NuSVC_LIBSVM = ["classification"]
NuSVR_LIBSVM = ["regression"]
OCSVMDetector_OutlierDetectionPython = ["outlier_detection"]
Expand All @@ -171,8 +175,9 @@ PartLS_PartitionedLS = ["regression"]
PassiveAggressiveClassifier_MLJScikitLearnInterface = ["classification"]
PassiveAggressiveRegressor_MLJScikitLearnInterface = ["regression"]
PegasosClassifier_BetaML = ["classification"]
PerceptronClassifier_BetaML = ["classification", "iterative_models"]
PerceptronClassifier_MLJScikitLearnInterface = ["classification", "iterative_models"]
PerceptronClassifier_BetaML = ["classification", "iterative_models", "neural networks"]
PerceptronClassifier_MLJScikitLearnInterface = ["classification", "iterative_models", "neural networks"]
Pipeline_MLJBase = ["meta_algorithms"]
ProbabilisticNuSVC_LIBSVM = ["classification"]
ProbabilisticSGDClassifier_MLJScikitLearnInterface = ["classification"]
ProbabilisticSVC_LIBSVM = ["classification"]
Expand All @@ -190,6 +195,7 @@ RandomForestImputer_BetaML = ["missing_value_imputation", "ensemble_models", "it
RandomForestRegressor_BetaML = ["regression", "ensemble_models", "iterative_models"]
RandomForestRegressor_DecisionTree = ["regression", "ensemble_models", "iterative_models"]
RandomForestRegressor_MLJScikitLearnInterface = ["regression", "ensemble_models", "iterative_models"]
Resampler_MLJBase = ["meta_algorithms"]
RidgeCVClassifier_MLJScikitLearnInterface = ["classification"]
RidgeCVRegressor_MLJScikitLearnInterface = ["regression"]
RidgeClassifier_MLJScikitLearnInterface = ["classification"]
Expand All @@ -210,6 +216,7 @@ StableForestClassifier_SIRUS = ["classification"]
StableForestRegressor_SIRUS = ["regression"]
StableRulesClassifier_SIRUS = ["classification"]
StableRulesRegressor_SIRUS = ["regression"]
Stack_MLJBase = ["meta_algorithms", "ensemble_models"]
SVC_LIBSVM = ["classification"]
SVMClassifier_MLJScikitLearnInterface = ["classification"]
SVMLinearClassifier_MLJScikitLearnInterface = ["classification"]
Expand All @@ -222,9 +229,11 @@ SpectralClustering_MLJScikitLearnInterface = ["clustering", "static_models"]
Standardizer_MLJModels = ["encoders"]
SubspaceLDA_MultivariateStats = ["classification", "dimension_reduction"]
TomekUndersampler_Imbalance = ["class_imbalance"]
TunedModel_MLJTuning = ["meta_algorithms"]
TSVDTransformer_TSVD = ["dimension_reduction"]
TfidfTransformer_MLJText = ["encoders", "text_analysis"]
TheilSenRegressor_MLJScikitLearnInterface = ["regression"]
TransformedTargetModel_MLJBase = ["meta_algorithms", "outlier_detection"]
UnivariateBoxCoxTransformer_MLJModels = ["encoders"]
UnivariateDiscretizer_MLJModels = ["encoders"]
UnivariateFillImputer_MLJModels = ["missing_value_imputation"]
Expand Down
4 changes: 3 additions & 1 deletion docs/make.jl
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import MLJ.MLJModels
import MLJ.MLJEnsembles
import MLJ.ScientificTypes
import MLJ.MLJBalancing
import MLJ.FeatureSelection
import ScientificTypesBase
import Distributions
using CategoricalArrays
Expand All @@ -37,7 +38,7 @@ isempty(problems) || error(
# compose the individual model docstring pages:
@info "Getting individual model docstrings from the registry and generating "*
"pages for them, written at /docs/src/models/ ."
for model in models()
for model in models(wrappers=true)
write_page(model)
end

Expand Down Expand Up @@ -118,6 +119,7 @@ makedocs(
IterationControl,
CategoricalDistributions,
StatisticalMeasures,
FeatureSelection,
],
pages = pages,
warnonly = [:cross_references, :missing_docs],
Expand Down
11 changes: 6 additions & 5 deletions docs/model_docstring_tools.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ const PATH_TO_MODEL_DOCS = joinpath(@__DIR__, "src", "models")
"""
remove_doc_refs(str::AbstractString)
Removes `@ref` references from `str. For example, a substring of the form
Removes `@ref` references from `str`. For example, a substring of the form
"[`some.thing_like_this123!`](@ref)" is replaced with "`some.thing_like_this123!`".
"""
Expand All @@ -27,8 +27,8 @@ handle(model) = model.name*"_"*model.package_name
**Private method.**
Compose and write to file the documentation page for `model`. Here `model` is an entry in
the MLJ Model Registry, i.e., an element of `MLJModels.models()`. The file name has the
form `"ModelName_PackageName.md"`, for example,
the MLJ Model Registry, i.e., an element of `MLJModels.models(; wrappers=true)`. The file
name has the form `"ModelName_PackageName.md"`, for example,
`"DecisionTreeClassifier_DecisionTree.md"`. Such a page can be referenced from any other
markdown page in /docs/src/ like this: `[DecisionTreeClassifier](@ref
DecisionTreeClassifier_DecisionTree)`.
Expand Down Expand Up @@ -56,6 +56,7 @@ const DESCRIPTORS_GIVEN_HANDLE =
# determine the list of all descriptors, ranked by frequency:
const descriptors = vcat(values(DESCRIPTORS_GIVEN_HANDLE)...)
const ranking = MLJBase.countmap(descriptors)
ranking["meta algorithms"] = 1e10
const DESCRIPTORS = sort(unique(descriptors), by=d -> ranking[d], rev=true)
const HANDLES = keys(DESCRIPTORS_GIVEN_HANDLE)

Expand All @@ -67,7 +68,7 @@ handle as key in /docs/src/ModelDescriptors.toml.
"""
function models_missing_descriptors()
handles = handle.(models())
handles = handle.(models(wrappers=true))
filter(handles) do h
!(h in HANDLES)
end
Expand All @@ -82,7 +83,7 @@ Return the list of models with a given `descriptor`, such as "regressor", as
these appear in /docs/ModelDescriptors.toml.
"""
modelswith(descriptor) = filter(models()) do model
modelswith(descriptor) = filter(models(wrappers=true)) do model
descriptor in DESCRIPTORS_GIVEN_HANDLE[handle(model)]
end

Expand Down
3 changes: 2 additions & 1 deletion docs/src/list_of_supported_models.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,11 @@ independent assessment.
[Imbalance.jl](https://github.com/JuliaAI/Imbalance.jl) | - | RandomOversampler, RandomWalkOversampler, ROSE, SMOTE, BorderlineSMOTE1, SMOTEN, SMOTENC, RandomUndersampler, ClusterUndersampler, ENNUndersampler, TomekUndersampler, | low |
[LIBSVM.jl](https://github.com/mpastell/LIBSVM.jl) | [MLJLIBSVMInterface.jl](https://github.com/JuliaAI/MLJLIBSVMInterface.jl) | LinearSVC, SVC, NuSVC, NuSVR, EpsilonSVR, OneClassSVM | high | also via ScikitLearn.jl
[LightGBM.jl](https://github.com/IQVIA-ML/LightGBM.jl) | - | LGBMClassifier, LGBMRegressor | high |
[FeatureSelection.jl](https://github.com/JuliaAI/FeatureSelection.jl) | - | FeatureSelector, RecursiveFeatureElimination | low |
[Flux.jl](https://github.com/FluxML/Flux.jl) | [MLJFlux.jl](https://github.com/FluxML/MLJFlux.jl) | NeuralNetworkRegressor, NeuralNetworkClassifier, MultitargetNeuralNetworkRegressor, ImageClassifier | low |
[MLJBalancing.jl](https://github.com/JuliaAI/MLJBalancing.jl) | - | BalancedBaggingClassifier | low |
[MLJLinearModels.jl](https://github.com/JuliaAI/MLJLinearModels.jl) | - | LinearRegressor, RidgeRegressor, LassoRegressor, ElasticNetRegressor, QuantileRegressor, HuberRegressor, RobustRegressor, LADRegressor, LogisticClassifier, MultinomialClassifier | medium |
[MLJModels.jl](https://github.com/JuliaAI/MLJModels.jl) (built-in) | - | ConstantClassifier, ConstantRegressor, ContinuousEncoder, DeterministicConstantClassifier, DeterministicConstantRegressor, FeatureSelector, FillImputer, InteractionTransformer, OneHotEncoder, Standardizer, UnivariateBoxCoxTransformer, UnivariateDiscretizer, UnivariateFillImputer, UnivariateTimeTypeToContinuous, Standardizer, BinaryThreshholdPredictor | medium |
[MLJModels.jl](https://github.com/JuliaAI/MLJModels.jl) (built-in) | - | ConstantClassifier, ConstantRegressor, ContinuousEncoder, DeterministicConstantClassifier, DeterministicConstantRegressor, FillImputer, InteractionTransformer, OneHotEncoder, Standardizer, UnivariateBoxCoxTransformer, UnivariateDiscretizer, UnivariateFillImputer, UnivariateTimeTypeToContinuous, Standardizer, BinaryThresholdPredictor | medium |
[MLJText.jl](https://github.com/JuliaAI/MLJText.jl) | - | TfidfTransformer, BM25Transformer, CountTransformer | low |
[MultivariateStats.jl](https://github.com/JuliaStats/MultivariateStats.jl) | [MLJMultivariateStatsInterface.jl](https://github.com/JuliaAI/MLJMultivariateStatsInterface.jl) | LinearRegressor, MultitargetLinearRegressor, RidgeRegressor, MultitargetRidgeRegressor, PCA, KernelPCA, ICA, LDA, BayesianLDA, SubspaceLDA, BayesianSubspaceLDA, FactorAnalysis, PPCA | high |
[NaiveBayes.jl](https://github.com/dfdx/NaiveBayes.jl) | [MLJNaiveBayesInterface.jl](https://github.com/JuliaAI/MLJNaiveBayesInterface.jl) | GaussianNBClassifier, MultinomialNBClassifier, HybridNBClassifier | low |
Expand Down
2 changes: 1 addition & 1 deletion docs/src/transformers.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ MLJModels.OneHotEncoder
MLJModels.ContinuousEncoder
MLJModels.FillImputer
MLJModels.UnivariateFillImputer
MLJModels.FeatureSelector
FeatureSelection.FeatureSelector
MLJModels.UnivariateBoxCoxTransformer
MLJModels.UnivariateDiscretizer
MLJModels.UnivariateTimeTypeToContinuous
Expand Down
10 changes: 7 additions & 3 deletions src/MLJ.jl
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ explicitly loaded from the model-providing package, using `@load`, for example.
some common transformers, listed using `localmodels()` at startup, are immediately
available, as are the following model wrappers: `Pipeline`, `TunedModel`, `EnsembleModel`,
`IteratedModel`, `BalancedModel`, `TransformedTargetModel`, `BinaryThresholdPredictor`,
and `Stack`.
`Stack`, and `RecursiveFeatureElimination`.
# Components
Expand All @@ -35,6 +35,9 @@ and `Stack`.
- MLJBalancing.jl: Incorporation of oversampling/undersampling methods in pipelines, via
the `BalancedModel` wrapper
- FeatureSelection.jl: Transformers for feature selection, and the supervised model wrapper
`RecursiveFeatureElimination`.
- MLJFlow.jl: Integration with MLflow workflow tracking
- OpenML.jl: Tool for grabbing datasets from OpenML.org
Expand All @@ -60,6 +63,7 @@ import MLJBase.save
using MLJEnsembles
using MLJTuning
using MLJModels
@reexport using FeatureSelection
using OpenML
@reexport using MLJFlow
@reexport using StatisticalMeasures
Expand Down Expand Up @@ -165,11 +169,11 @@ export Grid, RandomSearch, Explicit, TunedModel, LatinHypercube,
# re-export from MLJModels:
export models, localmodels, @load, @iload, load, info, doc,
ConstantRegressor, ConstantClassifier, # builtins/Constant.jl
FeatureSelector, UnivariateStandardizer, # builtins/Transformers.jl
UnivariateStandardizer,
Standardizer, UnivariateBoxCoxTransformer,
OneHotEncoder, ContinuousEncoder, UnivariateDiscretizer,
FillImputer, matching, BinaryThresholdPredictor,
UnivariateTimeTypeToContinuous, InteractionTransformer
UnivariateTimeTypeToContinuous, InteractionTransformer # builtins/Transformers.jl

# re-export from MLJIteration:
export MLJIteration
Expand Down
3 changes: 3 additions & 0 deletions test/integration.jl
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,9 @@ FILTER_GIVEN_ISSUE = Dict(
"MultiTaskLassoCVRegressor",
"MultiTaskLassoRegressor",
],
"https://github.com/JuliaAI/FeatureSelection.jl/issues/15" =>
model -> model.package_name == "FeatureSelection" &&
model.name == "RecursiveFeatureElimination",
)


Expand Down

0 comments on commit 4a23570

Please sign in to comment.