Add support for scikit-learn 1.4

Signed-off-by: Avi Shinnar <shinnar@us.ibm.com>
IBM · Feb 12, 2024 · 2b4c300 · 2b4c300
1 parent c43aabd
commit 2b4c300
Show file tree

Hide file tree

Showing 35 changed files with 625 additions and 97 deletions.
diff --git a/docs/requirements.txt b/docs/requirements.txt
@@ -5,7 +5,7 @@ graphviz
 hyperopt
 jsonschema
 jsonsubschema
-scikit-learn>=1.0.0,<1.4
+scikit-learn>=1.0.0,<1.5.0
 scipy
 pandas
 decorator

diff --git a/lale/datasets/openml/openml_datasets.py b/lale/datasets/openml/openml_datasets.py
@@ -733,8 +733,12 @@ def fetch(
         ]
         txm1 = ColumnTransformer(transformers1, sparse_threshold=0.0)
 
+        if sklearn_version >= version.Version("1.2"):
+            ohe2 = OneHotEncoder(sparse_output=False)
+        else:
+            ohe2 = OneHotEncoder(sparse=False)
         transformers2 = [
-            ("ohe", OneHotEncoder(sparse=False), list(range(len(categorical_cols)))),
+            ("ohe", ohe2, list(range(len(categorical_cols)))),
             (
                 "no_op",
                 "passthrough",

diff --git a/lale/helpers.py b/lale/helpers.py
@@ -1308,6 +1308,30 @@ def get_sklearn_estimator_name() -> str:
         return "estimator"
 
 
+def with_fixed_estimator_name(**kwargs):
+    """Some higher order sklearn operators changed the name of the nested estimator in later versions.
+    This returns the keyword arguments with estimator/base_estimator renamed to suit lale.operators.sklearn_version.
+    """
+
+    if "base_estimator" in kwargs or "estimator" in kwargs:  # otherwise kwargs is returned as is
+        from packaging import version
+
+        import lale.operators  # function-level import; presumably avoids an import cycle -- confirm
+
+        if lale.operators.sklearn_version < version.Version("1.2"):  # before 1.2: spell it base_estimator
+            return {
+                "base_estimator" if k == "estimator" else k: v
+                for k, v in kwargs.items()  # NOTE: if both keys are passed they collapse into one; the later value wins
+            }
+        else:  # 1.2 and later: spell it estimator
+            return {
+                "estimator" if k == "base_estimator" else k: v
+                for k, v in kwargs.items()  # NOTE: if both keys are passed they collapse into one; the later value wins
+            }
+
+    return kwargs
+
+
 def get_estimator_param_name_from_hyperparams(hyperparams):
     be = hyperparams.get("base_estimator", "deprecated")
     if be == "deprecated" or (be is None and "estimator" in hyperparams):

diff --git a/lale/lib/aif360/bagging_orbis_classifier.py b/lale/lib/aif360/bagging_orbis_classifier.py
@@ -22,6 +22,7 @@
 import lale.operators
 from lale.lib.imblearn._common_schemas import _hparam_n_jobs, _hparam_random_state
 
+from ...helpers import with_fixed_estimator_name
 from .orbis import Orbis
 from .orbis import _hyperparams_schema as orbis_hyperparams_schema
 from .util import (
@@ -115,10 +116,12 @@ def _repair_dtypes(inner_X):  # for some reason BaggingClassifier spoils dtypes
 
         repair_dtypes = lale.lib.sklearn.FunctionTransformer(func=_repair_dtypes)
         trainable_ensemble = lale.lib.sklearn.BaggingClassifier(
-            base_estimator=repair_dtypes >> orbis,
-            n_estimators=self.n_estimators,
-            n_jobs=self.sampler_hparams["n_jobs"],
-            random_state=self.sampler_hparams["random_state"],
+            **with_fixed_estimator_name(
+                estimator=repair_dtypes >> orbis,
+                n_estimators=self.n_estimators,
+                n_jobs=self.sampler_hparams["n_jobs"],
+                random_state=self.sampler_hparams["random_state"],
+            )
         )
         encoded_y = pd.Series(self.lab_enc.transform(y), index=y.index)
         self.trained_ensemble = trainable_ensemble.fit(X, encoded_y)

diff --git a/lale/lib/autogen/kernel_pca.py b/lale/lib/autogen/kernel_pca.py
@@ -1,8 +1,9 @@
 from numpy import inf, nan
+from packaging import version
 from sklearn.decomposition import KernelPCA as Op
 
 from lale.docstrings import set_docstrings
-from lale.operators import make_operator
+from lale.operators import make_operator, sklearn_version
 
 
 class _KernelPCAImpl:
@@ -239,4 +240,23 @@ def transform(self, X):
 }
 KernelPCA = make_operator(_KernelPCAImpl, _combined_schemas)
 
+if sklearn_version >= version.Version("1.4"):  # 1.4+: degree also accepts any number (branch marked forOptimizer: False)
+
+    KernelPCA = KernelPCA.customize_schema(
+        degree={
+            "anyOf": [
+                {
+                    "type": "integer",
+                    "minimumForOptimizer": 2,
+                    "maximumForOptimizer": 3,
+                    "distribution": "uniform",
+                },
+                {"type": "number", "forOptimizer": False},
+            ],
+            "default": 3,
+            "description": "Degree for poly kernels",
+        },
+        set_as_available=True,
+    )
+
 set_docstrings(KernelPCA)
diff --git a/lale/lib/autogen/kernel_ridge.py b/lale/lib/autogen/kernel_ridge.py
@@ -170,4 +170,23 @@ def predict(self, X):
         set_as_available=True,
     )
 
+if sklearn_version >= version.Version("1.4"):  # 1.4+: degree also accepts any number (branch marked forOptimizer: False)
+
+    KernelRidge = KernelRidge.customize_schema(
+        degree={
+            "anyOf": [
+                {
+                    "type": "integer",
+                    "minimumForOptimizer": 0,
+                    "maximumForOptimizer": 100,
+                    "distribution": "uniform",
+                },
+                {"type": "number", "forOptimizer": False},
+            ],
+            "default": 3,
+            "description": "Degree of the polynomial kernel",
+        },
+        set_as_available=True,
+    )
+
 set_docstrings(KernelRidge)
diff --git a/lale/lib/autogen/lars.py b/lale/lib/autogen/lars.py
@@ -1,8 +1,8 @@
-from numpy import inf, nan
+from packaging import version
 from sklearn.linear_model import Lars as Op
 
 from lale.docstrings import set_docstrings
-from lale.operators import make_operator
+from lale.operators import make_operator, sklearn_version
 
 
 class _LarsImpl:
@@ -197,4 +197,24 @@ def predict(self, X):
 }
 Lars = make_operator(_LarsImpl, _combined_schemas)
 
+if sklearn_version >= version.Version("1.2"):  # 1.2+: normalize is a boolean or "deprecated" (the default)
+    Lars = Lars.customize_schema(
+        normalize={
+            "anyOf": [
+                {
+                    "type": "boolean",
+                    "description": "This parameter is ignored when ``fit_intercept`` is set to False",
+                },
+                {"enum": ["deprecated"]},
+            ],
+            "default": "deprecated",
+            "description": "Deprecated",
+        },
+        set_as_available=True,
+    )
+
+if sklearn_version >= version.Version("1.4"):  # 1.4+: normalize=None presumably drops it from the schema -- confirm
+    Lars = Lars.customize_schema(normalize=None, set_as_available=True)
+
+
 set_docstrings(Lars)
diff --git a/lale/lib/autogen/lars_cv.py b/lale/lib/autogen/lars_cv.py
@@ -1,8 +1,8 @@
-from numpy import inf, nan
+from packaging import version
 from sklearn.linear_model import LarsCV as Op
 
 from lale.docstrings import set_docstrings
-from lale.operators import make_operator
+from lale.operators import make_operator, sklearn_version
 
 
 class _LarsCVImpl:
@@ -203,4 +203,23 @@ def predict(self, X):
 }
 LarsCV = make_operator(_LarsCVImpl, _combined_schemas)
 
+if sklearn_version >= version.Version("1.2"):  # 1.2+: normalize is a boolean or "deprecated" (the default)
+    LarsCV = LarsCV.customize_schema(
+        normalize={
+            "anyOf": [
+                {
+                    "type": "boolean",
+                    "description": "This parameter is ignored when ``fit_intercept`` is set to False",
+                },
+                {"enum": ["deprecated"]},
+            ],
+            "default": "deprecated",
+            "description": "Deprecated",
+        },
+        set_as_available=True,
+    )
+
+if sklearn_version >= version.Version("1.4"):  # 1.4+: normalize=None presumably drops it from the schema -- confirm
+    LarsCV = LarsCV.customize_schema(normalize=None, set_as_available=True)
+
 set_docstrings(LarsCV)
diff --git a/lale/lib/autogen/lasso_lars.py b/lale/lib/autogen/lasso_lars.py
@@ -1,8 +1,8 @@
-from numpy import inf, nan
+from packaging import version
 from sklearn.linear_model import LassoLars as Op
 
 from lale.docstrings import set_docstrings
-from lale.operators import make_operator
+from lale.operators import make_operator, sklearn_version
 
 
 class _LassoLarsImpl:
@@ -197,4 +197,23 @@ def predict(self, X):
 }
 LassoLars = make_operator(_LassoLarsImpl, _combined_schemas)
 
+if sklearn_version >= version.Version("1.2"):  # 1.2+: normalize is a boolean or "deprecated" (the default)
+    LassoLars = LassoLars.customize_schema(
+        normalize={
+            "anyOf": [
+                {
+                    "type": "boolean",
+                    "description": "This parameter is ignored when ``fit_intercept`` is set to False",
+                },
+                {"enum": ["deprecated"]},
+            ],
+            "default": "deprecated",
+            "description": "Deprecated",
+        },
+        set_as_available=True,
+    )
+
+if sklearn_version >= version.Version("1.4"):  # 1.4+: normalize=None presumably drops it from the schema -- confirm
+    LassoLars = LassoLars.customize_schema(normalize=None, set_as_available=True)
+
 set_docstrings(LassoLars)
diff --git a/lale/lib/autogen/lasso_lars_cv.py b/lale/lib/autogen/lasso_lars_cv.py
@@ -1,8 +1,8 @@
-from numpy import inf, nan
+from packaging import version
 from sklearn.linear_model import LassoLarsCV as Op
 
 from lale.docstrings import set_docstrings
-from lale.operators import make_operator
+from lale.operators import make_operator, sklearn_version
 
 
 class _LassoLarsCVImpl:
@@ -203,4 +203,24 @@ def predict(self, X):
 }
 LassoLarsCV = make_operator(_LassoLarsCVImpl, _combined_schemas)
 
+
+if sklearn_version >= version.Version("1.2"):  # 1.2+: normalize is a boolean or "deprecated" (the default)
+    LassoLarsCV = LassoLarsCV.customize_schema(
+        normalize={
+            "anyOf": [
+                {
+                    "type": "boolean",
+                    "description": "This parameter is ignored when ``fit_intercept`` is set to False",
+                },
+                {"enum": ["deprecated"]},
+            ],
+            "default": "deprecated",
+            "description": "Deprecated",
+        },
+        set_as_available=True,
+    )
+
+if sklearn_version >= version.Version("1.4"):  # 1.4+: normalize=None presumably drops it from the schema -- confirm
+    LassoLarsCV = LassoLarsCV.customize_schema(normalize=None, set_as_available=True)
+
 set_docstrings(LassoLarsCV)
diff --git a/lale/lib/autogen/lasso_lars_ic.py b/lale/lib/autogen/lasso_lars_ic.py
@@ -1,8 +1,8 @@
-from numpy import inf, nan
+from packaging import version
 from sklearn.linear_model import LassoLarsIC as Op
 
 from lale.docstrings import set_docstrings
-from lale.operators import make_operator
+from lale.operators import make_operator, sklearn_version
 
 
 class _LassoLarsICImpl:
@@ -201,4 +201,23 @@ def predict(self, X):
 }
 LassoLarsIC = make_operator(_LassoLarsICImpl, _combined_schemas)
 
+if sklearn_version >= version.Version("1.2"):  # 1.2+: normalize is a boolean or "deprecated" (the default)
+    LassoLarsIC = LassoLarsIC.customize_schema(
+        normalize={
+            "anyOf": [
+                {
+                    "type": "boolean",
+                    "description": "This parameter is ignored when ``fit_intercept`` is set to False",
+                },
+                {"enum": ["deprecated"]},
+            ],
+            "default": "deprecated",
+            "description": "Deprecated",
+        },
+        set_as_available=True,
+    )
+
+if sklearn_version >= version.Version("1.4"):  # 1.4+: normalize=None presumably drops it from the schema -- confirm
+    LassoLarsIC = LassoLarsIC.customize_schema(normalize=None, set_as_available=True)
+
 set_docstrings(LassoLarsIC)
diff --git a/lale/lib/autogen/orthogonal_matching_pursuit.py b/lale/lib/autogen/orthogonal_matching_pursuit.py
@@ -1,8 +1,8 @@
-from numpy import inf, nan
+from packaging import version
 from sklearn.linear_model import OrthogonalMatchingPursuit as Op
 
 from lale.docstrings import set_docstrings
-from lale.operators import make_operator
+from lale.operators import make_operator, sklearn_version
 
 
 class _OrthogonalMatchingPursuitImpl:
@@ -156,4 +156,25 @@ def predict(self, X):
     _OrthogonalMatchingPursuitImpl, _combined_schemas
 )
 
+if sklearn_version >= version.Version("1.2"):  # 1.2+: normalize is a boolean or "deprecated" (the default)
+    OrthogonalMatchingPursuit = OrthogonalMatchingPursuit.customize_schema(
+        normalize={
+            "anyOf": [
+                {
+                    "type": "boolean",
+                    "description": "This parameter is ignored when ``fit_intercept`` is set to False",
+                },
+                {"enum": ["deprecated"]},
+            ],
+            "default": "deprecated",
+            "description": "Deprecated",
+        },
+        set_as_available=True,
+    )
+
+if sklearn_version >= version.Version("1.4"):  # 1.4+: normalize=None presumably drops it from the schema -- confirm
+    OrthogonalMatchingPursuit = OrthogonalMatchingPursuit.customize_schema(
+        normalize=None, set_as_available=True
+    )
+
 set_docstrings(OrthogonalMatchingPursuit)
diff --git a/lale/lib/autogen/orthogonal_matching_pursuit_cv.py b/lale/lib/autogen/orthogonal_matching_pursuit_cv.py
@@ -1,8 +1,8 @@
-from numpy import inf, nan
+from packaging import version
 from sklearn.linear_model import OrthogonalMatchingPursuitCV as Op
 
 from lale.docstrings import set_docstrings
-from lale.operators import make_operator
+from lale.operators import make_operator, sklearn_version
 
 
 class _OrthogonalMatchingPursuitCVImpl:
@@ -175,4 +175,26 @@ def predict(self, X):
     _OrthogonalMatchingPursuitCVImpl, _combined_schemas
 )
 
+if sklearn_version >= version.Version("1.2"):  # 1.2+: normalize is a boolean or "deprecated" (the default)
+    OrthogonalMatchingPursuitCV = OrthogonalMatchingPursuitCV.customize_schema(
+        normalize={
+            "anyOf": [
+                {
+                    "type": "boolean",
+                    "description": "This parameter is ignored when ``fit_intercept`` is set to False",
+                },
+                {"enum": ["deprecated"]},
+            ],
+            "default": "deprecated",
+            "description": "Deprecated",
+        },
+        set_as_available=True,
+    )
+
+if sklearn_version >= version.Version("1.4"):  # 1.4+: normalize=None presumably drops it from the schema -- confirm
+    OrthogonalMatchingPursuitCV = OrthogonalMatchingPursuitCV.customize_schema(
+        normalize=None, set_as_available=True
+    )
+
+
 set_docstrings(OrthogonalMatchingPursuitCV)