Support for sklearn 1.6 conformance testing #2465

Merged
Changes from 11 commits
28 changes: 21 additions & 7 deletions daal4py/sklearn/linear_model/logistic_path.py
@@ -359,14 +359,16 @@ def __logistic_regression_path(
         y_bin = np.ones(y.shape, dtype=X.dtype)
         # for compute_class_weight

-        if solver in ["lbfgs", "newton-cg"]:
+        if solver == "liblinear" or (
+            not sklearn_check_version("1.6") and solver not in ["lbfgs", "newton-cg"]
+        ):
+            mask_classes = np.array([-1, 1])
+            y_bin[~mask] = -1.0
+        else:
             # HalfBinomialLoss, used for those solvers, represents y in [0, 1] instead
             # of in [-1, 1].
             mask_classes = np.array([0, 1])
             y_bin[~mask] = 0.0
-        else:
-            mask_classes = np.array([-1, 1])
-            y_bin[~mask] = -1.0
     else:
         mask_classes = np.array([-1, 1])
         mask = y == pos_class
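For context, the hunk above switches, per solver, between two binary target encodings: ±1 (liblinear-style) and 0/1 (as HalfBinomialLoss expects). A minimal standalone sketch of the two conventions (helper name hypothetical, not the PR's code):

```python
import numpy as np

def encode_binary_target(y, pos_class, use_half_binomial):
    # Hypothetical helper mirroring the branch above; not daal4py code.
    mask = y == pos_class
    y_bin = np.ones(y.shape, dtype=np.float64)
    if use_half_binomial:
        y_bin[~mask] = 0.0   # HalfBinomialLoss convention: y in {0, 1}
    else:
        y_bin[~mask] = -1.0  # liblinear convention: y in {-1, +1}
    return y_bin

print(encode_binary_target(np.array([0, 1, 1, 0]), 1, True))   # [0. 1. 1. 0.]
print(encode_binary_target(np.array([0, 1, 1, 0]), 1, False))  # [-1.  1.  1. -1.]
```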
@@ -388,7 +390,11 @@ def __logistic_regression_path(

     else:
         if sklearn_check_version("1.1"):
-            if solver in ["sag", "saga", "lbfgs", "newton-cg"]:
+            if sklearn_check_version("1.6"):
+                solver_list = ["sag", "saga", "lbfgs", "newton-cg", "newton-cholesky"]
+            else:
+                solver_list = ["sag", "saga", "lbfgs", "newton-cg"]
+            if solver in solver_list:
                 # SAG, lbfgs and newton-cg multinomial solvers need LabelEncoder,
                 # not LabelBinarizer, i.e. y as a 1d-array of integers.
                 # LabelEncoder also saves memory compared to LabelBinarizer, especially
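Background for the comment above (an illustration, not part of the diff): LabelEncoder produces a 1-D integer array while LabelBinarizer produces a 2-D indicator matrix, which is where the memory saving comes from:

```python
import numpy as np
from sklearn.preprocessing import LabelBinarizer, LabelEncoder

y = np.array(["ant", "bee", "cat", "ant"])
print(LabelEncoder().fit_transform(y))    # [0 1 2 0] -- shape (n_samples,)
print(LabelBinarizer().fit_transform(y))  # one-hot matrix, shape (n_samples, n_classes)
```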
@@ -488,7 +494,11 @@ def __logistic_regression_path(

     if multi_class == "multinomial":
         # fmin_l_bfgs_b and newton-cg accepts only ravelled parameters.
-        if solver in ["lbfgs", "newton-cg"]:
+        if sklearn_check_version("1.6"):
+            solver_list = ["lbfgs", "newton-cg", "newton-cholesky"]
+        else:
+            solver_list = ["lbfgs", "newton-cg"]
+        if solver in solver_list:
             if _dal_ready and classes.size == 2:
                 w0 = w0[-1:, :]
             if sklearn_check_version("1.1"):
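The version-gated solver list is a recurring pattern in this file; condensed, it looks like the sketch below (assuming daal4py's own sklearn_check_version helper, imported as it is elsewhere in the codebase):

```python
# Sketch of the gating pattern: "newton-cholesky" joins the solver list
# only when running against scikit-learn >= 1.6.
from daal4py.sklearn._utils import sklearn_check_version

if sklearn_check_version("1.6"):
    solver_list = ["lbfgs", "newton-cg", "newton-cholesky"]
else:
    solver_list = ["lbfgs", "newton-cg"]
```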
@@ -753,7 +763,11 @@ def _func_(x, *args):
     else:
         n_classes = max(2, classes.size)
         if sklearn_check_version("1.1"):
-            if solver in ["lbfgs", "newton-cg"]:
+            if sklearn_check_version("1.6"):
+                solver_list = ["lbfgs", "newton-cg", "newton-cholesky"]
+            else:
+                solver_list = ["lbfgs", "newton-cg"]
+            if solver in solver_list:
                 multi_w0 = np.reshape(w0, (n_classes, -1), order="F")
             else:
                 multi_w0 = w0
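The order="F" reshape above recovers the multinomial coefficient matrix from the flat vector these solvers work on; a self-contained round-trip check (shapes arbitrary):

```python
import numpy as np

n_classes, n_features = 3, 4
w = np.arange(n_classes * n_features, dtype=float).reshape(n_classes, n_features)

w0 = w.ravel(order="F")  # flat, Fortran-order vector as passed to the solver
multi_w0 = np.reshape(w0, (n_classes, -1), order="F")  # back to (n_classes, n_features)

assert np.array_equal(w, multi_w0)
```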
15 changes: 15 additions & 0 deletions deselected_tests.yaml
@@ -342,6 +342,21 @@ deselected_tests:
   # to CI parameters, as parameter validation is globally handled in sklearn version 1.2 onward
   - cluster/tests/test_dbscan.py::test_dbscan_params_validation

+  # From sklearn 1.6, need to resolve logreg bug from joblib with_parallel_backend.
+  # Removal of this deselection will result in test_logistic failures (this one will pass).
+  - feature_selection/tests/test_rfe.py::test_rfe_with_joblib_threading_backend
+  # Failing tests since sklearn 1.6
+  - tests/test_common.py::test_estimators[CalibratedClassifierCV(cv=3,estimator=LogisticRegression(C=1))-check_sample_weight_equivalence_on_dense_data]
+  - tests/test_common.py::test_estimators[ExtraTreesClassifier(n_estimators=5)-check_sample_weight_equivalence_on_dense_data]
+  - tests/test_common.py::test_estimators[ExtraTreesRegressor(n_estimators=5)-check_sample_weight_equivalence_on_dense_data]
+  - utils/tests/test_estimator_checks.py::test_xfail_count_with_no_fast_fail
+  # XFail vs XPass differs between scikit-learn and scikit-learn-intelex since 1.6
+  - tests/test_common.py::test_estimators[LinearRegression()-check_sample_weight_equivalence_on_dense_data] <1.7
+  - tests/test_common.py::test_estimators[LogisticRegression(max_iter=5)-check_sample_weight_equivalence_on_dense_data]
+  - tests/test_common.py::test_estimators[LogisticRegression(max_iter=5,solver='newton-cg')-check_sample_weight_equivalence_on_dense_data]
+  - tests/test_common.py::test_estimators[NuSVC()-check_class_weight_classifiers]
+  - tests/test_common.py::test_estimators[CalibratedClassifierCV(estimator=LogisticRegression(C=1))-check_sample_weights_invariance(kind=ones)]
+
   # --------------------------------------------------------
   # No need to test daal4py patching
 reduced_tests:
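A note on the file format, inferred from the `<1.7` suffix above: each deselection is a pytest node ID, optionally followed by a scikit-learn version specifier that limits when the entry applies. The entries below are hypothetical:

```yaml
deselected_tests:
  # always deselected
  - some_module/tests/test_example.py::test_case
  # deselected only when scikit-learn is older than 1.7
  - some_module/tests/test_example.py::test_other <1.7
```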
4 changes: 0 additions & 4 deletions onedal/linear_model/linear_model.py
@@ -340,8 +340,4 @@ def fit(self, X, y, queue=None):
             packed_coefficients[:, 0],
         )

-        if self.coef_.shape[0] == 1 and y.ndim == 1:
-            self.coef_ = self.coef_.ravel()
-            self.intercept_ = self.intercept_[0]
-
         return self
9 changes: 9 additions & 0 deletions sklearnex/linear_model/ridge.py
@@ -325,6 +325,15 @@ def _onedal_fit(self, X, y, sample_weight, queue=None):
         self._onedal_estimator.fit(X, y, queue=queue)
         self._save_attributes()

+        if sklearn_check_version("1.6"):
Contributor:
Could you do this for Linear Regression and the related incremental algos so that they all behave the same / reduce maintenance?

Contributor Author:
I am not sure it's relevant for the others; it originates from scikit-learn/scikit-learn#19746, which specifically addresses Ridge.

Contributor:
https://github.com/scikit-learn/scikit-learn/blob/main/sklearn/linear_model/tests/test_common.py#L197 — is the answer hidden within the linked PR? Place notes into LinearRegression and IncrementalLinearRegression about this deviation, and add this change in return values to IncrementalRidge.

+            if y.ndim == 1 or y.shape[1] == 1:
+                self.coef_ = self.coef_.ravel()
+                self.intercept_ = self.intercept_[0]
+        else:
+            if self.coef_.shape[0] == 1 and y.ndim == 1:
+                self.coef_ = self.coef_.ravel()
+                self.intercept_ = self.intercept_[0]
+
     def _onedal_predict(self, X, queue=None):
         X = validate_data(self, X, accept_sparse=False, reset=False)

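A condensed, standalone restatement of the ridge.py branch added above (helper name hypothetical): on scikit-learn >= 1.6 the coefficients are flattened for any single-output target, while the older path flattens only for 1-D y:

```python
import numpy as np

def maybe_flatten(coef, intercept, y, sklearn_1_6_or_newer):
    # Hypothetical helper mirroring the ridge.py change; not sklearnex code.
    if sklearn_1_6_or_newer:
        if y.ndim == 1 or y.shape[1] == 1:
            return coef.ravel(), intercept[0]
    elif coef.shape[0] == 1 and y.ndim == 1:
        return coef.ravel(), intercept[0]
    return coef, intercept

coef, intercept = np.ones((1, 3)), np.zeros(1)
y_col = np.ones((5, 1))  # 2-D target with a single column
print(maybe_flatten(coef, intercept, y_col, True)[0].shape)   # (3,)   -- flattened
print(maybe_flatten(coef, intercept, y_col, False)[0].shape)  # (1, 3) -- unchanged
```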