Merge pull request #200 from JesseLivezey/sklearn_update

fixes due to sklearn changes
BouchardLab · Mar 30, 2021 · 4766eac · 4766eac
2 parents 4f6f924 + 7d25ef4
commit 4766eac
Show file tree

Hide file tree

Showing 9 changed files with 60 additions and 19 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -3,16 +3,20 @@ os: linux
 python:
   - 3.6
   - 3.7
+  - 3.8
 matrix:
   include:
     - os: osx
       language: generic
       env: PYTHON=3.7.4
+      addons:
+        homebrew:
+          packages:
+            - openmpi
+            - pyenv-virtualenv
 before_install:
     - |
       if [ "$TRAVIS_OS_NAME" = "osx" ]; then
-        brew update
-        brew install openmpi pyenv-virtualenv
         pyenv install $PYTHON
         export PYENV_VERSION=$PYTHON
         export PATH="/Users/travis/.pyenv/shims:${PATH}"
@@ -33,7 +37,7 @@ install:
     - python setup.py develop
     - pip install pytest-cov
 script:
-    - flake8 pyuoi tests docs/gallery
+    - flake8 pyuoi tests examples
     - pytest --cov=./ tests
     - sphinx-build -W -b html docs/source docs/build
 after_success:

diff --git a/pyuoi/decomposition/base.py b/pyuoi/decomposition/base.py
@@ -1,6 +1,6 @@
 import abc as _abc
 
-from sklearn.linear_model.base import BaseEstimator
+from sklearn.linear_model._base import BaseEstimator
 
 
 class AbstractDecompositionModel(BaseEstimator, metaclass=_abc.ABCMeta):

diff --git a/pyuoi/linear_model/base.py b/pyuoi/linear_model/base.py
@@ -1,7 +1,7 @@
 import abc as _abc
 import numpy as np
 import logging
-from sklearn.linear_model.base import SparseCoefMixin
+from sklearn.linear_model._base import SparseCoefMixin
 from sklearn.metrics import r2_score, accuracy_score, log_loss
 from sklearn.model_selection import train_test_split
 from sklearn.utils import check_X_y

diff --git a/pyuoi/linear_model/elasticnet.py b/pyuoi/linear_model/elasticnet.py
@@ -3,7 +3,7 @@
 from .base import AbstractUoILinearRegressor
 
 from sklearn.linear_model import LinearRegression
-from sklearn.linear_model.coordinate_descent import _alpha_grid
+from sklearn.linear_model._coordinate_descent import _alpha_grid
 from sklearn.linear_model import ElasticNet
 
 

diff --git a/pyuoi/linear_model/lasso.py b/pyuoi/linear_model/lasso.py
@@ -2,7 +2,7 @@
 
 from sklearn.exceptions import NotFittedError
 from sklearn.linear_model import Lasso, LinearRegression
-from sklearn.linear_model.coordinate_descent import _alpha_grid
+from sklearn.linear_model._coordinate_descent import _alpha_grid
 try:
     import pycasso
 except ImportError:

diff --git a/pyuoi/linear_model/logistic.py b/pyuoi/linear_model/logistic.py
@@ -8,8 +8,6 @@
                            check_consistent_length, check_array)
 from sklearn.utils.multiclass import check_classification_targets
 from sklearn.utils.extmath import safe_sparse_dot, log_logistic, squared_norm
-from sklearn.linear_model.logistic import (_check_multi_class,
-                                           _intercept_dot)
 from sklearn.preprocessing import StandardScaler
 
 from scipy.optimize import minimize
@@ -366,8 +364,6 @@ def fit(self, X, y, sample_weight=None, coef_mask=None):
         -------
         self : object
         """
-        solver = 'lbfgs'
-
         if not isinstance(self.C, numbers.Number) or self.C < 0:
             raise ValueError("Penalty term must be positive; got (C=%r)"
                              % self.C)
@@ -388,8 +384,12 @@ def fit(self, X, y, sample_weight=None, coef_mask=None):
         self.classes_ = np.unique(y)
         n_samples, n_features = X.shape
 
-        multi_class = _check_multi_class(self.multi_class, solver,
-                                         len(self.classes_))
+        multi_class = self.multi_class
+        if multi_class == 'auto':
+            if len(self.classes_) > 2:
+                multi_class = 'multinomial'
+            else:
+                multi_class = 'ovr'
 
         n_classes = len(self.classes_)
         classes_ = self.classes_
@@ -529,7 +529,6 @@ def _logistic_regression_path(X, y, Cs=48, fit_intercept=True,
     n_iter : array, shape (n_cs,)
         Actual number of iteration for each Cs.
     """
-    solver = 'lbfgs'
     if isinstance(Cs, numbers.Integral):
         Cs = np.logspace(-4, 4, Cs)
 
@@ -543,7 +542,11 @@ def _logistic_regression_path(X, y, Cs=48, fit_intercept=True,
 
     classes = np.unique(y)
 
-    multi_class = _check_multi_class(multi_class, solver, len(classes))
+    if multi_class == 'auto':
+        if len(classes) > 2:
+            multi_class = 'multinomial'
+        else:
+            multi_class = 'ovr'
 
     # If sample weights exist, convert them to array (support for lists)
     # and check length
@@ -559,7 +562,7 @@ def _logistic_regression_path(X, y, Cs=48, fit_intercept=True,
     # the class_weights are assigned after masking the labels with a OvR.
     le = LabelEncoder()
     if isinstance(class_weight, dict) or multi_class == 'multinomial':
-        class_weight_ = compute_class_weight(class_weight, classes, y)
+        class_weight_ = compute_class_weight(class_weight, classes=classes, y=y)
         sample_weight *= class_weight_[le.fit_transform(y)]
 
     # For doing a ovr, we need to mask the labels first. for the
@@ -574,8 +577,9 @@ def _logistic_regression_path(X, y, Cs=48, fit_intercept=True,
         # for compute_class_weight
 
         if class_weight == "balanced":
-            class_weight_ = compute_class_weight(class_weight, mask_classes,
-                                                 y_bin)
+            class_weight_ = compute_class_weight(class_weight,
+                                                 classes=mask_classes,
+                                                 y=y_bin)
             sample_weight *= class_weight_[le.fit_transform(y_bin)]
 
     else:
@@ -847,3 +851,34 @@ def _multinomial_loss_grad(w, X, Y, alpha, mask, sample_weight):
     if fit_intercept:
         grad[:, -1] = diff.sum(axis=0) / n_samples
     return loss, grad.ravel(), p
+
+
+def _intercept_dot(w, X, y):
+    """Computes y * (np.dot(X, w) + c), with c the intercept (or 0).
+    The intercept is read from w[-1] when w has n_features + 1 entries.
+    Parameters
+    ----------
+    w : ndarray of shape (n_features,) or (n_features + 1,)
+        Coefficient vector.
+    X : {array-like, sparse matrix} of shape (n_samples, n_features)
+        Training data.
+    y : ndarray of shape (n_samples,)
+        Array of labels.
+    Returns
+    -------
+    w : ndarray of shape (n_features,)
+        Coefficient vector without the intercept weight (w[-1]) if the
+        intercept should be fit. Unchanged otherwise.
+    c : float
+        The intercept (w[-1]), or 0. when w has no intercept entry.
+    yz : ndarray of shape (n_samples,)
+        y * (np.dot(X, w) + c), computed element-wise.
+    """
+    c = 0.
+    if w.size == X.shape[1] + 1:  # w carries a trailing intercept entry
+        c = w[-1]
+        w = w[:-1]
+
+    z = safe_sparse_dot(X, w) + c  # decision values, intercept included
+    yz = y * z
+    return w, c, yz
diff --git a/pyuoi/linear_model/scikit-learn_license b/pyuoi/linear_model/scikit-learn_license
@@ -1,6 +1,7 @@
 Portions of logistic.py including
 MaskedCoefLogisticRegression,
 _logistic_regression_path,
+_intercept_dot,
 _logistic_loss_and_grad, and
 _multinomial_loss_grad
 are based on code from scikit-learn. The scikit-learn license is below.

diff --git a/requirements-dev.txt b/requirements-dev.txt
@@ -1,4 +1,5 @@
 -r requirements.txt
+matplotlib
 pytest
 flake8
 cython

diff --git a/tests/test_uoi_lasso.py b/tests/test_uoi_lasso.py
@@ -6,7 +6,7 @@
 from sklearn.datasets import make_regression
 from sklearn.linear_model import Lasso
 from sklearn.metrics import r2_score
-from sklearn.linear_model.coordinate_descent import _alpha_grid
+from sklearn.linear_model._coordinate_descent import _alpha_grid
 try:
     import pycasso
 except ImportError: