Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Add ReducedRankRegression estimator (Resolves #10796) #28779

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 2 additions & 2 deletions sklearn/cross_decomposition/__init__.py
@@ -1,3 +1,3 @@
from ._pls import CCA, PLSSVD, PLSCanonical, PLSRegression
from ._pls import CCA, PLSSVD, PLSCanonical, PLSRegression, ReducedRankRegression

__all__ = ["PLSCanonical", "PLSRegression", "PLSSVD", "CCA"]
__all__ = ["PLSCanonical", "PLSRegression", "PLSSVD", "CCA", "ReducedRankRegression"]
92 changes: 91 additions & 1 deletion sklearn/cross_decomposition/_pls.py
Expand Up @@ -20,14 +20,16 @@
TransformerMixin,
_fit_context,
)
from ..decomposition import PCA
from ..exceptions import ConvergenceWarning
from ..linear_model import Ridge
from ..utils import check_array, check_consistent_length
from ..utils._param_validation import Interval, StrOptions
from ..utils.extmath import svd_flip
from ..utils.fixes import parse_version, sp_version
from ..utils.validation import FLOAT_DTYPES, check_is_fitted

__all__ = ["PLSCanonical", "PLSRegression", "PLSSVD"]
__all__ = ["PLSCanonical", "PLSRegression", "PLSSVD", "ReducedRankRegression"]


if sp_version >= parse_version("1.7"):
Expand Down Expand Up @@ -182,6 +184,94 @@ def _deprecate_Y_when_required(y, Y):
raise ValueError("y is required.")
return _deprecate_Y_when_optional(y, Y)

class ReducedRankRegression(Ridge):
    """Reduced rank regression.

    ReducedRankRegression enforces a low-rank constraint on the beta
    coefficients. If beta is a [p x q] matrix in the case of normal regression,
    reduced rank regression enforces rank(beta) <= rank where
    rank < min(p, q). This constraint is based on the assumption that X and Y
    are related by a smaller number of latent factors, instead of the full
    space spanned by the coefficients of normal linear regression. Reduced rank
    regression can also act as a form of regularization.

    This implementation is built on top of sklearn.linear_model.Ridge. Thus, a
    ridge penalty can also be specified if additional regularization is
    desired. The full-rank Ridge solution is fitted first; its coefficients are
    then projected onto the leading principal directions of the fitted values.

    Parameters
    ----------
    rank : int, default=2
        The rank of the regression components. Should be in `[1, n_targets]`.

    alpha : float, default=0
        Regularization strength if an additional ridge penalty is desired.
        Must be a non-negative float. The default of 0 applies no ridge
        penalty.

    ridge_params_dict : dict, default=None
        A dictionary of additional parameters to pass to the Ridge
        constructor. See sklearn.linear_model.Ridge for more details. An
        ``"alpha"`` entry in this dictionary takes precedence over the `alpha`
        argument. The dictionary is only read when the estimator is
        constructed.

    Attributes
    ----------
    coef_ : ndarray of shape (n_targets, n_features)
        Rank-constrained weight vectors.

    intercept_ : float or ndarray of shape (n_targets,)
        Independent term in decision function. Set to 0.0 if
        ``fit_intercept = False``.

    encoder_ : ndarray of shape (n_features, rank)
        Matrix that maps the predictors onto the `rank` latent components.

    decoder_ : ndarray of shape (rank, n_targets)
        Matrix that maps the latent components back onto the targets.
        ``coef_`` equals ``(encoder_ @ decoder_).T``.

    n_iter_ : None or ndarray of shape (n_targets,)
        Actual number of iterations for each target. Available only for
        sag and lsqr solvers. Other solvers will return None.

    n_features_in_ : int
        Number of features seen during :term:`fit`.

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.

    See Also
    --------
    PLSRegression : Partial Least Squares regression.

    Examples
    --------
    >>> from sklearn.cross_decomposition import ReducedRankRegression
    >>> X = [[0., 0., 1.], [1.,0.,0.], [2.,2.,2.], [2.,5.,4.]]
    >>> y = [[0.1, -0.2], [0.9, 1.1], [6.2, 5.9], [11.9, 12.3]]
    >>> rrr = ReducedRankRegression(rank=2)
    >>> rrr.fit(X, y)
    ReducedRankRegression()
    >>> Y_pred = rrr.predict(X)
    """

    _parameter_constraints: dict = {
        "rank": [Interval(Integral, 1, None, closed="left")],
        "alpha": [Interval(Real, 0, None, closed="left"), np.ndarray],
        "ridge_params_dict": [dict, None],
    }

    def __init__(self, rank=2, alpha=0, ridge_params_dict=None):
        # `alpha` is always forwarded so that it is honoured even when extra
        # Ridge options are supplied; an explicit "alpha" key in
        # `ridge_params_dict` still wins, as it did before.
        ridge_kwargs = {"alpha": alpha}
        if ridge_params_dict is not None:
            ridge_kwargs.update(ridge_params_dict)
        super().__init__(**ridge_kwargs)
        # Store the argument exactly as received (None stays None) so that
        # get_params / clone / repr round-trip the constructor arguments.
        self.ridge_params_dict = ridge_params_dict
        self.rank = rank

    def fit(self, X, y, sample_weight=None):
        """Fit the reduced rank regression model.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data.

        y : array-like of shape (n_samples, n_targets)
            Target values. At least two target columns are required.

        sample_weight : float or array-like of shape (n_samples,), default=None
            Individual weights for each sample, forwarded to the Ridge fit.

        Returns
        -------
        self : object
            Fitted estimator.

        Raises
        ------
        ValueError
            If `y` has fewer than two target columns, or if `rank` exceeds the
            number of targets.
        """
        # np.shape also handles plain nested lists, which have no `.ndim`.
        y_shape = np.shape(y)
        if len(y_shape) < 2 or y_shape[1] < 2:
            raise ValueError(
                "There must be more than one target variable to use "
                "ReducedRankRegression. If only one target variable is required, "
                "use LinearRegression, Ridge, or Lasso instead."
            )
        n_targets = y_shape[1]

        # Full-rank Ridge solution; this call also validates the parameters.
        super().fit(X, y, sample_weight=sample_weight)
        if self.rank > n_targets:
            raise ValueError(
                f"`rank` must be in [1, n_targets={n_targets}]; "
                f"got rank={self.rank}."
            )
        beta_ridge = self.coef_.T
        y_hat_ridge = super().predict(X)

        # Principal directions of the full-rank fitted values.
        pca = PCA(n_components=self.rank)
        pca.fit(y_hat_ridge)
        decoder = pca.components_
        # The encoding matrix projects the predictors from the full space onto
        # the space spanned by the first `rank` components.
        encoder = beta_ridge @ decoder.T
        # The reconstituted reduced rank matrix (same shape as beta_ridge).
        beta_rrr = encoder @ decoder

        if self.fit_intercept:
            # The Ridge intercept was computed for the full-rank coefficients.
            # Shift it so that the (weighted) mean training prediction is the
            # same with the rank-reduced coefficients.
            weights = None
            if sample_weight is not None and np.ndim(sample_weight) > 0:
                weights = np.asarray(sample_weight)
            # Equals weighted_mean(X) @ beta_ridge, by linearity of predict.
            mean_linear_part = (
                np.average(y_hat_ridge, axis=0, weights=weights) - self.intercept_
            )
            self.intercept_ = (
                self.intercept_
                + mean_linear_part
                - (mean_linear_part @ decoder.T) @ decoder
            )

        self.coef_ = beta_rrr.T
        self.encoder_ = encoder
        self.decoder_ = decoder
        return self

class _PLS(
ClassNamePrefixFeaturesOutMixin,
Expand Down