From 68cf2f1ebec8aa0fbc8ee7e8134ffd75fcaf2a8b Mon Sep 17 00:00:00 2001 From: "Marcelo R. Albuquerque" Date: Fri, 2 Apr 2021 21:55:40 -0300 Subject: [PATCH 1/4] Add GMMRegression scikit-learn RegressorMixin --- gmr/__init__.py | 2 +- gmr/gmm.py | 78 +++++++++++++++++++++++++++++++++++++++++++ gmr/tests/test_gmm.py | 29 +++++++++++++++- 3 files changed, 107 insertions(+), 2 deletions(-) diff --git a/gmr/__init__.py b/gmr/__init__.py index 3bb6d29837..6d1aff0245 100644 --- a/gmr/__init__.py +++ b/gmr/__init__.py @@ -23,7 +23,7 @@ __all__ = ["gmm", "mvn", "utils"] from .mvn import MVN, plot_error_ellipse - from .gmm import (GMM, plot_error_ellipses, kmeansplusplus_initialization, + from .gmm import (GMM, GMMRegression, plot_error_ellipses, kmeansplusplus_initialization, covariance_initialization) __all__.extend(["MVN", "plot_error_ellipse", "GMM", "plot_error_ellipses", diff --git a/gmr/gmm.py b/gmr/gmm.py index f1f213e5d7..20e4823473 100644 --- a/gmr/gmm.py +++ b/gmr/gmm.py @@ -1,6 +1,7 @@ import numpy as np from scipy.spatial.distance import cdist, pdist from scipy.stats import chi2 +from sklearn.base import BaseEstimator, RegressorMixin, MultiOutputMixin from .utils import check_random_state from .mvn import MVN @@ -489,6 +490,83 @@ def extract_mvn(self, component_idx): covariance=self.covariances[component_idx], verbose=self.verbose, random_state=self.random_state) +class GMMRegression(MultiOutputMixin, RegressorMixin, BaseEstimator): + """Scikit-learn RegressorMixin for the GMM class. + + Parameters + ---------- + + n_components : int (> 0) + Number of MVNs that compose the GMM. + + random_state : int or RandomState, optional (default: global random state) + If an integer is given, it fixes the seed. Defaults to the global numpy + random number generator. + + R_diff : float + Minimum allowed difference of responsibilities between successive + EM iterations. + + n_iter : int + Maximum number of iterations. 
+ + init_params : str, optional (default: 'random') + Parameter initialization strategy. If means and covariances are + given in the constructor, this parameter will have no effect. + 'random' will sample initial means randomly from the dataset + and set covariances to identity matrices. This is the + computationally cheap solution. + 'kmeans++' will use k-means++ initialization for means and + initialize covariances to diagonal matrices with variances + set based on the average distances of samples in each dimensions. + This is computationally more expensive but often gives much + better results. + + Returns + ------- + self : GMMRegression + This object. + """ + + def __init__(self, n_components, priors=None, means=None, covariances=None, + verbose=0, random_state=None, R_diff=1e-4, n_iter=500, init_params="random"): + self.n_components = n_components + self.priors = priors + self.means = means + self.covariances = covariances + self.verbose = verbose + self.random_state = random_state + self.R_diff = R_diff + self.n_iter = n_iter + self.init_params = init_params + + def fit(self, X, y): + self.gmm = GMM(self.n_components, priors=self.priors, means=self.means, + covariances=self.covariances, verbose=self.verbose, random_state=self.random_state) + + if y.ndim > 2: + raise ValueError("y must have at most two dimensions.") + elif y.ndim == 1: + y = np.expand_dims(y, 1) + + if X.ndim > 2: + raise ValueError("y must have at most two dimensions.") + elif X.ndim == 1: + X = np.expand_dims(X, 1) + + self._indices = np.arange(X.shape[1]) + + self.gmm.from_samples(np.hstack((X, y)), + R_diff=self.R_diff, n_iter=self.n_iter, init_params=self.init_params) + return self + + def predict(self, X): + if X.ndim > 2: + raise ValueError("y must have at most two dimensions.") + elif X.ndim == 1: + X = np.expand_dims(X, 1) + + return self.gmm.predict(self._indices, X) def plot_error_ellipses(ax, gmm, colors=None, alpha=0.25, factors=np.linspace(0.25, 2.0, 8)): """Plot error ellipses 
of GMM components. diff --git a/gmr/tests/test_gmm.py b/gmr/tests/test_gmm.py index 362220e094..7177eccc25 100644 --- a/gmr/tests/test_gmm.py +++ b/gmr/tests/test_gmm.py @@ -11,7 +11,7 @@ except ImportError: # Python 3 from io import StringIO -from gmr import GMM, MVN, plot_error_ellipses, kmeansplusplus_initialization, covariance_initialization +from gmr import GMM, MVN, GMMRegression, plot_error_ellipses, kmeansplusplus_initialization, covariance_initialization from test_mvn import AxisStub @@ -251,6 +251,22 @@ def test_regression_with_2d_input(): pred = gmm.predict(np.array([0, 1]), np.hstack((x, x[::-1]))) mse = np.sum((y - pred) ** 2) / n_samples + random_state = check_random_state(0) + + n_samples = 200 + x = np.linspace(0, 2, n_samples)[:, np.newaxis] + y1 = 3 * x[:n_samples // 2] + 1 + y2 = -3 * x[n_samples // 2:] + 7 + noise = random_state.randn(n_samples, 1) * 0.01 + y = np.vstack((y1, y2)) + noise + samples = np.hstack((x, x[::-1], y)) + + gmm = GMMRegression(n_components=2, random_state=random_state) + gmm.fit(np.hstack((x, x[::-1])), y) + + pred = gmm.predict(np.hstack((x, x[::-1]))) + mse = np.sum((y - pred) ** 2) / n_samples + def test_regression_without_noise(): """Test regression without noise.""" @@ -273,6 +289,17 @@ def test_regression_without_noise(): mse = np.sum((y - pred) ** 2) / n_samples assert_less(mse, 0.01) + random_state = check_random_state(0) + + gmm = GMMRegression(n_components=2, random_state=random_state) + gmm.fit(x, y) + assert_array_almost_equal(gmm.gmm.priors, 0.5 * np.ones(2), decimal=2) + assert_array_almost_equal(gmm.gmm.means[0], np.array([1.5, 2.5]), decimal=2) + assert_array_almost_equal(gmm.gmm.means[1], np.array([0.5, 2.5]), decimal=1) + + pred = gmm.predict(x) + mse = np.sum((y - pred) ** 2) / n_samples + assert_less(mse, 0.01) def test_plot(): """Test plot of GMM.""" From 714cd94670d6625cb963e417aa18505dc4e4be0c Mon Sep 17 00:00:00 2001 From: "Marcelo R. 
Albuquerque" Date: Sat, 3 Apr 2021 19:07:05 -0300 Subject: [PATCH 2/4] Refactor GMMRegression to a separate gmr/sklearn.py module --- gmr/__init__.py | 10 +++++- gmr/gmm.py | 79 ----------------------------------------------- gmr/sklearn.py | 82 +++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 91 insertions(+), 80 deletions(-) create mode 100644 gmr/sklearn.py diff --git a/gmr/__init__.py b/gmr/__init__.py index 6d1aff0245..fd148624f9 100644 --- a/gmr/__init__.py +++ b/gmr/__init__.py @@ -20,11 +20,19 @@ if not __GMR_SETUP__: from . import gmm, mvn, utils + import warnings + __all__ = ["gmm", "mvn", "utils"] from .mvn import MVN, plot_error_ellipse - from .gmm import (GMM, GMMRegression, plot_error_ellipses, kmeansplusplus_initialization, + from .gmm import (GMM, plot_error_ellipses, kmeansplusplus_initialization, covariance_initialization) + try: + from .sklearn import GMMRegression + except ImportError: + class GMMRegression: + warnings.warn("Scikit-learn is required to use GMMRegression.") + __all__.extend(["MVN", "plot_error_ellipse", "GMM", "plot_error_ellipses", "kmeansplusplus_initialization", "covariance_initialization"]) diff --git a/gmr/gmm.py b/gmr/gmm.py index 20e4823473..197976a8cb 100644 --- a/gmr/gmm.py +++ b/gmr/gmm.py @@ -1,7 +1,6 @@ import numpy as np from scipy.spatial.distance import cdist, pdist from scipy.stats import chi2 -from sklearn.base import BaseEstimator, RegressorMixin, MultiOutputMixin from .utils import check_random_state from .mvn import MVN @@ -490,84 +489,6 @@ def extract_mvn(self, component_idx): covariance=self.covariances[component_idx], verbose=self.verbose, random_state=self.random_state) -class GMMRegression(MultiOutputMixin, RegressorMixin, BaseEstimator): - """Scikit-learn RegressorMixin for the GMM class. - - Parameters - ---------- - - n_components : int (> 0) - Number of MVNs that compose the GMM. 
- - random_state : int or RandomState, optional (default: global random state) - If an integer is given, it fixes the seed. Defaults to the global numpy - random number generator. - - R_diff : float - Minimum allowed difference of responsibilities between successive - EM iterations. - - n_iter : int - Maximum number of iterations. - - init_params : str, optional (default: 'random') - Parameter initialization strategy. If means and covariances are - given in the constructor, this parameter will have no effect. - 'random' will sample initial means randomly from the dataset - and set covariances to identity matrices. This is the - computationally cheap solution. - 'kmeans++' will use k-means++ initialization for means and - initialize covariances to diagonal matrices with variances - set based on the average distances of samples in each dimensions. - This is computationally more expensive but often gives much - better results. - - Returns - ------- - self : GMMRegression - This object. - """ - - def __init__(self, n_components, priors=None, means=None, covariances=None, - verbose=0, random_state=None, R_diff=1e-4, n_iter=500, init_params="random"): - self.n_components = n_components - self.priors = priors - self.means = means - self.covariances = covariances - self.verbose = verbose - self.random_state = random_state - self.R_diff = R_diff - self.n_iter = n_iter - self.init_params = init_params - - def fit(self, X, y): - self.gmm = GMM(self.n_components, priors=self.priors, means=self.means, - covariances=self.covariances, verbose=self.verbose, random_state=self.random_state) - - if y.ndim > 2: - raise ValueError("y must have at most two dimensions.") - elif y.ndim == 1: - y = np.expand_dims(y, 1) - - if X.ndim > 2: - raise ValueError("y must have at most two dimensions.") - elif X.ndim == 1: - X = np.expand_dims(X, 1) - - self._indices = np.arange(X.shape[1]) - - self.gmm.from_samples(np.hstack((X, y)), - R_diff=self.R_diff, n_iter=self.n_iter, 
init_params=self.init_params) - return self - - def predict(self, X): - if X.ndim > 2: - raise ValueError("y must have at most two dimensions.") - elif X.ndim == 1: - X = np.expand_dims(X, 1) - - return self.gmm.predict(self._indices, X) - def plot_error_ellipses(ax, gmm, colors=None, alpha=0.25, factors=np.linspace(0.25, 2.0, 8)): """Plot error ellipses of GMM components. diff --git a/gmr/sklearn.py b/gmr/sklearn.py new file mode 100644 index 0000000000..d877fd5c49 --- /dev/null +++ b/gmr/sklearn.py @@ -0,0 +1,82 @@ +import numpy as np +from sklearn.base import BaseEstimator, RegressorMixin, MultiOutputMixin + +from .gmm import GMM + +class GMMRegression(MultiOutputMixin, RegressorMixin, BaseEstimator): + """Scikit-learn RegressorMixin for the GMM class. + + Parameters + ---------- + + n_components : int (> 0) + Number of MVNs that compose the GMM. + + random_state : int or RandomState, optional (default: global random state) + If an integer is given, it fixes the seed. Defaults to the global numpy + random number generator. + + R_diff : float + Minimum allowed difference of responsibilities between successive + EM iterations. + + n_iter : int + Maximum number of iterations. + + init_params : str, optional (default: 'random') + Parameter initialization strategy. If means and covariances are + given in the constructor, this parameter will have no effect. + 'random' will sample initial means randomly from the dataset + and set covariances to identity matrices. This is the + computationally cheap solution. + 'kmeans++' will use k-means++ initialization for means and + initialize covariances to diagonal matrices with variances + set based on the average distances of samples in each dimensions. + This is computationally more expensive but often gives much + better results. + + Returns + ------- + self : GMMRegression + This object. 
+ """ + + def __init__(self, n_components, priors=None, means=None, covariances=None, + verbose=0, random_state=None, R_diff=1e-4, n_iter=500, init_params="random"): + self.n_components = n_components + self.priors = priors + self.means = means + self.covariances = covariances + self.verbose = verbose + self.random_state = random_state + self.R_diff = R_diff + self.n_iter = n_iter + self.init_params = init_params + + def fit(self, X, y): + self.gmm = GMM(self.n_components, priors=self.priors, means=self.means, + covariances=self.covariances, verbose=self.verbose, random_state=self.random_state) + + if y.ndim > 2: + raise ValueError("y must have at most two dimensions.") + elif y.ndim == 1: + y = np.expand_dims(y, 1) + + if X.ndim > 2: + raise ValueError("y must have at most two dimensions.") + elif X.ndim == 1: + X = np.expand_dims(X, 1) + + self._indices = np.arange(X.shape[1]) + + self.gmm.from_samples(np.hstack((X, y)), + R_diff=self.R_diff, n_iter=self.n_iter, init_params=self.init_params) + return self + + def predict(self, X): + if X.ndim > 2: + raise ValueError("y must have at most two dimensions.") + elif X.ndim == 1: + X = np.expand_dims(X, 1) + + return self.gmm.predict(self._indices, X) \ No newline at end of file From 556e9ecc3934bf2aadfbe1722b13bdc140dd79c5 Mon Sep 17 00:00:00 2001 From: "Marcelo R. Albuquerque" Date: Sun, 4 Apr 2021 09:35:21 -0300 Subject: [PATCH 3/4] Refactor GaussianMixtureRegressor and add gmr/tests/test_sklearn.py test suite. --- gmr/__init__.py | 10 +----- gmr/gmm.py | 1 + gmr/sklearn.py | 70 +++++++++++++++++++++++---------------- gmr/tests/test_gmm.py | 19 +---------- gmr/tests/test_sklearn.py | 68 +++++++++++++++++++++++++++++++++++++ 5 files changed, 112 insertions(+), 56 deletions(-) create mode 100644 gmr/tests/test_sklearn.py diff --git a/gmr/__init__.py b/gmr/__init__.py index fd148624f9..ebbc39c378 100644 --- a/gmr/__init__.py +++ b/gmr/__init__.py @@ -20,19 +20,11 @@ if not __GMR_SETUP__: from . 
import gmm, mvn, utils - import warnings - - __all__ = ["gmm", "mvn", "utils"] + __all__ = ["gmm", "mvn", "utils", "sklearn"] from .mvn import MVN, plot_error_ellipse from .gmm import (GMM, plot_error_ellipses, kmeansplusplus_initialization, covariance_initialization) - try: - from .sklearn import GMMRegression - except ImportError: - class GMMRegression: - warnings.warn("Scikit-learn is required to use GMMRegression.") - __all__.extend(["MVN", "plot_error_ellipse", "GMM", "plot_error_ellipses", "kmeansplusplus_initialization", "covariance_initialization"]) diff --git a/gmr/gmm.py b/gmr/gmm.py index 197976a8cb..f1f213e5d7 100644 --- a/gmr/gmm.py +++ b/gmr/gmm.py @@ -489,6 +489,7 @@ def extract_mvn(self, component_idx): covariance=self.covariances[component_idx], verbose=self.verbose, random_state=self.random_state) + def plot_error_ellipses(ax, gmm, colors=None, alpha=0.25, factors=np.linspace(0.25, 2.0, 8)): """Plot error ellipses of GMM components. diff --git a/gmr/sklearn.py b/gmr/sklearn.py index d877fd5c49..dcd3b73f32 100644 --- a/gmr/sklearn.py +++ b/gmr/sklearn.py @@ -1,26 +1,44 @@ import numpy as np -from sklearn.base import BaseEstimator, RegressorMixin, MultiOutputMixin + +try: + from sklearn.base import BaseEstimator, RegressorMixin, MultiOutputMixin + from sklearn.utils import check_X_y + from sklearn.utils.validation import check_is_fitted, check_array, FLOAT_DTYPES +except ImportError: + raise ImportError("Install scikit-learn (e.g. pip install scikit-learn) to use this extension.") from .gmm import GMM -class GMMRegression(MultiOutputMixin, RegressorMixin, BaseEstimator): - """Scikit-learn RegressorMixin for the GMM class. + +class GaussianMixtureRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator): + """Gaussian mixture regression compatible to scikit-learn. Parameters ---------- - - n_components : int (> 0) + n_components : int Number of MVNs that compose the GMM. + priors : array, shape (n_components,), optional + Weights of the components. 
+ + means : array, shape (n_components, n_features), optional + Means of the components. + + covariances : array, shape (n_components, n_features, n_features), optional + Covariances of the components. + + verbose : int, optional (default: 0) + Verbosity level. + random_state : int or RandomState, optional (default: global random state) If an integer is given, it fixes the seed. Defaults to the global numpy random number generator. - R_diff : float + R_diff : float, optional (default: 1e-4) Minimum allowed difference of responsibilities between successive EM iterations. - n_iter : int + n_iter : int, optional (default: 500) Maximum number of iterations. init_params : str, optional (default: 'random') @@ -34,15 +52,10 @@ class GMMRegression(MultiOutputMixin, RegressorMixin, BaseEstimator): set based on the average distances of samples in each dimensions. This is computationally more expensive but often gives much better results. - - Returns - ------- - self : GMMRegression - This object. 
""" def __init__(self, n_components, priors=None, means=None, covariances=None, - verbose=0, random_state=None, R_diff=1e-4, n_iter=500, init_params="random"): + verbose=0, random_state=None, R_diff=1e-4, n_iter=500, init_params="random"): self.n_components = n_components self.priors = priors self.means = means @@ -52,31 +65,30 @@ def __init__(self, n_components, priors=None, means=None, covariances=None, self.R_diff = R_diff self.n_iter = n_iter self.init_params = init_params - - def fit(self, X, y): - self.gmm = GMM(self.n_components, priors=self.priors, means=self.means, + + self.gmm_ = GMM(self.n_components, priors=self.priors, means=self.means, covariances=self.covariances, verbose=self.verbose, random_state=self.random_state) - if y.ndim > 2: - raise ValueError("y must have at most two dimensions.") - elif y.ndim == 1: - y = np.expand_dims(y, 1) - - if X.ndim > 2: - raise ValueError("y must have at most two dimensions.") - elif X.ndim == 1: + def fit(self, X, y): + X, y = check_X_y(X, y, estimator=self.gmm_, dtype=FLOAT_DTYPES, multi_output=True) + if X.ndim == 1: X = np.expand_dims(X, 1) + if y.ndim == 1: + y = np.expand_dims(y, 1) - self._indices = np.arange(X.shape[1]) + self.indices_ = np.arange(X.shape[1]) - self.gmm.from_samples(np.hstack((X, y)), - R_diff=self.R_diff, n_iter=self.n_iter, init_params=self.init_params) + self.gmm_.from_samples(np.hstack((X, y)), + R_diff=self.R_diff, n_iter=self.n_iter, init_params=self.init_params) return self - + def predict(self, X): + check_is_fitted(self, ["gmm_", "indices_"]) + X = check_array(X, estimator=self.gmm_, dtype=FLOAT_DTYPES) + if X.ndim > 2: raise ValueError("y must have at most two dimensions.") elif X.ndim == 1: X = np.expand_dims(X, 1) - return self.gmm.predict(self._indices, X) \ No newline at end of file + return self.gmm_.predict(self.indices_, X) diff --git a/gmr/tests/test_gmm.py b/gmr/tests/test_gmm.py index 7177eccc25..621c93d090 100644 --- a/gmr/tests/test_gmm.py +++ b/gmr/tests/test_gmm.py 
@@ -11,7 +11,7 @@ except ImportError: # Python 3 from io import StringIO -from gmr import GMM, MVN, GMMRegression, plot_error_ellipses, kmeansplusplus_initialization, covariance_initialization +from gmr import GMM, MVN, plot_error_ellipses, kmeansplusplus_initialization, covariance_initialization from test_mvn import AxisStub @@ -261,12 +261,6 @@ def test_regression_with_2d_input(): y = np.vstack((y1, y2)) + noise samples = np.hstack((x, x[::-1], y)) - gmm = GMMRegression(n_components=2, random_state=random_state) - gmm.fit(np.hstack((x, x[::-1])), y) - - pred = gmm.predict(np.hstack((x, x[::-1]))) - mse = np.sum((y - pred) ** 2) / n_samples - def test_regression_without_noise(): """Test regression without noise.""" @@ -289,17 +283,6 @@ def test_regression_without_noise(): mse = np.sum((y - pred) ** 2) / n_samples assert_less(mse, 0.01) - random_state = check_random_state(0) - - gmm = GMMRegression(n_components=2, random_state=random_state) - gmm.fit(x, y) - assert_array_almost_equal(gmm.gmm.priors, 0.5 * np.ones(2), decimal=2) - assert_array_almost_equal(gmm.gmm.means[0], np.array([1.5, 2.5]), decimal=2) - assert_array_almost_equal(gmm.gmm.means[1], np.array([0.5, 2.5]), decimal=1) - - pred = gmm.predict(x) - mse = np.sum((y - pred) ** 2) / n_samples - assert_less(mse, 0.01) def test_plot(): """Test plot of GMM.""" diff --git a/gmr/tests/test_sklearn.py b/gmr/tests/test_sklearn.py new file mode 100644 index 0000000000..3fbfcc0f3e --- /dev/null +++ b/gmr/tests/test_sklearn.py @@ -0,0 +1,68 @@ +import numpy as np +from nose.tools import assert_less +from numpy.testing import assert_array_almost_equal +from gmr.utils import check_random_state + +from gmr.sklearn import GaussianMixtureRegressor + + +def test_sklearn_regression(): + """Test regression with GaussianMixtureRegressor.""" + random_state = check_random_state(0) + + n_samples = 200 + x = np.linspace(0, 2, n_samples)[:, np.newaxis] + y1 = 3 * x[:n_samples // 2] + 1 + y2 = -3 * x[n_samples // 2:] + 7 + noise = 
random_state.randn(n_samples, 1) * 0.01 + y = np.vstack((y1, y2)) + noise + + gmr = GaussianMixtureRegressor(n_components=2, random_state=random_state) + gmr.fit(x, y) + assert_array_almost_equal(gmr.gmm_.priors, 0.5 * np.ones(2), decimal=2) + assert_array_almost_equal(gmr.gmm_.means[0], np.array([0.5, 2.5]), decimal=2) + assert_array_almost_equal(gmr.gmm_.means[1], np.array([1.5, 2.5]), decimal=1) + + pred = gmr.predict(x) + mse = np.sum((y - pred) ** 2) / n_samples + assert_less(mse, 0.01) + + +def test_sklearn_regression_with_2d_input(): + """Test regression with GaussianMixtureRegressor and two-dimensional input.""" + random_state = check_random_state(0) + + n_samples = 200 + x = np.linspace(0, 2, n_samples)[:, np.newaxis] + y1 = 3 * x[:n_samples // 2] + 1 + y2 = -3 * x[n_samples // 2:] + 7 + noise = random_state.randn(n_samples, 1) * 0.01 + y = np.vstack((y1, y2)) + noise + + gmr = GaussianMixtureRegressor(n_components=2, random_state=random_state) + gmr.fit(x, y) + + pred = gmr.predict(x) + mse = np.sum((y - pred) ** 2) / n_samples + assert_less(mse, 0.01) + + +def test_sklearn_regression_without_noise(): + """Test regression without noise.""" + random_state = 0 + + n_samples = 200 + x = np.linspace(0, 2, n_samples)[:, np.newaxis] + y1 = 3 * x[:n_samples // 2] + 1 + y2 = -3 * x[n_samples // 2:] + 7 + y = np.vstack((y1, y2)) + + gmr = GaussianMixtureRegressor(n_components=2, random_state=random_state) + gmr.fit(x, y) + assert_array_almost_equal(gmr.gmm_.priors, 0.5 * np.ones(2), decimal=2) + assert_array_almost_equal(gmr.gmm_.means[0], np.array([1.5, 2.5]), decimal=2) + assert_array_almost_equal(gmr.gmm_.means[1], np.array([0.5, 2.5]), decimal=1) + + pred = gmr.predict(x) + mse = np.sum((y - pred) ** 2) / n_samples + assert_less(mse, 0.01) From de8044219c95c64c63bed5fc156bd74f23ef88dd Mon Sep 17 00:00:00 2001 From: "Marcelo R. 
Albuquerque" Date: Wed, 14 Apr 2021 02:40:11 -0300 Subject: [PATCH 4/4] Refactor gmr/sklearn.py --- gmr/sklearn.py | 9 +-------- gmr/tests/test_gmm.py | 2 -- gmr/tests/test_sklearn.py | 16 ++++++++++++++++ 3 files changed, 17 insertions(+), 10 deletions(-) diff --git a/gmr/sklearn.py b/gmr/sklearn.py index dcd3b73f32..31ec009fbb 100644 --- a/gmr/sklearn.py +++ b/gmr/sklearn.py @@ -66,13 +66,11 @@ def __init__(self, n_components, priors=None, means=None, covariances=None, self.n_iter = n_iter self.init_params = init_params + def fit(self, X, y): self.gmm_ = GMM(self.n_components, priors=self.priors, means=self.means, covariances=self.covariances, verbose=self.verbose, random_state=self.random_state) - def fit(self, X, y): X, y = check_X_y(X, y, estimator=self.gmm_, dtype=FLOAT_DTYPES, multi_output=True) - if X.ndim == 1: - X = np.expand_dims(X, 1) if y.ndim == 1: y = np.expand_dims(y, 1) @@ -86,9 +84,4 @@ def predict(self, X): check_is_fitted(self, ["gmm_", "indices_"]) X = check_array(X, estimator=self.gmm_, dtype=FLOAT_DTYPES) - if X.ndim > 2: - raise ValueError("y must have at most two dimensions.") - elif X.ndim == 1: - X = np.expand_dims(X, 1) - return self.gmm_.predict(self.indices_, X) diff --git a/gmr/tests/test_gmm.py b/gmr/tests/test_gmm.py index 621c93d090..b8693141c0 100644 --- a/gmr/tests/test_gmm.py +++ b/gmr/tests/test_gmm.py @@ -251,8 +251,6 @@ def test_regression_with_2d_input(): pred = gmm.predict(np.array([0, 1]), np.hstack((x, x[::-1]))) mse = np.sum((y - pred) ** 2) / n_samples - random_state = check_random_state(0) - n_samples = 200 x = np.linspace(0, 2, n_samples)[:, np.newaxis] y1 = 3 * x[:n_samples // 2] + 1 diff --git a/gmr/tests/test_sklearn.py b/gmr/tests/test_sklearn.py index 3fbfcc0f3e..d4cd46efa2 100644 --- a/gmr/tests/test_sklearn.py +++ b/gmr/tests/test_sklearn.py @@ -47,6 +47,22 @@ def test_sklearn_regression_with_2d_input(): assert_less(mse, 0.01) +def test_sklearn_regression_with_1d_output(): + """Test regression with 
GaussianMixtureRegressor and one-dimensional output.""" + random_state = check_random_state(0) + + n_samples = 200 + x = np.linspace(0, 2, n_samples)[:, np.newaxis] + y = 3 * x + 1 + y = y.flatten() + + gmr = GaussianMixtureRegressor(n_components=1, random_state=random_state) + gmr.fit(x, y) + + pred = gmr.predict(x) + mse = np.sum((y - pred) ** 2) / n_samples + + def test_sklearn_regression_without_noise(): """Test regression without noise.""" random_state = 0