From 68cf2f1ebec8aa0fbc8ee7e8134ffd75fcaf2a8b Mon Sep 17 00:00:00 2001
From: "Marcelo R. Albuquerque" <marceloralbuquerque@gmail.com>
Date: Fri, 2 Apr 2021 21:55:40 -0300
Subject: [PATCH 1/4] Add GMMRegression scikit-learn RegressorMixin

---
 gmr/__init__.py       |  2 +-
 gmr/gmm.py            | 78 +++++++++++++++++++++++++++++++++++++++++++
 gmr/tests/test_gmm.py | 29 +++++++++++++++-
 3 files changed, 107 insertions(+), 2 deletions(-)

diff --git a/gmr/__init__.py b/gmr/__init__.py
index 3bb6d29837..6d1aff0245 100644
--- a/gmr/__init__.py
+++ b/gmr/__init__.py
@@ -23,7 +23,7 @@
     __all__ = ["gmm", "mvn", "utils"]
 
     from .mvn import MVN, plot_error_ellipse
-    from .gmm import (GMM, plot_error_ellipses, kmeansplusplus_initialization,
+    from .gmm import (GMM, GMMRegression, plot_error_ellipses, kmeansplusplus_initialization,
                       covariance_initialization)
 
     __all__.extend(["MVN", "plot_error_ellipse", "GMM", "plot_error_ellipses",
diff --git a/gmr/gmm.py b/gmr/gmm.py
index f1f213e5d7..20e4823473 100644
--- a/gmr/gmm.py
+++ b/gmr/gmm.py
@@ -1,6 +1,7 @@
 import numpy as np
 from scipy.spatial.distance import cdist, pdist
 from scipy.stats import chi2
+from sklearn.base import BaseEstimator, RegressorMixin, MultiOutputMixin
 from .utils import check_random_state
 from .mvn import MVN
 
@@ -489,6 +490,83 @@ def extract_mvn(self, component_idx):
             covariance=self.covariances[component_idx], verbose=self.verbose,
             random_state=self.random_state)
 
+class GMMRegression(MultiOutputMixin, RegressorMixin, BaseEstimator):
+    """Scikit-learn RegressorMixin for the GMM class.
+
+    Parameters
+    ----------
+
+    n_components : int (> 0)
+        Number of MVNs that compose the GMM.
+
+    random_state : int or RandomState, optional (default: global random state)
+        If an integer is given, it fixes the seed. Defaults to the global numpy
+        random number generator.
+
+    R_diff : float
+        Minimum allowed difference of responsibilities between successive
+        EM iterations.
+
+    n_iter : int
+        Maximum number of iterations.
+
+    init_params : str, optional (default: 'random')
+        Parameter initialization strategy. If means and covariances are
+        given in the constructor, this parameter will have no effect.
+        'random' will sample initial means randomly from the dataset
+        and set covariances to identity matrices. This is the
+        computationally cheap solution.
+        'kmeans++' will use k-means++ initialization for means and
+        initialize covariances to diagonal matrices with variances
+        set based on the average distances of samples in each dimensions.
+        This is computationally more expensive but often gives much
+        better results.
+
+    Returns
+    -------
+    self : GMMRegression
+        This object.
+    """
+
+    def __init__(self, n_components, priors=None, means=None, covariances=None,
+                 verbose=0, random_state=None, R_diff=1e-4, n_iter=500, init_params="random"):
+        self.n_components = n_components
+        self.priors = priors
+        self.means = means
+        self.covariances = covariances
+        self.verbose = verbose
+        self.random_state = random_state
+        self.R_diff = R_diff
+        self.n_iter = n_iter
+        self.init_params = init_params
+    
+    def fit(self, X, y):
+        self.gmm = GMM(self.n_components, priors=self.priors, means=self.means, 
+                        covariances=self.covariances, verbose=self.verbose, random_state=self.random_state)
+
+        if y.ndim > 2:
+            raise ValueError("y must have at most two dimensions.")
+        elif y.ndim == 1:
+            y = np.expand_dims(y, 1)
+        
+        if X.ndim > 2:
+            raise ValueError("y must have at most two dimensions.")
+        elif X.ndim == 1:
+            X = np.expand_dims(X, 1)
+
+        self._indices = np.arange(X.shape[1])
+
+        self.gmm.from_samples(np.hstack((X, y)), 
+                              R_diff=self.R_diff, n_iter=self.n_iter, init_params=self.init_params)
+        return self
+    
+    def predict(self, X):
+        if X.ndim > 2:
+            raise ValueError("y must have at most two dimensions.")
+        elif X.ndim == 1:
+            X = np.expand_dims(X, 1)
+
+        return self.gmm.predict(self._indices, X)
 
 def plot_error_ellipses(ax, gmm, colors=None, alpha=0.25, factors=np.linspace(0.25, 2.0, 8)):
     """Plot error ellipses of GMM components.
diff --git a/gmr/tests/test_gmm.py b/gmr/tests/test_gmm.py
index 362220e094..7177eccc25 100644
--- a/gmr/tests/test_gmm.py
+++ b/gmr/tests/test_gmm.py
@@ -11,7 +11,7 @@
 except ImportError:
     # Python 3
     from io import StringIO
-from gmr import GMM, MVN, plot_error_ellipses, kmeansplusplus_initialization, covariance_initialization
+from gmr import GMM, MVN, GMMRegression, plot_error_ellipses, kmeansplusplus_initialization, covariance_initialization
 from test_mvn import AxisStub
 
 
@@ -251,6 +251,22 @@ def test_regression_with_2d_input():
     pred = gmm.predict(np.array([0, 1]), np.hstack((x, x[::-1])))
     mse = np.sum((y - pred) ** 2) / n_samples
 
+    random_state = check_random_state(0)
+
+    n_samples = 200
+    x = np.linspace(0, 2, n_samples)[:, np.newaxis]
+    y1 = 3 * x[:n_samples // 2] + 1
+    y2 = -3 * x[n_samples // 2:] + 7
+    noise = random_state.randn(n_samples, 1) * 0.01
+    y = np.vstack((y1, y2)) + noise
+    samples = np.hstack((x, x[::-1], y))
+
+    gmm = GMMRegression(n_components=2, random_state=random_state)
+    gmm.fit(np.hstack((x, x[::-1])), y)
+
+    pred = gmm.predict(np.hstack((x, x[::-1])))
+    mse = np.sum((y - pred) ** 2) / n_samples
+
 
 def test_regression_without_noise():
     """Test regression without noise."""
@@ -273,6 +289,17 @@ def test_regression_without_noise():
     mse = np.sum((y - pred) ** 2) / n_samples
     assert_less(mse, 0.01)
 
+    random_state = check_random_state(0)
+
+    gmm = GMMRegression(n_components=2, random_state=random_state)
+    gmm.fit(x, y)
+    assert_array_almost_equal(gmm.gmm.priors, 0.5 * np.ones(2), decimal=2)
+    assert_array_almost_equal(gmm.gmm.means[0], np.array([1.5, 2.5]), decimal=2)
+    assert_array_almost_equal(gmm.gmm.means[1], np.array([0.5, 2.5]), decimal=1)
+
+    pred = gmm.predict(x)
+    mse = np.sum((y - pred) ** 2) / n_samples
+    assert_less(mse, 0.01)
 
 def test_plot():
     """Test plot of GMM."""

From 714cd94670d6625cb963e417aa18505dc4e4be0c Mon Sep 17 00:00:00 2001
From: "Marcelo R. Albuquerque" <marceloralbuquerque@gmail.com>
Date: Sat, 3 Apr 2021 19:07:05 -0300
Subject: [PATCH 2/4] Refactor GMMRegression to a separate gmr/sklearn.py
 module

---
 gmr/__init__.py | 10 +++++-
 gmr/gmm.py      | 79 -----------------------------------------------
 gmr/sklearn.py  | 82 +++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 91 insertions(+), 80 deletions(-)
 create mode 100644 gmr/sklearn.py

diff --git a/gmr/__init__.py b/gmr/__init__.py
index 6d1aff0245..fd148624f9 100644
--- a/gmr/__init__.py
+++ b/gmr/__init__.py
@@ -20,11 +20,19 @@
 if not __GMR_SETUP__:
     from . import gmm, mvn, utils
 
+    import warnings
+
     __all__ = ["gmm", "mvn", "utils"]
 
     from .mvn import MVN, plot_error_ellipse
-    from .gmm import (GMM, GMMRegression, plot_error_ellipses, kmeansplusplus_initialization,
+    from .gmm import (GMM, plot_error_ellipses, kmeansplusplus_initialization,
                       covariance_initialization)
 
+    try:
+        from .sklearn import GMMRegression
+    except ImportError:
+        class GMMRegression:
+            warnings.warn("Scikit-learn is required to use GMMRegression.")
+
     __all__.extend(["MVN", "plot_error_ellipse", "GMM", "plot_error_ellipses",
                     "kmeansplusplus_initialization", "covariance_initialization"])
diff --git a/gmr/gmm.py b/gmr/gmm.py
index 20e4823473..197976a8cb 100644
--- a/gmr/gmm.py
+++ b/gmr/gmm.py
@@ -1,7 +1,6 @@
 import numpy as np
 from scipy.spatial.distance import cdist, pdist
 from scipy.stats import chi2
-from sklearn.base import BaseEstimator, RegressorMixin, MultiOutputMixin
 from .utils import check_random_state
 from .mvn import MVN
 
@@ -490,84 +489,6 @@ def extract_mvn(self, component_idx):
             covariance=self.covariances[component_idx], verbose=self.verbose,
             random_state=self.random_state)
 
-class GMMRegression(MultiOutputMixin, RegressorMixin, BaseEstimator):
-    """Scikit-learn RegressorMixin for the GMM class.
-
-    Parameters
-    ----------
-
-    n_components : int (> 0)
-        Number of MVNs that compose the GMM.
-
-    random_state : int or RandomState, optional (default: global random state)
-        If an integer is given, it fixes the seed. Defaults to the global numpy
-        random number generator.
-
-    R_diff : float
-        Minimum allowed difference of responsibilities between successive
-        EM iterations.
-
-    n_iter : int
-        Maximum number of iterations.
-
-    init_params : str, optional (default: 'random')
-        Parameter initialization strategy. If means and covariances are
-        given in the constructor, this parameter will have no effect.
-        'random' will sample initial means randomly from the dataset
-        and set covariances to identity matrices. This is the
-        computationally cheap solution.
-        'kmeans++' will use k-means++ initialization for means and
-        initialize covariances to diagonal matrices with variances
-        set based on the average distances of samples in each dimensions.
-        This is computationally more expensive but often gives much
-        better results.
-
-    Returns
-    -------
-    self : GMMRegression
-        This object.
-    """
-
-    def __init__(self, n_components, priors=None, means=None, covariances=None,
-                 verbose=0, random_state=None, R_diff=1e-4, n_iter=500, init_params="random"):
-        self.n_components = n_components
-        self.priors = priors
-        self.means = means
-        self.covariances = covariances
-        self.verbose = verbose
-        self.random_state = random_state
-        self.R_diff = R_diff
-        self.n_iter = n_iter
-        self.init_params = init_params
-    
-    def fit(self, X, y):
-        self.gmm = GMM(self.n_components, priors=self.priors, means=self.means, 
-                        covariances=self.covariances, verbose=self.verbose, random_state=self.random_state)
-
-        if y.ndim > 2:
-            raise ValueError("y must have at most two dimensions.")
-        elif y.ndim == 1:
-            y = np.expand_dims(y, 1)
-        
-        if X.ndim > 2:
-            raise ValueError("y must have at most two dimensions.")
-        elif X.ndim == 1:
-            X = np.expand_dims(X, 1)
-
-        self._indices = np.arange(X.shape[1])
-
-        self.gmm.from_samples(np.hstack((X, y)), 
-                              R_diff=self.R_diff, n_iter=self.n_iter, init_params=self.init_params)
-        return self
-    
-    def predict(self, X):
-        if X.ndim > 2:
-            raise ValueError("y must have at most two dimensions.")
-        elif X.ndim == 1:
-            X = np.expand_dims(X, 1)
-
-        return self.gmm.predict(self._indices, X)
-
 def plot_error_ellipses(ax, gmm, colors=None, alpha=0.25, factors=np.linspace(0.25, 2.0, 8)):
     """Plot error ellipses of GMM components.
 
diff --git a/gmr/sklearn.py b/gmr/sklearn.py
new file mode 100644
index 0000000000..d877fd5c49
--- /dev/null
+++ b/gmr/sklearn.py
@@ -0,0 +1,82 @@
+import numpy as np
+from sklearn.base import BaseEstimator, RegressorMixin, MultiOutputMixin
+
+from .gmm import GMM
+
+class GMMRegression(MultiOutputMixin, RegressorMixin, BaseEstimator):
+    """Scikit-learn RegressorMixin for the GMM class.
+
+    Parameters
+    ----------
+
+    n_components : int (> 0)
+        Number of MVNs that compose the GMM.
+
+    random_state : int or RandomState, optional (default: global random state)
+        If an integer is given, it fixes the seed. Defaults to the global numpy
+        random number generator.
+
+    R_diff : float
+        Minimum allowed difference of responsibilities between successive
+        EM iterations.
+
+    n_iter : int
+        Maximum number of iterations.
+
+    init_params : str, optional (default: 'random')
+        Parameter initialization strategy. If means and covariances are
+        given in the constructor, this parameter will have no effect.
+        'random' will sample initial means randomly from the dataset
+        and set covariances to identity matrices. This is the
+        computationally cheap solution.
+        'kmeans++' will use k-means++ initialization for means and
+        initialize covariances to diagonal matrices with variances
+        set based on the average distances of samples in each dimensions.
+        This is computationally more expensive but often gives much
+        better results.
+
+    Returns
+    -------
+    self : GMMRegression
+        This object.
+    """
+
+    def __init__(self, n_components, priors=None, means=None, covariances=None,
+                verbose=0, random_state=None, R_diff=1e-4, n_iter=500, init_params="random"):
+        self.n_components = n_components
+        self.priors = priors
+        self.means = means
+        self.covariances = covariances
+        self.verbose = verbose
+        self.random_state = random_state
+        self.R_diff = R_diff
+        self.n_iter = n_iter
+        self.init_params = init_params
+    
+    def fit(self, X, y):
+        self.gmm = GMM(self.n_components, priors=self.priors, means=self.means, 
+                        covariances=self.covariances, verbose=self.verbose, random_state=self.random_state)
+
+        if y.ndim > 2:
+            raise ValueError("y must have at most two dimensions.")
+        elif y.ndim == 1:
+            y = np.expand_dims(y, 1)
+        
+        if X.ndim > 2:
+            raise ValueError("y must have at most two dimensions.")
+        elif X.ndim == 1:
+            X = np.expand_dims(X, 1)
+
+        self._indices = np.arange(X.shape[1])
+
+        self.gmm.from_samples(np.hstack((X, y)), 
+                            R_diff=self.R_diff, n_iter=self.n_iter, init_params=self.init_params)
+        return self
+    
+    def predict(self, X):
+        if X.ndim > 2:
+            raise ValueError("y must have at most two dimensions.")
+        elif X.ndim == 1:
+            X = np.expand_dims(X, 1)
+
+        return self.gmm.predict(self._indices, X)
\ No newline at end of file

From 556e9ecc3934bf2aadfbe1722b13bdc140dd79c5 Mon Sep 17 00:00:00 2001
From: "Marcelo R. Albuquerque" <marceloralbuquerque@gmail.com>
Date: Sun, 4 Apr 2021 09:35:21 -0300
Subject: [PATCH 3/4] Refactor GaussianMixtureRegressor and add
 gmr/tests/test_sklearn.py test suite.

---
 gmr/__init__.py           | 10 +-----
 gmr/gmm.py                |  1 +
 gmr/sklearn.py            | 70 +++++++++++++++++++++++----------------
 gmr/tests/test_gmm.py     | 19 +----------
 gmr/tests/test_sklearn.py | 68 +++++++++++++++++++++++++++++++++++++
 5 files changed, 112 insertions(+), 56 deletions(-)
 create mode 100644 gmr/tests/test_sklearn.py

diff --git a/gmr/__init__.py b/gmr/__init__.py
index fd148624f9..ebbc39c378 100644
--- a/gmr/__init__.py
+++ b/gmr/__init__.py
@@ -20,19 +20,11 @@
 if not __GMR_SETUP__:
     from . import gmm, mvn, utils
 
-    import warnings
-
-    __all__ = ["gmm", "mvn", "utils"]
+    __all__ = ["gmm", "mvn", "utils", "sklearn"]
 
     from .mvn import MVN, plot_error_ellipse
     from .gmm import (GMM, plot_error_ellipses, kmeansplusplus_initialization,
                       covariance_initialization)
 
-    try:
-        from .sklearn import GMMRegression
-    except ImportError:
-        class GMMRegression:
-            warnings.warn("Scikit-learn is required to use GMMRegression.")
-
     __all__.extend(["MVN", "plot_error_ellipse", "GMM", "plot_error_ellipses",
                     "kmeansplusplus_initialization", "covariance_initialization"])
diff --git a/gmr/gmm.py b/gmr/gmm.py
index 197976a8cb..f1f213e5d7 100644
--- a/gmr/gmm.py
+++ b/gmr/gmm.py
@@ -489,6 +489,7 @@ def extract_mvn(self, component_idx):
             covariance=self.covariances[component_idx], verbose=self.verbose,
             random_state=self.random_state)
 
+
 def plot_error_ellipses(ax, gmm, colors=None, alpha=0.25, factors=np.linspace(0.25, 2.0, 8)):
     """Plot error ellipses of GMM components.
 
diff --git a/gmr/sklearn.py b/gmr/sklearn.py
index d877fd5c49..dcd3b73f32 100644
--- a/gmr/sklearn.py
+++ b/gmr/sklearn.py
@@ -1,26 +1,44 @@
 import numpy as np
-from sklearn.base import BaseEstimator, RegressorMixin, MultiOutputMixin
+
+try:
+    from sklearn.base import BaseEstimator, RegressorMixin, MultiOutputMixin
+    from sklearn.utils import check_X_y
+    from sklearn.utils.validation import check_is_fitted, check_array, FLOAT_DTYPES
+except ImportError:
+    raise ImportError("Install scikit-learn (e.g. pip install scikit-learn) to use this extension.")
 
 from .gmm import GMM
 
-class GMMRegression(MultiOutputMixin, RegressorMixin, BaseEstimator):
-    """Scikit-learn RegressorMixin for the GMM class.
+
+class GaussianMixtureRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator):
+    """Gaussian mixture regression compatible with scikit-learn.
 
     Parameters
     ----------
-
-    n_components : int (> 0)
+    n_components : int
         Number of MVNs that compose the GMM.
 
+    priors : array, shape (n_components,), optional
+        Weights of the components.
+
+    means : array, shape (n_components, n_features), optional
+        Means of the components.
+
+    covariances : array, shape (n_components, n_features, n_features), optional
+        Covariances of the components.
+
+    verbose : int, optional (default: 0)
+        Verbosity level.
+
     random_state : int or RandomState, optional (default: global random state)
         If an integer is given, it fixes the seed. Defaults to the global numpy
         random number generator.
 
-    R_diff : float
+    R_diff : float, optional (default: 1e-4)
         Minimum allowed difference of responsibilities between successive
         EM iterations.
 
-    n_iter : int
+    n_iter : int, optional (default: 500)
         Maximum number of iterations.
 
     init_params : str, optional (default: 'random')
@@ -34,15 +52,10 @@ class GMMRegression(MultiOutputMixin, RegressorMixin, BaseEstimator):
         set based on the average distances of samples in each dimensions.
         This is computationally more expensive but often gives much
         better results.
-
-    Returns
-    -------
-    self : GMMRegression
-        This object.
     """
 
     def __init__(self, n_components, priors=None, means=None, covariances=None,
-                verbose=0, random_state=None, R_diff=1e-4, n_iter=500, init_params="random"):
+                 verbose=0, random_state=None, R_diff=1e-4, n_iter=500, init_params="random"):
         self.n_components = n_components
         self.priors = priors
         self.means = means
@@ -52,31 +65,30 @@ def __init__(self, n_components, priors=None, means=None, covariances=None,
         self.R_diff = R_diff
         self.n_iter = n_iter
         self.init_params = init_params
-    
-    def fit(self, X, y):
-        self.gmm = GMM(self.n_components, priors=self.priors, means=self.means, 
+
+        self.gmm_ = GMM(self.n_components, priors=self.priors, means=self.means,
                         covariances=self.covariances, verbose=self.verbose, random_state=self.random_state)
 
-        if y.ndim > 2:
-            raise ValueError("y must have at most two dimensions.")
-        elif y.ndim == 1:
-            y = np.expand_dims(y, 1)
-        
-        if X.ndim > 2:
-            raise ValueError("y must have at most two dimensions.")
-        elif X.ndim == 1:
+    def fit(self, X, y):
+        X, y = check_X_y(X, y, estimator=self.gmm_, dtype=FLOAT_DTYPES, multi_output=True)
+        if X.ndim == 1:
             X = np.expand_dims(X, 1)
+        if y.ndim == 1:
+            y = np.expand_dims(y, 1)
 
-        self._indices = np.arange(X.shape[1])
+        self.indices_ = np.arange(X.shape[1])
 
-        self.gmm.from_samples(np.hstack((X, y)), 
-                            R_diff=self.R_diff, n_iter=self.n_iter, init_params=self.init_params)
+        self.gmm_.from_samples(np.hstack((X, y)),
+                               R_diff=self.R_diff, n_iter=self.n_iter, init_params=self.init_params)
         return self
-    
+
     def predict(self, X):
+        check_is_fitted(self, ["gmm_", "indices_"])
+        X = check_array(X, estimator=self.gmm_, dtype=FLOAT_DTYPES)
+
         if X.ndim > 2:
             raise ValueError("y must have at most two dimensions.")
         elif X.ndim == 1:
             X = np.expand_dims(X, 1)
 
-        return self.gmm.predict(self._indices, X)
\ No newline at end of file
+        return self.gmm_.predict(self.indices_, X)
diff --git a/gmr/tests/test_gmm.py b/gmr/tests/test_gmm.py
index 7177eccc25..621c93d090 100644
--- a/gmr/tests/test_gmm.py
+++ b/gmr/tests/test_gmm.py
@@ -11,7 +11,7 @@
 except ImportError:
     # Python 3
     from io import StringIO
-from gmr import GMM, MVN, GMMRegression, plot_error_ellipses, kmeansplusplus_initialization, covariance_initialization
+from gmr import GMM, MVN, plot_error_ellipses, kmeansplusplus_initialization, covariance_initialization
 from test_mvn import AxisStub
 
 
@@ -261,12 +261,6 @@ def test_regression_with_2d_input():
     y = np.vstack((y1, y2)) + noise
     samples = np.hstack((x, x[::-1], y))
 
-    gmm = GMMRegression(n_components=2, random_state=random_state)
-    gmm.fit(np.hstack((x, x[::-1])), y)
-
-    pred = gmm.predict(np.hstack((x, x[::-1])))
-    mse = np.sum((y - pred) ** 2) / n_samples
-
 
 def test_regression_without_noise():
     """Test regression without noise."""
@@ -289,17 +283,6 @@ def test_regression_without_noise():
     mse = np.sum((y - pred) ** 2) / n_samples
     assert_less(mse, 0.01)
 
-    random_state = check_random_state(0)
-
-    gmm = GMMRegression(n_components=2, random_state=random_state)
-    gmm.fit(x, y)
-    assert_array_almost_equal(gmm.gmm.priors, 0.5 * np.ones(2), decimal=2)
-    assert_array_almost_equal(gmm.gmm.means[0], np.array([1.5, 2.5]), decimal=2)
-    assert_array_almost_equal(gmm.gmm.means[1], np.array([0.5, 2.5]), decimal=1)
-
-    pred = gmm.predict(x)
-    mse = np.sum((y - pred) ** 2) / n_samples
-    assert_less(mse, 0.01)
 
 def test_plot():
     """Test plot of GMM."""
diff --git a/gmr/tests/test_sklearn.py b/gmr/tests/test_sklearn.py
new file mode 100644
index 0000000000..3fbfcc0f3e
--- /dev/null
+++ b/gmr/tests/test_sklearn.py
@@ -0,0 +1,68 @@
+import numpy as np
+from nose.tools import assert_less
+from numpy.testing import assert_array_almost_equal
+from gmr.utils import check_random_state
+
+from gmr.sklearn import GaussianMixtureRegressor
+
+
+def test_sklearn_regression():
+    """Fit a noisy two-segment line and check the learned mixture and its MSE."""
+    rng = check_random_state(0)
+
+    n_samples = 200
+    half = n_samples // 2
+    x = np.linspace(0, 2, n_samples).reshape(-1, 1)
+    # Rising segment on the first half of x, falling segment on the second.
+    segments = (3 * x[:half] + 1, -3 * x[half:] + 7)
+    y = np.vstack(segments) + rng.randn(n_samples, 1) * 0.01
+
+    gmr = GaussianMixtureRegressor(n_components=2, random_state=rng)
+    gmr.fit(x, y)
+    mixture = gmr.gmm_
+    assert_array_almost_equal(mixture.priors, np.full(2, 0.5), decimal=2)
+    assert_array_almost_equal(mixture.means[0], np.array([0.5, 2.5]), decimal=2)
+    assert_array_almost_equal(mixture.means[1], np.array([1.5, 2.5]), decimal=1)
+
+    residuals = y - gmr.predict(x)
+    assert_less(np.sum(residuals ** 2) / n_samples, 0.01)
+
+
+def test_sklearn_regression_with_2d_input():
+    """Test regression with GaussianMixtureRegressor and two-dimensional input."""
+    random_state = check_random_state(0)
+
+    n_samples = 200
+    x = np.linspace(0, 2, n_samples)[:, np.newaxis]
+    y1 = 3 * x[:n_samples // 2] + 1
+    y2 = -3 * x[n_samples // 2:] + 7
+    noise = random_state.randn(n_samples, 1) * 0.01
+    y = np.vstack((y1, y2)) + noise
+
+    gmr = GaussianMixtureRegressor(n_components=2, random_state=random_state)
+    gmr.fit(x, y)
+
+    pred = gmr.predict(x)
+    mse = np.sum((y - pred) ** 2) / n_samples
+    assert_less(mse, 0.01)
+
+
+def test_sklearn_regression_without_noise():
+    """Test regression without noise."""
+    random_state = 0
+
+    n_samples = 200
+    x = np.linspace(0, 2, n_samples)[:, np.newaxis]
+    y1 = 3 * x[:n_samples // 2] + 1
+    y2 = -3 * x[n_samples // 2:] + 7
+    y = np.vstack((y1, y2))
+
+    gmr = GaussianMixtureRegressor(n_components=2, random_state=random_state)
+    gmr.fit(x, y)
+    assert_array_almost_equal(gmr.gmm_.priors, 0.5 * np.ones(2), decimal=2)
+    assert_array_almost_equal(gmr.gmm_.means[0], np.array([1.5, 2.5]), decimal=2)
+    assert_array_almost_equal(gmr.gmm_.means[1], np.array([0.5, 2.5]), decimal=1)
+
+    pred = gmr.predict(x)
+    mse = np.sum((y - pred) ** 2) / n_samples
+    assert_less(mse, 0.01)

From de8044219c95c64c63bed5fc156bd74f23ef88dd Mon Sep 17 00:00:00 2001
From: "Marcelo R. Albuquerque" <marceloralbuquerque@gmail.com>
Date: Wed, 14 Apr 2021 02:40:11 -0300
Subject: [PATCH 4/4] Refactor gmr/sklearn.py

---
 gmr/sklearn.py            |  9 +--------
 gmr/tests/test_gmm.py     |  2 --
 gmr/tests/test_sklearn.py | 16 ++++++++++++++++
 3 files changed, 17 insertions(+), 10 deletions(-)

diff --git a/gmr/sklearn.py b/gmr/sklearn.py
index dcd3b73f32..31ec009fbb 100644
--- a/gmr/sklearn.py
+++ b/gmr/sklearn.py
@@ -66,13 +66,11 @@ def __init__(self, n_components, priors=None, means=None, covariances=None,
         self.n_iter = n_iter
         self.init_params = init_params
 
+    def fit(self, X, y):
         self.gmm_ = GMM(self.n_components, priors=self.priors, means=self.means,
                         covariances=self.covariances, verbose=self.verbose, random_state=self.random_state)
 
-    def fit(self, X, y):
         X, y = check_X_y(X, y, estimator=self.gmm_, dtype=FLOAT_DTYPES, multi_output=True)
-        if X.ndim == 1:
-            X = np.expand_dims(X, 1)
         if y.ndim == 1:
             y = np.expand_dims(y, 1)
 
@@ -86,9 +84,4 @@ def predict(self, X):
         check_is_fitted(self, ["gmm_", "indices_"])
         X = check_array(X, estimator=self.gmm_, dtype=FLOAT_DTYPES)
 
-        if X.ndim > 2:
-            raise ValueError("y must have at most two dimensions.")
-        elif X.ndim == 1:
-            X = np.expand_dims(X, 1)
-
         return self.gmm_.predict(self.indices_, X)
diff --git a/gmr/tests/test_gmm.py b/gmr/tests/test_gmm.py
index 621c93d090..b8693141c0 100644
--- a/gmr/tests/test_gmm.py
+++ b/gmr/tests/test_gmm.py
@@ -251,8 +251,6 @@ def test_regression_with_2d_input():
     pred = gmm.predict(np.array([0, 1]), np.hstack((x, x[::-1])))
     mse = np.sum((y - pred) ** 2) / n_samples
 
-    random_state = check_random_state(0)
-
     n_samples = 200
     x = np.linspace(0, 2, n_samples)[:, np.newaxis]
     y1 = 3 * x[:n_samples // 2] + 1
diff --git a/gmr/tests/test_sklearn.py b/gmr/tests/test_sklearn.py
index 3fbfcc0f3e..d4cd46efa2 100644
--- a/gmr/tests/test_sklearn.py
+++ b/gmr/tests/test_sklearn.py
@@ -47,6 +47,22 @@ def test_sklearn_regression_with_2d_input():
     assert_less(mse, 0.01)
 
 
+def test_sklearn_regression_with_1d_output():
+    """Test regression with GaussianMixtureRegressor and one-dimensional output."""
+    random_state = check_random_state(0)
+
+    n_samples = 200
+    x = np.linspace(0, 2, n_samples)[:, np.newaxis]
+    y = (3 * x + 1).flatten()  # 1D target, shape (n_samples,)
+
+    gmr = GaussianMixtureRegressor(n_components=1, random_state=random_state)
+    gmr.fit(x, y)
+
+    pred = gmr.predict(x)
+    mse = np.sum((y - pred.ravel()) ** 2) / n_samples  # ravel: avoid (n,) vs (n, 1) broadcast
+    assert_less(mse, 0.01)
+
+
 def test_sklearn_regression_without_noise():
     """Test regression without noise."""
     random_state = 0