Merge pull request #436 from GAA-UAM/feature/several_dim_kernel_smoot…

…hing Smoothing in several dimensions
GAA-UAM · Oct 10, 2023 · fac57c8 · fac57c8
2 parents ebab51d + e6ce505
commit fac57c8
Show file tree

Hide file tree

Showing 4 changed files with 122 additions and 51 deletions.
diff --git a/skfda/misc/hat_matrix.py b/skfda/misc/hat_matrix.py
@@ -11,20 +11,21 @@
 
 import abc
 import math
-from typing import Callable, TypeVar, Union, overload
+from typing import Callable, Final, TypeVar, Union, overload
 
 import numpy as np
 
 from .._utils._sklearn_adapter import BaseEstimator
 from ..representation._functional_data import FData
 from ..representation.basis import FDataBasis
-from ..typing._base import GridPointsLike
 from ..typing._numpy import NDArrayFloat
 from . import kernels
 
-Input = TypeVar("Input", bound=Union[FData, GridPointsLike])
+Input = TypeVar("Input", bound=Union[FData, NDArrayFloat])
 Prediction = TypeVar("Prediction", bound=Union[NDArrayFloat, FData])
 
+DEFAULT_BANDWIDTH_PERCENTILE: Final = 15
+
 
 class HatMatrix(
     BaseEstimator,
@@ -52,8 +53,8 @@ def __call__(
         self,
         *,
         delta_x: NDArrayFloat,
-        X_train: Input | None = None,
-        X: Input | None = None,
+        X_train: Input,
+        X: Input,
         y_train: None = None,
         weights: NDArrayFloat | None = None,
         _cv: bool = False,
@@ -65,8 +66,8 @@ def __call__(
         self,
         *,
         delta_x: NDArrayFloat,
-        X_train: Input | None = None,
-        X: Input | None = None,
+        X_train: Input,
+        X: Input,
         y_train: Prediction | None = None,
         weights: NDArrayFloat | None = None,
         _cv: bool = False,
@@ -77,8 +78,8 @@ def __call__(
         self,
         *,
         delta_x: NDArrayFloat,
-        X_train: Input | None = None,
-        X: Input | None = None,
+        X_train: Input,
+        X: Input,
         y_train: NDArrayFloat | FData | None = None,
         weights: NDArrayFloat | None = None,
         _cv: bool = False,
@@ -186,7 +187,7 @@ def _hat_matrix_function_not_normalized(
     ) -> NDArrayFloat:
 
         bandwidth = (
-            np.percentile(np.abs(delta_x), 15)
+            float(np.percentile(delta_x, DEFAULT_BANDWIDTH_PERCENTILE))
             if self.bandwidth is None
             else self.bandwidth
         )
@@ -278,8 +279,8 @@ def __call__(
         self,
         *,
         delta_x: NDArrayFloat,
-        X_train: FData | GridPointsLike | None = None,
-        X: FData | GridPointsLike | None = None,
+        X_train: FData | NDArrayFloat,
+        X: FData | NDArrayFloat,
         y_train: None = None,
         weights: NDArrayFloat | None = None,
         _cv: bool = False,
@@ -291,8 +292,8 @@ def __call__(
         self,
         *,
         delta_x: NDArrayFloat,
-        X_train: FData | GridPointsLike | None = None,
-        X: FData | GridPointsLike | None = None,
+        X_train: FData | NDArrayFloat,
+        X: FData | NDArrayFloat,
         y_train: Prediction | None = None,
         weights: NDArrayFloat | None = None,
         _cv: bool = False,
@@ -303,19 +304,37 @@ def __call__(  # noqa: D102
         self,
         *,
         delta_x: NDArrayFloat,
-        X_train: FData | GridPointsLike | None = None,
-        X: FData | GridPointsLike | None = None,
+        X_train: FData | NDArrayFloat,
+        X: FData | NDArrayFloat,
         y_train: NDArrayFloat | FData | None = None,
         weights: NDArrayFloat | None = None,
         _cv: bool = False,
     ) -> NDArrayFloat | FData:
 
         bandwidth = (
-            np.percentile(np.abs(delta_x), 15)
+            float(np.percentile(delta_x, DEFAULT_BANDWIDTH_PERCENTILE))
             if self.bandwidth is None
             else self.bandwidth
         )
 
+        # Smoothing for functions of one variable
+        if not isinstance(X_train, FDataBasis) and X_train[0].shape[0] == 1:
+            delta_x = np.subtract.outer(
+                X.flatten(),
+                X_train.flatten(),
+            )
+
+            assert y_train is None
+
+            return super().__call__(  # noqa: WPS503
+                delta_x=delta_x,
+                X_train=X_train,
+                X=X,
+                y_train=y_train,
+                weights=weights,
+                _cv=_cv,
+            )
+
         # Regression
         if isinstance(X_train, FData):
             assert isinstance(X, FData)
@@ -326,44 +345,41 @@ def __call__(  # noqa: D102
             ):
                 raise ValueError("Only FDataBasis is supported for now.")
 
+            m1 = X_train.coefficients
+            m2 = X.coefficients
+
             if y_train is None:
                 y_train = np.identity(X_train.n_samples)
 
-            m1 = X_train.coefficients
-            m2 = X.coefficients
+        else:
+            m1 = X_train
+            m2 = X
+
+            if y_train is None:
+                y_train = np.identity(X_train.shape[0])
 
-            # Subtract previous matrices obtaining a 3D matrix
-            # The i-th element contains the matrix X_train - X[i]
-            C = m1 - m2[:, np.newaxis]
+        # Subtract previous matrices obtaining a 3D matrix
+        # The i-th element contains the matrix X_train - X[i]
+        C = m1 - m2[:, np.newaxis]
 
+        # Inner product matrix only is applicable in regression
+        if isinstance(X_train, FDataBasis):
             inner_product_matrix = X_train.basis.inner_product_matrix()
 
             # Calculate new coefficients taking into account cross-products
             # if the basis is orthonormal, C would not change
             C = C @ inner_product_matrix  # noqa: WPS350
 
-            # Adding a column of ones in the first position of all matrices
-            dims = (C.shape[0], C.shape[1], 1)
-            C = np.concatenate((np.ones(dims), C), axis=-1)
-
-            return self._solve_least_squares(
-                delta_x=delta_x,
-                coefs=C,
-                y_train=y_train,
-                bandwidth=bandwidth,
-            )
-
-        # Smoothing
-        else:
+        # Adding a column of ones in the first position of all matrices
+        dims = (C.shape[0], C.shape[1], 1)
+        C = np.concatenate((np.ones(dims), C), axis=-1)
 
-            return super().__call__(  # type: ignore[misc, type-var] # noqa: WPS503
-                delta_x=delta_x,
-                X_train=X_train,
-                X=X,
-                y_train=y_train,  # type: ignore[arg-type]
-                weights=weights,
-                _cv=_cv,
-            )
+        return self._solve_least_squares(
+            delta_x=delta_x,
+            coefs=C,
+            y_train=y_train,
+            bandwidth=bandwidth,
+        )
 
     def _solve_least_squares(
         self,
@@ -485,7 +501,7 @@ def _hat_matrix_function_not_normalized(
         # For each row in the distances matrix, it calculates the furthest
         # point within the k nearest neighbours
         vec = np.sort(
-            np.abs(delta_x),
+            delta_x,
             axis=1,
         )[:, n_neighbors - 1] + tol
 

diff --git a/skfda/preprocessing/smoothing/_kernel_smoothers.py b/skfda/preprocessing/smoothing/_kernel_smoothers.py
@@ -9,9 +9,11 @@
 
 import numpy as np
 
-from ..._utils._utils import _to_grid_points
+from ..._utils._utils import _cartesian_product, _to_grid_points
 from ...misc.hat_matrix import HatMatrix, NadarayaWatsonHatMatrix
+from ...misc.metrics import PairwiseMetric, l2_distance
 from ...typing._base import GridPointsLike
+from ...typing._metric import Metric
 from ...typing._numpy import NDArrayFloat
 from ._linear import _LinearSmoother
 
@@ -114,10 +116,12 @@ def __init__(
         *,
         weights: Optional[NDArrayFloat] = None,
         output_points: Optional[GridPointsLike] = None,
+        metric: Metric[NDArrayFloat] = l2_distance,
     ):
         self.kernel_estimator = kernel_estimator
         self.weights = weights
         self.output_points = output_points
+        self.metric = metric
         self._cv = False  # For testing purposes only
 
     def _hat_matrix(
@@ -126,18 +130,18 @@ def _hat_matrix(
         output_points: GridPointsLike,
     ) -> NDArrayFloat:
 
-        input_points = _to_grid_points(input_points)
-        output_points = _to_grid_points(output_points)
+        input_points = _cartesian_product(_to_grid_points(input_points))
+        output_points = _cartesian_product(_to_grid_points(output_points))
 
         if self.kernel_estimator is None:
             self.kernel_estimator = NadarayaWatsonHatMatrix()
 
-        delta_x = np.subtract.outer(output_points[0], input_points[0])
+        delta_x = PairwiseMetric(self.metric)(output_points, input_points)
 
         return self.kernel_estimator(
             delta_x=delta_x,
             weights=self.weights,
-            X_train=input_points,
-            X=output_points,
+            X_train=np.array(input_points),
+            X=np.array(output_points),
             _cv=self._cv,
         )
diff --git a/skfda/preprocessing/smoothing/_linear.py b/skfda/preprocessing/smoothing/_linear.py
@@ -28,6 +28,7 @@ class _LinearSmoother(
     ``hat_matrix`` to define the smoothing or 'hat' matrix.
 
     """
+
     input_points_: GridPoints
     output_points_: GridPoints
 
@@ -116,9 +117,25 @@ def transform(
             )
         )
 
+        dims = [len(e) for e in self.output_points_]
+        dims += [len(e) for e in self.input_points_]
+
+        hat_matrix = np.reshape(
+            self.hat_matrix_,
+            dims,
+        )
+
+        data_matrix = np.einsum(
+            hat_matrix,
+            [Ellipsis, *range(1, len(self.output_points_) + 1)],
+            X.data_matrix,
+            [0, *range(1, len(self.output_points_) + 2)],
+            [0, Ellipsis, len(self.output_points_) + 1],
+        )
+
         # The matrix is cached
         return X.copy(
-            data_matrix=self.hat_matrix_ @ X.data_matrix,
+            data_matrix=data_matrix,
             grid_points=self.output_points_,
         )
 

diff --git a/skfda/tests/test_smoothing.py b/skfda/tests/test_smoothing.py
@@ -4,6 +4,7 @@
 
 import numpy as np
 import sklearn
+from sklearn.datasets import load_digits
 from typing_extensions import Literal
 
 import skfda
@@ -173,6 +174,39 @@ def test_knn(self) -> None:
         np.testing.assert_allclose(hat_matrix, hat_matrix_r)
 
 
+class TestSurfaceSmoother(unittest.TestCase):
+    """Test that smoothing works on surfaces."""
+
+    def test_knn(self) -> None:
+        """Check 2D smoothing using digits dataset."""
+        X, y = load_digits(return_X_y=True)
+        X = X.reshape(-1, 8, 8)
+
+        fd = skfda.FDataGrid(X)
+
+        ks = KernelSmoother(
+            kernel_estimator=KNeighborsHatMatrix(n_neighbors=1))
+        fd_trans = ks.fit_transform(fd)
+
+        np.testing.assert_allclose(fd_trans.data_matrix, fd.data_matrix)
+
+        ks = KernelSmoother(
+            kernel_estimator=KNeighborsHatMatrix(n_neighbors=3))
+        fd_trans = ks.fit_transform(fd)
+
+        res = [
+            [0, 1.25, 7.75, 10.5, 8.25, 6.25, 1.5, 0],
+            [0, 3.2, 9.6, 10.6, 9.8, 8.4, 5.6, 1.25],
+            [0.75, 4.4, 9, 6.4, 4.6, 8.4, 6.4, 2],
+            [1, 4.8, 7.8, 2.8, 1.6, 7.2, 6.4, 2],
+            [1.25, 4.2, 7.2, 1.6, 2, 7.4, 6.4, 2],
+            [1, 4.4, 7.4, 3.4, 4.6, 8.2, 5.4, 1.75],
+            [0.5, 4, 7.6, 8.4, 7.6, 6.8, 3.8, 0],
+            [0, 2, 8.25, 8.5, 8.25, 5.5, 0, 0],
+        ]
+        np.testing.assert_allclose(fd_trans.data_matrix[0, ..., 0], res)
+
+
 class TestBasisSmoother(unittest.TestCase):
     """Test Basis Smoother."""