Skip to content

Commit

Permalink
Merge pull request #436 from GAA-UAM/feature/several_dim_kernel_smoot…
Browse files Browse the repository at this point in the history
…hing

Smoothing in several dimensions
  • Loading branch information
vnmabus committed Oct 10, 2023
2 parents ebab51d + e6ce505 commit fac57c8
Show file tree
Hide file tree
Showing 4 changed files with 122 additions and 51 deletions.
104 changes: 60 additions & 44 deletions skfda/misc/hat_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,20 +11,21 @@

import abc
import math
from typing import Callable, TypeVar, Union, overload
from typing import Callable, Final, TypeVar, Union, overload

import numpy as np

from .._utils._sklearn_adapter import BaseEstimator
from ..representation._functional_data import FData
from ..representation.basis import FDataBasis
from ..typing._base import GridPointsLike
from ..typing._numpy import NDArrayFloat
from . import kernels

Input = TypeVar("Input", bound=Union[FData, GridPointsLike])
Input = TypeVar("Input", bound=Union[FData, NDArrayFloat])
Prediction = TypeVar("Prediction", bound=Union[NDArrayFloat, FData])

DEFAULT_BANDWIDTH_PERCENTILE: Final = 15


class HatMatrix(
BaseEstimator,
Expand Down Expand Up @@ -52,8 +53,8 @@ def __call__(
self,
*,
delta_x: NDArrayFloat,
X_train: Input | None = None,
X: Input | None = None,
X_train: Input,
X: Input,
y_train: None = None,
weights: NDArrayFloat | None = None,
_cv: bool = False,
Expand All @@ -65,8 +66,8 @@ def __call__(
self,
*,
delta_x: NDArrayFloat,
X_train: Input | None = None,
X: Input | None = None,
X_train: Input,
X: Input,
y_train: Prediction | None = None,
weights: NDArrayFloat | None = None,
_cv: bool = False,
Expand All @@ -77,8 +78,8 @@ def __call__(
self,
*,
delta_x: NDArrayFloat,
X_train: Input | None = None,
X: Input | None = None,
X_train: Input,
X: Input,
y_train: NDArrayFloat | FData | None = None,
weights: NDArrayFloat | None = None,
_cv: bool = False,
Expand Down Expand Up @@ -186,7 +187,7 @@ def _hat_matrix_function_not_normalized(
) -> NDArrayFloat:

bandwidth = (
np.percentile(np.abs(delta_x), 15)
float(np.percentile(delta_x, DEFAULT_BANDWIDTH_PERCENTILE))
if self.bandwidth is None
else self.bandwidth
)
Expand Down Expand Up @@ -278,8 +279,8 @@ def __call__(
self,
*,
delta_x: NDArrayFloat,
X_train: FData | GridPointsLike | None = None,
X: FData | GridPointsLike | None = None,
X_train: FData | NDArrayFloat,
X: FData | NDArrayFloat,
y_train: None = None,
weights: NDArrayFloat | None = None,
_cv: bool = False,
Expand All @@ -291,8 +292,8 @@ def __call__(
self,
*,
delta_x: NDArrayFloat,
X_train: FData | GridPointsLike | None = None,
X: FData | GridPointsLike | None = None,
X_train: FData | NDArrayFloat,
X: FData | NDArrayFloat,
y_train: Prediction | None = None,
weights: NDArrayFloat | None = None,
_cv: bool = False,
Expand All @@ -303,19 +304,37 @@ def __call__( # noqa: D102
self,
*,
delta_x: NDArrayFloat,
X_train: FData | GridPointsLike | None = None,
X: FData | GridPointsLike | None = None,
X_train: FData | NDArrayFloat,
X: FData | NDArrayFloat,
y_train: NDArrayFloat | FData | None = None,
weights: NDArrayFloat | None = None,
_cv: bool = False,
) -> NDArrayFloat | FData:

bandwidth = (
np.percentile(np.abs(delta_x), 15)
float(np.percentile(delta_x, DEFAULT_BANDWIDTH_PERCENTILE))
if self.bandwidth is None
else self.bandwidth
)

# Smoothing for functions of one variable
if not isinstance(X_train, FDataBasis) and X_train[0].shape[0] == 1:
delta_x = np.subtract.outer(
X.flatten(),
X_train.flatten(),
)

assert y_train is None

return super().__call__( # noqa: WPS503
delta_x=delta_x,
X_train=X_train,
X=X,
y_train=y_train,
weights=weights,
_cv=_cv,
)

# Regression
if isinstance(X_train, FData):
assert isinstance(X, FData)
Expand All @@ -326,44 +345,41 @@ def __call__( # noqa: D102
):
raise ValueError("Only FDataBasis is supported for now.")

m1 = X_train.coefficients
m2 = X.coefficients

if y_train is None:
y_train = np.identity(X_train.n_samples)

m1 = X_train.coefficients
m2 = X.coefficients
else:
m1 = X_train
m2 = X

if y_train is None:
y_train = np.identity(X_train.shape[0])

# Subtract previous matrices obtaining a 3D matrix
# The i-th element contains the matrix X_train - X[i]
C = m1 - m2[:, np.newaxis]
# Subtract previous matrices obtaining a 3D matrix
# The i-th element contains the matrix X_train - X[i]
C = m1 - m2[:, np.newaxis]

# Inner product matrix only is applicable in regression
if isinstance(X_train, FDataBasis):
inner_product_matrix = X_train.basis.inner_product_matrix()

# Calculate new coefficients taking into account cross-products
# if the basis is orthonormal, C would not change
C = C @ inner_product_matrix # noqa: WPS350

# Adding a column of ones in the first position of all matrices
dims = (C.shape[0], C.shape[1], 1)
C = np.concatenate((np.ones(dims), C), axis=-1)

return self._solve_least_squares(
delta_x=delta_x,
coefs=C,
y_train=y_train,
bandwidth=bandwidth,
)

# Smoothing
else:
# Adding a column of ones in the first position of all matrices
dims = (C.shape[0], C.shape[1], 1)
C = np.concatenate((np.ones(dims), C), axis=-1)

return super().__call__( # type: ignore[misc, type-var] # noqa: WPS503
delta_x=delta_x,
X_train=X_train,
X=X,
y_train=y_train, # type: ignore[arg-type]
weights=weights,
_cv=_cv,
)
return self._solve_least_squares(
delta_x=delta_x,
coefs=C,
y_train=y_train,
bandwidth=bandwidth,
)

def _solve_least_squares(
self,
Expand Down Expand Up @@ -485,7 +501,7 @@ def _hat_matrix_function_not_normalized(
# For each row in the distances matrix, it calculates the furthest
# point within the k nearest neighbours
vec = np.sort(
np.abs(delta_x),
delta_x,
axis=1,
)[:, n_neighbors - 1] + tol

Expand Down
16 changes: 10 additions & 6 deletions skfda/preprocessing/smoothing/_kernel_smoothers.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,11 @@

import numpy as np

from ..._utils._utils import _to_grid_points
from ..._utils._utils import _cartesian_product, _to_grid_points
from ...misc.hat_matrix import HatMatrix, NadarayaWatsonHatMatrix
from ...misc.metrics import PairwiseMetric, l2_distance
from ...typing._base import GridPointsLike
from ...typing._metric import Metric
from ...typing._numpy import NDArrayFloat
from ._linear import _LinearSmoother

Expand Down Expand Up @@ -114,10 +116,12 @@ def __init__(
*,
weights: Optional[NDArrayFloat] = None,
output_points: Optional[GridPointsLike] = None,
metric: Metric[NDArrayFloat] = l2_distance,
):
self.kernel_estimator = kernel_estimator
self.weights = weights
self.output_points = output_points
self.metric = metric
self._cv = False # For testing purposes only

def _hat_matrix(
Expand All @@ -126,18 +130,18 @@ def _hat_matrix(
output_points: GridPointsLike,
) -> NDArrayFloat:

input_points = _to_grid_points(input_points)
output_points = _to_grid_points(output_points)
input_points = _cartesian_product(_to_grid_points(input_points))
output_points = _cartesian_product(_to_grid_points(output_points))

if self.kernel_estimator is None:
self.kernel_estimator = NadarayaWatsonHatMatrix()

delta_x = np.subtract.outer(output_points[0], input_points[0])
delta_x = PairwiseMetric(self.metric)(output_points, input_points)

return self.kernel_estimator(
delta_x=delta_x,
weights=self.weights,
X_train=input_points,
X=output_points,
X_train=np.array(input_points),
X=np.array(output_points),
_cv=self._cv,
)
19 changes: 18 additions & 1 deletion skfda/preprocessing/smoothing/_linear.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ class _LinearSmoother(
``hat_matrix`` to define the smoothing or 'hat' matrix.
"""

input_points_: GridPoints
output_points_: GridPoints

Expand Down Expand Up @@ -116,9 +117,25 @@ def transform(
)
)

dims = [len(e) for e in self.output_points_]
dims += [len(e) for e in self.input_points_]

hat_matrix = np.reshape(
self.hat_matrix_,
dims,
)

data_matrix = np.einsum(
hat_matrix,
[Ellipsis, *range(1, len(self.output_points_) + 1)],
X.data_matrix,
[0, *range(1, len(self.output_points_) + 2)],
[0, Ellipsis, len(self.output_points_) + 1],
)

# The matrix is cached
return X.copy(
data_matrix=self.hat_matrix_ @ X.data_matrix,
data_matrix=data_matrix,
grid_points=self.output_points_,
)

Expand Down
34 changes: 34 additions & 0 deletions skfda/tests/test_smoothing.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import numpy as np
import sklearn
from sklearn.datasets import load_digits
from typing_extensions import Literal

import skfda
Expand Down Expand Up @@ -173,6 +174,39 @@ def test_knn(self) -> None:
np.testing.assert_allclose(hat_matrix, hat_matrix_r)


class TestSurfaceSmoother(unittest.TestCase):
"""Test that smoothing works on surfaces."""

def test_knn(self) -> None:
"""Check 2D smoothing using digits dataset."""
X, y = load_digits(return_X_y=True)
X = X.reshape(-1, 8, 8)

fd = skfda.FDataGrid(X)

ks = KernelSmoother(
kernel_estimator=KNeighborsHatMatrix(n_neighbors=1))
fd_trans = ks.fit_transform(fd)

np.testing.assert_allclose(fd_trans.data_matrix, fd.data_matrix)

ks = KernelSmoother(
kernel_estimator=KNeighborsHatMatrix(n_neighbors=3))
fd_trans = ks.fit_transform(fd)

res = [
[0, 1.25, 7.75, 10.5, 8.25, 6.25, 1.5, 0],
[0, 3.2, 9.6, 10.6, 9.8, 8.4, 5.6, 1.25],
[0.75, 4.4, 9, 6.4, 4.6, 8.4, 6.4, 2],
[1, 4.8, 7.8, 2.8, 1.6, 7.2, 6.4, 2],
[1.25, 4.2, 7.2, 1.6, 2, 7.4, 6.4, 2],
[1, 4.4, 7.4, 3.4, 4.6, 8.2, 5.4, 1.75],
[0.5, 4, 7.6, 8.4, 7.6, 6.8, 3.8, 0],
[0, 2, 8.25, 8.5, 8.25, 5.5, 0, 0],
]
np.testing.assert_allclose(fd_trans.data_matrix[0, ..., 0], res)


class TestBasisSmoother(unittest.TestCase):
"""Test Basis Smoother."""

Expand Down

0 comments on commit fac57c8

Please sign in to comment.