Skip to content

Commit

Permalink
Merge pull request #505 from GAA-UAM/FDataBasis/cov
Browse files Browse the repository at this point in the history
Covariance function as tensor product
  • Loading branch information
vnmabus committed Jun 25, 2023
2 parents fbe63ce + 4c411e4 commit 3663905
Show file tree
Hide file tree
Showing 12 changed files with 447 additions and 70 deletions.
8 changes: 4 additions & 4 deletions skfda/exploratory/stats/_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from __future__ import annotations

from builtins import isinstance
from typing import TypeVar, Union
from typing import Callable, TypeVar, Union

import numpy as np
from scipy import integrate
Expand Down Expand Up @@ -71,7 +71,7 @@ def gmean(X: FDataGrid) -> FDataGrid:
return X.gmean()


def cov(X: FData) -> FDataGrid:
def cov(X: FData) -> Callable[[NDArrayFloat, NDArrayFloat], NDArrayFloat]:
"""
Compute the covariance.
Expand All @@ -83,10 +83,10 @@ def cov(X: FData) -> FDataGrid:
Returns:
Covariance of all the samples in the original object, as a
:term:`functional data object` with just one sample.
callable.
"""
return X.cov() # type: ignore[no-any-return]
return X.cov()


def modified_epigraph_index(X: FDataGrid) -> NDArrayFloat:
Expand Down
5 changes: 3 additions & 2 deletions skfda/exploratory/stats/covariance/_base.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
from __future__ import annotations

from abc import abstractmethod
from typing import Generic, TypeVar
from typing import Callable, Generic, TypeVar

from ...._utils._sklearn_adapter import BaseEstimator
from ....representation import FData
from ....typing._numpy import NDArrayFloat

Input = TypeVar("Input", bound=FData)

Expand All @@ -15,7 +16,7 @@ class CovarianceEstimator(
):

location_: Input
covariance_: Input
covariance_: Callable[[NDArrayFloat, NDArrayFloat], NDArrayFloat]

def __init__(
self,
Expand Down
9 changes: 4 additions & 5 deletions skfda/exploratory/stats/covariance/_empirical.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
from __future__ import annotations

from typing import Generic, TypeVar
from typing import Callable, TypeVar

from ....representation import FData
from ....typing._numpy import NDArrayFloat
from ._base import CovarianceEstimator

Input = TypeVar("Input", bound=FData)
Expand All @@ -11,11 +12,9 @@
class EmpiricalCovariance(
CovarianceEstimator[Input],
):
covariance_: Callable[[NDArrayFloat, NDArrayFloat], NDArrayFloat]

def fit(self, X: Input, y: object = None) -> EmpiricalCovariance[Input]:

super(EmpiricalCovariance, self).fit(X, y)

super().fit(X, y)
self.covariance_ = X.cov()

return self
17 changes: 14 additions & 3 deletions skfda/exploratory/stats/covariance/_parametric_gaussian.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,21 @@
from __future__ import annotations

from typing import Callable

import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import Kernel, WhiteKernel

from ....misc.covariances import Covariance
from ....misc.covariances import Covariance, EmpiricalGrid
from ....representation import FDataGrid
from ....typing._numpy import NDArrayFloat
from ._empirical import EmpiricalCovariance


class ParametricGaussianCovariance(EmpiricalCovariance[FDataGrid]):

covariance_: Callable[[NDArrayFloat, NDArrayFloat], NDArrayFloat]

def __init__(
self,
cov: Kernel | Covariance,
Expand Down Expand Up @@ -48,8 +53,14 @@ def fit(
regressor.fit(grid_points, data_matrix.T)

# TODO: Skip cov computation?
self.covariance_ = X.cov().copy(
data_matrix=regressor.kernel_(grid_points)[np.newaxis, ...],
# TODO: Use a user-public structure to represent the covariance,
# instead of a Callable object
self.covariance_ = X.cov()
assert isinstance(self.covariance_, EmpiricalGrid)
self.covariance_.cov_fdata = self.covariance_.cov_fdata.copy(
data_matrix=regressor.kernel_(
grid_points,
)[np.newaxis, ...],
)

return self
29 changes: 19 additions & 10 deletions skfda/inference/anova/_anova_oneway.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import numpy as np
from typing_extensions import Literal

from ..._utils import constants
from ...datasets import make_gaussian
from ...misc.metrics import lp_distance
from ...misc.validation import validate_random_state
Expand Down Expand Up @@ -203,25 +204,33 @@ def _anova_bootstrap(
)

# List with sizes of each group
sizes = [fd.n_samples for fd in fd_grouped]
sizes = [fdg.n_samples for fdg in fd_grouped]

# Instance a random state object in case random_state is an int
random_state = validate_random_state(random_state)

if equal_var:
k_est = concatenate(fd_grouped).cov().data_matrix[0, ..., 0]
k_est = [k_est] * len(fd_grouped)
else:
# Estimating covariances for each group
k_est = [fd.cov().data_matrix[0, ..., 0] for fd in fd_grouped]

# Simulating n_reps observations for each of the n_groups gaussian
# processes
grid_points = getattr(fd_grouped[0], "grid_points", None)
if grid_points is None:
start, stop = fd_grouped[0].domain_range[0]
n_features = k_est[0].shape[0]
grid_points = np.linspace(start, stop, n_features)
grid_points = np.linspace(start, stop, constants.N_POINTS_FINE_MESH)

if equal_var:
cov_est = concatenate(fd_grouped).cov(
grid_points,
grid_points,
)
k_est = [cov_est] * len(fd_grouped)
else:
# Estimating covariances for each group
k_est = [
fdg.cov(
grid_points,
grid_points,
)
for fdg in fd_grouped
]

sim = [
make_gaussian(
Expand Down
16 changes: 11 additions & 5 deletions skfda/inference/hotelling/_hotelling.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from typing_extensions import Literal

from ...misc.validation import validate_random_state
from ...representation import FData, FDataBasis
from ...representation import FData, FDataBasis, FDataGrid
from ...typing._base import RandomStateLike
from ...typing._numpy import NDArrayFloat

Expand Down Expand Up @@ -85,7 +85,7 @@ def hotelling_t2(
n = n1 + n2 # Size of full sample
m = fd1.mean() - fd2.mean() # Delta mean

if isinstance(fd1, FDataBasis):
if isinstance(fd1, FDataBasis) and isinstance(fd2, FDataBasis):
if fd1.basis != fd2.basis:
raise ValueError(
"Both FDataBasis objects must share the same basis.",
Expand All @@ -97,11 +97,17 @@ def hotelling_t2(
# If no weight matrix is passed, then we compute the Gram Matrix
weights = fd1.basis.gram_matrix()
weights = np.sqrt(weights)
else:
elif isinstance(fd1, FDataGrid) and isinstance(fd2, FDataGrid):
# Working with standard discretized data
m = m.data_matrix[0, ..., 0]
k1 = fd1.cov().data_matrix[0, ..., 0]
k2 = fd2.cov().data_matrix[0, ..., 0]
k1 = fd1.cov(
fd1.grid_points[0],
fd1.grid_points[0],
)
k2 = fd2.cov(
fd2.grid_points[0],
fd2.grid_points[0],
)

m = m.reshape((-1, 1)) # Reshaping the mean for a proper matrix product
k_pool = ((n1 - 1) * k1 + (n2 - 1) * k2) / (n - 2) # Combination of covs
Expand Down
97 changes: 97 additions & 0 deletions skfda/misc/covariances.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
from matplotlib.figure import Figure
from scipy.special import gamma, kv

from ..representation import FData, FDataBasis, FDataGrid
from ..representation.basis import TensorBasis
from ..typing._numpy import ArrayLike, NDArrayFloat


Expand Down Expand Up @@ -758,3 +760,98 @@ def to_sklearn(self) -> sklearn_kern.Kernel:
self.variance
* sklearn_kern.Matern(length_scale=self.length_scale, nu=self.nu)
)


class Empirical(Covariance):
    r"""
    Sample covariance function.

    The sample covariance function is defined as

    .. math::
        K(t, s) = \frac{1}{N-1}\sum_{i=1}^N
        \left(x_i(t) - \bar{x}(t)\right)
        \left(x_i(s) - \bar{x}(s)\right)

    where :math:`x_i(t)` is the i-th sample and :math:`\bar{x}(t)` is the
    mean of the samples.  The :math:`N-1` factor makes this the unbiased
    estimator, matching :func:`numpy.cov` with its default ``ddof=1``.
    """

    _latex_formula = (
        r"K(t, s) = \frac{1}{N-1}\sum_{i=1}^N(x_i(t) - \bar{x}(t))"
        r"(x_i(s) - \bar{x}(s))"
    )
    _parameters_str = [
        ("data", "data"),
    ]

    # Functional data object holding the covariance itself; set by the
    # concrete subclasses (grid or basis representation).
    cov_fdata: FData

    @abc.abstractmethod
    def __init__(self, data: FData) -> None:
        # The covariance is a function of two real arguments, so only
        # univariate functions (scalar domain and codomain) are supported.
        if data.dim_domain != 1 or data.dim_codomain != 1:
            raise NotImplementedError(
                "Covariance only implemented "
                "for univariate functions",
            )

    def __call__(self, x: ArrayLike, y: ArrayLike) -> NDArrayFloat:
        """Evaluate the covariance function.

        Args:
            x: First array of points of evaluation.
            y: Second array of points of evaluation.

        Returns:
            Covariance function evaluated at the grid formed by x and y.
        """
        # Grid evaluation on the cartesian product of x and y; the stored
        # functional object has a single sample and scalar codomain.
        return self.cov_fdata([x, y], grid=True)[0, ..., 0]


class EmpiricalGrid(Empirical):
    """Sample covariance function for FDataGrid."""

    cov_fdata: FDataGrid

    def __init__(self, data: FDataGrid) -> None:
        super().__init__(data=data)

        # Discretized sample covariance: one value for every pair of grid
        # points, stored as a single-sample FDataGrid over the product
        # domain.
        cov_matrix = np.cov(data.data_matrix[..., 0], rowvar=False)
        points = data.grid_points[0]
        domain = data.domain_range[0]
        self.cov_fdata = data.copy(
            data_matrix=cov_matrix[np.newaxis, ...],
            grid_points=[points, points],
            domain_range=[domain, domain],
            argument_names=data.argument_names * 2,
            sample_names=("covariance",),
        )


class EmpiricalBasis(Empirical):
    """
    Sample covariance function for FDataBasis.

    In this case one may use the basis expression of the samples to
    express the sample covariance function in the tensor product basis
    of the original basis.
    """

    cov_fdata: FDataBasis
    coeff_matrix: NDArrayFloat

    def __init__(self, data: FDataBasis) -> None:
        super().__init__(data=data)

        # Covariance of the coefficient vectors; expanding it in the
        # tensor product basis gives the covariance function itself.
        self.coeff_matrix = np.cov(data.coefficients, rowvar=False)
        tensor_basis = TensorBasis([data.basis, data.basis])
        self.cov_fdata = FDataBasis(
            basis=tensor_basis,
            coefficients=self.coeff_matrix.flatten(),
            argument_names=data.argument_names * 2,
            sample_names=("covariance",),
        )
5 changes: 4 additions & 1 deletion skfda/ml/classification/_qda.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,10 @@ def _fit_gaussian_process(

cov_estimators.append(cov_estimator)
means.append(cov_estimator.location_)
covariance.append(cov_estimator.covariance_.data_matrix[0, ..., 0])
# TODO: QDA should use the covariance estimators interface
covariance.append(
cov_estimator.covariance_.cov_fdata.data_matrix[0, ..., 0],
)

self.means_ = means
self._covariances = np.asarray(covariance)
Expand Down
49 changes: 49 additions & 0 deletions skfda/representation/_functional_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from typing import (
TYPE_CHECKING,
Any,
Callable,
Iterable,
Iterator,
NoReturn,
Expand Down Expand Up @@ -819,6 +820,54 @@ def sum( # noqa: WPS125

return self

    @overload
    def cov(  # noqa: WPS451
        self: T,
        s_points: NDArrayFloat,
        t_points: NDArrayFloat,
        /,
    ) -> NDArrayFloat:
        pass

    @overload
    def cov(  # noqa: WPS451
        self: T,
        /,
    ) -> Callable[[NDArrayFloat, NDArrayFloat], NDArrayFloat]:
        pass

    @abstractmethod
    def cov(  # noqa: WPS320, WPS451
        self: T,
        s_points: Optional[NDArrayFloat] = None,
        t_points: Optional[NDArrayFloat] = None,
        /,
    ) -> Union[
        Callable[[NDArrayFloat, NDArrayFloat], NDArrayFloat],
        NDArrayFloat,
    ]:
        """Compute the covariance of the functional data object.

        Calculates the unbiased sample covariance function of the data.
        This is expected to be only defined for univariate functions.
        The resulting covariance function is defined in the cartesian
        product of the domain of the functions.

        If s_points or t_points are not provided, this method returns
        a callable object representing the covariance function.
        If s_points and t_points are provided, this method returns the
        evaluation of the covariance function at the grid formed by the
        cartesian product of the points in s_points and t_points.

        Args:
            s_points: Points where the covariance function is evaluated.
            t_points: Points where the covariance function is evaluated.

        Returns:
            Covariance function as a callable, or its evaluation at the
            given grid of points.
        """
        pass

def mean(
self: T,
*,
Expand Down

0 comments on commit 3663905

Please sign in to comment.