Skip to content

Commit

Permalink
Merge pull request #556 from GAA-UAM/feature/555-ddof-parameter-in-fd…
Browse files Browse the repository at this point in the history
…atacov

ddof parameter in fdatacov
  • Loading branch information
vnmabus committed Jul 13, 2023
2 parents 415796b + 26c6d22 commit 2101576
Show file tree
Hide file tree
Showing 7 changed files with 76 additions and 19 deletions.
18 changes: 14 additions & 4 deletions skfda/exploratory/stats/_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,19 +41,22 @@ def mean(
return (X * weight).sum()


def var(X: FData) -> FDataGrid:
def var(X: FData, ddof: int = 1) -> FDataGrid:
"""
Compute the variance of a set of samples in a FData object.
Args:
X: Object containing the set of samples whose variance is desired.
ddof: "Delta Degrees of Freedom": the divisor used in the calculation
is `N - ddof`, where `N` represents the number of elements. By
default `ddof` is 1.
Returns:
Variance of all the samples in the original object, as a
:term:`functional data object` with just one sample.
"""
return X.var() # type: ignore[no-any-return]
return X.var(ddof=ddof) # type: ignore[no-any-return]


def gmean(X: FDataGrid) -> FDataGrid:
Expand All @@ -71,7 +74,10 @@ def gmean(X: FDataGrid) -> FDataGrid:
return X.gmean()


def cov(X: FData) -> Callable[[NDArrayFloat, NDArrayFloat], NDArrayFloat]:
def cov(
X: FData,
ddof: int = 1
) -> Callable[[NDArrayFloat, NDArrayFloat], NDArrayFloat]:
"""
Compute the covariance.
Expand All @@ -80,13 +86,17 @@ def cov(X: FData) -> Callable[[NDArrayFloat, NDArrayFloat], NDArrayFloat]:
Args:
X: Object containing different samples of a functional variable.
ddof: "Delta Degrees of Freedom": the divisor used in the calculation
is `N - ddof`, where `N` represents the number of elements. By
default `ddof` is 1.
Returns:
Covariance of all the samples in the original object, as a
callable.
"""
return X.cov()
return X.cov(ddof=ddof)


def modified_epigraph_index(X: FDataGrid) -> NDArrayFloat:
Expand Down
27 changes: 20 additions & 7 deletions skfda/misc/covariances.py
Original file line number Diff line number Diff line change
Expand Up @@ -768,23 +768,27 @@ class Empirical(Covariance):
The sample covariance function is defined as
.. math::
K(t, s) = \frac{1}{n}\sum_{n=1}^N\left(x_n(t) - \bar{x}(t)\right)
\left(x_n(s) - \bar{x}(s)\right)
K(t, s) = \frac{1}{N-\text{ddof}}\sum_{n=1}^N\left(x_n(t) -
\bar{x}(t)\right) \left(x_n(s) - \bar{x}(s)\right)
where :math:`x_n(t)` is the n-th sample and :math:`\bar{x}(t)` is the
mean of the samples.
mean of the samples. :math:`N` is the number of samples,
:math:`\text{ddof}` means "Delta Degrees of Freedom" and is such that
:math:`N-\text{ddof}` is the divisor used in the calculation of the
covariance function.
"""

_latex_formula = (
r"K(t, s) = \frac{1}{n}\sum_{n=1}^N(x_n(t) - \bar{x}(t))"
r"K(t, s) = \frac{1}{N-\text{ddof}}\sum_{n=1}^N(x_n(t) - \bar{x}(t))"
r"(x_n(s) - \bar{x}(s))"
)
_parameters_str = [
("data", "data"),
]

cov_fdata: FData
ddof: int

@abc.abstractmethod
def __init__(self, data: FData) -> None:
Expand All @@ -811,14 +815,17 @@ class EmpiricalGrid(Empirical):
"""Sample covariance function for FDataGrid."""

cov_fdata: FDataGrid
ddof: int

def __init__(self, data: FDataGrid) -> None:
def __init__(self, data: FDataGrid, ddof: int = 1) -> None:
super().__init__(data=data)

self.ddof = ddof
self.cov_fdata = data.copy(
data_matrix=np.cov(
data.data_matrix[..., 0],
rowvar=False,
ddof=ddof,
)[np.newaxis, ...],
grid_points=[
data.grid_points[0],
Expand All @@ -844,11 +851,17 @@ class EmpiricalBasis(Empirical):

cov_fdata: FDataBasis
coeff_matrix: NDArrayFloat
ddof: int

def __init__(self, data: FDataBasis) -> None:
def __init__(self, data: FDataBasis, ddof: int = 1) -> None:
super().__init__(data=data)

self.coeff_matrix = np.cov(data.coefficients, rowvar=False)
self.ddof = ddof
self.coeff_matrix = np.cov(
data.coefficients,
rowvar=False,
ddof=ddof,
)
self.cov_fdata = FDataBasis(
basis=TensorBasis([data.basis, data.basis]),
coefficients=self.coeff_matrix.flatten(),
Expand Down
2 changes: 1 addition & 1 deletion skfda/misc/scoring.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ def _var(
from ..exploratory.stats import mean, var

if weights is None:
return var(x)
return var(x, ddof=0)

return mean( # type: ignore[no-any-return]
np.power(x - mean(x, weights=weights), 2),
Expand Down
2 changes: 1 addition & 1 deletion skfda/ml/clustering/_kmeans.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ def _check_clustering(self, fdata: Input) -> Input:
return fdata

def _tolerance(self, fdata: Input) -> float:
variance = fdata.var()
variance = fdata.var(ddof=0)
mean_variance = np.mean(variance[0].data_matrix)

return float(mean_variance * self.tol)
Expand Down
6 changes: 6 additions & 0 deletions skfda/representation/_functional_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -826,13 +826,15 @@ def cov( # noqa: WPS451
s_points: NDArrayFloat,
t_points: NDArrayFloat,
/,
ddof: int = 1,
) -> NDArrayFloat:
pass

@overload
def cov( # noqa: WPS451
self: T,
/,
ddof: int = 1,
) -> Callable[[NDArrayFloat, NDArrayFloat], NDArrayFloat]:
pass

Expand All @@ -842,6 +844,7 @@ def cov( # noqa: WPS320, WPS451
s_points: Optional[NDArrayFloat] = None,
t_points: Optional[NDArrayFloat] = None,
/,
ddof: int = 1,
) -> Union[
Callable[[NDArrayFloat, NDArrayFloat], NDArrayFloat],
NDArrayFloat,
Expand All @@ -861,6 +864,9 @@ def cov( # noqa: WPS320, WPS451
Args:
s_points: Points where the covariance function is evaluated.
t_points: Points where the covariance function is evaluated.
ddof: "Delta Degrees of Freedom": the divisor used in the
calculation is `N - ddof`, where `N` represents the number
of elements. By default `ddof` is 1.
Returns:
Covariance function.
Expand Down
19 changes: 16 additions & 3 deletions skfda/representation/basis/_fdatabasis.py
Original file line number Diff line number Diff line change
Expand Up @@ -442,7 +442,11 @@ def sum( # noqa: WPS125
sample_names=(None,),
)

def var(self: T, eval_points: Optional[NDArrayFloat] = None) -> T:
def var(
self: T,
eval_points: Optional[NDArrayFloat] = None,
ddof: int = 1,
) -> T:
"""Compute the variance of the functional data object.
A numerical approach is used. The object is transformed into its
Expand All @@ -456,26 +460,31 @@ def var(self: T, eval_points: Optional[NDArrayFloat] = None) -> T:
numpy.linspace with bounds equal to the ones defined in
self.domain_range and the number of points the maximum
between 501 and 10 times the number of basis.
ddof: "Delta Degrees of Freedom": the divisor used in the
calculation is `N - ddof`, where `N` represents the number of
elements. By default `ddof` is 1.
Returns:
Variance of the original object.
"""
return self.to_grid(eval_points).var().to_basis(self.basis)
return self.to_grid(eval_points).var(ddof=ddof).to_basis(self.basis)

@overload
def cov( # noqa: WPS451
self: T,
s_points: NDArrayFloat,
t_points: NDArrayFloat,
/,
ddof: int = 1,
) -> NDArrayFloat:
pass

@overload
def cov( # noqa: WPS451
self: T,
/,
ddof: int = 1,
) -> Callable[[NDArrayFloat, NDArrayFloat], NDArrayFloat]:
pass

Expand All @@ -484,6 +493,7 @@ def cov( # noqa: WPS320, WPS451
s_points: Optional[NDArrayFloat] = None,
t_points: Optional[NDArrayFloat] = None,
/,
ddof: int = 1,
) -> Union[
Callable[[NDArrayFloat, NDArrayFloat], NDArrayFloat],
NDArrayFloat,
Expand All @@ -505,14 +515,17 @@ def cov( # noqa: WPS320, WPS451
Args:
s_points: Points where the covariance function is evaluated.
t_points: Points where the covariance function is evaluated.
ddof: "Delta Degrees of Freedom": the divisor used in the
calculation is `N - ddof`, where `N` represents the number
of elements. By default `ddof` is 1.
Returns:
Covariance function.
"""
# To avoid circular imports
from ...misc.covariances import EmpiricalBasis
cov_function = EmpiricalBasis(self)
cov_function = EmpiricalBasis(self, ddof=ddof)
if s_points is None or t_points is None:
return cov_function
return cov_function(s_points, t_points)
Expand Down
21 changes: 18 additions & 3 deletions skfda/representation/grid.py
Original file line number Diff line number Diff line change
Expand Up @@ -582,16 +582,25 @@ def sum( # noqa: WPS125
sample_names=(None,),
)

def var(self: T) -> T:
def var(self: T, ddof: int = 1) -> T:
"""Compute the variance of a set of samples in a FDataGrid object.
Args:
ddof: "Delta Degrees of Freedom": the divisor used in the
calculation is `N - ddof`, where `N` represents the number of
elements. By default `ddof` is 1.
Returns:
An FDataGrid object with just one sample representing the
variance of all the samples in the original FDataGrid object.
"""
return self.copy(
data_matrix=np.array([np.var(self.data_matrix, 0)]),
data_matrix=np.array([np.var(
self.data_matrix,
axis=0,
ddof=ddof,
)]),
sample_names=("variance",),
)

Expand All @@ -601,13 +610,15 @@ def cov( # noqa: WPS451
s_points: NDArrayFloat,
t_points: NDArrayFloat,
/,
ddof: int = 1,
) -> NDArrayFloat:
pass

@overload
def cov( # noqa: WPS451
self: T,
/,
ddof: int = 1,
) -> Callable[[NDArrayFloat, NDArrayFloat], NDArrayFloat]:
pass

Expand All @@ -616,6 +627,7 @@ def cov( # noqa: WPS320, WPS451
s_points: Optional[NDArrayFloat] = None,
t_points: Optional[NDArrayFloat] = None,
/,
ddof: int = 1,
) -> Union[
Callable[[NDArrayFloat, NDArrayFloat], NDArrayFloat],
NDArrayFloat,
Expand All @@ -631,14 +643,17 @@ def cov( # noqa: WPS320, WPS451
Args:
s_points: Grid points where the covariance function is evaluated.
t_points: Grid points where the covariance function is evaluated.
ddof: "Delta Degrees of Freedom": the divisor used in the
calculation is `N - ddof`, where `N` represents the number
of elements. By default `ddof` is 1.
Returns:
Covariance function.
"""
# To avoid circular imports
from ..misc.covariances import EmpiricalGrid
cov_function = EmpiricalGrid(self)
cov_function = EmpiricalGrid(self, ddof=ddof)
if s_points is None or t_points is None:
return cov_function
return cov_function(s_points, t_points)
Expand Down

0 comments on commit 2101576

Please sign in to comment.