Skip to content

Commit

Permalink
Merge pull request #578 from GAA-UAM/fix/576-change-ddof-argument-to-correction-in-var-and-cov
Browse files: browse the repository at this point in the history

Change `ddof` argument to `correction` in var and cov functions
  • Loading branch information
vnmabus committed Oct 5, 2023
2 parents 5779840 + 025f8c0 commit 3fb0ea9
Show file tree
Hide file tree
Showing 7 changed files with 59 additions and 57 deletions.
20 changes: 10 additions & 10 deletions skfda/exploratory/stats/_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,22 +41,22 @@ def mean(
return (X * weight).sum()


def var(X: FData, ddof: int = 1) -> FDataGrid:
def var(X: FData, correction: int = 1) -> FDataGrid:
"""
Compute the variance of a set of samples in a FData object.
Args:
X: Object containing all the set of samples whose variance is desired.
ddof: "Delta Degrees of Freedom": the divisor used in the calculation
is `N - ddof`, where `N` represents the number of elements. By
default `ddof` is 1.
correction: degrees of freedom adjustment. The divisor used in the
calculation is `N - correction`, where `N` represents the number of
elements. Default: `1`.
Returns:
Variance of all the samples in the original object, as a
:term:`functional data object` with just one sample.
"""
return X.var(ddof=ddof) # type: ignore[no-any-return]
return X.var(correction=correction) # type: ignore[no-any-return]


def gmean(X: FDataGrid) -> FDataGrid:
Expand All @@ -76,7 +76,7 @@ def gmean(X: FDataGrid) -> FDataGrid:

def cov(
X: FData,
ddof: int = 1
correction: int = 1,
) -> Callable[[NDArrayFloat, NDArrayFloat], NDArrayFloat]:
"""
Compute the covariance.
Expand All @@ -86,17 +86,17 @@ def cov(
Args:
X: Object containing different samples of a functional variable.
ddof: "Delta Degrees of Freedom": the divisor used in the calculation
is `N - ddof`, where `N` represents the number of elements. By
default `ddof` is 1.
correction: degrees of freedom adjustment. The divisor used in the
calculation is `N - correction`, where `N` represents the number of
elements. Default: `1`.
Returns:
Covariance of all the samples in the original object, as a
callable.
"""
return X.cov(ddof=ddof)
return X.cov(correction=correction)


def modified_epigraph_index(X: FDataGrid) -> NDArrayFloat:
Expand Down
30 changes: 15 additions & 15 deletions skfda/misc/covariances.py
Original file line number Diff line number Diff line change
Expand Up @@ -768,27 +768,27 @@ class Empirical(Covariance):
The sample covariance function is defined as
.. math::
K(t, s) = \frac{1}{N-\text{ddof}}\sum_{n=1}^N\left(x_n(t) -
K(t, s) = \frac{1}{N-\text{correction}}\sum_{n=1}^N\left(x_n(t) -
\bar{x}(t)\right) \left(x_n(s) - \bar{x}(s)\right)
where :math:`x_n(t)` is the n-th sample and :math:`\bar{x}(t)` is the
mean of the samples. :math:`N` is the number of samples,
:math:`\text{ddof}` means "Delta Degrees of Freedom" and is such that
:math:`N-\text{ddof}` is the divisor used in the calculation of the
covariance function.
:math:`\text{correction}` is the degrees of freedom adjustment and is such
that :math:`N-\text{correction}` is the divisor used in the calculation of
the covariance function.
"""

_latex_formula = (
r"K(t, s) = \frac{1}{N-\text{ddof}}\sum_{n=1}^N(x_n(t) - \bar{x}(t))"
r"(x_n(s) - \bar{x}(s))"
r"K(t, s) = \frac{1}{N-\text{correction}}\sum_{n=1}^N"
r"(x_n(t) - \bar{x}(t))(x_n(s) - \bar{x}(s))"
)
_parameters_str = [
("data", "data"),
]

cov_fdata: FData
ddof: int
correction: int

@abc.abstractmethod
def __init__(self, data: FData) -> None:
Expand All @@ -815,17 +815,17 @@ class EmpiricalGrid(Empirical):
"""Sample covariance function for FDataGrid."""

cov_fdata: FDataGrid
ddof: int
correction: int

def __init__(self, data: FDataGrid, ddof: int = 1) -> None:
def __init__(self, data: FDataGrid, correction: int = 1) -> None:
super().__init__(data=data)

self.ddof = ddof
self.correction = correction
self.cov_fdata = data.copy(
data_matrix=np.cov(
data.data_matrix[..., 0],
rowvar=False,
ddof=ddof,
ddof=correction,
)[np.newaxis, ...],
grid_points=[
data.grid_points[0],
Expand All @@ -851,16 +851,16 @@ class EmpiricalBasis(Empirical):

cov_fdata: FDataBasis
coeff_matrix: NDArrayFloat
ddof: int
correction: int

def __init__(self, data: FDataBasis, ddof: int = 1) -> None:
def __init__(self, data: FDataBasis, correction: int = 1) -> None:
super().__init__(data=data)

self.ddof = ddof
self.correction = correction
self.coeff_matrix = np.cov(
data.coefficients,
rowvar=False,
ddof=ddof,
ddof=correction,
)
self.cov_fdata = FDataBasis(
basis=TensorBasis([data.basis, data.basis]),
Expand Down
2 changes: 1 addition & 1 deletion skfda/misc/scoring.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ def _var(
from ..exploratory.stats import mean, var

if weights is None:
return var(x, ddof=0)
return var(x, correction=0)

return mean( # type: ignore[no-any-return]
np.power(x - mean(x, weights=weights), 2),
Expand Down
2 changes: 1 addition & 1 deletion skfda/ml/clustering/_kmeans.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ def _check_clustering(self, fdata: Input) -> Input:
return fdata

def _tolerance(self, fdata: Input) -> float:
variance = fdata.var(ddof=0)
variance = fdata.var(correction=0)
mean_variance = np.mean(variance[0].data_matrix)

return float(mean_variance * self.tol)
Expand Down
12 changes: 6 additions & 6 deletions skfda/representation/_functional_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -826,15 +826,15 @@ def cov( # noqa: WPS451
s_points: NDArrayFloat,
t_points: NDArrayFloat,
/,
ddof: int = 1,
correction: int = 1,
) -> NDArrayFloat:
pass

@overload
def cov( # noqa: WPS451
self: T,
/,
ddof: int = 1,
correction: int = 1,
) -> Callable[[NDArrayFloat, NDArrayFloat], NDArrayFloat]:
pass

Expand All @@ -844,7 +844,7 @@ def cov( # noqa: WPS320, WPS451
s_points: Optional[NDArrayFloat] = None,
t_points: Optional[NDArrayFloat] = None,
/,
ddof: int = 1,
correction: int = 1,
) -> Union[
Callable[[NDArrayFloat, NDArrayFloat], NDArrayFloat],
NDArrayFloat,
Expand All @@ -864,9 +864,9 @@ def cov( # noqa: WPS320, WPS451
Args:
s_points: Points where the covariance function is evaluated.
t_points: Points where the covariance function is evaluated.
ddof: "Delta Degrees of Freedom": the divisor used in the
calculation is `N - ddof`, where `N` represents the number
of elements. By default `ddof` is 1.
correction: degrees of freedom adjustment. The divisor used in the
calculation is `N - correction`, where `N` represents the
number of elements. Default: `1`.
Returns:
Covariance function.
Expand Down
26 changes: 14 additions & 12 deletions skfda/representation/basis/_fdatabasis.py
Original file line number Diff line number Diff line change
Expand Up @@ -445,7 +445,7 @@ def sum( # noqa: WPS125
def var(
self: T,
eval_points: Optional[NDArrayFloat] = None,
ddof: int = 1,
correction: int = 1,
) -> T:
"""Compute the variance of the functional data object.
Expand All @@ -460,31 +460,33 @@ def var(
numpy.linspace with bounds equal to the ones defined in
self.domain_range and the number of points the maximum
between 501 and 10 times the number of basis.
ddof: "Delta Degrees of Freedom": the divisor used in the
calculation is `N - ddof`, where `N` represents the number of
elements. By default `ddof` is 1.
correction: degrees of freedom adjustment. The divisor used in the
calculation is `N - correction`, where `N` represents the
number of elements. Default: `1`.
Returns:
Variance of the original object.
"""
return self.to_grid(eval_points).var(ddof=ddof).to_basis(self.basis)
return self.to_grid(
eval_points,
).var(correction=correction).to_basis(self.basis)

@overload
def cov( # noqa: WPS451
self: T,
s_points: NDArrayFloat,
t_points: NDArrayFloat,
/,
ddof: int = 1,
correction: int = 1,
) -> NDArrayFloat:
pass

@overload
def cov( # noqa: WPS451
self: T,
/,
ddof: int = 1,
correction: int = 1,
) -> Callable[[NDArrayFloat, NDArrayFloat], NDArrayFloat]:
pass

Expand All @@ -493,7 +495,7 @@ def cov( # noqa: WPS320, WPS451
s_points: Optional[NDArrayFloat] = None,
t_points: Optional[NDArrayFloat] = None,
/,
ddof: int = 1,
correction: int = 1,
) -> Union[
Callable[[NDArrayFloat, NDArrayFloat], NDArrayFloat],
NDArrayFloat,
Expand All @@ -515,17 +517,17 @@ def cov( # noqa: WPS320, WPS451
Args:
s_points: Points where the covariance function is evaluated.
t_points: Points where the covariance function is evaluated.
ddof: "Delta Degrees of Freedom": the divisor used in the
calculation is `N - ddof`, where `N` represents the number
of elements. By default `ddof` is 1.
correction: degrees of freedom adjustment. The divisor used in the
calculation is `N - correction`, where `N` represents the
number of elements. Default: `1`.
Returns:
Covariance function.
"""
# To avoid circular imports
from ...misc.covariances import EmpiricalBasis
cov_function = EmpiricalBasis(self, ddof=ddof)
cov_function = EmpiricalBasis(self, correction=correction)
if s_points is None or t_points is None:
return cov_function
return cov_function(s_points, t_points)
Expand Down
24 changes: 12 additions & 12 deletions skfda/representation/grid.py
Original file line number Diff line number Diff line change
Expand Up @@ -582,13 +582,13 @@ def sum( # noqa: WPS125
sample_names=(None,),
)

def var(self: T, ddof: int = 1) -> T:
def var(self: T, correction: int = 1) -> T:
"""Compute the variance of a set of samples in a FDataGrid object.
Args:
ddof: "Delta Degrees of Freedom": the divisor used in the
calculation is `N - ddof`, where `N` represents the number of
elements. By default `ddof` is 1.
correction: degrees of freedom adjustment. The divisor used in the
calculation is `N - correction`, where `N` represents the number of
elements. Default: `1`.
Returns:
A FDataGrid object with just one sample representing the
Expand All @@ -599,7 +599,7 @@ def var(self: T, ddof: int = 1) -> T:
data_matrix=np.array([np.var(
self.data_matrix,
axis=0,
ddof=ddof,
ddof=correction,
)]),
sample_names=("variance",),
)
Expand All @@ -610,15 +610,15 @@ def cov( # noqa: WPS451
s_points: NDArrayFloat,
t_points: NDArrayFloat,
/,
ddof: int = 1,
correction: int = 1,
) -> NDArrayFloat:
pass

@overload
def cov( # noqa: WPS451
self: T,
/,
ddof: int = 1,
correction: int = 1,
) -> Callable[[NDArrayFloat, NDArrayFloat], NDArrayFloat]:
pass

Expand All @@ -627,7 +627,7 @@ def cov( # noqa: WPS320, WPS451
s_points: Optional[NDArrayFloat] = None,
t_points: Optional[NDArrayFloat] = None,
/,
ddof: int = 1,
correction: int = 1,
) -> Union[
Callable[[NDArrayFloat, NDArrayFloat], NDArrayFloat],
NDArrayFloat,
Expand All @@ -643,17 +643,17 @@ def cov( # noqa: WPS320, WPS451
Args:
s_points: Grid points where the covariance function is evaluated.
t_points: Grid points where the covariance function is evaluated.
ddof: "Delta Degrees of Freedom": the divisor used in the
calculation is `N - ddof`, where `N` represents the number
of elements. By default `ddof` is 1.
correction: degrees of freedom adjustment. The divisor used in the
calculation is `N - correction`, where `N` represents the
number of elements. Default: `1`.
Returns:
Covariance function.
"""
# To avoid circular imports
from ..misc.covariances import EmpiricalGrid
cov_function = EmpiricalGrid(self, ddof=ddof)
cov_function = EmpiricalGrid(self, correction=correction)
if s_points is None or t_points is None:
return cov_function
return cov_function(s_points, t_points)
Expand Down

0 comments on commit 3fb0ea9

Please sign in to comment.