Skip to content

Commit

Permalink
Merge pull request #536 from GAA-UAM/feature/irregular_operations
Browse files Browse the repository at this point in the history
Feature/irregular_structure_and_operations
  • Loading branch information
vnmabus committed Mar 11, 2024
2 parents adb80fe + 21f7bad commit 6b11674
Show file tree
Hide file tree
Showing 18 changed files with 3,500 additions and 40 deletions.
23 changes: 23 additions & 0 deletions docs/modules/representation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,29 @@ methods.

skfda.representation.basis.Basis


Irregular representation
------------------------

In practice, many functional datasets do not contain functions evaluated
uniformly over a fixed grid. In other words, it is paramount to be able
to represent irregular functional data.

While the FDataGrid class could support this kind of dataset by filling a
common grid with possibly empty (or nan) values, it is inefficient to store a
complete grid with low data density. Furthermore, there are specific methods
that can be applied to irregular data in order to obtain, among other things,
a better conversion to basis representation.

The FDataIrregular class provides the functionality which suits these purposes.


.. autosummary::
:toctree: autosummary

skfda.representation.irregular.FDataIrregular


Generic representation
----------------------

Expand Down
4 changes: 3 additions & 1 deletion skfda/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@
"representation",
],
submod_attrs={
'representation': ["FData", "FDataBasis", "FDataGrid"],
'representation': [
"FData", "FDataBasis", "FDataGrid", "FDataIrregular",
],
'representation._functional_data': ['concatenate'],
},
)
Expand Down
1 change: 1 addition & 0 deletions skfda/datasets/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
"fetch_tecator",
"fetch_ucr",
"fetch_weather",
"fetch_bone_density",
],
"_samples_generators": [
"make_gaussian",
Expand Down
95 changes: 93 additions & 2 deletions skfda/datasets/_real_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from typing_extensions import Literal

from ..representation import FDataGrid
from ..representation.irregular import FDataIrregular
from ..typing._numpy import NDArrayFloat, NDArrayInt


Expand Down Expand Up @@ -162,7 +163,7 @@ def fetch_ucr(
return_X_y: bool = False,
**kwargs: Any,
) -> Bunch | Tuple[FDataGrid, NDArrayInt]:
"""
r"""
Fetch a dataset from the UCR/UEA repository.
The UCR/UEA Time Series Classification repository, hosted at
Expand All @@ -173,6 +174,7 @@ def fetch_ucr(
Args:
name: Dataset name.
return_X_y: Return tuple (data, target)
kwargs: Additional parameters for the function
:func:`skdatasets.repositories.ucr.fetch`.
Expand Down Expand Up @@ -247,7 +249,7 @@ def _fetch_fda_usc(name: str) -> Any:
Acoustic-Phonetic Continuous Speech Corpus, NTIS, US Dept of Commerce)
which is a widely used resource for research in speech recognition. A
dataset was formed by selecting five phonemes for
classification based on digitized speech from this database.
classification based on digitized speech from this database.
phonemes are transcribed as follows: "sh" as in "she", "dcl" as in
"dark", "iy" as the vowel in "she", "aa" as the vowel in "dark", and
"ao" as the first vowel in "water". From continuous speech of 50 male
Expand Down Expand Up @@ -1551,3 +1553,92 @@ def fetch_mco(
cite=":footcite:p:`ruiz-meana++_2003_cariporide`",
bibliography=".. footbibliography::",
) + _param_descr


def _fetch_loon_data(name: str) -> Any:
    """
    Fetch the dataset ``name`` from the ``loon.data`` package.

    The request is delegated to ``_fetch_cran_no_encoding_warning`` with
    the package version pinned to ``0.1.3``.

    Args:
        name: Name of the dataset to fetch.

    Returns:
        Whatever ``_fetch_cran_no_encoding_warning`` returns for that
        dataset.

    """
    package_name = "loon.data"
    package_version = "0.1.3"

    return _fetch_cran_no_encoding_warning(
        name,
        package_name,
        version=package_version,
    )


# Human-readable description of the Bone Density dataset. It is exposed by
# ``fetch_bone_density`` as the ``DESCR`` entry of the returned Bunch.
_bone_density_descr = """
The Bone Density dataset is a study of bone density
in boys and girls aged 8-17. It contains data from 423
individuals, measured irregularly in different times,
with an average of ~3 points per individual.
References:
https://cran.r-project.org/package=loon.data
Laura K. Bachrach, Trevor Hastie, May-Choo Wang,
Balasubramanian Narasimhan, and Robert Marcus (1999)
"Bone Mineral Acquisition in Healthy Asian, Hispanic, Black
and Caucasian Youth. A Longitudinal Study",
J Clin Endocrinol Metab, 84, 4702-12.
Trevor Hastie, Robert Tibshirani, and Jerome Friedman (2009)
"The Elements of Statistical Learning",
2nd Edition, Springer New York <doi:10.1007/978-0-387-84858-7>
"""


def fetch_bone_density(
    return_X_y: bool = False,
    as_frame: bool = False,
) -> Bunch | Tuple[FDataIrregular, NDArrayInt] | Tuple[DataFrame, Series]:
    """
    Load the Bone Density dataset. This is an irregular dataset.

    The data is obtained from the ``bone_ext`` dataset of the R package
    'loon.data', which compiles several irregular datasets. The curves are
    built with ``idnum`` as the curve identifier, ``age`` as the argument
    and ``spnbmd`` as the coordinate; the target is the ``sex`` column.
    References are listed in the ``DESCR`` entry of the returned Bunch.

    Args:
        return_X_y: If ``True``, return only the tuple ``(data, target)``
            instead of a Bunch.
        as_frame: If ``True``, the data is a one-column DataFrame holding
            the curves and the target is a Series. Otherwise the data is a
            FDataIrregular and the target is the array of category codes
            of the ``sex`` column.

    Returns:
        The tuple ``(data, target)`` when ``return_X_y`` is ``True``.
        Otherwise a Bunch with the entries ``data``, ``target``, ``frame``
        (``None`` unless ``as_frame`` is ``True``), ``categories``,
        ``feature_names``, ``target_names`` and ``DESCR``.

    """
    descr = _bone_density_descr
    frame = None

    raw_dataset = _fetch_loon_data("bone_ext")

    data = raw_dataset["bone_ext"]

    curve_name = "idnum"
    argument_name = "age"
    target_name = "sex"
    coordinate_name = "spnbmd"

    curves = FDataIrregular._from_dataframe(
        data,
        id_column=curve_name,
        argument_columns=argument_name,
        coordinate_columns=coordinate_name,
        argument_names=[argument_name],
        coordinate_names=[coordinate_name],
        dataset_name="bone_ext",
    )

    # One target value per curve: keep a single row for each individual.
    target = pd.Series(
        data.drop_duplicates(subset=[curve_name])[target_name],
        name="group",
    )

    feature_name = curves.dataset_name.lower()
    # NOTE(review): this lists one value per individual, not the distinct
    # category names -- confirm whether the categories were intended.
    target_names = target.values.tolist()

    if as_frame:
        curves = pd.DataFrame({feature_name: curves})
        target_as_frame = target.reset_index(drop=True).to_frame()
        frame = pd.concat([curves, target_as_frame], axis=1)
    else:
        # ``.codes`` requires the target column to be categorical.
        target = target.values.codes

    if return_X_y:
        return curves, target

    return Bunch(
        data=curves,
        target=target,
        frame=frame,
        categories={},
        feature_names=[argument_name],
        target_names=target_names,
        DESCR=descr,
    )
27 changes: 22 additions & 5 deletions skfda/exploratory/stats/_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,12 @@
from typing import Callable, TypeVar, Union

import numpy as np
from scipy import integrate
from scipy.stats import rankdata

from skfda._utils.ndfunction import average_function_value

from ...misc.metrics._lp_distances import l2_distance
from ...representation import FData, FDataBasis, FDataGrid
from ...representation import FData, FDataBasis, FDataGrid, FDataIrregular
from ...typing._metric import Metric
from ...typing._numpy import NDArrayFloat
from ..depth import Depth, ModifiedBandDepth
Expand Down Expand Up @@ -103,7 +102,7 @@ def cov(


@functools.singledispatch
def std(X: F, correction: int = 1) -> F:
def std(X: F, correction: int = 0) -> F:
r"""
Compute the standard deviation of all the samples in a FData object.
Expand All @@ -127,7 +126,7 @@ def std(X: F, correction: int = 1) -> F:


@std.register
def std_fdatagrid(X: FDataGrid, correction: int = 1) -> FDataGrid:
def std_fdatagrid(X: FDataGrid, correction: int = 0) -> FDataGrid:
"""Compute the standard deviation of a FDataGrid."""
return X.copy(
data_matrix=np.std(
Expand All @@ -138,7 +137,25 @@ def std_fdatagrid(X: FDataGrid, correction: int = 1) -> FDataGrid:


@std.register
def std_fdatabasis(X: FDataBasis, correction: int = 1) -> FDataBasis:
def std_fdatairregular(
    X: FDataIrregular, correction: int = 0,
) -> FDataIrregular:
    """
    Compute the standard deviation of a FDataIrregular.

    The deviation is taken with :func:`numpy.std` along the first axis of
    the values returned by ``X._get_common_points_and_values()``
    (presumably the values at the points shared by every sample; confirm
    against that method).

    Args:
        X: Irregular functional data whose deviation is computed.
        correction: Degrees-of-freedom correction, passed to
            :func:`numpy.std` as ``ddof``.

    Returns:
        A new FDataIrregular defined on the common points, with
        ``start_indices`` equal to ``[0]`` and an unnamed sample.

    """
    points, values = X._get_common_points_and_values()
    pointwise_std = np.std(values, axis=0, ddof=correction)

    return FDataIrregular(
        start_indices=np.array([0]),
        points=points,
        values=pointwise_std,
        sample_names=(None,),
    )


@std.register
def std_fdatabasis(X: FDataBasis, correction: int = 0) -> FDataBasis:
"""Compute the standard deviation of a FDataBasis."""
from ..._utils import function_to_fdatabasis

Expand Down
Loading

0 comments on commit 6b11674

Please sign in to comment.