Skip to content

Commit

Permalink
Higher dimensional data objects
Browse files Browse the repository at this point in the history
  • Loading branch information
mcarbajo committed Jan 27, 2018
1 parent 9010a05 commit d32dba6
Show file tree
Hide file tree
Showing 4 changed files with 155 additions and 83 deletions.
178 changes: 117 additions & 61 deletions fda/FDataGrid.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,21 +22,32 @@ class FDataGrid:
in a grid of points.
Attributes:
data_matrix (numpy.ndarray): a matrix where each row contains the
values of a functional datum evaluated at the
points of discretisation.
sample_points (numpy.ndarray): an array containing the points of
discretisation where values have been recorded or a list of lists
with each of the lists containing the points of discretisation for
each axis.
sample_range (tuple or list): contains the edges of the interval
in which the functional data is considered to exist.
data_matrix (numpy.ndarray): a matrix where each entry of the first
axis contains the values of a functional datum evaluated at the
points of discretisation.
sample_points (numpy.ndarray): 2 dimension matrix where each row
contains the points of discretisation for each axis of data_matrix.
sample_range (numpy.ndarray): 2 dimension matrix where each row
contains the bounds of the interval in which the functional data
is considered to exist for each one of the axes.
dataset_label (str): name of the dataset.
axes_labels (list): list containing the labels of the different
axis. The first element is the x label, the second the y label
and so on.
Examples:
Representation of a functional data object with 2 samples
representing a function :math:`f : \mathbb{R}\longmapsto\mathbb{R}`.
>>> data_matrix = [[1, 2], [2, 3]]
>>> sample_points = [2, 4]
>>> FDataGrid(data_matrix, sample_points)
FDataGrid(
array([[1, 2],
[2, 3]])
,sample_points=array([[2, 4]])
...)
The number of columns of data_matrix has to be the length of
sample_points.
Expand All @@ -45,6 +56,29 @@ class FDataGrid:
....
ValueError: Incorrect dimension in data_matrix and sample_points.
FDataGrid supports higher dimensional data both in the domain and image.
Representation of a functional data object with 2 samples
representing a function :math:`f : \mathbb{R}\longmapsto\mathbb{R}^2`.
>>> data_matrix = [[[1, 0.3], [2, 0.4]], [[2, 0.5], [3, 0.6]]]
>>> sample_points = [2, 4]
>>> fd = FDataGrid(data_matrix, sample_points)
>>> fd.ndim_domain, fd.ndim_image
(1, 2)
Representation of a functional data object with 2 samples
representing a function :math:`f : \mathbb{R}^2\longmapsto\mathbb{R}`.
>>> data_matrix = [[[1, 0.3], [2, 0.4]], [[2, 0.5], [3, 0.6]]]
>>> sample_points = [[2, 4], [3,6]]
>>> fd = FDataGrid(data_matrix, sample_points)
>>> fd.ndim_domain, fd.ndim_image
(2, 1)
"""
def __init__(self, data_matrix, sample_points=None,
sample_range=None, dataset_label='Data set',
Expand All @@ -58,56 +92,55 @@ def __init__(self, data_matrix, sample_points=None,
points of discretisation where values have been recorded or a list
of lists with each of the lists containing the points of
discretisation for each axis.
sample_range (tuple or list, optional): contains the edges
of the interval in which the functional data is considered to
exist.
sample_range (tuple or list of tuples, optional): contains the
edges of the interval in which the functional data is
considered to exist (if the argument has 2 dimensions each
row is interpreted as the limits of one of the dimensions of
the domain).
dataset_label (str, optional): name of the dataset.
axes_labels (list, optional): list containing the labels of the
different axes. The first element is the x label, the second
the y label and so on.
"""
self.data_matrix = numpy.atleast_2d(data_matrix)
# TODO check dimensionality

if sample_points is None:
if self.data_matrix.ndim > 2:
self.sample_points = [numpy.linspace(0, 1,
self.data_matrix.shape[i]) for i
in range(1, self.data_matrix.ndim)]
else:
self.sample_points = numpy.linspace(0, 1,
self.data_matrix.shape[1])
self.sample_points = numpy.array(
[numpy.linspace(0, 1, self.data_matrix.shape[i]) for i in
range(1, self.data_matrix.ndim)])

else:
# Check that the dimension of the data matches the sample_points
# list
self.sample_points = numpy.atleast_1d(sample_points)
if ((self.data_matrix.ndim == 2
and len(self.sample_points) != self.data_matrix.shape[1])
or (self.data_matrix.ndim > 2
and self.data_matrix.ndim != len(self.sample_points) + 1)):
self.sample_points = numpy.atleast_2d([numpy.asarray(i) for i in
numpy.atleast_1d(
sample_points)])
if not numpy.array_equal(
self.data_matrix.shape[1: 1 + self.ndim_domain],
[len(i) for i in self.sample_points]):
raise ValueError("Incorrect dimension in data_matrix and "
"sample_points.")
f"sample_points.")

if sample_range is None:
if self.data_matrix.ndim == 2:
self.sample_range = (self.sample_points[0],
self.sample_points[-1])
else:
self.sample_range = [(self.sample_points[i][0],
self.sample_points[i][-1])
for i in range(len(self.sample_points))]
self.sample_range = numpy.array(
[(self.sample_points[i][0], self.sample_points[i][-1])
for i in range(self.ndim_domain)])
# Default value for sample_range is an array of pairs with
# the first and last element of each row of the sample_points.
else:
self.sample_range = sample_range
if len(self.sample_range) != 2:
raise ValueError("Incorrect value of sample_range. It "
"should have two elements.")
if (self.sample_range[0] > self.sample_points[0]
or self.sample_range[-1] < self.sample_points[-1]):
raise ValueError("Timestamps must be within the time range.")
self.sample_range = numpy.atleast_2d(sample_range)
# sample range must be a 2 dimension matrix with as many rows as
# dimensions in the domain and 2 columns
if (self.sample_range.ndim != 2 or self.sample_range.shape[1] != 2
or self.sample_range.shape[0] != self.ndim_domain):
raise ValueError("Incorrect shape of sample_range.")
for i in range(self.ndim_domain):
if (self.sample_range[i, 0] > self.sample_points[i, 0]
or self.sample_range[i, -1] < self.sample_points[i,
-1]):
raise ValueError("Sample points must be within the sample "
"range.")

self.dataset_label = dataset_label
self.axes_labels = axes_labels
Expand Down Expand Up @@ -140,18 +173,23 @@ def ndim_domain(self):
Returns:
int: Number of dimensions of the domain.
"""
if self.sample_points.ndim == 1:
return 1
return self.sample_points.shape[0]

@property
def ndim_image(self):
""" Number of dimensions of the domain.
""" Number of dimensions of the image
Returns:
int: Number of dimensions of the domain.
int: Number of dimensions of the image.
"""
return self.data_matrix.ndim[(0,) * (1 + self.ndim_domain)]
try:
# The dimension of the image is the length of the array that can
# be extracted from the data_matrix using all the dimensions of
# the domain.
return self.data_matrix.shape[1 + self.ndim_domain]
# If there is no array that means the dimension of the image is 1.
except IndexError:
return 1

@property
def ndim(self):
Expand Down Expand Up @@ -228,8 +266,8 @@ def derivative(self, order=1):
>>> fdata.derivative()
FDataGrid(
array([[ 1. , 1.5, 1.5, 2. , 3. ]])
,sample_points=array([0, 1, 2, 3, 4])
,sample_range=(0, 4)
,sample_points=array([[0, 1, 2, 3, 4]])
,sample_range=array([[0, 4]])
,dataset_label='Data set - 1 derivative'
,...)
Expand All @@ -239,12 +277,17 @@ def derivative(self, order=1):
>>> fdata.derivative(2)
FDataGrid(
array([[ 0.5 , 0.25, 0.25, 0.75, 1. ]])
,sample_points=array([0, 1, 2, 3, 4])
,sample_range=(0, 4)
,sample_points=array([[0, 1, 2, 3, 4]])
,sample_range=array([[0, 4]])
,dataset_label='Data set - 2 derivative'
,...)
"""
if self.ndim_domain != 1:
raise NotImplementedError(
"This method only works when the dimension "
"of the domain of the FDatagrid object is "
"one.")
if order < 1:
raise ValueError("The order of a derivative has to be greater "
"or equal than 1.")
Expand All @@ -255,7 +298,7 @@ def derivative(self, order=1):
raise ValueError("The FDataGrid object cannot contain nan "
"elements.")
data_matrix = self.data_matrix
sample_points = self.sample_points
sample_points = self.sample_points[0]
for _ in range(order):
mdata = []
for i in range(self.nsamples):
Expand All @@ -280,6 +323,11 @@ def __check_same_dimensions(self, other):
raise ValueError(
"Sample points for both objects must be equal")

def mean(self):
    """ Computes the mean of all the samples in the object.

    Returns:
        FDataGrid: object built from a single entry, the mean of
        data_matrix taken along its first axis, reusing the sample
        points, sample range, dataset label and axes labels of this
        object.

    """
    # Average over axis 0; the result is wrapped in a list so that it is
    # passed to the constructor as one sample.
    mean_values = self.data_matrix.mean(axis=0)
    return FDataGrid([mean_values],
                     self.sample_points,
                     self.sample_range,
                     self.dataset_label,
                     self.axes_labels)

def __add__(self, other):
if isinstance(other, (numpy.ndarray, numbers.Number)):
data_matrix = other
Expand Down Expand Up @@ -351,8 +399,8 @@ def concatenate(self, other):
>>> fd.concatenate(fd_2)
FDataGrid(
array([[1, 2, 4, 5, 8],
[3, 4, 7, 9, 2]])
,sample_points=array([0, 1, 2, 3, 4])
[3, 4, 7, 9, 2]])
,sample_points=array([[0, 1, 2, 3, 4]])
...
"""
Expand Down Expand Up @@ -405,9 +453,12 @@ def plot(self, ax=None, **kwargs):
List of lines that were added to the plot.
"""
if self.ndim_domain != 1:
raise NotImplementedError("Plot only supported for functional "
"data with a domain dimension of 1.")
if ax is None:
ax = matplotlib.pyplot.gca()
_plot = ax.plot(self.sample_points,
_plot = ax.plot(self.sample_points[0],
numpy.transpose(self.data_matrix),
**kwargs)
self._set_labels(ax)
Expand All @@ -427,11 +478,14 @@ def scatter(self, ax=None, **kwargs):
:obj:`matplotlib.collections.PathCollection`
"""
if self.ndim_domain != 1:
raise NotImplementedError("Scatter only supported for functional "
"data with a domain dimension of 1.")
if ax is None:
ax = matplotlib.pyplot.gca()
_plot = None
for i in range(self.nsamples):
_plot = ax.scatter(self.sample_points,
_plot = ax.scatter(self.sample_points[0],
self.data_matrix[i],
**kwargs)
self._set_labels(ax)
Expand All @@ -446,18 +500,20 @@ def __str__(self):
def __repr__(self):
""" Return repr(self). """
return (f"FDataGrid("
+ f"\n {repr(self.data_matrix)}"
+ f"\n ,sample_points={repr(self.sample_points)}"
+ f"\n ,sample_range={repr(self.sample_range)}"
+ f"\n ,dataset_label={repr(self.dataset_label)}"
+ f"\n ,axes_labels={repr(self.axes_labels)}"
+ f")")
+ f"\n{repr(self.data_matrix)}"
+ f"\n,sample_points={repr(self.sample_points)}"
+ f"\n,sample_range={repr(self.sample_range)}"
+ f"\n,dataset_label={repr(self.dataset_label)}"
+ f"\n,axes_labels={repr(self.axes_labels)}"
+ f")").replace('\n', '\n ')

def __getitem__(self, key):
""" Return self[key]. """
if isinstance(key, tuple) and len(key) > 1:
return FDataGrid(self.data_matrix[key],
self.sample_points[key[1:1 + self.ndim_domain]],
[self.sample_points[i, subkey]
for i, subkey in enumerate(
key[1:1 + self.ndim_domain])],
self.sample_range, self.dataset_label,
self.axes_labels)
return FDataGrid(self.data_matrix[key], self.sample_points,
Expand Down
30 changes: 20 additions & 10 deletions fda/math_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,21 +16,23 @@
__status__ = "Development"


def mean(fdatagrid):
""" Computes the mean of all the samples in a FDataGrid object.
def mean(fdata):
""" Computes the mean of all the samples in a FData object.
Computes the mean of all the samples in a FDataGrid or FDataBasis object.
Args:
fdatagrid (FDataGrid): Object containing all the samples whose mean
fdata (FDataGrid or FDataBasis): Object containing all the samples
whose mean
is wanted.
Returns:
FDataGrid: A FDataGrid object with just one sample representing the
mean of all the samples in the original FDataGrid object.
FDataGrid or FDataBasis: A FDataGrid or FDataBasis object with just
one sample representing the mean of all the samples in the original
object.
"""
return FDataGrid([numpy.mean(fdatagrid.data_matrix, 0)],
fdatagrid.sample_points, fdatagrid.sample_range,
fdatagrid.dataset_label, fdatagrid.axes_labels)
return fdata.mean()


def var(fdatagrid):
Expand Down Expand Up @@ -79,7 +81,10 @@ def cov(fdatagrid):
numpy.ndarray: Matrix of covariances.
"""
return numpy.cov(fdatagrid.data_matrix)
return FDataGrid(numpy.cov(fdatagrid.data_matrix),
[fdatagrid.sample_points[0], fdatagrid.sample_points[0]],
[fdatagrid.sample_range[0], fdatagrid.sample_range[0]],
fdatagrid.dataset_label + ' - covariance')


def sqrt(fdatagrid):
Expand Down Expand Up @@ -256,6 +261,10 @@ def inner_product(fdatagrid, fdatagrid2):
[ 1. , 0.5 ]])
"""
if fdatagrid.ndim_domain != 1:
raise NotImplementedError("This method only works when the dimension "
"of the domain of the FDatagrid object is "
"one.")
# Checks
if not numpy.array_equal(fdatagrid.sample_points,
fdatagrid2.sample_points):
Expand All @@ -269,7 +278,8 @@ def inner_product(fdatagrid, fdatagrid2):
# Calculates the inner product using Simpson's rule.
_matrix[i, j] = (scipy.integrate.simps(fdatagrid.data_matrix[i] *
fdatagrid2.data_matrix[j],
x=fdatagrid.sample_points))
x=fdatagrid.sample_points[0]
))
return _matrix


Expand Down

0 comments on commit d32dba6

Please sign in to comment.