Skip to content

Commit

Permalink
some FDataGrid methods set as properties. kernel_smoothers.local_line…
Browse files Browse the repository at this point in the history
…ar_regression added. docstrings updated
  • Loading branch information
mcarbajo committed Nov 9, 2017
1 parent 9faa8d5 commit e79cd19
Show file tree
Hide file tree
Showing 3 changed files with 82 additions and 28 deletions.
54 changes: 31 additions & 23 deletions fda/FDataGrid.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ class FDataGrid:
....
ValueError: Incorrect dimension in data_matrix and argvals arguments.
"""
def __init__(self, data_matrix, argvals=None, argvals_range=None,
names=None):
Expand All @@ -47,8 +46,8 @@ def __init__(self, data_matrix, argvals=None, argvals_range=None,
points of discretisation.
argvals (array_like, optional): an array containing the points of
discretisation where values have been recorded or a list of
lists, each of them containing the points of
discretisation for each axis.
lists, each of them containing the points of
discretisation for each axis.
argvals_range (tuple or list, optional): contains the edges of
the interval in which the functional data is considered to
exist.
Expand All @@ -71,11 +70,11 @@ def __init__(self, data_matrix, argvals=None, argvals_range=None,
else:
# Check that the dimension of the data matches the argvals list
self.argvals = numpy.asarray(argvals)
if self.data_matrix.ndim == 1 \
if (self.data_matrix.ndim == 1
or (self.data_matrix.ndim == 2
and len(self.argvals) != self.data_matrix.shape[1]) \
and len(self.argvals) != self.data_matrix.shape[1])
or (self.data_matrix.ndim > 2
and self.data_matrix.ndim != len(self.argvals) + 1):
and self.data_matrix.ndim != len(self.argvals) + 1)):
raise ValueError("Incorrect dimension in data_matrix and "
"argvals arguments.")

Expand All @@ -92,8 +91,8 @@ def __init__(self, data_matrix, argvals=None, argvals_range=None,
if len(self.argvals_range) != 2:
raise ValueError("Incorrect value of argvals_range. It should"
" have two elements.")
if self.argvals_range[0] > self.argvals[0] \
or self.argvals_range[-1] < self.argvals[-1]:
if (self.argvals_range[0] > self.argvals[0]
or self.argvals_range[-1] < self.argvals[-1]):
raise ValueError("Timestamps must be within the time range.")

self.names = names
Expand All @@ -120,12 +119,16 @@ def round(self, decimals=0):
self.argvals.round(decimals),
self.argvals_range, self.names)

@property
def ndim(self):
""" Number of dimensions of the data
""" Number of dimensions of the data.
Returns:
int: Number of dimensions of the data.
"""
return self.data_matrix.ndim

@property
def nrow(self):
""" Number of rows of the data_matrix. Also the number of samples.
Expand All @@ -135,6 +138,7 @@ def nrow(self):
"""
return self.data_matrix.shape[0]

@property
def ncol(self):
""" Number of columns of the data_matrix. Also the number of points
of discretisation.
Expand All @@ -145,6 +149,7 @@ def ncol(self):
"""
return self.data_matrix.shape[1]

@property
def shape(self):
""" Dimensions (aka shape) of the data_matrix.
Expand All @@ -161,8 +166,7 @@ def derivative(self, order=1):
It is calculated using lagged differences. If we call :math:`D` the
data_matrix, :math:`D^1` the derivative of order 1 and :math:`T` the
vector
containing the points of discretisation; :math:`D^1` is
vector containing the points of discretisation; :math:`D^1` is
calculated as follows:
.. math::
Expand Down Expand Up @@ -205,17 +209,17 @@ def derivative(self, order=1):
if order < 1:
raise ValueError("The order of a derivative has to be greater "
"or equal than 1.")
if self.ndim() > 2:
if self.ndim > 2:
raise NotImplementedError("Not implemented for 2 or more"
" dimensional data.")
if numpy.isnan(self.data_matrix).any():
raise ValueError("The FDataGrid object cannot contain nan "
"elements.")
data_matrix = self.data_matrix
argvals = self.argvals
for k in range(order):
for _ in range(order):
mdata = []
for i in range(self.nrow()):
for i in range(self.nrow):
arr = numpy.diff(data_matrix[i])/(argvals[1:] - argvals[:-1])
arr = numpy.append(arr, arr[-1])
arr[1:-1] += arr[:-2]
Expand Down Expand Up @@ -271,14 +275,18 @@ def plot(self, *args, **kwargs):
*args, **kwargs)

def __str__(self):
return 'Data set:\t' + str(self.data_matrix) \
+ '\nargvals:\t' + str(self.argvals) \
+ '\ntime range:\t' + str(self.argvals_range)
return ('Data set:\t' + str(self.data_matrix)
+ '\nargvals:\t' + str(self.argvals)
+ '\ntime range:\t' + str(self.argvals_range))

def __repr__(self):
return "FDataGrid(\n " \
+ self.data_matrix.__repr__() \
+ "\n ,argvals=" + self.argvals.__repr__() \
+ "\n ,argvals_range=" + self.argvals_range.__repr__() \
+ "\n ,names=" + self.names.__repr__() \
+ ")"
return ("FDataGrid(\n "
+ self.data_matrix.__repr__()
+ "\n ,argvals=" + self.argvals.__repr__()
+ "\n ,argvals_range=" + self.argvals_range.__repr__()
+ "\n ,names=" + self.names.__repr__()
+ ")")

def __getitem__(self, key):
return FDataGrid(self.data_matrix[key], self.argvals,
self.argvals_range, self.names)
52 changes: 49 additions & 3 deletions fda/kernel_smoothers.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
relying on a discrete representation of functional data.
Todo:
* llr (Local linear regression)
* Document nw
* Closed-form for KNN
* Decide whether to include module level examples
"""
Expand Down Expand Up @@ -38,7 +38,53 @@ def nw(argvals, h=None, kernel=kernels.normal, w=None, cv=False):
return (k.T/rs).T


def llr(argvals, h, kernel=kernels.normal, w=None, cv=False):
def local_linear_regression(argvals, h, kernel=kernels.normal, w=None,
cv=False):
"""Local linear regression smoothing method.
Provides a smoothing matrix :math:`\\hat{H}` for the discretisation
points in argvals by the local linear regression estimator. The smoothed
values :math:`\\hat{Y}` can be calculated as
:math:`\\hat{Y} = \\hat{H}Y` where :math:`Y` is the vector of observations
at the points of discretisation :math:`(x_1, x_2, ..., x_n)`.
.. math::
\\hat{H}_{i,j} = \\frac{b_j(x_i)}{\\sum_{k=1}^{n}b_k(x_i)}
.. math::
b_j(x) = K(\\frac{x_j - x}{h}) [S_{n,2}(x) - (x_j - x)S_{n,1}(x)]
.. math::
S_{n,k}(x) = \\sum_{j=1}^{n}K(\\frac{x_j-x}{h})(x_j-x)^k
Args:
argvals (ndarray): Vector of discretisation points.
h (float): Window width of the kernel.
kernel (function, optional): kernel function. By default a normal
kernel.
w (ndarray, optional): Case weights matrix.
cv (bool, optional): Flag for cross-validation methods.
Defaults to False.
Examples:
>>> local_linear_regression(numpy.array([1,2,4,5,7]), 3.5).round(3)
array([[ 0.614, 0.429, 0.077, -0.03 , -0.09 ],
[ 0.381, 0.595, 0.168, -0. , -0.143],
[-0.104, 0.112, 0.697, 0.398, -0.104],
[-0.147, -0.036, 0.392, 0.639, 0.152],
[-0.095, -0.079, 0.117, 0.308, 0.75 ]])
>>> local_linear_regression(numpy.array([1,2,4,5,7]), 2).round(3)
array([[ 0.714, 0.386, -0.037, -0.053, -0.01 ],
[ 0.352, 0.724, 0.045, -0.081, -0.04 ],
[-0.078, 0.052, 0.74 , 0.364, -0.078],
[-0.07 , -0.067, 0.36 , 0.716, 0.061],
[-0.012, -0.032, -0.025, 0.154, 0.915]])
Returns:
ndarray: Smoothing matrix.
"""
tt = numpy.abs(numpy.subtract.outer(argvals, argvals))
if cv:
numpy.fill_diagonal(tt, math.inf)
Expand All @@ -58,7 +104,7 @@ def knn(argvals, k=None, kernel=kernels.uniform, w=None, cv=False):
""" K-nearest neighbour kernel smoother.
Provides a smoothing matrix S for the discretisation points in argvals by
the k nearest neighbours estimator.
the k nearest neighbours estimator.
Args:
argvals (ndarray): Vector of discretisation points.
Expand Down
4 changes: 2 additions & 2 deletions fda/kernels.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def cosine(u):
return 0


def epan(u):
def epanechnikov(u):
if isinstance(u, numpy.ndarray):
res = numpy.zeros(u.shape)
res[abs(u) <= 1] = 0.75*(1-u[abs(u) <= 1]**2)
Expand All @@ -40,7 +40,7 @@ def epan(u):
return 0


def tri(u):
def tri_weight(u):
if isinstance(u, numpy.ndarray):
res = numpy.zeros(u.shape)
res[abs(u) <= 1] = 35/32*(1-u[abs(u) <= 1]**2)**3
Expand Down

0 comments on commit e79cd19

Please sign in to comment.