some FDataGrid methods set as properties. kernel_smoothers.local_line…

…ar_regression added. docstrings updated
GAA-UAM · Nov 9, 2017 · e79cd19 · e79cd19
1 parent 9faa8d5
commit e79cd19
Show file tree

Hide file tree

Showing 3 changed files with 82 additions and 28 deletions.
diff --git a/fda/FDataGrid.py b/fda/FDataGrid.py
@@ -36,7 +36,6 @@ class FDataGrid:
             ....
         ValueError: Incorrect dimension in data_matrix and argvals arguments.
 
-
     """
     def __init__(self, data_matrix, argvals=None, argvals_range=None,
                  names=None):
@@ -47,8 +46,8 @@ def __init__(self, data_matrix, argvals=None, argvals_range=None,
                 points of discretisation.
             argvals (array_like, optional): an array containing the points of
                 discretisation where values have been recorded or a list of
-                    lists with each of the list containing the points of
-                    dicretisation for each axis.
+                lists, each one containing the points of
+                discretisation for each axis.
             argvals_range (tuple or list, optional): contains the edges of
                 the interval in which the functional data is considered to
                 exist.
@@ -71,11 +70,11 @@ def __init__(self, data_matrix, argvals=None, argvals_range=None,
         else:
             # Check that the dimension of the data matches the argvals list
             self.argvals = numpy.asarray(argvals)
-            if self.data_matrix.ndim == 1 \
+            if (self.data_matrix.ndim == 1
                     or (self.data_matrix.ndim == 2
-                        and len(self.argvals) != self.data_matrix.shape[1]) \
+                        and len(self.argvals) != self.data_matrix.shape[1])
                     or (self.data_matrix.ndim > 2
-                        and self.data_matrix.ndim != len(self.argvals) + 1):
+                        and self.data_matrix.ndim != len(self.argvals) + 1)):
                 raise ValueError("Incorrect dimension in data_matrix and "
                                  "argvals arguments.")
 
@@ -92,8 +91,8 @@ def __init__(self, data_matrix, argvals=None, argvals_range=None,
             if len(self.argvals_range) != 2:
                 raise ValueError("Incorrect value of argvals_range. It should"
                                  " have two elements.")
-            if self.argvals_range[0] > self.argvals[0] \
-                    or self.argvals_range[-1] < self.argvals[-1]:
+            if (self.argvals_range[0] > self.argvals[0]
+                    or self.argvals_range[-1] < self.argvals[-1]):
                 raise ValueError("Timestamps must be within the time range.")
 
         self.names = names
@@ -120,12 +119,16 @@ def round(self, decimals=0):
                          self.argvals.round(decimals),
                          self.argvals_range, self.names)
 
+    @property
     def ndim(self):
-        """ Number of dimensions of the data
+        """ Number of dimensions of the data.
 
+        Returns:
+            int: Number of dimensions of the data.
         """
         return self.data_matrix.ndim
 
+    @property
     def nrow(self):
         """ Number of rows of the data_matrix. Also the number of samples.
 
@@ -135,6 +138,7 @@ def nrow(self):
         """
         return self.data_matrix.shape[0]
 
+    @property
     def ncol(self):
         """ Number of columns of the data_matrix. Also the number of points
         of discretisation.
@@ -145,6 +149,7 @@ def ncol(self):
         """
         return self.data_matrix.shape[1]
 
+    @property
     def shape(self):
         """ Dimensions (aka shape) of the data_matrix.
 
@@ -161,8 +166,7 @@ def derivative(self, order=1):
 
         Its calculated using lagged differences. If we call :math:`D` the
         data_matrix, :math:`D^1` the derivative of order 1 and :math:`T` the
-        vector
-        contaning the points of discretisation; :math:`D^1` is
+        vector containing the points of discretisation; :math:`D^1` is
         calculated as it follows:
 
         .. math::
@@ -205,17 +209,17 @@ def derivative(self, order=1):
         if order < 1:
             raise ValueError("The order of a derivative has to be greater "
                              "or equal than 1.")
-        if self.ndim() > 2:
+        if self.ndim > 2:
             raise NotImplementedError("Not implemented for 2 or more"
                                       " dimensional data.")
         if numpy.isnan(self.data_matrix).any():
             raise ValueError("The FDataGrid object cannot contain nan "
                              "elements.")
         data_matrix = self.data_matrix
         argvals = self.argvals
-        for k in range(order):
+        for _ in range(order):
             mdata = []
-            for i in range(self.nrow()):
+            for i in range(self.nrow):
                 arr = numpy.diff(data_matrix[i])/(argvals[1:] - argvals[:-1])
                 arr = numpy.append(arr, arr[-1])
                 arr[1:-1] += arr[:-2]
@@ -271,14 +275,18 @@ def plot(self, *args, **kwargs):
                                *args, **kwargs)
 
     def __str__(self):
-        return 'Data set:\t' + str(self.data_matrix) \
-                 + '\nargvals:\t' + str(self.argvals) \
-                 + '\ntime range:\t' + str(self.argvals_range)
+        return ('Data set:\t' + str(self.data_matrix)
+                + '\nargvals:\t' + str(self.argvals)
+                + '\ntime range:\t' + str(self.argvals_range))
 
     def __repr__(self):
-        return "FDataGrid(\n    " \
-               + self.data_matrix.__repr__() \
-               + "\n    ,argvals=" + self.argvals.__repr__() \
-               + "\n    ,argvals_range=" + self.argvals_range.__repr__() \
-               + "\n    ,names=" + self.names.__repr__() \
-               + ")"
+        return ("FDataGrid(\n    "
+                + self.data_matrix.__repr__()
+                + "\n    ,argvals=" + self.argvals.__repr__()
+                + "\n    ,argvals_range=" + self.argvals_range.__repr__()
+                + "\n    ,names=" + self.names.__repr__()
+                + ")")
+
+    def __getitem__(self, key):
+        return FDataGrid(self.data_matrix[key], self.argvals,
+                         self.argvals_range, self.names)
diff --git a/fda/kernel_smoothers.py b/fda/kernel_smoothers.py
@@ -6,8 +6,8 @@
  relaying on a discrete representation of functional data.
 
 Todo:
-    * llr (Local linear regression)
     * Document nw
+    * Closed-form for KNN
     * Decide whether to include module level examples
 
 """
@@ -38,7 +38,53 @@ def nw(argvals, h=None, kernel=kernels.normal, w=None, cv=False):
     return (k.T/rs).T
 
 
-def llr(argvals, h, kernel=kernels.normal, w=None, cv=False):
+def local_linear_regression(argvals, h, kernel=kernels.normal, w=None,
+                           cv=False):
+    """Local linear regression smoothing method.
+
+    Provides a smoothing matrix :math:`\\hat{H}` for the discretisation
+    points in argvals by the local linear regression estimator. The smoothed
+    values :math:`\\hat{Y}` can be calculated as
+    :math:`\\hat{Y} = \\hat{H}Y` where :math:`Y` is the vector of observations
+    at the points of discretisation :math:`(x_1, x_2, ..., x_n)`.
+
+    .. math::
+        \\hat{H}_{i,j} = \\frac{b_i(x_j)}{\\sum_{k=1}^{n}b_k(x_j)}
+
+    .. math::
+        b_i(x) = K(\\frac{x_i - x}{h}) S_{n,2}(x) - (x_i - x)S_{n,1}(x)
+
+    .. math::
+        S_{n,k} = \\sum_{i=1}^{n}K(\\frac{x_i-x}{h})(x_i-x)^k
+
+    Args:
+        argvals (ndarray): Vector of discretisation points.
+        h (float): Window width of the kernel.
+        kernel (function, optional): kernel function. By default a normal
+            kernel.
+        w (ndarray, optional): Case weights matrix.
+        cv (bool, optional): Flag for cross-validation methods.
+            Defaults to False.
+
+    Examples:
+        >>> local_linear_regression(numpy.array([1,2,4,5,7]), 3.5).round(3)
+        array([[ 0.614,  0.429,  0.077, -0.03 , -0.09 ],
+               [ 0.381,  0.595,  0.168, -0.   , -0.143],
+               [-0.104,  0.112,  0.697,  0.398, -0.104],
+               [-0.147, -0.036,  0.392,  0.639,  0.152],
+               [-0.095, -0.079,  0.117,  0.308,  0.75 ]])
+        >>> local_linear_regression(numpy.array([1,2,4,5,7]), 2).round(3)
+        array([[ 0.714,  0.386, -0.037, -0.053, -0.01 ],
+               [ 0.352,  0.724,  0.045, -0.081, -0.04 ],
+               [-0.078,  0.052,  0.74 ,  0.364, -0.078],
+               [-0.07 , -0.067,  0.36 ,  0.716,  0.061],
+               [-0.012, -0.032, -0.025,  0.154,  0.915]])
+
+
+    Returns:
+        ndarray: Smoothing matrix.
+
+    """
     tt = numpy.abs(numpy.subtract.outer(argvals, argvals))
     if cv:
         numpy.fill_diagonal(tt, math.inf)
@@ -58,7 +104,7 @@ def knn(argvals, k=None, kernel=kernels.uniform, w=None, cv=False):
     """ K-nearest neighbour kernel smoother.
 
     Provides an smoothing matrix S for the discretisation points in argvals by
-     the k nearest neighbours estimator.
+    the k nearest neighbours estimator.
 
     Args:
         argvals (ndarray): Vector of discretisation points.

diff --git a/fda/kernels.py b/fda/kernels.py
@@ -29,7 +29,7 @@ def cosine(u):
         return 0
 
 
-def epan(u):
+def epanechnikov(u):
     if isinstance(u, numpy.ndarray):
         res = numpy.zeros(u.shape)
         res[abs(u) <= 1] = 0.75*(1-u[abs(u) <= 1]**2)
@@ -40,7 +40,7 @@ def epan(u):
         return 0
 
 
-def tri(u):
+def tri_weight(u):
     if isinstance(u, numpy.ndarray):
         res = numpy.zeros(u.shape)
         res[abs(u) <= 1] = 35/32*(1-u[abs(u) <= 1]**2)**3