Skip to content

Commit

Permalink
Distance, norm and inner_product added
Browse files Browse the repository at this point in the history
  • Loading branch information
mcarbajo committed Dec 6, 2017
1 parent e739098 commit 53bf8da
Showing 1 changed file with 175 additions and 5 deletions.
180 changes: 175 additions & 5 deletions fda/math_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from fda.FDataGrid import FDataGrid
import numpy
import scipy.stats.mstats
import scipy.integrate


__author__ = "Miguel Carbajo Berrocal"
Expand Down Expand Up @@ -86,7 +87,7 @@ def sqrt(fdatagrid):
Args:
fdatagrid (FDataGrid): Object to whose elements the square root
operations is going to be applied.
operation is going to be applied.
Returns:
FDataGrid: Object whose elements are the square roots of the original.
Expand Down Expand Up @@ -136,7 +137,7 @@ def exp(fdatagrid):
Args:
fdatagrid (FDataGrid): Object to whose elements the exponential
operations is going to be applied.
operation is going to be applied.
Returns:
FDataGrid: Object whose elements are the result of exponentiating
Expand All @@ -153,7 +154,7 @@ def log(fdatagrid):
Args:
fdatagrid (FDataGrid): Object to whose elements the logarithm
operations is going to be applied.
operation is going to be applied.
Returns:
FDataGrid: Object whose elements are the logarithm of the original.
Expand All @@ -169,7 +170,7 @@ def log10(fdatagrid):
Args:
fdatagrid (FDataGrid): Object to whose elements the base 10 logarithm
operations is going to be applied.
operation is going to be applied.
Returns:
FDataGrid: Object whose elements are the base 10 logarithm of the
Expand All @@ -186,7 +187,7 @@ def log2(fdatagrid):
Args:
fdatagrid (FDataGrid): Object to whose elements the binary logarithm
operations is going to be applied.
operation is going to be applied.
Returns:
FDataGrid: Object whose elements are the binary logarithm of the
Expand All @@ -212,3 +213,172 @@ def cumsum(fdatagrid):
return FDataGrid(numpy.cumsum(fdatagrid.data_matrix, axis=0),
fdatagrid.sample_points, fdatagrid.sample_range,
fdatagrid.names)


def inner_product(fdatagrid, fdatagrid2):
    """ Calculates the inner product amongst all the samples in two
    FDataGrid objects.

    For each pair of samples f and g the inner product is defined as:

    .. math::
        <f, g> = \\int_a^bf(x)g(x)dx

    The integral is approximated using Simpson's rule.

    Args:
        fdatagrid (FDataGrid): First FDataGrid object.
        fdatagrid2 (FDataGrid): Second FDataGrid object.

    Returns:
        numpy.ndarray: Matrix with as many rows as samples in the first
        object and as many columns as samples in the second one. Each
        element (i, j) of the matrix is the inner product of the ith sample
        of the first object and the jth sample of the second one.

    Raises:
        ValueError: If the sample points of both objects are not equal.

    Examples:
        The inner product of the function :math:`f(x) = x` and the constant
        :math:`y=1` defined over the interval [0,1] is the area of the
        triangle delimited by the lines y = 0, x = 1 and y = x; 0.5.

        >>> x = numpy.linspace(0,1,1001)
        >>> fd1 = FDataGrid(x,x)
        >>> fd2 = FDataGrid(numpy.ones(len(x)),x)
        >>> inner_product(fd1, fd2)
        array([[ 0.5]])

        If the FDataGrid object contains more than one sample

        >>> fd1 = FDataGrid([x, numpy.ones(len(x))], x)
        >>> fd2 = FDataGrid([numpy.ones(len(x)), x] ,x)
        >>> inner_product(fd1, fd2).round(2)
        array([[ 0.5 ,  0.33],
               [ 1.  ,  0.5 ]])

    """
    # Both objects must be discretised over the same points, otherwise the
    # pointwise product of two samples is meaningless.
    if not numpy.array_equal(fdatagrid.sample_points,
                             fdatagrid2.sample_points):
        raise ValueError("Sample points for both objects must be equal")

    # ``simps`` was renamed to ``simpson`` and later removed from SciPy;
    # prefer the current name and fall back to the legacy one on old
    # releases.
    integrate = getattr(scipy.integrate, "simpson", None)
    if integrate is None:
        integrate = scipy.integrate.simps

    # Assumes data_matrix is laid out as (n_samples, n_points), which is
    # what indexing one sample per row and integrating it against
    # sample_points requires.
    first = numpy.asarray(fdatagrid.data_matrix)
    second = numpy.asarray(fdatagrid2.data_matrix)

    # Creates an empty matrix with the desired size to store the results.
    result = numpy.empty([fdatagrid.n_samples, fdatagrid2.n_samples])
    for i in range(fdatagrid.n_samples):
        # Broadcasting multiplies the ith sample against every sample of
        # the second object; Simpson's rule then integrates along the last
        # axis, filling a whole row of the result at once.
        result[i] = integrate(first[i] * second,
                              x=fdatagrid.sample_points)
    return result


def norm_lp(fdatagrid, p=2):
    """ Calculates the norm of all the samples in a FDataGrid object.

    For each sample f the lp norm is defined as:

    .. math::
        \\lVert f \\rVert = \\left( \\int_D \\lvert f \\rvert^p dx \\right)^{
        \\frac{1}{p}}

    Where D is the domain over which the functions are defined.

    The integral is approximated using Simpson's rule.

    Args:
        fdatagrid (FDataGrid): FDataGrid object.
        p (int or float, optional): p of the lp norm. Must be greater or
            equal than 1. Defaults to 2.

    Returns:
        numpy.ndarray: Array with one element per sample in the object;
        element i is the lp norm of the ith sample.

    Raises:
        ValueError: If p is lower than 1.

    Examples:
        Calculates the norm of a FDataGrid containing the functions y = 1
        and y = x defined in the interval [0,1].

        >>> x = numpy.linspace(0,1,1001)
        >>> fd = FDataGrid([numpy.ones(len(x)), x] ,x)
        >>> norm_lp(fd).round(2)
        array([ 1.  ,  0.58])

        The lp norm is only defined if p >= 1.

        >>> norm_lp(fd, p = 0.5)
        Traceback (most recent call last):
            ....
        ValueError: p must be equal or greater than 1.

    """
    # Checks that the lp norm is well defined.
    if p < 1:
        raise ValueError("p must be equal or greater than 1.")

    # ``simps`` was renamed to ``simpson`` and later removed from SciPy;
    # prefer the current name and fall back to the legacy one on old
    # releases.
    integrate = getattr(scipy.integrate, "simpson", None)
    if integrate is None:
        integrate = scipy.integrate.simps

    # Integrates |f|^p along the last axis of data_matrix (one value per
    # sample) and takes the p-th root. ``1.0 / p`` keeps the exponent a true
    # division even for integer p under Python 2 semantics.
    integral = integrate(numpy.abs(fdatagrid.data_matrix) ** p,
                         x=fdatagrid.sample_points)
    return integral ** (1.0 / p)


def metric(fdatagrid, fdatagrid2, norm=norm_lp, **kwargs):
    """ Calculates the distance between all possible pairs of one sample of
    the first FDataGrid object and one of the second one.

    For each pair of samples f and g the distance between them is defined as:

    .. math::
        d(f, g) = \\lVert f - g \\rVert

    The norm is specified as a parameter but defaults to the l2 norm.

    Args:
        fdatagrid (FDataGrid): First FDataGrid object.
        fdatagrid2 (FDataGrid): Second FDataGrid object.
        norm (Function, optional): Norm function used in the definition of
            the distance. It is called with the difference of one sample of
            each object and must return a single value.
        **kwargs (dict, optional): parameters dictionary to be passed to the
            norm function.

    Returns:
        numpy.ndarray: Matrix with as many rows as samples in the first
        object and as many columns as samples in the second one. Each
        element (i, j) of the matrix is the distance between the ith sample
        of the first object and the jth sample of the second one.

    Raises:
        ValueError: If the sample points of both objects are not equal.

    Examples:
        Computes the distances between an object containing functional data
        corresponding to the functions y = 1 and y = x defined over the
        interval [0, 1] and another one containing data of the functions y
        = 0 and y = x/2 + 0.5. The result then is a 2x2 array with the
        computed l2 distance between every pair of functions.

        >>> x = numpy.linspace(0, 1, 1001)
        >>> fd = FDataGrid([numpy.ones(len(x)), x], x)
        >>> fd2 = FDataGrid([numpy.zeros(len(x)), x/2 + 0.5], x)
        >>> metric(fd, fd2).round(2)
        array([[ 1.  ,  0.29],
               [ 0.58,  0.29]])

        If the functional data are defined over a different set of points of
        discretisation the function raises an exception.

        >>> x = numpy.linspace(0, 2, 1001)
        >>> fd2 = FDataGrid([numpy.zeros(len(x)), x/2 + 0.5], x)
        >>> metric(fd, fd2)
        Traceback (most recent call last):
            ....
        ValueError: Sample points for both objects must be equal

    """
    # Both objects must be discretised over the same points so that the
    # difference of two samples is well defined.
    if not numpy.array_equal(fdatagrid.sample_points,
                             fdatagrid2.sample_points):
        raise ValueError("Sample points for both objects must be equal")

    # Creates an empty matrix with the desired size to store the results.
    distances = numpy.empty([fdatagrid.n_samples, fdatagrid2.n_samples])
    # Iterates over the different samples of both objects.
    for i in range(fdatagrid.n_samples):
        for j in range(fdatagrid2.n_samples):
            # Computes the metric between f and g as norm(f - g).
            value = norm(fdatagrid[i] - fdatagrid2[j], **kwargs)
            # The norm may hand back a size-1 array rather than a scalar
            # (presumably one entry per sample of the single-sample
            # difference -- confirm against FDataGrid indexing). Extract the
            # element explicitly, since implicit conversion of arrays with
            # ndim > 0 to a scalar is deprecated in recent NumPy releases.
            distances[i, j] = numpy.asarray(value).item()
    return distances

0 comments on commit 53bf8da

Please sign in to comment.