Skip to content

Commit

Permalink
Merge pull request #519 from GAA-UAM/feature/speedUpPenalization
Browse files Browse the repository at this point in the history
Speed up the calculation of the penalization matrix for FDataGrid
  • Loading branch information
vnmabus committed Jun 24, 2023
2 parents 0154d69 + f69db8f commit c40e372
Show file tree
Hide file tree
Showing 6 changed files with 315 additions and 45 deletions.
237 changes: 203 additions & 34 deletions skfda/misc/operators/_linear_differential_operator.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
from __future__ import annotations

import numbers
from typing import Callable, Sequence, Union
from typing import Callable, Sequence, Tuple, Union

import findiff
import numpy as np
import scipy.integrate
from numpy import polyder, polyint, polymul, polyval
Expand All @@ -16,9 +17,10 @@
CustomBasis,
FourierBasis,
MonomialBasis,
_GridBasis,
)
from ...typing._base import DomainRangeLike
from ...typing._numpy import NDArrayFloat
from ...typing._numpy import NDArrayFloat, NDArrayInt
from ._operators import Operator, gram_matrix, gram_matrix_optimization

Order = int
Expand Down Expand Up @@ -577,50 +579,199 @@ def bspline_penalty_matrix_optimized(
return penalty_matrix


def _optimized_operator_evaluation_in_grid(
linear_operator: LinearDifferentialOperator,
grid_points: NDArrayFloat,
findiff_accuracy: int,
) -> Tuple[NDArrayFloat, NDArrayInt]:
"""
Compute the linear operator applied to the delta basis of the grid.
The delta basis is the basis where the i-th basis function is 1 in the
i-th point and 0 in the rest of points.
Returns:
If the linear operator does not have constant weights, NotImplemented.
Otherwise, returns a tuple with:
- A matrix where the i-th row contains the values of the linear operator
applied to the i-th basis function of the delta basis.
- The domain limits of each basis function of the delta basis. Outside
these limits, the result of applying the linear operator to the
given basis function is zero.
Example:
>>> import numpy as np
>>> from skfda.misc.operators import LinearDifferentialOperator
>>> _optimized_operator_evaluation_in_grid(
... LinearDifferentialOperator(2),
... np.linspace(0, 7, 8),
... 2,
... )
(array([[ 2., 1., 0., 0., 0., 0., 0., 0.],
[-5., -2., 1., 0., 0., 0., 0., 0.],
[ 4., 1., -2., 1., 0., 0., 0., 0.],
[-1., 0., 1., -2., 1., 0., 0., 0.],
[ 0., 0., 0., 1., -2., 1., 0., -1.],
[ 0., 0., 0., 0., 1., -2., 1., 4.],
[ 0., 0., 0., 0., 0., 1., -2., -5.],
[ 0., 0., 0., 0., 0., 0., 1., 2.]]),
array([[0, 1],
[0, 2],
[0, 3],
[0, 4],
[3, 7],
[4, 7],
[5, 7],
[6, 7]]))
Explanation:
Each row of the first array contains the values of the linear operator
applied to the delta basis function.
As it can be seen, each row of the second array cointains the domain
limits of the corresponding row of the first array. That is to say,
the range in which their values are not 0.
For example, the third row of the first array is non-zero until the
fourth element, so the third row of the second array is [0, 3].
"""
n_points = grid_points.shape[0]
diff_coefficients = np.zeros((n_points, n_points))

domain_limits = np.hstack(
[
np.ones((n_points, 1)) * n_points,
np.zeros((n_points, 1)),
],
).astype(int)

linear_operator_weights = linear_operator.constant_weights()
if linear_operator_weights is None:
return NotImplemented, NotImplemented

# The desired matrix can be obtained by appending the coefficients
# of the finite differences for each point column-wise.
for dif_order, w in enumerate(linear_operator_weights):
if w == 0:
continue
for point_index in range(n_points):
# Calculate the finite differences coefficients
# the point point_index. These coefficients express the
# contribution of a function in each of the points in the
# grid to the value of the operator applied to the function
# in the point point_index.
finite_diff = findiff.coefs.coefficients_non_uni(
deriv=dif_order,
acc=findiff_accuracy,
coords=grid_points,
idx=point_index,
)

# The offsets are the relative positions of the
# points for which the coefficients are being
# calculated
offsets = finite_diff["offsets"]
coefficients = finite_diff["coefficients"]

# Add the position of the point where the finite
# difference is calculated to get the absolute position
# of the points where each coefficient is applied
positions = offsets + point_index

# If the coefficients for calculating the value of
# the operator in this point (point_index)
# include a coefficient for the i-th point,
# then the i-th basis function contains the point point_index
# in its domain.
# (by definition that fuction is 1 in the i-th point).
domain_limits[positions, 0] = np.minimum(
domain_limits[positions, 0],
np.ones(positions.shape) * point_index,
)
domain_limits[positions, 1] = np.maximum(
domain_limits[positions, 1],
np.ones(positions.shape) * point_index,
)

w_eval = w(grid_points[point_index]) if callable(w) else w
diff_coefficients[positions, point_index] += w_eval * coefficients

return (diff_coefficients, domain_limits)


@gram_matrix_optimization.register
def fdatagrid_penalty_matrix_optimized(
def gridbasis_penalty_matrix_optimized(
linear_operator: LinearDifferentialOperator,
basis: FDataGrid,
basis: _GridBasis,
) -> NDArrayFloat:
"""
Optimized version for FDatagrid.
If the data_matrix of the FDataGrid is the identity, the resulting
matrix will be the gram matrix for functions discretized in the
given grid points. That is to say, in order to calculate the norm
of the linear operator applied to functions discretized in the given
grid points, it is enough to multiply the returned matrix by the
values in the points of the grid on both sides.
If the data_matrix of the FDataGrid is not the identity, the resulting
matrix will be the gram matrix for functions expressed as a combination
of the given set of functions (the entries of the fdatagrid). The
intended use is to calculate the norm of the linear operator for
functions expressed in a basis that is a FDataGrid (see CustomBasis).
Note that the first case is a particular case of the second one.
Optimized version for GridBasis.
It evaluates the operator in the grid points.
"""
evaluated_basis = sum(
w(basis.grid_points[0]) if callable(w) else w
* basis.derivative(order=i)(basis.grid_points[0])
for i, w in enumerate(linear_operator.weights)
n_points = basis.grid_points[0].shape[0]
findiff_accuracy = 2

evaluated_basis, domain_limits = _optimized_operator_evaluation_in_grid(
linear_operator,
basis.grid_points[0],
findiff_accuracy=findiff_accuracy,
)
assert not isinstance(evaluated_basis, int)

indices = np.triu_indices(basis.n_samples)
product = evaluated_basis[indices[0]] * evaluated_basis[indices[1]]
# If the operator does not have constant weights, the
# optimized evaluation is not implemented.
if evaluated_basis is NotImplemented:
return NotImplemented

triang_vec = scipy.integrate.simps(product[..., 0], x=basis.grid_points)
# The support of the result of applying the linear operator
# is a small subset of the grid points. We calculate the
# maximum number of points to the left and right of the
# point where the linear operator is applied that can be
# different from zero.

matrix = np.empty((basis.n_samples, basis.n_samples))
max_domain_width = np.max(
np.abs(domain_limits[:, 0] - domain_limits[:, 1]),
)

# Set upper matrix
matrix[indices] = triang_vec
# Precompute the simpson quadrature for the entire domain.
# This is done to avoid computing it for each subinterval of integration.
quadrature = scipy.integrate.simpson(
np.identity(n_points),
basis.grid_points[0],
)

# Set lower matrix
matrix[(indices[1], indices[0])] = triang_vec
product_matrix = np.zeros((basis.n_basis, basis.n_basis))
for i in range(n_points):
# Maximum index of another function with not-disjoint support
max_no_disjoint_index = min(n_points, i + 2 * max_domain_width)

for j in range(i, max_no_disjoint_index):
if domain_limits[i, 1] < domain_limits[j, 0]:
continue

left_lim = min(
domain_limits[i, 0],
domain_limits[j, 0],
)
right_lim = max(
domain_limits[i, 1],
domain_limits[j, 1],
)

# The right limit of the interval is excluded when slicing
right_lim += 1

# Limit both the multiplication and the integration to
# the interval where the functions are not 0.
operator_product = (
evaluated_basis[i, left_lim:right_lim]
* evaluated_basis[j, left_lim:right_lim]
)
sub_quadrature = quadrature[left_lim:right_lim]

return matrix
product_matrix[i, j] = np.dot(operator_product, sub_quadrature)
product_matrix[j, i] = product_matrix[i, j]

return product_matrix


@gram_matrix_optimization.register
Expand All @@ -644,6 +795,24 @@ def fdatabasis_penalty_matrix_optimized(
return coef_matrix @ basis_pen_matrix @ coef_matrix.T


@gram_matrix_optimization.register
def fdatagrid_penalty_matrix_optimized(
linear_operator: LinearDifferentialOperator,
fdatagrid: FDataGrid,
) -> NDArrayFloat:
"""Optimized version for FDataGrid."""
from ...misc import inner_product_matrix

operator_evaluated = linear_operator(fdatagrid)(
fdatagrid.grid_points[0],
)
fdatagrid_evaluated = FDataGrid(
data_matrix=operator_evaluated[..., 0],
grid_points=fdatagrid.grid_points[0],
)
return inner_product_matrix(fdatagrid_evaluated)


@gram_matrix_optimization.register
def custombasis_penalty_matrix_optimized(
linear_operator: LinearDifferentialOperator,
Expand Down
9 changes: 2 additions & 7 deletions skfda/preprocessing/dim_reduction/_fpca.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from ..._utils._sklearn_adapter import BaseEstimator, InductiveTransformerMixin
from ...misc.regularization import L2Regularization, compute_penalty_matrix
from ...representation import FData
from ...representation.basis import Basis, FDataBasis
from ...representation.basis import Basis, FDataBasis, _GridBasis
from ...representation.grid import FDataGrid
from ...typing._numpy import ArrayLike, NDArrayFloat

Expand Down Expand Up @@ -360,13 +360,8 @@ def _fit_grid(

weights_matrix = np.diag(self._weights)

basis = FDataGrid(
data_matrix=np.identity(n_points_discretization),
grid_points=X.grid_points,
)

regularization_matrix = compute_penalty_matrix(
basis_iterable=(basis,),
basis_iterable=(_GridBasis(grid_points=X.grid_points),),
regularization_parameter=1,
regularization=self.regularization,
)
Expand Down
2 changes: 2 additions & 0 deletions skfda/representation/basis/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
"_fdatabasis": ["FDataBasis", "FDataBasisDType"],
"_finite_element_basis": ["FiniteElementBasis", "FiniteElement"],
"_fourier_basis": ["FourierBasis", "Fourier"],
"_grid_basis": ["_GridBasis"],
"_monomial_basis": ["MonomialBasis", "Monomial"],
"_tensor_basis": ["TensorBasis", "Tensor"],
"_vector_basis": ["VectorValuedBasis", "VectorValued"],
Expand Down Expand Up @@ -42,6 +43,7 @@
Fourier as Fourier,
FourierBasis as FourierBasis,
)
from ._grid_basis import _GridBasis as _GridBasis
from ._monomial_basis import (
Monomial as Monomial,
MonomialBasis as MonomialBasis,
Expand Down
3 changes: 1 addition & 2 deletions skfda/representation/basis/_custom_basis.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,8 @@ def __init__(
domain_range=fdata.domain_range,
n_basis=fdata.n_samples,
)
self._check_linearly_independent(fdata)

self.fdata = fdata
self._check_linearly_independent(fdata)

@multimethod.multidispatch
def _check_linearly_independent(self, fdata: FData) -> None:
Expand Down
68 changes: 68 additions & 0 deletions skfda/representation/basis/_grid_basis.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
from __future__ import annotations

from typing import Any, TypeVar

from ..._utils import _to_grid_points
from ...typing._base import GridPointsLike
from ...typing._numpy import NDArrayFloat
from ._basis import Basis

T = TypeVar("T", bound="_GridBasis")


class _GridBasis(Basis):
"""
Basis associated to a grid of points.
Used to express functions whose values are known in a grid of points.
Each basis function is guaranteed to be zero in all points of the grid
except one, where it is one. The intermediate values are interpolated
depending on the interpolation object passed as argument.
This basis is used internally as an alternate representation of a
FDataGrid object. In certain cases, it is more convenient to work with
FDataGrid objects as a basis representation in this basis since, then,
all functional data can be represented as an FDataBasis object.
Parameters:
grid_points: Grid of points where the functions are evaluated.
"""

def __init__(
self,
*,
grid_points: GridPointsLike,
) -> None:
"""Basis constructor."""
self.grid_points = _to_grid_points(grid_points)
domain_range = tuple((s[0], s[-1]) for s in self.grid_points)
super().__init__(
domain_range=domain_range,
n_basis=len(self.grid_points[0]),
)

def _evaluate(self, eval_points: NDArrayFloat) -> NDArrayFloat:
raise NotImplementedError(
"Evaluation is not implemented in this basis",
)

def __eq__(self, other: Any) -> bool:
return (
super().__eq__(other)
and self.grid_points == other.grid_points
)

def __repr__(self) -> str:
return f"{type(self).__name__}(grid_points={self.grid_points}) "

def __hash__(self) -> int:
return hash(
(
super(),
(
tuple(grid_point_axis)
for grid_point_axis in self.grid_points
),
),
)

0 comments on commit c40e372

Please sign in to comment.