Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Speed up the calculation of the penalization matrix for FDataGrid #519

Merged
merged 34 commits into from
Jun 24, 2023
Merged
Show file tree
Hide file tree
Changes from 25 commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
c21d72a
Usable up to 2000 points
Ddelval Feb 4, 2023
258027d
Regularization usable up to 10 000 points.
Ddelval Feb 4, 2023
fa9e86d
Improve regularization optimization
Ddelval Feb 17, 2023
d106fbb
Complete documentation
Ddelval Feb 17, 2023
5d6ccda
Assign the fdata before using it.
Ddelval Mar 1, 2023
bb05968
Simplify custombasis_penalty_matrix_optimized
Ddelval Mar 7, 2023
6d60b05
Improve fdatagrid penalty matrix calculation
Ddelval Mar 7, 2023
d714160
Remove weird imports introduced by autocomplete
Ddelval Mar 7, 2023
3a950c6
Avoid circular import
Ddelval Mar 7, 2023
5a19294
Barebones implementation of GridBasis
Ddelval Mar 15, 2023
350bce4
Use gridBasis in fpca regularization
Ddelval Mar 15, 2023
138d678
Complete grid_basis implementation
Ddelval Mar 15, 2023
6b08ed7
Simplify GridBasis implementation
Ddelval Apr 15, 2023
8f39f18
Extract fdatagrid specialization to function
Ddelval Apr 15, 2023
aba68ec
Style fixes
Ddelval Apr 16, 2023
ed68f68
More style changes
Ddelval Apr 16, 2023
9efe090
Merge remote-tracking branch 'origin/develop' into feature/speedUpPen…
Ddelval Apr 16, 2023
3bfab32
Fix unregistered function
Ddelval Apr 16, 2023
f8d4554
Add _optimized_operator_evaluation_in_grid example
Ddelval Apr 16, 2023
c1389d9
Add a test for the GridBasis penalty matrix
Ddelval Apr 16, 2023
188e7bd
Sort imports
Ddelval Apr 16, 2023
93bd28f
Style fixes
Ddelval Apr 16, 2023
979e8d2
Remove trailing whitespace
Ddelval Apr 16, 2023
5d0a9e0
Fix docstring spacing
Ddelval Apr 16, 2023
7bdc97d
Remove left-over parameter
Ddelval Apr 16, 2023
4b46b1c
Remove left-over import
Ddelval Apr 16, 2023
73a9c79
Use constant weights in linear_differential_operator
Ddelval Jun 21, 2023
7b480c4
Fix typo
Ddelval Jun 21, 2023
1ec886e
Tidy grid basis implementation
Ddelval Jun 21, 2023
ff23777
Change grid basis hash calculation
Ddelval Jun 21, 2023
56fe4de
Add missing empty line between methods
Ddelval Jun 21, 2023
cd92f80
Merge branch 'develop' into feature/speedUpPenalization
Ddelval Jun 21, 2023
6757f1b
Merge branch 'develop' into feature/speedUpPenalization
Ddelval Jun 23, 2023
f69db8f
Use tuples in the hash calculation
Ddelval Jun 23, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
225 changes: 191 additions & 34 deletions skfda/misc/operators/_linear_differential_operator.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
from __future__ import annotations

import numbers
from typing import Callable, Sequence, Union
from typing import Callable, Sequence, Tuple, Union

import findiff
import numpy as np
import scipy.integrate
from numpy import polyder, polyint, polymul, polyval
Expand All @@ -16,9 +17,10 @@
CustomBasis,
FourierBasis,
MonomialBasis,
_GridBasis,
)
from ...typing._base import DomainRangeLike
from ...typing._numpy import NDArrayFloat
from ...typing._numpy import NDArrayFloat, NDArrayInt
from ._operators import Operator, gram_matrix, gram_matrix_optimization

Order = int
Expand Down Expand Up @@ -577,50 +579,188 @@ def bspline_penalty_matrix_optimized(
return penalty_matrix


def _optimized_operator_evaluation_in_grid(
linear_operator: LinearDifferentialOperator,
grid_points: NDArrayFloat,
findiff_accuracy: int,
) -> Tuple[NDArrayFloat, NDArrayInt]:
"""
Compute the linear operator applied to the delta basis of the grid.

The delta basis is the basis where the i-th basis function is 1 in the
i-th point and 0 in the rest of points.

Returns:
- A matrix where the i-th row contians the values of the linear operator
Ddelval marked this conversation as resolved.
Show resolved Hide resolved
applied to the i-th basis function of the delta basis.

- The domain limits of each basis function of the delta basis. Outside
these limits, the result of applying the linear operator to the
given basis function is zero.

Example:
>>> import numpy as np
>>> from skfda.misc.operators import LinearDifferentialOperator
>>> _optimized_operator_evaluation_in_grid(
... LinearDifferentialOperator(2),
... np.linspace(0, 7, 8),
... 2,
... )
(array([[ 2., 1., 0., 0., 0., 0., 0., 0.],
[-5., -2., 1., 0., 0., 0., 0., 0.],
[ 4., 1., -2., 1., 0., 0., 0., 0.],
[-1., 0., 1., -2., 1., 0., 0., 0.],
[ 0., 0., 0., 1., -2., 1., 0., -1.],
[ 0., 0., 0., 0., 1., -2., 1., 4.],
[ 0., 0., 0., 0., 0., 1., -2., -5.],
[ 0., 0., 0., 0., 0., 0., 1., 2.]]),
array([[0, 1],
[0, 2],
[0, 3],
[0, 4],
[3, 7],
[4, 7],
[5, 7],
[6, 7]]))

Explanation:
Each row of the first array contains the values of the linear operator
applied to the delta basis function.
As it can be seen, each row of the second array cointains the domain
limits of the corresponding row of the first array. That is to say,
the range in which their values are not 0.
For example, the third row of the first array is non-zero until the
fourth element, so the third row of the second array is [0, 3].
"""
n_points = grid_points.shape[0]
diff_coefficients = np.zeros((n_points, n_points))

domain_limits = np.hstack(
[
np.ones((n_points, 1)) * n_points,
np.zeros((n_points, 1)),
],
).astype(int)

# The desired matrix can be obtained by appending the coefficients
# of the finite differences for each point column-wise.
for dif_order, w in enumerate(linear_operator.weights):
Ddelval marked this conversation as resolved.
Show resolved Hide resolved
if w == 0:
continue
for point_index in range(n_points):
vnmabus marked this conversation as resolved.
Show resolved Hide resolved
# Calculate the finite differences coefficients
# the point point_index. These coefficients express the
# contribution of a function in each of the points in the
# grid to the value of the operator applied to the function
# in the point point_index.
finite_diff = findiff.coefs.coefficients_non_uni(
deriv=dif_order,
acc=findiff_accuracy,
coords=grid_points,
idx=point_index,
)

# The offsets are the relative positions of the
# points for which the coefficients are being
# calculated
offsets = finite_diff["offsets"]
coefficients = finite_diff["coefficients"]

# Add the position of the point where the finite
# difference is calculated to get the absolute position
# of the points where each coefficient is applied
positions = offsets + point_index

# If the coefficients for calculating the value of
# the operator in this point (point_index)
# include a coefficient for the i-th point,
# then the i-th basis function contains the point point_index
# in its domain.
# (by definition that fuction is 1 in the i-th point).
domain_limits[positions, 0] = np.minimum(
domain_limits[positions, 0],
np.ones(positions.shape) * point_index,
)
domain_limits[positions, 1] = np.maximum(
domain_limits[positions, 1],
np.ones(positions.shape) * point_index,
)

w_eval = w(grid_points[point_index]) if callable(w) else w
diff_coefficients[positions, point_index] += w_eval * coefficients

return (diff_coefficients, domain_limits)


@gram_matrix_optimization.register
Ddelval marked this conversation as resolved.
Show resolved Hide resolved
def fdatagrid_penalty_matrix_optimized(
def gridbasis_penalty_matrix_optimized(
linear_operator: LinearDifferentialOperator,
basis: FDataGrid,
basis: _GridBasis,
) -> NDArrayFloat:
"""
Optimized version for FDatagrid.

If the data_matrix of the FDataGrid is the identity, the resulting
matrix will be the gram matrix for functions discretized in the
given grid points. That is to say, in order to calculate the norm
of the linear operator applied to functions discretized in the given
grid points, it is enough to multiply the returned matrix by the
values in the points of the grid on both sides.

If the data_matrix of the FDataGrid is not the identity, the resulting
matrix will be the gram matrix for functions expressed as a combination
of the given set of functions (the entries of the fdatagrid). The
intended use is to calculate the norm of the linear operator for
functions expressed in a basis that is a FDataGrid (see CustomBasis).

Note that the first case is a particular case of the second one.
Optimized version for GridBasis.

It evaluates the operator in the grid points.
"""
evaluated_basis = sum(
w(basis.grid_points[0]) if callable(w) else w
* basis.derivative(order=i)(basis.grid_points[0])
for i, w in enumerate(linear_operator.weights)
n_points = basis.grid_points[0].shape[0]
findiff_accuracy = 2

evaluated_basis, domain_limits = _optimized_operator_evaluation_in_grid(
linear_operator,
basis.grid_points[0],
findiff_accuracy=findiff_accuracy,
)
assert not isinstance(evaluated_basis, int)

indices = np.triu_indices(basis.n_samples)
product = evaluated_basis[indices[0]] * evaluated_basis[indices[1]]
# The support of the result of applying the linear operator
# is a small subset of the grid points. We calculate the
# maximum number of points to the left and right of the
# point where the linear operator is applied that can be
# different from zero.

triang_vec = scipy.integrate.simps(product[..., 0], x=basis.grid_points)
max_domain_width = np.max(
np.abs(domain_limits[:, 0] - domain_limits[:, 1]),
)

matrix = np.empty((basis.n_samples, basis.n_samples))
# Precompute the simpson quadrature for the entire domain.
# This is done to avoid computing it for each subinterval of integration.
quadrature = scipy.integrate.simpson(
np.identity(n_points),
basis.grid_points[0],
)

# Set upper matrix
matrix[indices] = triang_vec
product_matrix = np.zeros((basis.n_basis, basis.n_basis))
for i in range(n_points):
vnmabus marked this conversation as resolved.
Show resolved Hide resolved
# Maximum index of another function with not-disjoint support
max_no_disjoint_index = min(n_points, i + 2 * max_domain_width)

# Set lower matrix
matrix[(indices[1], indices[0])] = triang_vec
for j in range(i, max_no_disjoint_index):
if domain_limits[i, 1] < domain_limits[j, 0]:
continue

left_lim = min(
domain_limits[i, 0],
domain_limits[j, 0],
)
right_lim = max(
domain_limits[i, 1],
domain_limits[j, 1],
)

# The right limit of the interval is excluded when slicing
right_lim += 1

# Limit both the multiplication and the integration to
# the interval where the functions are not 0.
operator_product = (
evaluated_basis[i, left_lim:right_lim]
* evaluated_basis[j, left_lim:right_lim]
)
sub_quadrature = quadrature[left_lim:right_lim]

product_matrix[i, j] = np.dot(operator_product, sub_quadrature)
product_matrix[j, i] = product_matrix[i, j]

return matrix
return product_matrix


@gram_matrix_optimization.register
Expand All @@ -644,6 +784,23 @@ def fdatabasis_penalty_matrix_optimized(
return coef_matrix @ basis_pen_matrix @ coef_matrix.T


@gram_matrix_optimization.register
Ddelval marked this conversation as resolved.
Show resolved Hide resolved
Ddelval marked this conversation as resolved.
Show resolved Hide resolved
def fdatagrid_penalty_matrix_optimized(
Ddelval marked this conversation as resolved.
Show resolved Hide resolved
Ddelval marked this conversation as resolved.
Show resolved Hide resolved
linear_operator: LinearDifferentialOperator,
fdatagrid: FDataGrid,
) -> NDArrayFloat:
"""Optimized version for FDataGrid."""
Ddelval marked this conversation as resolved.
Show resolved Hide resolved
Ddelval marked this conversation as resolved.
Show resolved Hide resolved
from ...misc import inner_product_matrix

operator_evaluated = linear_operator(fdatagrid)(
fdatagrid.grid_points[0],
)
fdatagrid_evaluated = FDataGrid(
data_matrix=operator_evaluated[..., 0],
grid_points=fdatagrid.grid_points[0],
)
return inner_product_matrix(fdatagrid_evaluated)

@gram_matrix_optimization.register
Ddelval marked this conversation as resolved.
Show resolved Hide resolved
def custombasis_penalty_matrix_optimized(
linear_operator: LinearDifferentialOperator,
Expand Down
9 changes: 2 additions & 7 deletions skfda/preprocessing/dim_reduction/_fpca.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from ..._utils._sklearn_adapter import BaseEstimator, InductiveTransformerMixin
from ...misc.regularization import L2Regularization, compute_penalty_matrix
from ...representation import FData
from ...representation.basis import Basis, FDataBasis
from ...representation.basis import Basis, FDataBasis, _GridBasis
from ...representation.grid import FDataGrid
from ...typing._numpy import ArrayLike, NDArrayFloat

Expand Down Expand Up @@ -360,13 +360,8 @@ def _fit_grid(

weights_matrix = np.diag(self._weights)

basis = FDataGrid(
data_matrix=np.identity(n_points_discretization),
grid_points=X.grid_points,
)

regularization_matrix = compute_penalty_matrix(
basis_iterable=(basis,),
basis_iterable=(_GridBasis(grid_points=X.grid_points),),
regularization_parameter=1,
regularization=self.regularization,
)
Expand Down
2 changes: 2 additions & 0 deletions skfda/representation/basis/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
"_fdatabasis": ["FDataBasis", "FDataBasisDType"],
"_finite_element_basis": ["FiniteElementBasis", "FiniteElement"],
"_fourier_basis": ["FourierBasis", "Fourier"],
"_grid_basis": ["_GridBasis"],
"_monomial_basis": ["MonomialBasis", "Monomial"],
"_tensor_basis": ["TensorBasis", "Tensor"],
"_vector_basis": ["VectorValuedBasis", "VectorValued"],
Expand Down Expand Up @@ -42,6 +43,7 @@
Fourier as Fourier,
FourierBasis as FourierBasis,
)
from ._grid_basis import _GridBasis as _GridBasis
from ._monomial_basis import (
Monomial as Monomial,
MonomialBasis as MonomialBasis,
Expand Down
3 changes: 1 addition & 2 deletions skfda/representation/basis/_custom_basis.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,8 @@ def __init__(
domain_range=fdata.domain_range,
n_basis=fdata.n_samples,
)
self._check_linearly_independent(fdata)

self.fdata = fdata
self._check_linearly_independent(fdata)
Ddelval marked this conversation as resolved.
Show resolved Hide resolved

@multimethod.multidispatch
def _check_linearly_independent(self, fdata: FData) -> None:
Expand Down
69 changes: 69 additions & 0 deletions skfda/representation/basis/_grid_basis.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
from __future__ import annotations

from typing import Any, TypeVar

from ..._utils import _to_grid_points
from ...typing._base import GridPointsLike
from ...typing._numpy import NDArrayFloat
from ..evaluator import Evaluator
Ddelval marked this conversation as resolved.
Show resolved Hide resolved
from ._basis import Basis

T = TypeVar("T", bound="_GridBasis")


class _GridBasis(Basis):
"""
Basis associated to a grid of points.

Used to express functions whose values are known in a grid of points.
Each basis function is guaranteed to be zero in all points of the grid
except one, where it is one. The intermediate values are interpolated
depending on the interpolation object passed as argument.

This basis is used internally as an alternate representation of a
FDataGrid object. In certain cases, it is more convenient to work with
FDataGrid objects as a basis representation in this basis since, then,
all functional data can be represented as an FDataBasis object.

Parameters:
grid_points: Grid of points where the functions are evaluated.

"""

def __init__(
self,
*,
grid_points: GridPointsLike,
) -> None:
"""Basis constructor."""
self.grid_points = _to_grid_points(grid_points)
domain_range = tuple((s[0], s[-1]) for s in self.grid_points)
super().__init__(
domain_range=domain_range,
n_basis=len(grid_points[0]),
)

def _evaluate(self, eval_points: NDArrayFloat) -> NDArrayFloat:
raise NotImplementedError(
"Evaluation is not implemented in this basis",
)

def __eq__(self, other: Any) -> bool:
return (
super().__eq__(other)
and self.grid_points == other.grid_points
)

def __repr__(self) -> str:
return (
f"{type(self).__name__}("
f"grid_points={self.grid_points}, "
)

def __hash__(self) -> int:
return (
super().__hash__()
^ hash(
Ddelval marked this conversation as resolved.
Show resolved Hide resolved
self.grid_points,
Ddelval marked this conversation as resolved.
Show resolved Hide resolved
)
)
Loading