fix Cosine kernel for multivariate inputs (#1357)
The previous version, which operated on the Euclidean distance, returned indefinite covariance matrices on multivariate data. The new version, derived from eq. 4.7 of Wilson (2014), is always positive semidefinite.
Closes #1328.

This PR also changes the definition of the cosine kernel slightly, from sigma * cos(|x - x'| / l) to sigma * cos(2 * pi * (x - x') / l). This makes the lengthscale parameter directly interpretable as period length.

It introduces new IsotropicStationary and AnisotropicStationary base classes.
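
As a quick sanity check of the new definition, a sketch like the following (not part of the diff; it assumes GPflow with this patch applied) verifies that the Cosine Gram matrix on 2-D inputs is positive semidefinite:

    import numpy as np
    import gpflow

    # Multivariate inputs of the kind that triggered #1328.
    X = np.random.randn(20, 2)

    kernel = gpflow.kernels.Cosine(lengthscales=[1.0, 2.0])
    K = kernel(X).numpy()

    # k(d) = sigma^2 * cos(2*pi*sum_i d_i) gives a PSD Gram matrix,
    # up to floating-point error in the eigensolver.
    assert np.linalg.eigvalsh(K).min() > -1e-10
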
ilia-kats committed Mar 26, 2020
1 parent c442e87 commit 8d0128c
Showing 9 changed files with 216 additions and 60 deletions.
4 changes: 2 additions & 2 deletions doc/source/notebooks/tailor/kernel_design.pct.py
@@ -33,8 +33,8 @@
# %matplotlib inline

# %% [markdown]
# To make this new kernel class, we inherit from the base class `gpflow.kernels.Kernel` and implement the three functions below. **NOTE:** Depending on the kernel to be implemented, other classes can be more adequate. For example, if the kernel to be implemented is stationary, you can immediately subclass `gpflow.kernels.Stationary` (at which point you
# only have to override `K_r` or `K_r2`; see the `Stationary` class docstring).
# To make this new kernel class, we inherit from the base class `gpflow.kernels.Kernel` and implement the three functions below. **NOTE:** Depending on the kernel to be implemented, other classes may be more suitable. For example, if the kernel to be implemented is isotropic stationary, you can immediately subclass `gpflow.kernels.IsotropicStationary` (at which point you
# only have to override `K_r` or `K_r2`; see the `IsotropicStationary` class docstring). Stationary but anisotropic kernels should subclass `gpflow.kernels.AnisotropicStationary` and override `K_d`.
#
# #### `__init__`
# In this simple example, the constructor takes no argument (though it could, if that was convenient, for example to pass in an initial value for `variance`). It *must* call the constructor of the superclass with appropriate arguments. Brownian motion is only defined in one dimension, and we'll assume that the `active_dims` are `[0]`, for simplicity.
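
For reference, a minimal kernel written against the new base class could look like the sketch below (not from this commit; the class and its form are invented for illustration):

    import gpflow

    class InverseQuadratic(gpflow.kernels.IsotropicStationary):
        """k(r) = sigma^2 / (1 + r^2), with r the scaled Euclidean distance."""

        def K_r2(self, r2):
            # K_r2 receives the squared scaled distance and must act element-wise.
            return self.variance / (1.0 + r2)
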
2 changes: 2 additions & 0 deletions gpflow/kernels/__init__.py
@@ -21,6 +21,8 @@
Matern52,
RationalQuadratic,
Stationary,
IsotropicStationary,
AnisotropicStationary,
)

Bias = Constant
19 changes: 6 additions & 13 deletions gpflow/kernels/periodic.py
@@ -5,8 +5,9 @@

from ..base import Parameter
from ..utilities import positive
from ..utilities.ops import difference_matrix
from .base import Kernel
from .stationaries import Stationary
from .stationaries import Stationary, IsotropicStationary


class Periodic(Kernel):
@@ -37,16 +38,16 @@ class Periodic(Kernel):
the constructor doesn't have it as an argument.
"""

def __init__(self, base_kernel: Stationary, period: Union[float, List[float]] = 1.0):
def __init__(self, base_kernel: IsotropicStationary, period: Union[float, List[float]] = 1.0):
"""
:param base_kernel: the base kernel to make periodic; must inherit from Stationary
Note that `active_dims` should be specified in the base kernel.
:param period: the period; to induce a different period per active dimension
this must be initialized with an array the same length as the number
of active dimensions e.g. [1., 1., 1.]
"""
if not isinstance(base_kernel, Stationary):
raise TypeError("Periodic requires a Stationary kernel as the `base_kernel`")
if not isinstance(base_kernel, IsotropicStationary):
raise TypeError("Periodic requires an IsotropicStationary kernel as the `base_kernel`")

super().__init__()
self.base_kernel = base_kernel
@@ -65,15 +66,7 @@ def K_diag(self, X: tf.Tensor) -> tf.Tensor:
return self.base_kernel.K_diag(X)

def K(self, X: tf.Tensor, X2: Optional[tf.Tensor] = None) -> tf.Tensor:
if X2 is None:
X2 = X

# Introduce dummy dimension so we can use broadcasting
# TODO: does not support kernel broadcasting
f = tf.expand_dims(X, 1) # now [N, 1, D]
f2 = tf.expand_dims(X2, 0) # now [1, M, D]

r = np.pi * (f - f2) / self.period
r = np.pi * (difference_matrix(X, X2)) / self.period
scaled_sine = tf.sin(r) / self.base_kernel.lengthscales
if hasattr(self.base_kernel, "K_r"):
sine_r = tf.reduce_sum(tf.abs(scaled_sine), -1)
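
Construction of the periodic kernel is unchanged by this refactor; a minimal usage sketch (assuming the patched API) looks like:

    import numpy as np
    import gpflow

    # The base kernel must now be IsotropicStationary; SquaredExponential qualifies.
    base = gpflow.kernels.SquaredExponential(lengthscales=0.5)
    kernel = gpflow.kernels.Periodic(base, period=2.0)

    X = np.random.randn(10, 1)
    K = kernel(X)  # [10, 10] covariance matrix, periodic in the input
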
97 changes: 70 additions & 27 deletions gpflow/kernels/stationaries.py
@@ -3,15 +3,15 @@

from ..base import Parameter
from ..utilities import positive
from ..utilities.ops import square_distance
from ..utilities.ops import square_distance, difference_matrix
from .base import Kernel


class Stationary(Kernel):
"""
Base class for kernels that are stationary, that is, they only depend on
r = || x - x' ||
d = x - x'
This class handles 'ard' behaviour, which stands for 'Automatic Relevance
Determination'. This means that the kernel has one lengthscale per
@@ -46,35 +46,76 @@ def ard(self) -> bool:
"""
return self.lengthscales.shape.ndims > 0

def scaled_squared_euclid_dist(self, X, X2=None):
"""
Returns ||(X - X2ᵀ) / ℓ||² i.e. squared L2-norm.
"""
X_scaled = X / self.lengthscales
X2_scaled = X2 / self.lengthscales if X2 is not None else X2
return square_distance(X_scaled, X2_scaled)
def scale(self, X):
X_scaled = X / self.lengthscales if X is not None else X
return X_scaled

def K_diag(self, X):
return tf.fill(tf.shape(X)[:-1], tf.squeeze(self.variance))


class IsotropicStationary(Stationary):
"""
Base class for isotropic stationary kernels, i.e. kernels that only
depend on
r = ‖x - x'‖
Derived classes should implement one of:
K_r2(self, r2): Returns the kernel evaluated on r² (r2), which is the
squared scaled Euclidean distance. Should operate element-wise on r2.
K_r(self, r): Returns the kernel evaluated on r, which is the scaled
Euclidean distance. Should operate element-wise on r.
"""

def K(self, X, X2=None):
r2 = self.scaled_squared_euclid_dist(X, X2)
return self.K_r2(r2)

def K_diag(self, X):
return tf.fill(tf.shape(X)[:-1], tf.squeeze(self.variance))

def K_r2(self, r2):
"""
Returns the kernel evaluated on r² (`r2`), which is the squared scaled Euclidean distance.
Should operate element-wise on r².
"""
if hasattr(self, "K_r"):
# Clipping around the (single) float precision which is ~1e-45.
r = tf.sqrt(tf.maximum(r2, 1e-40))
return self.K_r(r) # pylint: disable=no-member
raise NotImplementedError

def scaled_squared_euclid_dist(self, X, X2=None):
"""
Returns ‖(X - X2ᵀ) / ℓ‖², i.e. the squared L₂-norm.
"""
return square_distance(self.scale(X), self.scale(X2))


class AnisotropicStationary(Stationary):
"""
Base class for anisotropic stationary kernels, i.e. kernels that only
depend on
d = x - x'
Derived classes should implement K_d(self, d): Returns the kernel evaluated
on d, which is the pairwise difference matrix, scaled by the lengthscale
parameter ℓ (i.e. [(X - X2ᵀ) / ℓ]). The last axis corresponds to the
input dimension.
"""

def K(self, X, X2=None):
return self.K_d(self.scaled_difference_matrix(X, X2))

def scaled_difference_matrix(self, X, X2=None):
"""
Returns [(X - X2ᵀ) / ℓ]. If X has shape [..., N, D] and
X2 has shape [..., M, D], the output will have shape [..., N, M, D].
"""
return difference_matrix(self.scale(X), self.scale(X2))

def K_d(self, d):
raise NotImplementedError
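
To illustrate the K_d contract, a hypothetical subclass (not part of this commit) might look like:

    import numpy as np
    import tensorflow as tf
    import gpflow

    class DecayingCosine(gpflow.kernels.AnisotropicStationary):
        """k(d) = sigma^2 * exp(-|s|) * cos(2*pi*s) with s = sum_i d_i; illustrative only."""

        def K_d(self, d):
            # d has shape [..., N, M, D]; collapse the input dimension first.
            s = tf.reduce_sum(d, axis=-1)
            return self.variance * tf.exp(-tf.abs(s)) * tf.cos(2 * np.pi * s)
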


class SquaredExponential(Stationary):
class SquaredExponential(IsotropicStationary):
"""
The radial basis function (RBF) or squared exponential kernel. The kernel equation is
@@ -91,7 +132,7 @@ def K_r2(self, r2):
return self.variance * tf.exp(-0.5 * r2)


class RationalQuadratic(Stationary):
class RationalQuadratic(IsotropicStationary):
"""
Rational Quadratic kernel,
@@ -112,7 +153,7 @@ def K_r2(self, r2):
return self.variance * (1 + 0.5 * r2 / self.alpha) ** (-self.alpha)


class Exponential(Stationary):
class Exponential(IsotropicStationary):
"""
The Exponential kernel. It is equivalent to a Matern12 kernel with doubled lengthscales.
"""
@@ -121,7 +162,7 @@ def K_r(self, r):
return self.variance * tf.exp(-0.5 * r)


class Matern12(Stationary):
class Matern12(IsotropicStationary):
"""
The Matern 1/2 kernel. Functions drawn from a GP with this kernel are not
differentiable anywhere. The kernel equation is
@@ -137,7 +178,7 @@ def K_r(self, r):
return self.variance * tf.exp(-r)


class Matern32(Stationary):
class Matern32(IsotropicStationary):
"""
The Matern 3/2 kernel. Functions drawn from a GP with this kernel are once
differentiable. The kernel equation is
@@ -154,7 +195,7 @@ def K_r(self, r):
return self.variance * (1.0 + sqrt3 * r) * tf.exp(-sqrt3 * r)


class Matern52(Stationary):
class Matern52(IsotropicStationary):
"""
The Matern 5/2 kernel. Functions drawn from a GP with this kernel are twice
differentiable. The kernel equation is
@@ -171,17 +212,19 @@ def K_r(self, r):
return self.variance * (1.0 + sqrt5 * r + 5.0 / 3.0 * tf.square(r)) * tf.exp(-sqrt5 * r)


class Cosine(Stationary):
class Cosine(AnisotropicStationary):
"""
The Cosine kernel. Functions drawn from a GP with this kernel are sinusoids
(with a random phase). The kernel equation is
k(r) = σ² cos{r}
k(d) = σ² cos{2πd}
where:
r is the Euclidean distance between the input points, scaled by the lengthscale parameter ℓ,
d is the sum of the per-dimension differences between the input points, scaled by the
lengthscale parameter ℓ (i.e. Σᵢ [(X - X2ᵀ) / ℓ]ᵢ),
σ² is the variance parameter.
"""

def K_r(self, r):
return self.variance * tf.cos(r)
def K_d(self, d):
d = tf.reduce_sum(d, axis=-1)
return self.variance * tf.cos(2 * np.pi * d)
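
The 2π factor makes the lengthscale directly readable as the period, as the commit message notes; a small sketch (same assumptions as above) confirms this:

    import numpy as np
    import gpflow

    kernel = gpflow.kernels.Cosine(lengthscales=3.0)

    x = np.array([[0.0]])
    x_shifted = np.array([[3.0]])  # shifted by exactly one lengthscale

    # cos(2*pi) == 1, so a shift of one period leaves the covariance unchanged.
    assert np.allclose(kernel(x, x).numpy(), kernel(x, x_shifted).numpy())
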
31 changes: 27 additions & 4 deletions gpflow/utilities/ops.py
@@ -85,10 +85,10 @@ def square_distance(X, X2):
result may actually be very slightly negative for entries very
close to each other.
This function can deal with leading dimensions in X and X2.
In the simplest case, where X and X2 are both 2-dimensional
(X is [N, D] and X2 is [M, D]), a tensor of shape [N, M] is
returned. If X is [N1, S1, D] and X2 is [N2, S2, D],
then the output will be [N1, S1, N2, S2].
"""
if X2 is None:
@@ -103,6 +103,29 @@
return dist


def difference_matrix(X, X2):
"""
Returns (X - X2ᵀ)
This function can deal with leading dimensions in X and X2.
For example, if X has shape [M, D] and X2 has shape [N, D],
the output will have shape [M, N, D]. If X has shape [I, J, M, D]
and X2 has shape [K, L, N, D], the output will have shape
[I, J, M, K, L, N, D].
"""
if X2 is None:
    X2 = X
    diff = X[..., :, tf.newaxis, :] - X2[..., tf.newaxis, :, :]
    return diff
Xshape = tf.shape(X)
X2shape = tf.shape(X2)
X = tf.reshape(X, (-1, Xshape[-1]))
X2 = tf.reshape(X2, (-1, X2shape[-1]))
diff = X[:, tf.newaxis, :] - X2[tf.newaxis, :, :]
diff = tf.reshape(diff, tf.concat((Xshape[:-1], X2shape[:-1], [Xshape[-1]]), 0))
return diff
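
A shape check for the new helper (a sketch; assumes the patched gpflow.utilities.ops):

    import numpy as np
    import tensorflow as tf
    from gpflow.utilities.ops import difference_matrix

    X = tf.constant(np.random.randn(4, 3, 2))  # [I, N, D] = [4, 3, 2]
    X2 = tf.constant(np.random.randn(5, 2))    # [M, D] = [5, 2]

    # X2=None takes the broadcasting fast path: [4, 3, 3, 2]
    print(difference_matrix(X, None).shape)

    # Mixed leading dimensions are flattened and recombined: [4, 3, 5, 2]
    print(difference_matrix(X, X2).shape)
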


def pca_reduce(X: tf.Tensor, latent_dim: tf.Tensor) -> tf.Tensor:
"""
A helpful function for linearly reducing the dimensionality of the input
29 changes: 22 additions & 7 deletions tests/gpflow/kernels/test_broadcasting.py
@@ -44,12 +44,19 @@
# Following kernels do not broadcast: see https://github.com/GPflow/GPflow/issues/1339
pytest.param(kernels.ArcCosine, marks=pytest.mark.xfail), # broadcasting not implemented
pytest.param(kernels.Coregion, marks=pytest.mark.xfail), # broadcasting not implemented
pytest.param(kernels.Periodic, marks=pytest.mark.xfail), # broadcasting not implemented
pytest.param(kernels.ChangePoints, marks=pytest.mark.xfail), # broadcasting not implemented
pytest.param(kernels.Convolutional, marks=pytest.mark.xfail), # broadcasting not implemented
]


def check_broadcasting(kernel):
S, N, M, D = 5, 4, 3, 2
X1 = np.random.randn(S, N, D)
X2 = np.random.randn(M, D)

compare_vs_map(X1, X2, kernel)


@pytest.mark.parametrize("kernel_class", gpflow.ci_utils.subclasses(kernels.Kernel))
def test_no_kernels_missed(kernel_class):
tested_kernel_classes = KERNEL_CLASSES + [kernels.Sum, kernels.Product]
@@ -62,26 +69,34 @@ def test_no_kernels_missed(kernel_class):
gpflow.kernels.base.ReducingCombination,
kernels.Static,
kernels.Stationary,
kernels.IsotropicStationary,
kernels.AnisotropicStationary,
]
needs_constructor_parameters = [kernels.Periodic]
if kernel_class in tested_kernel_classes:
return # tested
return # tested by test_broadcast_no_active_dims
if kernel_class in skipped_kernel_classes:
return # not tested but currently expected to fail
if kernel_class in abstract_base_classes:
return # cannot test abstract base classes
if kernel_class in needs_constructor_parameters:
return # this has a separate test, see test_broadcast_no_active_dims_periodic
if issubclass(kernel_class, kernels.MultioutputKernel):
return # TODO: cannot currently test MultioutputKernels - see https://github.com/GPflow/GPflow/issues/1339
assert False, f"no broadcasting test for kernel class {kernel_class}"


@pytest.mark.parametrize("kernel_class", KERNEL_CLASSES)
def test_broadcast_no_active_dims(kernel_class):
S, N, M, D = 5, 4, 3, 2
X1 = np.random.randn(S, N, D)
X2 = np.random.randn(M, D)
kernel = kernel_class()
check_broadcasting(kernel_class())

compare_vs_map(X1, X2, kernel)

@pytest.mark.parametrize(
"base_class", [kernel for kernel in gpflow.ci_utils.subclasses(kernels.IsotropicStationary)],
)
def test_broadcast_no_active_dims_periodic(base_class):
kernel = gpflow.kernels.Periodic(base_class())
check_broadcasting(kernel)
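
compare_vs_map is defined near the top of this test module; for orientation, a helper with that contract might look like the following sketch (the body here is assumed, not copied from the repository):

    import numpy as np
    import tensorflow as tf

    def compare_vs_map(X1, X2, kernel):
        # Broadcast evaluation over the leading sample axis of X1...
        K_broadcast = kernel(X1, X2)                    # expected shape [S, N, M]
        # ...compared against an explicit per-sample map.
        K_map = tf.map_fn(lambda x: kernel(x, X2), X1)  # [S, N, M]
        np.testing.assert_allclose(K_broadcast.numpy(), K_map.numpy())
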


@pytest.mark.parametrize("kernel_class", [gpflow.kernels.SquaredExponential])
