clean up SGPR common __init__ (#1289)
`gpflow.models.sgpr` contains two sparse GP models, SGPR and GPRFITC. They already shared a common base class that provides the upper_bound() method, and they also had identical `__init__` methods; this PR moves the shared `__init__` into that base class (now named SGPRBase). A short usage sketch follows the file summary below.
st-- committed Feb 26, 2020
1 parent 398c49b commit 5f79844
Showing 2 changed files with 72 additions and 89 deletions.
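For illustration only (this sketch is not part of the commit; it assumes a GPflow 2.x environment, and the synthetic data and hyperparameter values are made up): after this change, SGPR and GPRFITC accept the same constructor arguments, handled by the shared base-class `__init__`.

import numpy as np
import gpflow

# Synthetic data: X is [N, D], Y is [N, R]; Z is a matrix of pseudo-inputs, [M, D].
X = np.random.rand(50, 1)
Y = np.sin(10 * X) + 0.1 * np.random.randn(50, 1)
Z = X[::5].copy()

common_kwargs = dict(
    kernel=gpflow.kernels.SquaredExponential(),
    inducing_variable=Z,
    noise_variance=0.1,
)
sgpr = gpflow.models.SGPR((X, Y), **common_kwargs)     # sparse variational GP regression
fitc = gpflow.models.GPRFITC((X, Y), **common_kwargs)  # FITC approximation to GP regression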
2 changes: 1 addition & 1 deletion gpflow/models/__init__.py
@@ -20,6 +20,6 @@
from .model import BayesianModel, GPModel
# from .gplvm import PCA_reduce
from .sgpmc import SGPMC
-from .sgpr import GPRFITC, SGPR, SGPRUpperMixin
+from .sgpr import GPRFITC, SGPR
from .svgp import SVGP
from .vgp import VGP, VGPOpperArchambeau
159 changes: 71 additions & 88 deletions gpflow/models/sgpr.py
@@ -26,37 +26,70 @@
from ..mean_functions import Zero, MeanFunction


-class SGPRUpperMixin(GPModel):
-    """
-    Upper bound for the GP regression marginal likelihood.
-    It is implemented here as a Mixin class which works with SGPR and GPRFITC.
-
-    Note that the same inducing points are used for calculating the upper bound,
-    as are used for computing the likelihood approximation. This may not lead to
-    the best upper bound. The upper bound can be tightened by optimising Z, just
-    as just like the lower bound. This is especially important in FITC, as FITC
-    is known to produce poor inducing point locations. An optimisable upper bound
-    can be found in https://github.com/markvdw/gp_upper.
-
-    The key reference is
-
-    ::
-
-      @misc{titsias_2014,
-        title={Variational Inference for Gaussian and Determinantal Point Processes},
-        url={http://www2.aueb.gr/users/mtitsias/papers/titsiasNipsVar14.pdf},
-        publisher={Workshop on Advances in Variational Inference (NIPS 2014)},
-        author={Titsias, Michalis K.},
-        year={2014},
-        month={Dec}
-      }
-
-    The key quantity, the trace term, can be computed with
-
-    # To compute each individual element of the trace term, use...
-    v = conditionals.conditional(X, r.model.inducing_variable.Z.numpy(), r.model.kernel,
-                                 np.zeros((len(r.model.inducing_variable), 1)))[1].numpy()
-    """
+class SGPRBase(GPModel):
+    """
+    Common base class for SGPR and GPRFITC that provides the common __init__
+    and upper_bound() methods.
+    """
+
+    def __init__(self,
+                 data: Data,
+                 kernel: Kernel,
+                 mean_function: Optional[MeanFunction] = None,
+                 inducing_variable: Optional[InducingPoints] = None,
+                 num_latent: Optional[int] = None,
+                 noise_variance: float = 1.0,
+                 ):
+        """
+        `data`: a tuple of (X, Y), where the inputs X has shape [N, D]
+            and the outputs Y has shape [N, R].
+        `inducing_variable`: an InducingPoints instance or a matrix of
+            the pseudo inputs Z, of shape [M, D].
+        `kernel`, `mean_function` are appropriate GPflow objects
+
+        This method only works with a Gaussian likelihood, its variance is
+        initialized to `noise_variance`.
+        """
+        likelihood = likelihoods.Gaussian(noise_variance)
+        x_data, y_data = data
+        num_latent = y_data.shape[-1] if num_latent is None else num_latent
+        super().__init__(kernel, likelihood, mean_function, num_latent=num_latent)
+
+        self.data = data
+        self.num_data = x_data.shape[0]
+
+        self.inducing_variable = inducingpoint_wrapper(inducing_variable)

    def upper_bound(self):
+        """
+        Upper bound for the sparse GP regression marginal likelihood. Note that
+        the same inducing points are used for calculating the upper bound, as are
+        used for computing the likelihood approximation. This may not lead to the
+        best upper bound. The upper bound can be tightened by optimising Z, just
+        like the lower bound. This is especially important in FITC, as FITC is
+        known to produce poor inducing point locations. An optimisable upper bound
+        can be found in https://github.com/markvdw/gp_upper.
+
+        The key reference is
+
+        ::
+
+          @misc{titsias_2014,
+            title={Variational Inference for Gaussian and Determinantal Point Processes},
+            url={http://www2.aueb.gr/users/mtitsias/papers/titsiasNipsVar14.pdf},
+            publisher={Workshop on Advances in Variational Inference (NIPS 2014)},
+            author={Titsias, Michalis K.},
+            year={2014},
+            month={Dec}
+          }
+
+        The key quantity, the trace term, can be computed via
+
+        >>> _, v = conditionals.conditional(X, model.inducing_variable.Z, model.kernel,
+        ...                                 np.zeros((len(model.inducing_variable), 1)))
+
+        which computes each individual element of the trace term.
+        """
        x_data, y_data = self.data
        num_data = tf.cast(tf.shape(y_data)[0], default_float())

@@ -92,7 +125,7 @@ def upper_bound(self):
return const + logdet + quad
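As a hedged sketch of how the upper_bound() provided by the new SGPRBase might be used (not code from this commit; it assumes the GPflow 2.x API as of this change, where SGPR.log_likelihood() returns the variational lower bound):

import numpy as np
import gpflow

X = np.random.rand(100, 1)                         # inputs, [N, D]
Y = np.sin(6 * X) + 0.1 * np.random.randn(100, 1)  # outputs, [N, R]
model = gpflow.models.SGPR(
    (X, Y),
    kernel=gpflow.kernels.SquaredExponential(),
    inducing_variable=X[::10].copy(),              # pseudo-inputs Z, [M, D]
)

lower = model.log_likelihood()  # variational lower bound (ELBO) computed by SGPR
upper = model.upper_bound()     # Titsias (2014) upper bound, inherited from SGPRBase
print(float(lower), float(upper))  # the true log marginal likelihood lies between the two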


-class SGPR(SGPRUpperMixin):
+class SGPR(SGPRBase):
"""
Sparse Variational GP regression. The key reference is
@@ -112,31 +145,6 @@ class SGPR(SGPRUpperMixin):
"""

-    def __init__(self,
-                 data: Data,
-                 kernel: Kernel,
-                 mean_function: Optional[MeanFunction] = None,
-                 inducing_variable: Optional[InducingPoints] = None,
-                 num_latent: Optional[int] = None,
-                 noise_variance: float = 1.0,
-                 ):
-        """
-        X is a data matrix, size [N, D]
-        Y is a data matrix, size [N, R]
-        Z is a matrix of pseudo inputs, size [M, D]
-        kernel, mean_function are appropriate GPflow objects
-
-        This method only works with a Gaussian likelihood.
-        """
-        likelihood = likelihoods.Gaussian(noise_variance)
-        x_data, y_data = data
-        num_latent = y_data.shape[-1] if num_latent is None else num_latent
-        super().__init__(kernel, likelihood, mean_function, num_latent)
-        self.data = data
-        self.num_data = x_data.shape[0]
-
-        self.inducing_variable = inducingpoint_wrapper(inducing_variable)

def log_likelihood(self):
"""
Construct a tensorflow function to compute the bound on the marginal
@@ -238,50 +246,25 @@ def compute_qu(self):
return mu, cov


-class GPRFITC(SGPRUpperMixin):
-    def __init__(self,
-                 data: Data,
-                 kernel: Kernel,
-                 mean_function: Optional[MeanFunction] = None,
-                 inducing_variable: Optional[InducingPoints] = None,
-                 noise_variance: float = 1.0,
-                 ):
-        """
-        This implements GP regression with the FITC approximation.
-        The key reference is
-
-        @inproceedings{Snelson06sparsegaussian,
-          author = {Edward Snelson and Zoubin Ghahramani},
-          title = {Sparse Gaussian Processes using Pseudo-inputs},
-          booktitle = {Advances In Neural Information Processing Systems },
-          year = {2006},
-          pages = {1257--1264},
-          publisher = {MIT press}
-        }
-
-        Implementation loosely based on code from GPML matlab library although
-        obviously gradients are automatic in GPflow.
-
-        X is a data matrix, size [N, D]
-        Y is a data matrix, size [N, R]
-        Z is a matrix of pseudo inputs, size [M, D]
-        kernel, mean_function are appropriate GPflow objects
-
-        This method only works with a Gaussian likelihood.
-        """
-
-        mean_function = Zero() if mean_function is None else mean_function
-
-        likelihood = likelihoods.Gaussian(noise_variance)
-        x_data, y_data = data
-        num_latent = y_data.shape[-1]
-        super().__init__(kernel, likelihood, mean_function, num_latent=num_latent)
-
-        self.data = data
-        self.num_data = x_data.shape[0]
-
-        self.inducing_variable = inducingpoint_wrapper(inducing_variable)
+class GPRFITC(SGPRBase):
+    """
+    This implements GP regression with the FITC approximation.
+    The key reference is
+
+    ::
+
+      @inproceedings{Snelson06sparsegaussian,
+        author = {Edward Snelson and Zoubin Ghahramani},
+        title = {Sparse Gaussian Processes using Pseudo-inputs},
+        booktitle = {Advances In Neural Information Processing Systems},
+        year = {2006},
+        pages = {1257--1264},
+        publisher = {MIT press}
+      }
+
+    Implementation loosely based on code from GPML matlab library although
+    obviously gradients are automatic in GPflow.
+    """

def common_terms(self):
x_data, y_data = self.data

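The upper_bound() docstring notes that the bound can be tightened by optimising Z, which matters most for FITC. A minimal sketch of one way to do that, assuming gpflow.optimizers.Scipy is available and that GPRFITC exposes a log_likelihood() objective like SGPR does (again, this is illustrative and not part of the diff):

import numpy as np
import gpflow

X = np.random.rand(200, 1)
Y = np.cos(8 * X) + 0.1 * np.random.randn(200, 1)
model = gpflow.models.GPRFITC(
    (X, Y),
    kernel=gpflow.kernels.SquaredExponential(),
    inducing_variable=X[::20].copy(),
)

print("upper bound before optimisation:", float(model.upper_bound()))

# Maximising the FITC likelihood approximation also moves the inducing inputs Z
# (and the kernel/noise hyperparameters); evaluating upper_bound() at the
# optimised Z typically gives a tighter bound.
opt = gpflow.optimizers.Scipy()
opt.minimize(lambda: -model.log_likelihood(), variables=model.trainable_variables)

print("upper bound after optimisation:", float(model.upper_bound()))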