
Heteroscedastic GPR model #1704
Draft: wants to merge 19 commits into base: develop
512 changes: 512 additions & 0 deletions doc/source/notebooks/advanced/het_gpr_script.ipynb

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions gpflow/models/__init__.py
@@ -2,6 +2,7 @@
from .gplvm import GPLVM, BayesianGPLVM
from .gpmc import GPMC
from .gpr import GPR
+from .het_gpr import het_GPR
from .model import BayesianModel, GPModel
from .sgpmc import SGPMC
from .sgpr import GPRFITC, SGPR
108 changes: 108 additions & 0 deletions gpflow/models/het_gpr.py
@@ -0,0 +1,108 @@
import tensorflow as tf
from typing import Optional

from gpflow import posteriors
from ..kernels import Kernel
from ..logdensities import multivariate_normal
from ..mean_functions import MeanFunction
from ..models.gpr import GPR_with_posterior
from ..models.training_mixins import RegressionData, InputData
from ..base import MeanAndVariance
from ..utilities import add_het_noise_cov


class het_GPR(GPR_with_posterior):
Review comment (Contributor): It would be useful if there was a way to follow the existing GPflow model naming convention in this case.

""" While the vanilla GPR enforces a constant noise variance across the input space, here we allow the
noise amplitude to vary linearly (and hence the noise variance to change quadratically) across the input space.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it true that the noise amplitude varies linearly with this model? Even with the PseudoPoissonLikelihood?

"""

    def __init__(
        self,
        data: RegressionData,
        kernel: Kernel,
        likelihood,
        mean_function: Optional[MeanFunction] = None,
        noise_variance: float = 1.0,
    ):
        super().__init__(data, kernel, mean_function, noise_variance)
        self.likelihood = likelihood

Review comment (Contributor, on the `likelihood` parameter): Could this type annotation be MultiLatentTFPConditional?
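As a usage sketch of the constructor (hedged: `MyHetLikelihood` is a hypothetical stand-in for whatever likelihood exposes the `scale_transform` / `conditional_variance` interface used below; it is not part of this PR):

import numpy as np
import gpflow

rng = np.random.default_rng(0)
X = np.linspace(0.0, 1.0, 100)[:, None]
amplitude = 0.05 + 0.5 * X                       # noise amplitude grows linearly in X
Y = np.sin(6.0 * X) + amplitude * rng.standard_normal(X.shape)

model = gpflow.models.het_GPR(
    data=(X, Y),
    kernel=gpflow.kernels.SquaredExponential(),
    likelihood=MyHetLikelihood(),                # hypothetical heteroscedastic likelihood
)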

    def posterior(self, precompute_cache=posteriors.PrecomputeCacheType.TENSOR):
        """
        Create the Posterior object which contains precomputed matrices for
        faster prediction.

        precompute_cache has three settings:

        - `PrecomputeCacheType.TENSOR` (or `"tensor"`): Precomputes the cached
          quantities and stores them as tensors (which allows differentiating
          through the prediction). This is the default.
        - `PrecomputeCacheType.VARIABLE` (or `"variable"`): Precomputes the cached
          quantities and stores them as variables, which allows for updating
          their values without changing the compute graph (relevant for AOT
          compilation).
        - `PrecomputeCacheType.NOCACHE` (or `"nocache"` or `None`): Avoids
          immediate cache computation. This is useful for avoiding extraneous
          computations when you only want to call the posterior's
          `fused_predict_f` method.
        """
        X, Y = self.data

        return posteriors.HeteroskedasticGPRPosterior(
            kernel=self.kernel,
            X_data=X,
            Y_data=Y,
            likelihood=self.likelihood,
            mean_function=self.mean_function,
            precompute_cache=precompute_cache,
        )
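A hedged sketch of how the cache settings might be exercised, assuming a trained `model` and test inputs `Xnew`, and the standard GPflow posterior API where `predict_f` reuses the cache and `fused_predict_f` recomputes everything:

posterior = model.posterior()                      # default: PrecomputeCacheType.TENSOR
mean, var = posterior.predict_f(Xnew)              # reuses the precomputed matrices

uncached = model.posterior(precompute_cache=None)  # PrecomputeCacheType.NOCACHE
mean2, var2 = uncached.fused_predict_f(Xnew)       # nothing cached; all recomputed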

    def predict_y(
        self, Xnew: InputData, full_cov: bool = False, full_output_cov: bool = False
    ) -> MeanAndVariance:
        """
        Compute the mean and variance of the held-out data at the input points.
        """
        if full_cov or full_output_cov:
            # See https://github.com/GPflow/GPflow/issues/1461
            raise NotImplementedError(
                "The predict_y method currently supports only the argument values "
                "full_cov=False and full_output_cov=False"
            )

        f_mean, f_var = self.predict_f(Xnew, full_cov=full_cov, full_output_cov=full_output_cov)
        # Pack [f_mean, Xnew] so the likelihood can evaluate its input-dependent
        # noise scale at Xnew; the zeros passed as the "variance" of Xnew are a dummy.
        Fs = tf.concat([f_mean, Xnew], axis=-1)
        dummy_f_var = tf.zeros_like(f_var)
        F_vars = tf.concat([f_var, dummy_f_var], axis=-1)
        return self.likelihood.predict_mean_and_var(Fs, F_vars)
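In use, `predict_y` differs from `predict_f` by folding the input-dependent noise into the returned variance; a hedged sketch, continuing the hypothetical model above:

f_mean, f_var = model.predict_f(Xtest)   # latent-function variance only
y_mean, y_var = model.predict_y(Xtest)   # adds the noise variance evaluated at Xtest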

    def _add_noise_cov(self, X, K: tf.Tensor) -> tf.Tensor:
        """
        Returns K + diag(σ²), where σ² is the vector of likelihood noise
        variances evaluated at the inputs X.
        """
        dummy_f = tf.zeros_like(X)
        Fs = tf.concat([dummy_f, X], axis=-1)
        variances = self.likelihood.conditional_variance(Fs)
        return add_het_noise_cov(K, tf.squeeze(variances))

    def log_marginal_likelihood(self) -> tf.Tensor:
        r"""
        Computes the log marginal likelihood.

        .. math::
            \log p(Y | \theta).

        """
        X, Y = self.data
        K = self.kernel(X)
        ks = self._add_noise_cov(X, K)
        L = tf.linalg.cholesky(ks)
        m = self.mean_function(X)

        # [R,] log-likelihoods for each independent dimension of Y
        log_prob = multivariate_normal(Y, m, L)
        return tf.reduce_sum(log_prob)
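For reference, the quantity computed here is the Gaussian log density with the input-dependent noise variances on the diagonal, summed over the independent output dimensions of Y:

\log p(Y \mid \theta) = \sum_r \log \mathcal{N}\big(Y_{:, r} \mid m(X),\; K(X, X) + \mathrm{diag}(\sigma^2(X))\big)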



38 changes: 35 additions & 3 deletions gpflow/posteriors.py
@@ -39,9 +39,10 @@
    SeparateIndependentInducingVariables,
    SharedIndependentInducingVariables,
)
+from .likelihoods import Likelihood
from .kernels import Kernel
from .mean_functions import MeanFunction
-from .utilities import Dispatcher, add_noise_cov
+from .utilities import Dispatcher, add_noise_cov, add_het_noise_cov
from .utilities.ops import leading_transpose


@@ -303,7 +304,7 @@ def _conditional_with_precompute(

    def _precompute(self) -> Tuple[tf.Tensor, ...]:
        Kmm = self.kernel(self.X_data)
-        Kmm_plus_s = add_noise_cov(Kmm, self.likelihood_variance)
+        Kmm_plus_s = self.add_noise(Kmm, self.X_data)

        Lm = tf.linalg.cholesky(Kmm_plus_s)
        Kmm_plus_s_inv = tf.linalg.cholesky_solve(Lm, tf.eye(self.X_data.shape[0], dtype=Lm.dtype))
@@ -331,12 +332,43 @@ def _conditional_fused(
        Kmm = self.kernel(self.X_data)
        Knn = self.kernel(Xnew, full_cov=full_cov)
        Kmn = self.kernel(self.X_data, Xnew)
-        Kmm_plus_s = add_noise_cov(Kmm, self.likelihood_variance)
+        Kmm_plus_s = self.add_noise(Kmm, self.X_data)

        return base_conditional(
            Kmn, Kmm_plus_s, Knn, err, full_cov=full_cov, white=False
        )  # [N, P], [N, P] or [P, N, N]

    def add_noise(self, K: tf.Tensor, X: tf.Tensor):
        return add_noise_cov(K, self.likelihood_variance)


class HeteroskedasticGPRPosterior(GPRPosterior):
    def __init__(
        self,
        kernel,
        X_data: tf.Tensor,
        Y_data: tf.Tensor,
        likelihood: Likelihood,
        mean_function: Optional[mean_functions.MeanFunction] = None,
        *,
        precompute_cache: Optional[PrecomputeCacheType],
    ):
        self.likelihood = likelihood
        data = (X_data, Y_data)
        # The base class still expects a (constant) likelihood_variance; pass a
        # small fixed placeholder, since add_noise below replaces the noise model.
        super().__init__(
            kernel,
            data,
            likelihood_variance=Parameter(1e-6, trainable=False),
            mean_function=mean_function,
            precompute_cache=precompute_cache,
        )

    def evaluate_noise_variance(self, X: tf.Tensor):
        """Noise variance contribution at the inputs X."""
        noise_scale = self.likelihood.scale_transform(X)
        return tf.math.square(noise_scale)

    def add_noise(self, K: tf.Tensor, X: tf.Tensor):
        noise_variance = self.evaluate_noise_variance(X)
        return add_het_noise_cov(K, noise_variance)
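Design note: the refactor above turns GPRPosterior's noise handling into a hook, so this subclass only replaces the noise model; `_precompute` and `_conditional_fused` are inherited unchanged, and the placeholder `likelihood_variance` is never used by the overridden `add_noise`. A hedged numeric sketch of the noise evaluation, assuming `scale_transform` returns the noise amplitude at each input:

# if self.likelihood.scale_transform(X) -> [0.1, 0.2, 0.3]   (amplitudes),
# then evaluate_noise_variance(X)       -> [0.01, 0.04, 0.09] (variances)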


class SGPRPosterior(AbstractPosterior):
"""
9 changes: 9 additions & 0 deletions gpflow/utilities/model_utils.py
@@ -11,3 +11,12 @@ def add_noise_cov(K: tf.Tensor, likelihood_variance: Parameter) -> tf.Tensor:
    k_diag = tf.linalg.diag_part(K)
    s_diag = tf.fill(tf.shape(k_diag), likelihood_variance)
    return tf.linalg.set_diag(K, k_diag + s_diag)


def add_het_noise_cov(K: tf.Tensor, noise_variance: tf.Tensor) -> tf.Tensor:
    """
    Returns K + diag(σ²), where σ² is the likelihood noise variance (vector).
    """
    k_diag = tf.linalg.diag_part(K)
    return tf.linalg.set_diag(K, k_diag + tf.reshape(noise_variance, [-1]))
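A hedged numeric check of the new helper (values illustrative; `add_het_noise_cov` is exported by this PR):

import tensorflow as tf
from gpflow.utilities import add_het_noise_cov

K = tf.constant([[1.0, 0.5],
                 [0.5, 1.0]], dtype=tf.float64)
sigma2 = tf.constant([0.1, 0.4], dtype=tf.float64)
print(add_het_noise_cov(K, sigma2))
# [[1.1 0.5]
#  [0.5 1.4]]  <- per-point noise is added to the diagonal only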