
Heteroscedastic GPR model #1704
Draft: wants to merge 19 commits into base: develop
512 changes: 512 additions & 0 deletions doc/source/notebooks/advanced/het_gpr_script.ipynb

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions gpflow/models/__init__.py
@@ -2,6 +2,7 @@
from .gplvm import GPLVM, BayesianGPLVM
from .gpmc import GPMC
from .gpr import GPR
+from .het_gpr import het_GPR
from .model import BayesianModel, GPModel
from .sgpmc import SGPMC
from .sgpr import GPRFITC, SGPR
108 changes: 108 additions & 0 deletions gpflow/models/het_gpr.py
@@ -0,0 +1,108 @@
import tensorflow as tf
from typing import Optional

from gpflow import posteriors
from ..kernels import Kernel
from ..logdensities import multivariate_normal
from ..mean_functions import MeanFunction
from ..models.gpr import GPR_with_posterior
from ..models.training_mixins import RegressionData, InputData
from ..base import MeanAndVariance
from ..utilities import add_het_noise_cov


class het_GPR(GPR_with_posterior):
Review comment (Contributor): It would be useful if there was a way to follow the existing GPflow model naming convention in this case.

""" While the vanilla GPR enforces a constant noise variance across the input space, here we allow the
noise amplitude to vary linearly (and hence the noise variance to change quadratically) across the input space.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it true that the noise amplitude varies linearly with this model? Even with the PseudoPoissonLikelihood?

"""

    def __init__(
        self,
        data: RegressionData,
        kernel: Kernel,
        likelihood,
        mean_function: Optional[MeanFunction] = None,
        noise_variance: float = 1.0,
    ):
        super().__init__(data, kernel, mean_function, noise_variance)
        self.likelihood = likelihood

Review comment (Contributor, on the `likelihood` parameter): Could this type annotation be MultiLatentTFPConditional?
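As a usage sketch of the constructor (hedged: `MyHetLikelihood` is a hypothetical stand-in for whatever likelihood exposes the `scale_transform` / `conditional_variance` interface used below; it is not part of this PR):

import numpy as np
import gpflow

rng = np.random.default_rng(0)
X = np.linspace(0.0, 1.0, 100)[:, None]
amplitude = 0.05 + 0.5 * X                       # noise amplitude grows linearly in X
Y = np.sin(6.0 * X) + amplitude * rng.standard_normal(X.shape)

model = gpflow.models.het_GPR(
    data=(X, Y),
    kernel=gpflow.kernels.SquaredExponential(),
    likelihood=MyHetLikelihood(),                # hypothetical heteroscedastic likelihood
)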

    def posterior(self, precompute_cache=posteriors.PrecomputeCacheType.TENSOR):
        """
        Create the Posterior object which contains precomputed matrices for
        faster prediction.

        precompute_cache has three settings:

        - `PrecomputeCacheType.TENSOR` (or `"tensor"`): Precomputes the cached
          quantities and stores them as tensors (which allows differentiating
          through the prediction). This is the default.
        - `PrecomputeCacheType.VARIABLE` (or `"variable"`): Precomputes the cached
          quantities and stores them as variables, which allows for updating
          their values without changing the compute graph (relevant for AOT
          compilation).
        - `PrecomputeCacheType.NOCACHE` (or `"nocache"` or `None`): Avoids
          immediate cache computation. This is useful for avoiding extraneous
          computations when you only want to call the posterior's
          `fused_predict_f` method.
        """
        X, Y = self.data

        return posteriors.HeteroskedasticGPRPosterior(
            kernel=self.kernel,
            X_data=X,
            Y_data=Y,
            likelihood=self.likelihood,
            mean_function=self.mean_function,
            precompute_cache=precompute_cache,
        )
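A hedged sketch of how the cache settings might be exercised, assuming a trained `model` and test inputs `Xnew`, and the standard GPflow posterior API where `predict_f` reuses the cache and `fused_predict_f` recomputes everything:

posterior = model.posterior()                      # default: PrecomputeCacheType.TENSOR
mean, var = posterior.predict_f(Xnew)              # reuses the precomputed matrices

uncached = model.posterior(precompute_cache=None)  # PrecomputeCacheType.NOCACHE
mean2, var2 = uncached.fused_predict_f(Xnew)       # nothing cached; all recomputed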

    def predict_y(
        self, Xnew: InputData, full_cov: bool = False, full_output_cov: bool = False
    ) -> MeanAndVariance:
        """
        Compute the mean and variance of the held-out data at the input points.
        """
        if full_cov or full_output_cov:
            # See https://github.com/GPflow/GPflow/issues/1461
            raise NotImplementedError(
                "The predict_y method currently supports only the argument values "
                "full_cov=False and full_output_cov=False"
            )

        f_mean, f_var = self.predict_f(Xnew, full_cov=full_cov, full_output_cov=full_output_cov)
        # Pack [f_mean, Xnew] so the likelihood can evaluate its input-dependent
        # noise scale at Xnew; the zeros passed as the "variance" of Xnew are a dummy.
        Fs = tf.concat([f_mean, Xnew], axis=-1)
        dummy_f_var = tf.zeros_like(f_var)
        F_vars = tf.concat([f_var, dummy_f_var], axis=-1)
        return self.likelihood.predict_mean_and_var(Fs, F_vars)
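In use, `predict_y` differs from `predict_f` by folding the input-dependent noise into the returned variance; a hedged sketch, continuing the hypothetical model above:

f_mean, f_var = model.predict_f(Xtest)   # latent-function variance only
y_mean, y_var = model.predict_y(Xtest)   # adds the noise variance evaluated at Xtest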

    def _add_noise_cov(self, X, K: tf.Tensor) -> tf.Tensor:
        """
        Returns K + diag(σ²), where σ² is the vector of likelihood noise
        variances evaluated at the inputs X.
        """
        dummy_f = tf.zeros_like(X)
        Fs = tf.concat([dummy_f, X], axis=-1)
        variances = self.likelihood.conditional_variance(Fs)
        return add_het_noise_cov(K, tf.squeeze(variances))

    def log_marginal_likelihood(self) -> tf.Tensor:
        r"""
        Computes the log marginal likelihood.

        .. math::
            \log p(Y | \theta).

        """
        X, Y = self.data
        K = self.kernel(X)
        ks = self._add_noise_cov(X, K)
        L = tf.linalg.cholesky(ks)
        m = self.mean_function(X)

        # [R,] log-likelihoods for each independent dimension of Y
        log_prob = multivariate_normal(Y, m, L)
        return tf.reduce_sum(log_prob)
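For reference, the quantity computed here is the Gaussian log density with the input-dependent noise variances on the diagonal, summed over the independent output dimensions of Y:

\log p(Y \mid \theta) = \sum_r \log \mathcal{N}\big(Y_{:, r} \mid m(X),\; K(X, X) + \mathrm{diag}(\sigma^2(X))\big)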



38 changes: 35 additions & 3 deletions gpflow/posteriors.py
@@ -39,9 +39,10 @@
    SeparateIndependentInducingVariables,
    SharedIndependentInducingVariables,
)
+from .likelihoods import Likelihood
from .kernels import Kernel
from .mean_functions import MeanFunction
-from .utilities import Dispatcher, add_noise_cov
+from .utilities import Dispatcher, add_noise_cov, add_het_noise_cov
from .utilities.ops import leading_transpose


@@ -303,7 +304,7 @@ def _conditional_with_precompute(

    def _precompute(self) -> Tuple[tf.Tensor, ...]:
        Kmm = self.kernel(self.X_data)
-        Kmm_plus_s = add_noise_cov(Kmm, self.likelihood_variance)
+        Kmm_plus_s = self.add_noise(Kmm, self.X_data)

        Lm = tf.linalg.cholesky(Kmm_plus_s)
        Kmm_plus_s_inv = tf.linalg.cholesky_solve(Lm, tf.eye(self.X_data.shape[0], dtype=Lm.dtype))
@@ -331,12 +332,43 @@ def _conditional_fused(
        Kmm = self.kernel(self.X_data)
        Knn = self.kernel(Xnew, full_cov=full_cov)
        Kmn = self.kernel(self.X_data, Xnew)
-        Kmm_plus_s = add_noise_cov(Kmm, self.likelihood_variance)
+        Kmm_plus_s = self.add_noise(Kmm, self.X_data)

        return base_conditional(
            Kmn, Kmm_plus_s, Knn, err, full_cov=full_cov, white=False
        )  # [N, P], [N, P] or [P, N, N]

    def add_noise(self, K: tf.Tensor, X: tf.Tensor):
        return add_noise_cov(K, self.likelihood_variance)


class HeteroskedasticGPRPosterior(GPRPosterior):
    def __init__(
        self,
        kernel,
        X_data: tf.Tensor,
        Y_data: tf.Tensor,
        likelihood: Likelihood,
        mean_function: Optional[mean_functions.MeanFunction] = None,
        *,
        precompute_cache: Optional[PrecomputeCacheType],
    ):
        self.likelihood = likelihood
        data = (X_data, Y_data)
        # The base class still expects a (constant) likelihood_variance; pass a
        # small fixed placeholder, since add_noise below replaces the noise model.
        super().__init__(
            kernel,
            data,
            likelihood_variance=Parameter(1e-6, trainable=False),
            mean_function=mean_function,
            precompute_cache=precompute_cache,
        )

    def evaluate_noise_variance(self, X: tf.Tensor):
        """Noise variance contribution at the inputs X."""
        noise_scale = self.likelihood.scale_transform(X)
        return tf.math.square(noise_scale)

    def add_noise(self, K: tf.Tensor, X: tf.Tensor):
        noise_variance = self.evaluate_noise_variance(X)
        return add_het_noise_cov(K, noise_variance)
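Design note: the refactor above turns GPRPosterior's noise handling into a hook, so this subclass only replaces the noise model; `_precompute` and `_conditional_fused` are inherited unchanged, and the placeholder `likelihood_variance` is never used by the overridden `add_noise`. A hedged numeric sketch of the noise evaluation, assuming `scale_transform` returns the noise amplitude at each input:

# if self.likelihood.scale_transform(X) -> [0.1, 0.2, 0.3]   (amplitudes),
# then evaluate_noise_variance(X)       -> [0.01, 0.04, 0.09] (variances)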


class SGPRPosterior(AbstractPosterior):
"""
9 changes: 9 additions & 0 deletions gpflow/utilities/model_utils.py
@@ -11,3 +11,12 @@ def add_noise_cov(K: tf.Tensor, likelihood_variance: Parameter) -> tf.Tensor:
    k_diag = tf.linalg.diag_part(K)
    s_diag = tf.fill(tf.shape(k_diag), likelihood_variance)
    return tf.linalg.set_diag(K, k_diag + s_diag)


def add_het_noise_cov(K: tf.Tensor, noise_variance: tf.Tensor) -> tf.Tensor:
    """
    Returns K + diag(σ²), where σ² is the likelihood noise variance (vector).
    """
    k_diag = tf.linalg.diag_part(K)
    return tf.linalg.set_diag(K, k_diag + tf.reshape(noise_variance, [-1]))
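A hedged numeric check of the new helper (values illustrative; `add_het_noise_cov` is exported by this PR):

import tensorflow as tf
from gpflow.utilities import add_het_noise_cov

K = tf.constant([[1.0, 0.5],
                 [0.5, 1.0]], dtype=tf.float64)
sigma2 = tf.constant([0.1, 0.4], dtype=tf.float64)
print(add_het_noise_cov(K, sigma2))
# [[1.1 0.5]
#  [0.5 1.4]]  <- per-point noise is added to the diagonal only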