
Release 2.1.3 #1593

Merged · 10 commits · Oct 7, 2020
6 changes: 4 additions & 2 deletions .circleci/config.yml
@@ -80,7 +80,7 @@ jobs:
     environment:
       ORGANIZATION: GPflow
       PROJECT: docs
-      BRANCH: develop
+      BRANCH: << pipeline.git.branch >>

     steps:
       - run:
@@ -159,7 +159,9 @@ workflows:
           - notebook-test
         filters:
           branches:
-            only: develop
+            only:
+              - master
+              - develop
     - deploy:
         requires:
          - unit-test
1 change: 1 addition & 0 deletions .github/ISSUE_TEMPLATE/config.yml
@@ -0,0 +1 @@
+blank_issues_enabled: false
2 changes: 1 addition & 1 deletion VERSION
@@ -1 +1 @@
-2.1.2
+2.1.3
14 changes: 6 additions & 8 deletions doc/requirements.txt
@@ -1,9 +1,7 @@
-gast==0.2.2
-ipython==7.8.0
-sphinx==2.2.0
-sphinx_autodoc_typehints==1.8.0
-sphinx_rtd_theme==0.4.3
-numpydoc==0.9.1
-nbsphinx==0.4.3
+ipython==7.18.1
+Sphinx==3.2.1
+sphinx-autodoc-typehints==1.11.0
+sphinx-rtd-theme==0.5.0
+numpydoc==1.1.0
+nbsphinx==0.7.1
 pandoc==1.0.2
 git+https://github.com/GPflow/GPflow.git@develop#egg=gpflow
6 changes: 3 additions & 3 deletions doc/source/conf.py
@@ -84,8 +84,8 @@

 # General information about the project.
 project = "GPflow"
-copyright = "2016-2020, James Hensman, Alexander G. de G. Matthews and the GPflow contributors"
-author = "James Hensman and Alexander G. de G. Matthews and others"
+copyright = "2016-2020 The GPflow Contributors"
+author = "James Hensman and Alexander G. de G. Matthews and many others"

 # The version info for the project you're documenting, acts as replacement for
 # |version| and |release|, also used in various other places throughout the
@@ -94,7 +94,7 @@
 # The short X.Y version.
 version = "2.1"
 # The full version, including alpha/beta/rc tags.
-release = "2.1.2"
+release = "2.1.3"

 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.
4 changes: 2 additions & 2 deletions doc/source/intro.rst
@@ -88,7 +88,7 @@ Citing GPflow

 To cite GPflow, please reference the `JMLR paper <http://www.jmlr.org/papers/volume18/16-537/16-537.pdf>`_. Sample BibTeX is given below:

-.. code-block:: bst
+.. code-block:: bib

     @ARTICLE{GPflow2017,
       author = {Matthews, Alexander G. de G. and {van der Wilk}, Mark and Nickson, Tom and Fujii, Keisuke. and {Boukouvalas}, Alexis and {Le{\'o}n-Villagr{\'a}}, Pablo and Ghahramani, Zoubin and Hensman, James},
@@ -107,7 +107,7 @@ with the framework for interdomain approximations and multioutput priors. We revisit the
 framework and describe the design in an `arXiv paper <https://arxiv.org/abs/2003.01115>`_
 which can be cited by users.

-.. code-block:: bst
+.. code-block:: bib

     @article{GPflow2020multioutput,
       author = {{van der Wilk}, Mark and Dutordoir, Vincent and John, ST and
25 changes: 16 additions & 9 deletions doc/source/notebooks/advanced/heteroskedastic.pct.py
@@ -43,7 +43,7 @@


 # %% [markdown]
-# # Data Generation
+# ## Data Generation
 # We generate heteroskedastic data by substituting the random latent functions $f_1$ and $f_2$ of the generative model by deterministic $\sin$ and $\cos$ functions. The input $X$ is built with $N=1001$ uniformly spaced values in the interval $[0, 4\pi]$. The outputs $Y$ are still sampled from a Gaussian likelihood.
 #
 # $$ x_i \in [0, 4\pi], \quad i = 1,\dots,N $$
@@ -77,7 +77,7 @@
 Y = np.random.normal(loc, scale)

 # %% [markdown]
-# # Plot Data
+# ### Plot Data
 # Note how the distribution density (shaded area) and the outputs $Y$ both change depending on the input $X$.

 # %%
@@ -100,10 +100,10 @@ def plot_distribution(X, Y, loc, scale):


 # %% [markdown]
-# # Build Model
+# ## Build Model

 # %% [markdown]
-# ## Likelihood
+# ### Likelihood
 # This implements the following part of the generative model:
 # $$ \text{loc}(x) = f_1(x) $$
 # $$ \text{scale}(x) = \text{transform}(f_2(x)) $$
@@ -118,7 +118,7 @@ def plot_distribution(X, Y, loc, scale):
 print(f"Likelihood's expected latent_dim: {likelihood.latent_dim}")

 # %% [markdown]
-# ## Kernel
+# ### Kernel
 # This implements the following part of the generative model:
 # $$ f_1(x) \sim \mathcal{GP}(0, k_1(\cdot, \cdot)) $$
 # $$ f_2(x) \sim \mathcal{GP}(0, k_2(\cdot, \cdot)) $$
@@ -134,7 +134,7 @@ def plot_distribution(X, Y, loc, scale):
 # The number of kernels contained in gpf.kernels.SeparateIndependent must be the same as likelihood.latent_dim

 # %% [markdown]
-# # Inducing Points
+# ### Inducing Points
 # Since we will use the **SVGP** model to perform inference, we need to implement the inducing variables $U_1$ and $U_2$, both with size $M=20$, which are used to approximate $f_1$ and $f_2$ respectively, and initialize the inducing points positions $Z_1$ and $Z_2$. This gives a total of $2M=40$ inducing variables and inducing points.
 #
 # The inducing variables and their corresponding inputs will be Separate and Independent, but both $Z_1$ and $Z_2$ will be initialized as $Z$, which are placed as $M=20$ equally spaced points in $[\min(X), \max(X)]$.
@@ -154,7 +154,7 @@ def plot_distribution(X, Y, loc, scale):
 )

 # %% [markdown]
-# ## SVGP Model
+# ### SVGP Model
 # Build the **SVGP** model by composing the **Kernel**, the **Likelihood** and the **Inducing Variables**.
 #
 # Note that the model needs to be instructed about the number of latent GPs by passing `num_latent_gps=likelihood.latent_dim`.
@@ -170,7 +170,9 @@ def plot_distribution(X, Y, loc, scale):
 model

 # %% [markdown]
-# # Build Optimizers (NatGrad + Adam)
+# ## Model Optimization
+#
+# ### Build Optimizers (NatGrad + Adam)

 # %%
 data = (X, Y)
@@ -193,7 +195,7 @@ def optimisation_step():


 # %% [markdown]
-# # Run Optimization Loop
+# ### Run Optimization Loop

 # %%
 epochs = 100
@@ -211,3 +213,8 @@ def optimisation_step():
 plot_distribution(X, Y, Ymean, Ystd)

 model
+
+# %% [markdown]
+# ## Further reading
+#
+# See [Chained Gaussian Processes](http://proceedings.mlr.press/v51/saul16.html) by Saul et al. (AISTATS 2016).
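Taken together, the sections re-leveled above assemble the model as follows; this condensed sketch paraphrases the notebook's construction code against the GPflow 2.1 API (illustrative, not a verbatim excerpt):

```python
import numpy as np
import tensorflow_probability as tfp
import gpflow as gpf

# Likelihood: loc(x) = f1(x), scale(x) = transform(f2(x)), with transform = exp
likelihood = gpf.likelihoods.HeteroskedasticTFPConditional(
    distribution_class=tfp.distributions.Normal,
    scale_transform=tfp.bijectors.Exp(),
)
assert likelihood.latent_dim == 2

# One kernel per latent GP; the count must equal likelihood.latent_dim
kernel = gpf.kernels.SeparateIndependent(
    [gpf.kernels.SquaredExponential(), gpf.kernels.SquaredExponential()]
)

# M = 20 inducing points per latent GP, both initialized on the same grid
M = 20
Z = np.linspace(0, 4 * np.pi, M)[:, None]
inducing_variable = gpf.inducing_variables.SeparateIndependentInducingVariables(
    [gpf.inducing_variables.InducingPoints(Z), gpf.inducing_variables.InducingPoints(Z.copy())]
)

model = gpf.models.SVGP(
    kernel=kernel,
    likelihood=likelihood,
    inducing_variable=inducing_variable,
    num_latent_gps=likelihood.latent_dim,
)
```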
4 changes: 4 additions & 0 deletions doc/source/notebooks/advanced/varying_noise.pct.py
@@ -302,3 +302,7 @@ def generate_data(N=100):
 from gpflow.utilities import print_summary

 print_summary(model, fmt="notebook")
+
+# %% [markdown]
+# ## Further reading
+# To model the variance using a second GP, see the [Heteroskedastic regression notebook](heteroskedastic.ipynb).
2 changes: 1 addition & 1 deletion doc/source/notebooks/intro.md
@@ -49,7 +49,7 @@ This section explains the more complex models and features that are available in GPflow.

 - [Markov Chain Monte Carlo (MCMC)](advanced/mcmc.ipynb): using Hamiltonian Monte Carlo to sample the posterior GP and hyperparameters.
 - [Ordinal regression](advanced/ordinal_regression.ipynb): using GPflow to deal with ordinal variables.
-- [Gaussian process regression with varying output noise](advanced/varying_noise.ipynb) for different data points, using a custom likelihood or the `SwitchedLikelihood`.
+- [Gaussian process regression with varying output noise](advanced/varying_noise.ipynb) for different data points, using a custom likelihood or the `SwitchedLikelihood`, and [Heteroskedastic regression with a multi-latent likelihood](advanced/heteroskedastic.ipynb).
 - [Multiclass classification](advanced/multiclass_classification.ipynb) for non-binary examples.
 - [GPs for big data](advanced/gps_for_big_data.ipynb): using GPflow's Sparse Variational Gaussian Process (SVGP) model (Hensman et al., 2013; 2015). Use sparse methods when dealing with large datasets (more than around a thousand data points).
 <!-- - [GPs for big data (part 2)](advanced/advanced_many_points.ipynb) **[TODO]** -->
37 changes: 13 additions & 24 deletions doc/source/notebooks/intro_to_gpflow2.pct.py
@@ -6,7 +6,7 @@
 #       extension: .py
 #       format_name: percent
 #       format_version: '1.3'
-#       jupytext_version: 1.4.2
+#       jupytext_version: 1.6.0
 #   kernelspec:
 #     display_name: Python 3
 #     language: python
@@ -210,6 +210,8 @@ def noisy_sin(x):
 vgp_model.training_loss_closure(compile=False)  # uncompiled, same as vgp_model.training_loss

 # %% [markdown]
+# ### External data
+#
 # The SVGP model inherits from ExternalDataTrainingLossMixin and expects the data to be passed to training_loss().
 # For SVGP as for the other regression models, `data` is a two-tuple of `(X, Y)`, where `X` is an array/tensor with shape `(num_data, input_dim)` and `Y` is an array/tensor with shape `(num_data, output_dim)`:
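The convention this cell describes can be illustrated with a minimal sketch (the toy data and model here are made up; any SVGP instance behaves the same):

```python
import numpy as np
import gpflow

X = np.random.rand(100, 1)  # (num_data, input_dim)
Y = np.sin(10 * X) + 0.1 * np.random.randn(100, 1)  # (num_data, output_dim)

svgp_model = gpflow.models.SVGP(
    gpflow.kernels.SquaredExponential(),
    gpflow.likelihoods.Gaussian(),
    np.linspace(0.0, 1.0, 10)[:, None],  # inducing point locations
)

# External-data models take the data as an argument rather than storing it:
loss = svgp_model.training_loss((X, Y))
closure = svgp_model.training_loss_closure((X, Y))  # closure for use with optimizers
```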

@@ -473,7 +475,7 @@ def checkpointing_training_loop(
 )

 # %% [markdown]
-# ## Copying (hyper)parameter values between models
+# ### Copying (hyper)parameter values between models
 #
 # It is easy to interact with the set of all parameters of a model or a subcomponent programmatically.
 #
@@ -495,45 +497,32 @@ def checkpointing_training_loop(
 # %% [markdown]
 # ### TensorFlow `saved_model`
 #
-# At present, TensorFlow does not support saving custom variables like instances of the `gpflow.base.Parameter` class, see [this TensorFlow github issue](https://github.com/tensorflow/tensorflow/issues/34908).
-#
-# However, once training is complete, it is possible to clone the model and replace all `gpflow.base.Parameter`s with `tf.constant`s holding the same value:
-
-# %%
-model
-
-# %%
-frozen_model = gpflow.utilities.freeze(model)
-
-# %% [markdown]
-# In order to save the model we need to define a `tf.Module` holding the `tf.function`'s that we wish to export, as well as a reference to the underlying model:
+# In order to save the model we need to explicitly store the `tf.function`-compiled functions that we wish to export:

 # %%
-module_to_save = tf.Module()
-predict_fn = tf.function(
-    frozen_model.predict_f, input_signature=[tf.TensorSpec(shape=[None, 1], dtype=tf.float64)]
+model.predict_f_compiled = tf.function(
+    model.predict_f, input_signature=[tf.TensorSpec(shape=[None, 1], dtype=tf.float64)]
 )
-module_to_save.predict = predict_fn

 # %% [markdown]
-# Save original result for futher comparison. We also convert `samples_input` to a tensor. For a tensor input a `tf.function` will compile a single graph.
+# We also save the original prediction for later comparison. Here `samples_input` needs to be a tensor so that `tf.function` will compile a single graph:

 # %%
 samples_input = tf.convert_to_tensor(samples_input, dtype=default_float())
-original_result = module_to_save.predict(samples_input)
+original_result = model.predict_f_compiled(samples_input)

 # %% [markdown]
-# Let's save the module
+# Let's save the model:
 # %%
 save_dir = str(pathlib.Path(tempfile.gettempdir()))
-tf.saved_model.save(module_to_save, save_dir)
+tf.saved_model.save(model, save_dir)

 # %% [markdown]
-# Load module back as new instance and compare predict results
+# We can load the module back as a new instance and compare the prediction results:

 # %%
 loaded_model = tf.saved_model.load(save_dir)
-loaded_result = loaded_model.predict(samples_input)
+loaded_result = loaded_model.predict_f_compiled(samples_input)

 np.testing.assert_array_equal(loaded_result, original_result)
4 changes: 2 additions & 2 deletions gpflow/conditionals/util.py
@@ -279,7 +279,7 @@ def independent_interdomain_conditional(

     # Compute the projection matrix A
     Kmn = tf.reshape(tf.transpose(Kmn, (1, 0, 2, 3)), (L, M, N * P))
-    A = tf.linalg.triangular_solve(Lm, Kmn, lower=True)  # [L, M, M] * [L, M, P] -> [L, M, P]
+    A = tf.linalg.triangular_solve(Lm, Kmn, lower=True)  # [L, M, M] \ [L, M, N*P] -> [L, M, N*P]
     Ar = tf.reshape(A, (L, M, N, P))

     # compute the covariance due to the conditioning
@@ -300,7 +300,7 @@ def independent_interdomain_conditional(

     # another backsubstitution in the unwhitened case
     if not white:
-        A = tf.linalg.triangular_solve(Lm, Ar)  # [L, M, M] * [L, M, P] -> [L, M, P]
+        A = tf.linalg.triangular_solve(Lm, A)  # [L, M, M] \ [L, M, N*P] -> [L, M, N*P]
         Ar = tf.reshape(A, (L, M, N, P))

     fmean = tf.tensordot(Ar, f, [[1, 0], [0, 1]])  # [N, P]
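The second hunk is a genuine bug fix, not just a comment fix: the unwhitened branch previously solved against the reshaped `Ar`, of shape [L, M, N, P], where `tf.linalg.triangular_solve` expects the [L, M, N*P] matrix `A`. A standalone shape check along the lines of the corrected comments (sizes made up for illustration):

```python
import tensorflow as tf

L_, M, N, P = 2, 3, 5, 4  # arbitrary sizes
Lm = tf.linalg.cholesky(tf.eye(M, batch_shape=[L_], dtype=tf.float64))  # [L, M, M]
Kmn = tf.random.normal((L_, M, N * P), dtype=tf.float64)  # [L, M, N*P]

A = tf.linalg.triangular_solve(Lm, Kmn, lower=True)  # [L, M, M] \ [L, M, N*P]
print(A.shape)  # (2, 3, 20), i.e. [L, M, N*P] as the corrected comments state
```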
10 changes: 6 additions & 4 deletions gpflow/kullback_leiblers.py
@@ -15,6 +15,7 @@
 # -*- coding: utf-8 -*-

 import tensorflow as tf
+from packaging.version import Version

 from .config import default_float, default_jitter
 from .covariances.kuus import Kuu
@@ -131,10 +132,11 @@ def gauss_kl(q_mu, q_sqrt, K=None, *, K_cholesky=None):
         ]  # [M, M] -> [M, 1]
         trace = tf.reduce_sum(K_inv * tf.square(q_sqrt))
     else:
-        # TODO: broadcast instead of tile when tf allows -- tf2.1 segfaults
-        # (https://github.com/tensorflow/tensorflow/issues/37584).
-        # See # https://github.com/GPflow/GPflow/issues/1321
-        Lp_full = Lp if is_batched else tf.tile(tf.expand_dims(Lp, 0), [L, 1, 1])
+        if is_batched or Version(tf.__version__) >= Version("2.2"):
+            Lp_full = Lp
+        else:
+            # workaround for segfaults when broadcasting in TensorFlow<2.2
+            Lp_full = tf.tile(tf.expand_dims(Lp, 0), [L, 1, 1])
         LpiLq = tf.linalg.triangular_solve(Lp_full, Lq_full, lower=True)
         trace = tf.reduce_sum(tf.square(LpiLq))
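The new branch relies on `tf.linalg.triangular_solve` broadcasting an unbatched [M, M] system against a batched [L, M, M] right-hand side, which TensorFlow supports from 2.2 (earlier versions could segfault, hence the tile fallback). A standalone sketch of the broadcasting behaviour:

```python
import tensorflow as tf

M, L_ = 3, 4
Lp = tf.linalg.cholesky(tf.eye(M, dtype=tf.float64))  # shared prior Cholesky, [M, M]
Lq_full = tf.linalg.cholesky(tf.eye(M, batch_shape=[L_], dtype=tf.float64))  # [L, M, M]

# On TF >= 2.2 the unbatched lhs broadcasts against the batched rhs, so no tf.tile is needed:
LpiLq = tf.linalg.triangular_solve(Lp, Lq_full, lower=True)
print(LpiLq.shape)  # (4, 3, 3)
```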
12 changes: 12 additions & 0 deletions gpflow/models/model.py
@@ -211,6 +211,12 @@ def predict_y(
         """
         Compute the mean and variance of the held-out data at the input points.
         """
+        if full_cov or full_output_cov:
+            # See https://github.com/GPflow/GPflow/issues/1461
+            raise NotImplementedError(
+                "The predict_y method currently supports only the argument values full_cov=False and full_output_cov=False"
+            )
+
         f_mean, f_var = self.predict_f(Xnew, full_cov=full_cov, full_output_cov=full_output_cov)
         return self.likelihood.predict_mean_and_var(f_mean, f_var)

@@ -220,6 +226,12 @@ def predict_log_density(
         """
         Compute the log density of the data at the new data points.
         """
+        if full_cov or full_output_cov:
+            # See https://github.com/GPflow/GPflow/issues/1461
+            raise NotImplementedError(
+                "The predict_log_density method currently supports only the argument values full_cov=False and full_output_cov=False"
+            )
+
         X, Y = data
         f_mean, f_var = self.predict_f(X, full_cov=full_cov, full_output_cov=full_output_cov)
         return self.likelihood.predict_log_density(f_mean, f_var, Y)
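The effect is that requesting full covariances from `predict_y` or `predict_log_density` now fails fast instead of returning misleading values (see issue #1461 referenced in the comments). A quick illustration with a throwaway model:

```python
import numpy as np
import gpflow

X = np.random.rand(20, 1)
Y = np.sin(X)
model = gpflow.models.GPR((X, Y), kernel=gpflow.kernels.SquaredExponential())

mean, var = model.predict_y(X)  # still fine: marginal (diagonal) variances
try:
    model.predict_y(X, full_cov=True)
except NotImplementedError as e:
    print(e)  # raised by the new guard
```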
53 changes: 25 additions & 28 deletions gpflow/utilities/utilities.py
@@ -245,34 +245,6 @@ def update_state(parameter_or_variable, path, state):
     return state


-if Version(tfp.__version__) >= Version("0.11.0"):
-    if hasattr(tfp.bijectors.Identity()._cache, "clear"):
-        # implementation in `master` branch (checked 29 Sep 2020) provides clear():
-
-        def _clear_bijector_cache(bijector: tfp.bijectors.Bijector):
-            bijector._cache.clear()
-
-    else:
-        # previous versions (including the versions 0.11.0 and 0.11.1 released as of 29 Sep 2020) provide reset(), but its implementation is broken
-
-        def _clear_bijector_cache(bijector: tfp.bijectors.Bijector):
-            # workaround for broken implementation of bijector._cache.reset():
-            cache = bijector._cache
-            cache_type = type(cache.forward)
-            assert type(cache.inverse) == cache_type
-            cache.__init__(cache.forward._func, cache.inverse._func, cache_type)
-
-
-else:
-    # fallback for backwards-compatibility with tensorflow_probability < 0.11.0
-
-    def _clear_bijector_cache(bijector: tfp.bijectors.Bijector):
-        # `_from_x` and `_from_y` are cache dictionaries for forward and inverse transformations
-        # in bijector class.
-        bijector._from_x.clear()
-        bijector._from_y.clear()
-
-
 def reset_cache_bijectors(input_module: tf.Module) -> tf.Module:
     """
     Recursively finds tfp.bijectors.Bijector-s inside the components of the tf.Module using `traverse_component`.
@@ -281,6 +253,31 @@ def reset_cache_bijectors(input_module: tf.Module) -> tf.Module:
     :param input_module: tf.Module including keras.Model, keras.layers.Layer and gpflow.Module.
     :return:
     """
+    if Version(tfp.__version__) >= Version("0.11.0"):
+        if hasattr(tfp.bijectors.Identity()._cache, "clear"):
+            # implementation in `master` branch (checked 29 Sep 2020) provides clear():
+
+            def _clear_bijector_cache(bijector: tfp.bijectors.Bijector):
+                bijector._cache.clear()
+
+        else:
+            # previous versions (including the versions 0.11.0 and 0.11.1 released as of 29 Sep 2020) provide reset(), but its implementation is broken
+
+            def _clear_bijector_cache(bijector: tfp.bijectors.Bijector):
+                # workaround for broken implementation of bijector._cache.reset():
+                cache = bijector._cache
+                cache_type = type(cache.forward)
+                assert type(cache.inverse) == cache_type
+                cache.__init__(cache.forward._func, cache.inverse._func, cache_type)
+
+    else:
+        # fallback for backwards-compatibility with tensorflow_probability < 0.11.0
+
+        def _clear_bijector_cache(bijector: tfp.bijectors.Bijector):
+            # `_from_x` and `_from_y` are cache dictionaries for forward and inverse transformations
+            bijector._from_x.clear()
+            bijector._from_y.clear()
+
     target_types = (tfp.bijectors.Bijector,)
     accumulator = ("", None)
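For context, `reset_cache_bijectors` is what lets utilities such as `gpflow.utilities.deepcopy` handle modules whose bijectors hold cached tensors; a hedged usage sketch (assuming, as I read the code, that `deepcopy` clears the caches via this helper):

```python
import numpy as np
import gpflow

X, Y = np.zeros((2, 1)), np.zeros((2, 1))
model = gpflow.models.GPR((X, Y), kernel=gpflow.kernels.SquaredExponential())
model.kernel.lengthscales.assign(2.0)  # exercising the transform may populate bijector caches
model_copy = gpflow.utilities.deepcopy(model)  # works because cached tensors are cleared first
```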