
improve NaturalGradient optimizer documentation and add shape checks #1489

Merged · 5 commits · Jun 4, 2020
doc/source/notebooks/advanced/natural_gradients.pct.py (1 change: 1 addition & 0 deletions)
@@ -74,6 +74,7 @@
 
 # %%
 vgp = VGP(data, kernel=gpflow.kernels.Matern52(), likelihood=gpflow.likelihoods.Gaussian())
+# (Note that GPflow's NaturalGradient optimizer does not implement the diagonal covariance parametrization, i.e., it does not work for `q_diag=True`.)
 
 # %% [markdown]
 # The log marginal likelihood lower bound (evidence lower bound or ELBO) of the approximate GP model is:
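For context, here is a hedged sketch of how the model in this notebook cell is trained with the optimizer the new comment refers to. The toy data and the `training_loss` usage are illustrative assumptions on top of the GPflow 2 API, not taken from the diff:

```python
import numpy as np
import gpflow
from gpflow.models import VGP

# Illustrative toy data (the notebook's actual `data` is defined earlier).
X = np.random.rand(100, 1)
Y = np.sin(10 * X) + 0.1 * np.random.randn(100, 1)
data = (X, Y)

vgp = VGP(data, kernel=gpflow.kernels.Matern52(), likelihood=gpflow.likelihoods.Gaussian())

# Natural-gradient step on the variational parameters (q_mu, q_sqrt).
# With a Gaussian likelihood, a single step at gamma=1.0 takes the
# variational distribution to the exact posterior.
natgrad = gpflow.optimizers.NaturalGradient(gamma=1.0)
natgrad.minimize(vgp.training_loss, var_list=[(vgp.q_mu, vgp.q_sqrt)])
```

A model that stores a diagonal `q_sqrt` of shape [M, L] (`q_diag=True`, available on models such as SVGP) cannot be passed here, which is exactly what the added comment warns about.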
gpflow/optimizers/natgrad.py (17 changes: 15 additions & 2 deletions)
@@ -132,6 +132,9 @@ class NaturalGradient(tf.optimizers.Optimizer):
     a custom signature (var_list needs to be a list of (q_mu, q_sqrt) tuples,
     where q_mu and q_sqrt are gpflow.Parameter instances, not tf.Variable).
 
+    Note furthermore that natural gradients are implemented only for the
+    full covariance case (i.e., q_diag=True is NOT supported).
+
     When using in your work, please cite
 
     @inproceedings{salimbeni18,
@@ -209,6 +212,11 @@ def _natgrad_steps(
         ):
             self._natgrad_apply_gradients(q_mu_grad, q_sqrt_grad, q_mu, q_sqrt, xi_transform)
 
+    def _assert_shapes(self, q_mu, q_sqrt):
+        tf.debugging.assert_shapes(
+            [(q_mu, ["M", "L"]), (q_sqrt, ["L", "M", "M"])]
+        )
+
     def _natgrad_apply_gradients(
         self,
         q_mu_grad: tf.Tensor,
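As a standalone illustration of what the new `_assert_shapes` helper enforces (the tensor values are made up; only the shape spec comes from the diff):

```python
import tensorflow as tf

M, L = 10, 2
q_mu = tf.zeros([M, L])              # mean: [M, L]
q_sqrt = tf.eye(M, batch_shape=[L])  # full-covariance square root: [L, M, M]

# Passes: the shapes are consistent with the [M, L] / [L, M, M] spec.
tf.debugging.assert_shapes([(q_mu, ["M", "L"]), (q_sqrt, ["L", "M", "M"])])

# A diagonal parametrization (q_diag=True) stores q_sqrt as [M, L],
# which fails the rank-3 spec and raises a ValueError:
q_sqrt_diag = tf.ones([M, L])
try:
    tf.debugging.assert_shapes([(q_mu, ["M", "L"]), (q_sqrt_diag, ["L", "M", "M"])])
except ValueError as e:
    print(e)
```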
@@ -250,10 +258,13 @@ def _natgrad_apply_gradients(
 
         :param q_mu_grad: gradient of loss w.r.t. q_mu (in unconstrained space)
         :param q_sqrt_grad: gradient of loss w.r.t. q_sqrt (in unconstrained space)
-        :param q_mu: parameter for the mean of q(u)
+        :param q_mu: parameter for the mean of q(u) with shape [M, L]
         :param q_sqrt: parameter for the square root of the covariance of q(u)
+            with shape [L, M, M] (the diagonal parametrization, q_diag=True, is NOT supported)
         :param xi_transform: the ξ transform to use (self.xi_transform if not specified)
         """
+        self._assert_shapes(q_mu, q_sqrt)
+
         if xi_transform is None:
             xi_transform = self.xi_transform
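The `xi_transform` documented above can also be chosen per parameter pair by passing a triplet in `var_list`. A hedged sketch, assuming the `vgp` model from the earlier notebook sketch and that `XiSqrtMeanVar` is importable from this same module, as the natural-gradients notebook does:

```python
from gpflow.optimizers import NaturalGradient
from gpflow.optimizers.natgrad import XiSqrtMeanVar

natgrad = NaturalGradient(gamma=0.01)

# A (q_mu, q_sqrt, xi_transform) triplet overrides self.xi_transform for
# this pair, selecting the xi parametrization used in
# _natgrad_apply_gradients.
natgrad.minimize(
    vgp.training_loss,
    var_list=[(vgp.q_mu, vgp.q_sqrt, XiSqrtMeanVar())],
)
```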

@@ -331,7 +342,9 @@ def swap_dimensions(method):
     def wrapper(a_nd, b_dnn, swap=True):
         if swap:
             if a_nd.shape.ndims != 2:  # pragma: no cover
-                raise ValueError("The `a_nd` input must have 2 dimensions.")
+                raise ValueError("The mean parametrization must have 2 dimensions.")
+            if b_dnn.shape.ndims != 3:  # pragma: no cover
+                raise ValueError("The covariance parametrization must have 3 dimensions.")
             a_dn1 = tf.linalg.adjoint(a_nd)[:, :, None]
             A_dn1, B_dnn = method(a_dn1, b_dnn)
             A_nd = tf.linalg.adjoint(A_dn1[:, :, 0])
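To make the rank requirements in this wrapper concrete, here is the dimension-swapping round trip in isolation (a minimal sketch; the names mirror the wrapper's, the values are made up):

```python
import tensorflow as tf

N, D = 10, 2
a_nd = tf.random.normal([N, D])     # mean-like tensor, rank 2
b_dnn = tf.eye(N, batch_shape=[D])  # covariance-like tensor, rank 3

# What the wrapper does before calling `method`:
a_dn1 = tf.linalg.adjoint(a_nd)[:, :, None]  # [N, D] -> [D, N] -> [D, N, 1]

# ...and what it does to the result afterwards:
a_back = tf.linalg.adjoint(a_dn1[:, :, 0])   # [D, N, 1] -> [D, N] -> [N, D]

assert a_back.shape == a_nd.shape
# The new rank checks reject e.g. a rank-2 b_dnn (the diagonal q_sqrt
# layout) before these reshapes run.
```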