improve NaturalGradient optimizer documentation and add shape checks (#1489)

As discussed in #878, GPflow's NaturalGradient optimizer does not implement the diagonal covariance parametrization (`q_diag=True`). This PR states that limitation explicitly in the documentation and adds extra shape checks.
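For context, a minimal sketch contrasting the two `q_sqrt` layouts (the model, kernel, and inducing points are illustrative, not from this commit); only the full-covariance layout is compatible with `NaturalGradient`:

```python
import numpy as np
import gpflow

Z = np.linspace(0.0, 1.0, 10)[:, None]  # M = 10 inducing points, L = 1 latent GP
kernel = gpflow.kernels.Matern52()
likelihood = gpflow.likelihoods.Gaussian()

full = gpflow.models.SVGP(kernel, likelihood, Z)               # q_diag=False (the default)
diag = gpflow.models.SVGP(kernel, likelihood, Z, q_diag=True)  # diagonal parametrization

print(full.q_sqrt.shape)  # (1, 10, 10), i.e. [L, M, M] -- works with NaturalGradient
print(diag.q_sqrt.shape)  # (10, 1), i.e. [M, L] -- NOT supported by NaturalGradient
```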
st-- committed Jun 4, 2020
1 parent 647ca90 commit 6fda0e9
Showing 2 changed files with 16 additions and 2 deletions.
1 change: 1 addition & 0 deletions doc/source/notebooks/advanced/natural_gradients.pct.py
@@ -74,6 +74,7 @@

# %%
vgp = VGP(data, kernel=gpflow.kernels.Matern52(), likelihood=gpflow.likelihoods.Gaussian())
# (Note that GPflow's NaturalGradient optimizer does not implement diagonal covariance parametrization, i.e., it does not work for `q_diag=True`.)

# %% [markdown]
# The log marginal likelihood lower bound (evidence lower bound or ELBO) of the approximate GP model is:
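For illustration, a minimal sketch of pairing `NaturalGradient` with the `vgp` model constructed above, following the `var_list` convention of `(q_mu, q_sqrt)` tuples described in the optimizer's docstring (the `gamma=1.0` step size is illustrative):

```python
from gpflow.optimizers import NaturalGradient

natgrad = NaturalGradient(gamma=1.0)
variational_params = [(vgp.q_mu, vgp.q_sqrt)]  # gpflow.Parameter instances, not tf.Variables
natgrad.minimize(vgp.training_loss, var_list=variational_params)
print(vgp.elbo())  # ELBO after one natural-gradient step
```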
17 changes: 15 additions & 2 deletions gpflow/optimizers/natgrad.py
@@ -132,6 +132,9 @@ class NaturalGradient(tf.optimizers.Optimizer):
a custom signature (var_list needs to be a list of (q_mu, q_sqrt) tuples,
where q_mu and q_sqrt are gpflow.Parameter instances, not tf.Variable).
Note furthermore that the natural gradients are implemented only for the
full covariance case (i.e., q_diag=True is NOT supported).
When using this optimizer in your work, please cite
@inproceedings{salimbeni18,
@@ -209,6 +212,11 @@ def _natgrad_steps(
):
self._natgrad_apply_gradients(q_mu_grad, q_sqrt_grad, q_mu, q_sqrt, xi_transform)

def _assert_shapes(self, q_mu, q_sqrt):
tf.debugging.assert_shapes(
[(q_mu, ["M", "L"]), (q_sqrt, ["L", "M", "M"]),]
)

def _natgrad_apply_gradients(
self,
q_mu_grad: tf.Tensor,
@@ -250,10 +258,13 @@ def _natgrad_apply_gradients(
:param q_mu_grad: gradient of loss w.r.t. q_mu (in unconstrained space)
:param q_sqrt_grad: gradient of loss w.r.t. q_sqrt (in unconstrained space)
- :param q_mu: parameter for the mean of q(u)
+ :param q_mu: parameter for the mean of q(u) with shape [M, L]
:param q_sqrt: parameter for the square root of the covariance of q(u)
with shape [L, M, M] (the diagonal parametrization, q_diag=True, is NOT supported)
:param xi_transform: the ξ transform to use (self.xi_transform if not specified)
"""
self._assert_shapes(q_mu, q_sqrt)

if xi_transform is None:
xi_transform = self.xi_transform

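For illustration, a minimal sketch (shapes are illustrative) of what the new `_assert_shapes` check accepts and rejects; the exact exception type raised may depend on the TensorFlow version:

```python
import tensorflow as tf

M, L = 10, 2
q_mu = tf.zeros([M, L])
q_sqrt_full = tf.eye(M, batch_shape=[L])  # [L, M, M]: full-covariance layout
q_sqrt_diag = tf.ones([M, L])             # [M, L]: the q_diag=True layout

tf.debugging.assert_shapes([(q_mu, ["M", "L"]), (q_sqrt_full, ["L", "M", "M"])])  # passes
try:
    tf.debugging.assert_shapes([(q_mu, ["M", "L"]), (q_sqrt_diag, ["L", "M", "M"])])
except (ValueError, tf.errors.InvalidArgumentError) as err:
    print("rejected:", err)  # rank 2 does not match the expected [L, M, M]
```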
@@ -331,7 +342,9 @@ def swap_dimensions(method):
def wrapper(a_nd, b_dnn, swap=True):
if swap:
if a_nd.shape.ndims != 2: # pragma: no cover
raise ValueError("The `a_nd` input must have 2 dimensions.")
raise ValueError("The mean parametrization must have 2 dimensions.")
if b_dnn.shape.ndims != 3: # pragma: no cover
raise ValueError("The covariance parametrization must have 3 dimensions.")
a_dn1 = tf.linalg.adjoint(a_nd)[:, :, None]
A_dn1, B_dnn = method(a_dn1, b_dnn)
A_nd = tf.linalg.adjoint(A_dn1[:, :, 0])
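A minimal sketch (variable names mirror the code above) of the layout round trip the `swap_dimensions` wrapper performs around `method`:

```python
import tensorflow as tf

N, D = 5, 3
a_nd = tf.random.normal([N, D])                 # mean-like input, shape [N, D]
a_dn1 = tf.linalg.adjoint(a_nd)[:, :, None]     # transposed and expanded to [D, N, 1]
round_trip = tf.linalg.adjoint(a_dn1[:, :, 0])  # squeezed and transposed back to [N, D]
print(a_dn1.shape, round_trip.shape)  # (3, 5, 1) (5, 3)
```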