
improve NaturalGradient optimizer documentation and add shape checks #1489

Merged · 5 commits · Jun 4, 2020
doc/source/notebooks/advanced/natural_gradients.pct.py (1 change: 1 addition & 0 deletions)
@@ -74,6 +74,7 @@
 
 # %%
 vgp = VGP(data, kernel=gpflow.kernels.Matern52(), likelihood=gpflow.likelihoods.Gaussian())
+# (Note that GPflow's NaturalGradient optimizer does not implement the diagonal covariance parametrization, i.e., it does not work for `q_diag=True`.)
 
 # %% [markdown]
 # The log marginal likelihood lower bound (evidence lower bound or ELBO) of the approximate GP model is:
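For context, here is a hedged sketch of how the model in this notebook cell is trained with the optimizer the new comment refers to. The toy data and the `training_loss` usage are illustrative assumptions on top of the GPflow 2 API, not taken from the diff:

```python
import numpy as np
import gpflow
from gpflow.models import VGP

# Illustrative toy data (the notebook's actual `data` is defined earlier).
X = np.random.rand(100, 1)
Y = np.sin(10 * X) + 0.1 * np.random.randn(100, 1)
data = (X, Y)

vgp = VGP(data, kernel=gpflow.kernels.Matern52(), likelihood=gpflow.likelihoods.Gaussian())

# Natural-gradient step on the variational parameters (q_mu, q_sqrt).
# With a Gaussian likelihood, a single step at gamma=1.0 takes the
# variational distribution to the exact posterior.
natgrad = gpflow.optimizers.NaturalGradient(gamma=1.0)
natgrad.minimize(vgp.training_loss, var_list=[(vgp.q_mu, vgp.q_sqrt)])
```

A model that stores a diagonal `q_sqrt` of shape [M, L] (`q_diag=True`, available on models such as SVGP) cannot be passed here, which is exactly what the added comment warns about.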
gpflow/optimizers/natgrad.py (17 changes: 15 additions & 2 deletions)
@@ -132,6 +132,9 @@ class NaturalGradient(tf.optimizers.Optimizer):
     a custom signature (var_list needs to be a list of (q_mu, q_sqrt) tuples,
     where q_mu and q_sqrt are gpflow.Parameter instances, not tf.Variable).
 
+    Note furthermore that natural gradients are implemented only for the
+    full covariance case (i.e., q_diag=True is NOT supported).
+
     When using in your work, please cite
 
     @inproceedings{salimbeni18,
@@ -209,6 +212,11 @@ def _natgrad_steps(
         ):
             self._natgrad_apply_gradients(q_mu_grad, q_sqrt_grad, q_mu, q_sqrt, xi_transform)
 
+    def _assert_shapes(self, q_mu, q_sqrt):
+        tf.debugging.assert_shapes(
+            [(q_mu, ["M", "L"]), (q_sqrt, ["L", "M", "M"])]
+        )
+
     def _natgrad_apply_gradients(
         self,
         q_mu_grad: tf.Tensor,
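As a standalone illustration of what the new `_assert_shapes` helper enforces (the tensor values are made up; only the shape spec comes from the diff):

```python
import tensorflow as tf

M, L = 10, 2
q_mu = tf.zeros([M, L])              # mean: [M, L]
q_sqrt = tf.eye(M, batch_shape=[L])  # full-covariance square root: [L, M, M]

# Passes: the shapes are consistent with the [M, L] / [L, M, M] spec.
tf.debugging.assert_shapes([(q_mu, ["M", "L"]), (q_sqrt, ["L", "M", "M"])])

# A diagonal parametrization (q_diag=True) stores q_sqrt as [M, L],
# which fails the rank-3 spec and raises a ValueError:
q_sqrt_diag = tf.ones([M, L])
try:
    tf.debugging.assert_shapes([(q_mu, ["M", "L"]), (q_sqrt_diag, ["L", "M", "M"])])
except ValueError as e:
    print(e)
```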
@@ -250,10 +258,13 @@ def _natgrad_apply_gradients(
 
         :param q_mu_grad: gradient of loss w.r.t. q_mu (in unconstrained space)
         :param q_sqrt_grad: gradient of loss w.r.t. q_sqrt (in unconstrained space)
-        :param q_mu: parameter for the mean of q(u)
+        :param q_mu: parameter for the mean of q(u) with shape [M, L]
         :param q_sqrt: parameter for the square root of the covariance of q(u)
+            with shape [L, M, M] (the diagonal parametrization, q_diag=True, is NOT supported)
         :param xi_transform: the ξ transform to use (self.xi_transform if not specified)
         """
+        self._assert_shapes(q_mu, q_sqrt)
+
         if xi_transform is None:
             xi_transform = self.xi_transform
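The `xi_transform` documented above can also be chosen per parameter pair by passing a triplet in `var_list`. A hedged sketch, assuming the `vgp` model from the earlier notebook sketch and that `XiSqrtMeanVar` is importable from this same module, as the natural-gradients notebook does:

```python
from gpflow.optimizers import NaturalGradient
from gpflow.optimizers.natgrad import XiSqrtMeanVar

natgrad = NaturalGradient(gamma=0.01)

# A (q_mu, q_sqrt, xi_transform) triplet overrides self.xi_transform for
# this pair, selecting the xi parametrization used in
# _natgrad_apply_gradients.
natgrad.minimize(
    vgp.training_loss,
    var_list=[(vgp.q_mu, vgp.q_sqrt, XiSqrtMeanVar())],
)
```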

@@ -331,7 +342,9 @@ def swap_dimensions(method):
     def wrapper(a_nd, b_dnn, swap=True):
         if swap:
             if a_nd.shape.ndims != 2:  # pragma: no cover
-                raise ValueError("The `a_nd` input must have 2 dimensions.")
+                raise ValueError("The mean parametrization must have 2 dimensions.")
+            if b_dnn.shape.ndims != 3:  # pragma: no cover
+                raise ValueError("The covariance parametrization must have 3 dimensions.")
             a_dn1 = tf.linalg.adjoint(a_nd)[:, :, None]
             A_dn1, B_dnn = method(a_dn1, b_dnn)
             A_nd = tf.linalg.adjoint(A_dn1[:, :, 0])
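To make the rank requirements in this wrapper concrete, here is the dimension-swapping round trip in isolation (a minimal sketch; the names mirror the wrapper's, the values are made up):

```python
import tensorflow as tf

N, D = 10, 2
a_nd = tf.random.normal([N, D])     # mean-like tensor, rank 2
b_dnn = tf.eye(N, batch_shape=[D])  # covariance-like tensor, rank 3

# What the wrapper does before calling `method`:
a_dn1 = tf.linalg.adjoint(a_nd)[:, :, None]  # [N, D] -> [D, N] -> [D, N, 1]

# ...and what it does to the result afterwards:
a_back = tf.linalg.adjoint(a_dn1[:, :, 0])   # [D, N, 1] -> [D, N] -> [N, D]

assert a_back.shape == a_nd.shape
# The new rank checks reject e.g. a rank-2 b_dnn (the diagonal q_sqrt
# layout) before these reshapes run.
```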