Commit
Priors on constrained and unconstrained parts of a parameter (#1177)
frgsimpson committed Feb 9, 2020
1 parent 056e59f commit f30d42c
Showing 6 changed files with 281 additions and 105 deletions.
146 changes: 85 additions & 61 deletions doc/source/notebooks/advanced/mcmc.ipynb

Large diffs are not rendered by default.

28 changes: 15 additions & 13 deletions doc/source/notebooks/advanced/mcmc.pct.py
@@ -36,6 +36,9 @@
# convert to float64 for tfp to play nicely with gpflow in 64
f64 = gpflow.utilities.to_default_float

+ tf.random.set_seed(123)


# %matplotlib inline

# %% [markdown]
@@ -64,9 +67,11 @@
# ### Data for a one-dimensional regression problem

# %%
+ rng = np.random.RandomState(42)

N = 30
- X = np.random.rand(N,1)
- Y = np.sin(12*X) + 0.66*np.cos(25*X) + np.random.randn(N,1)*0.1 + 3
+ X = rng.rand(N,1)
+ Y = np.sin(12*X) + 0.66*np.cos(25*X) + rng.randn(N,1)*0.1 + 3
data = (X, Y)

plt.figure(figsize=(12,6))
@@ -121,9 +126,8 @@ def objective():
# We now sample from the posterior using HMC.

# %%
- hmc_helper = gpflow.optimizers.SamplingHelper(
- model.trainable_parameters,
- model.log_marginal_likelihood
+ hmc_helper = gpflow.optimizers.SamplingHelper(
+ model.log_marginal_likelihood, model.trainable_parameters
)

hmc = tfp.mcmc.HamiltonianMonteCarlo(
@@ -166,7 +170,7 @@ def run_chain_fn():
def plot_samples(samples, y_axis_label):

plt.figure(figsize=(8,4))
- for val, param in zip(samples, model.parameters):
+ for val, param in zip(samples, model.parameters):
plt.plot(tf.squeeze(val), label=param_to_name[param])
plt.legend(bbox_to_anchor=(1., 1.))
plt.xlabel('hmc iteration')
@@ -268,11 +272,11 @@ def plot_joint_marginals(samples, y_axis_label):
# %%
# Generate data by sampling from RBF Kernel, and classifying with the argmax
C, N = 3, 100
- X = np.random.rand(N, 1)
+ X = rng.rand(N, 1)
kernel = gpflow.kernels.RBF(lengthscale=0.1)
K = kernel.K(X) + np.eye(N) * 1e-6

- f = np.random.multivariate_normal(mean=np.zeros(N), cov=K, size=(C)).T
+ f = rng.multivariate_normal(mean=np.zeros(N), cov=K, size=(C)).T
Y = np.argmax(f, 1).reshape(-1,).astype(int)
# One-hot encoding
Y_hot = np.zeros((N, C), dtype=bool)
@@ -337,8 +341,7 @@ def objective():
num_samples = 500

hmc_helper = gpflow.optimizers.SamplingHelper(
- model.trainable_parameters,
- model.log_marginal_likelihood
+ model.log_marginal_likelihood, model.trainable_parameters
)

hmc = tfp.mcmc.HamiltonianMonteCarlo(
@@ -415,7 +418,7 @@ def run_chain_fn():

# %%
X = np.linspace(-3,3,20)
- Y = np.random.exponential(np.sin(X)**2)
+ Y = rng.exponential(np.sin(X)**2)

plt.figure()
plt.plot(X,Y,'x')
@@ -486,8 +489,7 @@ def run_adam(model, iterations):
num_samples = 500

hmc_helper = gpflow.optimizers.SamplingHelper(
- model.trainable_parameters,
- model.log_marginal_likelihood
+ model.log_marginal_likelihood, model.trainable_parameters
)

hmc = tfp.mcmc.HamiltonianMonteCarlo(
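
Note: the notebook now passes the log-density callable first and the parameter list second when constructing the sampling helper. A minimal sketch of the updated pattern (illustrative, not part of the commit; `model` is assumed to be the GPR model built earlier in the notebook, and the step size and chain lengths are placeholders):

import tensorflow as tf
import tensorflow_probability as tfp
import gpflow

# New argument order: target log-density first, then the variables that form
# the state of the Markov chain.
hmc_helper = gpflow.optimizers.SamplingHelper(
    model.log_marginal_likelihood, model.trainable_parameters
)

hmc = tfp.mcmc.HamiltonianMonteCarlo(
    target_log_prob_fn=hmc_helper.target_log_prob_fn,
    num_leapfrog_steps=10,
    step_size=0.01,
)

@tf.function
def run_chain_fn():
    return tfp.mcmc.sample_chain(
        num_results=500,
        num_burnin_steps=300,
        current_state=hmc_helper.current_state,
        kernel=hmc,
        trace_fn=lambda _, pkr: pkr.is_accepted,
    )

unconstrained_samples, _ = run_chain_fn()
# Map the unconstrained HMC samples back to constrained parameter values.
parameter_samples = hmc_helper.convert_samples_to_parameter_values(unconstrained_samples)
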
50 changes: 34 additions & 16 deletions gpflow/base.py
@@ -1,4 +1,5 @@
import functools
+ from enum import Enum
from typing import List, Optional, Tuple, Union

import numpy as np
@@ -42,25 +43,33 @@ def _repr_pretty_(self, p, cycle):
p.text(tabulate_module_summary(self, tablefmt=''))


+ class PriorOn(Enum):
+ CONSTRAINED = "constrained"
+ UNCONSTRAINED = "unconstrained"


class Parameter(tf.Module):
def __init__(self,
value,
*,
transform: Optional[Transform] = None,
prior: Optional[Prior] = None,
+ prior_on: Union[str, PriorOn] = PriorOn.CONSTRAINED,
trainable: bool = True,
dtype: Optional[DType] = None,
name: Optional[str] = None):
"""
- Unconstrained parameter representation.
- According to standard terminology `y` is always transformed representation or,
- in other words, it is constrained version of the parameter. Normally, it is hard
- to operate with unconstrained parameters. For e.g. `variance` cannot be negative,
- therefore we need positive constraint and it is natural to use constrained values.
+ A parameter retains both constrained and unconstrained
+ representations. If no transform is provided, these two values will be the same.
+ It is often challenging to operate with unconstrained parameters. For example, a variance cannot be negative,
+ therefore we need a positive constraint and it is natural to use constrained values.
+ A prior can be imposed either on the constrained version (default) or on the unconstrained version of the parameter.
"""
super().__init__()

self._transform = transform
+ self.prior = prior
+ self.prior_on = PriorOn(prior_on)

if isinstance(value, tf.Variable):
self._unconstrained = value
@@ -69,21 +78,30 @@ def __init__(self,
self._unconstrained = tf.Variable(unconstrained_value,
dtype=dtype, name=name, trainable=trainable)

- self.prior = prior

def log_prior(self):
- x = self.read_value()
- y = self._unconstrained
+ """ Prior probability density of the constrained variable. """

- if self.prior is not None:
- out = tf.reduce_sum(self.prior.log_prob(x))
- if self.transform is not None:
- log_det_jacobian = self.transform.forward_log_det_jacobian(y, y.shape.ndims)
- out += tf.reduce_sum(log_det_jacobian)
- return out
- else:
+ if self.prior is None:
return tf.convert_to_tensor(0., dtype=self.dtype)

+ y = self.read_value()

+ if self.prior_on == PriorOn.CONSTRAINED:
+ # evaluation is in same space as prior
+ return tf.reduce_sum(self.prior.log_prob(y))

+ else:
+ # prior on unconstrained, but evaluating log-prior in constrained space
+ x = self._unconstrained
+ log_p = tf.reduce_sum(self.prior.log_prob(x))

+ if self.transform is not None:
+ # need to include log|Jacobian| to account for coordinate transform
+ log_det_jacobian = self.transform.inverse_log_det_jacobian(y, y.shape.ndims)
+ log_p += tf.reduce_sum(log_det_jacobian)

+ return log_p

def value(self):
return _to_constrained(self._unconstrained.value(), self.transform)

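
With the new `prior_on` switch, `log_prior` still reports the density of the constrained value in both modes; when the prior sits on the unconstrained value, the inverse log-det-Jacobian of the transform is added via the change of variables log p_Y(y) = log p_X(x) + log|dx/dy|. A small sketch of that identity on a standalone Parameter (the value, the Exp bijector and the Normal prior are illustrative assumptions, not part of the commit):

import numpy as np
import tensorflow as tf
import gpflow
from tensorflow_probability import bijectors as tfb
from tensorflow_probability import distributions as tfd

# Positive parameter y = exp(x), with the prior placed on the unconstrained x.
param = gpflow.Parameter(
    2.0,
    transform=tfb.Exp(),
    prior=tfd.Normal(np.float64(0.0), np.float64(1.0)),
    prior_on="unconstrained",
)

x = param.unconstrained_variable
y = param.read_value()
# log_prior should equal the prior evaluated at x plus the inverse
# log-det-Jacobian of the transform evaluated at y.
manual = param.prior.log_prob(x) + param.transform.inverse_log_det_jacobian(y, y.shape.ndims)
np.testing.assert_allclose(param.log_prior(), tf.reduce_sum(manual))
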
14 changes: 11 additions & 3 deletions gpflow/optimizers/mcmc.py
@@ -31,7 +31,7 @@ class SamplingHelper:
Example:
model = <Create GPflow model>
- hmc_helper = SamplingHelper(m.trainable_parameters, lambda: -model.neg_log_marginal_likelihood())
+ hmc_helper = SamplingHelper(model.log_marginal_likelihood, m.trainable_parameters)
target_log_prob_fn = hmc_helper.target_log_prob_fn
current_state = hmc_helper.current_state
@@ -47,11 +47,11 @@ def run_chain_fn():
parameter_samples = hmc_helper.convert_samples_to_parameter_values(hmc_samples)
Args:
- parameters: List of `tensorflow.Variable`s or `gpflow.Parameter`s used as a state of the Markov chain.
target_log_prob_fn: Python callable which represents log-density under the target distribution.
+ model_parameters: List of `tensorflow.Variable`s or `gpflow.Parameter`s used as a state of the Markov chain.
"""

- def __init__(self, model_parameters: ModelParameters, target_log_prob_fn: LogProbabilityFunction):
+ def __init__(self, target_log_prob_fn: LogProbabilityFunction, model_parameters: ModelParameters):
assert all([isinstance(p, (Parameter, tf.Variable)) for p in model_parameters])

self._model_parameters = model_parameters
@@ -84,6 +84,14 @@ def _target_log_prob_fn_closure(*variables):
with tf.GradientTape(watch_accessed_variables=False) as tape:
tape.watch(variables_list)
log_prob = self._target_log_prob_fn()
+ # Now need to correct for the fact that the prob fn is evaluated on the
+ # constrained space while we wish to evaluate it in the unconstrained space
+ for param in self._model_parameters:
+ if isinstance(param, Parameter) and param.transform is not None:
+ x = param.unconstrained_variable
+ log_det_jacobian = param.transform.forward_log_det_jacobian(x,
+ x.shape.ndims)
+ log_prob += tf.reduce_sum(log_det_jacobian)

@tf.function
def grad_fn(dy, variables: Optional[tf.Tensor] = None):
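
The closure above corrects for the chain running on the unconstrained variables while the supplied callable is a density over the constrained parameters: it adds the forward log-det-Jacobian of every transformed parameter. A hedged sketch of that bookkeeping on a toy GPR model (the data and kernel are illustrative assumptions, not part of the commit):

import numpy as np
import tensorflow as tf
import gpflow

rng = np.random.RandomState(0)
X = rng.rand(10, 1)
Y = np.sin(3 * X) + 0.1 * rng.randn(10, 1)
model = gpflow.models.GPR((X, Y), kernel=gpflow.kernels.Matern52())

helper = gpflow.optimizers.SamplingHelper(
    model.log_marginal_likelihood, model.trainable_parameters
)

# Reproduce the correction applied inside the wrapped target: the model
# objective plus log|dy/dx| for each parameter that carries a transform.
correction = tf.constant(0.0, dtype=gpflow.default_float())
for p in model.trainable_parameters:
    if isinstance(p, gpflow.Parameter) and p.transform is not None:
        x = p.unconstrained_variable
        correction += tf.reduce_sum(p.transform.forward_log_det_jacobian(x, x.shape.ndims))

expected = model.log_marginal_likelihood() + correction
np.testing.assert_allclose(helper.target_log_prob_fn(), expected)
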
104 changes: 95 additions & 9 deletions tests/test_mcmc_helper.py
@@ -4,8 +4,12 @@
import tensorflow_probability as tfp

import gpflow
+ from gpflow import default_float
+ from gpflow.base import PriorOn
from gpflow.config import set_default_float
from gpflow.utilities import to_default_float
+ from tensorflow_probability.python.bijectors import Exp
+ from tensorflow_probability.python.distributions import Uniform

np.random.seed(1)

Expand All @@ -32,7 +36,7 @@ def test_mcmc_helper_parameters():
model = build_model(data)

hmc_helper = gpflow.optimizers.SamplingHelper(
- model.trainable_parameters, model.log_marginal_likelihood
+ model.log_marginal_likelihood, model.trainable_parameters
)

for i in range(len(model.trainable_parameters)):
@@ -42,34 +46,116 @@
assert model.trainable_parameters[i].unconstrained_variable == hmc_helper.current_state[i]


- def test_mcmc_helper_target_function():
+ def test_mcmc_helper_target_function_constrained():
+ """ Set up priors on the model parameters such that we can
+ readily compute their expected values. """
data = build_data()
model = build_model(data)

+ prior_width = 200.0

hmc_helper = gpflow.optimizers.SamplingHelper(
- model.trainable_parameters, model.log_marginal_likelihood
+ model.log_marginal_likelihood, model.trainable_parameters
)
target_log_prob_fn = hmc_helper.target_log_prob_fn

+ # Priors which are set on the constrained space
+ expected_log_prior = 0.
+ for param in model.trainable_parameters:
+ if param.value() < 1e-3:
+ # Avoid values which would be pathological for the Exp transform
+ param.assign(1.0)

+ param.transform = Exp()

+ low_value = -100
+ high_value = low_value + prior_width

+ param.prior = Uniform(low=np.float64(low_value), high=np.float64(high_value))
+ param.prior_on = PriorOn.CONSTRAINED

+ prior_density_on_constrained = 1 / prior_width
+ prior_density_on_unconstrained = prior_density_on_constrained * param.value()

+ expected_log_prior += np.log(prior_density_on_unconstrained)

+ log_likelihood = model.log_likelihood().numpy()
+ expected_log_prob = log_likelihood + expected_log_prior

+ np.testing.assert_allclose(target_log_prob_fn(), expected_log_prob)


+ def test_mcmc_helper_target_function_unconstrained():
+ """ Verifies the objective for a set of priors which are defined on the unconstrained space.
+ """
+ data = build_data()
+ model = build_model(data)

+ # Set up priors on the model parameters such that we can readily compute their expected values.
+ expected_log_prior = 0.
+ prior_width = 200.0

+ hmc_helper = gpflow.optimizers.SamplingHelper(
+ model.log_marginal_likelihood, model.trainable_parameters
+ )

+ for param in model.trainable_parameters:
+ low_value = -100
+ high_value = low_value + prior_width
+ param.prior = Uniform(low=np.float64(low_value), high=np.float64(high_value))
+ param.prior_on = 'unconstrained'

+ prior_density = 1 / prior_width
+ expected_log_prior += np.log(prior_density)

+ target_log_prob_fn = hmc_helper.target_log_prob_fn
+ expected_log_prob = model.log_likelihood().numpy() + expected_log_prior

- assert model.log_marginal_likelihood() == target_log_prob_fn()
+ np.testing.assert_allclose(target_log_prob_fn(), expected_log_prob)

- model.likelihood.variance.assign(1)

- assert model.log_marginal_likelihood() == target_log_prob_fn()
+ @pytest.mark.parametrize("prior_on", ["constrained", "unconstrained"])
+ def test_mcmc_helper_target_function_no_transforms(prior_on):
+ """ Verifies the objective for a set of priors where no transforms are set.
+ """
+ data = build_data()
+ model = build_model(data)
+ expected_log_prior = 0.
+ prior_width = 200.0

- # test the wrapped closure
+ hmc_helper = gpflow.optimizers.SamplingHelper(
+ model.log_marginal_likelihood, model.trainable_parameters
+ )

+ for param in model.trainable_parameters:
+ param.transform = None
+ low_value = -100
+ high_value = low_value + prior_width
+ param.prior = Uniform(low=np.float64(low_value), high=np.float64(high_value))
+ param.prior_on = prior_on

+ prior_density = 1 / prior_width
+ expected_log_prior += np.log(prior_density)

+ log_likelihood = model.log_likelihood().numpy()
+ expected_log_prob = log_likelihood + expected_log_prior
+ target_log_prob_fn = hmc_helper.target_log_prob_fn

+ np.testing.assert_allclose(target_log_prob_fn(), expected_log_prob)

+ # Test the wrapped closure
log_prob, grad_fn = target_log_prob_fn.__original_wrapped__()
- grad, nones = grad_fn(1, [None] * len(model.trainable_parameters))
+ grad, nones = grad_fn(1, [None] * len(model.trainable_parameters))
assert len(grad) == len(model.trainable_parameters)
assert nones == [None] * len(model.trainable_parameters)


def test_mcmc_sampler_integration():
data = build_data()
model = build_model(data)

hmc_helper = gpflow.optimizers.SamplingHelper(
- model.trainable_parameters, model.log_marginal_likelihood
+ model.log_marginal_likelihood, model.trainable_parameters
)

hmc = tfp.mcmc.HamiltonianMonteCarlo(
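
The tests above assign priors and `prior_on` directly to an existing model's parameters, which is essentially the workflow a user would follow before sampling. A brief hedged sketch (the data, kernel, and the Gamma/Normal priors are illustrative choices, not part of the commit):

import numpy as np
import tensorflow_probability as tfp
import gpflow
from gpflow.utilities import to_default_float

rng = np.random.RandomState(0)
X = rng.rand(20, 1)
Y = np.sin(6 * X) + 0.1 * rng.randn(20, 1)
model = gpflow.models.GPR((X, Y), kernel=gpflow.kernels.Matern52())

# Default behaviour: the prior is placed on the constrained (positive) value.
model.kernel.variance.prior = tfp.distributions.Gamma(to_default_float(2.0), to_default_float(2.0))

# New option: place the prior on the unconstrained value instead.
model.kernel.lengthscale.prior = tfp.distributions.Normal(to_default_float(0.0), to_default_float(1.0))
model.kernel.lengthscale.prior_on = "unconstrained"

# The helper then works exactly as before.
hmc_helper = gpflow.optimizers.SamplingHelper(
    model.log_marginal_likelihood, model.trainable_parameters
)
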
