In [30]:
import tempfile
import warnings
from pathlib import Path

import numpy as np
import tensorflow as tf

import gpflow
import gpflow as gp
import gpflow.multioutput.features as mfa
import gpflow.multioutput.kernels as mk
from gpflow import settings

# Keep the rendered notebook output clean: suppress Python warnings and
# restrict TensorFlow's logger to FATAL messages.
warnings.simplefilter('ignore')
tf.logging.set_verbosity(tf.logging.FATAL)

### 1. Computation time increases when I create gpflow objects.

_Bad example:_

In [2]:
# Anti-pattern: every iteration builds a brand-new kernel inside the same graph.
for n in range(2, 4):
    kernel = gpflow.kernels.RBF(1)  # A GPflow object with tf.Variables inside; rebinding `kernel` does not remove the previous one from the graph
    x = np.random.randn(n, 1)
    kxx = kernel.K(x)  # This is a tensor added to the graph, not a computed value

**Remember, we operate on a tensorflow graph!**

Every time we create (build and compile) a new gpflow object, even though we overwrite the `kernel` variable, we keep adding more tensors to the graph and only change the Python reference to them.

So, unnecessarily expanding the graph slows down your computation!

_Good example (imagine running this code snippet in `ipython` again and again):_

In [3]:
for n in range(2, 4):
    # Reset the default graph and session first, so objects built in the
    # previous iteration do not pile up in the graph used by this one.
    gpflow.reset_default_graph_and_session()
    kernel = gpflow.kernels.RBF(1)
    x = np.random.randn(n, 1)
    kxx = kernel.K(x)

or

In [4]:
for n in range(2, 4):
    # Each iteration gets its own graph and a session bound to that graph;
    # everything built inside the `with` goes into that graph only.
    with tf.Graph().as_default() as graph, tf.Session(graph=graph).as_default():
        x = np.random.randn(n, 1)
        kernel = gpflow.kernels.RBF(1)
        kxx = kernel.K(x)

### 2. How can I reuse a model on different data?

In [5]:
# Two training points are enough for the model we want to reuse.
x, y = (np.random.randn(2, 1) for _ in range(2))
kernel = gpflow.kernels.RBF(1)
model = gpflow.models.GPR(x, y, kernel)

# A larger, unrelated data set on which the same model is evaluated below.
x_new, y_new = (np.random.randn(100, 1) for _ in range(2))

We can compute the loglikelihood of the model on different data. Note, we haven't changed the original model!

In [6]:
# Tensors behind the model's X and Y data holders; feeding values for them
# replaces the data for this single evaluation only.
x_tensor, y_tensor = model.X.parameter_tensor, model.Y.parameter_tensor
new_data_feed = {x_tensor: x_new, y_tensor: y_new}
model.compute_log_likelihood(feed_dict=new_data_feed)  # the model still holds the old data afterwards

-146.19026836342385

We can do the same by updating (permanently) the value of the dataholders.

In [7]:
# Overwrite the data holders: from here on the model permanently uses the new data.
model.X = x_new
model.Y = y_new
model.compute_log_likelihood()

-146.19026836342385

### 3. I would like to use external TensorFlow tensors and pass them to a GPflow model

You can pass TensorFlow tensors for any non-trainable parameter of GPflow objects, such as DataHolders.

In [8]:
kernel = gpflow.kernels.RBF(1)
likelihood = gpflow.likelihoods.Gaussian()

# The data are TensorFlow tensors rather than numpy arrays, created with
# GPflow's configured float type.
x_tensor = tf.random_normal((100, 1), dtype=settings.float_type)
y_tensor = tf.random_normal((100, 1), dtype=settings.float_type)
z = np.random.randn(10, 1)  # passed to SVGP as Z; a plain numpy array in this cell

model = gpflow.models.SVGP(x_tensor, y_tensor, kern=kernel, likelihood=likelihood, Z=z)
model.compute_log_likelihood()

-196.77394755910805

You can also use TensorFlow variables for trainable objects:

In [9]:
# Z is now an external tf.Variable. Give it GPflow's configured float type
# explicitly, exactly as for x_tensor / y_tensor, so all tensors handed to the
# model agree even when that setting is not numpy's default float64.
z = tf.Variable(np.random.randn(10, 1), dtype=settings.float_type)
model = gpflow.models.SVGP(x_tensor, y_tensor, kern=kernel, likelihood=likelihood, Z=z)

But you have to initialize them manually before interacting with the model:

In [10]:
session = gpflow.get_default_session()
# The externally created variable must be initialized by hand in the session
# before the model is evaluated.
session.run(z.initializer)
model.compute_log_likelihood()

-187.27162747308242

### 5. I would like to share parameters between GPflow objects

Sometimes we want to impose a hard-coded structure on the model.

For example, we have a multi-output model where some output dimensions share the same kernel and others don't.

Unfortunately we cannot do this after the kernel object is compiled. We have to do it at build time and then manually compile the object.

In [11]:
# Build the objects without compiling them, so parameters can still be rewired.
with gpflow.decors.defer_build():
    kernels = [gpflow.kernels.RBF(1) for _ in range(3)]
    # The mixing matrix W has one column per latent kernel, so its second
    # dimension must equal len(kernels): 4 outputs mixed from 3 kernels.
    mo_kernels = mk.SeparateMixedMok(kernels, W=np.random.randn(4, len(kernels)))
    # Share the parameter before compilation: kernel 0 reuses kernel 1's lengthscales.
    mo_kernels.kernels[0].lengthscales = mo_kernels.kernels[1].lengthscales
    # Compile manually once the structure is final.
    mo_kernels.compile()

assert mo_kernels.kernels[0].lengthscales == mo_kernels.kernels[1].lengthscales

### 6. When I try to read parameter values, I'm getting stale values.

In [12]:
x = np.random.randn(100, 1)
y = np.random.randn(100, 1)

kernel = gpflow.kernels.RBF(1)
model = gpflow.models.GPR(x, y, kernel)
optimizer = gpflow.training.AdamOptimizer()
# Create the optimisation op once; it is run explicitly in a session below
# instead of calling optimizer.minimize().
optimizer_tensor = optimizer.make_optimize_tensor(model)

The initial value before optimisation is

In [13]:
# `.value` shows the cached numpy value of the parameter.
model.kern.lengthscales.value

array(1.)

Let's call one step of the optimization and check the new value of the parameter

In [14]:
# Bind the session here so that the following cells, which pass `session`
# explicitly, do not rely on a variable created in an earlier section.
session = gpflow.get_default_session()
session.run(optimizer_tensor)  # one optimisation step
model.kern.lengthscales.value  # still the cached value from before the step

array(1.)

After optimization you would expect that the parameters have been updated, but they haven't. The trick is that the `value` property returns a cached numpy value of a parameter.

You can get the value of the optimized parameter via the `read_value()` method, specifying the correct `session`.

In [15]:
# Read the current value from the given session rather than from the cache.
model.kern.lengthscales.read_value(session)

1.0006322363201534

Or you can `anchor()` your model to the session after the optimisation step. After that, the parameters' cached values are up to date.

In [16]:
# Anchoring refreshes the cached parameter values from the session,
# so `.value` now reflects the optimisation step.
model.anchor(session)
model.kern.lengthscales.value

array(1.00063224)

### 7. Optimizing my model again slows down the computation

Bad practice:

In [17]:

# Use a fresh kernel so this cell is self-contained and mirrors the
# "correct way" cell below, instead of reusing the kernel that already
# belongs to the model from the previous section.
kernel = gpflow.kernels.RBF(1)
x = np.random.randn(100, 1)
y = np.random.randn(100, 1)
model = gpflow.models.GPR(x, y, kernel)

optimizer = gpflow.training.AdamOptimizer()

# Anti-pattern: every minimize() call creates a new set of optimisation tensors.
optimizer.minimize(model, maxiter=2)

# Do something with the model

optimizer.minimize(model, maxiter=2)

The `minimize()` call creates a bunch of optimization tensors. Calling `minimize()` again causes the same issue pointed out in question (1).

The correct way of doing it without polluting your graph:

In [18]:
kernel = gpflow.kernels.RBF(1)
x = np.random.randn(100, 1)
y = np.random.randn(100, 1)
model = gpflow.models.GPR(x, y, kernel)

optimizer = gpflow.training.AdamOptimizer()
# Create the optimisation op a single time ...
optimizer_tensor = optimizer.make_optimize_tensor(model)
session = gpflow.get_default_session()
# ... and run that same op for every step instead of calling minimize() repeatedly.
for _ in range(2):
    session.run(optimizer_tensor)

Don't forget to **anchor** your model to the session after optimisation steps. Then you can continue working with your model.<br/>

In [19]:
# Anchor the model to the session after the optimisation steps before working with it further.
model.anchor(session)

Now, if you need to optimize it again, you can reuse the same optimizer tensor.

In [20]:
# Further optimisation reuses the existing op; nothing new is created here.
for _ in range(2):
    session.run(optimizer_tensor)

# Anchor again after the extra steps.
model.anchor(session)

### 8. How can I save/load a GPflow model?

In [29]:
kernel = gpflow.kernels.RBF(1)
x = np.random.randn(100, 1)
y = np.random.randn(100, 1)
model = gpflow.models.GPR(x, y, kernel)

# Save into the platform's temporary directory rather than a hard-coded
# "/tmp", so the cell also works where that directory does not exist.
path = Path(tempfile.gettempdir()) / "gpr.gpflow"
filename = str(path)  # plain string, reused by the loading cells below
if path.exists():
    path.unlink()  # remove a file left over from a previous run before saving
saver = gpflow.saver.Saver()
saver.save(filename, model)

You can load the model back into a different graph:

In [22]:
# Load the saved model inside a fresh graph, with a session bound to that graph.
with tf.Graph().as_default() as graph:
    with tf.Session(graph=graph).as_default():
        model_copy = saver.load(filename)

Or you can load the model into the same session:

In [23]:
# Load without compiling automatically, so the copy can be compiled by hand below.
ctx_for_loading = gpflow.saver.SaverContext(autocompile=False)
model_copy = saver.load(filename, context=ctx_for_loading)
# Clear the loaded object, then compile it explicitly; the order of these two calls matters.
model_copy.clear()
model_copy.compile()

The difference between the former and the latter approach is in tensorflow name scopes which are used for naming variables. The former approach replicates the instance of the tensorflow objects (which already exist in the original graph), so we need to load it in a new graph.
The latter uses different name scopes for the variables so we can dump the model in the same graph.