# Mixing TensorFlow models with GPflow

This notebook explores two ways to combine TensorFlow neural networks with GPflow models.


In [134]:
import numpy as np
import tensorflow as tf
from matplotlib import pyplot as plt
import gpflow
from gpflow.test_util import notebook_niter, is_continuous_integration
from scipy.cluster.vq import kmeans2

float_type = gpflow.settings.float_type 

ITERATIONS = notebook_niter(100)

ModuleNotFoundError: No module named 'gpflow.test_util'

## Example 1: A convolutional network inside a GPflow model
Here we'll use the GPflow functionality, but put a non-GPflow model inside the kernel.

In [135]:
from typing import Dict, Optional, Tuple
import tensorflow as tf
import tensorflow_datasets as tfds
import gpflow
from gpflow.config import default_float

def map_fn(input_slice: Dict[str, tf.Tensor]):
    updated = input_slice
    updated["image"] = tf.cast(updated["image"], tf.float32) / 255.
    updated["label"] = tf.cast(updated["label"], default_float())
    return updated

autotune = tf.data.experimental.AUTOTUNE

original_dataset = tfds.load(name="mnist", split=tfds.Split.TRAIN)
dataset = original_dataset.shuffle(1024)\
    .batch(32, drop_remainder=True)\
    .map(map_fn, num_parallel_calls=autotune)\
    .prefetch(autotune)

In [136]:
# A vanilla ConvNet
# This gets 97.3% accuracy on MNIST when used on its own (+ final linear layer) after 20K iterations

class KernelWithConvNN(gpflow.kernels.Kernel):
    def __init__(self, input_shape: Tuple, output_dim: int, base_kernel: gpflow.kernels.Kernel):
        super().__init__()
        with self.name_scope:
            self.base_kernel = base_kernel
            self.cnn = tf.keras.Sequential([
                tf.keras.layers.Conv2D(filters=32, kernel_size=(28, 28), padding="same", activation="relu"),
                tf.keras.layers.MaxPool2D(pool_size=(2, 2), strides=2),
                tf.keras.layers.Conv2D(filters=64, kernel_size=(5, 5), padding="same", activation="relu"),
                tf.keras.layers.MaxPool2D(pool_size=(2, 2), strides=2),
                tf.keras.layers.Flatten(),
                tf.keras.layers.Dense(output_dim, activation="relu"),
                tf.keras.layers.Lambda(lambda x: tf.cast(x, default_float()))
            ])
            self.cnn.build(input_shape)
    
    def K(self, a_input: tf.Tensor, b_input: Optional[tf.Tensor] = None, presliced: bool = False) -> tf.Tensor:
        transformed_a = self.cnn(a_input)
        transformed_b = self.cnn(b_input) if b_input is not None else b_input
        return self.base_kernel.K(transformed_a, transformed_b, presliced)
    
    def K_diag(self, a_input: tf.Tensor, presliced: bool = False) -> tf.Tensor:
        return self.base_kernel.K_diag(a_input, presliced)

TensorShape([32, 32])

[0;31mSignature:[0m [0mcnn[0m[0;34m.[0m[0mbuild[0m[0;34m([0m[0minput_shape[0m[0;34m=[0m[0;32mNone[0m[0;34m)[0m[0;34m[0m[0m
[0;31mDocstring:[0m
Builds the model based on input shapes received.

This is to be used for subclassed models, which do not know at instantiation
time what their inputs look like.

This method only exists for users who want to call `model.build()` in a
standalone way (as a substitute for calling the model on real data to
build it). It will never be called by the framework (and thus it will
never throw unexpected errors in an unrelated workflow).

Args:
 input_shape: Single tuple, TensorShape, or list of shapes, where shapes
     are tuples, integers, or TensorShapes.

Raises:
  ValueError:
    1. In case of invalid user-provided data (not of type tuple,
       list, or TensorShape).
    2. If the model requires call arguments that are agnostic
       to the input shapes (positional or kwarg in call signature).
    3. If not all layers were p

In [132]:
class KernelSpaceInducingPoints(gpflow.inducing_variables.InducingPoints):
    pass

In [133]:
v = kcnn(tf.random.normal((3, 28, 28, 1)))

In [98]:
# Kuf is in NN output space
@gpflow.features.dispatch(KernelSpaceInducingPoints, KernelWithNN, object)
def Kuf(inducing_variable, kernel, a_input):
    return kernel.base_kernel(inducing_variable, kernel.cnn(Xnew))


class NNComposedKernel(KernelWithNN):
    """
    This kernel class applies f() to X before calculating K
    """
    
    def K(self, X, X2=None):
        return super().K(self.f(X), self.f(X2))
    
    def Kdiag(self, X):
        return super().Kdiag(self.f(X))

AttributeError: module 'gpflow' has no attribute 'features'

In [5]:
def ex1():
    fX_dim = 5  
    M = 100

    # Unfortunately, only float32 and lower are supported by the Conv layers 
    f = lambda x: tf.cast(cnn_fn(tf.cast(x, tf.float32), fX_dim), float_type)
    kern = NNComposedKernel(gpflow.kernels.Matern32(fX_dim), f)

    # Build the model 

    lik = gpflow.likelihoods.MultiClass(Mnist.Nclasses)

    Z = kmeans2(Mnist.X, M, minit='points')[0]

    model = NN_SVGP(Mnist.X, Mnist.Y, kern, lik, Z=Z, num_latent=Mnist.Nclasses, minibatch_size=1000)

    # Use GPflow wrappers to train. Note all session handling is done for us
    gpflow.training.AdamOptimizer(0.001).minimize(model, maxiter=ITERATIONS)

    # Predictions
    m, v = model.predict_y(Mnist.Xtest)
    preds = np.argmax(m, 1).reshape(Mnist.Ytest.shape)
    correct = preds == Mnist.Ytest.astype(int)
    acc = np.average(correct.astype(float)) * 100.

    print('Accuracy is {:.4f}%'.format(acc))

gpflow.reset_default_graph_and_session()
ex1()

Accuracy is 98.6800%


In [6]:
def ex2b():
    fX_dim = 5  
    minibatch_size = notebook_niter(1000, test_n=10)
    M = notebook_niter(100, test_n=5)

    # Unfortunately, only float32 and lower are supported by the Conv layers 
    f = lambda x: tf.cast(cnn_fn(tf.cast(x, tf.float32), fX_dim), float_type)
    kern = KernelWithNN(gpflow.kernels.Matern32(fX_dim), f)
    
    # Reset inducing (these live in a different space to X, so we need to be careful here)
    ind = np.random.choice(Mnist.X.shape[0], minibatch_size, replace=False)
    
    # Currently a hack is needed due to model initialisation
    feat = KernelSpaceInducingPoints(np.empty((M, fX_dim)))
    #feat = FFeature(Z_0)  # Ideally, we could move the calculation of Z_0
    
    # Build the model 

    lik = gpflow.likelihoods.MultiClass(Mnist.Nclasses)

    #Z = kmeans2(Mnist.X, M, minit='points')[0]

    model = NN_SVGP(Mnist.X, Mnist.Y, kern, lik, feat=feat, num_latent=Mnist.Nclasses, minibatch_size=minibatch_size)

    fZ = model.kern.compute_f(Mnist.X[ind])
    # Z_0 = kmeans2(fZ, M)[0] might fail
    Z_0 = fZ[np.random.choice(len(fZ), M, replace=False)]
    model.feature.Z = Z_0

    # Use GPflow wrappers to train. Note all session handling is done for us
    gpflow.training.AdamOptimizer(0.001).minimize(model, maxiter=ITERATIONS)

    # Predictions
    m, v = model.predict_y(Mnist.Xtest)
    preds = np.argmax(m, 1).reshape(Mnist.Ytest.shape)
    correct = preds == Mnist.Ytest.astype(int)
    acc = np.average(correct.astype(float)) * 100.

    print('Accuracy is {:.4f}%'.format(acc))

gpflow.reset_default_graph_and_session()
ex2b()

Accuracy is 97.5600%


## Example 2: A GPflow model on top of a TensorFlow model
Now we'll do things the other way; take a model implemented in TensorFlow and explain how to put a GPflow model on top.

In [7]:
def ex2():
    minibatch_size = notebook_niter(1000, test_n=10)
    gp_dim = 5
    M = notebook_niter(100, test_n=5)

    # Placeholders
    X = tf.placeholder(tf.float32, [minibatch_size, Mnist.input_dim])  # Fixed shape so num_data works in SVGP
    Y = tf.placeholder(tf.float32, [minibatch_size, 1])
    Xtest = tf.placeholder(tf.float32, [None, Mnist.input_dim])

    # Build graph

    with tf.variable_scope('cnn'):
        f_X = tf.cast(cnn_fn(X, gp_dim), dtype=float_type)

    with tf.variable_scope('cnn', reuse=True):
        f_Xtest = tf.cast(cnn_fn(Xtest, gp_dim), dtype=float_type)

    gp_model = gpflow.models.SVGP(f_X, tf.cast(Y, dtype=float_type), 
                                  gpflow.kernels.RBF(gp_dim), gpflow.likelihoods.MultiClass(Mnist.Nclasses), 
                                  Z=np.zeros((M, gp_dim)), # we'll set this later
                                  num_latent=Mnist.Nclasses)

    loss = -gp_model.likelihood_tensor

    m, v = gp_model._build_predict(f_Xtest)
    my, yv = gp_model.likelihood.predict_mean_and_var(m, v)

    with tf.variable_scope('adam'):
        opt_step = tf.train.AdamOptimizer(0.001).minimize(loss)

    tf_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='adam')
    tf_vars += tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='cnn')

    # Initialise
    sess = tf.Session()
    sess.run(tf.variables_initializer(var_list=tf_vars))
    gp_model.initialize(session=sess)
    
    # Reset inducing (these live in a different space to X, so we need to be careful here)
    ind = np.random.choice(Mnist.X.shape[0], minibatch_size, replace=False)

    fZ = sess.run(f_X, feed_dict={X:Mnist.X[ind]})
    # Z_0 = kmeans2(fZ, M)[0] might fail
    Z_0 = fZ[np.random.choice(len(fZ), M, replace=False)]

    def set_gp_param(param, value):
        sess.run(tf.assign(param.unconstrained_tensor, param.transform.backward(value)))

    set_gp_param(gp_model.feature.Z, Z_0)

    # Train
    for i in range(ITERATIONS):
        ind = np.random.choice(Mnist.X.shape[0], minibatch_size, replace=False)
        sess.run(opt_step, feed_dict={X:Mnist.X[ind], Y:Mnist.Y[ind]})

    # Predict
    preds = np.argmax(sess.run(my, feed_dict={Xtest:Mnist.Xtest}), 1).reshape(Mnist.Ytest.shape)
    correct = preds == Mnist.Ytest.astype(int)
    acc = np.average(correct.astype(float)) * 100.
    print('acc is {:.4f}'.format(acc))

gpflow.reset_default_graph_and_session()
ex2()

gpflow.reset_default_graph_and_session()

acc is 98.5900
