
Commit

Update VGP model
awav committed Sep 10, 2019
1 parent bb09839 commit 6e1f56a
Showing 6 changed files with 110 additions and 112 deletions.
1 change: 1 addition & 0 deletions .circleci/config.yml
@@ -15,6 +15,7 @@ runtest: &runtest
name: Upload coverage report
command: |
bash <(curl -s https://codecov.io/bash) -t "${CODECOV_TOKEN}"
version: 2.1

jobs:
4 changes: 2 additions & 2 deletions _unsorted/_test_method_equivalence.py
@@ -157,7 +157,7 @@ def test_vgp_vs_opper_archambeau(self):
likelihood = gpflow.likelihoods.StudentT()

m_vgp = gpflow.models.VGP(X, Y, kernel, likelihood)
m_vgp_oa = gpflow.models.VGP_opper_archambeau(X, Y, kernel, likelihood)
m_vgp_oa = gpflow.models.VGPOpperArchambeau(X, Y, kernel, likelihood)
m_vgp.compile()
m_vgp_oa.compile()

@@ -217,7 +217,7 @@ def test_vgp_vs_opper_archambeau(self):
# kernel = gpflow.kernels.Matern52(DX)
# likelihood = gpflow.likelihoods.StudentT()
# m_vgp = gpflow.models.VGP(X, Y, kernel, likelihood)
# m_vgp_oa = gpflow.models.VGP_opper_archambeau(X, Y, kernel, likelihood)
# m_vgp_oa = gpflow.models.VGPOpperArchambeau(X, Y, kernel, likelihood)
# for m in [m_vgp, m_vgp_oa]:
# m.compile()
# opt = gpflow.train.ScipyOptimizer()
2 changes: 1 addition & 1 deletion gpflow/models/__init__.py
@@ -28,4 +28,4 @@
from .svgp import SVGP
# from .svgp import SVGP
from .vgp import VGP
# from .vgp import VGP_opper_archambeau
# from .vgp import VGPOpperArchambeau
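
Note: the re-export of the renamed class stays commented out, so it is not reachable as gpflow.models.VGPOpperArchambeau yet. A minimal sketch of how it would be imported for now (assuming the direct module path of the file changed below):

from gpflow.models.vgp import VGPOpperArchambeau  # direct module import; the package-level alias remains disabled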
142 changes: 68 additions & 74 deletions gpflow/models/vgp.py
@@ -12,23 +12,23 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Optional
import gpflow
import numpy as np
import tensorflow as tf
import tensorflow_probability as tfp
import numpy as np

import gpflow
from ..base import Parameter
from ..config import default_float, default_jitter
from ..mean_functions import Zero
from ..conditionals import conditional
from ..config import default_float, default_jitter
from ..kernels import Kernel
from ..kullback_leiblers import gauss_kl
from ..models.model import GPModel
from ..likelihoods import Likelihood
from ..mean_functions import MeanFunction, Zero
from ..models.model import Data, DataPoint, GPModel, MeanAndVariance

# ERROR: Modify this
GPModelOLD = GPModel


class VGP(GPModelOLD):
class VGP(GPModel):
"""
This method approximates the Gaussian process posterior using a multivariate Gaussian.
@@ -47,34 +47,31 @@ class VGP(GPModelOLD):
"""

def __init__(self, X, Y, kernel, likelihood, mean_function=None, num_latent=None, **kwargs):
def __init__(self,
data: Data,
kernel: Kernel,
likelihood: Likelihood,
mean_function: Optional[MeanFunction] = None,
num_latent: Optional[int] = None):
"""
X is a data matrix, size [N, D]
Y is a data matrix, size [N, R]
kernel, likelihood, mean_function are appropriate GPflow objects
"""
GPModelOLD.__init__(self, X, Y, kernel, likelihood, mean_function, num_latent, **kwargs)
self.num_data = X.shape[0]
super().__init__(kernel, likelihood, mean_function, num_latent)

x_data, y_data = data
num_data = x_data.shape[0]
self.num_data = num_data
self.num_latent = num_latent or y_data.shape[1]
self.data = data

self.q_mu = Parameter(np.zeros((self.num_data, self.num_latent)))
q_sqrt = np.array([np.eye(self.num_data) for _ in range(self.num_latent)])
self.q_mu = Parameter(np.zeros((num_data, self.num_latent)))
q_sqrt = np.array([np.eye(num_data) for _ in range(self.num_latent)])
transform = tfp.bijectors.FillTriangular()
self.q_sqrt = Parameter(q_sqrt, transform=transform)

def _init_variational_parameters(self):
"""
Before calling the standard compile function, check to see if the size
of the data has changed and add variational parameters appropriately.
This is necessary because the shape of the parameters depends on the
shape of the data.
"""
if not self.num_data == self.X.shape[0]:
self.num_data = self.X.shape[0]
self.q_mu = Parameter(np.zeros((self.num_data, self.num_latent)))
self.q_sqrt = Parameter(np.eye(self.num_data)[:, :, None] * np.ones((1, 1, self.num_latent)))

def log_likelihood(self):
"""
This method computes the variational lower bound on the likelihood,
@@ -88,35 +85,40 @@ def log_likelihood(self):
"""

x_data, y_data = self.data
# Get prior KL.
KL = gauss_kl(self.q_mu, self.q_sqrt)

# Get conditionals
K = self.kernel(self.X) + tf.eye(self.num_data, dtype=default_float()) * default_jitter()
K = self.kernel(x_data) + tf.eye(self.num_data, dtype=default_float()) * default_jitter()
L = tf.linalg.cholesky(K)

fmean = tf.linalg.matmul(L, self.q_mu) + self.mean_function(self.X) # NN,ND->ND

fmean = tf.linalg.matmul(L, self.q_mu) + self.mean_function(x_data)  # [N, N] @ [N, D] -> [N, D]
q_sqrt_dnn = tf.linalg.band_part(self.q_sqrt, -1, 0) # [D, N, N]

L_tiled = tf.tile(tf.expand_dims(L, 0), tf.stack([self.num_latent, 1, 1]))

LTA = tf.linalg.matmul(L_tiled, q_sqrt_dnn) # [D, N, N]
fvar = tf.reduce_sum(tf.square(LTA), 2)

fvar = tf.transpose(fvar)

# Get variational expectations.
var_exp = self.likelihood.variational_expectations(fmean, fvar, self.Y)
var_exp = self.likelihood.variational_expectations(fmean, fvar, y_data)

return tf.reduce_sum(var_exp) - KL

def predict_f(self, Xnew, full_cov=False, full_output_cov=False):
mu, var = conditional(Xnew, self.X, self.kernel, self.q_mu, q_sqrt=self.q_sqrt, full_cov=full_cov, white=True)
return mu + self.mean_function(Xnew), var
def predict_f(self, predict_at: DataPoint, full_cov: bool = False,
full_output_cov: bool = False) -> MeanAndVariance:
x_data, _y_data = self.data
mu, var = conditional(predict_at,
x_data,
self.kernel,
self.q_mu,
q_sqrt=self.q_sqrt,
full_cov=full_cov,
white=True)
return mu + self.mean_function(predict_at), var


class VGP_opper_archambeau(GPModel):
class VGPOpperArchambeau(GPModel):
"""
This method approximates the Gaussian process posterior using a multivariate Gaussian.
The key reference is:
@@ -143,38 +145,29 @@ class VGP_opper_archambeau(GPModel):
"""

def __init__(self, X, Y, kernel, likelihood, mean_function=None, num_latent=None, **kwargs):
def __init__(self,
data: Data,
kernel: Kernel,
likelihood: Likelihood,
mean_function: MeanFunction = None,
num_latent: Optional[int] = None):
"""
X is a data matrix, size [N, D]
Y is a data matrix, size [N, R]
kernel, likelihood, mean_function are appropriate GPflow objects
"""

mean_function = Zero() if mean_function is None else mean_function

X = DataHolder(X)
Y = DataHolder(Y)
GPModel.__init__(self, X, Y, kernel, likelihood, mean_function, **kwargs)
self.num_data = X.shape[0]
self.num_latent = num_latent or Y.shape[1]
self.q_alpha = Parameter(np.zeros((self.num_data, self.num_latent)))
self.q_lambda = Parameter(np.ones((self.num_data, self.num_latent)), transforms.positive)
super().__init__(kernel, likelihood, mean_function, num_latent)

def compile(self, session=None):
"""
Before calling the standard compile function, check to see if the size
of the data has changed and add variational parameters appropriately.
This is necessary because the shape of the parameters depends on the
shape of the data.
"""
if not self.num_data == self.X.shape[0]:
self.num_data = self.X.shape[0]
self.q_alpha = Parameter(np.zeros((self.num_data, self.num_latent)))
self.q_lambda = Parameter(np.ones((self.num_data, self.num_latent)), gpflow.positive)
return super(VGP_opper_archambeau, self).compile(session=session)
x_data, y_data = data
self.data = data
self.num_data = x_data.shape[0]
self.num_latent = num_latent or y_data.shape[1]
self.q_alpha = Parameter(np.zeros((self.num_data, self.num_latent)))
self.q_lambda = Parameter(np.ones((self.num_data, self.num_latent)), gpflow.positive())

def _build_likelihood(self):
def log_likelihood(self):
"""
q_alpha, q_lambda are variational parameters, size [N, R]
This method computes the variational lower bound on the likelihood,
@@ -183,17 +176,17 @@ def _build_likelihood(self):
with
q(f) = N(f | K alpha + mean, [K^-1 + diag(square(lambda))]^-1) .
"""
K = self.kernel(self.X)
x_data, y_data = self.data
K = self.kernel(x_data)
K_alpha = tf.linalg.matmul(K, self.q_alpha)
f_mean = K_alpha + self.mean_function(self.X)
f_mean = K_alpha + self.mean_function(x_data)

# compute the variance for each of the outputs
I = tf.tile(tf.expand_dims(tf.eye(self.num_data, dtype=default_float()), 0), [self.num_latent, 1, 1])
A = I + tf.expand_dims(tf.transpose(self.q_lambda), 1) * \
tf.expand_dims(tf.transpose(self.q_lambda), 2) * K
I = tf.tile(tf.eye(self.num_data, dtype=default_float())[None, ...], [self.num_latent, 1, 1])
A = I + tf.transpose(self.q_lambda)[:, None, ...] * tf.transpose(self.q_lambda)[:, :, None, ...] * K
L = tf.linalg.cholesky(A)
Li = tf.linalg.triangular_solve(L, I)
tmp = Li / tf.expand_dims(tf.transpose(self.q_lambda), 1)
tmp = Li / tf.transpose(self.q_lambda)[:, None, ...]
f_var = 1. / tf.square(self.q_lambda) - tf.transpose(tf.reduce_sum(tf.square(tmp), 1))

# some statistics about A are used in the KL
@@ -205,7 +198,7 @@ def _build_likelihood(self):
v_exp = self.likelihood.variational_expectations(f_mean, f_var, y_data)
return tf.reduce_sum(v_exp) - KL

def _build_predict(self, Xnew, full_cov=False):
def predict_f(self, predict_at: DataPoint, full_cov: bool = False):
"""
The posterior variance of F is given by
q(f) = N(f | K alpha + mean, [K^-1 + diag(lambda**2)]^-1)
Expand All @@ -215,20 +208,21 @@ def _build_predict(self, Xnew, full_cov=False):
diag(lambda**-2)]^-1 K_{f*} )
"""

x_data, _y_data = self.data
# compute kernel things
Kx = self.kernel(self.X, Xnew)
K = self.kernel(self.X)
Kx = self.kernel(x_data, predict_at)
K = self.kernel(x_data)

# predictive mean
f_mean = tf.linalg.matmul(Kx, self.q_alpha, transpose_a=True) + self.mean_function(Xnew)
f_mean = tf.linalg.matmul(Kx, self.q_alpha, transpose_a=True) + self.mean_function(predict_at)

# predictive var
A = K + tf.linalg.diag(tf.transpose(1. / tf.square(self.q_lambda)))
L = tf.linalg.cholesky(A)
Kx_tiled = tf.tile(tf.expand_dims(Kx, 0), [self.num_latent, 1, 1])
Kx_tiled = tf.tile(Kx[None, ...], [self.num_latent, 1, 1])
LiKx = tf.linalg.triangular_solve(L, Kx_tiled)
if full_cov:
f_var = self.kernel(Xnew) - tf.linalg.matmul(LiKx, LiKx, transpose_a=True)
f_var = self.kernel(predict_at) - tf.linalg.matmul(LiKx, LiKx, transpose_a=True)
else:
f_var = self.kernel(Xnew) - tf.reduce_sum(tf.square(LiKx), 1)
f_var = self.kernel(predict_at) - tf.reduce_sum(tf.square(LiKx), 1)
return f_mean, tf.transpose(f_var)
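
For orientation, the refactored VGP now takes an (X, Y) data tuple at construction and exposes the bound directly via log_likelihood(). With the whitened parameterisation used above, q(f) = N(f | L q_mu + mean(X), L q_sqrt q_sqrt^T L^T) with L = cholesky(K(X, X) + jitter * I), and the bound is sum_n E_q[log p(y_n | f_n)] - KL[q || N(0, I)]. A minimal usage sketch under that interface (the toy data, Matern32 kernel and Gaussian likelihood are illustrative choices, not part of this commit):

import numpy as np
import gpflow

# illustrative toy data: 20 one-dimensional inputs, one output column
X = np.random.randn(20, 1)
Y = np.sin(X) + 0.1 * np.random.randn(20, 1)

# new-style construction: a single (X, Y) tuple instead of separate X, Y arguments
model = gpflow.models.VGP((X, Y),
                          kernel=gpflow.kernels.Matern32(),
                          likelihood=gpflow.likelihoods.Gaussian())

elbo = model.log_likelihood()                                      # variational lower bound at the initial q(f)
mean, var = model.predict_f(np.linspace(-2., 2., 5).reshape(-1, 1))  # posterior mean/variance at new inputs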
26 changes: 15 additions & 11 deletions tests/test_mean_functions.py
@@ -195,8 +195,7 @@ def test_bug_277_regression():
gpflow.models.SGPR,
gpflow.models.GPRFITC,
gpflow.models.SVGP,
# gpflow.models.VGP(X, Y, mean_function=mf(), kernel=k(), likelihood=lik()),
# gpflow.models.VGP(X, Y, mean_function=mf(), kernel=k(), likelihood=lik()),
gpflow.models.VGP,
# gpflow.models.GPMC(X, Y, mean_function=mf(), kernel=k(), likelihood=lik()),
# gpflow.models.SGPMC(X, Y, mean_function=mf(), kernel=k(), likelihood=lik(), Z=Z)
]
@@ -212,38 +211,43 @@ def test_models_with_mean_functions_changes(model_class):
a constant results in a higher prediction, whereas addition of zero/
multiplication with one does not.
"""
X, Y = rng.randn(Datum.N, Datum.input_dim), rng.randn(Datum.N, 1)
Xtest = rng.randn(Datum.Ntest, Datum.input_dim)
data = rng.randn(Datum.N, Datum.input_dim), rng.randn(Datum.N, 1)
predict_at = rng.randn(Datum.Ntest, Datum.input_dim)
inducing_variables = InducingPoints(Z=rng.randn(Datum.M, Datum.input_dim))
kernel = gpflow.kernels.Matern32()
likelihood = gpflow.likelihoods.Gaussian()
zero_mean = Zero()
non_zero_mean = Constant(c=np.ones(1) * 10)

if model_class in [gpflow.models.GPR]:
model_zero_mean = model_class((X, Y), kernel=kernel, mean_function=zero_mean)
model_non_zero_mean = model_class((X, Y), kernel=kernel, mean_function=non_zero_mean)
model_zero_mean = model_class(data, kernel=kernel, mean_function=zero_mean)
model_non_zero_mean = model_class(data, kernel=kernel, mean_function=non_zero_mean)
elif model_class in [gpflow.models.VGP]:
model_zero_mean = model_class(data, likelihood=likelihood, kernel=kernel, mean_function=zero_mean)
model_non_zero_mean = model_class(data, likelihood=likelihood, kernel=kernel, mean_function=non_zero_mean)
elif model_class in [gpflow.models.SVGP]:
model_zero_mean = model_class(kernel=kernel, likelihood=likelihood, inducing_variables=inducing_variables,
model_zero_mean = model_class(kernel=kernel,
likelihood=likelihood,
inducing_variables=inducing_variables,
mean_function=zero_mean)
model_non_zero_mean = model_class(kernel=kernel,
likelihood=likelihood,
inducing_variables=inducing_variables,
mean_function=non_zero_mean)
elif model_class in [gpflow.models.SGPR, gpflow.models.GPRFITC]:
model_zero_mean = model_class((X, Y),
model_zero_mean = model_class(data,
kernel=kernel,
inducing_variables=inducing_variables,
mean_function=zero_mean)
model_non_zero_mean = model_class((X, Y),
model_non_zero_mean = model_class(data,
kernel=kernel,
inducing_variables=inducing_variables,
mean_function=non_zero_mean)
else:
raise (NotImplementedError)

mu_zero, var_zero = model_zero_mean.predict_f(Xtest)
mu_non_zero, var_non_zero = model_non_zero_mean.predict_f(Xtest)
mu_zero, var_zero = model_zero_mean.predict_f(predict_at)
mu_non_zero, var_non_zero = model_non_zero_mean.predict_f(predict_at)
# predictive variance remains unchanged after modifying mean function
assert np.all(var_zero.numpy() == var_non_zero.numpy())
# predictive mean changes after modifying mean function
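
The property exercised by this test is that the mean function shifts the predictive mean without touching the predictive variance. A condensed sketch of the same check for the newly enabled VGP case (toy shapes and the constant value 10 mirror the test but are illustrative, and Zero/Constant are assumed to live in gpflow.mean_functions as in the test's imports):

import numpy as np
import gpflow
from gpflow.mean_functions import Constant, Zero

rng = np.random.RandomState(0)
data = rng.randn(10, 2), rng.randn(10, 1)   # (X, Y) tuple, as the updated tests build it
predict_at = rng.randn(4, 2)

common = dict(kernel=gpflow.kernels.Matern32(), likelihood=gpflow.likelihoods.Gaussian())
model_zero_mean = gpflow.models.VGP(data, mean_function=Zero(), **common)
model_non_zero_mean = gpflow.models.VGP(data, mean_function=Constant(c=np.ones(1) * 10), **common)

mu_zero, var_zero = model_zero_mean.predict_f(predict_at)
mu_non_zero, var_non_zero = model_non_zero_mean.predict_f(predict_at)
assert np.all(var_zero.numpy() == var_non_zero.numpy())   # variance is unchanged by the mean function
assert np.all(mu_non_zero.numpy() > mu_zero.numpy())      # the constant mean lifts every prediction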
