In [1]:
import math
import torch
import gpytorch
#from matplotlib import pyplot as plt

import warnings
# Silence GPyTorch's NumericalWarning category for the rest of the session
warnings.simplefilter("ignore", gpytorch.utils.warnings.NumericalWarning)

#%matplotlib inline
%load_ext autoreload
%autoreload 2

  from .autonotebook import tqdm as notebook_tqdm


In [11]:
# Seed the global RNG so the noisy targets are identical on every
# Restart & Run All (the noise below is drawn from torch's global generator)
torch.manual_seed(0)

# Training data is 50 points in [0,1] inclusive regularly spaced
train_x = torch.linspace(0, 1, 50)
# True function is sin(2*pi*x) with additive Gaussian noise scaled by 0.2
train_y = torch.sin(train_x * (2 * math.pi)) + torch.randn(train_x.size()) * 0.2

In [12]:
# Switch for the KeOps code path: when True, ExactGPModel builds its RBF kernel
# from gpytorch.kernels.keops and the test grid uses 50000 points instead of
# 10000; when False the standard gpytorch.kernels.RBFKernel is used.
HAVE_KEOPS = False

In [13]:
class ExactGPModel(gpytorch.models.ExactGP):
    """Exact GP regression model with a constant mean and a scaled RBF kernel."""

    def __init__(self, train_x, train_y, likelihood):
        """Register the training data and likelihood, then build the mean and kernel.

        The RBF kernel is taken from ``gpytorch.kernels.keops`` when the
        notebook-level ``HAVE_KEOPS`` flag is true, and from ``gpytorch.kernels``
        otherwise; either way it is wrapped in a ``ScaleKernel``.
        """
        super().__init__(train_x, train_y, likelihood)
        self.mean_module = gpytorch.means.ConstantMean()

        # Pick the namespace once instead of duplicating the ScaleKernel call
        kernel_namespace = gpytorch.kernels.keops if HAVE_KEOPS else gpytorch.kernels
        self.covar_module = gpytorch.kernels.ScaleKernel(kernel_namespace.RBFKernel())

    def forward(self, x):
        """Return the MultivariateNormal formed from the mean and covariance at ``x``."""
        return gpytorch.distributions.MultivariateNormal(
            self.mean_module(x),
            self.covar_module(x),
        )


# initialize likelihood and model
likelihood = gpytorch.likelihoods.GaussianLikelihood()
model = ExactGPModel(train_x, train_y, likelihood)

In [14]:
# Move the training data and both modules to the GPU when one is available
if torch.cuda.is_available():
    train_x, train_y = train_x.cuda(), train_y.cuda()
    model, likelihood = model.cuda(), likelihood.cuda()

In [15]:
# this is for running the notebook in our testing framework
import os

# A 'CI' environment variable marks a smoke-test run, which takes only 2 steps
smoke_test = 'CI' in os.environ
if smoke_test:
    training_iter = 2
else:
    training_iter = 50

# Put model and likelihood into training mode to fit the hyperparameters
model.train()
likelihood.train()

# Adam over model.parameters(), which includes the GaussianLikelihood parameters
optimizer = torch.optim.Adam(model.parameters(), lr=0.1)

# "Loss" for GPs - the marginal log likelihood
mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)

for i in range(training_iter):
    # Clear gradients left over from the previous step
    optimizer.zero_grad()
    # Forward pass on the training inputs; the loss is the negated MLL
    output = model(train_x)
    loss = -mll(output, train_y)
    # Backpropagate through the loss
    loss.backward()
    # Report the hyperparameters this loss was computed with (before the update)
    progress = (
        i + 1,
        training_iter,
        loss.item(),
        model.covar_module.base_kernel.lengthscale.item(),
        model.likelihood.noise.item(),
    )
    print('Iter %d/%d - Loss: %.3f   lengthscale: %.3f   noise: %.3f' % progress)
    optimizer.step()

Iter 1/50 - Loss: 0.990   lengthscale: 0.693   noise: 0.693
Iter 2/50 - Loss: 0.960   lengthscale: 0.644   noise: 0.644
Iter 3/50 - Loss: 0.929   lengthscale: 0.598   noise: 0.598
Iter 4/50 - Loss: 0.896   lengthscale: 0.554   noise: 0.554
Iter 5/50 - Loss: 0.859   lengthscale: 0.513   noise: 0.513
Iter 6/50 - Loss: 0.816   lengthscale: 0.474   noise: 0.474
Iter 7/50 - Loss: 0.769   lengthscale: 0.438   noise: 0.437
Iter 8/50 - Loss: 0.718   lengthscale: 0.403   noise: 0.402
Iter 9/50 - Loss: 0.666   lengthscale: 0.371   noise: 0.370
Iter 10/50 - Loss: 0.618   lengthscale: 0.341   noise: 0.339
Iter 11/50 - Loss: 0.574   lengthscale: 0.313   noise: 0.311
Iter 12/50 - Loss: 0.536   lengthscale: 0.288   noise: 0.284
Iter 13/50 - Loss: 0.501   lengthscale: 0.267   noise: 0.259
Iter 14/50 - Loss: 0.469   lengthscale: 0.250   noise: 0.236
Iter 15/50 - Loss: 0.438   lengthscale: 0.236   noise: 0.215
Iter 16/50 - Loss: 0.406   lengthscale: 0.227   noise: 0.196
Iter 17/50 - Loss: 0.374   length

In [16]:
# The KeOps path gets a larger test grid than the standard kernel
test_n = 50000 if HAVE_KEOPS else 10000

# Evenly spaced test inputs on [0, 1], moved to the GPU when one is available
test_x = torch.linspace(0, 1, test_n)
if torch.cuda.is_available():
    test_x = test_x.cuda()
print(test_x.shape)

torch.Size([10000])


In [29]:
import time

model.train()
likelihood.train()

# Get into evaluation (predictive posterior) mode
model.eval()
likelihood.eval()

# Test points are regularly spaced along [0,1]
# Make predictions by feeding model through likelihood

test_x.requires_grad_(True)

with torch.no_grad():
    observed_pred = likelihood(model(test_x))

    # All relevant settings for using CIQ.
    #   ciq_samples(True) - Use CIQ for sampling
    #   num_contour_quadrature(10) -- Use 10 quadrature sites (Q in the paper)
    #   minres_tolerance -- error tolerance from minres (here, <0.01%).
    print("Running with CIQ")
    with gpytorch.settings.ciq_samples(True), gpytorch.settings.num_contour_quadrature(10), gpytorch.settings.minres_tolerance(1e-4):
        %time y_samples = [observed_pred.rsample() for i in range(2000)]

    # print("Running with Cholesky")
    # # Make sure we use Cholesky
    # with gpytorch.settings.fast_computations(covar_root_decomposition=False):
    #     %time y_samples = observed_pred.rsample()

Running with CIQ


KeyboardInterrupt: 

In [27]:
# Dimensions of the drawn samples; .size() requires y_samples to be a tensor
y_samples.size()

torch.Size([10000])