# Comparing a conventional and a linear CG multi-output GP

In [1]:
import math
from time import perf_counter

import torch
import gpytorch
import numpy as np

import famgpytorch

# Fix the RNG seed so the random draws made further down are reproducible.
torch.manual_seed(42)

# Use a CUDA GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
    device_label = torch.cuda.get_device_name(device)
else:
    device = torch.device('cpu')
    device_label = 'CPU'
print(f"Training on {device_label}.")

Training on NVIDIA GeForce RTX 3080.


## Set up some very simple training data

$$
\begin{align}
y_1 &= \sin(2\pi x) + \epsilon \\
y_2 &= \cos(2\pi x) + \epsilon \\
\epsilon &\sim \mathcal{N}(0, 0.04)
\end{align}
$$
The training and test inputs are regularly spaced points in $[0, 1]$.

In [2]:
nb_training_points = 3000
nb_test_points = 2000

# Regularly spaced training inputs on [0, 1].
train_x = torch.linspace(0, 1, nb_training_points, device=device)

# Noise variance is 0.04, so each noise draw is scaled by sqrt(0.04).
noise_std = math.sqrt(0.04)
phase = train_x * (2 * math.pi)

# The sine-task noise is drawn before the cosine-task noise; keeping this
# order keeps the generated data identical for a given seed.
sin_targets = torch.sin(phase) + torch.randn(train_x.size(), device=device) * noise_std
cos_targets = torch.cos(phase) + torch.randn(train_x.size(), device=device) * noise_std

# Stack along the last dimension: column 0 is y_1 (sine), column 1 is y_2 (cosine).
train_y = torch.stack([sin_targets, cos_targets], dim=-1)

In [3]:
class ConventionalGPModel(gpytorch.models.ExactGP):
    """Exact two-task GP that uses gpytorch's ``MultitaskKernel``.

    The mean is a ``MultitaskMean`` wrapping a ``ConstantMean``; the
    covariance is a ``MultitaskKernel`` wrapping an ``RBFKernel`` with
    ``num_tasks=2`` and ``rank=1``.
    """

    def __init__(self, train_inputs, train_targets, likelihood):
        super().__init__(train_inputs, train_targets, likelihood)

        base_mean = gpytorch.means.ConstantMean()
        self.mean_module = gpytorch.means.MultitaskMean(base_mean, num_tasks=2)

        data_kernel = gpytorch.kernels.RBFKernel()
        self.covar_module = gpytorch.kernels.MultitaskKernel(
            data_kernel, num_tasks=2, rank=1
        )

    def forward(self, x):
        """Return the ``MultitaskMultivariateNormal`` built from the mean and covariance at ``x``."""
        return gpytorch.distributions.MultitaskMultivariateNormal(
            self.mean_module(x),
            self.covar_module(x),
        )

class LinearCGModel(gpytorch.models.ExactGP):
    """Exact two-task GP that uses famgpytorch's ``MultitaskKernelLinearCG``.

    Built the same way as the conventional model (``MultitaskMean`` over a
    ``ConstantMean``, RBF data kernel, ``num_tasks=2``, ``rank=1``); only the
    multitask kernel class differs.
    """

    def __init__(self, train_inputs, train_targets, likelihood):
        super().__init__(train_inputs, train_targets, likelihood)

        base_mean = gpytorch.means.ConstantMean()
        self.mean_module = gpytorch.means.MultitaskMean(base_mean, num_tasks=2)

        data_kernel = gpytorch.kernels.RBFKernel()
        self.covar_module = famgpytorch.kernels.MultitaskKernelLinearCG(
            data_kernel, num_tasks=2, rank=1
        )

    def forward(self, x):
        """Return the ``MultitaskMultivariateNormal`` built from the mean and covariance at ``x``."""
        return gpytorch.distributions.MultitaskMultivariateNormal(
            self.mean_module(x),
            self.covar_module(x),
        )

# Conventional GP: build the likelihood and the model, moving each to the
# selected device right after construction (Module.to returns the module).
conv_likelihood = gpytorch.likelihoods.MultitaskGaussianLikelihood(num_tasks=2).to(device)
conv_model = ConventionalGPModel(train_x, train_y, conv_likelihood).to(device)

# Linear CG GP: same construction with its own likelihood instance.
linear_cg_likelihood = gpytorch.likelihoods.MultitaskGaussianLikelihood(num_tasks=2).to(device)
linear_cg_model = LinearCGModel(train_x, train_y, linear_cg_likelihood).to(device)

# Draw one set of random task-kernel hyperparameters and give it to both
# models, so both GPs start from the same task kernel and stay comparable.
shared_covar_factor = torch.randn(2, 1, device=device)
shared_raw_var = torch.randn(2, device=device)
hypers = {
    "covar_module.task_covar_module.covar_factor": shared_covar_factor,
    "covar_module.task_covar_module.raw_var": shared_raw_var,
}
for gp_model in (conv_model, linear_cg_model):
    gp_model.initialize(**hypers)

## Training the models

In [4]:
# Put both models and their likelihoods into training mode.
conv_model.train()
linear_cg_model.train()
conv_likelihood.train()
linear_cg_likelihood.train()

# Adam over model.parameters(), which includes the likelihood parameters.
conv_optimizer = torch.optim.Adam(conv_model.parameters(), lr=0.01)
linear_cg_optimizer = torch.optim.Adam(linear_cg_model.parameters(), lr=0.01)

# "Loss" for GPs - the (negated) exact marginal log likelihood.
conv_mll = gpytorch.mlls.ExactMarginalLogLikelihood(conv_likelihood, conv_model)
linear_cg_mll = gpytorch.mlls.ExactMarginalLogLikelihood(linear_cg_likelihood, linear_cg_model)


def train_gp(model, optimizer, mll, inputs, targets, num_iters=50, log_every=10):
    """Run the optimisation loop for one GP and report how long it took.

    Each iteration evaluates the model at ``inputs``, takes the negative of
    ``mll(output, targets)`` as the loss, backpropagates and steps the
    optimizer. A progress line is printed on the first iteration and on
    every ``log_every``-th iteration.

    Args:
        model: GP model to train; must expose ``likelihood`` and
            ``covar_module.data_covar_module.lengthscale``.
        optimizer: Optimizer holding the parameters of ``model``.
        mll: Marginal log likelihood object called as ``mll(output, targets)``.
        inputs: Training inputs passed to ``model``.
        targets: Training targets passed to ``mll``.
        num_iters: Number of optimisation steps.
        log_every: Print progress every this many iterations.

    Returns:
        float: Elapsed wall-clock time of the loop in seconds.
    """

    def _wait_for_device():
        # CUDA work is queued asynchronously; block until it is finished so
        # the timer brackets the actual computation, not just kernel launches.
        if inputs.is_cuda:
            torch.cuda.synchronize(inputs.device)

    _wait_for_device()
    start = perf_counter()
    for i in range(num_iters):
        optimizer.zero_grad()
        output = model(inputs)
        loss = -mll(output, targets)
        loss.backward()
        if i == 0 or (i + 1) % log_every == 0:
            # Printed before optimizer.step(), so the hyperparameters shown
            # are the ones that produced the loss on this line.
            task_noises = model.likelihood.task_noises.tolist()
            print(
                f"\tIter {i + 1:02d}/{num_iters}   "
                # The ' ' sign flag pads non-negative values so columns line up.
                f"Loss: {loss.item(): .3f}   "
                f"lengthscale: {model.covar_module.data_covar_module.lengthscale.item():.3f}   "
                f"task_noises: {np.around(np.array(task_noises), 3).tolist()}   "
                f"global_noise: {model.likelihood.noise.item():.3f}"
            )
        optimizer.step()
    _wait_for_device()
    return perf_counter() - start


print("Conventional:")
conv_train_time = train_gp(conv_model, conv_optimizer, conv_mll, train_x, train_y)
print(f"Training in {conv_train_time:.3f} seconds.")

print("Linear CG:")
linear_cg_train_time = train_gp(linear_cg_model, linear_cg_optimizer, linear_cg_mll, train_x, train_y)
print(f"Training in {linear_cg_train_time:.3f} seconds.")

Conventional:
	Iter 01/50   Loss:  1.128   lengthscale: 0.693   task_noises: [0.693, 0.693]   global_noise: 0.693
	Iter 10/50   Loss:  1.093   lengthscale: 0.649   task_noises: [0.649, 0.649]   global_noise: 0.649
	Iter 20/50   Loss:  1.052   lengthscale: 0.602   task_noises: [0.602, 0.603]   global_noise: 0.602
	Iter 30/50   Loss:  1.011   lengthscale: 0.558   task_noises: [0.558, 0.558]   global_noise: 0.558
	Iter 40/50   Loss:  0.969   lengthscale: 0.517   task_noises: [0.515, 0.516]   global_noise: 0.515
	Iter 50/50   Loss:  0.927   lengthscale: 0.481   task_noises: [0.475, 0.476]   global_noise: 0.475
Taining in 57.967 seconds.
Linear CG
	Iter 01/50   Loss:  1.128   lengthscale: 0.693   task_noises: [0.693, 0.693]   global_noise: 0.693
	Iter 10/50   Loss:  1.096   lengthscale: 0.649   task_noises: [0.652, 0.663]   global_noise: 0.649
	Iter 20/50   Loss:  1.056   lengthscale: 0.602   task_noises: [0.608, 0.615]   global_noise: 0.603
	Iter 30/50   Loss:  1.015   lengthscale: 0.558  

## Resulting covariance matrices

In [5]:
# Evaluate both models at the training inputs with gradient tracking disabled,
# print the covariance matrix of each returned distribution, and summarise
# their element-wise difference as a root-mean-square error.
with torch.no_grad():
    print("\n\nResulting covariance matrices")

    conv_f_train = conv_model(train_x)
    conv_train_covar = conv_f_train.covariance_matrix
    print("--Conventional--")
    print(conv_train_covar)

    linear_cg_f_train = linear_cg_model(train_x)
    linear_cg_train_covar = linear_cg_f_train.covariance_matrix
    print("\n--Linear CG--")
    print(linear_cg_train_covar)

    # Square, average over all entries, then take the root.
    rmse = (conv_train_covar - linear_cg_train_covar).pow(2).mean().sqrt()
    print(f"\nRMSE = {rmse.item():.3f}")



Resulting covariance matrices
--Conventional--
tensor([[ 1.4807, -1.5081,  1.4807,  ..., -0.1695,  0.1662, -0.1693],
        [-1.5081,  2.7879, -1.5081,  ...,  0.3134, -0.1693,  0.3129],
        [ 1.4807, -1.5081,  1.4807,  ..., -0.1698,  0.1664, -0.1695],
        ...,
        [-0.1695,  0.3134, -0.1698,  ...,  2.7879, -1.5081,  2.7879],
        [ 0.1662, -0.1693,  0.1664,  ..., -1.5081,  1.4807, -1.5081],
        [-0.1693,  0.3129, -0.1695,  ...,  2.7879, -1.5081,  2.7879]],
       device='cuda:0', grad_fn=<MatmulBackward>)

--Linear CG--
tensor([[ 1.4807, -1.5072,  1.4807,  ..., -0.1696,  0.1664, -0.1693],
        [-1.5072,  2.7843, -1.5072,  ...,  0.3133, -0.1693,  0.3128],
        [ 1.4807, -1.5072,  1.4807,  ..., -0.1698,  0.1666, -0.1696],
        ...,
        [-0.1696,  0.3133, -0.1698,  ...,  2.7843, -1.5072,  2.7843],
        [ 0.1664, -0.1693,  0.1666,  ..., -1.5072,  1.4807, -1.5072],
        [-0.1693,  0.3128, -0.1696,  ...,  2.7843, -1.5072,  2.7843]],
       device='cud