# Comparing a conventional and approximate Multioutput-Output GP

In [1]:
import math

import torch
import gpytorch

import famgpytorch

%matplotlib inline
%load_ext autoreload
%autoreload 2
torch.manual_seed(42)
None

## Set up some very simple training data

$$
\begin{align}
y_1 &= \sin(2\pi x) + \epsilon \\
y_2 &= \cos(2\pi x) + \epsilon \\
\epsilon &\sim \mathcal{N}(0, 0.04)
\end{align}
$$
With training and test examples regularly spaced points in [0,1]

In [2]:
nb_training_points = 2
nb_test_points = 1

train_x = torch.linspace(0, 1, nb_training_points)

train_y = torch.stack([
    torch.sin(train_x * (2 * math.pi)) + torch.randn(train_x.size()) * math.sqrt(0.04),
    torch.cos(train_x * (2 * math.pi)) + torch.randn(train_x.size()) * math.sqrt(0.04),
], -1)

## Setting up the models
Both **GP Models** `gpytorch.models.ExactGP` with:
- **Likelihood** `gpytorch.likelihoods.GaussianLikelihood`
- **Zero Mean** `gpytorch.means.ZeroMean` for simplicity

One model witch conventional kernel:
- **RBF Kernel** `gpytorch.kernels.RBFKernel`

One model with approximate kernel:
- **Approximate RBF Kernel** `famgpytorch.kernels.RBFKernelApprox`

Mean and Covariance modules are wrapped by the corresponding multitask module:
- **MultitaskMean** `gpytorch.means.MultitaskMean`: one mean for each task
- **MultitaskKernel** `gpytorch.kernels.MultitaskKernel`: Kernel supporting Kronecker style multitask Gaussian processes
    - $K = K_{TT} \otimes K_{XX}$

In [3]:
# set up index kernel as task kernel to make gps comparable
class ConventionalGPModel(gpytorch.models.ExactGP):
    def __init__(self, train_inputs, train_targets, likelihood):
        super(ConventionalGPModel, self).__init__(train_inputs, train_targets, likelihood)
        self.mean_module = gpytorch.means.MultitaskMean(
            gpytorch.means.ZeroMean(), num_tasks=2
        )
        self.covar_module = gpytorch.kernels.MultitaskKernel(
            gpytorch.kernels.RBFKernel(), num_tasks=2, rank=1
        )

    def forward(self, x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultitaskMultivariateNormal(mean_x, covar_x)

class ApproxGPModel(gpytorch.models.ExactGP):
    def __init__(self, train_inputs, train_targets, likelihood):
        super(ApproxGPModel, self).__init__(train_inputs, train_targets, likelihood)
        self.mean_module = gpytorch.means.MultitaskMean(
            gpytorch.means.ConstantMean(), num_tasks=2
        )
        self.covar_module = gpytorch.kernels.MultitaskKernel(
            famgpytorch.kernels.RBFKernelApprox(), num_tasks=2, rank=1
        )

    def forward(self, x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultitaskMultivariateNormal(mean_x, covar_x)

conv_likelihood = gpytorch.likelihoods.MultitaskGaussianLikelihood(num_tasks=2)
conv_model = ConventionalGPModel(train_x, train_y, conv_likelihood)

approx_likelihood = gpytorch.likelihoods.MultitaskGaussianLikelihood(num_tasks=2)
approx_model = ApproxGPModel(train_x, train_y, conv_likelihood)
# approx_model.covar_module.data_covar_module.alpha = 2
#approx_model.covar_module.data_covar_module.raw_alpha.requires_grad_(False)

# manually initialize task kernel hyperparameters to make GPs comparable
covar_factor = torch.randn(*conv_model.covar_module.task_covar_module.batch_shape, 2, 1)
var = torch.randn(*conv_model.covar_module.task_covar_module.batch_shape, 2)
hypers = {
    "covar_module.task_covar_module.covar_factor": covar_factor,
    "covar_module.task_covar_module.raw_var": var
}
conv_model.initialize(**hypers)
approx_model.initialize(**hypers)
None

## Training the model

In [4]:
# set model in training mode
conv_model.train()
approx_model.train()
conv_likelihood.train()
approx_likelihood.train()

# use adam optimizer, including the GaussianLikelihood parameters
conv_optimizer = torch.optim.Adam(conv_model.parameters(), lr=0.1)
approx_optimizer = torch.optim.Adam(approx_model.parameters(), lr=0.1)

# "Loss" for GPs - marginal log likelihood
conv_mll = gpytorch.mlls.ExactMarginalLogLikelihood(conv_likelihood, conv_model)
approx_mll = gpytorch.mlls.ExactMarginalLogLikelihood(approx_likelihood, approx_model)

for i in range(50):
    # zero gradients from previous iteration
    conv_optimizer.zero_grad()
    approx_optimizer.zero_grad()

    # output from model -> multivariate normal with mean vector and covariance matrix
    conv_output = conv_model(train_x)
    approx_output = approx_model(train_x)

    # calc loss (negative marginal log likelihood)
    conv_loss = -conv_mll(conv_output, train_y)
    approx_loss = -approx_mll(approx_output, train_y)

    # backprop gradients
    conv_loss.backward()
    approx_loss.backward()

    if i == 0 or (i + 1) % 10 == 0:
        print(f'Iter {i + 1:02d}/50')
        print(
            f'\tConventional:   Loss: {conv_loss.item():.3f}   '
            f'lengthscale: {conv_model.covar_module.data_covar_module.lengthscale.item():.3f}'
        )
        print(
            f'\tApproximate:   Loss: {approx_loss.item():.3f}   '
            f'lengthscale: {approx_model.covar_module.data_covar_module.lengthscale.item():.3f}   '
            f'alpha: {approx_model.covar_module.data_covar_module.alpha.item():.3f}'
        )

    # step on optimizer
    conv_optimizer.step()
    approx_optimizer.step()

Iter 01/50
	Conventional:   Loss: 1.600   lengthscale: 0.693
	Approximate:   Loss: 1.600   lengthscale: 0.693   alpha: 0.693
Iter 10/50
	Conventional:   Loss: 0.905   lengthscale: 1.248
	Approximate:   Loss: 1.275   lengthscale: 1.223   alpha: 0.343
Iter 20/50
	Conventional:   Loss: 0.282   lengthscale: 2.066
	Approximate:   Loss: 1.161   lengthscale: 1.718   alpha: 0.140
Iter 30/50
	Conventional:   Loss: -0.215   lengthscale: 3.016
	Approximate:   Loss: 1.121   lengthscale: 1.960   alpha: 0.050
Iter 40/50
	Conventional:   Loss: -0.571   lengthscale: 4.003
	Approximate:   Loss: 1.097   lengthscale: 2.029   alpha: 0.013
Iter 50/50
	Conventional:   Loss: -0.738   lengthscale: 4.906
	Approximate:   Loss: 1.088   lengthscale: 2.003   alpha: 0.003


## Resulting covariance matrix

In [5]:
with torch.no_grad():
    print("--Conventional--")
    conv_f_train = conv_model(train_x)
    conv_train_covar = conv_f_train.covariance_matrix
    print("\tcovar:\t", conv_train_covar.__repr__().replace(
        '\n        ',
        '\n' + 5*'\t' + ' '
    ))

    print("\n--Approximate--")
    approx_f_train = approx_model(train_x)
    approx_train_covar = approx_f_train.covariance_matrix
    print("\tcovar:\t", approx_train_covar.__repr__().replace(
        '\n        ',
        '\n' + 5*'\t' + ' '
    ))

    rmse = torch.sqrt(torch.mean((conv_train_covar - approx_train_covar)**2))
    print("\nRMSE:", rmse)

--Conventional--
	covar:	 tensor([[0.0022, 0.0108, 0.0021, 0.0106],
					 [0.0108, 0.6989, 0.0106, 0.6850],
					 [0.0021, 0.0106, 0.0022, 0.0108],
					 [0.0106, 0.6850, 0.0108, 0.6989]], grad_fn=<MatmulBackward>)

--Approximate--
	covar:	 tensor([[0.0070, 0.0010, 0.0069, 0.0010],
					 [0.0010, 0.0191, 0.0010, 0.0189],
					 [0.0069, 0.0010, 0.0070, 0.0010],
					 [0.0010, 0.0189, 0.0010, 0.0191]], grad_fn=<MatmulBackward>)

RMSE: tensor(0.3366)

--Conventional--
	covar:	 tensor([[0.0022, 0.0108, 0.0021, 0.0106],
					 [0.0108, 0.6989, 0.0106, 0.6850],
					 [0.0021, 0.0106, 0.0022, 0.0108],
					 [0.0106, 0.6850, 0.0108, 0.6989]], grad_fn=<MatmulBackward>)

--Approximate--
	covar:	 tensor([[0.0070, 0.0010, 0.0069, 0.0010],
					 [0.0010, 0.0191, 0.0010, 0.0189],
					 [0.0069, 0.0010, 0.0070, 0.0010],
					 [0.0010, 0.0189, 0.0010, 0.0191]], grad_fn=<MatmulBackward>)

RMSE: tensor(0.3366)
