# Multitask GP Regression

Multitask regression, introduced in [this paper](https://papers.nips.cc/paper/3189-multi-task-gaussian-process-prediction.pdf), learns similarities in the outputs simultaneously. It's useful when you are performing regression on multiple functions that share the same inputs, especially if they have similarities (such as being sinusoidal).

Given inputs $x$ and $x'$, and tasks $i$ and $j$, the covariance between two datapoints and two tasks is given by

$$  k([x, i], [x', j]) = k_\text{inputs}(x, x') * k_\text{tasks}(i, j)
$$

where $k_\text{inputs}$ is a standard kernel (e.g. RBF) that operates on the inputs.
$k_\text{tasks}$ is a lookup table containing the inter-task covariance.

In [39]:
import math
import torch
import gpytorch
from matplotlib import pyplot as plt
from Data_Gen_Script import VField
import numpy as np
from scipy.stats import uniform

%matplotlib inline
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [32]:
# Build the training and testing splits

n = 1500  # total number of sampled input points
n_train = int(0.8 * n)  # the first 80% of the rows train, the remainder tests

# Inputs: n random points with 3 coordinates each (N=3, D=3 for the field below)
x = np.random.rand(n, 3)

# Targets: the vector field evaluated at every sampled input
vfield = VField(
    N=3,
    D=3,
    tgt_loc=np.array([0.2, 0.1, 0.1]),
    tgt_vec=np.array([0.5, 1.0, 1.0]),
)
y = vfield(x)

# Inputs are wrapped as torch tensors; targets are sliced exactly as VField returned them
train_x, test_x = torch.Tensor(x[:n_train, :]), torch.Tensor(x[n_train:, :])
# test_x.shape
train_y, test_y = y[:n_train, :], y[n_train:, :]

In [33]:
class MultitaskGPModel(gpytorch.models.ExactGP):
    """Exact GP with a multitask constant mean and a multitask RBF kernel.

    Args:
        train_x: Training inputs, forwarded unchanged to ``ExactGP``.
        train_y: Training targets, forwarded unchanged to ``ExactGP``.
        likelihood: Likelihood forwarded unchanged to ``ExactGP``.
            NOTE(review): its task count presumably has to equal
            ``num_tasks`` -- confirm against the caller.
        num_tasks: Number of output tasks handed to both the mean and the
            kernel. Defaults to 3, the value that used to be hard-coded.
        rank: ``rank`` argument of ``MultitaskKernel``. Defaults to 1, the
            value that used to be hard-coded.
    """

    def __init__(self, train_x, train_y, likelihood, num_tasks=3, rank=1):
        super().__init__(train_x, train_y, likelihood)
        # One ConstantMean wrapped for ``num_tasks`` outputs.
        self.mean_module = gpytorch.means.MultitaskMean(
            gpytorch.means.ConstantMean(), num_tasks=num_tasks
        )
        # RBF kernel on the inputs combined with an inter-task kernel; the
        # task count must agree with the mean module above, so both read the
        # same ``num_tasks`` argument.
        self.covar_module = gpytorch.kernels.MultitaskKernel(
            gpytorch.kernels.RBFKernel(), num_tasks=num_tasks, rank=rank
        )

    def forward(self, x):
        """Return the multitask normal built from the mean and kernel at ``x``."""
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultitaskMultivariateNormal(mean_x, covar_x)

    
# Multitask Gaussian likelihood over the three output tasks
likelihood = gpytorch.likelihoods.MultitaskGaussianLikelihood(num_tasks=3)
# Exact GP built on the training split and the likelihood above
model = MultitaskGPModel(train_x=train_x, train_y=train_y, likelihood=likelihood)

In [34]:
# Fit the model hyperparameters by minimising the negative marginal log likelihood
import os

# When a CI variable is present in the environment, run a two-step smoke test only
smoke_test = 'CI' in os.environ
if smoke_test:
    training_iterations = 2
else:
    training_iterations = 100

# Put both modules into training mode before optimising
likelihood.train()
model.train()

# "Loss" for GPs - the marginal log likelihood (negated inside the loop)
mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)

# Adam over model.parameters(), which includes the GaussianLikelihood parameters
optimizer = torch.optim.Adam(model.parameters(), lr=0.1)

for i in range(training_iterations):
    # Clear gradients left over from the previous step
    optimizer.zero_grad()
    output = model(train_x)
    loss = -mll(output, train_y)
    loss.backward()
    # Report the loss computed before this step's parameter update
    print('Iter %d/%d - Loss: %.3f' % (i + 1, training_iterations, loss.item()))
    optimizer.step()

Iter 1/100 - Loss: 1.149
Iter 2/100 - Loss: 1.107
Iter 3/100 - Loss: 1.067
Iter 4/100 - Loss: 1.026
Iter 5/100 - Loss: 0.986
Iter 6/100 - Loss: 0.946
Iter 7/100 - Loss: 0.906
Iter 8/100 - Loss: 0.866
Iter 9/100 - Loss: 0.825
Iter 10/100 - Loss: 0.785
Iter 11/100 - Loss: 0.744
Iter 12/100 - Loss: 0.702
Iter 13/100 - Loss: 0.661
Iter 14/100 - Loss: 0.620
Iter 15/100 - Loss: 0.578
Iter 16/100 - Loss: 0.537
Iter 17/100 - Loss: 0.495
Iter 18/100 - Loss: 0.454
Iter 19/100 - Loss: 0.413
Iter 20/100 - Loss: 0.373
Iter 21/100 - Loss: 0.333
Iter 22/100 - Loss: 0.293
Iter 23/100 - Loss: 0.254
Iter 24/100 - Loss: 0.216
Iter 25/100 - Loss: 0.179
Iter 26/100 - Loss: 0.143
Iter 27/100 - Loss: 0.109
Iter 28/100 - Loss: 0.076
Iter 29/100 - Loss: 0.044
Iter 30/100 - Loss: 0.015
Iter 31/100 - Loss: -0.013
Iter 32/100 - Loss: -0.039
Iter 33/100 - Loss: -0.062
Iter 34/100 - Loss: -0.083
Iter 35/100 - Loss: -0.101
Iter 36/100 - Loss: -0.117
Iter 37/100 - Loss: -0.130
Iter 38/100 - Loss: -0.140
Iter 39/100 -

In [35]:
# Predict on the held-out test inputs
# Switch both modules to evaluation mode first
likelihood.eval()
model.eval()

# Initialize plots
# f, (y1_ax, y2_ax) = plt.subplots(1, 2, figsize=(8, 3))

# Evaluate without tracking gradients
with torch.no_grad():  # , gpytorch.settings.fast_pred_var():
    # Model output at the test inputs, then passed through the likelihood
    posterior = model(test_x)
    predictions = likelihood(posterior)
    lower, upper = predictions.confidence_region()
    covariance = predictions.covariance_matrix
    mean = predictions.mean

# Shapes first, then the full tensors
print(mean.shape, covariance.shape)
print(f"mean:\n {mean}\n covariance:\n {covariance}")
    
# # This contains predictions for both tasks, flattened out
# # The first half of the predictions is for the first task
# # The second half is for the second task

# # Plot training data as black stars
# y1_ax.plot(train_x.detach().numpy(), train_y[:, 0].detach().numpy(), 'k*')
# # Predictive mean as blue line
# y1_ax.plot(test_x.numpy(), mean[:, 0].numpy(), 'b')
# # Shade in confidence 
# y1_ax.fill_between(test_x.numpy(), lower[:, 0].numpy(), upper[:, 0].numpy(), alpha=0.5)
# y1_ax.set_ylim([-3, 3])
# y1_ax.legend(['Observed Data', 'Mean', 'Confidence'])
# y1_ax.set_title('Observed Values (Likelihood)')

# # Plot training data as black stars
# y2_ax.plot(train_x.detach().numpy(), train_y[:, 1].detach().numpy(), 'k*')
# # Predictive mean as blue line
# y2_ax.plot(test_x.numpy(), mean[:, 1].numpy(), 'b')
# # Shade in confidence 
# y2_ax.fill_between(test_x.numpy(), lower[:, 1].numpy(), upper[:, 1].numpy(), alpha=0.5)
# y2_ax.set_ylim([-3, 3])
# y2_ax.legend(['Observed Data', 'Mean', 'Confidence'])
# y2_ax.set_title('Observed Values (Likelihood)')

# None

torch.Size([300, 3]) torch.Size([900, 900])
mean:
 tensor([[4.3849, 5.2057, 4.3876],
        [1.8034, 2.4224, 2.2262],
        [5.1913, 5.8939, 4.9780],
        [5.7490, 6.4342, 5.3938],
        [2.5892, 3.1373, 2.8268],
        [3.1759, 3.6756, 3.2161],
        [2.8989, 3.5296, 3.1113],
        [0.8837, 1.3391, 1.3807],
        [1.2371, 1.7803, 1.7356],
        [3.1492, 3.6672, 3.2647],
        [1.9213, 2.5409, 2.2478],
        [1.7748, 2.4167, 2.2217],
        [1.3245, 1.8439, 1.7654],
        [1.2924, 1.8240, 1.7629],
        [2.5050, 3.4243, 2.8292],
        [1.7997, 2.3359, 2.1699],
        [4.8220, 5.4338, 4.6401],
        [4.5486, 5.1376, 4.3965],
        [3.5196, 4.2405, 3.5285],
        [3.6234, 4.1845, 3.6602],
        [5.1070, 6.0470, 5.0324],
        [1.0917, 1.6054, 1.5993],
        [1.1855, 1.6639, 1.6474],
        [1.7184, 2.1844, 2.1145],
        [1.3175, 1.8654, 1.7766],
        [2.8076, 3.2315, 2.8759],
        [1.5799, 2.0482, 1.9729],
        [2.8744, 3.5374, 2.9029

In [36]:
# Sanity check: predict at the target location and compare with the target vector
# (the same coordinates and vector given to VField as tgt_loc / tgt_vec)
loc = torch.Tensor([[0.2, 0.1, 0.1]])
target = torch.Tensor([[0.5, 1., 1.]])

with torch.no_grad(), gpytorch.settings.fast_pred_var():
    pred = likelihood(model(loc))
    covar = pred.covariance_matrix
    mean = pred.mean

# Element-wise absolute deviation of the predictive mean from the target vector
error = (target - mean).abs()
error

tensor([[0.0085, 0.0006, 0.0372]])