# Overview

This notebook shows how to define a kernel that is robust to uncalibrated data in the input dimension.

In [2]:
import matplotlib.pyplot as plt

import torch
from botorch.models import MultiTaskGP
from botorch.fit import fit_gpytorch_mll
from gpytorch.mlls import ExactMarginalLogLikelihood

In [14]:
def plot_gp(gp, X, y):
    """Plot the high-fidelity prediction of a two-fidelity GP with its training data.

    The posterior is evaluated on 200 evenly spaced points in [0, 1]. Only the
    second output column (index 1, labelled as high fidelity) is drawn: its
    mean as a line and its confidence region as a shaded band. The training
    points of both fidelities are overlaid, and the correlation coefficient
    between the two tasks is shown in the figure title.

    Args:
        gp: Fitted model exposing ``posterior(x)`` and a ``task_covar_module``
            with ``_eval_covar_matrix()`` (the ``MultiTaskGP`` models built in
            this notebook).
        X: Training inputs as a 2-D tensor; column 0 is the input location and
            column 1 is the fidelity index (0 = low fidelity, 1 = high fidelity).
        y: Training targets, one row per row of ``X``.
    """
    # Build the grid in the dtype of the training inputs so the model is not
    # queried with a dtype that differs from the data it was trained on.
    x = torch.linspace(0, 1, 200, dtype=X.dtype).reshape(-1, 1)

    with torch.no_grad():
        p = gp.posterior(x)

        # Posterior mean
        m = p.mean

        # 2-sigma confidence region around the mean
        l, u = p.mvn.confidence_region()

        # Correlation coefficient between the two tasks: off-diagonal entry of
        # the task covariance normalised by both standard deviations. The whole
        # ratio is converted to a Python float (not just the denominator), and
        # it is evaluated under no_grad so no autograd graph is kept alive.
        cov = gp.task_covar_module._eval_covar_matrix()
        corr = (cov[1, 0] / torch.sqrt(cov[0, 0] * cov[1, 1])).item()

    # Row masks selecting each fidelity from the training set
    is_low = X[:, 1] == 0
    is_high = X[:, 1] == 1

    fig, ax = plt.subplots()

    ax.set_ylabel('$f$')

    ax.fill_between(x.squeeze(), l[:, 1], u[:, 1], alpha=0.25, lw=0, color='C0')

    ax.scatter(X[:, 0][is_low], y[is_low], color='C1', label='Low-fidelity data')
    ax.scatter(X[:, 0][is_high], y[is_high], color='C0', label='High-fidelity data')
    ax.plot(x, m[:, 1], color='C0', lw=1, label='Multi-fidelity GP prediction\n for high-fidelity data')

    ax.legend(loc=0, fontsize='small')
    ax.set_xlim(0, 1)
    ax.set_xticks([])
    ax.set_ylim(-2, 5)

    ax.set_title(f'Correlation coefficient: {corr:.2f}')

## Robustness to wrong calibration in output

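The multi-fidelity kernel is naturally robust to a mismatch between the two fidelities in the output dimension (for example, a vertical shift or a negative correlation between the low- and high-fidelity outputs).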

In [18]:
# Define synthetic data.
# The low-fidelity data is miscalibrated in the output: vertical shift + negative correlation.

# Build the inputs directly in float64: creating them in the default float32 and
# converting afterwards would round values such as 0.03 before the conversion.
X1 = torch.tensor([0.03, 0.1, 0.15, 0.17, 0.6, 0.68, 0.7, 0.8, 0.82, 0.95], dtype=torch.float64)
X1 = torch.stack([X1, torch.zeros_like(X1)], dim=-1)  # second column = fidelity index 0 (low)
X2 = torch.tensor([0.03, 0.1, 0.14, 0.2, 0.3, 0.97], dtype=torch.float64)
X2 = torch.stack([X2, torch.ones_like(X2)], dim=-1)  # second column = fidelity index 1 (high)
X = torch.cat([X1, X2])

# Draw the noise from a dedicated, seeded generator so the data set is identical
# on every run; the global RNG state is left untouched.
noise_rng = torch.Generator().manual_seed(0)
noise = 0.2*(torch.rand(len(X), 1, dtype=X.dtype, generator=noise_rng) - 0.5)

# Low fidelity (index 0): +sin(6x) shifted up by 0.6; high fidelity (index 1): -sin(6x).
y = noise + torch.sin(6*X[:,0:1])*(1-2*X[:,1:]) + 0.6*(1-X[:,1:])

In [None]:
# Fit the multi-fidelity GP and plot the result.
# The fit is repeated three times, each with a freshly built model, to check
# that the outcome is robust from one training run to the next.
for _ in range(3):
    gp = MultiTaskGP(train_X=X, train_Y=y, task_feature=-1, rank=2)
    mll = ExactMarginalLogLikelihood(likelihood=gp.likelihood, model=gp)
    fit_gpytorch_mll(mll)

    plot_gp(gp, X=X, y=y)


## Non-robustness to wrong calibration in input

In [26]:
# Define synthetic data.
# The low-fidelity data is miscalibrated in the input: horizontal shift.

# Build the inputs directly in float64: creating them in the default float32 and
# converting afterwards would round values such as 0.1 before the conversion.
X1 = torch.tensor([0.1, 0.15, 0.17, 0.27, 0.3, 0.35, 0.4, 0.5, 0.6, 0.68, 0.7, 0.8, 0.82, 0.95], dtype=torch.float64)
X1 = torch.stack([X1, torch.zeros_like(X1)], dim=-1)  # second column = fidelity index 0 (low)
X2 = torch.tensor([0.03, 0.1, 0.14, 0.2, 0.3, 0.97], dtype=torch.float64)
X2 = torch.stack([X2, torch.ones_like(X2)], dim=-1)  # second column = fidelity index 1 (high)
X = torch.cat([X1, X2])

# Draw the noise from a dedicated, seeded generator so the data set is identical
# on every run; the global RNG state is left untouched.
noise_rng = torch.Generator().manual_seed(0)
noise = 0.2*(torch.rand(len(X), 1, dtype=X.dtype, generator=noise_rng) - 0.5)

# Low fidelity (index 0): sin(6x - 1.5), i.e. shifted along x; high fidelity (index 1): sin(6x).
y = noise + torch.sin(6*X[:,0:1] - 1.5*(1-X[:,1:]))

In [None]:
# Fit the multi-fidelity GP and plot the result.
# The fit is repeated three times, each with a freshly built model, to check
# that the outcome is robust from one training run to the next.
for _ in range(3):
    gp = MultiTaskGP(train_X=X, train_Y=y, task_feature=-1, rank=2)
    mll = ExactMarginalLogLikelihood(likelihood=gp.likelihood, model=gp)
    fit_gpytorch_mll(mll)

    plot_gp(gp, X=X, y=y)