In [1]:
import math
import torch
import gpytorch
import tqdm
from matplotlib import pyplot as plt
import numpy as np
import os
import random

%matplotlib inline
%load_ext autoreload
%autoreload 2

In [2]:
def load_dataset(dataset_dir):
    data = {}

    for category in ['train_l1', 'train_l2', 'val_l1', 'val_l2', 'test_l1', 'test_l2']:
        cat_data = np.load(os.path.join(dataset_dir, category + '.npz'))
        data['x_' + category] = cat_data['x']
        data['y_' + category] = cat_data['y']
    
    return data

data = load_dataset('mfnp_data/')

x_train = data['x_train_l2']
x_train = x_train.reshape(x_train.shape[0],-1)
y_train = data['y_train_l2']
y_train = y_train.reshape(y_train.shape[0],-1)
x_val = data['x_val_l2']
x_val = x_val.reshape(x_val.shape[0],-1)
y_val = data['y_val_l2']
y_val = y_val.reshape(y_val.shape[0],-1)
x_test = data['x_test_l2']
x_test = x_test.reshape(x_test.shape[0],-1)
y_test = data['y_test_l2']
y_test = y_test.reshape(y_test.shape[0],-1)

train_x = torch.from_numpy(x_train)
train_y = torch.from_numpy(y_train)

val_x = torch.from_numpy(x_val)
val_y = torch.from_numpy(y_val)

test_x = torch.from_numpy(x_test)
test_y = torch.from_numpy(y_test)

print(y_train.shape,y_val.shape,y_test.shape)

(32, 45414) (50, 45414) (50, 45414)


In [3]:
num_latents = 1
num_tasks = 45414

class MultitaskGPModel(gpytorch.models.ApproximateGP):
    def __init__(self):
        # Let's use a different set of inducing points for each latent function
        inducing_points = torch.rand(num_latents, 1, 45414)

        # We have to mark the CholeskyVariationalDistribution as batch
        # so that we learn a variational distribution for each task
        variational_distribution = gpytorch.variational.CholeskyVariationalDistribution(
            inducing_points.size(-2), batch_shape=torch.Size([num_latents])
        )

        # We have to wrap the VariationalStrategy in a LMCVariationalStrategy
        # so that the output will be a MultitaskMultivariateNormal rather than a batch output
        variational_strategy = gpytorch.variational.LMCVariationalStrategy(
            gpytorch.variational.VariationalStrategy(
                self, inducing_points, variational_distribution, learn_inducing_locations=True
            ),
            num_tasks=45414,
            num_latents=1,
            latent_dim=-1
        )

        super().__init__(variational_strategy)

        # The mean and covariance modules should be marked as batch
        # so we learn a different set of hyperparameters
        self.mean_module = gpytorch.means.ConstantMean(batch_shape=torch.Size([num_latents]))
        self.covar_module = gpytorch.kernels.ScaleKernel(
            gpytorch.kernels.RBFKernel(batch_shape=torch.Size([num_latents])),
            batch_shape=torch.Size([num_latents])
        )

    def forward(self, x):
        # The forward function should be written as if we were dealing with each output
        # dimension in batch
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)


In [4]:
# this is for running the notebook in our testing framework
smoke_test = ('CI' in os.environ)
num_epochs = 2 if smoke_test else 2000

random_seeds = [42,43,46]
pred_mean_list = []
pred_std_list = []
saved_model_list = []
pred_lower_list = []
pred_upper_list = []


for i in range(3):
    torch.manual_seed(random_seeds[i])
    np.random.seed(random_seeds[i])
    random.seed(random_seeds[i])
    
    model = MultitaskGPModel()
    likelihood = gpytorch.likelihoods.MultitaskGaussianLikelihood(num_tasks=num_tasks)

    model.train()
    likelihood.train()

    optimizer = torch.optim.Adam([
        {'params': model.parameters()},
        {'params': likelihood.parameters()},
    ], lr=0.05)

    # Our loss object. We're using the VariationalELBO, which essentially just computes the ELBO
    mll = gpytorch.mlls.VariationalELBO(likelihood, model, num_data=train_y.size(0))

    # We use more CG iterations here because the preconditioner introduced in the NeurIPS paper seems to be less
    # effective for VI.
    epochs_iter = tqdm.tqdm_notebook(range(num_epochs), desc="Epoch")

    min_val_loss = float('inf')
    patience = 100
    wait = 0

    for j in epochs_iter:
        # Within each iteration, we will go over each minibatch of data
        model.train()
        likelihood.train()

        optimizer.zero_grad()
        output = model(train_x)
        loss = -mll(output, train_y)

        epochs_iter.set_postfix(loss=loss.item())
        loss.backward()
        optimizer.step()
        
        
        # Make predictions
        model.eval()
        likelihood.eval()
        with torch.no_grad(), gpytorch.settings.fast_pred_var():
            val_predictions = likelihood(model(val_x))
            val_mean = val_predictions.mean.detach().numpy()
            val_loss = np.mean(np.abs(val_y.numpy() - val_mean))
            if val_loss < min_val_loss:
                wait = 0
                min_val_loss = val_loss
                saved_model = model
            elif val_loss >= min_val_loss:
                wait += 1
                if wait == patience:
#                     saved_model_list.append(saved_model)
#                     predictions = likelihood(saved_model(test_x))
#                     mean = predictions.mean
#                     pred_mean_list.append(mean)
#                     pred_lower, pred_upper = predictions.confidence_region()
#                     pred_lower_list.append(pred_lower)
#                     pred_upper_list.append(pred_upper)
#                     pred_std = mean - pred_lower
#                     pred_std_list.append(pred_std)
                    break
        
    with torch.no_grad(), gpytorch.settings.fast_pred_var():
        saved_model_list.append(saved_model)
        predictions = likelihood(saved_model(test_x))
        mean = predictions.mean
        pred_mean_list.append(mean)
        pred_lower, pred_upper = predictions.confidence_region()
        pred_lower_list.append(pred_lower)
        pred_upper_list.append(pred_upper)
        pred_std = mean - pred_lower
        pred_std_list.append(pred_std)
        

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  epochs_iter = tqdm.tqdm_notebook(range(num_epochs), desc="Epoch")


Epoch:   0%|          | 0/2000 [00:00<?, ?it/s]

Epoch:   0%|          | 0/2000 [00:00<?, ?it/s]

Epoch:   0%|          | 0/2000 [00:00<?, ?it/s]

In [5]:
def nll_metric(pred_mu, pred_std, y):

#         pred_mu = pred_mu * no_y_std + no_y_mean
#         pred_std = pred_std * no_y_std
#         y = y * no_y_std + no_y_mean
        gaussian = torch.distributions.Normal(pred_mu, pred_std)
        nll = -gaussian.log_prob(y)
        nll = torch.mean(nll).detach().numpy().item()
        return nll

def mae_metric(y_pred, y_true):
#     y_pred = y_pred * no_y_std + no_y_mean
#     y_true = y_true * no_y_std + no_y_mean
    loss = torch.abs(y_pred - y_true)
    loss[loss != loss] = 0
    loss = loss.mean().detach().numpy().item()
    return loss

In [6]:
# MAE
# truth = test_y.reshape(-1,6,87,87)
truth = test_y
mae_list = []
nll_list = []
for i in range(3):
    mean = pred_mean_list[i]
    std = pred_std_list[i]
#     mean = mean.reshape(-1,6,87,87)
    mae = mae_metric(mean,truth)
    mae_list.append(mae)
    nll = nll_metric(mean,std,truth)
    nll_list.append(nll)
    
print(mae_list,nll_list)



[0.9097384810447693, 0.9097420573234558, 0.909737229347229] [2.2886478900909424, 2.292630910873413, 2.283050537109375]


In [11]:
np.save('result/sfgp_mae.npy', np.array(mae_list))
np.save('result/sfgp_nll.npy', np.array(nll_list))
np.save('result/sfgp_mean.npy', torch.stack(pred_mean_list,0).detach().numpy())
np.save('result/sfgp_std.npy',  torch.stack(pred_std_list,0).detach().numpy())
np.save('result/sfgp_truth.npy', test_y.detach().numpy())