In [1]:
import math
import torch
import gpytorch
import tqdm
from matplotlib import pyplot as plt
import numpy as np
import os
import random

%matplotlib inline
%load_ext autoreload
%autoreload 2

In [2]:
def load_dataset(dataset_dir):
    data = {}

    for category in ['train_l1', 'train_l2', 'val_l1', 'val_l2', 'test_l1', 'test_l2']:
        cat_data = np.load(os.path.join(dataset_dir, category + '.npz'))
        data['x_' + category] = cat_data['x']
        data['y_' + category] = cat_data['y']
        data['graph_' + category] = cat_data['graph']
    
    return data

data = load_dataset('../nargp_data/')


x_train_l = data['x_train_l1']
x_train_l = x_train_l.reshape(x_train_l.shape[0],-1)
y_train_l = np.log(data['y_train_l1']+10.) 
y_train_l = y_train_l.reshape(y_train_l.shape[0],-1)
graph_train_l = data['graph_train_l1']
graph_train_l = graph_train_l.reshape(graph_train_l.shape[0],-1)
x_train_h = data['x_train_l2']
x_train_h = x_train_h.reshape(x_train_h.shape[0],-1)
y_train_h = np.log(data['y_train_l2']+10.) 
y_train_h = y_train_h.reshape(y_train_h.shape[0],-1)
graph_train_h = data['graph_train_l2']
graph_train_h = graph_train_h.reshape(graph_train_h.shape[0],-1)

x_val_l = data['x_val_l1']
x_val_l = x_val_l.reshape(x_val_l.shape[0],-1)
y_val_l = np.log(data['y_val_l1']+10.)
y_val_l = y_val_l.reshape(y_val_l.shape[0],-1)
graph_val_l = data['graph_val_l1']
graph_val_l = graph_val_l.reshape(graph_val_l.shape[0],-1)
x_val_h = data['x_val_l2']
x_val_h = x_val_h.reshape(x_val_h.shape[0],-1)
y_val_h = np.log(data['y_val_l2']+10.)
y_val_h = y_val_h.reshape(y_val_h.shape[0],-1)
graph_val_h = data['graph_val_l2']
graph_val_h = graph_val_h.reshape(graph_val_h.shape[0],-1)

x_test_l = data['x_test_l1']
x_test_l = x_test_l.reshape(x_test_l.shape[0],-1)
y_test_l = np.log(data['y_test_l1']+10.)
y_test_l = y_test_l.reshape(y_test_l.shape[0],-1)
graph_test_l = data['graph_test_l1']
graph_test_l = graph_test_l.reshape(graph_test_l.shape[0],-1)
x_test_h = data['x_test_l2']
x_test_h = x_test_h.reshape(x_test_h.shape[0],-1)
y_test_h = np.log(data['y_test_l2']+10.)
y_test_h = y_test_h.reshape(y_test_h.shape[0],-1)
graph_test_h = data['graph_test_l2']
graph_test_h = graph_test_h.reshape(graph_test_h.shape[0],-1)

train_x_l = torch.from_numpy(x_train_l)
train_y_l = torch.from_numpy(y_train_l)
train_graph_l = torch.from_numpy(graph_train_l)

train_x_h = torch.from_numpy(x_train_h)
train_y_h = torch.from_numpy(y_train_h)
train_graph_h = torch.from_numpy(graph_train_h)

val_x_l = torch.from_numpy(x_val_l)
val_y_l = torch.from_numpy(y_val_l)
val_graph_l = torch.from_numpy(graph_val_l)

val_x_h = torch.from_numpy(x_val_h)
val_y_h = torch.from_numpy(y_val_h)
val_graph_h = torch.from_numpy(graph_val_h)

test_x_l = torch.from_numpy(x_test_l)
test_y_l = torch.from_numpy(y_test_l)
test_graph_l = torch.from_numpy(graph_test_l)

test_x_h = torch.from_numpy(x_test_h)
test_y_h = torch.from_numpy(y_test_h)
test_graph_h = torch.from_numpy(graph_test_h)

print(train_x_l.shape,train_y_l.shape, train_graph_l.shape)
print(train_x_h.shape,train_y_h.shape, train_graph_h.shape)

train_x_l = torch.cat([torch.repeat_interleave(train_x_l,28,-1)[:,:1476],train_graph_l],-1).float()
val_x_l = torch.cat([torch.repeat_interleave(val_x_l,28,-1)[:,:1476],val_graph_l],-1).float()
test_x_l = torch.cat([torch.repeat_interleave(test_x_l,28,-1)[:,:1476],test_graph_l],-1).float()

train_x_h = torch.cat([torch.repeat_interleave(train_x_h,5,-1),train_graph_h],-1).float()
val_x_h = torch.cat([torch.repeat_interleave(val_x_h,5,-1),val_graph_h],-1).float()
test_x_h = torch.cat([torch.repeat_interleave(test_x_h,5,-1),test_graph_h],-1).float()

print(train_x_l.shape,train_y_l.shape)
print(train_x_h.shape,train_y_h.shape)
print(test_x_h.shape,test_y_h.shape)

torch.Size([780, 54]) torch.Size([780, 1800]) torch.Size([780, 324])
torch.Size([150, 255]) torch.Size([150, 8500]) torch.Size([150, 7225])
torch.Size([780, 1800]) torch.Size([780, 1800])
torch.Size([150, 8500]) torch.Size([150, 8500])
torch.Size([1560, 8500]) torch.Size([1560, 8500])


In [3]:
num_latents = 1
num_tasks_l1 = 1800

class MultitaskGPModel_l1(gpytorch.models.ApproximateGP):
    def __init__(self):
        # Let's use a different set of inducing points for each latent function
        inducing_points = torch.rand(num_latents, 1, 1800)

        # We have to mark the CholeskyVariationalDistribution as batch
        # so that we learn a variational distribution for each task
        variational_distribution = gpytorch.variational.CholeskyVariationalDistribution(
            inducing_points.size(-2), batch_shape=torch.Size([num_latents])
        )

        # We have to wrap the VariationalStrategy in a LMCVariationalStrategy
        # so that the output will be a MultitaskMultivariateNormal rather than a batch output
        variational_strategy = gpytorch.variational.LMCVariationalStrategy(
            gpytorch.variational.VariationalStrategy(
                self, inducing_points, variational_distribution, learn_inducing_locations=True
            ),
            num_tasks=1800,
            num_latents=1,
            latent_dim= -1
        )

        super().__init__(variational_strategy)

        # The mean and covariance modules should be marked as batch
        # so we learn a different set of hyperparameters
        self.mean_module = gpytorch.means.ConstantMean(batch_shape=torch.Size([num_latents]))
        self.covar_module = gpytorch.kernels.ScaleKernel(
            gpytorch.kernels.RBFKernel(batch_shape=torch.Size([num_latents])),
            batch_shape=torch.Size([num_latents])
        )

    def forward(self, x):
        # The forward function should be written as if we were dealing with each output
        # dimension in batch
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)

In [4]:
num_latents = 1
num_tasks_l2 = 8500 

class MultitaskGPModel_l2(gpytorch.models.ApproximateGP):
    def __init__(self):
        # Let's use a different set of inducing points for each latent function
        inducing_points = torch.rand(num_latents, 1, 10300) #8500+1800

        # We have to mark the CholeskyVariationalDistribution as batch
        # so that we learn a variational distribution for each task
        variational_distribution = gpytorch.variational.CholeskyVariationalDistribution(
            inducing_points.size(-2), batch_shape=torch.Size([num_latents])
        )

        # We have to wrap the VariationalStrategy in a LMCVariationalStrategy
        # so that the output will be a MultitaskMultivariateNormal rather than a batch output
        variational_strategy = gpytorch.variational.LMCVariationalStrategy(
            gpytorch.variational.VariationalStrategy(
                self, inducing_points, variational_distribution, learn_inducing_locations=True
            ),
            num_tasks= 8500,
            num_latents=1,
            latent_dim=-1
        )

        super().__init__(variational_strategy)

        # The mean and covariance modules should be marked as batch
        # so we learn a different set of hyperparameters
        self.mean_module = gpytorch.means.ConstantMean(batch_shape=torch.Size([num_latents]))
        self.covar_module = gpytorch.kernels.ScaleKernel(
            gpytorch.kernels.RBFKernel(batch_shape=torch.Size([num_latents]),active_dims=tuple(range(0,1800)))*
            gpytorch.kernels.RBFKernel(batch_shape=torch.Size([num_latents]),active_dims=tuple(range(1800,10300)))+
            gpytorch.kernels.RBFKernel(batch_shape=torch.Size([num_latents]),active_dims=tuple(range(1800,10300))),
            batch_shape=torch.Size([num_latents])
        )

    def forward(self, x):
        # The forward function should be written as if we were dealing with each output
        # dimension in batch
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)


In [5]:
# this is for running the notebook in our testing framework
smoke_test = ('CI' in os.environ)
num_epochs = 2 if smoke_test else 2000

random_seeds = [42,43,44]
v_pred_mean_list_l1 = []
te_pred_mean_list_l1 = []
tr_pred_mean_list_l1 = []
saved_model_list_l1 = []


for i in range(3):
    torch.manual_seed(random_seeds[i])
    np.random.seed(random_seeds[i])
    random.seed(random_seeds[i])
    
    model = MultitaskGPModel_l1()
    likelihood = gpytorch.likelihoods.MultitaskGaussianLikelihood(num_tasks=num_tasks_l1)

    model.train()
    likelihood.train()

    optimizer = torch.optim.Adam([
        {'params': model.parameters()},
        {'params': likelihood.parameters()},
    ], lr=0.05)

    # Our loss object. We're using the VariationalELBO, which essentially just computes the ELBO
    mll = gpytorch.mlls.VariationalELBO(likelihood, model, num_data=train_y_l.size(0))

    # We use more CG iterations here because the preconditioner introduced in the NeurIPS paper seems to be less
    # effective for VI.
    epochs_iter = tqdm.tqdm_notebook(range(num_epochs), desc="Epoch")

    min_val_loss = float('inf')
    patience = 100
    wait = 0

    for j in epochs_iter:
        # Within each iteration, we will go over each minibatch of data
        model.train()
        likelihood.train()

        optimizer.zero_grad()
        output = model(train_x_l)
        loss = -mll(output, train_y_l)

        epochs_iter.set_postfix(loss=loss.item())
        loss.backward()
        optimizer.step()
        
        
        # Make predictions
        model.eval()
        likelihood.eval()
        with torch.no_grad(), gpytorch.settings.fast_pred_var():
            val_predictions = likelihood(model(val_x_l))
            val_mean = val_predictions.mean.detach().numpy()
            val_loss = np.mean(np.abs(val_y_l.numpy() - val_mean))
            if val_loss < min_val_loss:
                wait = 0
                min_val_loss = val_loss
                saved_model = model
            elif val_loss >= min_val_loss:
                wait += 1
                if wait == patience:
#                     saved_model_list_l1.append(saved_model)
#                     te_predictions = likelihood(saved_model(test_x_l))
#                     te_mean = te_predictions.mean
#                     te_pred_mean_list_l1.append(te_mean)
# #                     te_pred_lower, te_pred_upper = te_predictions.confidence_region()
# #                     te_pred_std = te_mean - te_pred_lower
# #                     te_pred_std_list_l1.append(te_pred_std)
                    
#                     v_predictions = likelihood(saved_model(val_x_l))
#                     v_mean = v_predictions.mean
#                     v_pred_mean_list_l1.append(v_mean)
# #                     v_pred_lower, v_pred_upper = v_predictions.confidence_region()
# #                     v_pred_std = v_mean - v_pred_lower
# #                     v_pred_std_list_l1.append(v_pred_std)
                    
#                     tr_predictions = likelihood(saved_model(train_x_l))
#                     tr_mean = tr_predictions.mean
#                     tr_pred_mean_list_l1.append(tr_mean)
                    break
        
    with torch.no_grad(), gpytorch.settings.fast_pred_var():
        saved_model_list_l1.append(saved_model)
        te_predictions = likelihood(saved_model(test_x_l))
        te_mean = te_predictions.mean
        te_pred_mean_list_l1.append(te_mean)
#         te_pred_lower, te_pred_upper = te_predictions.confidence_region()
#         te_pred_std = te_mean - te_pred_lower
#         te_pred_std_list_l1.append(te_pred_std)

        v_predictions = likelihood(saved_model(val_x_l))
        v_mean = v_predictions.mean
        v_pred_mean_list_l1.append(v_mean)
#         v_pred_lower, v_pred_upper = v_predictions.confidence_region()
#         v_pred_std = v_mean - v_pred_lower
#         v_pred_std_list_l1.append(v_pred_std)

        tr_predictions = likelihood(saved_model(train_x_l))
        tr_mean = tr_predictions.mean
        tr_pred_mean_list_l1.append(tr_mean)

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  epochs_iter = tqdm.tqdm_notebook(range(num_epochs), desc="Epoch")


Epoch:   0%|          | 0/2000 [00:00<?, ?it/s]

Epoch:   0%|          | 0/2000 [00:00<?, ?it/s]

Epoch:   0%|          | 0/2000 [00:00<?, ?it/s]

In [7]:
# this is for running the notebook in our testing framework
smoke_test = ('CI' in os.environ)
num_epochs = 2 if smoke_test else 2000

random_seeds = [42,43,44]
pred_mean_list_l2 = []
pred_std_list_l2 = []
saved_model_list_l2 = []


for i in range(3):
    torch.manual_seed(random_seeds[i])
    np.random.seed(random_seeds[i])
    random.seed(random_seeds[i])
    
    model = MultitaskGPModel_l2()
    likelihood = gpytorch.likelihoods.MultitaskGaussianLikelihood(num_tasks=num_tasks_l2)

    model.train()
    likelihood.train()

    optimizer = torch.optim.Adam([
        {'params': model.parameters()},
        {'params': likelihood.parameters()},
    ], lr=0.05)

    # Our loss object. We're using the VariationalELBO, which essentially just computes the ELBO
    mll = gpytorch.mlls.VariationalELBO(likelihood, model, num_data=train_y_h.size(0))

    # We use more CG iterations here because the preconditioner introduced in the NeurIPS paper seems to be less
    # effective for VI.
    epochs_iter = tqdm.tqdm_notebook(range(num_epochs), desc="Epoch")

    min_val_loss = float('inf')
    patience = 100
    wait = 0

    for j in epochs_iter:
        # Within each iteration, we will go over each minibatch of data
        model.train()
        likelihood.train()

        optimizer.zero_grad()
        train_xx = torch.cat((tr_pred_mean_list_l1[i][:train_x_h.shape[0],:], train_x_h),-1)
        output = model(train_xx)
        loss = -mll(output, train_y_h)

        epochs_iter.set_postfix(loss=loss.item())
        loss.backward()
        optimizer.step()
        
        
        # Make predictions
        model.eval()
        likelihood.eval()
        with torch.no_grad(), gpytorch.settings.fast_pred_var():
            val_xx = torch.cat((v_pred_mean_list_l1[i][:val_x_h.shape[0],:], val_x_h),-1)
            val_predictions = likelihood(model(val_xx))
            val_mean = val_predictions.mean.detach().numpy()
            val_loss = np.mean(np.abs(val_y_h.numpy() - val_mean))
            if val_loss < min_val_loss:
                wait = 0
                min_val_loss = val_loss
                saved_model = model
            elif val_loss >= min_val_loss:
                wait += 1
                if wait == patience:
#                     saved_model_list_l2.append(saved_model)
#                     test_xx = torch.cat((te_pred_mean_list_l1[i][:test_x_h.shape[0],:], test_x_h),-1)
#                     predictions = likelihood(saved_model(test_xx))
#                     mean = predictions.mean
#                     pred_mean_list_l2.append(mean)
                    
#                     pred_lower, pred_upper = predictions.confidence_region()
#                     pred_std = mean - pred_lower
#                     pred_std_list_l2.append(pred_std)
                    break
        
    with torch.no_grad(), gpytorch.settings.fast_pred_var():
        saved_model_list_l2.append(saved_model)
        test_xx = torch.cat((te_pred_mean_list_l1[i][:test_x_h.shape[0],:], test_x_h),-1)
        predictions = likelihood(saved_model(test_xx))
        mean = predictions.mean
        pred_mean_list_l2.append(mean)
        
        pred_lower, pred_upper = predictions.confidence_region()
        pred_std = mean - pred_lower
        pred_std_list_l2.append(pred_std)
        

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  epochs_iter = tqdm.tqdm_notebook(range(num_epochs), desc="Epoch")


Epoch:   0%|          | 0/2000 [00:00<?, ?it/s]

Epoch:   0%|          | 0/2000 [00:00<?, ?it/s]

Epoch:   0%|          | 0/2000 [00:00<?, ?it/s]

In [13]:
def nll_metric(pred_mu, pred_std, y):
#         pred_mu = torch.from_numpy(pred_mu)
#         pred_std = torch.from_numpy(pred_std)
#         y = torch.from_numpy(y)
        gaussian = torch.distributions.Normal(pred_mu, pred_std)
        nll = -gaussian.log_prob(y)
        nll = torch.mean(nll).detach().numpy().item()
        return nll

def mae_metric(y_pred, y_true):
    loss = torch.abs(y_pred - y_true)
    loss[loss != loss] = 0
    loss = loss.mean().detach().numpy().item()
    return loss

In [14]:
# MAE
# truth = test_y.reshape(-1,6,87,87)
truth = test_y_h
mae_list = []
nll_list = []
for i in range(3):
    mean = pred_mean_list_l2[i]
    std = pred_std_list_l2[i]
#     mean = mean.reshape(-1,6,87,87)
    mae = mae_metric(mean,truth)
    mae_list.append(mae)
    nll = nll_metric(mean,std,truth)
    nll_list.append(nll)
    
print(mae_list,nll_list)

[0.7691711627556101, 0.7692180150311272, 0.7674125238387164] [1.7085020014220018, 1.7056576732538988, 1.9208391020433289]


In [18]:
np.save('../result/nargp_mae.npy', np.array(mae_list))
np.save('../result/nargp_nll.npy', np.array(nll_list))
np.save('../result/nargp_mean.npy', torch.stack(pred_mean_list_l2,0).detach().numpy())
np.save('../result/nargp_std.npy', torch.stack(pred_std_list_l2,0).detach().numpy())
np.save('../result/nargp_truth.npy', test_y_h.detach().numpy())