In [8]:
import gpytorch
import torch
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF

from utils.metrics import MetricsLogger
from utils.data_loaders import get_wake_dataloaders

In [3]:
FACTORS_FOLDER = "discr_factors_x2_30_y-2_2_step0.125_TIstep0.01_CTstep0.01"
DATA_FOLDER = f"data/{FACTORS_FOLDER}/"
MODEL_NAME = f"multivariate_GP_ExactGP" #TODO
BEST_MODEL_PATH = f"saved_models/{FACTORS_FOLDER}/{MODEL_NAME}.pt"
CONSIDER_WS = False #TODO
COORDS_AS_INPUT = False # multivariate setting

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"{DEVICE=}")

# hyperparameters
EPOCHS = 50
LR = 0.1
BATCH_SIZE = 1

DEVICE=device(type='cpu')


In [5]:
valid_dataloader = None
train_dataloader, valid_dataloader, test_dataloader = get_wake_dataloaders(DATA_FOLDER,
                                                          consider_ws=CONSIDER_WS,
                                                          coords_as_input=COORDS_AS_INPUT,
                                                          train_perc=0.6,
                                                          test_perc=0.2,
                                                          validation_perc=0.2,
                                                          batch_size=BATCH_SIZE)

print(len(train_dataloader))

train_x, train_y = train_dataloader.dataset.inputs, train_dataloader.dataset.outputs

test_x, test_y = test_dataloader.dataset.inputs, test_dataloader.dataset.outputs
print("Train shapes: ", train_x.shape, train_y.shape)
print("Test shapes: ", test_x.shape, test_y.shape)

if valid_dataloader is not None:
    valid_x, valid_y = valid_dataloader.dataset.inputs, valid_dataloader.dataset.outputs
    print("Valid shapes: ", valid_x.shape, valid_y.shape)

grid_size = train_dataloader.dataset.num_cells
print(f"{grid_size=}")

5160
Train shapes:  torch.Size([5160, 2]) torch.Size([5160, 7168])
Test shapes:  torch.Size([1720, 2]) torch.Size([1720, 7168])
Valid shapes:  torch.Size([1720, 2]) torch.Size([1720, 7168])
grid_size=7168


In [5]:
"""
REDUCE THE AMOUNT OF DATA
num_instances = 10 #TODO
train_x= torch.split(train_x, num_instances)[0]#.reshape(num_instances, grid_size, num_features)
train_y = torch.split(train_y,num_instances)[0]#.reshape(num_instances, grid_size)
print(train_x.shape, train_y.shape)"""

torch.Size([71680, 4]) torch.Size([71680])


# Sklearn

In [7]:
kernel = RBF(length_scale=1.0)
gp = GaussianProcessRegressor(kernel=kernel)
gp.fit(train_x, train_y)

'\nToo much data for simple Regressor\nkernel = RBF(length_scale=1.0)\ngp = GaussianProcessRegressor(kernel=kernel)\ngp.fit(train_x, train_y)\n'

# Gpytorch

In [9]:
class MultitaskGPModel(gpytorch.models.ExactGP):
    def __init__(self, train_x, train_y, likelihood):
        super(MultitaskGPModel, self).__init__(train_x, train_y, likelihood)
        self.mean_module = gpytorch.means.MultitaskMean(
            gpytorch.means.ConstantMean(), num_tasks=grid_size
        )
        self.covar_module = gpytorch.kernels.MultitaskKernel(
            gpytorch.kernels.RBFKernel(), num_tasks=grid_size, rank=1
        )

    def forward(self, x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultitaskMultivariateNormal(mean_x, covar_x)


likelihood = gpytorch.likelihoods.MultitaskGaussianLikelihood(num_tasks=grid_size)
model = MultitaskGPModel(train_x, train_y, likelihood)

In [10]:
# Find optimal model hyperparameters
model.train()
likelihood.train()

# Use the adam optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=LR)  # Includes GaussianLikelihood parameters

# "Loss" for GPs - the marginal log likelihood
mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)

metrics_logger = MetricsLogger(name=MODEL_NAME, automatic_save_after=5)
best_tloss = 1_000_000.
for epoch in range(EPOCHS):
    optimizer.zero_grad()
    output = model(train_x)
    loss = -mll(output, train_y)
    loss.backward()
    loss_value = loss.item()
    metrics_logger.log_metric(epoch, 'Training loss', loss_value)
    if loss_value < best_tloss:
        best_tloss = loss_value
        torch.save(model.state_dict(), BEST_MODEL_PATH)
    optimizer.step()

metrics_logger.plot_metrics_by_epoch()
metrics_logger.save_metrics()

Logging multivariate_GP_ExactGP
Epoch 0 ->	Training loss=1.0853651762008667	
Epoch 1 ->	Training loss=1.0487637519836426	
Epoch 2 ->	Training loss=1.0113800764083862	
Epoch 3 ->	Training loss=0.9732176661491394	
Epoch 4 ->	Training loss=0.9342836141586304	
Epoch 5 ->	Training loss=0.8945872187614441	
Epoch 6 ->	Training loss=0.85414057970047	
Epoch 7 ->	Training loss=0.8129580616950989	
Epoch 8 ->	Training loss=0.7710564136505127	
Epoch 9 ->	Training loss=0.7284545302391052	
Epoch 10 ->	Training loss=0.6851732134819031	
Epoch 11 ->	Training loss=0.6412352919578552	
Epoch 12 ->	Training loss=0.5966648459434509	
Epoch 13 ->	Training loss=0.5514872074127197	
Epoch 14 ->	Training loss=nan	
Epoch 15 ->	Training loss=nan	
Epoch 16 ->	Training loss=nan	

KeyboardInterrupt: 