In [1]:
import math
from tqdm import tqdm
import torch
import gpytorch
from matplotlib import pyplot as plt

# Make plots inline
%matplotlib inline

In [2]:
from ucimlrepo import fetch_ucirepo 
from math import floor
from torch.utils.data import TensorDataset, DataLoader

def downloader(uci_id, train_frac=0.8, batch_size=256):
    """Fetch a UCI ML Repository dataset and split it into train/test tensors.

    The rows are split in their original order (no shuffling of the rows
    before the split): the leading ``train_frac`` of the rows form the
    training set and the remaining rows form the test set.

    Args:
        uci_id: Dataset id forwarded to ``fetch_ucirepo``.
        train_frac: Fraction of rows, taken from the top of the table, used
            for training. Must lie strictly between 0 and 1. Defaults to 0.8.
        batch_size: Batch size used by both returned ``DataLoader`` objects.
            Defaults to 256.

    Returns:
        A tuple ``(train_x, train_y, test_x, test_y, train_loader,
        test_loader)``. The first four entries are float32 tensors; the
        loaders wrap ``TensorDataset`` objects built from them, and only the
        training loader shuffles its batches.

    Raises:
        ValueError: If ``train_frac`` is not strictly between 0 and 1.
    """
    if not 0.0 < train_frac < 1.0:
        raise ValueError(
            "train_frac must be strictly between 0 and 1, got {}".format(train_frac)
        )

    # fetch dataset
    uci_download = fetch_ucirepo(id=uci_id)

    # data (as pandas dataframes)
    X_data = uci_download.data.features

    # Drop the known category and date variables of the datasets used here
    if uci_id == 1:
        X_data = X_data.drop(["Sex"], axis=1)
    if uci_id == 275:
        X_data = X_data.drop(["dteday"], axis=1)

    # Safety net for any other dataset id: a remaining string/date column
    # would make the float32 tensor conversion below fail, so keep only the
    # numeric columns.
    X_data = X_data.select_dtypes(include="number")

    y_data = uci_download.data.targets
    y = y_data.squeeze()

    X = torch.tensor(X_data.values, dtype=torch.float32)
    y = torch.tensor(y.values, dtype=torch.float32)

    # Ordered split: first `train_frac` of the rows train, the rest test.
    train_n = int(floor(train_frac * len(X)))
    train_x = X[:train_n, :].contiguous()
    train_y = y[:train_n].contiguous()

    test_x = X[train_n:, :].contiguous()
    test_y = y[train_n:].contiguous()

    # Create TensorDataset and DataLoader for training and test sets
    train_dataset = TensorDataset(train_x, train_y)
    test_dataset = TensorDataset(test_x, test_y)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    return train_x, train_y, test_x, test_y, train_loader, test_loader

def whole_process(train_x, train_y, test_x, test_y, train_loader, test_loader):

    data_dim = train_x.size(-1)

    class LargeFeatureExtractor(torch.nn.Sequential):
        def __init__(self):
            super(LargeFeatureExtractor, self).__init__()
            self.add_module('linear1', torch.nn.Linear(data_dim, 1000))
            self.add_module('relu1', torch.nn.ReLU())
            self.add_module('linear2', torch.nn.Linear(1000, 500))
            self.add_module('relu2', torch.nn.ReLU())
            self.add_module('linear3', torch.nn.Linear(500, 50))
            self.add_module('relu3', torch.nn.ReLU())
            self.add_module('linear4', torch.nn.Linear(50, 2))

    feature_extractor = LargeFeatureExtractor()
    class GPRegressionModel(gpytorch.models.ExactGP):
            def __init__(self, train_x, train_y, likelihood):
                super(GPRegressionModel, self).__init__(train_x, train_y, likelihood)
                self.mean_module = gpytorch.means.ConstantMean()
                self.covar_module = gpytorch.kernels.ScaleKernel(
                gpytorch.kernels.SpectralMixtureKernel(num_mixtures=2, ard_num_dims=2))
                # self.covar_module = gpytorch.kernels.GridInterpolationKernel(
                #     gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel(ard_num_dims=2)),
                #     num_dims=2, grid_size=64
                # )
                self.feature_extractor = feature_extractor

                # This module will scale the NN features so that they're nice values
                self.scale_to_bounds = gpytorch.utils.grid.ScaleToBounds(-1., 1.)

            def forward(self, x):
                # We're first putting our data through a deep net (feature extractor)
                projected_x = self.feature_extractor(x)
                projected_x = self.scale_to_bounds(projected_x)  # Make the NN values "nice"

                mean_x = self.mean_module(projected_x)
            
                covar_x = self.covar_module(projected_x)
                return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)
            
    likelihood = gpytorch.likelihoods.GaussianLikelihood()
    model = GPRegressionModel(train_x, train_y, likelihood)

    training_iterations = 100

    # Find optimal model hyperparameters
    model.train()
    likelihood.train()

    # Use the adam optimizer
    optimizer = torch.optim.Adam([
        {'params': model.feature_extractor.parameters()},
        {'params': model.covar_module.parameters()},
        {'params': model.mean_module.parameters()},
        {'params': model.likelihood.parameters()},
    ], lr=0.01)

    # "Loss" for GPs - the marginal log likelihood
    mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)

    def train():
        iterator = tqdm(range(training_iterations))
        for i in iterator:
            # Zero backprop gradients
            optimizer.zero_grad()
            # Get output from model
            output = model(train_x)
            # Calc loss and backprop derivatives
            loss = -mll(output, train_y)
            loss.backward()
            iterator.set_postfix(loss=loss.item())
            optimizer.step()

    %time train()

    model.eval()
    likelihood.eval()
    with torch.no_grad(), gpytorch.settings.use_toeplitz(False), gpytorch.settings.fast_pred_var():
        preds = model(test_x)

    print('Test MAE: {}'.format(torch.mean(torch.abs(preds.mean - test_y))))
    with torch.no_grad(), gpytorch.settings.use_toeplitz(False), gpytorch.settings.fast_pred_var():
        preds = model(train_x)

    print('Train MAE: {}'.format(torch.mean(torch.abs(preds.mean - train_y))))

# UCI ML Repository ids to run, in order:
# 1 = abalone, 275 = bike share, 477 = real estate
experiment_datasets = [1, 275, 477]

for experiment in experiment_datasets:
    print("Experiment " + str(experiment))
    # Fetch the split once, then forward the very same objects to the
    # training/evaluation routine. The individual names stay bound at
    # notebook level so they can still be inspected after the loop.
    splits = downloader(experiment)
    train_x, train_y, test_x, test_y, train_loader, test_loader = splits
    whole_process(*splits)

Experiment 1


100%|██████████| 100/100 [02:49<00:00,  1.70s/it, loss=2.84]


CPU times: total: 16min 46s
Wall time: 2min 49s
Test MAE: 1.3678343296051025




Train MAE: 1.4716848134994507
Experiment 275


  5%|▌         | 5/100 [06:16<1:53:42, 71.82s/it, loss=8.24e+3]