In [1]:
import math
from tqdm import tqdm
import torch
import time
import numpy as np
import gpytorch
from matplotlib import pyplot as plt
from gpytorch.models import ApproximateGP
from gpytorch.variational import CholeskyVariationalDistribution
from gpytorch.variational import VariationalStrategy, GridInterpolationVariationalStrategy
from ucimlrepo import fetch_ucirepo 
from math import floor
from torch.utils.data import TensorDataset, DataLoader


# Make plots inline
%matplotlib inline

In [2]:
def downloader(uci_id):
    # fetch dataset 
    uci_download = fetch_ucirepo(id=uci_id)
    
    # data (as pandas dataframes) 
    X_data = uci_download.data.features
    # Drop category and date variables
    if uci_id == 1:
        X_data = X_data.drop(["Sex"], axis=1)
    if uci_id == 275:
        X_data = X_data.drop(["dteday"], axis=1)
    if uci_id == 374:
        X_data = X_data.drop(["date"], axis=1)
    if uci_id == 183:
        X_data = X_data.drop(["communityname"], axis=1)
        object_columns = X_data.select_dtypes(include=['object']).columns
        X_data = X_data.drop(columns=object_columns)

    y_data = uci_download.data.targets
    # select target for datasets with 2 targets
    if uci_id == 189:
        y_data = y_data["motor_UPDRS"]
    if uci_id == 713:
        y_data = y_data["verification.time"]

    y = y_data.squeeze()

    X = torch.tensor(X_data.values, dtype=torch.float32)
    y = torch.tensor(y.values, dtype=torch.float32)

    train_n = int(floor(0.8 * len(X)))
    train_x = X[:train_n, :].contiguous()
    train_y = y[:train_n].contiguous()

    test_x = X[train_n:, :].contiguous()
    test_y = y[train_n:].contiguous()

    # Create TensorDataset and DataLoader for training and test sets
    train_dataset = TensorDataset(train_x, train_y)
    test_dataset = TensorDataset(test_x, test_y)

    train_loader = DataLoader(train_dataset, batch_size=256, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=256, shuffle=False)

    return train_x, train_y, test_x, test_y, train_loader, test_loader

def whole_process(train_x, train_y, test_x, test_y, train_loader, test_loader):

    data_dim = train_x.size(-1)

    class GPModel(ApproximateGP):
        def __init__(self, inducing_points):

            variational_distribution = CholeskyVariationalDistribution(inducing_points.size(0))

            variational_strategy = VariationalStrategy(self, inducing_points, variational_distribution, learn_inducing_locations=True)

            super(GPModel, self).__init__(variational_strategy)
            self.mean_module = gpytorch.means.ConstantMean()

            self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel(ard_num_dims=data_dim))

        def forward(self, x):
            mean_x = self.mean_module(x)
            covar_x = self.covar_module(x)
            return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)

    inducing_points = train_x[:5, :]
    model = GPModel(inducing_points=inducing_points)
    likelihood = gpytorch.likelihoods.GaussianLikelihood()

    num_epochs = 100
    start_time = time.time()
    model.train()
    likelihood.train()

    optimizer = torch.optim.Adam([
        {'params': likelihood.parameters()},
        {'params': model.parameters()}
    ], lr=0.01)
    # Our loss object. We're using the VariationalELBO
    mll = gpytorch.mlls.VariationalELBO(likelihood, model, num_data=train_y.size(0))


    epochs_iter = tqdm(range(num_epochs), desc="Epoch")
    for i in epochs_iter:
        # Within each iteration, we will go over each minibatch of data
        minibatch_iter = tqdm(train_loader, desc="Minibatch", leave=False)
        for x_batch, y_batch in minibatch_iter:
            optimizer.zero_grad()
            output = model(x_batch)
            loss = -mll(output, y_batch)
            minibatch_iter.set_postfix(loss=loss.item())
            loss.backward()
            optimizer.step()

    end_time = time.time()
    wall_time = end_time - start_time
    
    model.eval()
    likelihood.eval()
    means = torch.tensor([0.])
    with torch.no_grad():
        for x_batch, y_batch in test_loader:
            preds = model(x_batch)
            means = torch.cat([means, preds.mean.cpu()])
    means = means[1:]
    test_rmse = torch.sqrt(torch.mean((means - test_y.cpu()) ** 2))

    means = torch.tensor([0.])
    with torch.no_grad():
        for x_batch, y_batch in train_loader:
            preds = model(x_batch)
            means = torch.cat([means, preds.mean.cpu()])
    means = means[1:]
    train_rmse= torch.sqrt(torch.mean((means - train_y.cpu()) ** 2))

    print('Test RMSE: {}'.format(test_rmse))
    print('Train RMSE: {}'.format(train_rmse))
    print('Wall time: {:.2f} seconds'.format(wall_time))

    return test_rmse, train_rmse, wall_time

experiment_datasets = [1, 275, 477, 189, 713, 186, 374, 183, 291, 294]

for experiment in experiment_datasets:
    print("Experiment " + str(experiment))

    trains = []
    tests = []
    times = []
    train_x, train_y, test_x, test_y, train_loader, test_loader = downloader(experiment)
    for run in range(3):
        # set seeds
        torch.manual_seed(run)
        print("run: " + str(run))

        test_rmse, train_rmse, wall_time = whole_process(train_x, train_y, test_x, test_y, train_loader, test_loader)
        trains.append(train_rmse)
        tests.append(test_rmse)
        times.append(wall_time)
    
    print("mean train performance= "+ str(np.mean(trains)))
    print("mean test performance= "+ str(np.mean(tests)))
    print("mean wall time= "+ str(np.mean(times)))

    print("std train= "+ str(np.std(trains, ddof=1)))
    print("std test= "+ str(np.std(tests, ddof=1)))

Experiment 1
run: 0


Epoch: 100%|██████████| 100/100 [00:11<00:00,  8.51it/s]


Test RMSE: 2.073751211166382
Train RMSE: 3.744349956512451
Wall time: 11.75 seconds
run: 1


Epoch: 100%|██████████| 100/100 [00:11<00:00,  8.59it/s]


Test RMSE: 2.1206347942352295
Train RMSE: 3.9831626415252686
Wall time: 11.65 seconds
run: 2


Epoch: 100%|██████████| 100/100 [00:11<00:00,  8.61it/s]


Test RMSE: 2.147575855255127
Train RMSE: 3.8951210975646973
Wall time: 11.62 seconds
mean train performance= 3.874211
mean test performance= 2.1139872
mean wall time= 11.672245502471924
std train= 0.120771654
std test= 0.037358556
Experiment 275
run: 0


Epoch: 100%|██████████| 100/100 [00:44<00:00,  2.23it/s]


Test RMSE: 166.97706604003906
Train RMSE: 198.50665283203125
Wall time: 44.86 seconds
run: 1


Epoch: 100%|██████████| 100/100 [00:44<00:00,  2.23it/s]


Test RMSE: 165.54965209960938
Train RMSE: 197.826904296875
Wall time: 44.87 seconds
run: 2


Epoch: 100%|██████████| 100/100 [00:44<00:00,  2.24it/s]


Test RMSE: 166.88079833984375
Train RMSE: 197.70867919921875
Wall time: 44.67 seconds
mean train performance= 198.01408
mean test performance= 166.46918
mean wall time= 44.8032857577006
std train= 0.43065786
std test= 0.7977811
Experiment 477
run: 0


Epoch: 100%|██████████| 100/100 [00:01<00:00, 51.31it/s]


Test RMSE: 38.706886291503906
Train RMSE: 38.82059097290039
Wall time: 1.95 seconds
run: 1


Epoch: 100%|██████████| 100/100 [00:01<00:00, 51.49it/s]


Test RMSE: 38.7023811340332
Train RMSE: 38.82297134399414
Wall time: 1.95 seconds
run: 2


Epoch: 100%|██████████| 100/100 [00:01<00:00, 51.23it/s]


Test RMSE: 38.707305908203125
Train RMSE: 38.82328414916992
Wall time: 1.95 seconds
mean train performance= 38.82228
mean test performance= 38.705524
mean wall time= 1.949548641840617
std train= 0.0014729351
std test= 0.0027302601
Experiment 189
run: 0


Epoch: 100%|██████████| 100/100 [00:15<00:00,  6.36it/s]


Test RMSE: 16.1807918548584
Train RMSE: 11.5370454788208
Wall time: 15.71 seconds
run: 1


Epoch: 100%|██████████| 100/100 [00:15<00:00,  6.27it/s]


Test RMSE: 16.134225845336914
Train RMSE: 11.576297760009766
Wall time: 15.95 seconds
run: 2


Epoch: 100%|██████████| 100/100 [00:16<00:00,  6.17it/s]


Test RMSE: 16.155359268188477
Train RMSE: 11.450639724731445
Wall time: 16.20 seconds
mean train performance= 11.521327
mean test performance= 16.156792
mean wall time= 15.951658805211386
std train= 0.06428665
std test= 0.023316057
Experiment 713
run: 0


Epoch: 100%|██████████| 100/100 [00:06<00:00, 16.40it/s]


Test RMSE: 15873.7490234375
Train RMSE: 11728.83203125
Wall time: 6.10 seconds
run: 1


Epoch: 100%|██████████| 100/100 [00:05<00:00, 16.76it/s]


Test RMSE: 15873.7216796875
Train RMSE: 11729.119140625
Wall time: 5.97 seconds
run: 2


Epoch: 100%|██████████| 100/100 [00:06<00:00, 16.54it/s]


Test RMSE: 15873.9716796875
Train RMSE: 11729.4599609375
Wall time: 6.05 seconds
mean train performance= 11729.137
mean test performance= 15873.813
mean wall time= 6.038362423578898
std train= 0.3143477
std test= 0.13712968
Experiment 186
run: 0


Epoch: 100%|██████████| 100/100 [00:18<00:00,  5.51it/s]


Test RMSE: 0.8001762628555298
Train RMSE: 0.9060923457145691
Wall time: 18.16 seconds
run: 1


Epoch: 100%|██████████| 100/100 [00:18<00:00,  5.55it/s]


Test RMSE: 0.7994657158851624
Train RMSE: 0.9101269841194153
Wall time: 18.01 seconds
run: 2


Epoch: 100%|██████████| 100/100 [00:17<00:00,  5.59it/s]


Test RMSE: 0.7973869442939758
Train RMSE: 0.9115209579467773
Wall time: 17.90 seconds
mean train performance= 0.90924674
mean test performance= 0.7990096
mean wall time= 18.02569270133972
std train= 0.0028193172
std test= 0.0014495095
Experiment 374
run: 0


Epoch: 100%|██████████| 100/100 [00:52<00:00,  1.90it/s]


Test RMSE: 109.19050598144531
Train RMSE: 122.07328033447266
Wall time: 52.55 seconds
run: 1


Epoch: 100%|██████████| 100/100 [00:52<00:00,  1.89it/s]


Test RMSE: 109.20128631591797
Train RMSE: 122.08318328857422
Wall time: 52.85 seconds
run: 2


Epoch: 100%|██████████| 100/100 [00:53<00:00,  1.89it/s]


Test RMSE: 109.19190216064453
Train RMSE: 122.0745620727539
Wall time: 53.01 seconds
mean train performance= 122.077
mean test performance= 109.19456
mean wall time= 52.806114276250206
std train= 0.0053857365
std test= 0.0058627045
Experiment 183
run: 0


Epoch: 100%|██████████| 100/100 [00:06<00:00, 16.15it/s]


Test RMSE: 0.15515795350074768
Train RMSE: 0.27864301204681396
Wall time: 6.19 seconds
run: 1


Epoch: 100%|██████████| 100/100 [00:06<00:00, 16.10it/s]


Test RMSE: 0.15586160123348236
Train RMSE: 0.28116291761398315
Wall time: 6.21 seconds
run: 2


Epoch: 100%|██████████| 100/100 [00:06<00:00, 16.02it/s]


Test RMSE: 0.1538863480091095
Train RMSE: 0.2814274728298187
Wall time: 6.24 seconds
mean train performance= 0.28041112
mean test performance= 0.15496863
mean wall time= 6.216350555419922
std train= 0.0015369415
std test= 0.0010011431
Experiment 291
run: 0


Epoch: 100%|██████████| 100/100 [00:04<00:00, 22.30it/s]


Test RMSE: 121.4604721069336
Train RMSE: 120.32157897949219
Wall time: 4.48 seconds
run: 1


Epoch: 100%|██████████| 100/100 [00:04<00:00, 21.85it/s]


Test RMSE: 121.45352935791016
Train RMSE: 120.3237075805664
Wall time: 4.58 seconds
run: 2


Epoch: 100%|██████████| 100/100 [00:04<00:00, 21.92it/s]


Test RMSE: 121.47771453857422
Train RMSE: 120.3444595336914
Wall time: 4.57 seconds
mean train performance= 120.32992
mean test performance= 121.463905
mean wall time= 4.542705059051514
std train= 0.012640506
std test= 0.012452752
Experiment 294
run: 0


Epoch: 100%|██████████| 100/100 [00:25<00:00,  3.97it/s]


Test RMSE: 96.75477600097656
Train RMSE: 99.25537872314453
Wall time: 25.20 seconds
run: 1


Epoch: 100%|██████████| 100/100 [00:25<00:00,  3.94it/s]


Test RMSE: 97.04032135009766
Train RMSE: 99.47126770019531
Wall time: 25.36 seconds
run: 2


Epoch: 100%|██████████| 100/100 [00:25<00:00,  3.99it/s]


Test RMSE: 96.45254516601562
Train RMSE: 98.99515533447266
Wall time: 25.09 seconds
mean train performance= 99.2406
mean test performance= 96.749214
mean wall time= 25.216092983881634
std train= 0.23839997
std test= 0.29392758
