In [1]:
import math
from tqdm import tqdm
import torch
import time
import numpy as np
import gpytorch
from matplotlib import pyplot as plt
from ucimlrepo import fetch_ucirepo 
from math import floor
from torch.utils.data import TensorDataset, DataLoader

# Make plots inline
%matplotlib inline

In [2]:
def downloader(uci_id):
    # fetch dataset 
    uci_download = fetch_ucirepo(id=uci_id)
    
    # data (as pandas dataframes) 
    X_data = uci_download.data.features
    # Drop category and date variables
    if uci_id == 1:
        X_data = X_data.drop(["Sex"], axis=1)
    if uci_id == 275:
        X_data = X_data.drop(["dteday"], axis=1)
    if uci_id == 374:
        X_data = X_data.drop(["date"], axis=1)
    if uci_id == 183:
        X_data = X_data.drop(["communityname"], axis=1)
        object_columns = X_data.select_dtypes(include=['object']).columns
        X_data = X_data.drop(columns=object_columns)

    y_data = uci_download.data.targets
    # select target for datasets with 2 targets
    if uci_id == 189:
        y_data = y_data["motor_UPDRS"]
    if uci_id == 713:
        y_data = y_data["verification.time"]

    y = y_data.squeeze()

    X = torch.tensor(X_data.values, dtype=torch.float32)
    y = torch.tensor(y.values, dtype=torch.float32)

    train_n = int(floor(0.8 * len(X)))
    train_x = X[:train_n, :].contiguous()
    train_y = y[:train_n].contiguous()

    test_x = X[train_n:, :].contiguous()
    test_y = y[train_n:].contiguous()

    # Create TensorDataset and DataLoader for training and test sets
    train_dataset = TensorDataset(train_x, train_y)
    test_dataset = TensorDataset(test_x, test_y)

    train_loader = DataLoader(train_dataset, batch_size=256, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=256, shuffle=False)

    return train_x, train_y, test_x, test_y, train_loader, test_loader

def whole_process(train_x, train_y, test_x, test_y, train_loader, test_loader):

    data_dim = train_x.size(-1)

    class GPRegressionModel(gpytorch.models.ExactGP):
            def __init__(self, train_x, train_y, likelihood):
                super(GPRegressionModel, self).__init__(train_x, train_y, likelihood)
                self.mean_module = gpytorch.means.ConstantMean()
                self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel(ard_num_dims=data_dim))

            def forward(self, x):
                mean_x = self.mean_module(x)
                covar_x = self.covar_module(x)
                return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)
            
    likelihood = gpytorch.likelihoods.GaussianLikelihood()
    model = GPRegressionModel(train_x, train_y, likelihood)
    training_iterations = 100

    # Find optimal model hyperparameters
    model.train()
    likelihood.train()

    # Use the adam optimizer
    optimizer = torch.optim.Adam([
        {'params': model.covar_module.parameters()},
        {'params': model.mean_module.parameters()},
        {'params': model.likelihood.parameters()},
    ], lr=0.01)

    # "Loss" for GPs - the marginal log likelihood
    mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)

    def train():
        iterator = tqdm(range(training_iterations))
        for i in iterator:
            # Zero backprop gradients
            optimizer.zero_grad()
            # Get output from model
            output = model(train_x)
            # Calc loss and backprop derivatives
            loss = -mll(output, train_y)
            loss.backward()
            iterator.set_postfix(loss=loss.item())
            optimizer.step()

    start_time = time.time()
    %time train()
    end_time = time.time()
    wall_time = end_time - start_time

    model.eval()
    likelihood.eval()
    with torch.no_grad(), gpytorch.settings.use_toeplitz(False), gpytorch.settings.fast_pred_var():
        preds = model(test_x)

    test_rmse = torch.sqrt(torch.mean((preds.mean - test_y) ** 2))
    with torch.no_grad(), gpytorch.settings.use_toeplitz(False), gpytorch.settings.fast_pred_var():
        preds = model(train_x)

    train_rmse = torch.sqrt(torch.mean((preds.mean - train_y) ** 2))

    print('Test RMSE: {}'.format(test_rmse))
    print('Train RMSE: {}'.format(train_rmse))
    print('Wall time: {:.2f} seconds'.format(wall_time))

    return test_rmse, train_rmse, wall_time

experiment_datasets = [1, 275, 477, 189, 713, 186, 374, 183, 291, 294]

for experiment in experiment_datasets:
    print("Experiment " + str(experiment))

    trains = []
    tests = []
    times = []
    train_x, train_y, test_x, test_y, train_loader, test_loader = downloader(experiment)
    for run in range(3):
        # set seeds
        torch.manual_seed(run)
        print("run: " + str(run))

        test_rmse, train_rmse, wall_time = whole_process(train_x, train_y, test_x, test_y, train_loader, test_loader)
        trains.append(train_rmse)
        tests.append(test_rmse)
        times.append(wall_time)
    
    print("mean train performance= "+ str(np.mean(trains)))
    print("mean test performance= "+ str(np.mean(tests)))
    print("mean wall time= "+ str(np.mean(times)))

    print("std train= "+ str(np.std(trains, ddof=1)))
    print("std test= "+ str(np.std(tests, ddof=1)))

Experiment 1
run: 0


100%|██████████| 100/100 [00:30<00:00,  3.32it/s, loss=3.09]


CPU times: total: 2min 59s
Wall time: 30.1 s




Test RMSE: 1.8655411005020142
Train RMSE: 2.1527233123779297
Wall time: 30.11 seconds
run: 1


100%|██████████| 100/100 [00:29<00:00,  3.42it/s, loss=3.09]


CPU times: total: 2min 55s
Wall time: 29.2 s
Test RMSE: 1.8656526803970337
Train RMSE: 2.152709722518921
Wall time: 29.24 seconds
run: 2


100%|██████████| 100/100 [00:29<00:00,  3.35it/s, loss=3.09]


CPU times: total: 2min 58s
Wall time: 29.9 s
Test RMSE: 1.8655447959899902
Train RMSE: 2.1527369022369385
Wall time: 29.89 seconds
mean train performance= 2.1527233
mean test performance= 1.8655796
mean wall time= 29.74298095703125
std train= 1.3589859e-05
std test= 6.33809e-05
Experiment 275
run: 0


100%|██████████| 100/100 [10:01<00:00,  6.02s/it, loss=1.22e+3]


CPU times: total: 59min 36s
Wall time: 10min 1s




Test RMSE: 178.19866943359375
Train RMSE: 31.421541213989258
Wall time: 601.76 seconds
run: 1


100%|██████████| 100/100 [09:53<00:00,  5.93s/it, loss=1.22e+3]


CPU times: total: 58min 45s
Wall time: 9min 53s
Test RMSE: 178.19871520996094
Train RMSE: 31.421175003051758
Wall time: 593.55 seconds
run: 2


100%|██████████| 100/100 [09:54<00:00,  5.95s/it, loss=1.22e+3]


CPU times: total: 58min 45s
Wall time: 9min 55s
Test RMSE: 178.19869995117188
Train RMSE: 31.421228408813477
Wall time: 595.01 seconds
mean train performance= 31.421316
mean test performance= 178.19868
mean wall time= 596.7762099107107
std train= 0.00019783169
std test= 2.6428997e-05
Experiment 477
run: 0


100%|██████████| 100/100 [00:00<00:00, 144.38it/s, loss=241]


CPU times: total: 4.19 s
Wall time: 695 ms
Test RMSE: 28.989206314086914
Train RMSE: 15.713935852050781
Wall time: 0.69 seconds
run: 1


100%|██████████| 100/100 [00:00<00:00, 138.41it/s, loss=241]


CPU times: total: 4.11 s
Wall time: 724 ms
Test RMSE: 28.989206314086914
Train RMSE: 15.713935852050781
Wall time: 0.72 seconds
run: 2


100%|██████████| 100/100 [00:00<00:00, 142.39it/s, loss=241]


CPU times: total: 3.62 s
Wall time: 702 ms
Test RMSE: 28.989206314086914
Train RMSE: 15.713935852050781
Wall time: 0.70 seconds
mean train performance= 15.713936
mean test performance= 28.989206
mean wall time= 0.707190990447998
std train= 0.0
std test= 0.0
Experiment 189
run: 0


100%|██████████| 100/100 [01:11<00:00,  1.41it/s, loss=40.2]


CPU times: total: 7min 2s
Wall time: 1min 11s




Test RMSE: 21.75303840637207
Train RMSE: 4.895779132843018
Wall time: 71.11 seconds
run: 1


100%|██████████| 100/100 [01:10<00:00,  1.42it/s, loss=40.2]


CPU times: total: 6min 58s
Wall time: 1min 10s
Test RMSE: 21.753032684326172
Train RMSE: 4.895792484283447
Wall time: 70.41 seconds
run: 2


100%|██████████| 100/100 [01:10<00:00,  1.42it/s, loss=40.2]


CPU times: total: 6min 55s
Wall time: 1min 10s
Test RMSE: 21.752958297729492
Train RMSE: 4.895802021026611
Wall time: 70.33 seconds
mean train performance= 4.8957915
mean test performance= 21.753012
mean wall time= 70.61776447296143
std train= 1.1503542e-05
std test= 4.4751618e-05
Experiment 713
run: 0


100%|██████████| 100/100 [00:08<00:00, 12.09it/s, loss=7.28e+6]


CPU times: total: 47.9 s
Wall time: 8.27 s
Test RMSE: 6419.50146484375
Train RMSE: 1994.779541015625
Wall time: 8.27 seconds
run: 1


100%|██████████| 100/100 [00:08<00:00, 12.40it/s, loss=7.28e+6]


CPU times: total: 47.3 s
Wall time: 8.07 s
Test RMSE: 6419.49560546875
Train RMSE: 1994.7796630859375
Wall time: 8.07 seconds
run: 2


100%|██████████| 100/100 [00:08<00:00, 12.38it/s, loss=7.28e+6]


CPU times: total: 47.4 s
Wall time: 8.08 s
Test RMSE: 6419.49853515625
Train RMSE: 1994.7799072265625
Wall time: 8.08 seconds
mean train performance= 1994.7797
mean test performance= 6419.4985
mean wall time= 8.142451524734497
std train= 0.00019301011
std test= 0.0029296875
Experiment 186
run: 0


100%|██████████| 100/100 [01:19<00:00,  1.27it/s, loss=4.57]


CPU times: total: 7min 47s
Wall time: 1min 19s




Test RMSE: 3.982966661453247
Train RMSE: 1.578687310218811
Wall time: 79.04 seconds
run: 1


100%|██████████| 100/100 [01:19<00:00,  1.25it/s, loss=4.57]


CPU times: total: 7min 53s
Wall time: 1min 19s
Test RMSE: 3.983008623123169
Train RMSE: 1.5787004232406616
Wall time: 79.86 seconds
run: 2


100%|██████████| 100/100 [01:22<00:00,  1.21it/s, loss=4.57]


CPU times: total: 8min 15s
Wall time: 1min 22s
Test RMSE: 3.9829976558685303
Train RMSE: 1.5786542892456055
Wall time: 82.99 seconds
mean train performance= 1.5786806
mean test performance= 3.982991
mean wall time= 80.62866353988647
std train= 2.3772167e-05
std test= 2.1762798e-05
Experiment 374
run: 0


100%|██████████| 100/100 [12:16<00:00,  7.36s/it, loss=4.19e+3]


CPU times: total: 1h 12min 8s
Wall time: 12min 16s




Test RMSE: 131.9158172607422
Train RMSE: 70.67130279541016
Wall time: 736.33 seconds
run: 1


100%|██████████| 100/100 [12:16<00:00,  7.37s/it, loss=4.19e+3]


CPU times: total: 1h 12min 11s
Wall time: 12min 16s
Test RMSE: 131.9158172607422
Train RMSE: 70.67127990722656
Wall time: 736.71 seconds
run: 2


100%|██████████| 100/100 [12:34<00:00,  7.55s/it, loss=4.19e+3]


CPU times: total: 1h 14min 13s
Wall time: 12min 34s
Test RMSE: 131.9158172607422
Train RMSE: 70.67131042480469
Wall time: 754.99 seconds
mean train performance= 70.6713
mean test performance= 131.91582
mean wall time= 742.6773955027262
std train= 1.7059845e-05
std test= 0.0
Experiment 183
run: 0


100%|██████████| 100/100 [00:10<00:00,  9.69it/s, loss=0.613]


CPU times: total: 59.1 s
Wall time: 10.3 s
Test RMSE: 0.18782496452331543
Train RMSE: 0.0950593575835228
Wall time: 10.32 seconds
run: 1


100%|██████████| 100/100 [00:10<00:00,  9.72it/s, loss=0.617]


CPU times: total: 59.3 s
Wall time: 10.3 s
Test RMSE: 0.18774889409542084
Train RMSE: 0.09502825886011124
Wall time: 10.30 seconds
run: 2


100%|██████████| 100/100 [00:10<00:00,  9.69it/s, loss=0.616]


CPU times: total: 59.5 s
Wall time: 10.3 s
Test RMSE: 0.18769656121730804
Train RMSE: 0.09502524882555008
Wall time: 10.33 seconds
mean train performance= 0.09503762
mean test performance= 0.18775682
mean wall time= 10.315061092376709
std train= 1.888385e-05
std test= 6.456631e-05
Experiment 291
run: 0


100%|██████████| 100/100 [00:30<00:00,  3.27it/s, loss=4e+3]  


CPU times: total: 2min 58s
Wall time: 30.6 s
Test RMSE: 86.1680908203125
Train RMSE: 56.13261795043945
Wall time: 30.59 seconds
run: 1


100%|██████████| 100/100 [00:57<00:00,  1.73it/s, loss=3.84e+3]


CPU times: total: 5min 39s
Wall time: 57.8 s
Test RMSE: 80.72808837890625
Train RMSE: 50.63939666748047
Wall time: 57.83 seconds
run: 2


100%|██████████| 100/100 [00:37<00:00,  2.69it/s, loss=3.84e+3]


CPU times: total: 3min 38s
Wall time: 37.2 s




Test RMSE: 81.66252136230469
Train RMSE: 71.18772888183594
Wall time: 37.18 seconds
mean train performance= 59.319916
mean test performance= 82.8529
mean wall time= 41.86773570378622
std train= 10.638497
std test= 2.908807
Experiment 294
run: 0


100%|██████████| 100/100 [03:08<00:00,  1.88s/it, loss=1.92e+4]


CPU times: total: 18min 38s
Wall time: 3min 8s




Test RMSE: 197.33433532714844
Train RMSE: 107.3902587890625
Wall time: 188.44 seconds
run: 1


100%|██████████| 100/100 [03:03<00:00,  1.84s/it, loss=1.92e+4]


CPU times: total: 18min 3s
Wall time: 3min 3s
Test RMSE: 197.33421325683594
Train RMSE: 107.39027404785156
Wall time: 183.66 seconds
run: 2


100%|██████████| 100/100 [03:08<00:00,  1.88s/it, loss=1.92e+4]


CPU times: total: 18min 41s
Wall time: 3min 8s
Test RMSE: 197.33445739746094
Train RMSE: 107.39029693603516
Wall time: 188.07 seconds
mean train performance= 107.39028
mean test performance= 197.33434
mean wall time= 186.72332207361856
std train= 2.018548e-05
std test= 0.00012207031
