In [1]:
import math
import time
import numpy as np
from tqdm import tqdm
import torch
import gpytorch
from matplotlib import pyplot as plt
from ucimlrepo import fetch_ucirepo 
from math import floor
from torch.utils.data import TensorDataset, DataLoader


# Make plots inline
%matplotlib inline

In [2]:
def downloader(uci_id):
    # fetch dataset 
    uci_download = fetch_ucirepo(id=uci_id)
    
    # data (as pandas dataframes) 
    X_data = uci_download.data.features
    # Drop category and date variables
    if uci_id == 1:
        X_data = X_data.drop(["Sex"], axis=1)
    if uci_id == 275:
        X_data = X_data.drop(["dteday"], axis=1)
    if uci_id == 374:
        X_data = X_data.drop(["date"], axis=1)
    if uci_id == 183:
        X_data = X_data.drop(["communityname"], axis=1)
        object_columns = X_data.select_dtypes(include=['object']).columns
        X_data = X_data.drop(columns=object_columns)

    y_data = uci_download.data.targets
    # select target for datasets with 2 targets
    if uci_id == 189:
        y_data = y_data["motor_UPDRS"]
    if uci_id == 713:
        y_data = y_data["verification.time"]

    y = y_data.squeeze()

    X = torch.tensor(X_data.values, dtype=torch.float32)
    y = torch.tensor(y.values, dtype=torch.float32)

    train_n = int(floor(0.8 * len(X)))
    train_x = X[:train_n, :].contiguous()
    train_y = y[:train_n].contiguous()

    test_x = X[train_n:, :].contiguous()
    test_y = y[train_n:].contiguous()

    # Create TensorDataset and DataLoader for training and test sets
    train_dataset = TensorDataset(train_x, train_y)
    test_dataset = TensorDataset(test_x, test_y)

    train_loader = DataLoader(train_dataset, batch_size=256, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=256, shuffle=False)

    return train_x, train_y, test_x, test_y, train_loader, test_loader
    
def whole_process(train_x, train_y, test_x, test_y, train_loader, test_loader):

    data_dim = train_x.size(-1)

    class LargeFeatureExtractor(torch.nn.Sequential):
        def __init__(self):
            super(LargeFeatureExtractor, self).__init__()
            self.add_module('linear1', torch.nn.Linear(data_dim, 1000))
            self.add_module('relu1', torch.nn.ReLU())
            self.add_module('linear2', torch.nn.Linear(1000, 500))
            self.add_module('relu2', torch.nn.ReLU())
            self.add_module('linear3', torch.nn.Linear(500, 50))
            self.add_module('relu3', torch.nn.ReLU())
            self.add_module('linear4', torch.nn.Linear(50, 1))

    model = LargeFeatureExtractor()

    training_iterations = 100

    # Find optimal model hyperparameters
    criterion = torch.nn.MSELoss()
    optimizer = torch.optim.Adam([
        {'params': model.parameters()}
    ], lr=0.01)


    def train():
        model.train()
        iterator = tqdm(range(training_iterations))
        for i in iterator:
            # Zero backprop gradients
            optimizer.zero_grad()
            # Get output from model
            output = model(train_x)
            # Calc loss and backprop derivatives
            loss = criterion(output, train_y)
            loss.backward()
            iterator.set_postfix(loss=loss.item())
            optimizer.step()

    start_time = time.time()
    %time train()
    end_time = time.time()

    wall_time = end_time - start_time

    model.eval()
    with torch.no_grad(), gpytorch.settings.use_toeplitz(False), gpytorch.settings.fast_pred_var():
        preds_test = model(test_x)

    test_rmse = torch.sqrt(torch.mean((preds_test - test_y) ** 2)).item()

    with torch.no_grad(), gpytorch.settings.use_toeplitz(False), gpytorch.settings.fast_pred_var():
        preds_train = model(train_x)

    train_rmse = torch.sqrt(torch.mean((preds_train - train_y) ** 2)).item()

    print('Test RMSE: {}'.format(test_rmse))
    print('Train RMSE: {}'.format(train_rmse))
    print('Wall time: {:.2f} seconds'.format(wall_time))

    return test_rmse, train_rmse, wall_time

experiment_datasets = [1, 275, 477, 189, 713, 186, 374, 183, 291, 294]

for experiment in experiment_datasets:
    print("Experiment " + str(experiment))

    trains = []
    tests = []
    times = []
    train_x, train_y, test_x, test_y, train_loader, test_loader = downloader(experiment)
    for run in range(3):
        # set seeds
        torch.manual_seed(run)
        print("run: " + str(run))

        test_rmse, train_rmse, wall_time = whole_process(train_x, train_y, test_x, test_y, train_loader, test_loader)
        trains.append(train_rmse)
        tests.append(test_rmse)
        times.append(wall_time)
    
    print("mean train performance= "+ str(np.mean(trains)))
    print("mean test performance= "+ str(np.mean(tests)))
    print("mean wall time= "+ str(np.mean(times)))

    print("std train= "+ str(np.std(trains, ddof=1)))
    print("std test= "+ str(np.std(tests, ddof=1)))



Experiment 1
run: 0


  return F.mse_loss(input, target, reduction=self.reduction)
100%|██████████| 100/100 [00:12<00:00,  7.97it/s, loss=11.1]


CPU times: total: 1min 14s
Wall time: 12.6 s
Test RMSE: 2.7361459732055664
Train RMSE: 3.337834596633911
Wall time: 12.55 seconds
whh
run: 1


100%|██████████| 100/100 [00:12<00:00,  7.86it/s, loss=11.1]


CPU times: total: 1min 16s
Wall time: 12.7 s
Test RMSE: 2.7416114807128906
Train RMSE: 3.3371355533599854
Wall time: 12.72 seconds
whh
run: 2


100%|██████████| 100/100 [00:12<00:00,  7.89it/s, loss=11.1]


CPU times: total: 1min 15s
Wall time: 12.7 s
Test RMSE: 2.753124475479126
Train RMSE: 3.337411403656006
Wall time: 12.68 seconds
whh
mean train performance= 3.337460517883301
mean test performance= 2.7436273097991943
mean wall time= 12.649929920832315
std train= 0.0003521001706862717
std test= 0.008666894486620218
Experiment 275
run: 0


  return F.mse_loss(input, target, reduction=self.reduction)
100%|██████████| 100/100 [01:16<00:00,  1.31it/s, loss=2.8e+4]


CPU times: total: 7min 37s
Wall time: 1min 16s
Test RMSE: 232.35597229003906
Train RMSE: 167.2775115966797
Wall time: 76.61 seconds
whh
run: 1


100%|██████████| 100/100 [01:20<00:00,  1.25it/s, loss=2.8e+4]


CPU times: total: 7min 59s
Wall time: 1min 20s
Test RMSE: 231.08602905273438
Train RMSE: 167.3853759765625
Wall time: 80.09 seconds
whh
run: 2


100%|██████████| 100/100 [01:21<00:00,  1.23it/s, loss=2.79e+4]


CPU times: total: 8min 7s
Wall time: 1min 21s
Test RMSE: 232.56593322753906
Train RMSE: 167.1107940673828
Wall time: 81.36 seconds
whh
mean train performance= 167.25789388020834
mean test performance= 232.00264485677084
mean wall time= 79.35453685124715
std train= 0.13833816289026454
std test= 0.8007242334137402
Experiment 477
run: 0


  return F.mse_loss(input, target, reduction=self.reduction)
100%|██████████| 100/100 [00:01<00:00, 64.00it/s, loss=193]   


CPU times: total: 9.34 s
Wall time: 1.56 s
Test RMSE: 12.339187622070312
Train RMSE: 13.887871742248535
Wall time: 1.56 seconds
whh
run: 1


100%|██████████| 100/100 [00:01<00:00, 63.16it/s, loss=193]   


CPU times: total: 9.45 s
Wall time: 1.59 s
Test RMSE: 12.375568389892578
Train RMSE: 13.904309272766113
Wall time: 1.59 seconds
whh
run: 2


100%|██████████| 100/100 [00:01<00:00, 63.55it/s, loss=193]   


CPU times: total: 9.38 s
Wall time: 1.57 s
Test RMSE: 12.341885566711426
Train RMSE: 13.890791893005371
Wall time: 1.57 seconds
whh
mean train performance= 13.89432430267334
mean test performance= 12.352213859558105
mean wall time= 1.5755343437194824
std train= 0.008769637445518907
std test= 0.02027055232793326
Experiment 189
run: 0


  return F.mse_loss(input, target, reduction=self.reduction)
100%|██████████| 100/100 [00:18<00:00,  5.42it/s, loss=63.1] 


CPU times: total: 1min 49s
Wall time: 18.5 s
Test RMSE: 10.62756061553955
Train RMSE: 7.940044403076172
Wall time: 18.46 seconds
whh
run: 1


100%|██████████| 100/100 [00:18<00:00,  5.45it/s, loss=63.4] 


CPU times: total: 1min 48s
Wall time: 18.4 s
Test RMSE: 10.825313568115234
Train RMSE: 7.9590606689453125
Wall time: 18.35 seconds
whh
run: 2


100%|██████████| 100/100 [00:18<00:00,  5.45it/s, loss=63.7] 


CPU times: total: 1min 49s
Wall time: 18.3 s
Test RMSE: 10.462039947509766
Train RMSE: 7.98267936706543
Wall time: 18.34 seconds
whh
mean train performance= 7.960594813028972
mean test performance= 10.638304710388184
mean wall time= 18.38721529642741
std train= 0.021358844471164557
std test= 0.18187497776642841
Experiment 713
run: 0


  return F.mse_loss(input, target, reduction=self.reduction)
100%|██████████| 100/100 [00:05<00:00, 17.03it/s, loss=9.8e+7]


CPU times: total: 34.8 s
Wall time: 5.87 s
Test RMSE: 12282.1103515625
Train RMSE: 9901.037109375
Wall time: 5.87 seconds
whh
run: 1


100%|██████████| 100/100 [00:05<00:00, 17.03it/s, loss=9.8e+7]


CPU times: total: 35 s
Wall time: 5.87 s
Test RMSE: 12281.806640625
Train RMSE: 9901.1708984375
Wall time: 5.87 seconds
whh
run: 2


100%|██████████| 100/100 [00:05<00:00, 17.02it/s, loss=9.8e+7]


CPU times: total: 34.8 s
Wall time: 5.87 s
Test RMSE: 12302.2412109375
Train RMSE: 9901.2578125
Wall time: 5.87 seconds
whh
mean train performance= 9901.1552734375
mean test performance= 12288.719401041666
mean wall time= 5.873722394307454
std train= 0.11117811302114013
std test= 11.71121544621571
Experiment 186
run: 0


  return F.mse_loss(input, target, reduction=self.reduction)
100%|██████████| 100/100 [00:20<00:00,  4.80it/s, loss=0.975]


CPU times: total: 2min 4s
Wall time: 20.8 s
Test RMSE: 0.8997098803520203
Train RMSE: 0.9866371154785156
Wall time: 20.83 seconds
whh
run: 1


100%|██████████| 100/100 [00:21<00:00,  4.74it/s, loss=0.968]


CPU times: total: 2min 6s
Wall time: 21.1 s
Test RMSE: 0.8729061484336853
Train RMSE: 0.9775070548057556
Wall time: 21.08 seconds
whh
run: 2


100%|██████████| 100/100 [00:20<00:00,  4.79it/s, loss=1.01] 


CPU times: total: 2min 4s
Wall time: 20.9 s
Test RMSE: 0.9150986075401306
Train RMSE: 0.9934632778167725
Wall time: 20.86 seconds
whh
mean train performance= 0.9858691493670145
mean test performance= 0.8959048787752787
mean wall time= 20.919641494750977
std train= 0.008005784918147404
std test= 0.021352035713522004
Experiment 374
run: 0


  return F.mse_loss(input, target, reduction=self.reduction)
100%|██████████| 100/100 [01:39<00:00,  1.00it/s, loss=1.11e+4]


CPU times: total: 9min 57s
Wall time: 1min 39s
Test RMSE: 91.06103515625
Train RMSE: 105.20048522949219
Wall time: 99.78 seconds
whh
run: 1


100%|██████████| 100/100 [01:37<00:00,  1.03it/s, loss=1.11e+4]


CPU times: total: 9min 38s
Wall time: 1min 37s
Test RMSE: 91.05622863769531
Train RMSE: 105.20274353027344
Wall time: 97.25 seconds
whh
run: 2


100%|██████████| 100/100 [01:36<00:00,  1.03it/s, loss=1.11e+4]


CPU times: total: 9min 36s
Wall time: 1min 36s
Test RMSE: 91.05064392089844
Train RMSE: 105.2002944946289
Wall time: 96.67 seconds
whh
mean train performance= 105.20117441813152
mean test performance= 91.05596923828125
mean wall time= 97.90162801742554
std train= 0.0013622333252695659
std test= 0.0052004720050109424
Experiment 183
run: 0


  return F.mse_loss(input, target, reduction=self.reduction)
100%|██████████| 100/100 [00:06<00:00, 16.50it/s, loss=0.0557]


CPU times: total: 36.2 s
Wall time: 6.06 s
Test RMSE: 0.2217271476984024
Train RMSE: 0.23590850830078125
Wall time: 6.06 seconds
whh
run: 1


100%|██████████| 100/100 [00:06<00:00, 16.13it/s, loss=0.0557]


CPU times: total: 35.9 s
Wall time: 6.2 s
Test RMSE: 0.2217710167169571
Train RMSE: 0.23608730733394623
Wall time: 6.20 seconds
whh
run: 2


100%|██████████| 100/100 [00:06<00:00, 15.95it/s, loss=0.0557]


CPU times: total: 36.9 s
Wall time: 6.27 s
Test RMSE: 0.22181633114814758
Train RMSE: 0.23596525192260742
Wall time: 6.27 seconds
whh
mean train performance= 0.23598702251911163
mean test performance= 0.22177149852116904
mean wall time= 6.179989655812581
std train= 9.136598228714347e-05
std test= 4.459367700231347e-05
Experiment 291
run: 0


  return F.mse_loss(input, target, reduction=self.reduction)
100%|██████████| 100/100 [00:04<00:00, 22.62it/s, loss=8.55e+3]


CPU times: total: 24.9 s
Wall time: 4.42 s
Test RMSE: 92.30614471435547
Train RMSE: 92.48902130126953
Wall time: 4.43 seconds
whh
run: 1


100%|██████████| 100/100 [00:04<00:00, 22.43it/s, loss=3.8e+4]


CPU times: total: 24.8 s
Wall time: 4.46 s
Test RMSE: 85.97199249267578
Train RMSE: 101.96833038330078
Wall time: 4.46 seconds
whh
run: 2


100%|██████████| 100/100 [00:04<00:00, 22.19it/s, loss=8.58e+3]


CPU times: total: 26.2 s
Wall time: 4.51 s
Test RMSE: 92.044921875
Train RMSE: 92.63545989990234
Wall time: 4.51 seconds
whh
mean train performance= 95.69760386149089
mean test performance= 90.10768636067708
mean wall time= 4.464968840281169
std train= 5.431102042798488
std test= 3.583996675545042
Experiment 294
run: 0


  return F.mse_loss(input, target, reduction=self.reduction)
100%|██████████| 100/100 [00:34<00:00,  2.92it/s, loss=312]   


CPU times: total: 3min 24s
Wall time: 34.2 s
Test RMSE: 17.535917282104492
Train RMSE: 17.659637451171875
Wall time: 34.24 seconds
whh
run: 1


100%|██████████| 100/100 [00:32<00:00,  3.08it/s, loss=311]   


CPU times: total: 3min 13s
Wall time: 32.4 s
Test RMSE: 17.520639419555664
Train RMSE: 17.632526397705078
Wall time: 32.42 seconds
whh
run: 2


100%|██████████| 100/100 [00:32<00:00,  3.06it/s, loss=308]   

CPU times: total: 3min 15s
Wall time: 32.7 s
Test RMSE: 17.458927154541016
Train RMSE: 17.55830192565918
Wall time: 32.71 seconds
whh
mean train performance= 17.61682192484538
mean test performance= 17.50516128540039
mean wall time= 33.12434752782186
std train= 0.05246136705790589
std test= 0.040762107566422276





In [3]:
trains

[17.659637451171875, 17.632526397705078, 17.55830192565918]