In [2]:
import math
from tqdm import tqdm
import torch
import gpytorch
from matplotlib import pyplot as plt

# Make plots inline
%matplotlib inline

In [3]:
from ucimlrepo import fetch_ucirepo 
from math import floor
from torch.utils.data import TensorDataset, DataLoader

def downloader(uci_id, train_fraction=0.8, batch_size=256):
    """Fetch a UCI dataset and split it into train/test tensors and loaders.

    The feature table is converted to a float32 tensor after removing the
    columns listed in ``columns_to_drop`` for the given dataset id. The
    split is sequential: the first ``train_fraction`` of the rows (in the
    order returned by ``fetch_ucirepo``) form the training set and the
    remaining rows form the test set. Rows are not shuffled before the split.

    Args:
        uci_id: Dataset id passed to ``fetch_ucirepo``.
        train_fraction: Fraction of rows assigned to the training set.
        batch_size: Batch size used for both returned DataLoaders.

    Returns:
        Tuple ``(train_x, train_y, test_x, test_y, train_loader, test_loader)``.
        The training loader shuffles its batches; the test loader does not.
    """
    # Category and date columns to drop before conversion to a float tensor,
    # keyed by dataset id.
    columns_to_drop = {
        1: ["Sex"],
        275: ["dteday"],
    }

    # fetch dataset
    uci_download = fetch_ucirepo(id=uci_id)

    # data (as pandas dataframes)
    X_data = uci_download.data.features
    if uci_id in columns_to_drop:
        X_data = X_data.drop(columns=columns_to_drop[uci_id])

    # squeeze() collapses a single-column target frame into a Series
    y_data = uci_download.data.targets.squeeze()

    X = torch.tensor(X_data.values, dtype=torch.float32)
    y = torch.tensor(y_data.values, dtype=torch.float32)

    # math.floor already returns an int, so no extra int() cast is needed
    train_n = floor(train_fraction * len(X))

    train_x = X[:train_n, :].contiguous()
    train_y = y[:train_n].contiguous()

    test_x = X[train_n:, :].contiguous()
    test_y = y[train_n:].contiguous()

    # Create TensorDataset and DataLoader for training and test sets
    train_loader = DataLoader(
        TensorDataset(train_x, train_y), batch_size=batch_size, shuffle=True
    )
    test_loader = DataLoader(
        TensorDataset(test_x, test_y), batch_size=batch_size, shuffle=False
    )

    return train_x, train_y, test_x, test_y, train_loader, test_loader
    
def whole_process(train_x, train_y, test_x, test_y, train_loader, test_loader):

    data_dim = train_x.size(-1)

    class LargeFeatureExtractor(torch.nn.Sequential):
        def __init__(self):
            super(LargeFeatureExtractor, self).__init__()
            self.add_module('linear1', torch.nn.Linear(data_dim, 1000))
            self.add_module('relu1', torch.nn.ReLU())
            self.add_module('linear2', torch.nn.Linear(1000, 500))
            self.add_module('relu2', torch.nn.ReLU())
            self.add_module('linear3', torch.nn.Linear(500, 50))
            self.add_module('relu3', torch.nn.ReLU())
            self.add_module('linear4', torch.nn.Linear(50, 1))

    model = LargeFeatureExtractor()

    training_iterations = 100

    # Find optimal model hyperparameters
    criterion = torch.nn.MSELoss()
    optimizer = torch.optim.Adam([
        {'params': model.parameters()}
    ], lr=0.01)


    def train():
        model.train()
        iterator = tqdm(range(training_iterations))
        for i in iterator:
            # Zero backprop gradients
            optimizer.zero_grad()
            # Get output from model
            output = model(train_x)
            # Calc loss and backprop derivatives
            loss = criterion(output, train_y)
            loss.backward()
            iterator.set_postfix(loss=loss.item())
            optimizer.step()

    %time train()

    model.eval()
    with torch.no_grad(), gpytorch.settings.use_toeplitz(False), gpytorch.settings.fast_pred_var():
        preds = model(test_x)

    print('Test MAE: {}'.format(torch.mean(torch.abs(preds - test_y))))
    with torch.no_grad(), gpytorch.settings.use_toeplitz(False), gpytorch.settings.fast_pred_var():
        preds = model(train_x)

    print('Train MAE: {}'.format(torch.mean(torch.abs(preds - train_y))))

# UCI repository ids, in order: abalone, bike share, real estate
experiment_datasets = [1, 275, 477]

# Run the full download -> train -> evaluate pipeline once per dataset.
for experiment in experiment_datasets:
    print(f"Experiment {experiment}")
    (train_x, train_y,
     test_x, test_y,
     train_loader, test_loader) = downloader(experiment)
    whole_process(train_x, train_y, test_x, test_y, train_loader, test_loader)

Experiment 1


  return F.mse_loss(input, target, reduction=self.reduction)
100%|██████████| 100/100 [00:12<00:00,  8.08it/s, loss=11.1]


CPU times: total: 1min 13s
Wall time: 12.4 s
Test MAE: 2.072439670562744
Train MAE: 2.470304250717163
Experiment 275


  return F.mse_loss(input, target, reduction=self.reduction)
100%|██████████| 100/100 [01:19<00:00,  1.26it/s, loss=2.8e+4]


CPU times: total: 7min 50s
Wall time: 1min 19s
Test MAE: 174.66236877441406
Train MAE: 131.30117797851562
Experiment 477


  return F.mse_loss(input, target, reduction=self.reduction)
100%|██████████| 100/100 [00:01<00:00, 60.69it/s, loss=193]   

CPU times: total: 9 s
Wall time: 1.65 s
Test MAE: 9.845198631286621
Train MAE: 10.947065353393555



