In [1]:
import sys
sys.path.append('..')
import torch
from torch import nn
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from torch.utils.data import TensorDataset, DataLoader
import pandas as pd
%matplotlib notebook
from ipywidgets import *
import matplotlib.pyplot as plt
import plotly.express as px
from utils.functions import train_models, train_models_w_mean_var, train_and_save_results
from utils.models import BatchEnsemble
from utils.layers import BatchLinear
from utils.layers import AnchoredBatch

## Concrete Dataset

In [2]:
# Load the concrete spreadsheet into a pandas DataFrame.
# The path uses forward slashes: they resolve on Windows, Linux and macOS alike,
# whereas a backslash-separated path is only understood on Windows.
concrete = pd.read_excel('../data/UCI_Regression/2.Concrete/Concrete_Data.xls')

# Preview the first rows
concrete.head()

Unnamed: 0,Cement (component 1)(kg in a m^3 mixture),Blast Furnace Slag (component 2)(kg in a m^3 mixture),Fly Ash (component 3)(kg in a m^3 mixture),Water (component 4)(kg in a m^3 mixture),Superplasticizer (component 5)(kg in a m^3 mixture),Coarse Aggregate (component 6)(kg in a m^3 mixture),Fine Aggregate (component 7)(kg in a m^3 mixture),Age (day),"Concrete compressive strength(MPa, megapascals)"
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.986111
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.887366
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.269535
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05278
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.296075


In [3]:
# Seed NumPy's global RNG so the random split below is repeatable
np.random.seed(52)

# Hold out 10% of the rows as a validation set and convert both parts to NumPy arrays
concrete_train, concrete_val = (
    part.to_numpy() for part in train_test_split(concrete, test_size=0.1)
)

In [4]:
# Standardise the eight feature columns (subtract mean, divide by standard deviation).
# Column 8 is the regression target and is left untouched.
concrete_scaler = StandardScaler().fit(X=concrete_train[:, :8], y=concrete_train[:, 8])
concrete_train[:, :8] = concrete_scaler.transform(concrete_train[:, :8])

# 70/30 split of the scaled training data; the 30% part is what the loaders call "test"
x_train, x_test, y_train, y_test = train_test_split(
    concrete_train[:, :8],
    concrete_train[:, 8],
    train_size=0.7,
)

# Use the GPU when one is available, otherwise fall back to the CPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Features become float32 tensors on the chosen device
x_train_tensor, x_test_tensor = (
    torch.tensor(features, dtype=torch.float32).to(device)
    for features in (x_train, x_test)
)
# Targets additionally get a trailing dimension so they are column vectors of shape (N, 1)
y_train_tensor, y_test_tensor = (
    torch.tensor(targets, dtype=torch.float32).unsqueeze(1).to(device)
    for targets in (y_train, y_test)
)

# Sanity check of the resulting shapes
for split_tensor in (x_train_tensor, y_train_tensor, x_test_tensor, y_test_tensor):
    print(split_tensor.shape)

# Pair features with targets
train_dataset = TensorDataset(x_train_tensor, y_train_tensor)
test_dataset = TensorDataset(x_test_tensor, y_test_tensor)

# Mini-batch size shared by both loaders
batch_size = 128

# Only the training loader reshuffles its samples
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)



torch.Size([648, 8])
torch.Size([648, 1])
torch.Size([279, 8])
torch.Size([279, 1])


## Hyperparameter tuning

In [5]:
# Search space for the first Batch Ensemble sweep on the concrete data
hidden_layers_options = [6, 7, 8, 9, 10]
hidden_units_options = [32, 64, 128]
weight_decay_options = [1e-2, 1e-3, 1e-4, 1e-5]

# Gaussian negative log-likelihood as the criterion; the optimizer is passed as the
# class itself (not an instance)
loss_fn = nn.GaussianNLLLoss()
optimizer = torch.optim.Adam

# Gather every argument of the sweep in one place, then launch it.
# ENCE=False: presumably skips the ENCE calibration metric for this first pass
# (confirm against utils.functions).
sweep_settings = dict(
    model_name='batch',
    hidden_layers_options=hidden_layers_options,
    hidden_units_options=hidden_units_options,
    input_shape=8,
    loss_fn=loss_fn,
    optimizer=optimizer,
    train_loader=train_loader,
    test_loader=test_loader,
    ensemble_size=10,
    epochs=1000,
    csv_file='concrete_model_results.csv',
    weight_decay_options=weight_decay_options,
    print_frequency=100,
    ENCE=False,
)
train_and_save_results(**sweep_settings)


Epoch: 0
-------
Loss: 1442.4045817057292


KeyboardInterrupt: 

### Comments
We initially trained the different models without explicitly taking calibration into account. We will now continue to experiment with our best candidates and investigate whether applying re-calibration can help boost performance.

In [37]:
# Read the results from our trained models.
# The path is written with forward slashes: the previous literal contained "\c", which is
# not a valid escape sequence (Python warns about it), and backslash separators only
# resolve on Windows.
df = pd.read_csv('../results/concrete_model_results.csv')

# Keep only the models that achieve an RMSE below 6, separately for each ensemble type
df_batch = df[(df['RMSE'] < 6) & (df['model'] == 'batch')]
df_anchored = df[(df['RMSE'] < 6) & (df['model'] == 'anchored_batch')]

# Show which columns are available for the plots below
df_anchored.columns

Index(['model', 'ensemble_size', 'hidden_layers', 'hidden_units',
       'weight_decay', 'data_noise', 'epochs', 'optimizer', 'loss_fn',
       'learning_rate', 'ENCE', 'GNLLL', 'RMSE'],
      dtype='object')

In [34]:
# Restrict the Batch Ensemble results to the tuned hyperparameters and the two scores
batch_columns = ['hidden_units', 'hidden_layers', 'weight_decay', 'GNLLL', 'RMSE']
df_batch = df_batch[batch_columns]

# Parallel-coordinates plot: one axis per column, one line per row
fig = px.parallel_coordinates(df_batch)
fig.show()

In [38]:
# Restrict the Anchored Batch results to the tuned hyperparameters and the two scores
anchored_columns = ['hidden_units', 'hidden_layers', 'data_noise', 'GNLLL', 'RMSE']
df_anchored = df_anchored[anchored_columns]

# Parallel-coordinates plot: one axis per column, one line per row
fig = px.parallel_coordinates(df_anchored)
fig.show()

### Evaluation of results
* **Anchored Batch**  
The best-performing models in terms of RMSE all have 128 hidden units. Data noise values of 1e-4 and 1e-5 also seem to be more suitable choices for performance. Finally, the number of hidden layers ranges from 6 to 10, where more hidden layers correspond to better performance in terms of RMSE but worse performance in terms of GNLLL. We will retrain three models and calculate their corresponding calibration errors using the ENCE metric.  

* **Batch Ensemble**  
In terms of RMSE, a combination of 128 hidden units with a higher number of hidden layers (9-10) seems to perform best. The weight decay parameter does not seem to have much influence on RMSE: values in the range 1e-3 to 1e-5 all perform similarly. However, stronger regularization seems to improve GNLLL. We will investigate the calibration error of models with 128 hidden units and 9-10 hidden layers.

### Investigate Calibration Error

In [6]:
# Narrowed search space for the Batch Ensemble: only the best candidates so far
hidden_layers_options = [9, 10]
hidden_units_options = [128]
weight_decay_options = [0.01, 0.0001, 0.00001]

# Gaussian negative log-likelihood as the criterion; the optimizer is passed as the
# class itself (not an instance)
loss_fn = nn.GaussianNLLLoss()
optimizer = torch.optim.Adam

# Gather every argument of the sweep in one place, then launch it.
# Results go to a separate CSV from the first sweep.
sweep_settings = dict(
    model_name='batch',
    hidden_layers_options=hidden_layers_options,
    hidden_units_options=hidden_units_options,
    input_shape=8,
    loss_fn=loss_fn,
    optimizer=optimizer,
    train_loader=train_loader,
    test_loader=test_loader,
    ensemble_size=10,
    epochs=1000,
    csv_file='concrete_model_results_ence.csv',
    weight_decay_options=weight_decay_options,
    print_frequency=1000,
)
train_and_save_results(**sweep_settings)

Epoch: 0
-------
Loss: 1014.7159525553385

Evaluation on Test Data
------------------------
Average Test Loss: 3.7201311588287354
Epoch: 0
-------
Loss: 1492.4285278320312

Evaluation on Test Data
------------------------
Average Test Loss: 3.921370585759481
Epoch: 0
-------
Loss: 1235.784891764323

Evaluation on Test Data
------------------------
Average Test Loss: 4.222842216491699
Epoch: 0
-------
Loss: 1283.935770670573

Evaluation on Test Data
------------------------
Average Test Loss: 3.5179531574249268
Epoch: 0
-------
Loss: 1269.8308614095051

Evaluation on Test Data
------------------------
Average Test Loss: 3.8427838484446206
Epoch: 0
-------
Loss: 1241.7892049153645

Evaluation on Test Data
------------------------
Average Test Loss: 3.743661324183146


In [9]:
# Batch Ensemble search space for the longer run (weight decay 1e-3 is included here)
hidden_layers_options = [9, 10]
hidden_units_options = [128]
weight_decay_options = [0.01, 0.001, 0.0001, 0.00001]

# Gaussian negative log-likelihood as the criterion; the optimizer is passed as the
# class itself (not an instance)
loss_fn = nn.GaussianNLLLoss()
optimizer = torch.optim.Adam

# Gather every argument of the sweep in one place, then launch it
sweep_settings = dict(
    model_name='batch',
    hidden_layers_options=hidden_layers_options,
    hidden_units_options=hidden_units_options,
    input_shape=8,
    loss_fn=loss_fn,
    optimizer=optimizer,
    train_loader=train_loader,
    test_loader=test_loader,
    ensemble_size=10,
    epochs=2000,  # doubled compared with the previous Batch Ensemble run
    csv_file='concrete_model_results_ence.csv',
    weight_decay_options=weight_decay_options,
    print_frequency=2000,
)
train_and_save_results(**sweep_settings)

Epoch: 0
-------
Loss: 1132.7173360188801

Evaluation on Test Data
------------------------
Average Test Loss: 9.231940269470215
Epoch: 0
-------
Loss: 1284.07373046875

Evaluation on Test Data
------------------------
Average Test Loss: 8.614970366160074
Epoch: 0
-------
Loss: 1451.8767903645833

Evaluation on Test Data
------------------------
Average Test Loss: 9.070743083953857
Epoch: 0
-------
Loss: 1067.8715922037761

Evaluation on Test Data
------------------------
Average Test Loss: 9.205824851989746
Epoch: 0
-------
Loss: 1277.6501159667969

Evaluation on Test Data
------------------------
Average Test Loss: 9.092270851135254
Epoch: 0
-------
Loss: 1440.3821207682292

Evaluation on Test Data
------------------------
Average Test Loss: 8.459661165873209
Epoch: 0
-------
Loss: 1282.7774047851562

Evaluation on Test Data
------------------------
Average Test Loss: 8.475510915120443
Epoch: 0
-------
Loss: 1216.1648763020833

Evaluation on Test Data
------------------------
Average

In [10]:
# Search space for the Anchored Batch Ensemble: depth and data noise are varied
hidden_layers_options = [6, 8, 10]
hidden_units_options = [128]
data_noise_options = [0.0001, 0.00001]

# Gaussian negative log-likelihood as the criterion; the optimizer is passed as the
# class itself (not an instance)
loss_fn = nn.GaussianNLLLoss()
optimizer = torch.optim.Adam

# Gather every argument of the sweep in one place, then launch it
sweep_settings = dict(
    model_name='anchored_batch',
    hidden_layers_options=hidden_layers_options,
    hidden_units_options=hidden_units_options,
    input_shape=8,
    loss_fn=loss_fn,
    optimizer=optimizer,
    train_loader=train_loader,
    test_loader=test_loader,
    ensemble_size=10,
    epochs=1000,
    csv_file='concrete_model_results_ence.csv',
    data_noise_options=data_noise_options,
    print_frequency=1000,
)
train_and_save_results(**sweep_settings)

Epoch: 0
-------
Loss: 988.6456807454427

Evaluation on Test Data
------------------------
Average Test Loss: 3.455641746520996
Epoch: 0
-------
Loss: 1.8116760452588399

Evaluation on Test Data
------------------------
Average Test Loss: 7.075397491455078
Epoch: 0
-------
Loss: 1259.15576171875

Evaluation on Test Data
------------------------
Average Test Loss: 3.869340101877848
Epoch: 0
-------
Loss: 1.7819743355115254

Evaluation on Test Data
------------------------
Average Test Loss: 8.268374919891357
Epoch: 0
-------
Loss: 1392.183369954427

Evaluation on Test Data
------------------------
Average Test Loss: 3.7257259686787925
Epoch: 0
-------
Loss: 1.8835856318473816

Evaluation on Test Data
------------------------
Average Test Loss: 8.113072872161865


In [11]:
# Same Anchored Batch Ensemble search space as before, now trained for 2000 epochs
hidden_layers_options = [6, 8, 10]
hidden_units_options = [128]
data_noise_options = [0.0001, 0.00001]

# Gaussian negative log-likelihood as the criterion; the optimizer is passed as the
# class itself (not an instance)
loss_fn = nn.GaussianNLLLoss()
optimizer = torch.optim.Adam

# Gather every argument of the sweep in one place, then launch it
sweep_settings = dict(
    model_name='anchored_batch',
    hidden_layers_options=hidden_layers_options,
    hidden_units_options=hidden_units_options,
    input_shape=8,
    loss_fn=loss_fn,
    optimizer=optimizer,
    train_loader=train_loader,
    test_loader=test_loader,
    ensemble_size=10,
    epochs=2000,
    csv_file='concrete_model_results_ence.csv',
    data_noise_options=data_noise_options,
    print_frequency=2000,
)
train_and_save_results(**sweep_settings)

Epoch: 0
-------
Loss: 1038.3498433430989

Evaluation on Test Data
------------------------
Average Test Loss: 5.162876685460408
Epoch: 0
-------
Loss: 1.826033929983775

Evaluation on Test Data
------------------------
Average Test Loss: 15.193254788716635
Epoch: 0
-------
Loss: 1470.7306111653645

Evaluation on Test Data
------------------------
Average Test Loss: 7.184723377227783
Epoch: 0
-------
Loss: 2.7468228340148926

Evaluation on Test Data
------------------------
Average Test Loss: 14.553625424702963
Epoch: 0
-------
Loss: 1336.8081563313801

Evaluation on Test Data
------------------------
Average Test Loss: 6.611470381418864
Epoch: 0
-------
Loss: 2.9432413081328073

Evaluation on Test Data
------------------------
Average Test Loss: 15.40241813659668


In [7]:
# Much deeper Anchored Batch Ensemble variants (20 to 40 hidden layers)
hidden_layers_options = [20, 30, 40]
hidden_units_options = [128]
data_noise_options = [0.0001, 0.00001]

# Gaussian negative log-likelihood as the criterion; the optimizer is passed as the
# class itself (not an instance)
loss_fn = nn.GaussianNLLLoss()
optimizer = torch.optim.Adam

# Gather every argument of the sweep in one place, then launch it
sweep_settings = dict(
    model_name='anchored_batch',
    hidden_layers_options=hidden_layers_options,
    hidden_units_options=hidden_units_options,
    input_shape=8,
    loss_fn=loss_fn,
    optimizer=optimizer,
    train_loader=train_loader,
    test_loader=test_loader,
    ensemble_size=10,
    epochs=2000,
    csv_file='concrete_model_results_ence.csv',
    data_noise_options=data_noise_options,
    print_frequency=1000,
)
train_and_save_results(**sweep_settings)

Epoch: 0
-------
Loss: 1000.7238972981771
Epoch: 1000
-------
Loss: 1.1015309890111287

Evaluation on Test Data
------------------------
Average Test Loss: 8.392441749572754
Epoch: 0
-------
Loss: 4.1134850680828094
Epoch: 1000
-------
Loss: 0.6920038014650345

Evaluation on Test Data
------------------------
Average Test Loss: 15.647958755493164
Epoch: 0
-------
Loss: 1450.5557861328125
Epoch: 1000
-------
Loss: 3.3068974018096924

Evaluation on Test Data
------------------------
Average Test Loss: 3.342614730199178
Epoch: 0
-------
Loss: 3.2503873904546103
Epoch: 1000
-------
Loss: 3.3541545470555625

Evaluation on Test Data
------------------------
Average Test Loss: 3.3414089679718018
Epoch: 0
-------
Loss: 1113.2577209472656
Epoch: 1000
-------
Loss: 3.309673269589742

Evaluation on Test Data
------------------------
Average Test Loss: 3.3423481782277427
Epoch: 0
-------
Loss: 3.3082931439081826
Epoch: 1000
-------
Loss: 3.315606196721395

Evaluation on Test Data
-----------------

In [21]:
# Read the results of the calibration (ENCE) runs.
# The path is written with forward slashes: the previous literal contained "\c", which is
# not a valid escape sequence (Python warns about it), and backslash separators only
# resolve on Windows.
df = pd.read_csv('../results/concrete_model_results_ence.csv')

# Collect Batch and Anchored Batch model results separately
df_batch = df[(df['model'] == 'batch')]
df_anchored = df[(df['model'] == 'anchored_batch')]

# Show which columns are available for the plots below
df_anchored.columns

Index(['model', 'ensemble_size', 'hidden_layers', 'hidden_units',
       'weight_decay', 'data_noise', 'epochs', 'optimizer', 'loss_fn',
       'learning_rate', 'ENCE', 'GNLLL', 'RMSE'],
      dtype='object')

In [22]:
# Restrict the Batch Ensemble results to the tuned hyperparameters and the three scores
batch_columns = ['hidden_layers', 'weight_decay', 'ENCE', 'GNLLL', 'RMSE']
df_batch = df_batch[batch_columns]

# Parallel-coordinates plot: one axis per column, one line per row
fig = px.parallel_coordinates(df_batch)
fig.show()

In [23]:
# Restrict the Anchored Batch results to the tuned hyperparameters and the three scores
anchored_columns = ['hidden_layers', 'data_noise', 'epochs', 'ENCE', 'GNLLL', 'RMSE']
df_anchored = df_anchored[anchored_columns]

# Parallel-coordinates plot: one axis per column, one line per row
fig = px.parallel_coordinates(df_anchored)
fig.show()

In [24]:
# Discard the worst-performing models (keep rows with RMSE below 10) and, in the same
# step, keep only the tuned hyperparameters and the performance columns
anchored_columns = ['hidden_layers', 'data_noise', 'epochs', 'ENCE', 'GNLLL', 'RMSE']
df_anchored = df_anchored.loc[df_anchored['RMSE'] < 10, anchored_columns]

# Parallel-coordinates plot: one axis per column, one line per row
fig = px.parallel_coordinates(df_anchored)
fig.show()

## Power Plant Dataset

In [5]:
# Load the power plant spreadsheet into a pandas DataFrame.
# The path uses forward slashes: they resolve on Windows, Linux and macOS alike,
# whereas a backslash-separated path is only understood on Windows.
power = pd.read_excel('../data/UCI_Regression/5.PowerPlant/Folds5x2_pp.xlsx')

# Preview the first rows
power.head()

Unnamed: 0,AT,V,AP,RH,PE
0,14.96,41.76,1024.07,73.17,463.26
1,25.18,62.96,1020.04,59.08,444.37
2,5.11,39.4,1012.16,92.14,488.56
3,20.86,57.32,1010.24,76.64,446.48
4,10.82,37.5,1009.23,96.62,473.9


In [6]:
# Seed NumPy's global RNG so the random split below is repeatable
np.random.seed(52)

# Hold out 10% of the rows as a validation set and convert both parts to NumPy arrays
power_train, power_val = (
    part.to_numpy() for part in train_test_split(power, test_size=0.1)
)

In [7]:
# Standardise the four feature columns (subtract mean, divide by standard deviation).
# Column 4 is the regression target and is left untouched.
power_scaler = StandardScaler().fit(X=power_train[:, :4], y=power_train[:, 4])
power_train[:, :4] = power_scaler.transform(power_train[:, :4])

# 70/30 split of the scaled training data; the 30% part is what the loaders call "test"
x_train, x_test, y_train, y_test = train_test_split(
    power_train[:, :4],
    power_train[:, 4],
    train_size=0.7,
)

# Use the GPU when one is available, otherwise fall back to the CPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Features become float32 tensors on the chosen device
x_train_tensor, x_test_tensor = (
    torch.tensor(features, dtype=torch.float32).to(device)
    for features in (x_train, x_test)
)
# Targets additionally get a trailing dimension so they are column vectors of shape (N, 1)
y_train_tensor, y_test_tensor = (
    torch.tensor(targets, dtype=torch.float32).unsqueeze(1).to(device)
    for targets in (y_train, y_test)
)

# Sanity check of the resulting shapes
for split_tensor in (x_train_tensor, y_train_tensor, x_test_tensor, y_test_tensor):
    print(split_tensor.shape)

# Pair features with targets
train_dataset = TensorDataset(x_train_tensor, y_train_tensor)
test_dataset = TensorDataset(x_test_tensor, y_test_tensor)

# Mini-batch size shared by both loaders
batch_size = 128

# Only the training loader reshuffles its samples
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)



torch.Size([6027, 4])
torch.Size([6027, 1])
torch.Size([2584, 4])
torch.Size([2584, 1])


In [8]:
# Search space for the Batch Ensemble on the power plant data (fixed depth of 6)
hidden_layers_options = [6]
hidden_units_options = [32, 64, 128]
weight_decay_options = [1e-2, 1e-4, 1e-5]

# Gaussian negative log-likelihood as the criterion; the optimizer is passed as the
# class itself (not an instance)
loss_fn = nn.GaussianNLLLoss()
optimizer = torch.optim.Adam

# Gather every argument of the sweep in one place, then launch it.
# input_shape is 4 here because the power data has four feature columns.
sweep_settings = dict(
    model_name='batch',
    hidden_layers_options=hidden_layers_options,
    hidden_units_options=hidden_units_options,
    input_shape=4,
    loss_fn=loss_fn,
    optimizer=optimizer,
    train_loader=train_loader,
    test_loader=test_loader,
    ensemble_size=10,
    epochs=200,
    csv_file='power_model_results.csv',
    weight_decay_options=weight_decay_options,
    print_frequency=20,
)
train_and_save_results(**sweep_settings)


Epoch: 0
-------
Train Loss: 351202.5245535714
Test loss: 148544.13541666666 


KeyboardInterrupt: 

In [15]:
# Search space for the Anchored Batch Ensemble on the power plant data (fixed depth of 8)
hidden_layers_options = [8]
hidden_units_options = [32, 64, 128]
data_noise_options = [1e-2, 1e-4, 1e-5, 1e-6]

# Gaussian negative log-likelihood as the criterion; the optimizer is passed as the
# class itself (not an instance)
loss_fn = nn.GaussianNLLLoss()
optimizer = torch.optim.Adam

# Gather every argument of the sweep in one place, then launch it.
# input_shape is 4 here because the power data has four feature columns.
sweep_settings = dict(
    model_name='anchored_batch',
    hidden_layers_options=hidden_layers_options,
    hidden_units_options=hidden_units_options,
    input_shape=4,
    loss_fn=loss_fn,
    optimizer=optimizer,
    train_loader=train_loader,
    test_loader=test_loader,
    ensemble_size=10,
    epochs=600,
    csv_file='power_model_results_ES.csv',
    data_noise_options=data_noise_options,
    print_frequency=10,
)
train_and_save_results(**sweep_settings)


Epoch: 0
-------
Train Loss: 355817.00595238095
Test loss: 151440.76264880953 
Epoch: 10
-------
Train Loss: 38.40261386689686
Test loss: 15.620517776125954 Improvement found, counter reset to 0
Epoch: 20
-------
Train Loss: 9.702053978329612
Test loss: 4.042075316111247 Improvement found, counter reset to 0
Epoch: 30
-------
Train Loss: 6.796389034816197
Test loss: 2.965614330200922 Improvement found, counter reset to 0
Epoch: 40
-------
Train Loss: 6.5411161581675215
Test loss: 2.8600851808275496 Improvement found, counter reset to 0
Epoch: 50
-------
Train Loss: 6.435928787503924
Test loss: 2.8166073731013705 Improvement found, counter reset to 0
Epoch: 60
-------
Train Loss: 6.370002860114688
Test loss: 2.7887246835799444 Improvement found, counter reset to 0
Epoch: 70
-------
Train Loss: 6.315686782201131
Test loss: 2.765982605162121 Improvement found, counter reset to 0
Epoch: 80
-------
Train Loss: 6.259228104636783
Test loss: 2.7398969672975086 Improvement found, counter reset 

In [23]:
# Read the results of the power plant runs.
# The path is written with forward slashes: the previous literal contained "\p", which is
# not a valid escape sequence (Python warns about it), and backslash separators only
# resolve on Windows.
df = pd.read_csv('../results/power_model_results_ES.csv')

# Collect Batch and Anchored Batch model results separately
df_batch = df[(df['model'] == 'batch')]
df_anchored = df[(df['model'] == 'anchored_batch')]

# Show which columns are available for the plot below
df_anchored.columns

Index(['model', 'ensemble_size', 'hidden_layers', 'hidden_units',
       'weight_decay', 'data_noise', 'epochs', 'optimizer', 'loss_fn',
       'learning_rate', 'ENCE', 'GNLLL', 'RMSE'],
      dtype='object')

In [25]:
# Keep rows with RMSE below 4.5 whose data noise is not 0.01 and, in the same step,
# keep only the tuned hyperparameters and the performance columns
keep_rows = (df_anchored['RMSE'] < 4.5) & (df_anchored['data_noise'] != 0.01)
anchored_columns = ['hidden_layers', 'hidden_units', 'data_noise', 'ENCE', 'GNLLL', 'RMSE']
df_anchored = df_anchored.loc[keep_rows, anchored_columns]

# Parallel-coordinates plot: one axis per column, one line per row
fig = px.parallel_coordinates(df_anchored)
fig.show()