Heteroscedastic Implementation of Deep Ensembles

No adversarial training is used.

Most of the code is inspired by:
https://github.com/cameronccohen/deep-ensembles/blob/master/Tutorial.ipynb

Necessary imports

In [1]:
import os
import sys
import matplotlib.pyplot as plt
%matplotlib inline
import pickle
import pandas as pd
import numpy as np
import torch
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import r2_score
import optuna
from optuna.trial import TrialState
import uncertainty_toolbox as uct

# Select the compute device: use the GPU when CUDA is available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Map each known machine (identified by the COMPUTERNAME environment variable)
# to the local project root that is appended to sys.path for the project imports.
PROJECT_ROOTS = {
    'FYNN': r'C:\Users\Surface\Masterarbeit',     # name of surface PC
    'FYNNS-PC': r'C:\Users\test\Masterarbeit',    # desktop name
}

# Read the variable once with .get(): if COMPUTERNAME is not set at all, a direct
# os.environ[...] lookup would raise a bare KeyError and the explanatory
# "unknown computer" error below would never be reached.
computer_name = os.environ.get('COMPUTERNAME', '<COMPUTERNAME not set>')
if computer_name not in PROJECT_ROOTS:
    raise ValueError("Unbekannter Computername: " + computer_name)

sys.path.append(PROJECT_ROOTS[computer_name])

from utils.data_prep import load_tranform_and_split_data, set_seed
from utils.metrices import evaluate_intervals
from utils.NN_model import Custom_NN_Model, train_model, heteroscedastic_loss
from utils.NN_model import create_ensemble

Load, Transform and Split the data

In [2]:
# Load and transform the data set, then split it into training, validation and
# test sets (requested ratio: 60% / 20% / 20%). The feature names are returned
# as well so they can be used later on.
(
    X_train, X_val, X_test,
    y_train, y_val, y_test,
    feature_names,
) = load_tranform_and_split_data('C1_V01_delta_kan', split_ratio=(0.6, 0.2, 0.2))

# Convert the feature arrays to float32 PyTorch tensors.
X_train_tensor, X_val_tensor, X_test_tensor = (
    torch.from_numpy(features).float() for features in (X_train, X_val, X_test)
)

# Convert the targets as well and reshape them to a column (N, 1) so they carry
# an explicit trailing dimension for compatibility with the model output.
y_train_tensor, y_val_tensor, y_test_tensor = (
    torch.from_numpy(targets).float().reshape(-1, 1) for targets in (y_train, y_val, y_test)
)

Optuna Hyperparameter Search

In [None]:
def objective(trial):
    """Optuna objective: train one heteroscedastic network and return its best validation loss.

    Samples the architecture and optimiser settings from ``trial``, trains on the
    notebook-level training tensors with early stopping, and reports the validation
    loss after every epoch so that the study's pruner can stop unpromising trials.

    Relies on notebook-level names: ``X_train_tensor``, ``y_train_tensor``,
    ``X_val_tensor``, ``y_val_tensor``, ``device`` and ``epochs`` (the maximum number
    of epochs, which must be defined before ``study.optimize`` is called).

    Args:
        trial: the Optuna trial used to sample hyperparameters and report values.

    Returns:
        float: the lowest validation loss seen during training
        (``inf`` if no epoch was run).

    Raises:
        optuna.exceptions.TrialPruned: if the pruner decides to stop this trial.
    """
    # hyperparameters sampled by Optuna
    n_layer = trial.suggest_int("n_layer", 2, 5)  # number of hidden layers
    n_neurons = trial.suggest_int("n_neurons", 64, 320, step=32)  # neurons in the first hidden layer
    lr = trial.suggest_float("lr", 1e-4, 1e-2, log=True)    # learning rate
    batch_size = trial.suggest_categorical("batch_size", [64, 128])
    decay = trial.suggest_float('decay', 0.5, 0.7, step = 0.1)  # per-layer shrink factor

    # Each hidden layer is `decay` times as wide as the previous one (truncated to int),
    # e.g. n_neurons = 256, n_layer = 3, decay = 0.5 -> hidden_dims = [256, 128, 64].
    hidden_dims = [int(n_neurons * decay**i) for i in range(n_layer)]
    print(f"Hidden dimensions: {hidden_dims}")

    # generate the model with the sampled hyperparameters
    # and move it to the device (GPU or CPU)
    model = Custom_NN_Model(
        input_dim=X_train_tensor.shape[1],
        hidden_dims=hidden_dims,
        output_dim=1,
        do_rate=0,
        loss_type = 'heteroscedastic'
    ).to(device)

    # AdamW optimizer, where weight decay does not accumulate in the momentum nor variance.
    optimizer = torch.optim.AdamW(params = model.parameters(), lr = lr, weight_decay=0.0001)

    # DataLoader for batching the data
    train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

    # Make sure the validation data lives on the same device as the model, so the
    # objective does not depend on another cell having moved the tensors already
    # (.to() is a no-op when they are on the device).
    X_val_device = X_val_tensor.to(device)
    y_val_device = y_val_tensor.to(device)

    # early stopping state
    patience = 20  # number of epochs with no improvement after which training will be stopped
    best_val_loss = float('inf')
    epochs_no_improve = 0

    # training the model
    for epoch in range(epochs):
        model.train()

        for X_batch, y_batch in train_loader:
            X_batch = X_batch.to(device)  # Move data to the device (GPU or CPU)
            y_batch = y_batch.to(device)
            optimizer.zero_grad()

            loss = heteroscedastic_loss(model, X_batch, y_batch)
            loss.backward()
            optimizer.step()

        # validation loss calculation after each epoch
        model.eval()
        with torch.no_grad():
            # .item() turns the scalar loss tensor into a plain Python float, so Optuna
            # receives a float and no (GPU) tensor is kept alive as the best value.
            val_loss = heteroscedastic_loss(model, X_val_device, y_val_device).item()

        # report the validation loss to Optuna
        trial.report(val_loss, step=epoch)
        # handle pruning based on the intermediate value
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

        # Early stopping check
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            epochs_no_improve = 0
        else:
            epochs_no_improve += 1
            if epochs_no_improve >= patience:
                print(f"Early stopping at epoch {epoch+1}, Best Val Loss: {best_val_loss:.4f}")
                break

    return best_val_loss


In [None]:
# Maximum number of training epochs per trial (read by `objective` as a notebook-level variable).
epochs = 500

# Put the training and validation tensors on the selected device before the search starts.
X_train_tensor, y_train_tensor = X_train_tensor.to(device), y_train_tensor.to(device)
X_val_tensor, y_val_tensor = X_val_tensor.to(device), y_val_tensor.to(device)

# Study that minimises the objective with the TPE (Tree-structured Parzen Estimator)
# sampler and a median pruner. Pruning only starts after 20 trials have been run,
# and within a trial only after 5 warm-up steps.
study = optuna.create_study(
    direction="minimize",
    sampler=optuna.samplers.TPESampler(),
    pruner=optuna.pruners.MedianPruner(n_startup_trials=20, n_warmup_steps=5),
)

# Run the search sequentially; timeout=None means there is no time limit,
# so all 200 trials are run.
study.optimize(objective, n_trials=200, timeout=None, n_jobs=1, show_progress_bar=True)

# Collect the trials by final state for the summary below.
pruned_trials = study.get_trials(deepcopy=False, states=[TrialState.PRUNED])
complete_trials = study.get_trials(deepcopy=False, states=[TrialState.COMPLETE])

print("Study statistics: ")
for label, count in (
    ("  Number of finished trials: ", len(study.trials)),
    ("  Number of pruned trials: ", len(pruned_trials)),
    ("  Number of complete trials: ", len(complete_trials)),
):
    print(label, count)

# Summary of the best trial: objective value and the sampled hyperparameters.
print("Best trial:")
trial = study.best_trial

print("  Value: ", trial.value)

print("  Params: ")
for key in trial.params:
    print("    {}: {}".format(key, trial.params[key]))

Create an Ensemble, train it and make predictions

Best model architecture implemented:
hidden_dims=[320,224,156]

Best trial:
  Value:  1.4413212537765503
  Params: 
    n_layer: 3
    n_neurons: 320
    lr: 0.0004348009611810878
    batch_size: 64
    decay: 0.7

In [None]:
# Build the ensemble with the tuned architecture. create_ensemble is called with 2 as
# its first argument (presumably the ensemble size) and yields (network, optimizer) pairs.
nets_ops = create_ensemble(
    2,
    input_dim=X_train.shape[1],
    hidden_dims=[320, 224, 156],
    do_rate=0,
    loss_type='heteroscedastic',
    lr=0.0004,
    weight_decay=0.0001,
)

# Per-network test-set predictions. Note: `outputs_log_var` keeps its name, but the
# values appended below are exp(log variance), i.e. predicted variances.
outputs_mean = []
outputs_log_var = []

# Train every ensemble member, then predict on the test set.
for net, ops in nets_ops:
    model = train_model(
        model=net,
        X_train_tensor=X_train_tensor,
        y_train_tensor=y_train_tensor,
        X_val_tensor=X_val_tensor,
        y_val_tensor=y_val_tensor,
        batch_size=64,
        optimizer=ops,
        n_epochs=1000,
        patience=50,
        loss_type='heteroscedastic',
    )
    print("Model training mode:", model.training)
    # switch to evaluation mode before predicting; the flag is printed before and after
    model.eval()
    print("Model training mode:", model.training)
    with torch.no_grad():
        output_mean, output_log_var = model(X_test_tensor.to(device))
        # bring both outputs back to the CPU as NumPy arrays
        output_mean_np = output_mean.detach().cpu().numpy()
        output_log_var_np = output_log_var.detach().cpu().numpy()
        outputs_mean.append(output_mean_np)
        outputs_log_var.append(np.exp(output_log_var_np))

# Stack the per-network results along a leading ensemble axis.
outputs_mean = np.array(outputs_mean)
outputs_log_var = np.array(outputs_log_var)

Evaluating the Model

In [None]:
# Ensemble prediction: average the member means over the ensemble axis (axis 0), flattened to 1D.
DE_mean = outputs_mean.mean(axis=0).ravel()

# Epistemic part: variance of the member means across the ensemble.
epistemic_var_heteroscedastic = outputs_mean.var(axis=0)
print(f"Epistemic Variance: {epistemic_var_heteroscedastic.mean():.6f}")

# Aleatoric (heteroscedastic) part: average of the member variances
# (`outputs_log_var` already holds exp(log variance)).
aleatoric_var_heteroscedastic = outputs_log_var.mean(axis=0)
print(f"Aleatoric Variance: {aleatoric_var_heteroscedastic.mean():.6f}")

# Total predictive standard deviation: square root of the summed variances, flattened to 1D.
DE_std = np.sqrt(
    epistemic_var_heteroscedastic + aleatoric_var_heteroscedastic
).reshape(-1)

# Compute and print all uncertainty_toolbox metrics, then the RMSE on its own.
pnn_metrics = uct.metrics.get_all_metrics(DE_mean, DE_std, y_test)
print(pnn_metrics)
print(pnn_metrics['accuracy']['rmse'])

# Proportion of test targets inside the interval for quantile = 0.95.
coverage_95 = uct.metrics_calibration.get_proportion_in_interval(
    DE_mean, DE_std, y_test, quantile=0.95
)
print(f"Coverage 95%: {coverage_95}")

# Project helper: coverage and MPIW for coverage=0.95.
ev_intervals = evaluate_intervals(DE_mean, DE_std, y_test, coverage=0.95)
print(f'coverage: {ev_intervals["coverage"]}, MPIW: {ev_intervals["MPIW"]}')

# Calibration curve with uncertainty_toolbox.
uct.viz.plot_calibration(DE_mean, DE_std, y_test)

# Adversarial group calibration plot.
uct.viz.plot_adversarial_group_calibration(DE_mean, DE_std, y_test)

Evaluate Deep Ensemble predictions with 10 runs

In [None]:
results_list = []
predictions_list = []
list_of_seeds = [42, 123, 777, 2024, 5250, 8888, 9876, 10001, 31415, 54321]
DE_prediction_path = r"C:\Users\test\Masterarbeit\models\Modelresults\Deep Ensembles"
DE_result_path = r"C:\Users\test\OneDrive\Master Management und Engineering\Masterarbeit\Experimente\Evaluation\10 Runs\Deep Ensembles"

# Create/validate the output directories BEFORE the long training loop. Otherwise a
# missing or inaccessible directory is only noticed when saving, after all runs have
# been trained, and the results are lost.
for output_dir in (DE_prediction_path, DE_result_path):
    os.makedirs(output_dir, exist_ok=True)

# The test features are the same for every network and run, so move them to the device once.
X_test_device = X_test_tensor.to(device)

for run, seed in enumerate(list_of_seeds):

    print(f"Run {run+1} with seed {seed}")
    set_seed(seed)

    # create an ensemble of 5 networks with the defined net architecture and optimizer
    nets_ops = create_ensemble(5, input_dim = X_train.shape[1], hidden_dims=[320,224,156],
                               do_rate=0, loss_type='heteroscedastic', lr=0.0004, weight_decay=0.0001)

    # Per-network test-set predictions. Note: `outputs_log_var` keeps its name, but the
    # values appended below are exp(log variance), i.e. predicted variances.
    outputs_mean = []
    outputs_log_var = []

    # train the ensemble of networks and make predictions on the test set
    for net, ops in nets_ops:
        model = train_model(model= net, X_train_tensor=X_train_tensor, y_train_tensor=y_train_tensor,
                            X_val_tensor= X_val_tensor, y_val_tensor=y_val_tensor, batch_size=64,
                            optimizer=ops, n_epochs=1000, patience=50, loss_type='heteroscedastic',
                            )

        # set the model to evaluation mode and make predictions on the test set
        model.eval()
        with torch.no_grad():
            output_mean, output_log_var = model(X_test_device)
            # Detach and convert to numpy arrays
            output_mean_np, output_log_var_np = output_mean.detach().cpu().numpy(), output_log_var.detach().cpu().numpy()
            outputs_mean.append(output_mean_np)
            outputs_log_var.append(np.exp(output_log_var_np))

    # stack the per-network results along a leading ensemble axis
    outputs_mean = np.array(outputs_mean)
    outputs_log_var = np.array(outputs_log_var)

    # Ensemble mean prediction on the test data
    DE_mean = np.mean(outputs_mean, axis = 0).reshape(-1) # reshape to 1D array

    # Epistemic variance: variance of the member means across the ensemble
    DE_epistemic_var_heteroscedastic = np.var(outputs_mean, axis=0)
    print(f"Epistemic Variance: {DE_epistemic_var_heteroscedastic.mean():.6f}")

    # Aleatoric (heteroscedastic) variance: average of the member variances
    DE_aleatoric_var_heteroscedastic = np.mean(outputs_log_var, axis=0)
    print(f"Aleatoric Variance: {DE_aleatoric_var_heteroscedastic.mean():.6f}")

    # Total standard deviation: square root of the summed variances
    DE_std = np.sqrt(DE_epistemic_var_heteroscedastic + DE_aleatoric_var_heteroscedastic).reshape(-1) # reshape to 1D array

    # Calculate and print all uncertainty_toolbox metrics
    pnn_metrics = uct.metrics.get_all_metrics(DE_mean, DE_std, y_test)
    print(pnn_metrics)

    # use own function to calculate coverage and MPIW
    ev_intervals = evaluate_intervals(DE_mean, DE_std, y_test, coverage=0.95)
    print(f'coverage: {ev_intervals["coverage"]}, MPIW: {ev_intervals["MPIW"]}')

    predictions_per_run = {
        'mean_prediction': DE_mean,
        'std_prediction': DE_std,
    }

    results_per_run = {
        'RMSE': pnn_metrics['accuracy']['rmse'],
        'MAE': pnn_metrics['accuracy']['mae'],
        'R2': pnn_metrics['accuracy']['r2'],
        'Correlation' : pnn_metrics['accuracy']['corr'],
        'NLL': pnn_metrics['scoring_rule']['nll'],
        'CRPS': pnn_metrics['scoring_rule']['crps'],
        'coverage': ev_intervals["coverage"],
        'MPIW': ev_intervals["MPIW"],
    }

    predictions_list.append(predictions_per_run)
    results_list.append(results_per_run)

# NOTE(review): both file names below say "ngboost" although this notebook evaluates
# Deep Ensembles and writes into the "Deep Ensembles" folders - looks like a copy-paste
# leftover. Left unchanged here because other notebooks may load the files by these
# names; confirm and rename consistently.

# save the predictions
with open(os.path.join(DE_prediction_path, "ngboost_predictions_list.pkl"), "wb") as f:
    pickle.dump(predictions_list, f)

# save the results in an excel file
results_df = pd.DataFrame(results_list)
results_df.to_excel(os.path.join(DE_result_path, "ngboost_results.xlsx"), index=False)