Heteroscedastic MC Dropout


Necessary imports

In [None]:
import os
import sys
import matplotlib.pyplot as plt
%matplotlib inline
import torch
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import r2_score
import numpy as np
import optuna
from optuna.trial import TrialState
import pickle
import uncertainty_toolbox as uct
import pandas as pd

# Select the compute device: GPU when CUDA is available, otherwise CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Make the project root importable so that the `utils` and `config` modules
# imported below can be found. The root directory depends on the machine.
_PROJECT_ROOTS = {
    'FYNN': r'C:\Users\Surface\Masterarbeit',       # name of surface PC
    'FYNNS-PC': r'C:\Users\test\Masterarbeit',      # desktop name
}

# COMPUTERNAME may be unset (e.g. outside Windows). Reading it with a default
# makes that case end in the descriptive ValueError below instead of a KeyError.
_computer_name = os.environ.get('COMPUTERNAME', '')
if _computer_name not in _PROJECT_ROOTS:
    raise ValueError("Unbekannter Computername: " + _computer_name)
sys.path.append(_PROJECT_ROOTS[_computer_name])

from utils.data_prep import load_tranform_and_split_data, set_seed
from utils.metrics import evaluate_intervals
from utils.NN_model import Custom_NN_Model, train_model, heteroscedastic_loss
from config import DATA_PATH, MCD_PREDICTION_PATH, MCD_RESULTS_PATH

Load and transform the data

In [None]:
# Load the data set, transform it and split it into
# 60% training, 20% validation and 20% test data.
# The feature names are returned as well for later use.
(
    X_train, X_val, X_test,
    y_train, y_val, y_test,
    feature_names,
) = load_tranform_and_split_data(
    DATA_PATH,
    'C1_V01_delta_kan',
    split_ratio=(0.6, 0.2, 0.2),
)

# Convert the feature arrays to float PyTorch tensors.
X_train_tensor, X_val_tensor, X_test_tensor = (
    torch.from_numpy(features).float()
    for features in (X_train, X_val, X_test)
)

# Convert the targets as well and reshape them to a single column
# (extra dimension for compatibility with the network output).
y_train_tensor, y_val_tensor, y_test_tensor = (
    torch.from_numpy(targets).float().reshape(-1, 1)
    for targets in (y_train, y_val, y_test)
)

Hyperparameter Search with Optuna

In [None]:
def objective(trial):
    """
    Optuna objective function for hyperparameter optimization of MC Dropout.

    Samples the dropout rate, the learning rate, the batch size and the weight
    decay, trains a heteroscedastic network with a fixed architecture and
    tracks the validation loss with early stopping.

    The function reads the following module-level names, which must be defined
    before ``study.optimize`` is called: ``X_train_tensor``, ``y_train_tensor``,
    ``X_val_tensor``, ``y_val_tensor``, ``device`` and ``epochs``.

    Args:
        trial (optuna.Trial): Optuna trial object for suggesting hyperparameters

    Returns:
        float: Best (lowest) validation value of ``heteroscedastic_loss``
            observed during training, as a plain Python float

    Raises:
        optuna.exceptions.TrialPruned: If the pruner decides to stop the trial
            based on the reported intermediate validation losses

    Note:
        Hyperparameter ranges are justified in Chapter X.Y
    """

    # hyperparameter for sampling with Optuna
    # (the architecture search is disabled; the architecture is fixed below)
    #n_layer = trial.suggest_int("n_layer", 2, 3)  # number of hidden layers
    #n_neurons = trial.suggest_int("n_neurons", 256, 640, step=32)  # number of neurons in each hidden layer
    do_rate = trial.suggest_float("do_rate", 0.10, 0.5)     # dropout rate
    lr = trial.suggest_float("lr", 1e-5, 1e-2, log=True)    # learning rate
    batch_size = trial.suggest_categorical("batch_size", [64, 128])
    weight_decay = trial.suggest_float("wd", 1e-8, 1e-5, log=True)  # weight decay for regularization

    # architecture from Deep Ensembles network
    hidden_dims = [320, 224, 156]

    # generate the model with the sampled hyperparameters
    # and move it to the device (GPU or CPU)
    model = Custom_NN_Model(
        input_dim=X_train_tensor.shape[1],
        hidden_dims=hidden_dims,
        output_dim=1,
        do_rate=do_rate,
        loss_type='heteroscedastic'
    ).to(device)

    # AdamW optimizer, where weight decay does not accumulate in the momentum nor variance.
    optimizer = torch.optim.AdamW(params=model.parameters(), lr=lr, weight_decay=weight_decay)

    # DataLoader for batching the data
    train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

    # integrate early stopping
    patience = 50  # number of epochs with no improvement after which training will be stopped
    best_val_loss = float('inf')
    epochs_no_improve = 0

    # training the model
    for epoch in range(epochs):
        model.train()

        for X_batch, y_batch in train_loader:
            X_batch = X_batch.to(device)  # Move data to the device (GPU or CPU)
            y_batch = y_batch.to(device)
            optimizer.zero_grad()

            loss = heteroscedastic_loss(model, X_batch, y_batch)
            loss.backward()
            optimizer.step()

        # validation loss calculation after each epoch
        model.eval()
        with torch.no_grad():
            # Convert to a Python float so that neither Optuna nor the early
            # stopping bookkeeping holds on to (possibly GPU) tensors.
            val_loss = float(heteroscedastic_loss(model, X_val_tensor, y_val_tensor))

        # report the validation loss to Optuna
        trial.report(val_loss, step=epoch)
        # handle pruning based on the intermediate value
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

        # Early stopping check
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            epochs_no_improve = 0
        else:
            epochs_no_improve += 1
            if epochs_no_improve >= patience:
                print(f"Early stopping at epoch {epoch+1}, Best Val Loss: {best_val_loss:.4f}")
                break

    return best_val_loss


Execute Optuna Study

In [None]:
# Maximum number of training epochs per trial (read by `objective`)
epochs = 1000

# Search strategy: TPE (Tree-structured Parzen Estimator) sampler
tpe_sampler = optuna.samplers.TPESampler()

# Pruning rule: median pruner
median_pruner = optuna.pruners.MedianPruner(
    n_startup_trials=10,    # Number of trials to run before pruning starts
    n_warmup_steps=5,       # Number of warmup steps before pruning starts
)

# Create the Optuna study that minimizes the objective value.
# Optional persistence settings (currently disabled):
#   study_name="MC_Dropout_Optuna_Network_architecture",
#   storage="sqlite:///mc_dropout_study.db",    # use an SQLite database to store the study
#   load_if_exists=True,                        # load the study if it already exists
study = optuna.create_study(
    direction="minimize",
    sampler=tpe_sampler,
    pruner=median_pruner,
)

# Move the training and validation tensors to the device
X_train_tensor, y_train_tensor = X_train_tensor.to(device), y_train_tensor.to(device)
X_val_tensor, y_val_tensor = X_val_tensor.to(device), y_val_tensor.to(device)

# Run the optimization.
# timeout=None means no time limit for the optimization, all trials will be run
study.optimize(objective, n_trials=100, timeout=None, n_jobs=1, show_progress_bar=True)

# Collect the trials by their final state
pruned_trials, complete_trials = (
    study.get_trials(deepcopy=False, states=[state])
    for state in (TrialState.PRUNED, TrialState.COMPLETE)
)

# Summary of the study
print("Study statistics: ")
print("  Number of finished trials: ", len(study.trials))
print("  Number of pruned trials: ", len(pruned_trials))
print("  Number of complete trials: ", len(complete_trials))

# Details of the best trial
print("Best trial:")
trial = study.best_trial

print("  Value: ", trial.value)

print("  Params: ")
for param_name, param_value in trial.params.items():
    print("    {}: {}".format(param_name, param_value))

Perform 10 Runs with Different Random Seeds to Evaluate MC Dropout

In [None]:
# Train and evaluate the heteroscedastic MC Dropout model once per seed.
# Each run stores its predictions (mean / total std per test point) and its
# metrics; both collections are written to disk after the last run.
results_list = []
predictions_list = []
list_of_seeds = [42, 123, 777, 2024, 5250, 8888, 9876, 10001, 31415, 54321]

# Number of stochastic forward passes for MC Dropout
n_samples = 250

# The test inputs are identical for every run and every forward pass,
# so they are moved to the device only once.
X_test_device = X_test_tensor.to(device)

for run, seed in enumerate(list_of_seeds):

    print(f"Run {run+1} with seed {seed}")
    set_seed(seed)

    # create a model instance and move it to the same device as the training
    # tensors (consistent with the model creation in `objective`)
    model = Custom_NN_Model(input_dim=X_train.shape[1], hidden_dims=[320, 224, 156],
                            output_dim=1, do_rate=0.209,
                            loss_type='heteroscedastic').to(device)
    # AdamW optimizer with fixed learning rate and weight decay
    optimizer = torch.optim.AdamW(params=model.parameters(), lr=0.0015, weight_decay=3.820457176351347e-06)
    # train model
    tr_model = train_model(model, X_train_tensor=X_train_tensor, y_train_tensor=y_train_tensor,
                           X_val_tensor=X_val_tensor, y_val_tensor=y_val_tensor, batch_size=64,
                           n_epochs=1000, optimizer=optimizer, patience=50,
                           loss_type='heteroscedastic')

    # inference with multiple forward passes
    # keep the model in training mode to keep dropout active
    tr_model.train()

    # Make multiple stochastic predictions (MC Dropout) on the test data.
    # No gradients are needed for inference, so autograd is switched off.
    outputs_mean = []
    outputs_var = []
    with torch.no_grad():
        for _ in range(n_samples):
            mean, log_var = tr_model(X_test_device)
            outputs_mean.append(mean.detach().cpu().numpy())
            # Convert the predicted log variance to a variance
            outputs_var.append(np.exp(log_var.detach().cpu().numpy()))

    outputs_mean = np.array(outputs_mean)
    outputs_var = np.array(outputs_var)

    # Predictive mean: average of the sampled means on the test data
    MC_mean = np.mean(outputs_mean, axis=0).reshape(-1)  # reshape to 1D array

    # Epistemic variance: variance of the sampled means across forward passes
    MC_epistemic_var_heteroscedastic = np.var(outputs_mean, axis=0)
    print(f"Epistemic Variance: {MC_epistemic_var_heteroscedastic.mean():.6f}")

    # Aleatoric variance (heteroscedastic): average of the predicted variances
    MC_aleatoric_var_heteroscedastic = np.mean(outputs_var, axis=0)
    print(f"Aleatoric Variance: {MC_aleatoric_var_heteroscedastic.mean():.6f}")

    # Total standard deviation from the sum of both variance components
    MC_std = np.sqrt(MC_epistemic_var_heteroscedastic + MC_aleatoric_var_heteroscedastic).reshape(-1)  # reshape to 1D array

    # Calculate and print all metrics including RMSE, MAE, R²-Score, NLL, CRPS
    pnn_metrics = uct.metrics.get_all_metrics(MC_mean, MC_std, y_test)
    print(pnn_metrics)

    # use own function to calculate coverage and MPIW
    ev_intervals = evaluate_intervals(MC_mean, MC_std, y_test, coverage=0.95)
    print(f'coverage: {ev_intervals["coverage"]}, MPIW: {ev_intervals["MPIW"]}')

    predictions_per_run = {
        'mean_prediction': MC_mean,
        'std_prediction': MC_std,
    }

    results_per_run = {
        'RMSE': pnn_metrics['accuracy']['rmse'],
        'MAE': pnn_metrics['accuracy']['mae'],
        'R2': pnn_metrics['accuracy']['r2'],
        'Correlation': pnn_metrics['accuracy']['corr'],
        'NLL': pnn_metrics['scoring_rule']['nll'],
        'CRPS': pnn_metrics['scoring_rule']['crps'],
        'coverage': ev_intervals["coverage"],
        'MPIW': ev_intervals["MPIW"],
    }

    predictions_list.append(predictions_per_run)
    results_list.append(results_per_run)

# save the predictions
with open(os.path.join(MCD_PREDICTION_PATH, "MC_predictions_list.pkl"), "wb") as f:
    pickle.dump(predictions_list, f)

# save the results in an excel file
results_df = pd.DataFrame(results_list)
results_df.to_excel(os.path.join(MCD_RESULTS_PATH, "MC_results.xlsx"), index=False)

Plot and Save Calibration Results

In [None]:

def _save_and_close_figure(file_stem):
    """Save the current matplotlib figure as SVG and PNG in MCD_RESULTS_PATH, then close it."""
    for extension in ("svg", "png"):
        plt.savefig(os.path.join(MCD_RESULTS_PATH, f"{file_stem}.{extension}"), format=extension)
    plt.close()


# Reload the predictions of all runs.
# NOTE: pickle must only be used on trusted files; this one is expected to be
# the file written by the evaluation cell of this notebook.
with open(os.path.join(MCD_PREDICTION_PATH, "MC_predictions_list.pkl"), "rb") as f:
    predictions_list = pickle.load(f)

# extract mean and std predictions of every run
mean_list = [run['mean_prediction'] for run in predictions_list]
std_list = [run['std_prediction'] for run in predictions_list]

# One set of plots per run; file names are numbered starting at 1.
for run_number, (mean, std) in enumerate(zip(mean_list, std_list), start=1):
    # calibration Curve with UCT
    uct.viz.plot_calibration(mean, std, y_test)
    _save_and_close_figure(f"calibration_run_{run_number}")

    # adversarial group calibration
    uct.viz.plot_adversarial_group_calibration(mean, std, y_test)
    _save_and_close_figure(f"adversarial_group_calibration_run_{run_number}")

# Stack the per-run predictions: one row per run, one column per test point
mean_matrix = np.array(mean_list)  # Shape: (n_runs, n_test_points)
std_matrix = np.array(std_list)    # Shape: (n_runs, n_test_points)

# Mean prediction and mean std for every data point across all runs
# NOTE(review): the stds are averaged arithmetically across runs (not combined
# via the variances) — confirm that this is the intended aggregation.
mean_per_datapoint = np.mean(mean_matrix, axis=0)  # Shape: (n_test_points,)
std_per_datapoint = np.mean(std_matrix, axis=0)    # Shape: (n_test_points,)

# calibration Curve with UCT
uct.viz.plot_calibration(mean_per_datapoint, std_per_datapoint, y_test)
_save_and_close_figure("calibration_run_mean")

# adversarial group calibration
uct.viz.plot_adversarial_group_calibration(mean_per_datapoint, std_per_datapoint, y_test)
_save_and_close_figure("adversarial_group_calibration_run_mean")