Necessary imports

In [None]:
import os
import sys
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.metrics import r2_score
import random
import numpy as np
import torch
from ngboost import NGBRegressor
from ngboost.scores import LogScore
from ngboost.distns import  Normal
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import root_mean_squared_error
import optuna
from optuna.trial import TrialState
import pickle
import uncertainty_toolbox as uct

# Make the project root importable so that the `utils` package can be found.
# The root differs between the two known machines, which are told apart via
# the COMPUTERNAME environment variable.
_PROJECT_ROOTS = {
    'FYNN': r'C:\Users\Surface\Masterarbeit',     # name of surface PC
    'FYNNS-PC': r'C:\Users\test\Masterarbeit',    # desktop name
}

# Use .get() instead of indexing: if COMPUTERNAME is missing from the
# environment, the explicit ValueError below should be raised rather than a
# bare KeyError.
_computer_name = os.environ.get('COMPUTERNAME')
if _computer_name not in _PROJECT_ROOTS:
    raise ValueError(f"Unbekannter Computername: {_computer_name}")

# Only append once, so re-running this cell does not stack duplicate entries.
if _PROJECT_ROOTS[_computer_name] not in sys.path:
    sys.path.append(_PROJECT_ROOTS[_computer_name])

import utils.data_prep as data_prep
import utils.metrices as metrices

In [None]:
SEED = 1


def set_seed(seed: int):
    """Seed Python's `random`, NumPy and PyTorch with the same value.

    When CUDA is available the CUDA generators are seeded as well. cuDNN is
    always switched to deterministic mode with benchmarking disabled.

    Args:
        seed: Integer seed handed to every random number generator.
    """
    # CPU-side generators, seeded in the order random -> numpy -> torch.
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)

    # GPU-side generators (current device first, then all devices).
    if torch.cuda.is_available():
        for cuda_seeder in (torch.cuda.manual_seed, torch.cuda.manual_seed_all):
            cuda_seeder(seed)

    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False


# Seed everything once right away.
set_seed(SEED)

Data loading and preprocessing, applied identically so that the algorithms can be compared, although decision trees are not sensitive to feature scaling because they do not use geometric distances.


In [None]:
# Load the 'C1_V01_delta_kan' data set, transform it and split it into
# training, validation and test sets (60% / 20% / 20%).
# Per the original author's note, data_prep uses random state 42 internally
# for reproducibility. The feature names are returned as well for later use.
(
    X_train, X_val, X_test,
    y_train, y_val, y_test,
    feature_names,
) = data_prep.load_tranform_and_split_data(
    'C1_V01_delta_kan',
    split_ratio=(0.6, 0.2, 0.2),
)

# Quick sanity check: shapes of the three feature matrices, then the names.
print(*(split.shape for split in (X_train, X_val, X_test)))
print(feature_names)


NGBoost outputs a probability distribution instead of a single value: https://stanfordmlgroup.github.io/ngboost/1-useage.html

Optuna Hyperparameter Search

In [None]:
def objective(trial):
    """Optuna objective: fit an NGBoost model and score it on the validation set.

    The trial suggests `n_estimators`, `learning_rate` (log scale) and the
    `max_depth` of the decision-tree base learner. The model is fitted on the
    training split with early stopping against the validation split.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        Mean negative log-likelihood of `y_val` under the predicted
        distributions (lower is better).
    """
    # Re-seed at the start of every trial for reproducibility.
    set_seed(SEED)

    # Search space; the dict is evaluated top to bottom, so the suggestions
    # are requested in the order n_estimators -> learning_rate -> max_depth.
    hparams = {
        "n_estimators": trial.suggest_int("n_estimators", 50, 1000, step=50),
        "learning_rate": trial.suggest_float("learning_rate", 1e-4, 1e-2, log=True),
        "max_depth": trial.suggest_int("max_depth", 4, 20),
    }

    base_learner = DecisionTreeRegressor(
        criterion='friedman_mse',
        max_depth=hparams["max_depth"],
    )
    model = NGBRegressor(
        Dist=Normal,
        Score=LogScore,
        Base=base_learner,
        verbose=False,
        n_estimators=hparams["n_estimators"],
        learning_rate=hparams["learning_rate"],
    )

    # Train with early stopping monitored on the validation split.
    model.fit(X_train, y_train, X_val=X_val, Y_val=y_val, early_stopping_rounds=20)

    # Point predictions and predictive distributions for the validation set.
    y_preds = model.predict(X_val)
    y_dists = model.pred_dist(X_val)

    # NLL is the optimisation target; RMSE is only printed for information.
    nll = -y_dists.logpdf(y_val).mean()
    rmse = root_mean_squared_error(y_val, y_preds)
    print(f"Trial {trial.number}: NLL={nll}, RMSE={rmse}")

    return nll

Execute Study

In [None]:
# TPE sampler, seeded so the hyperparameter search is repeatable.
tpe_sampler = optuna.samplers.TPESampler(seed=SEED)

# Median pruner: 20 start-up trials and 5 warm-up steps before pruning starts.
# NOTE(review): the objective in this notebook never calls trial.report() or
# trial.should_prune(), so this pruner presumably never prunes - confirm.
median_pruner = optuna.pruners.MedianPruner(n_startup_trials=20, n_warmup_steps=5)

study = optuna.create_study(
    direction='minimize',
    sampler=tpe_sampler,
    pruner=median_pruner,
)
study.optimize(objective, n_trials=200, timeout=None, n_jobs=1, show_progress_bar=True)

print(study.best_params)

# Split the finished trials by their final state.
pruned_trials = study.get_trials(deepcopy=False, states=[TrialState.PRUNED])
complete_trials = study.get_trials(deepcopy=False, states=[TrialState.COMPLETE])

print("Study statistics: ")
for label, trials in (
    ("  Number of finished trials: ", study.trials),
    ("  Number of pruned trials: ", pruned_trials),
    ("  Number of complete trials: ", complete_trials),
):
    print(label, len(trials))

# Report the best trial: objective value and the hyperparameters behind it.
print("Best trial:")
trial = study.best_trial

print("  Value: ", trial.value)

print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

Train final Model

In [None]:
# Train the final NGBoost model with fixed hyperparameters
# (tree depth 9, 750 boosting rounds, learning rate 0.003).
set_seed(SEED)

# Decision-tree base learner with a fixed random state.
final_base_learner = DecisionTreeRegressor(
    criterion='friedman_mse',
    max_depth=9,
    random_state=SEED,
)

ngb = NGBRegressor(
    Dist=Normal,
    Score=LogScore,
    Base=final_base_learner,
    verbose=True,
    n_estimators=750,
    learning_rate=0.003,
    random_state=SEED,
)

# Fit on the training split; early stopping is monitored on the validation split.
ngb.fit(X_train, y_train, X_val=X_val, Y_val=y_val, early_stopping_rounds=50)

Using Uncertainty Toolbox to compute the metrics

In [None]:
set_seed(SEED)

# Point predictions and predictive distributions for the test set.
y_test_preds = ngb.predict(X_test)
y_test_dists = ngb.pred_dist(X_test)
# The `scale` of the predicted distributions is used as the standard deviation.
test_stddev = y_test_dists.scale

# (prediction, standard deviation, ground truth) - the positional triple that
# every metric and plotting call below takes.
uct_inputs = (y_test_preds, test_stddev, y_test)

# All Uncertainty Toolbox metrics (including RMSE, MAE, R2, NLL and CRPS).
pnn_metrics = uct.metrics.get_all_metrics(*uct_inputs)
print(pnn_metrics)

# Proportion of test targets inside the 95% interval.
coverage_95 = uct.metrics_calibration.get_proportion_in_interval(*uct_inputs, quantile=0.95)
print(f"Coverage 95%: {coverage_95}")

# Project helper: coverage and MPIW.
ev_intervals = metrices.evaluate_intervals(*uct_inputs, coverage=0.95)
print(f'coverage: {ev_intervals["coverage"]}, MPIW: {ev_intervals["MPIW"]}')

# Calibration curve.
uct.viz.plot_calibration(*uct_inputs)

# Adversarial group calibration.
uct.viz.plot_adversarial_group_calibration(*uct_inputs)

Do 10 Runs for a more representative Study

In [None]:
# Train and evaluate the NGBoost model several times with different seeds.
# Each fitted model is pickled on its own; the per-run metrics are collected
# in `result` and pickled once at the end.
number_of_runs = 10
result = {}

# NOTE(review): these absolute paths match only the desktop machine from the
# path-setup cell; confirm they are also valid on the Surface PC.
ngb_path = r"C:\Users\test\Masterarbeit\models\Modelsaves\NGBoost"
ngb_result_path = r"C:\Users\test\Masterarbeit\models\Modelresults\NGBoost"

# Create the output folders up front, so a missing folder fails immediately
# instead of after a model has already been trained.
os.makedirs(ngb_path, exist_ok=True)
os.makedirs(os.path.dirname(ngb_result_path), exist_ok=True)

for run in range(number_of_runs):

    model_name = f'ngboost_run_{run+1}.pkl'
    # choose a random seed for each run
    seed = random.randint(0, 10000)
    print(f"Run {run+1}/{number_of_runs} with seed {seed}")
    set_seed(seed)

    ngb = NGBRegressor(Dist=Normal,
                       Score=LogScore,
                       Base=DecisionTreeRegressor(criterion='friedman_mse', max_depth=9),
                       verbose=True,
                       n_estimators=750,
                       learning_rate=0.003)
    # Train the model with early stopping on the validation split
    ngb.fit(X_train, y_train, X_val=X_val, Y_val=y_val, early_stopping_rounds=20)

    # Context manager so the file handle is closed (and flushed) deterministically.
    with open(os.path.join(ngb_path, model_name), "wb") as model_file:
        pickle.dump(ngb, model_file)

    y_test_preds = ngb.predict(X_test)
    y_test_dists = ngb.pred_dist(X_test)
    # Extract standard deviation from test predictions
    test_stddev = y_test_dists.scale

    # Calculate and print all metrics including RMSE, MAE, R2 score, NLL, CRPS
    pnn_metrics = uct.metrics.get_all_metrics(y_test_preds, test_stddev, y_test)
    print(pnn_metrics)

    # Use the project helper to calculate coverage and MPIW. It lives in the
    # `metrices` module; the bare name `evaluate_intervals` is not defined in
    # this notebook and raised a NameError.
    ev_intervals = metrices.evaluate_intervals(y_test_preds, test_stddev, y_test, coverage=0.95)
    print(f'coverage: {ev_intervals["coverage"]}, MPIW: {ev_intervals["MPIW"]}')

    results_per_run = {
        'seed': seed,  # stored so each run can be reproduced from the saved results
        'mean_prediction': y_test_preds,
        'std_prediction': test_stddev,
        'coverage': ev_intervals["coverage"],
        'MPIW': ev_intervals["MPIW"],
        'RMSE': pnn_metrics['accuracy']['rmse'],
        'MAE': pnn_metrics['accuracy']['mae'],
        'R2': pnn_metrics['accuracy']['r2'],
        'NLL': pnn_metrics['scoring_rule']['nll'],
        'CRPS': pnn_metrics['scoring_rule']['crps']
    }

    result[f'run_{run+1}'] = results_per_run

# `ngb_result_path` has no file extension and mirrors the model folder above.
# If it is an existing directory, opening it for writing would fail, so the
# results go into a file inside it; otherwise it is used as the file path,
# exactly as before.
results_file = ngb_result_path
if os.path.isdir(results_file):
    results_file = os.path.join(results_file, 'ngboost_results.pkl')

with open(results_file, 'wb') as f:
    pickle.dump(result, f)