In [40]:
%load_ext autoreload
%autoreload 2


import os
import sys
from pathlib import Path
import bainite_boundaries
project_root = os.path.abspath(Path(str(bainite_boundaries.PROJECT_ROOT), 'bainite_boundaries', 'bainite_boundaries'))

# Add `bainite_boundaries` to sys.path if it’s not already present
if project_root not in sys.path:
    sys.path.insert(0, project_root)
    
import numpy as np
import pandas as pd
from collections import Counter

import torch
import gpytorch
import matplotlib.pyplot as plt
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

from bainite_boundaries.utils.data_processing import stable_log10


import numpy as np
import pandas as pd

def plot_and_compute_mean_over_cp_cv_folds(results: pd.DataFrame, column = 'y_test', title = ''):
    # Dictionary to store the mean y_test values for each CV fold
    total_results = []

    # Loop over each CV fold
    for index in results['CV_fold'].unique():
        fold_data = results[results['CV_fold'] == index]
        
        # List to store the y_test values for each CP_CV_fold within the current CV fold
        cp_mean_values = []
        
        # Loop over each CP_CV_fold for the current CV fold
        for cp_fold in fold_data['CP_CV_fold'].unique():
            cp_fold_data = fold_data[fold_data['CP_CV_fold'] == cp_fold]
            
            # Extract the y_test and convert it to a numpy array

            string = cp_fold_data[column].iloc[0]  # Assuming the y_test for the same CP_CV_fold is the same
            # if it is a string
            if isinstance(string, str):
                string = string.replace('\n', ' ').strip('[]')
                y_test_array = np.fromstring(string, dtype=float, sep=' ')
            else:
                y_test_array = string
            
            # Append the mean of y_test_array for this CP_CV_fold
            cp_mean_values.append(y_test_array)
            
            # print( f" CV_fold: {index}, CP_CV_fold: {cp_fold}, y_test: {y_test_array}")
        
        # Calculate the mean over all CP_CV_folds for the current CV fold
        total_results.append(np.mean(cp_mean_values, axis=0))
        
    
    
    # Return the results
    return total_results

def round_uncertainty(value, uncertainty):
    """
    Format mean and standard deviation according to scientific rounding rules.

    Args:
        value (float): Mean value.
        uncertainty (float): Standard deviation (or uncertainty).

    Returns:
        str: Formatted mean ± standard deviation string.
    """

    # Rule: Uncertainty should have 1 or 2 significant digits
    if uncertainty == 0:
        return f"{value:.2f} ± 0"  # Handle zero case separately

    # Get the order of magnitude of the uncertainty
    order = np.floor(np.log10(abs(uncertainty)))

    # Find first two significant digits
    first_digit = int(uncertainty / 10**order)
    second_digit = int((uncertainty / 10**(order - 1)) % 10)

    # If first digit is 1, keep two significant digits; otherwise, keep one
    if first_digit == 1:
        rounded_uncertainty = round(uncertainty, -int(order - 1))
    else:
        rounded_uncertainty = round(uncertainty, -int(order))

    # Determine decimal places for the mean
    decimal_places = -int(np.floor(np.log10(rounded_uncertainty)))

    # If uncertainty is a whole number, no decimal places
    decimal_places = max(decimal_places, 0)

    # Round the mean accordingly
    rounded_value = round(value, decimal_places)
    
    # Format output string
    return f"${rounded_value:.{decimal_places}f}_{{\pm {rounded_uncertainty:.{decimal_places}f}}}$"



The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


  return f"${rounded_value:.{decimal_places}f}_{{\pm {rounded_uncertainty:.{decimal_places}f}}}$"


### Load data


In [41]:
which_data = 'Austensite'
which_data = 'Martensite_start'
which_data = 'Martensite_start_RA'
#which_data = 'Bainite_start'
#which_data = 'Ferrite'
#which_data = 'Ferrite_critCR'
#which_data = 'Bainite'
#'RF', 'XGB_no_monotonic', 'LGBM_no_monotonic'
# 'linear','linear_mixed','polynomial','polynomial_mixed', 'neural', 'GP'
model_names = {
'Austensite': ['linear','neural_class'],#, 'GB_class', 'SVC_class'],
'Bainite': ['linear','polynomial','GP_Poly','GP','RF','LGBM_no_monotonic','monoton_neural','GP_linearmean'],#,'polynomial','GP','GP_Poly','RF','XGB_no_monotonic','LGBM_no_monotonic','GP_linearmean','NN'],
# 'Bainite': ['GP'],
'Ferrite_critCR': ['linear', 'RF', 'XGB_no_monotonic', 'LGBM_no_monotonic','polynomial','GP_Poly','GP','neural'],#['linear', 'neural', 'neural_pt'],#['linear', 'neural_pt','neural_pt'],
'Martensite_start':['linear','polynomial','GP_linearmean','GP_Poly','GP','RF','LGBM_no_monotonic','monoton_neural'],# ['monoton_neural','linear','polynomial','neural','neural_pt','random','GP','GP_Poly'],#,'GP_Poly'],
'Martensite_start_RA':['linear','polynomial','GP_linearmean','GP_Poly','GP','RF','LGBM_no_monotonic','monoton_neural'],
'Bainite_start': ['linear','polynomial','GP_linearmean','GP_Poly','GP','RF','LGBM_no_monotonic','monoton_neural']
}[which_data]
for model in model_names:
    filename = f'./bainite_boundaries/results/k-fold-CV/{which_data}_{model}_results'
    results = pd.read_csv(filename+'.csv')
    # load pickle
    # results = pd.read_pickle(filename+'.pkl')
    # results.head()

    if which_data == 'Austensite':
        classification = True
    else:
        classification = False

    from utils.data_processing import compute_metrics_regression
    from utils.data_processing import compute_metrics_classification


    if classification:

        # labels and predictions
        y_test = plot_and_compute_mean_over_cp_cv_folds(results, column='y_test', title='y_test')
        y_pred_test_mean = plot_and_compute_mean_over_cp_cv_folds(results, column='y_pred_test', title='y_pred_test_mean')
        y_pred_proba_test_mean = plot_and_compute_mean_over_cp_cv_folds(results, column='y_pred_proba_test', title='y_pred_proba_test_mean')

        confidence_sets = plot_and_compute_mean_over_cp_cv_folds(results, column='confidence_sets_test', title='confidence_sets_test')
        confidence_sets_distance = plot_and_compute_mean_over_cp_cv_folds(results, column='confidence_sets_distance_test', title='confidence_sets_distance_test')
        
        metrics_list = []
        for i in range(len(y_test)):
            prediction_dict = {
                'y_true': y_test[i],
                'y_pred': y_pred_test_mean[i],
                'y_pred_proba': y_pred_proba_test_mean[i],
                'confidence_sets': confidence_sets[i],
                'confidence_sets_distance': confidence_sets_distance[i],
            }

            metrics_dict = compute_metrics_classification(prediction_dict)
            metrics_list.append(metrics_dict)
            
    else:    

        # labels and predictions
        y_test = plot_and_compute_mean_over_cp_cv_folds(results, column='y_test', title='y_test')
        y_pred_test_mean = plot_and_compute_mean_over_cp_cv_folds(results, column='y_pred_test', title='y_pred_test_mean')


        # with distance
        y_test_lower_distance = plot_and_compute_mean_over_cp_cv_folds(results, column='lower_CP_distance_test', title='y_test_lower')
        y_test_upper_distance = plot_and_compute_mean_over_cp_cv_folds(results, column='upper_CP_distance_test', title='y_test_upper')

        # no distance
        y_test_lower = plot_and_compute_mean_over_cp_cv_folds(results, column='lower_CP_test', title='y_test_lower')
        y_test_upper = plot_and_compute_mean_over_cp_cv_folds(results, column='upper_CP_test', title='y_test_upper')

        metrics_list = []
        for i in range(len(y_test)):
            prediction_dict = {
                'y_true': y_test[i],
                'y_pred': y_pred_test_mean[i],
                'lower_CP': y_test_lower[i],
                'upper_CP': y_test_upper[i],
                'lower_CP_distance': y_test_lower_distance[i],
                'upper_CP_distance': y_test_upper_distance[i],
            }

            metrics_dict = compute_metrics_regression(prediction_dict)
            metrics_list.append(metrics_dict)
            
    # Initialize containers for each metric
    metrics_aggregated = {key: [] for key in metrics_list[0].keys()}

    # Aggregate the values for each metric
    for entry in metrics_list:
        for key, value in entry.items():
            metrics_aggregated[key].append(value)
    print(model)
    # Calculate the mean and std for each metric
    metrics_summary = {key: {'mean': np.mean(values), 'std': np.std(values)} for key, values in metrics_aggregated.items()}
    print(results['distance_metric'][0], results['distance_loc'][0])

    #m_list=['MALE','R2log','coverage_distance [%]','interval_log_size_distance']
    # Print the results
    for metric, summary in metrics_summary.items():
        #if metric in m_list:
            #print(metric,summary['mean'], summary['std'])
            print(metric,round_uncertainty(summary['mean'], summary['std']))
        #print(f"{metric}: $ {summary['mean']:.2f}_{{\pm {summary['std']:.2f}}} $")


linear
mahalanobis PCA_scaled
MALE $0.046_{\pm 0.007}$
MAPE [%] $12_{\pm 2}$
MAE $30_{\pm 2}$
R2 $0.75_{\pm 0.07}$
R2log $-26_{\pm 20}$
coverage [%] $90_{\pm 2}$
coverage_distance [%] $91_{\pm 2}$
interval_size  $120_{\pm 2}$
interval_size_distance  $119_{\pm 3}$
interval_log_size $0.162_{\pm 0.008}$
interval_log_size_distance $0.16_{\pm 0.01}$
polynomial
mahalanobis PCA_scaled
MALE $0.091_{\pm 0.005}$
MAPE [%] $23_{\pm 2}$
MAE $63_{\pm 2}$
R2 $0.22_{\pm 0.03}$
R2log $-7_{\pm 14}$
coverage [%] $90_{\pm 3}$
coverage_distance [%] $90_{\pm 3}$
interval_size  $254_{\pm 3}$
interval_size_distance  $255_{\pm 4}$
interval_log_size $0.36_{\pm 0.01}$
interval_log_size_distance $0.36_{\pm 0.01}$
GP_linearmean
mahalanobis PCA_scaled
MALE $0.032_{\pm 0.004}$
MAPE [%] $8_{\pm 1}$
MAE $20_{\pm 2}$
R2 $0.88_{\pm 0.03}$
R2log $0.7_{\pm 0.1}$
coverage [%] $92_{\pm 3}$
coverage_distance [%] $92_{\pm 2}$
interval_size  $93_{\pm 5}$
interval_size_distance  $96_{\pm 7}$
interval_log_size $0.15_{\pm 0.01}$


In [42]:
summary['std']

np.float64(0.006702324845382349)

In [43]:
metrics_aggregated

{'MALE': [np.float64(0.10251546795285568),
  np.float64(0.10819878308645933),
  np.float64(0.08744839502647002),
  np.float64(0.09669510551528955),
  np.float64(0.09292707595459895),
  np.float64(0.09682912977302906),
  np.float64(0.09499436414506156),
  np.float64(0.09914723847482607),
  np.float64(0.08796363309719898),
  np.float64(0.09119745911366674)],
 'MAPE [%]': [np.float64(28.52981159363141),
  np.float64(30.31552923264246),
  np.float64(22.12983768359827),
  np.float64(25.28769898366665),
  np.float64(25.499056112049818),
  np.float64(27.680853135123552),
  np.float64(24.222195876058763),
  np.float64(27.136528096709867),
  np.float64(22.976299806447052),
  np.float64(23.609315329449753)],
 'MAE': [np.float64(71.24995843697477),
  np.float64(73.9819889579832),
  np.float64(64.01855542857142),
  np.float64(68.72635242016807),
  np.float64(65.4872769579832),
  np.float64(67.26538764705882),
  np.float64(67.15934848739495),
  np.float64(69.83040447058823),
  np.float64(62.8026116