# MultiSensor Dataset Preparation 
- Experiment data: March 2024. Aluminum, Laser-Wire DED
- Aurthor: Chen Lequn

### Notebook 3a: Feature selection, ML modelling and ablation study (feature-based audio model)
- Full Dataset: All features (from previous notebook 2b)
- Ablated Dataset 1: Features after removing those with Pearson correlation > 0.97 (within audio modality).
- Ablated Dataset 2: Features after removing those with Pearson correlation > 0.9 (within audio modality).
- Ablated Dataset 3: Features from Ablated Dataset 1 with MI > 0.05.
- Ablated Dataset 4: Features from Ablated Dataset 1 with MI > 0.1.

In [1]:
from sklearnex import patch_sklearn, config_context
import dpctl
patch_sklearn()

Intel(R) Extension for Scikit-learn* enabled (https://github.com/intel/scikit-learn-intelex)


In [2]:
# import wandb

In [3]:
import os
import glob
import sys
import math
import numpy as np
import pandas as pd
from pandas import DataFrame
import scipy as sp
import itertools
from collections import defaultdict
from pprint import pprint
import pickle
from joblib import dump, load

# Plotting
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.pyplot import gca
from pylab import *
import seaborn as sns
mpl.rc('axes', labelsize=14)
mpl.rc('xtick', labelsize=12)
mpl.rc('ytick', labelsize=12)
%matplotlib inline

# Scikit-learn general
from sklearn import preprocessing, metrics, svm, datasets, tree, neighbors
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score, precision_score, recall_score, confusion_matrix, roc_curve, auc, classification_report
from sklearn.model_selection import train_test_split, RandomizedSearchCV, cross_validate, KFold, StratifiedKFold, GridSearchCV
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder, scale
from sklearn.ensemble import HistGradientBoostingClassifier

# Scikit-learn models
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression, Lasso, Ridge
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier, GradientBoostingRegressor
from sklearn.neural_network import MLPClassifier, MLPRegressor
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC, SVR
from sklearn.multiclass import OneVsRestClassifier

# Imbalanced learn
from imblearn.ensemble import BalancedRandomForestClassifier, RUSBoostClassifier

# XGBoost and LightGBM
import xgboost as xgb
from xgboost import XGBClassifier
import lightgbm as lgb
from lightgbm import LGBMClassifier

# Utilities
from scipy.stats import uniform, randint
from scipy.interpolate import griddata
from sklearn.utils import shuffle, resample, class_weight
from utils import *


In [4]:
PROJECT_ROOT_DIR = "../"
IMAGE_PATH = os.path.join(PROJECT_ROOT_DIR, "result_images", 'defect classification', 'ML-baseline', 'ablation study')
model_path = os.path.join(PROJECT_ROOT_DIR, "trained_models", 'defect classification', 'ML-baseline', 'ablation study')
scaler_path = os.path.join(PROJECT_ROOT_DIR, "saved_scalers", 'defect classification',  'ML-baseline', 'ablation study')
os.makedirs(IMAGE_PATH, exist_ok=True)
os.makedirs(model_path, exist_ok=True)
os.makedirs(scaler_path, exist_ok=True)


# Change to desirable location of the raw dataset
Multimodal_dataset_PATH = "/home/chenlequn/pan1/Dataset/Laser-Wire-DED-ThermalAudio-Dataset"
Annotation_file_path = os.path.join(Multimodal_dataset_PATH, "Annotation")
Dataset_path = os.path.join(Multimodal_dataset_PATH, 'Dataset')
final_audio_dataset = os.path.join(Multimodal_dataset_PATH, 'Dataset', "audio")
final_image_dataset = os.path.join(Multimodal_dataset_PATH, 'Dataset', "thermal_images")
              

## function for automatically save the diagram/graph into the folder 
def save_fig(fig_id, tight_layout=True, fig_extension="png", resolution=300):
    path = os.path.join(IMAGE_PATH, fig_id + "." + fig_extension)
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(path, format=fig_extension, dpi=resolution)

import warnings
warnings.filterwarnings(action="ignore", message="^internal gelsd")

plt.rcParams["axes.edgecolor"] = "black"
plt.rcParams["axes.linewidth"] = 2.50

In [5]:
# # Initialize wandb
# wandb.init(project="LW-DED_audio-classification")

## Step 1: Dataset preparation

In [6]:
# Read the data back from the HDF5 file into a new DataFrame
df_audio_dataset = pd.read_hdf(os.path.join(Dataset_path, 'df_audio_dataset_with_annotations(raw_audio).h5'), key='df')

df_audio_dataset

Unnamed: 0,audio_name,image_name,experiment_number,label_1,label_2,label_3,rms_energy,amplitude_envelope_mean,amplitude_envelope_std,zero_crossing_rate,...,mfcc_8_mean,mfcc_8_std,mfcc_9_mean,mfcc_9_std,mfcc_10_mean,mfcc_10_std,mfcc_11_mean,mfcc_11_std,mfcc_12_mean,mfcc_12_std
0,Exp_17_12_1.wav,Exp_17_12_1.jpg,17_12,Balling (LoF),,,0.055400,0.063368,0.014181,0.021542,...,4.465534,5.621855,10.530560,7.604537,1.019614,5.977480,11.476974,6.662732,-8.070549,8.544114
1,Exp_17_12_2.wav,Exp_17_12_2.jpg,17_12,Balling (LoF),,,0.037319,0.047844,0.013682,0.057370,...,3.375155,6.320611,5.001470,8.519529,-3.618283,10.137958,8.518948,8.286986,-12.481044,8.784910
2,Exp_17_12_3.wav,Exp_17_12_3.jpg,17_12,Balling (LoF),,,0.028314,0.030773,0.013055,0.083900,...,0.212402,6.042810,3.667673,6.774061,-4.279868,5.320736,5.285318,6.426253,-13.551059,6.355420
3,Exp_17_12_4.wav,Exp_17_12_4.jpg,17_12,Balling (LoF),,,0.025656,0.033076,0.009994,0.080726,...,-2.960245,10.497297,3.910449,7.278681,0.686291,9.202967,9.157614,7.613472,-13.765296,6.685709
4,Exp_17_13_6.wav,Exp_17_13_6.jpg,17_13,Balling (LoF),,,0.019992,0.029970,0.007434,0.051927,...,-2.866064,6.072939,5.428969,10.252683,-3.750278,7.586510,3.211762,5.490491,-5.946276,7.376266
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3503,Exp_April_16_test_4_4.wav,Exp_April_16_test_4_4.jpg,April_16_test_4,Laser-off,Shielding Gas-off,,0.012902,0.015071,0.005879,0.099093,...,2.168951,6.490287,1.928032,8.401035,-1.724736,10.288368,-0.060851,8.381465,-12.953316,5.754806
3504,Exp_April_16_test_4_1.wav,Exp_April_16_test_4_1.jpg,April_16_test_4,Laser-off,Shielding Gas-off,,0.013222,0.015257,0.005407,0.084127,...,0.476796,9.227147,0.945553,7.366714,-1.322515,7.652929,-0.733449,5.396196,-14.786466,7.339162
3505,Exp_April_16_test_4_2.wav,Exp_April_16_test_4_2.jpg,April_16_test_4,Laser-off,Shielding Gas-off,,0.012246,0.014805,0.005100,0.093878,...,-0.026498,7.503857,2.117017,7.400688,-2.097013,8.523264,0.141502,6.387466,-10.734591,8.097878
3506,Exp_April_16_test_4_3.wav,Exp_April_16_test_4_3.jpg,April_16_test_4,Laser-off,Shielding Gas-off,,0.013367,0.016546,0.005384,0.087302,...,1.089101,6.276635,0.685349,5.679959,-5.039695,5.392179,-2.741401,4.763264,-13.847133,7.911690


In [7]:
# Extract labels
y = df_audio_dataset['label_1'].to_list()
y_array = np.array(y)
le = LabelEncoder()
le.fit(y)
Y_encoded = le.transform(y)

# Non-feature columns to be dropped
non_feature_columns = ['audio_name', 'image_name', 'experiment_number',
                       'label_1', 'label_2', 'label_3']

# Extract features for the full dataset
X_all = df_audio_dataset.drop(columns=non_feature_columns).to_numpy()

X_all.shape

(3508, 83)

In [8]:
class_weights = class_weight.compute_class_weight('balanced',
                                                 classes = np.unique(y),
                                                 y = y)

class_weights_encoded = class_weight.compute_class_weight('balanced',
                                                 classes = np.unique(Y_encoded),
                                                 y = Y_encoded)


class_names = (le.classes_).tolist()
print(f"class weights: {class_weights}")
print(f"class weights encoded: {class_weights_encoded}")
print(f"class        : {class_names}")

class weights: [ 1.33638095  1.30505952 20.88095238  0.85083677 14.31836735  0.38938839
  0.61565462]
class weights encoded: [ 1.33638095  1.30505952 20.88095238  0.85083677 14.31836735  0.38938839
  0.61565462]
class        : ['Balling (Conduction)', 'Balling (LoF)', 'Balling (Overheating)', 'Laser-off', 'LoF', 'Non-defective', 'Overheating']


In [9]:
# Given features based on previous analysis results
features_dropped_090 = ['spectral_centroid_mean', 'spectral_centroid_std',
       'spectral_complexity_std', 'spectral_contrast_0_mean',
       'spectral_contrast_0_std', 'spectral_contrast_1_mean',
       'spectral_contrast_1_std', 'spectral_contrast_2_mean',
       'spectral_contrast_2_std', 'spectral_contrast_3_mean',
       'spectral_contrast_3_std', 'spectral_contrast_4_mean',
       'spectral_contrast_4_std', 'spectral_contrast_5_mean',
       'spectral_valley_0_std', 'spectral_valley_1_std',
       'spectral_valley_2_mean', 'spectral_valley_2_std',
       'spectral_valley_3_std', 'spectral_valley_4_std',
       'spectral_valley_5_mean', 'spectral_energy_band_ratio_std',
       'spectral_flatness_mean', 'spectral_flatness_std',
       'spectral_rolloff_std', 'spectral_strong_peak_mean',
       'spectral_strong_peak_std', 'spectral_variance_mean',
       'spectral_variance_std', 'spectral_skewness_std',
       'spectral_kurtosis_std', 'spectral_crest_factor_mean',
       'spectral_crest_factor_std', 'mfcc_0_std', 'mfcc_1_mean', 'mfcc_1_std',
       'mfcc_2_mean', 'mfcc_2_std', 'mfcc_3_mean', 'mfcc_3_std', 'mfcc_4_mean',
       'mfcc_4_std', 'mfcc_5_mean', 'mfcc_5_std', 'mfcc_6_mean', 'mfcc_6_std',
       'mfcc_7_mean', 'mfcc_7_std', 'mfcc_8_mean', 'mfcc_8_std', 'mfcc_9_mean',
       'mfcc_9_std', 'mfcc_10_mean', 'mfcc_10_std', 'mfcc_11_mean',
       'mfcc_11_std', 'mfcc_12_mean', 'mfcc_12_std']

features_dropped_097 = ['amplitude_envelope_std', 'zero_crossing_rate',
       'loudness_vickers', 'spectral_centroid_mean', 'spectral_centroid_std',
       'spectral_complexity_mean', 'spectral_complexity_std',
       'spectral_contrast_0_mean', 'spectral_contrast_0_std',
       'spectral_contrast_1_mean', 'spectral_contrast_1_std',
       'spectral_contrast_2_mean', 'spectral_contrast_2_std',
       'spectral_contrast_3_mean', 'spectral_contrast_3_std',
       'spectral_contrast_4_mean', 'spectral_contrast_4_std',
       'spectral_contrast_5_mean', 'spectral_contrast_5_std',
       'spectral_valley_0_mean', 'spectral_valley_0_std',
       'spectral_valley_1_mean', 'spectral_valley_1_std',
       'spectral_valley_2_mean', 'spectral_valley_2_std',
       'spectral_valley_3_mean', 'spectral_valley_3_std',
       'spectral_valley_4_mean', 'spectral_valley_4_std',
       'spectral_valley_5_mean', 'spectral_valley_5_std',
       'spectral_decrease_mean', 'spectral_energy_band_ratio_mean',
       'spectral_energy_band_ratio_std', 'spectral_flatness_mean',
       'spectral_flatness_std', 'spectral_flux_std', 'spectral_rolloff_mean',
       'spectral_rolloff_std', 'spectral_strong_peak_mean',
       'spectral_strong_peak_std', 'spectral_variance_mean',
       'spectral_variance_std', 'spectral_skewness_std',
       'spectral_kurtosis_std', 'spectral_crest_factor_mean',
       'spectral_crest_factor_std', 'mfcc_0_mean', 'mfcc_0_std', 'mfcc_1_mean',
       'mfcc_1_std', 'mfcc_2_mean', 'mfcc_2_std', 'mfcc_3_mean', 'mfcc_3_std',
       'mfcc_4_mean', 'mfcc_4_std', 'mfcc_5_mean', 'mfcc_5_std', 'mfcc_6_mean',
       'mfcc_6_std', 'mfcc_7_mean', 'mfcc_7_std', 'mfcc_8_mean', 'mfcc_8_std',
       'mfcc_9_mean', 'mfcc_9_std', 'mfcc_10_mean', 'mfcc_10_std',
       'mfcc_11_mean', 'mfcc_11_std', 'mfcc_12_mean', 'mfcc_12_std']


features_dropped_090_mi_filtered_01 = ['spectral_centroid_mean', 'spectral_centroid_std',
       'spectral_complexity_std', 'spectral_contrast_3_mean',
       'spectral_contrast_5_mean', 'spectral_valley_2_mean',
       'spectral_valley_5_mean', 'spectral_flatness_mean',
       'spectral_flatness_std', 'spectral_variance_mean',
       'spectral_skewness_std', 'spectral_crest_factor_mean',
       'spectral_crest_factor_std', 'mfcc_1_mean', 'mfcc_2_mean',
       'mfcc_3_mean', 'mfcc_5_mean', 'mfcc_7_mean', 'mfcc_9_mean',
       'mfcc_11_mean']


features_dropped_090_mi_filtered_005 = ['spectral_centroid_mean', 'spectral_centroid_std',
       'spectral_complexity_std', 'spectral_contrast_3_mean',
       'spectral_contrast_4_mean', 'spectral_contrast_5_mean',
       'spectral_valley_2_mean', 'spectral_valley_5_mean',
       'spectral_energy_band_ratio_std', 'spectral_flatness_mean',
       'spectral_flatness_std', 'spectral_rolloff_std',
       'spectral_strong_peak_std', 'spectral_variance_mean',
       'spectral_skewness_std', 'spectral_kurtosis_std',
       'spectral_crest_factor_mean', 'spectral_crest_factor_std',
       'mfcc_1_mean', 'mfcc_2_mean', 'mfcc_3_mean', 'mfcc_5_mean',
       'mfcc_6_mean', 'mfcc_7_mean', 'mfcc_7_std', 'mfcc_8_mean',
       'mfcc_9_mean', 'mfcc_9_std', 'mfcc_11_mean', 'mfcc_12_mean']

# Create ablated datasets
X_dropped_097 = df_audio_dataset[features_dropped_097].to_numpy()
X_dropped_090 = df_audio_dataset[features_dropped_090].to_numpy()
X_dropped_090_mi_filtered_01 = df_audio_dataset[features_dropped_090_mi_filtered_01].to_numpy()
X_dropped_090_mi_filtered_005 = df_audio_dataset[features_dropped_090_mi_filtered_005].to_numpy()


# Get the shapes of these additional ablated datasets
X_dropped_097.shape, X_dropped_090.shape, X_dropped_090_mi_filtered_005.shape, X_dropped_090_mi_filtered_01.shape,

((3508, 73), (3508, 58), (3508, 30), (3508, 20))

- ALL Dataset (X_all):  `83 features`
- Ablated Dataset 1 (X_dropped_097):  `73 features`
- Ablated Dataset 2 (X_dropped_090):  `58 features`
- Ablated Dataset 3 (X_dropped_090_mi_filtered_005): `30 features`
- Ablated Dataset 4 (X_dropped_090_mi_filtered_01):  `20 features`

### Train val test split
- Train 80%, Val 10%, Test 10%

In [10]:
y_array = np.array(y)

# First split: Separate out the training set (80% of original)
train_indices, temp_indices = train_test_split(
    np.arange(len(df_audio_dataset)), test_size=0.2, random_state=0, stratify=y_array)

# Second split: Separate out the validation and test sets (each will be 10% of original)
val_indices, test_indices = train_test_split(
    temp_indices, test_size=0.5, random_state=0, stratify=y_array[temp_indices])

# Check the shape of the indices for train, val, and test splits
len(train_indices), len(val_indices), len(test_indices)

(2806, 351, 351)

In [11]:
%store train_indices
%store val_indices
%store test_indices

Stored 'train_indices' (ndarray)
Stored 'val_indices' (ndarray)
Stored 'test_indices' (ndarray)


## Step 2: Create a Pipeline and Iterate Over Datasets and Models

In [12]:
# Define hyperparameters for each model
hyperparameters = {
    'KNN': {'n_neighbors': 12, 'weights': 'distance'},
    'DT': {'ccp_alpha': 3.89e-05, 'max_depth': 21, 'min_samples_leaf': 1, 'min_samples_split': 13},
    'LR': {'C': 5.15, 'max_iter': 2528, 'penalty': 'l2', 'solver': 'lbfgs'},
    'RF': {'bootstrap': False, 'max_depth': 64, 'max_features': 'sqrt', 'min_samples_leaf': 3, 'min_samples_split': 6, 'n_estimators': 307},
    'BalancedRF': {'bootstrap': False, 'max_depth': 23, 'max_features': 'sqrt', 'min_samples_leaf': 8, 'min_samples_split': 17, 'n_estimators': 237},
    'RUSBoost': {'learning_rate': 0.1, 'n_estimators': 150},
    'Ada': {'algorithm': 'SAMME', 'learning_rate': 0.57, 'n_estimators': 307},
    'GP': {'kernel': 1**2 * RBF(length_scale=1)},
    'NN': {'activation': 'relu', 'alpha': 0.077, 'early_stopping': True, 'hidden_layer_sizes': (229, 255, 168), 'learning_rate': 'adaptive', 'max_iter': 11805, 'solver': 'adam', 'validation_fraction': 0.1},
    'XGBoost': {'colsample_bytree': 0.62, 'gamma': 0.48, 'learning_rate': 0.35, 'max_depth': 44, 'min_child_weight': 8, 'n_estimators': 480, 'subsample': 0.59,  'gpu_id': 0},
    'LightGBM': {'colsample_bytree': 0.80, 'learning_rate': 0.15, 'max_depth': 35, 'min_child_weight': 1, 'n_estimators': 276, 'num_leaves': 120, 'reg_alpha': 1.94, 'reg_lambda': 1.70, 'subsample': 0.86, 'verbose': -1},
    'SVM': {'C': 524.86, 'degree': 9, 'gamma': 0.029, 'kernel': 'rbf', 'probability': True},
    'GB': {'learning_rate': 0.23, 'max_depth': 49, 'min_samples_leaf': 10, 'min_samples_split': 17, 'n_estimators': 271, 'n_iter_no_change': 10, 'subsample': 0.73, 'tol': 0.0001},
    'HistGB': {'learning_rate': 0.23, 'max_iter': 100, 'max_depth': 49, 'min_samples_leaf': 10, 'n_iter_no_change': 10, 'tol': 0.0001},
}

# List of datasets
datasets = {
    'All Features': X_all,
    'Ablated 1': X_dropped_097,
    'Ablated 2': X_dropped_090,
    'Ablated 3': X_dropped_090_mi_filtered_005,
    'Ablated 4': X_dropped_090_mi_filtered_01,
}


# List of models
models = {
    'KNN': KNeighborsClassifier,
    'DT': DecisionTreeClassifier,
    'LR': LogisticRegression,
    'RF': RandomForestClassifier,
    'BalancedRF': BalancedRandomForestClassifier,
    'RUSBoost': RUSBoostClassifier,
    'Ada': AdaBoostClassifier,
    # 'GP': GaussianProcessClassifier,
    'NN': MLPClassifier,
    'XGBoost': XGBClassifier,
    'LightGBM': LGBMClassifier,
    'SVM': SVC,
    # 'GB': GradientBoostingClassifier,
    'HistGB': HistGradientBoostingClassifier
}

## Step 3: Model Training

In [13]:
# Initialize KFold with the number of splits you desire
kf = KFold(n_splits=5)
# Create a dictionary to store final results
final_results = defaultdict(list)
# Create a dictionary to store temporary cross-validation results for each dataset and model
cv_results = defaultdict(lambda: defaultdict(list))


# Iterate over datasets and models
for dataset_name, X in datasets.items():
    print(f"Processing dataset: {dataset_name}")
    # Split data
    X_train = X[train_indices]
    X_val = X[val_indices]
    X_test = X[test_indices]
  
    y_train = Y_encoded[train_indices]
    y_val = Y_encoded[val_indices]
    y_test = Y_encoded[test_indices]

    # Scaling
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_val = scaler.transform(X_val)
    X_test = scaler.transform(X_test)
    
    with open(os.path.join(scaler_path, f'StandardScaler_{dataset_name}.pkl'), 'wb') as file:
        pickle.dump(scaler, file)

    for model_name, Model in models.items():
        print(f"\tTraining model: {model_name}")
        # with wandb.init(project="LW-DED_audio-classification", name=f"{model_name}_{dataset_name}", reinit=True) as run:
            # Log model and hyperparameters
            # wandb.config.update({"Model": model_name, "Hyperparameters": hyperparameters[model_name]})
    
        # Initialize model with the best hyperparameters
        model = Model(**hyperparameters[model_name])

        # If the model is SVM, use only 20% of the training data
        if model_name == 'SVM':
            sample_indices = np.random.choice(len(X_train), int(0.2 * len(X_train)), replace=False)
            X_train_sampled = X_train[sample_indices]
            y_train_sampled = y_train[sample_indices]
            model.fit(X_train_sampled, y_train_sampled)
        else:
            # Train the model
            model.fit(X_train, y_train)

        # Save the model
        dump(model, os.path.join(model_path, f'{model_name}_{dataset_name}.joblib'))


        #####------   cross-validation on the validation dataset--------------
        fold_metrics = defaultdict(list)

        for train_idx, test_idx in kf.split(X_val):
            X_test_train, X_test_val = X_val[train_idx], X_val[test_idx]
            y_test_train, y_test_val = y_val[train_idx], y_val[test_idx]
            
            # Make predictions
            y_test_pred = model.predict(X_test_val)
            
            # Evaluate the model
            acc = accuracy_score(y_test_val, y_test_pred)
            f1 = f1_score(y_test_val, y_test_pred, average='weighted')
            precision = precision_score(y_test_val, y_test_pred, average='weighted')
            recall = recall_score(y_test_val, y_test_pred, average='weighted')
            
            # Optional: ROC-AUC can only be calculated for binary classification or multilabel indicator format
            try:
                roc_auc = roc_auc_score(y_test_val, model.predict_proba(X_test_val), multi_class='ovr', average='weighted')
                
            except:
                roc_auc = 'N/A'
            
            # Store the metrics for this fold
            fold_metrics['Accuracy'].append(acc)
            fold_metrics['F1 Score'].append(f1)
            fold_metrics['Precision'].append(precision)
            fold_metrics['Recall'].append(recall)
            fold_metrics['ROC-AUC'].append(roc_auc if roc_auc != 'N/A' else np.nan)
        
        # Calculate mean and std for each metric and print them
        for metric, values in fold_metrics.items():
            mean_val = np.nanmean(values)  # Handle 'N/A' values
            std_val = np.nanstd(values)    # Handle 'N/A' values
            
            print(f"\t\t{metric}: Mean = {mean_val}, Std = {std_val}")
            # Log the mean and std values to wandb
            # wandb.log({f"{metric}_mean": mean_val, f"{metric}_std": std_val})
            
            cv_results[dataset_name][f"{model_name}_{metric}_mean"] = mean_val
            cv_results[dataset_name][f"{model_name}_{metric}_std"] = std_val
             
            #####------   cross-validation on the validation dataset--------------
            
# Convert the temporary results to final format
for dataset_name, metrics in cv_results.items():
    final_results['Dataset'].append(dataset_name)
    for metric_name, value in metrics.items():
        final_results[metric_name].append(value)

# Convert results to a DataFrame for easier viewing
final_results_df = pd.DataFrame(final_results)

Processing dataset: All Features
	Training model: KNN
		Accuracy: Mean = 0.8717907444668007, Std = 0.02556532616119492
		F1 Score: Mean = 0.8600444428301165, Std = 0.029995850740289213
		Precision: Mean = 0.8647930508678394, Std = 0.03848246981483092
		Recall: Mean = 0.8717907444668007, Std = 0.02556532616119492
		ROC-AUC: Mean = 0.9734763632695949, Std = 0.0
	Training model: DT


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


		Accuracy: Mean = 0.854486921529175, Std = 0.06867146329250202
		F1 Score: Mean = 0.8575391735438929, Std = 0.06659267589143929
		Precision: Mean = 0.8717431424094638, Std = 0.060595645051879476
		Recall: Mean = 0.854486921529175, Std = 0.06867146329250202
		ROC-AUC: Mean = 0.9337537198124898, Std = 0.0
	Training model: LR
		Accuracy: Mean = 0.8944064386317908, Std = 0.04588061672180982
		F1 Score: Mean = 0.8934929164638789, Std = 0.04215561497588021
		Precision: Mean = 0.9027381787673538, Std = 0.03480442116150023
		Recall: Mean = 0.8944064386317908, Std = 0.04588061672180982
		ROC-AUC: Mean = 0.992024723170306, Std = 0.0
	Training model: RF


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  warn(
  warn(


		Accuracy: Mean = 0.8916297786720323, Std = 0.025209461011384836
		F1 Score: Mean = 0.8818012433661317, Std = 0.02529676544569119
		Precision: Mean = 0.8827987974772672, Std = 0.02825681949373289
		Recall: Mean = 0.8916297786720323, Std = 0.025209461011384836
		ROC-AUC: Mean = 0.9893988840930624, Std = 0.0
	Training model: BalancedRF


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


		Accuracy: Mean = 0.811871227364185, Std = 0.01749739651773743
		F1 Score: Mean = 0.8311553761670638, Std = 0.012845953609645163
		Precision: Mean = 0.8776352633571239, Std = 0.014671711903146482
		Recall: Mean = 0.811871227364185, Std = 0.01749739651773743
		ROC-AUC: Mean = 0.964329126364589, Std = 0.0
	Training model: RUSBoost


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


		Accuracy: Mean = 0.7804024144869215, Std = 0.05811213531196783
		F1 Score: Mean = 0.8045197576874843, Std = 0.05372922984105883
		Precision: Mean = 0.8460156298260534, Std = 0.0447393471924738
		Recall: Mean = 0.7804024144869215, Std = 0.05811213531196783
		ROC-AUC: Mean = 0.9413247644959181, Std = 0.0
	Training model: Ada


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


		Accuracy: Mean = 0.831830985915493, Std = 0.017288561891315193
		F1 Score: Mean = 0.8300269734956013, Std = 0.015180483505986745
		Precision: Mean = 0.8395243229458522, Std = 0.01690379703294171
		Recall: Mean = 0.831830985915493, Std = 0.017288561891315193
		ROC-AUC: Mean = 0.961519715518584, Std = 0.0
	Training model: NN


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


		Accuracy: Mean = 0.908692152917505, Std = 0.034693263612254624
		F1 Score: Mean = 0.9046140829382028, Std = 0.03554106569195125
		Precision: Mean = 0.9089761630053381, Std = 0.03541861834990414
		Recall: Mean = 0.908692152917505, Std = 0.034693263612254624
		ROC-AUC: Mean = 0.9906690651558175, Std = 0.0
	Training model: XGBoost


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


		Accuracy: Mean = 0.9201207243460765, Std = 0.030918686376706108
		F1 Score: Mean = 0.9129401902747338, Std = 0.03457203909306208
		Precision: Mean = 0.9128889406165814, Std = 0.03634234197558829
		Recall: Mean = 0.9201207243460765, Std = 0.030918686376706108
		ROC-AUC: Mean = 0.9802542465227181, Std = 0.0
	Training model: LightGBM


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


		Accuracy: Mean = 0.9202012072434608, Std = 0.023289197381063217
		F1 Score: Mean = 0.9144427944956792, Std = 0.024502228268769004
		Precision: Mean = 0.921121358796498, Std = 0.02340497139112543
		Recall: Mean = 0.9202012072434608, Std = 0.023289197381063217
		ROC-AUC: Mean = 0.9861008453336731, Std = 0.0
	Training model: SVM
		Accuracy: Mean = 0.8859557344064386, Std = 0.018454291377663114
		F1 Score: Mean = 0.8753210857241364, Std = 0.02305396233898249
		Precision: Mean = 0.8743478774439837, Std = 0.030422753631027984
		Recall: Mean = 0.8859557344064386, Std = 0.018454291377663114
		ROC-AUC: Mean = 0.9610670786815851, Std = 0.0
	Training model: HistGB


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


		Accuracy: Mean = 0.3274446680080483, Std = 0.049561653126780666
		F1 Score: Mean = 0.3721289751717528, Std = 0.04566579229868627
		Precision: Mean = 0.5854819407946519, Std = 0.07637774926625954
		Recall: Mean = 0.3274446680080483, Std = 0.049561653126780666
		ROC-AUC: Mean = 0.5584396291398575, Std = 0.0
Processing dataset: Ablated 1
	Training model: KNN
		Accuracy: Mean = 0.8689336016096579, Std = 0.019020460323723055
		F1 Score: Mean = 0.8531244352245715, Std = 0.025251018571925396
		Precision: Mean = 0.8581748959430342, Std = 0.033430913657118616
		Recall: Mean = 0.8689336016096579, Std = 0.019020460323723055
		ROC-AUC: Mean = 0.9737723938274415, Std = 0.0
	Training model: DT
		Accuracy: Mean = 0.8574245472837022, Std = 0.0386503560353476
		F1 Score: Mean = 0.8585426864117756, Std = 0.03848618424884485
		Precision: Mean = 0.8697980064477283, Std = 0.035430571411628295
		Recall: Mean = 0.8574245472837022, Std = 0.0386503560353476
		ROC-AUC: Mean = 0.9377559397704366, Std = 0.0
	Tr

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


		Accuracy: Mean = 0.8886921529175051, Std = 0.052375045503211405
		F1 Score: Mean = 0.8876972963873222, Std = 0.048430475136420875
		Precision: Mean = 0.8972182241188277, Std = 0.040669198242898545
		Recall: Mean = 0.8886921529175051, Std = 0.052375045503211405
		ROC-AUC: Mean = 0.9916148144864741, Std = 0.0
	Training model: RF


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  warn(
  warn(


		Accuracy: Mean = 0.8973038229376258, Std = 0.02796725677266918
		F1 Score: Mean = 0.8881192798443385, Std = 0.029412807064438214
		Precision: Mean = 0.8882500868055574, Std = 0.030522833208901354
		Recall: Mean = 0.8973038229376258, Std = 0.02796725677266918
		ROC-AUC: Mean = 0.9908662592368911, Std = 0.0
	Training model: BalancedRF


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


		Accuracy: Mean = 0.811871227364185, Std = 0.02515331980363284
		F1 Score: Mean = 0.8316082575626901, Std = 0.021084336796879117
		Precision: Mean = 0.8689702957646201, Std = 0.022357916985673652
		Recall: Mean = 0.811871227364185, Std = 0.02515331980363284
		ROC-AUC: Mean = 0.9697410200806635, Std = 0.0
	Training model: RUSBoost


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


		Accuracy: Mean = 0.7976257545271631, Std = 0.04118971269449099
		F1 Score: Mean = 0.8061542565386794, Std = 0.04580269096285656
		Precision: Mean = 0.8332705081768156, Std = 0.0388015234335574
		Recall: Mean = 0.7976257545271631, Std = 0.04118971269449099
		ROC-AUC: Mean = 0.9463721432561247, Std = 0.0
	Training model: Ada


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


		Accuracy: Mean = 0.8432595573440643, Std = 0.020501428529554857
		F1 Score: Mean = 0.8392616607379443, Std = 0.02087106599642462
		Precision: Mean = 0.8476940659488592, Std = 0.020214614531627532
		Recall: Mean = 0.8432595573440643, Std = 0.020501428529554857
		ROC-AUC: Mean = 0.9595902528718899, Std = 0.0
	Training model: NN


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


		Accuracy: Mean = 0.9001609657947686, Std = 0.04254196840084274
		F1 Score: Mean = 0.8969678748890342, Std = 0.04043799099129158
		Precision: Mean = 0.9012094700935404, Std = 0.035039034854126114
		Recall: Mean = 0.9001609657947686, Std = 0.04254196840084274
		ROC-AUC: Mean = 0.989109239926545, Std = 0.0
	Training model: XGBoost


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


		Accuracy: Mean = 0.9202012072434608, Std = 0.01724786234495301
		F1 Score: Mean = 0.9159736564408609, Std = 0.01810785794993575
		Precision: Mean = 0.920222193313419, Std = 0.021696903613829817
		Recall: Mean = 0.9202012072434608, Std = 0.01724786234495301
		ROC-AUC: Mean = 0.9819137814107618, Std = 0.0
	Training model: LightGBM


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


		Accuracy: Mean = 0.9258350100603622, Std = 0.027838707292439337
		F1 Score: Mean = 0.9210523811406877, Std = 0.029927658479449053
		Precision: Mean = 0.9258129470612646, Std = 0.028080210982959937
		Recall: Mean = 0.9258350100603622, Std = 0.027838707292439337
		ROC-AUC: Mean = 0.9853800742615328, Std = 0.0
	Training model: SVM
		Accuracy: Mean = 0.8604024144869216, Std = 0.031789827559810434
		F1 Score: Mean = 0.8547538552187192, Std = 0.03364405216289174
		Precision: Mean = 0.8595601194152142, Std = 0.030651803570929424
		Recall: Mean = 0.8604024144869216, Std = 0.031789827559810434
		ROC-AUC: Mean = 0.962697510922301, Std = 0.0
	Training model: HistGB


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


		Accuracy: Mean = 0.45613682092555335, Std = 0.06670947187713712
		F1 Score: Mean = 0.38680940205079545, Std = 0.07155078413441648
		Precision: Mean = 0.3583055807974804, Std = 0.06665392707158166
		Recall: Mean = 0.45613682092555335, Std = 0.06670947187713712
		ROC-AUC: Mean = 0.7048905907985552, Std = 0.0
Processing dataset: Ablated 2
	Training model: KNN
		Accuracy: Mean = 0.8575452716297786, Std = 0.020219075522467853
		F1 Score: Mean = 0.8399304069634607, Std = 0.026347779609980748
		Precision: Mean = 0.8487153812343013, Std = 0.03795289888015738
		Recall: Mean = 0.8575452716297786, Std = 0.020219075522467853
		ROC-AUC: Mean = 0.9696604370527867, Std = 0.0
	Training model: DT
		Accuracy: Mean = 0.8259557344064387, Std = 0.04942675491133464
		F1 Score: Mean = 0.8236902358660437, Std = 0.04941174503976881
		Precision: Mean = 0.8367336322731121, Std = 0.04439612043775065
		Recall: Mean = 0.8259557344064387, Std = 0.04942675491133464
		ROC-AUC: Mean = 0.9017578688427372, Std = 0.0
	T

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


		Accuracy: Mean = 0.8801207243460765, Std = 0.05176783585618526
		F1 Score: Mean = 0.8781485309567796, Std = 0.04935854261168623
		Precision: Mean = 0.8857520167811919, Std = 0.04017543068430163
		Recall: Mean = 0.8801207243460765, Std = 0.05176783585618526
		ROC-AUC: Mean = 0.991427853849388, Std = 0.0
	Training model: RF


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  warn(
  warn(


		Accuracy: Mean = 0.897344064386318, Std = 0.02129848765513649
		F1 Score: Mean = 0.888079544297858, Std = 0.022581265848956297
		Precision: Mean = 0.8867904170814425, Std = 0.02318355319999258
		Recall: Mean = 0.897344064386318, Std = 0.02129848765513649
		ROC-AUC: Mean = 0.9894111869399412, Std = 0.0
	Training model: BalancedRF


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


		Accuracy: Mean = 0.8033802816901409, Std = 0.011284408282228642
		F1 Score: Mean = 0.8286308910539484, Std = 0.017712313642741614
		Precision: Mean = 0.8849886541097085, Std = 0.01575948947809531
		Recall: Mean = 0.8033802816901409, Std = 0.011284408282228642
		ROC-AUC: Mean = 0.9652959221247739, Std = 0.0
	Training model: RUSBoost


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


		Accuracy: Mean = 0.7520724346076458, Std = 0.029878401710691102
		F1 Score: Mean = 0.7428470622907244, Std = 0.03531356463150519
		Precision: Mean = 0.7548075471250255, Std = 0.03685363075091233
		Recall: Mean = 0.7520724346076458, Std = 0.029878401710691102
		ROC-AUC: Mean = 0.9282567118651254, Std = 0.0
	Training model: Ada


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


		Accuracy: Mean = 0.8290543259557344, Std = 0.009086551193731437
		F1 Score: Mean = 0.817315310736571, Std = 0.008810843921615312
		Precision: Mean = 0.8250634653529791, Std = 0.010852206907615307
		Recall: Mean = 0.8290543259557344, Std = 0.009086551193731437
		ROC-AUC: Mean = 0.9440448503873938, Std = 0.0
	Training model: NN


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


		Accuracy: Mean = 0.9116297786720322, Std = 0.02472128848045459
		F1 Score: Mean = 0.9062728905375297, Std = 0.025971543003457102
		Precision: Mean = 0.9119865084613575, Std = 0.02973181476836459
		Recall: Mean = 0.9116297786720322, Std = 0.02472128848045459
		ROC-AUC: Mean = 0.9910121984067306, Std = 0.0
	Training model: XGBoost


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


		Accuracy: Mean = 0.9145674044265594, Std = 0.01785496345132819
		F1 Score: Mean = 0.9094408927362426, Std = 0.01803912574309897
		Precision: Mean = 0.9156214550774815, Std = 0.015627535148035877
		Recall: Mean = 0.9145674044265594, Std = 0.01785496345132819
		ROC-AUC: Mean = 0.9887144972755897, Std = 0.0
	Training model: LightGBM
		Accuracy: Mean = 0.9201609657947685, Std = 0.01735913670955526
		F1 Score: Mean = 0.9140056969745473, Std = 0.02018066297221052
		Precision: Mean = 0.9162591585627411, Std = 0.025931273574341884
		Recall: Mean = 0.9201609657947685, Std = 0.01735913670955526
		ROC-AUC: Mean = 0.9822552762680228, Std = 0.0
	Training model: SVM


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


		Accuracy: Mean = 0.854728370221328, Std = 0.016377330294056313
		F1 Score: Mean = 0.8401809879897002, Std = 0.01685465100481188
		Precision: Mean = 0.8475983743120427, Std = 0.025721683696034524
		Recall: Mean = 0.854728370221328, Std = 0.016377330294056313
		ROC-AUC: Mean = 0.9660083517089386, Std = 0.0
	Training model: HistGB


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

		Accuracy: Mean = 0.35030181086519113, Std = 0.0426866750786589
		F1 Score: Mean = 0.3326177137810268, Std = 0.04824232838060129
		Precision: Mean = 0.3919797063021161, Std = 0.10512498968988901
		Recall: Mean = 0.35030181086519113, Std = 0.0426866750786589
		ROC-AUC: Mean = 0.582821173070499, Std = 0.0
Processing dataset: Ablated 3
	Training model: KNN
		Accuracy: Mean = 0.8888531187122737, Std = 0.01686140710441168
		F1 Score: Mean = 0.8799485152624518, Std = 0.021750416779769254
		Precision: Mean = 0.8833509665298462, Std = 0.031271805866137016
		Recall: Mean = 0.8888531187122737, Std = 0.01686140710441168
		ROC-AUC: Mean = 0.9670110798733544, Std = 0.0
	Training model: DT
		Accuracy: Mean = 0.8117907444668008, Std = 0.03249969753374928
		F1 Score: Mean = 0.8117263645917742, Std = 0.03516231425207733
		Precision: Mean = 0.8228099642628278, Std = 0.03513938619852858
		Recall: Mean = 0.8117907444668008, Std = 0.03249969753374928
		ROC-AUC: Mean = 0.8917607334079175, Std = 0.0
	Traini

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  warn(
  warn(


		Accuracy: Mean = 0.880281690140845, Std = 0.030960035182244307
		F1 Score: Mean = 0.8731217519064207, Std = 0.031028810327614825
		Precision: Mean = 0.8764915792825356, Std = 0.031389766013341966
		Recall: Mean = 0.880281690140845, Std = 0.030960035182244307
		ROC-AUC: Mean = 0.9895779781297822, Std = 0.0
	Training model: BalancedRF


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


		Accuracy: Mean = 0.7636217303822936, Std = 0.022106221469782146
		F1 Score: Mean = 0.8002826756968512, Std = 0.007117610710924051
		Precision: Mean = 0.864907644698601, Std = 0.017582615164418322
		Recall: Mean = 0.7636217303822936, Std = 0.022106221469782146
		ROC-AUC: Mean = 0.9671270790244793, Std = 0.0
	Training model: RUSBoost


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


		Accuracy: Mean = 0.7377867203219315, Std = 0.03277774070521907
		F1 Score: Mean = 0.7498317921365548, Std = 0.035415719969978436
		Precision: Mean = 0.7947573556743578, Std = 0.03676322303899015
		Recall: Mean = 0.7377867203219315, Std = 0.03277774070521907
		ROC-AUC: Mean = 0.939940794682919, Std = 0.0
	Training model: Ada


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


		Accuracy: Mean = 0.8347283702213281, Std = 0.011789164918473162
		F1 Score: Mean = 0.8286237273659248, Std = 0.012762718297833068
		Precision: Mean = 0.8333794495649374, Std = 0.018371675240873495
		Recall: Mean = 0.8347283702213281, Std = 0.011789164918473162
		ROC-AUC: Mean = 0.9403043479212664, Std = 0.0
	Training model: NN


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


		Accuracy: Mean = 0.8859959758551309, Std = 0.022319534167351593
		F1 Score: Mean = 0.8826541215758562, Std = 0.022991062581782063
		Precision: Mean = 0.8880297385377869, Std = 0.024074404833334003
		Recall: Mean = 0.8859959758551309, Std = 0.022319534167351593
		ROC-AUC: Mean = 0.9909143929173977, Std = 0.0
	Training model: XGBoost


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


		Accuracy: Mean = 0.8975855130784709, Std = 0.028314370585461927
		F1 Score: Mean = 0.8907400323112936, Std = 0.030662947466419718
		Precision: Mean = 0.8945021958135936, Std = 0.02634699691470962
		Recall: Mean = 0.8975855130784709, Std = 0.028314370585461927
		ROC-AUC: Mean = 0.9830494503546766, Std = 0.0
	Training model: LightGBM


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


		Accuracy: Mean = 0.9031388329979879, Std = 0.01891801820655374
		F1 Score: Mean = 0.8968150875192091, Std = 0.01976131677758454
		Precision: Mean = 0.9010457711764028, Std = 0.022725815780768682
		Recall: Mean = 0.9031388329979879, Std = 0.01891801820655374
		ROC-AUC: Mean = 0.9817246717947933, Std = 0.0
	Training model: SVM
		Accuracy: Mean = 0.8574245472837022, Std = 0.02602928141626051
		F1 Score: Mean = 0.8493916376994044, Std = 0.028932436094544437
		Precision: Mean = 0.857807713413347, Std = 0.027938675552761895
		Recall: Mean = 0.8574245472837022, Std = 0.02602928141626051
		ROC-AUC: Mean = 0.9339361534976096, Std = 0.0
	Training model: HistGB


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

		Accuracy: Mean = 0.5472837022132797, Std = 0.0708927104060395
		F1 Score: Mean = 0.5284054575764552, Std = 0.07277577522466053
		Precision: Mean = 0.5688326183582457, Std = 0.06733920160758071
		Recall: Mean = 0.5472837022132797, Std = 0.0708927104060395
		ROC-AUC: Mean = 0.7187175558833301, Std = 0.0
Processing dataset: Ablated 4
	Training model: KNN
		Accuracy: Mean = 0.90317907444668, Std = 0.013612320619273083
		F1 Score: Mean = 0.8932667508258222, Std = 0.01841337605466188
		Precision: Mean = 0.8944549539438876, Std = 0.02377881370034436
		Recall: Mean = 0.90317907444668, Std = 0.013612320619273083
		ROC-AUC: Mean = 0.9711092931892334, Std = 0.0
	Training model: DT
		Accuracy: Mean = 0.8060764587525151, Std = 0.03745966635034591
		F1 Score: Mean = 0.808068486736518, Std = 0.030203988342622733
		Precision: Mean = 0.8292078315541633, Std = 0.022137936619100473
		Recall: Mean = 0.8060764587525151, Std = 0.03745966635034591
		ROC-AUC: Mean = 0.9218051008330597, Std = 0.0
	Training m

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  warn(
  warn(


		Accuracy: Mean = 0.8717102615694164, Std = 0.031559559780728115
		F1 Score: Mean = 0.8626591876661008, Std = 0.03327106835058639
		Precision: Mean = 0.8618621873280204, Std = 0.03591821785662606
		Recall: Mean = 0.8717102615694164, Std = 0.031559559780728115
		ROC-AUC: Mean = 0.9894495039734306, Std = 0.0
	Training model: BalancedRF


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


		Accuracy: Mean = 0.7805633802816901, Std = 0.01782364585754563
		F1 Score: Mean = 0.8134030349279755, Std = 0.02125949149584933
		Precision: Mean = 0.8644894460131469, Std = 0.02579206635246861
		Recall: Mean = 0.7805633802816901, Std = 0.01782364585754563
		ROC-AUC: Mean = 0.9605461098279132, Std = 0.0
	Training model: RUSBoost


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


		Accuracy: Mean = 0.8090543259557343, Std = 0.019893688923599443
		F1 Score: Mean = 0.8116775247618906, Std = 0.029062737223890047
		Precision: Mean = 0.8289912373362689, Std = 0.03580323513418286
		Recall: Mean = 0.8090543259557343, Std = 0.019893688923599443
		ROC-AUC: Mean = 0.9459327708691992, Std = 0.0
	Training model: Ada


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


		Accuracy: Mean = 0.8205231388329979, Std = 0.021266377796353587
		F1 Score: Mean = 0.8161800885895343, Std = 0.019329285009712243
		Precision: Mean = 0.826588387886426, Std = 0.017222408131826576
		Recall: Mean = 0.8205231388329979, Std = 0.021266377796353587
		ROC-AUC: Mean = 0.9431288770612578, Std = 0.0
	Training model: NN


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


		Accuracy: Mean = 0.9087726358148893, Std = 0.01958878330165881
		F1 Score: Mean = 0.905159904274595, Std = 0.02102014480085168
		Precision: Mean = 0.9089690022894835, Std = 0.0244471913745143
		Recall: Mean = 0.9087726358148893, Std = 0.01958878330165881
		ROC-AUC: Mean = 0.9874235843936999, Std = 0.0
	Training model: XGBoost


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


		Accuracy: Mean = 0.8773843058350101, Std = 0.028316829763083546
		F1 Score: Mean = 0.8734404309755304, Std = 0.028403448425615766
		Precision: Mean = 0.8774272448167928, Std = 0.027129280635606025
		Recall: Mean = 0.8773843058350101, Std = 0.028316829763083546
		ROC-AUC: Mean = 0.9780064904444024, Std = 0.0
	Training model: LightGBM


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


		Accuracy: Mean = 0.8888531187122737, Std = 0.011001897264483806
		F1 Score: Mean = 0.8823274883990369, Std = 0.014112291696074331
		Precision: Mean = 0.8855723984303738, Std = 0.017083205194309736
		Recall: Mean = 0.8888531187122737, Std = 0.011001897264483806
		ROC-AUC: Mean = 0.9801732289203808, Std = 0.0
	Training model: SVM
		Accuracy: Mean = 0.8774245472837021, Std = 0.0234940454397996
		F1 Score: Mean = 0.872220492363787, Std = 0.02310199319363695
		Precision: Mean = 0.8813185520532555, Std = 0.026443816009062975
		Recall: Mean = 0.8774245472837021, Std = 0.0234940454397996
		ROC-AUC: Mean = 0.8932938534622404, Std = 0.08422242228903826
	Training model: HistGB
		Accuracy: Mean = 0.675251509054326, Std = 0.06063613216247096
		F1 Score: Mean = 0.6774890751512785, Std = 0.05115511780962599
		Precision: Mean = 0.7123888363190968, Std = 0.0573468180571867
		Recall: Mean = 0.675251509054326, Std = 0.06063613216247096
		ROC-AUC: Mean = 0.7448388682474055, Std = 0.0


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [14]:
%store datasets
%store le
%store Y_encoded

Stored 'datasets' (dict)
Stored 'le' (LabelEncoder)
Stored 'Y_encoded' (ndarray)
