In [2]:
#imports
import os
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score
from tqdm import tqdm
from sklearn.preprocessing import LabelEncoder
import plotly.io as pio

from helpers.datasetHelper import get_samples, split_healthy_data
from imblearn.over_sampling import SMOTE
from sklearn.metrics import cohen_kappa_score

from helpers.ploting import display_confusion_matrix_pink_variants
from helpers.metaheuristics import run_pso_with_progress, run_ga_with_progress
from models import MyXGboost
import numpy as np
from sklearn.metrics import recall_score, precision_score
from helpers.datasetHelper import split_healthy_data_HxC

# Set the default renderer to 'browser' to ensure plots open in the browser
pio.renderers.default = 'browser'

In [10]:
directory_path_combined = './datasets/Combined'
data_HB_4C = get_samples(os.path.join(directory_path_combined, 'DT.Health_BRCA_4C.csv'))
data_HPB_3C = get_samples(os.path.join(directory_path_combined, 'DT.Consolidated_3C.csv'))

healthy_mt_cases, healthy_wt_cases, brca_mt_cases, brca_wt_cases = split_healthy_data_HxC(data_HB_4C)
healthy_consolidated_cases, pre_brca_consolidated_cases, brca_consolidated_cases = split_healthy_data(data_HPB_3C)

directory_path_binary = './datasets/Binary'
data_HP_2C = get_samples(os.path.join(directory_path_binary, 'Healthy.PRE-BRCA.csv'))
healthy_cases, pre_brca_cases,_ = split_healthy_data(data_HP_2C)


feature_names = np.array(data_HB_4C[0][:-1])

print(f"Healthy with mutation cases: {len(healthy_mt_cases)}")
print(f"Healthy without mutation cases: {len(healthy_wt_cases)}")
print(f"Healthy consolidated cases(adding not specified cases): {(len(healthy_consolidated_cases)) - (len(healthy_mt_cases) + len(healthy_wt_cases))}")

print(f"BRCA with mutation cases: {len(brca_mt_cases)}")
print(f"BRCA without mutation cases: {len(brca_wt_cases)}")
print(f"BRCA consolidated cases: {len(brca_consolidated_cases)}")

print(f"PRE-BRCA cases: {len(pre_brca_cases)}")



Columns (1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144) have mixed types. Specify dtype option on import or set low_memory=False.


Columns (1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,

Healthy with mutation cases: 22
Healthy without mutation cases: 57
Healthy consolidated cases(adding not specified cases): 512
BRCA with mutation cases: 50
BRCA without mutation cases: 15
BRCA consolidated cases: 65
PRE-BRCA cases: 134



Columns (1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,256,257,258,259,260,261,262,263,264,265,266,267,268,269,270,271,272,273,274,27

In [14]:
healthy_mt_cases = pd.DataFrame(healthy_mt_cases)
healthy_wt_cases = pd.DataFrame(healthy_wt_cases)
healthy_cases = pd.DataFrame(healthy_cases)
healthy_consolidated_cases = pd.DataFrame(healthy_consolidated_cases)
brca_consolidated_cases = pd.DataFrame(brca_consolidated_cases)

pre_brca_cases = pd.DataFrame(pre_brca_cases)

brca_mt_cases = pd.DataFrame(brca_mt_cases)
brca_wt_cases = pd.DataFrame(brca_wt_cases)

healthy_mt_cases['Tag'] = 'HEALTHY-MUT'
healthy_wt_cases['Tag'] = 'HEALTHY-WT'
healthy_consolidated_cases['Tag'] = 'HEALTHY'
pre_brca_cases['Tag'] = 'PRE-BRCA'
brca_mt_cases['Tag'] = 'BRCA-MUT'
brca_wt_cases['Tag'] = 'BRCA-WT'
brca_consolidated_cases['Tag'] = 'BRCA'

print(f"Healthy MT cases shape: {healthy_mt_cases.shape}")
print(f"Healthy WT cases shape: {healthy_wt_cases.shape}")
print(f"Healthy consolidated cases shape: {healthy_consolidated_cases.shape}")

print(f"PRE-BRCA cases shape: {pre_brca_cases.shape}")
print(f"BRCA MT cases shape: {brca_mt_cases.shape}")
print(f"BRCA WT cases shape: {brca_wt_cases.shape}")    
print(f"BRCA consolidated cases shape: {brca_consolidated_cases.shape}")

Healthy MT cases shape: (22, 27579)
Healthy WT cases shape: (57, 27579)
Healthy consolidated cases shape: (591, 27579)
PRE-BRCA cases shape: (134, 27579)
BRCA MT cases shape: (50, 27579)
BRCA WT cases shape: (15, 27579)
BRCA consolidated cases shape: (65, 27579)


In [15]:
df_cancer = pd.concat([healthy_consolidated_cases, brca_consolidated_cases], ignore_index=True) #blood samples
X = df_cancer.iloc[:, :-1].apply(pd.to_numeric, errors='coerce')
Y = df_cancer.iloc[:, -1]

# Fill missing values with the lowest value of its cpg site
X = X.apply(lambda col: col.fillna(col.min()), axis=0)
label_encoder = LabelEncoder()
label_encoder.fit(Y)
Y_encoded = label_encoder.transform(Y)

print("Label indices and names:")
for idx, name in enumerate(label_encoder.classes_):
    print(f"{idx}: {name}")

X_train_all, X_test_all, y_train_all, y_test_all = train_test_split(
    X, Y_encoded, test_size=0.25, random_state=42
)

smote = SMOTE(sampling_strategy='auto', random_state=None, k_neighbors=10)
X_train_all, y_train_all = smote.fit_resample(X_train_all, y_train_all)

selector = MyXGboost.XGBoostBinary()[0].fit(X_train_all, y_train_all)

# Evaluate the model
y_pred = selector.predict(X_test_all)
y_pred_proba = selector.predict_proba(X_test_all)

accuracy = accuracy_score(y_test_all, y_pred)
f1 = f1_score(y_test_all, y_pred, average='weighted')

# Calculate sensitivity (recall) and specificity for each class
sensitivity = recall_score(y_test_all, y_pred, average='macro')

# Specificity is the recall of the negative class
# For multi-class, we calculate specificity for each class and average
specificities = []
for class_idx in range(len(np.unique(y_test_all))):
    true_neg = np.sum((y_test_all != class_idx) & (y_pred != class_idx))
    total_neg = np.sum(y_test_all != class_idx)
    specificities.append(true_neg / total_neg if total_neg > 0 else 0)
specificity = np.mean(specificities)
precision = precision_score(y_test_all, y_pred, average='weighted')

# Handle binary and multiclass cases for ROC AUC
if y_pred_proba.shape[1] == 2:
    roc_auc = roc_auc_score(y_test_all, y_pred_proba[:, 1])
else:
    roc_auc = roc_auc_score(y_test_all, y_pred_proba, multi_class='ovr')

# Compute Kappa index
kappa = cohen_kappa_score(y_test_all, y_pred)
# Print results
print(f"Accuracy: {accuracy:.4f}")
print(f"F1 Score: {f1:.4f}")
print(f"ROC AUC: {roc_auc:.4f}")
print(f"Sensitivity (Recall): {sensitivity:.4f}")
print(f"Specificity: {specificity:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Kappa index: {kappa:.4f}")

# Confusion matrix
display_confusion_matrix_pink_variants(selector, X_test_all, y_test_all, label_encoder.classes_)

Label indices and names:
0: BRCA
1: HEALTHY



`BaseEstimator._validate_data` is deprecated in 1.6 and will be removed in 1.7. Use `sklearn.utils.validation.validate_data` instead. This function becomes public and is part of the scikit-learn developer API.


Parameters: { "use_label_encoder" } are not used.




Accuracy: 0.8780
F1 Score: 0.8826
ROC AUC: 0.9235
Sensitivity (Recall): 0.8272
Specificity: 0.8272
Precision: 0.8901
Kappa index: 0.6031


In [17]:
def process_dataframes(X, Y):
    n_features = X.shape[1]
    print(f"Loaded dataset with {n_features} features and {len(Y)} samples")

    # Use DecisionTreeClassifier as the estimator
    estimator = MyXGboost.DecisionTreeMultiClass()

    # Run PSO
    best_weights, best_fitness, progress, X_selected = run_pso_with_progress(
        X, Y, estimator, n_features,
        swarmsize=30,
        maxiter=10,
        threshold=0.7
    )

    X_selected_pso = X.iloc[:, X_selected]

    best_weights_ga, best_fitness_ga, progress_ga, X_selected_proc = run_ga_with_progress(
    X, Y, estimator, X.shape[1], 
    pop_size=25, n_generations=10, threshold=0.8
    )

    X_selected_ga = X.iloc[:, X_selected_proc]

    # Use LabelEncoder to encode the target classes
    label_encoder = LabelEncoder()
    label_encoder.fit(Y)
    Y_encoded = label_encoder.transform(Y)
    print("Label indices and names:")
    for idx, name in enumerate(label_encoder.classes_):
        print(f"{idx}: {name}")

    print(f"Encoded target classes: {label_encoder.classes_}")

    # 1) evaluate with all features
    X_train_all, X_test_all, y_train_all, y_test_all = train_test_split(
        X, Y_encoded, test_size=0.1, random_state=42
    )
    # 2) evaluate with selected features
    X_train_ga, X_test_ga, y_train_ga, y_test_ga = train_test_split(
        X_selected_ga, Y_encoded, test_size=0.1, random_state=42
    )
    # 3) evaluate with PSO selected features
    X_train_pso, X_test_pso, y_train_pso, y_test_pso = train_test_split(
        X_selected_pso, Y_encoded, test_size=0.1, random_state=42
    )

    # Apply SMOTE to balance the training instances - ALL
    smote = SMOTE(sampling_strategy='auto', random_state=None, k_neighbors=10)
    X_train_all, y_train_all = smote.fit_resample(X_train_all, y_train_all)

    # Apply SMOTE to balance the training instances - GA
    smote = SMOTE(sampling_strategy='auto', random_state=None, k_neighbors=10)
    X_train_ga, y_train_ga = smote.fit_resample(X_train_ga, y_train_ga)

    # Apply SMOTE to balance the training instances - PSO
    smote = SMOTE(sampling_strategy='auto', random_state=None, k_neighbors=10)
    X_train_pso, y_train_pso = smote.fit_resample(X_train_pso, y_train_pso)

    return [('GA', X_train_ga, X_test_ga, y_train_ga, y_test_ga), 
                   ('PSO', X_train_pso, X_test_pso, y_train_pso, y_test_pso),
                   ('ALL', X_train_all, X_test_all, y_train_all, y_test_all)]

In [19]:
modes_multiclass = [
    {
        'Name': 'Random Forest',
        'Model': MyXGboost.RandomForest300()
    },
    {
        'Name': 'XGBoost',
        'Model': MyXGboost.XGBoostMultiClass()
    },
    {
        'Name': 'Light GBM',
        'Model': MyXGboost.LightGBMMulticlass()
    },
    {
        'Name': 'Gradient Boosting',
        'Model': MyXGboost.GradientBoosting()
    },
    {
        'Name': 'Ada Boost',
        'Model': MyXGboost.AdaBoostMultiClass()
    }
]

modes_binary = [
    {
        'Name': 'Random Forest',
        'Model': MyXGboost.RandomForest300()
    },
    {
        'Name': 'XGBoost',
        'Model': MyXGboost.XGBoostBinary()
    },
    {
        'Name': 'Light GBM',
        'Model': MyXGboost.LightGBMBinary()
    },
    {
        'Name': 'Gradient Boosting',
        'Model': MyXGboost.GradientBoosting()
    },
    {
        'Name': 'Ada Boost',
        'Model': MyXGboost.AdaBoostBinary()
    }
]

In [20]:
def run_models(feature_set, models):
    method, X_train, X_test, y_train, y_test = feature_set
    print(f"\n=== Results for {method} selected features ===")
    results = []
    for m in models:
        # Split training data into train and validation sets for early stopping
        X_train_split, X_val, y_train_split, y_val = train_test_split(
            X_train, y_train, test_size=0.2, random_state=42
        )   

        selector = m['Model'][0].fit(X_train_split, y_train_split)
    
        # Evaluate the model
        y_pred = selector.predict(X_test)
        y_pred_proba = selector.predict_proba(X_test)
        
        accuracy = accuracy_score(y_test, y_pred)
        f1 = f1_score(y_test, y_pred, average='weighted')
        
        # Calculate sensitivity (recall) and specificity for each class
        sensitivity = recall_score(y_test, y_pred, average='macro')
        
        # Specificity is the recall of the negative class
        # For multi-class, we calculate specificity for each class and average
        specificities = []
        for class_idx in range(len(np.unique(y_test))):
            true_neg = np.sum((y_test != class_idx) & (y_pred != class_idx))
            total_neg = np.sum(y_test != class_idx)
            specificities.append(true_neg / total_neg if total_neg > 0 else 0)
        specificity = np.mean(specificities)
        precision = precision_score(y_test, y_pred, average='weighted')

        # Handle binary and multiclass cases for ROC AUC
        if y_pred_proba.shape[1] == 2:
            roc_auc = roc_auc_score(y_test, y_pred_proba[:, 1])
        else:
            roc_auc = roc_auc_score(y_test, y_pred_proba, multi_class='ovr')

        # Compute Kappa index
        kappa = cohen_kappa_score(y_test, y_pred)
        

        results.append({
            'Name': m['Name'],
            'Model': f"{method}",
            'Kappa': kappa,
            'Sensitivity': sensitivity,
            'Accuracy': accuracy,
            'F1 Score': f1,
            'ROC AUC': roc_auc,
            'Specificity': specificity,
            'Precision': precision})

    return results

In [None]:

datasets = {
    "HEALTHY-MT-WT-BRCA": pd.concat([healthy_mt_cases, healthy_wt_cases, brca_consolidated_cases], ignore_index=True),
    # "HEALTHY-WT-BRCA": pd.concat([healthy_wt_cases, brca_consolidated_cases], ignore_index=True),
    # "HEALTHY-MT-BRCA": pd.concat([healthy_mt_cases, brca_mt_cases], ignore_index=True),
    # "HEALTHY-BRCA": pd.concat([healthy_consolidated_cases, brca_consolidated_cases], ignore_index=True),
    # "HEALTHY-PRE-BRCA": pd.concat([healthy_consolidated_cases, pre_brca_cases], ignore_index=True),
    # "HEALTHY-PRE-BRCA-BRCA": pd.concat([healthy_consolidated_cases, pre_brca_cases, brca_consolidated_cases], ignore_index=True),
    # "PRE-BRCA-BRCA": pd.concat([pre_brca_cases, brca_consolidated_cases], ignore_index=True),
    # "PRE-BRCA-BRCA-MT": pd.concat([pre_brca_cases, brca_mt_cases], ignore_index=True),
    # "PRE-BRCA-BRCA-WT": pd.concat([pre_brca_cases, brca_wt_cases], ignore_index=True),
}

for name, df_cancer in datasets.items():
    X = df_cancer.iloc[:, :-1].apply(pd.to_numeric, errors='coerce')
    Y = df_cancer.iloc[:, -1]
    # Fill missing values with the lowest value of its cpg site
    X = X.apply(lambda col: col.fillna(col.min()), axis=0)

    features_set = process_dataframes(X, Y)
    for feature_set in features_set:
        if len(np.unique(feature_set[3])) > 2:
            models_param = modes_multiclass
        else:
            models_param = modes_binary
        result = run_models(feature_set, models_param)
        
        metrics_dict = []
        
        for res in result:
            print(f"Model: {res['Name']} "
                  f"Kappa: {res['Kappa']:.4f}, "
                  f"Sensitivity: {res['Sensitivity']:.4f}, "
                  f"Accuracy: {res['Accuracy']:.4f}, "
                  f"F1 Score: {res['F1 Score']:.4f}, "        
                  f"ROC AUC: {res['ROC AUC']:.4f}, "
                  f"Specificity: {res['Specificity']:.4f}, "
                  f"Precision: {res['Precision']:.4f}")
            
            metrics_dict.append({
            'Dataset': name,
            'Method': res['Name'],
            'Model': f"{res['Model']}",
            'Kappa': res['Kappa'],
            'Accuracy': res['Accuracy'],
            'ROC_AUC': res['ROC AUC'],
            'F1_Score': res['F1 Score'],
            'Sensitivity': res['Sensitivity'],
            'Specificity': res['Specificity'],
            'Precision': res['Precision']
            })
            
        df_metrics = pd.DataFrame(metrics_dict)
        csv_path = f"metrics_consolidated.csv"
        df_metrics.to_csv(csv_path, mode='a', header=not os.path.exists(csv_path), index=False)


Loaded dataset with 27578 features and 144 samples
Eval 10: best fitness so far = 0.9243
Eval 20: best fitness so far = 0.8712
Eval 30: best fitness so far = 0.8712
Eval 40: best fitness so far = 0.8712
Eval 50: best fitness so far = 0.8712
Eval 60: best fitness so far = 0.8712
Eval 70: best fitness so far = 0.8642
Eval 80: best fitness so far = 0.8642
Eval 90: best fitness so far = 0.8442
Eval 100: best fitness so far = 0.8442
Eval 110: best fitness so far = 0.7813
Eval 120: best fitness so far = 0.7813
Eval 130: best fitness so far = 0.7813
Eval 140: best fitness so far = 0.7813
Eval 150: best fitness so far = 0.7813
Eval 160: best fitness so far = 0.7813
Eval 170: best fitness so far = 0.7813
Eval 180: best fitness so far = 0.7813
Eval 190: best fitness so far = 0.7813
Eval 200: best fitness so far = 0.7813
Eval 210: best fitness so far = 0.7813
Eval 220: best fitness so far = 0.7813
Eval 230: best fitness so far = 0.7813
Eval 240: best fitness so far = 0.7813
Eval 250: best fitness


`BaseEstimator._validate_data` is deprecated in 1.6 and will be removed in 1.7. Use `sklearn.utils.validation.validate_data` instead. This function becomes public and is part of the scikit-learn developer API.


`BaseEstimator._validate_data` is deprecated in 1.6 and will be removed in 1.7. Use `sklearn.utils.validation.validate_data` instead. This function becomes public and is part of the scikit-learn developer API.


`BaseEstimator._validate_data` is deprecated in 1.6 and will be removed in 1.7. Use `sklearn.utils.validation.validate_data` instead. This function becomes public and is part of the scikit-learn developer API.




=== Results for GA selected features ===



Parameters: { "use_label_encoder" } are not used.




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.007269 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 263369
[LightGBM] [Info] Number of data points in the train set: 139, number of used features: 5488
[LightGBM] [Info] Start training from score -1.042654
[LightGBM] [Info] Start training from score -1.150284
[LightGBM] [Info] Start training from score -1.105833
Model: GA Kappa: 0.5714, Sensitivity: 0.6825, Accuracy: 0.7333, F1 Score: 0.7154, ROC AUC: 0.7860, Specificity: 0.8583, Precision: 0.7972
Model: GA Kappa: 0.6739, Sensitivity: 0.7302, Accuracy: 0.8000, F1 Score: 0.7778, ROC AUC: 0.8888, Specificity: 0.8917, Precision: 0.8381
Model: GA Kappa: 0.5455, Sensitivity: 0.6444, Accuracy: 0.7333, F1 Score: 0.7065, ROC AUC: 0.8470, Specificity: 0.8417, Precision: 0.7767
Model: GA Kappa: 0.6739, Sensitivity: 0.7302, Accuracy: 0.8000, F1 Score: 0.7778, ROC AUC: 0.7139, Specificity: 0.8917, Precision: 0.


Parameters: { "use_label_encoder" } are not used.




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.009466 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 363677
[LightGBM] [Info] Number of data points in the train set: 139, number of used features: 7578
[LightGBM] [Info] Start training from score -1.042654
[LightGBM] [Info] Start training from score -1.150284
[LightGBM] [Info] Start training from score -1.105833
Model: PSO Kappa: 0.5588, Sensitivity: 0.6635, Accuracy: 0.7333, F1 Score: 0.7158, ROC AUC: 0.9050, Specificity: 0.8500, Precision: 0.7722
Model: PSO Kappa: 0.5890, Sensitivity: 0.7460, Accuracy: 0.7333, F1 Score: 0.7375, ROC AUC: 0.8490, Specificity: 0.8667, Precision: 0.8519
Model: PSO Kappa: 0.6739, Sensitivity: 0.7302, Accuracy: 0.8000, F1 Score: 0.7778, ROC AUC: 0.9562, Specificity: 0.8917, Precision: 0.8381
Model: PSO Kappa: 0.7857, Sensitivity: 0.8413, Accuracy: 0.8667, F1 Score: 0.8630, ROC AUC: 0.9536, Specificity: 0.9250, Precision


Parameters: { "use_label_encoder" } are not used.




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.042330 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1323497
[LightGBM] [Info] Number of data points in the train set: 139, number of used features: 27578
[LightGBM] [Info] Start training from score -1.042654
[LightGBM] [Info] Start training from score -1.150284
[LightGBM] [Info] Start training from score -1.105833
Model: ALL Kappa: 0.5588, Sensitivity: 0.6635, Accuracy: 0.7333, F1 Score: 0.7158, ROC AUC: 0.8413, Specificity: 0.8500, Precision: 0.7722
Model: ALL Kappa: 0.6786, Sensitivity: 0.7302, Accuracy: 0.8000, F1 Score: 0.7830, ROC AUC: 0.8358, Specificity: 0.8972, Precision: 0.7778
Model: ALL Kappa: 0.5652, Sensitivity: 0.6825, Accuracy: 0.7333, F1 Score: 0.7111, ROC AUC: 0.9365, Specificity: 0.8500, Precision: 0.7714
Model: ALL Kappa: 0.3836, Sensitivity: 0.5873, Accuracy: 0.6000, F1 Score: 0.5726, ROC AUC: 0.8894, Specificity: 0.7972, Precisi