In [None]:
from scipy.stats import sem
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.pipeline import Pipeline
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from sklearn.utils import resample
from sklearn.model_selection import StratifiedKFold
from numpy import mean, std
import numpy as np
from sklearn.metrics import roc_auc_score
import pandas as pd
from sklearn.metrics import roc_curve, auc, confusion_matrix, roc_auc_score
import seaborn as sns
from sklearn.tree import DecisionTreeClassifier
from collections import defaultdict
from sklearn.metrics import auc
from sklearn.neural_network import MLPClassifier
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from scipy.stats import sem, t
import random



# Coherence-based prediction

In [None]:
# Seed NumPy's and Python's global generators so code that draws from them
# (e.g. the np.random.choice-based bootstrap further down) is repeatable.
seed_value = 777
np.random.seed(seed_value)
random.seed(seed_value)

In [None]:

# Load the coherence feature table.
# NOTE(review): hard-coded absolute path; the notebook only runs where this file exists.
data = pd.read_csv('D:/coherence_data_150.csv')

# 'Class' is the target; 'ID' is dropped from the feature matrix together with it.
y = data['Class']
X = data.drop(columns=['Class', 'ID'])

# One entry per classifier: the estimator instance ("model") and the number of
# features ("n_features") later passed as k to SelectKBest.
# NOTE(review): the n_features values are presumably the result of earlier tuning — confirm.
model_configs = {
    "LogisticRegression": dict(
        model=LogisticRegression(C=10, max_iter=3000, penalty='l2', solver='lbfgs', random_state=seed_value),
        n_features=115,
    ),
    "GaussianNB": dict(
        model=GaussianNB(),
        n_features=121,
    ),
    "DecisionTreeClassifier": dict(
        model=DecisionTreeClassifier(criterion='gini', max_depth=2, random_state=seed_value),
        n_features=32,
    ),
    "RandomForestClassifier": dict(
        model=RandomForestClassifier(max_depth=10, n_estimators=1000, random_state=seed_value),
        n_features=141,
    ),
    "SVC": dict(
        model=SVC(C=10, gamma='scale', kernel='rbf', probability=True, random_state=seed_value),
        n_features=143,
    ),
    "MLPClassifier": dict(
        model=MLPClassifier(hidden_layer_sizes=(50,), max_iter=5000, random_state=seed_value),
        n_features=117,
    ),
}


In [None]:


# Stratified 10-fold cross-validation. Every model is wrapped in a pipeline so the
# univariate feature selection is re-fitted on the training part of each fold only.
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed_value)

# model name -> list of per-sample prediction records (converted to DataFrames below)
predictions = defaultdict(list)

for model_name, config in model_configs.items():
    print(f"Processing {model_name}...")

    selector = SelectKBest(f_classif, k=config['n_features'])
    pipeline = Pipeline([('selector', selector), ('clf', config['model'])])

    for fold_num, (train_index, test_index) in enumerate(skf.split(X, y), start=1):
        X_train, X_test = X.iloc[train_index], X.iloc[test_index]
        y_train, y_test = y.iloc[train_index], y.iloc[test_index]

        pipeline.fit(X_train, y_train)
        y_pred_proba = pipeline.predict_proba(X_test)

        # Read IDs from the ID column itself. `data.iloc[row]['ID']` first builds a
        # whole-row Series with one common dtype, which turns integer IDs into
        # floats when the feature columns are float (and is slow per sample).
        fold_ids = data['ID'].iloc[test_index]

        # assumes predict_proba columns are ordered NCSE, ME, BI, i.e. 'Class' is
        # encoded as 0/1/2 in that order — TODO confirm against the data file
        for sample_id, true_label, proba in zip(fold_ids, y_test, y_pred_proba):
            predictions[model_name].append({
                'ID': sample_id,
                'Fold': fold_num,
                'True_Label': true_label,
                'NCSE_Prob': proba[0],
                'ME_Prob': proba[1],
                'BI_Prob': proba[2]
            })

# Replace each list of records by a DataFrame (one row per held-out sample).
for model_name in model_configs.keys():
    predictions[model_name] = pd.DataFrame(predictions[model_name])



In [None]:
# Write each model's prediction table to its own CSV in the working directory.
for model_name, preds in predictions.items():
    preds.to_csv(f"coh_predictions_{model_name}.csv", index=False)

In [None]:

def plot_roc_curve(y_true, y_pred_proba, model_name):
    """Draw one-vs-rest ROC curves for classes 0, 1 and 2 and save them as EPS.

    Args:
        y_true: Class labels; compared with 0, 1 and 2 to binarise each class.
        y_pred_proba: 2-D array indexed as [sample, class] with per-class scores.
        model_name: Used in the plot title and in the output file name.

    Side effects:
        Saves ``coh_ROC_<model_name>.eps`` in the working directory and shows the figure.
    """
    curves = []
    for class_idx in range(3):
        fpr, tpr, _ = roc_curve(y_true == class_idx, y_pred_proba[:, class_idx])
        curves.append((fpr, tpr, auc(fpr, tpr)))
    (fpr_ncse, tpr_ncse, auc_ncse), (fpr_me, tpr_me, auc_me), (fpr_bi, tpr_bi, auc_bi) = curves

    fig, ax = plt.subplots(figsize=(8, 6))
    ax.plot(fpr_ncse, tpr_ncse, label=f'NCSE (area = {auc_ncse:.2f})')
    ax.plot(fpr_me, tpr_me, label=f'ME (area = {auc_me:.2f})')
    ax.plot(fpr_bi, tpr_bi, label=f'BI (area = {auc_bi:.2f})')
    ax.plot([0, 1], [0, 1], 'k--')  # diagonal reference line
    ax.set_xlabel('False Positive Rate')
    ax.set_ylabel('True Positive Rate')
    ax.set_title(f'ROC Curve - {model_name}')
    ax.legend(loc='lower right')
    fig.savefig(f'coh_ROC_{model_name}.eps', format='eps')
    plt.show()

def plot_confusion_matrix(y_true, y_pred, model_name):
    """Plot the confusion matrix as an annotated heatmap and save it as EPS.

    Args:
        y_true: True class labels.
        y_pred: Predicted class labels.
        model_name: Used in the plot title and in the output file name.

    Side effects:
        Saves ``coh_CM_<model_name>.eps`` in the working directory and shows the figure.
    """
    counts = confusion_matrix(y_true, y_pred)

    fig, ax = plt.subplots(figsize=(6, 5))
    # Colour scale is fixed to 0..50 so figures of different models are comparable.
    sns.heatmap(counts, annot=True, fmt='d', cmap='Blues', cbar=False, vmin=0, vmax=50, ax=ax)
    ax.set_xlabel('Predicted Label')
    ax.set_ylabel('True Label')
    ax.set_title(f'Confusion Matrix - {model_name}')
    fig.savefig(f'coh_CM_{model_name}.eps', format='eps')
    plt.show()
    

def compute_corrected_metrics_from_cm(y_true, y_pred, y_pred_proba):
    """Compute one-vs-rest metrics per class and macro-averaged overall metrics.

    Args:
        y_true: True class labels (classes 0, 1 and 2).
        y_pred: Predicted class labels.
        y_pred_proba: Per-class scores passed to ``roc_auc_score`` for the overall AUC.

    Returns:
        dict with two keys:
            'overall': accuracy plus macro-averaged f1, precision, recall and
                one-vs-rest AUC.
            'classwise': list indexed by class with accuracy, f1, precision,
                recall and the tn/tp/fp/fn counts of that class versus the rest.
    """
    class_labels = [0, 1, 2]
    # Pin the label order: without `labels`, the matrix is built from the labels
    # that happen to occur, so row/column i would not be class i if a class is
    # missing from both y_true and y_pred.
    cm = confusion_matrix(y_true, y_pred, labels=class_labels)
    total_samples = len(y_true)

    classwise_metrics = []
    for i in class_labels:
        tp = cm[i, i]
        fn = cm[i, :].sum() - tp  # rest of the true-class row
        fp = cm[:, i].sum() - tp  # rest of the predicted-class column
        tn = total_samples - (tp + fn + fp)

        accuracy = (tp + tn) / (tp + tn + fp + fn)
        # Ratios with an empty denominator are reported as 0.
        precision = tp / (tp + fp) if (tp + fp) != 0 else 0
        recall = tp / (tp + fn) if (tp + fn) != 0 else 0
        f1 = (2 * precision * recall) / (precision + recall) if (precision + recall) != 0 else 0

        classwise_metrics.append({
            'accuracy': accuracy,
            'f1': f1,
            'precision': precision,
            'recall': recall,
            'tn': tn,
            'tp': tp,
            'fp': fp,
            'fn': fn
        })

    return {
        'overall': {
            'accuracy': accuracy_score(y_true, y_pred),
            'f1': f1_score(y_true, y_pred, average='macro'),
            'precision': precision_score(y_true, y_pred, average='macro'),
            'recall': recall_score(y_true, y_pred, average='macro'),
            'auc': roc_auc_score(y_true, y_pred_proba, average='macro', multi_class='ovr')
        },
        'classwise': classwise_metrics
    }

def confidence_interval(data, confidence=0.95):
    """Two-sided Student-t confidence interval for the mean of ``data``.

    Args:
        data: Sequence of numeric observations.
        confidence: Coverage of the interval, strictly between 0 and 1
            (default 0.95, the value that used to be hard-coded).

    Returns:
        Tuple ``(lower, upper)`` centred on the sample mean.

    Raises:
        ValueError: If ``confidence`` is not strictly between 0 and 1.
    """
    if not 0 < confidence < 1:
        raise ValueError("confidence must be strictly between 0 and 1")
    n = len(data)
    m = mean(data)
    std_err = sem(data)
    # Half-width = standard error times the t quantile with n - 1 degrees of freedom.
    half_width = std_err * t.ppf((1 + confidence) / 2, n - 1)
    return (m - half_width, m + half_width)


def bootstrap_ci(y_true, y_pred, y_pred_proba, metric_function, label=None, n_bootstrap=1000, alpha=0.05):
    """Compute the (1-alpha) percentile-bootstrap confidence interval of a metric.

    Exactly one of three modes is used:
      * ``label`` given: ``metric_function(binary_truth, y_pred_proba[:, label])``
        with the truth binarised as "is ``label``" (class-wise AUC).
      * ``y_pred`` given: ``metric_function(y_true, y_pred)`` (label-based metrics).
      * otherwise: ``metric_function(y_true, y_pred_proba)`` (overall AUC).

    Args:
        y_true: True labels (any array-like, including a pandas Series).
        y_pred: Predicted labels, or None.
        y_pred_proba: 2-D array-like of per-class scores, or None.
        metric_function: Callable ``(truth, prediction) -> float``.
        label: Class index for the class-wise mode, or None.
        n_bootstrap: Number of resamples drawn with replacement.
        alpha: Significance level; the interval spans the alpha/2 and
            1 - alpha/2 percentiles.

    Returns:
        Tuple ``(lower, upper)``.

    Raises:
        ValueError: If no resample could be scored.
    """
    y_true_array = np.asarray(y_true)
    n_samples = len(y_true_array)
    bootstrap_samples = np.random.choice(n_samples, size=(n_bootstrap, n_samples), replace=True)

    # Everything is converted to plain arrays so `[indices]` is positional; on a
    # pandas Series it would be a label lookup and fail for non-default indexes.
    if label is not None:  # For classwise AUC
        truth = (y_true_array == label).astype(int)
        predicted = np.asarray(y_pred_proba)[:, label]
        score_based = True
    elif y_pred is not None:  # For metrics other than AUC
        truth = y_true_array
        predicted = np.asarray(y_pred)
        score_based = False
    else:  # For overall AUC
        truth = y_true_array
        predicted = np.asarray(y_pred_proba)
        score_based = True

    n_classes = np.unique(truth).size
    stats = []
    for indices in bootstrap_samples:
        resampled_truth = truth[indices]
        # Score-based metrics such as AUC are undefined when a resample lacks one
        # of the classes; such draws are skipped instead of aborting or yielding NaN.
        if score_based and np.unique(resampled_truth).size < n_classes:
            continue
        stats.append(metric_function(resampled_truth, predicted[indices]))

    if not stats:
        raise ValueError("no bootstrap resample contained every class; cannot compute the interval")

    return (np.percentile(stats, 100 * (alpha / 2.)), np.percentile(stats, 100 * (1 - alpha / 2.)))

def bootstrap_ci_for_auc(y_true, y_pred_proba, label, n_bootstrap=1000, alpha=0.05):
    """Compute the (1-alpha) confidence interval of the AUC using bootstrap for a specific class.

    Args:
        y_true: True class labels.
        y_pred_proba: 2-D array-like indexed as [sample, class].
        label: Class treated as positive; all other classes are negative.
        n_bootstrap: Number of resamples drawn with replacement.
        alpha: Significance level of the percentile interval.

    Returns:
        Tuple ``(lower, upper)``.

    Raises:
        ValueError: If every resample contained a single class only.
    """
    bootstrap_samples = np.random.choice(len(y_true), size=(n_bootstrap, len(y_true)), replace=True)
    binary_true = (np.asarray(y_true) == label).astype(int)
    class_scores = np.asarray(y_pred_proba)[:, label]

    auc_stats = []
    for indices in bootstrap_samples:
        resampled_true = binary_true[indices]
        # AUC is undefined for a resample with only positives or only negatives;
        # skip it rather than let roc_auc_score raise or return NaN.
        if resampled_true.min() == resampled_true.max():
            continue
        auc_stats.append(roc_auc_score(resampled_true, class_scores[indices]))

    if not auc_stats:
        raise ValueError("no bootstrap resample contained both classes; cannot compute the AUC interval")

    return (np.percentile(auc_stats, 100 * (alpha / 2.)), np.percentile(auc_stats, 100 * (1 - alpha / 2.)))

def bootstrap_ci_classwise_metric(y_true, y_pred, metric_function, label, n_bootstrap=1000, alpha=0.05):
    """Percentile-bootstrap (1-alpha) interval of a binary metric for one class.

    Truth and prediction are binarised as "is ``label``" versus the rest, then
    ``metric_function(binary_truth, binary_prediction)`` is evaluated on
    ``n_bootstrap`` resamples drawn with ``sklearn.utils.resample``.

    Returns:
        Tuple ``(lower, upper)`` at the alpha/2 and 1 - alpha/2 percentiles.
    """
    true_is_label = (np.array(y_true) == label).astype(int)
    pred_is_label = (np.array(y_pred) == label).astype(int)
    positions = np.arange(len(y_true))  # row positions to draw from

    def score_one_resample():
        drawn = resample(positions)
        return metric_function(true_is_label[drawn], pred_is_label[drawn])

    stats = [score_one_resample() for _ in range(n_bootstrap)]

    lower_pct = 100 * (alpha / 2.)
    upper_pct = 100 * (1 - alpha / 2.)
    return (np.percentile(stats, lower_pct), np.percentile(stats, upper_pct))

def bootstrap_ci_classwise_auc(y_true, y_pred_proba, label, n_bootstrap=1000, alpha=0.05):
    """Compute the (1-alpha) confidence interval of the AUC using bootstrap for a specific class.

    Args:
        y_true: True class labels.
        y_pred_proba: 2-D array-like indexed as [sample, class].
        label: Class treated as positive; all other classes are negative.
        n_bootstrap: Number of resamples drawn with ``sklearn.utils.resample``.
        alpha: Significance level of the percentile interval.

    Returns:
        Tuple ``(lower, upper)``.

    Raises:
        ValueError: If every resample contained a single class only.
    """
    indices = np.arange(len(y_true))  # original indices
    y_true_binary = (np.array(y_true) == label).astype(int)
    class_scores = np.asarray(y_pred_proba)[:, label]

    auc_stats = []
    for _ in range(n_bootstrap):
        # Always draw, so the random stream is consumed the same way whether or
        # not the draw is usable.
        resampled_indices = resample(indices)
        resampled_true = y_true_binary[resampled_indices]
        # AUC is undefined for a resample with only positives or only negatives;
        # skip it rather than let roc_auc_score raise or return NaN.
        if resampled_true.min() == resampled_true.max():
            continue
        auc_stats.append(roc_auc_score(resampled_true, class_scores[resampled_indices]))

    if not auc_stats:
        raise ValueError("no bootstrap resample contained both classes; cannot compute the AUC interval")

    return (np.percentile(auc_stats, 100 * (alpha / 2.)), np.percentile(auc_stats, 100 * (1 - alpha / 2.)))

def compute_classwise_auc(y_true, y_pred_proba):
    """Return the one-vs-rest AUCs of classes 0, 1 and 2 as a list, in that order."""
    return [
        roc_auc_score(np.where(y_true == class_idx, 1, 0), y_pred_proba[:, class_idx])
        for class_idx in range(3)
    ]

In [None]:
# Build the performance table: for every model, one row per class (one-vs-rest)
# plus one 'Overall' row, each with point estimates and bootstrap intervals.
performance_data_corrected_v3 = []

for model_name, preds in predictions.items():
    y_true = preds["True_Label"]
    y_pred_proba = preds[["NCSE_Prob", "ME_Prob", "BI_Prob"]].values
    # Predicted class = position of the largest of the three probability columns.
    y_pred = np.argmax(y_pred_proba, axis=1)

    plot_roc_curve(y_true, y_pred_proba, model_name)
    plot_confusion_matrix(y_true, y_pred, model_name)

    # Computed once; both the class-wise counts and the overall block are read from it.
    metrics_from_cm = compute_corrected_metrics_from_cm(y_true, y_pred, y_pred_proba)
    overall_metrics = metrics_from_cm['overall']

    for label in range(3):  # For each class label
        true_is_label = (y_true == label).astype(int)
        pred_is_label = (y_pred == label).astype(int)
        class_counts = metrics_from_cm['classwise'][label]

        # The bootstrap calls stay in this order so the random stream is consumed
        # exactly as before.
        accuracy_ci = bootstrap_ci_classwise_metric(y_true, y_pred, accuracy_score, label)
        f1_ci = bootstrap_ci_classwise_metric(y_true, y_pred, f1_score, label)
        precision_ci = bootstrap_ci_classwise_metric(y_true, y_pred, precision_score, label)
        recall_ci = bootstrap_ci_classwise_metric(y_true, y_pred, recall_score, label)
        auc_ci = bootstrap_ci_classwise_auc(y_true, y_pred_proba, label)

        performance_data_corrected_v3.append({
            'Model': model_name,
            'Label': label,
            'Accuracy': accuracy_score(true_is_label, pred_is_label),
            'F1': f1_score(true_is_label, pred_is_label),
            'Precision': precision_score(true_is_label, pred_is_label),
            'Recall': recall_score(true_is_label, pred_is_label),
            'TP': class_counts['tp'],
            'TN': class_counts['tn'],
            'FP': class_counts['fp'],
            'FN': class_counts['fn'],
            'AUC': roc_auc_score(true_is_label, y_pred_proba[:, label]),
            'Accuracy CI': accuracy_ci,
            'F1 CI': f1_ci,
            'Precision CI': precision_ci,
            'Recall CI': recall_ci,
            'AUC CI': auc_ci
        })

    # Overall intervals: accuracy plus macro-averaged F1/precision/recall and OvR AUC.
    accuracy_ci = bootstrap_ci(y_true, y_pred, None, lambda yt, yp: accuracy_score(yt, yp))
    f1_ci = bootstrap_ci(y_true, y_pred, None, lambda yt, yp: f1_score(yt, yp, average='macro'))
    precision_ci = bootstrap_ci(y_true, y_pred, None, lambda yt, yp: precision_score(yt, yp, average='macro'))
    recall_ci = bootstrap_ci(y_true, y_pred, None, lambda yt, yp: recall_score(yt, yp, average='macro'))
    auc_ci = bootstrap_ci(y_true, None, y_pred_proba, lambda yt, yp: roc_auc_score(yt, yp, average='macro', multi_class='ovr'))

    overall_row = {'Model': model_name, 'Label': 'Overall'}
    overall_row.update({
        'Accuracy': overall_metrics['accuracy'],
        'F1': overall_metrics['f1'],
        'Precision': overall_metrics['precision'],
        'Recall': overall_metrics['recall'],
        'AUC': overall_metrics['auc'],
        'Accuracy CI': accuracy_ci,
        'F1 CI': f1_ci,
        'Precision CI': precision_ci,
        'Recall CI': recall_ci,
        'AUC CI': auc_ci
    })
    performance_data_corrected_v3.append(overall_row)

performance_df_corrected_v3 = pd.DataFrame(performance_data_corrected_v3)
performance_df_corrected_v3.to_csv('coh_performance_results.csv', index=False)


In [None]:

# Reload the coherence performance table from disk and show it as the cell output.
performance_df = pd.read_csv('coh_performance_results.csv')
performance_df

# Coherence-based prospective validation

In [None]:

# Prospective validation: fit every pipeline on the complete 150-sample table and
# score the separate prospective table.
# NOTE(review): both paths are hard-coded absolute paths.
data = pd.read_csv('D:/coherence_data_150.csv')
y = data['Class']
X = data.drop(columns=['Class', 'ID'])

pro_data = pd.read_csv('D:/coherence_data_pro.csv')
y_pro = pro_data['Class']
X_pro = pro_data.drop(columns=['Class', 'ID'])

# model name -> list of per-sample prediction records (converted to DataFrames below)
predictions_pro = defaultdict(list)

for model_name, config in model_configs.items():
    print(f"Processing {model_name}...")

    selector = SelectKBest(f_classif, k=config['n_features'])
    pipeline = Pipeline([('selector', selector), ('clf', config['model'])])
    pipeline.fit(X, y)

    y_pred_proba_pro = pipeline.predict_proba(X_pro)

    # assumes predict_proba columns are ordered NCSE, ME, BI — TODO confirm
    predictions_pro[model_name].extend(
        {
            'ID': actual_id,
            'True_Label': true_label,
            'NCSE_Prob': proba[0],
            'ME_Prob': proba[1],
            'BI_Prob': proba[2]
        }
        for actual_id, true_label, proba in zip(pro_data['ID'], y_pro.to_numpy(), y_pred_proba_pro)
    )

for model_name in model_configs:
    predictions_pro[model_name] = pd.DataFrame(predictions_pro[model_name])


In [None]:
# Write each model's prospective prediction table to its own CSV.
for model_name, preds in predictions_pro.items():
    preds.to_csv(f"coh_predictions_prospective_{model_name}.csv", index=False)

In [None]:

def plot_roc_curve(y_true, y_pred_proba, model_name):
    """Draw one-vs-rest ROC curves for classes 0, 1 and 2 and save them as EPS.

    Args:
        y_true: Class labels; compared with 0, 1 and 2 to binarise each class.
        y_pred_proba: 2-D array indexed as [sample, class] with per-class scores.
        model_name: Used in the plot title and in the output file name.

    Side effects:
        Saves ``coh_prospective_ROC_<model_name>.eps`` in the working directory
        and shows the figure.
    """
    curves = []
    for class_idx in range(3):
        fpr, tpr, _ = roc_curve(y_true == class_idx, y_pred_proba[:, class_idx])
        curves.append((fpr, tpr, auc(fpr, tpr)))
    (fpr_ncse, tpr_ncse, auc_ncse), (fpr_me, tpr_me, auc_me), (fpr_bi, tpr_bi, auc_bi) = curves

    fig, ax = plt.subplots(figsize=(8, 6))
    ax.plot(fpr_ncse, tpr_ncse, label=f'NCSE (area = {auc_ncse:.2f})')
    ax.plot(fpr_me, tpr_me, label=f'ME (area = {auc_me:.2f})')
    ax.plot(fpr_bi, tpr_bi, label=f'BI (area = {auc_bi:.2f})')
    ax.plot([0, 1], [0, 1], 'k--')  # diagonal reference line
    ax.set_xlabel('False Positive Rate')
    ax.set_ylabel('True Positive Rate')
    ax.set_title(f'ROC Curve - {model_name}')
    ax.legend(loc='lower right')
    fig.savefig(f'coh_prospective_ROC_{model_name}.eps', format='eps')
    plt.show()

def plot_confusion_matrix(y_true, y_pred, model_name):
    """Plot the confusion matrix as an annotated heatmap and save it as EPS.

    Args:
        y_true: True class labels.
        y_pred: Predicted class labels.
        model_name: Used in the plot title and in the output file name.

    Side effects:
        Saves ``coh_prospective_CM_<model_name>.eps`` in the working directory
        and shows the figure.
    """
    counts = confusion_matrix(y_true, y_pred)

    fig, ax = plt.subplots(figsize=(6, 5))
    # Colour scale is fixed to 0..10 so figures of different models are comparable.
    sns.heatmap(counts, annot=True, fmt='d', cmap='Blues', cbar=False, vmin=0, vmax=10, ax=ax)
    ax.set_xlabel('Predicted Label')
    ax.set_ylabel('True Label')
    ax.set_title(f'Confusion Matrix - {model_name}')
    fig.savefig(f'coh_prospective_CM_{model_name}.eps', format='eps')
    plt.show()
    
# Build the prospective performance table: for every model, one row per class
# (one-vs-rest) plus one 'Overall' row, with point estimates and bootstrap intervals.
performance_data_corrected_pro = []

for model_name, preds in predictions_pro.items():
    y_true = preds["True_Label"]
    y_pred_proba = preds[["NCSE_Prob", "ME_Prob", "BI_Prob"]].values
    # Predicted class = position of the largest of the three probability columns.
    y_pred = np.argmax(y_pred_proba, axis=1)

    plot_roc_curve(y_true, y_pred_proba, model_name)
    plot_confusion_matrix(y_true, y_pred, model_name)

    # Computed once; both the class-wise counts and the overall block are read from it.
    metrics_from_cm = compute_corrected_metrics_from_cm(y_true, y_pred, y_pred_proba)
    overall_metrics = metrics_from_cm['overall']

    for label in range(3):  # For each class label
        true_is_label = (y_true == label).astype(int)
        pred_is_label = (y_pred == label).astype(int)
        class_counts = metrics_from_cm['classwise'][label]

        # The bootstrap calls stay in this order so the random stream is consumed
        # exactly as before.
        accuracy_ci = bootstrap_ci_classwise_metric(y_true, y_pred, accuracy_score, label)
        f1_ci = bootstrap_ci_classwise_metric(y_true, y_pred, f1_score, label)
        precision_ci = bootstrap_ci_classwise_metric(y_true, y_pred, precision_score, label)
        recall_ci = bootstrap_ci_classwise_metric(y_true, y_pred, recall_score, label)
        auc_ci = bootstrap_ci_classwise_auc(y_true, y_pred_proba, label)

        performance_data_corrected_pro.append({
            'Model': model_name,
            'Label': label,
            'Accuracy': accuracy_score(true_is_label, pred_is_label),
            'F1': f1_score(true_is_label, pred_is_label),
            'Precision': precision_score(true_is_label, pred_is_label),
            'Recall': recall_score(true_is_label, pred_is_label),
            'TP': class_counts['tp'],
            'TN': class_counts['tn'],
            'FP': class_counts['fp'],
            'FN': class_counts['fn'],
            'AUC': roc_auc_score(true_is_label, y_pred_proba[:, label]),
            'Accuracy CI': accuracy_ci,
            'F1 CI': f1_ci,
            'Precision CI': precision_ci,
            'Recall CI': recall_ci,
            'AUC CI': auc_ci
        })

    # Overall intervals: accuracy plus macro-averaged F1/precision/recall and OvR AUC.
    accuracy_ci = bootstrap_ci(y_true, y_pred, None, lambda yt, yp: accuracy_score(yt, yp))
    f1_ci = bootstrap_ci(y_true, y_pred, None, lambda yt, yp: f1_score(yt, yp, average='macro'))
    precision_ci = bootstrap_ci(y_true, y_pred, None, lambda yt, yp: precision_score(yt, yp, average='macro'))
    recall_ci = bootstrap_ci(y_true, y_pred, None, lambda yt, yp: recall_score(yt, yp, average='macro'))
    auc_ci = bootstrap_ci(y_true, None, y_pred_proba, lambda yt, yp: roc_auc_score(yt, yp, average='macro', multi_class='ovr'))

    overall_row = {'Model': model_name, 'Label': 'Overall'}
    overall_row.update({
        'Accuracy': overall_metrics['accuracy'],
        'F1': overall_metrics['f1'],
        'Precision': overall_metrics['precision'],
        'Recall': overall_metrics['recall'],
        'AUC': overall_metrics['auc'],
        'Accuracy CI': accuracy_ci,
        'F1 CI': f1_ci,
        'Precision CI': precision_ci,
        'Recall CI': recall_ci,
        'AUC CI': auc_ci
    })
    performance_data_corrected_pro.append(overall_row)

# The list of row dicts is replaced by the DataFrame built from it.
performance_data_corrected_pro = pd.DataFrame(performance_data_corrected_pro)
performance_data_corrected_pro.to_csv('coh_performance_results_prospective.csv', index=False)


# Graph measurement-based prediction

In [None]:
# Seed NumPy's and Python's global generators again for the graph-measure section.
seed_value = 777  # can be changed to any desired seed value
np.random.seed(seed_value)
random.seed(seed_value)

In [None]:

# Load the graph-measure feature table.
# NOTE(review): hard-coded absolute path; the notebook only runs where this file exists.
data = pd.read_csv('D:/graph_data_150.csv')

# 'Class' is the target; 'ID' is dropped from the feature matrix together with it.
y = data['Class']
X = data.drop(columns=['Class', 'ID'])

# One entry per classifier: the estimator instance ("model") and the number of
# features ("n_features") later passed as k to SelectKBest.
# NOTE(review): the n_features values are presumably the result of earlier tuning — confirm.
model_configs = {
    "LogisticRegression": dict(
        model=LogisticRegression(C=100, max_iter=3000, penalty='l2', solver='lbfgs', random_state=seed_value),
        n_features=12,
    ),
    "GaussianNB": dict(
        model=GaussianNB(),
        n_features=15,
    ),
    "DecisionTreeClassifier": dict(
        model=DecisionTreeClassifier(criterion='entropy', max_depth=3, random_state=seed_value),
        n_features=60,
    ),
    "RandomForestClassifier": dict(
        model=RandomForestClassifier(max_depth=10, n_estimators=100, random_state=seed_value),
        n_features=39,
    ),
    "SVC": dict(
        model=SVC(C=10, gamma='scale', kernel='linear', probability=True, random_state=seed_value),
        n_features=12,
    ),
    "MLPClassifier": dict(
        model=MLPClassifier(hidden_layer_sizes=(10,), max_iter=5000, random_state=seed_value),
        n_features=12,
    ),
}


In [None]:


# Stratified 10-fold cross-validation. Every model is wrapped in a pipeline so the
# univariate feature selection is re-fitted on the training part of each fold only.
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed_value)

# model name -> list of per-sample prediction records (converted to DataFrames below)
predictions = defaultdict(list)

for model_name, config in model_configs.items():
    print(f"Processing {model_name}...")

    selector = SelectKBest(f_classif, k=config['n_features'])
    pipeline = Pipeline([('selector', selector), ('clf', config['model'])])

    for fold_num, (train_index, test_index) in enumerate(skf.split(X, y), start=1):
        X_train, X_test = X.iloc[train_index], X.iloc[test_index]
        y_train, y_test = y.iloc[train_index], y.iloc[test_index]

        pipeline.fit(X_train, y_train)
        y_pred_proba = pipeline.predict_proba(X_test)

        # Read IDs from the ID column itself. `data.iloc[row]['ID']` first builds a
        # whole-row Series with one common dtype, which turns integer IDs into
        # floats when the feature columns are float (and is slow per sample).
        fold_ids = data['ID'].iloc[test_index]

        # assumes predict_proba columns are ordered NCSE, ME, BI, i.e. 'Class' is
        # encoded as 0/1/2 in that order — TODO confirm against the data file
        for sample_id, true_label, proba in zip(fold_ids, y_test, y_pred_proba):
            predictions[model_name].append({
                'ID': sample_id,
                'Fold': fold_num,
                'True_Label': true_label,
                'NCSE_Prob': proba[0],
                'ME_Prob': proba[1],
                'BI_Prob': proba[2]
            })

# Replace each list of records by a DataFrame (one row per held-out sample).
for model_name in model_configs.keys():
    predictions[model_name] = pd.DataFrame(predictions[model_name])



In [None]:
# Write each model's prediction table to its own CSV in the working directory.
for model_name, preds in predictions.items():
    preds.to_csv(f"graph_predictions_{model_name}.csv", index=False)

In [None]:

def plot_roc_curve(y_true, y_pred_proba, model_name):
    """Draw one-vs-rest ROC curves for classes 0, 1 and 2 and save them as EPS.

    Args:
        y_true: Class labels; compared with 0, 1 and 2 to binarise each class.
        y_pred_proba: 2-D array indexed as [sample, class] with per-class scores.
        model_name: Used in the plot title and in the output file name.

    Side effects:
        Saves ``graph_ROC_<model_name>.eps`` in the working directory and shows the figure.
    """
    curves = []
    for class_idx in range(3):
        fpr, tpr, _ = roc_curve(y_true == class_idx, y_pred_proba[:, class_idx])
        curves.append((fpr, tpr, auc(fpr, tpr)))
    (fpr_ncse, tpr_ncse, auc_ncse), (fpr_me, tpr_me, auc_me), (fpr_bi, tpr_bi, auc_bi) = curves

    fig, ax = plt.subplots(figsize=(8, 6))
    ax.plot(fpr_ncse, tpr_ncse, label=f'NCSE (area = {auc_ncse:.2f})')
    ax.plot(fpr_me, tpr_me, label=f'ME (area = {auc_me:.2f})')
    ax.plot(fpr_bi, tpr_bi, label=f'BI (area = {auc_bi:.2f})')
    ax.plot([0, 1], [0, 1], 'k--')  # diagonal reference line
    ax.set_xlabel('False Positive Rate')
    ax.set_ylabel('True Positive Rate')
    ax.set_title(f'ROC Curve - {model_name}')
    ax.legend(loc='lower right')
    fig.savefig(f'graph_ROC_{model_name}.eps', format='eps')
    plt.show()

def plot_confusion_matrix(y_true, y_pred, model_name):
    """Plot the confusion matrix as an annotated heatmap and save it as EPS.

    Args:
        y_true: True class labels.
        y_pred: Predicted class labels.
        model_name: Used in the plot title and in the output file name.

    Side effects:
        Saves ``graph_CM_<model_name>.eps`` in the working directory and shows the figure.
    """
    counts = confusion_matrix(y_true, y_pred)

    fig, ax = plt.subplots(figsize=(6, 5))
    # Colour scale is fixed to 0..50 so figures of different models are comparable.
    sns.heatmap(counts, annot=True, fmt='d', cmap='Blues', cbar=False, vmin=0, vmax=50, ax=ax)
    ax.set_xlabel('Predicted Label')
    ax.set_ylabel('True Label')
    ax.set_title(f'Confusion Matrix - {model_name}')
    fig.savefig(f'graph_CM_{model_name}.eps', format='eps')
    plt.show()
    

def compute_corrected_metrics_from_cm(y_true, y_pred, y_pred_proba):
    """Compute one-vs-rest metrics per class and macro-averaged overall metrics.

    Args:
        y_true: True class labels (classes 0, 1 and 2).
        y_pred: Predicted class labels.
        y_pred_proba: Per-class scores passed to ``roc_auc_score`` for the overall AUC.

    Returns:
        dict with two keys:
            'overall': accuracy plus macro-averaged f1, precision, recall and
                one-vs-rest AUC.
            'classwise': list indexed by class with accuracy, f1, precision,
                recall and the tn/tp/fp/fn counts of that class versus the rest.
    """
    class_labels = [0, 1, 2]
    # Pin the label order: without `labels`, the matrix is built from the labels
    # that happen to occur, so row/column i would not be class i if a class is
    # missing from both y_true and y_pred.
    cm = confusion_matrix(y_true, y_pred, labels=class_labels)
    total_samples = len(y_true)

    classwise_metrics = []
    for i in class_labels:
        tp = cm[i, i]
        fn = cm[i, :].sum() - tp  # rest of the true-class row
        fp = cm[:, i].sum() - tp  # rest of the predicted-class column
        tn = total_samples - (tp + fn + fp)

        accuracy = (tp + tn) / (tp + tn + fp + fn)
        # Ratios with an empty denominator are reported as 0.
        precision = tp / (tp + fp) if (tp + fp) != 0 else 0
        recall = tp / (tp + fn) if (tp + fn) != 0 else 0
        f1 = (2 * precision * recall) / (precision + recall) if (precision + recall) != 0 else 0

        classwise_metrics.append({
            'accuracy': accuracy,
            'f1': f1,
            'precision': precision,
            'recall': recall,
            'tn': tn,
            'tp': tp,
            'fp': fp,
            'fn': fn
        })

    return {
        'overall': {
            'accuracy': accuracy_score(y_true, y_pred),
            'f1': f1_score(y_true, y_pred, average='macro'),
            'precision': precision_score(y_true, y_pred, average='macro'),
            'recall': recall_score(y_true, y_pred, average='macro'),
            'auc': roc_auc_score(y_true, y_pred_proba, average='macro', multi_class='ovr')
        },
        'classwise': classwise_metrics
    }

def confidence_interval(data, confidence=0.95):
    """Two-sided Student-t confidence interval for the mean of ``data``.

    Args:
        data: Sequence of numeric observations.
        confidence: Coverage of the interval, strictly between 0 and 1
            (default 0.95, the value that used to be hard-coded).

    Returns:
        Tuple ``(lower, upper)`` centred on the sample mean.

    Raises:
        ValueError: If ``confidence`` is not strictly between 0 and 1.
    """
    if not 0 < confidence < 1:
        raise ValueError("confidence must be strictly between 0 and 1")
    n = len(data)
    m = mean(data)
    std_err = sem(data)
    # Half-width = standard error times the t quantile with n - 1 degrees of freedom.
    half_width = std_err * t.ppf((1 + confidence) / 2, n - 1)
    return (m - half_width, m + half_width)


def bootstrap_ci(y_true, y_pred, y_pred_proba, metric_function, label=None, n_bootstrap=1000, alpha=0.05):
    """Compute the (1-alpha) percentile-bootstrap confidence interval of a metric.

    Exactly one of three modes is used:
      * ``label`` given: ``metric_function(binary_truth, y_pred_proba[:, label])``
        with the truth binarised as "is ``label``" (class-wise AUC).
      * ``y_pred`` given: ``metric_function(y_true, y_pred)`` (label-based metrics).
      * otherwise: ``metric_function(y_true, y_pred_proba)`` (overall AUC).

    Args:
        y_true: True labels (any array-like, including a pandas Series).
        y_pred: Predicted labels, or None.
        y_pred_proba: 2-D array-like of per-class scores, or None.
        metric_function: Callable ``(truth, prediction) -> float``.
        label: Class index for the class-wise mode, or None.
        n_bootstrap: Number of resamples drawn with replacement.
        alpha: Significance level; the interval spans the alpha/2 and
            1 - alpha/2 percentiles.

    Returns:
        Tuple ``(lower, upper)``.

    Raises:
        ValueError: If no resample could be scored.
    """
    y_true_array = np.asarray(y_true)
    n_samples = len(y_true_array)
    bootstrap_samples = np.random.choice(n_samples, size=(n_bootstrap, n_samples), replace=True)

    # Everything is converted to plain arrays so `[indices]` is positional; on a
    # pandas Series it would be a label lookup and fail for non-default indexes.
    if label is not None:  # For classwise AUC
        truth = (y_true_array == label).astype(int)
        predicted = np.asarray(y_pred_proba)[:, label]
        score_based = True
    elif y_pred is not None:  # For metrics other than AUC
        truth = y_true_array
        predicted = np.asarray(y_pred)
        score_based = False
    else:  # For overall AUC
        truth = y_true_array
        predicted = np.asarray(y_pred_proba)
        score_based = True

    n_classes = np.unique(truth).size
    stats = []
    for indices in bootstrap_samples:
        resampled_truth = truth[indices]
        # Score-based metrics such as AUC are undefined when a resample lacks one
        # of the classes; such draws are skipped instead of aborting or yielding NaN.
        if score_based and np.unique(resampled_truth).size < n_classes:
            continue
        stats.append(metric_function(resampled_truth, predicted[indices]))

    if not stats:
        raise ValueError("no bootstrap resample contained every class; cannot compute the interval")

    return (np.percentile(stats, 100 * (alpha / 2.)), np.percentile(stats, 100 * (1 - alpha / 2.)))

def bootstrap_ci_for_auc(y_true, y_pred_proba, label, n_bootstrap=1000, alpha=0.05):
    """Compute the (1-alpha) confidence interval of the AUC using bootstrap for a specific class.

    Args:
        y_true: True class labels.
        y_pred_proba: 2-D array-like indexed as [sample, class].
        label: Class treated as positive; all other classes are negative.
        n_bootstrap: Number of resamples drawn with replacement.
        alpha: Significance level of the percentile interval.

    Returns:
        Tuple ``(lower, upper)``.

    Raises:
        ValueError: If every resample contained a single class only.
    """
    bootstrap_samples = np.random.choice(len(y_true), size=(n_bootstrap, len(y_true)), replace=True)
    binary_true = (np.asarray(y_true) == label).astype(int)
    class_scores = np.asarray(y_pred_proba)[:, label]

    auc_stats = []
    for indices in bootstrap_samples:
        resampled_true = binary_true[indices]
        # AUC is undefined for a resample with only positives or only negatives;
        # skip it rather than let roc_auc_score raise or return NaN.
        if resampled_true.min() == resampled_true.max():
            continue
        auc_stats.append(roc_auc_score(resampled_true, class_scores[indices]))

    if not auc_stats:
        raise ValueError("no bootstrap resample contained both classes; cannot compute the AUC interval")

    return (np.percentile(auc_stats, 100 * (alpha / 2.)), np.percentile(auc_stats, 100 * (1 - alpha / 2.)))

def bootstrap_ci_classwise_metric(y_true, y_pred, metric_function, label, n_bootstrap=1000, alpha=0.05):
    """Percentile-bootstrap (1-alpha) interval of a binary metric for one class.

    Truth and prediction are binarised as "is ``label``" versus the rest, then
    ``metric_function(binary_truth, binary_prediction)`` is evaluated on
    ``n_bootstrap`` resamples drawn with ``sklearn.utils.resample``.

    Returns:
        Tuple ``(lower, upper)`` at the alpha/2 and 1 - alpha/2 percentiles.
    """
    true_is_label = (np.array(y_true) == label).astype(int)
    pred_is_label = (np.array(y_pred) == label).astype(int)
    positions = np.arange(len(y_true))  # row positions to draw from

    def score_one_resample():
        drawn = resample(positions)
        return metric_function(true_is_label[drawn], pred_is_label[drawn])

    stats = [score_one_resample() for _ in range(n_bootstrap)]

    lower_pct = 100 * (alpha / 2.)
    upper_pct = 100 * (1 - alpha / 2.)
    return (np.percentile(stats, lower_pct), np.percentile(stats, upper_pct))

def bootstrap_ci_classwise_auc(y_true, y_pred_proba, label, n_bootstrap=1000, alpha=0.05):
    """Compute the (1-alpha) confidence interval of the AUC using bootstrap for a specific class.

    Args:
        y_true: True class labels.
        y_pred_proba: 2-D array-like indexed as [sample, class].
        label: Class treated as positive; all other classes are negative.
        n_bootstrap: Number of resamples drawn with ``sklearn.utils.resample``.
        alpha: Significance level of the percentile interval.

    Returns:
        Tuple ``(lower, upper)``.

    Raises:
        ValueError: If every resample contained a single class only.
    """
    indices = np.arange(len(y_true))  # original indices
    y_true_binary = (np.array(y_true) == label).astype(int)
    class_scores = np.asarray(y_pred_proba)[:, label]

    auc_stats = []
    for _ in range(n_bootstrap):
        # Always draw, so the random stream is consumed the same way whether or
        # not the draw is usable.
        resampled_indices = resample(indices)
        resampled_true = y_true_binary[resampled_indices]
        # AUC is undefined for a resample with only positives or only negatives;
        # skip it rather than let roc_auc_score raise or return NaN.
        if resampled_true.min() == resampled_true.max():
            continue
        auc_stats.append(roc_auc_score(resampled_true, class_scores[resampled_indices]))

    if not auc_stats:
        raise ValueError("no bootstrap resample contained both classes; cannot compute the AUC interval")

    return (np.percentile(auc_stats, 100 * (alpha / 2.)), np.percentile(auc_stats, 100 * (1 - alpha / 2.)))

def compute_classwise_auc(y_true, y_pred_proba):
    """Return the one-vs-rest AUCs of classes 0, 1 and 2 as a list, in that order."""
    return [
        roc_auc_score(np.where(y_true == class_idx, 1, 0), y_pred_proba[:, class_idx])
        for class_idx in range(3)
    ]

In [None]:
# Build the performance table: for every model, one row per class (one-vs-rest)
# plus one 'Overall' row, each with point estimates and bootstrap intervals.
performance_data_corrected_v3 = []

for model_name, preds in predictions.items():
    y_true = preds["True_Label"]
    y_pred_proba = preds[["NCSE_Prob", "ME_Prob", "BI_Prob"]].values
    # Predicted class = position of the largest of the three probability columns.
    y_pred = np.argmax(y_pred_proba, axis=1)

    plot_roc_curve(y_true, y_pred_proba, model_name)
    plot_confusion_matrix(y_true, y_pred, model_name)

    # Computed once; both the class-wise counts and the overall block are read from it.
    metrics_from_cm = compute_corrected_metrics_from_cm(y_true, y_pred, y_pred_proba)
    overall_metrics = metrics_from_cm['overall']

    for label in range(3):  # For each class label
        true_is_label = (y_true == label).astype(int)
        pred_is_label = (y_pred == label).astype(int)
        class_counts = metrics_from_cm['classwise'][label]

        # The bootstrap calls stay in this order so the random stream is consumed
        # exactly as before.
        accuracy_ci = bootstrap_ci_classwise_metric(y_true, y_pred, accuracy_score, label)
        f1_ci = bootstrap_ci_classwise_metric(y_true, y_pred, f1_score, label)
        precision_ci = bootstrap_ci_classwise_metric(y_true, y_pred, precision_score, label)
        recall_ci = bootstrap_ci_classwise_metric(y_true, y_pred, recall_score, label)
        auc_ci = bootstrap_ci_classwise_auc(y_true, y_pred_proba, label)

        performance_data_corrected_v3.append({
            'Model': model_name,
            'Label': label,
            'Accuracy': accuracy_score(true_is_label, pred_is_label),
            'F1': f1_score(true_is_label, pred_is_label),
            'Precision': precision_score(true_is_label, pred_is_label),
            'Recall': recall_score(true_is_label, pred_is_label),
            'TP': class_counts['tp'],
            'TN': class_counts['tn'],
            'FP': class_counts['fp'],
            'FN': class_counts['fn'],
            'AUC': roc_auc_score(true_is_label, y_pred_proba[:, label]),
            'Accuracy CI': accuracy_ci,
            'F1 CI': f1_ci,
            'Precision CI': precision_ci,
            'Recall CI': recall_ci,
            'AUC CI': auc_ci
        })

    # Overall intervals: accuracy plus macro-averaged F1/precision/recall and OvR AUC.
    accuracy_ci = bootstrap_ci(y_true, y_pred, None, lambda yt, yp: accuracy_score(yt, yp))
    f1_ci = bootstrap_ci(y_true, y_pred, None, lambda yt, yp: f1_score(yt, yp, average='macro'))
    precision_ci = bootstrap_ci(y_true, y_pred, None, lambda yt, yp: precision_score(yt, yp, average='macro'))
    recall_ci = bootstrap_ci(y_true, y_pred, None, lambda yt, yp: recall_score(yt, yp, average='macro'))
    auc_ci = bootstrap_ci(y_true, None, y_pred_proba, lambda yt, yp: roc_auc_score(yt, yp, average='macro', multi_class='ovr'))

    overall_row = {'Model': model_name, 'Label': 'Overall'}
    overall_row.update({
        'Accuracy': overall_metrics['accuracy'],
        'F1': overall_metrics['f1'],
        'Precision': overall_metrics['precision'],
        'Recall': overall_metrics['recall'],
        'AUC': overall_metrics['auc'],
        'Accuracy CI': accuracy_ci,
        'F1 CI': f1_ci,
        'Precision CI': precision_ci,
        'Recall CI': recall_ci,
        'AUC CI': auc_ci
    })
    performance_data_corrected_v3.append(overall_row)

performance_df_corrected_v3 = pd.DataFrame(performance_data_corrected_v3)
performance_df_corrected_v3.to_csv('graph_performance_results.csv', index=False)


In [None]:
# Load a performance table from disk and show it as the cell output.
# NOTE(review): no cell in this notebook writes 'formatted_performance_results_prospective_.csv';
# the graph cross-validation results are saved as 'graph_performance_results.csv' and the
# matching coherence cell reloads the file it has just written. Confirm that this file name
# is intended — on a fresh run it raises FileNotFoundError unless the file was created elsewhere.
performance_df = pd.read_csv('formatted_performance_results_prospective_.csv')
performance_df

# Graph measurement-based prospective validation

In [None]:

# Prospective validation: fit every pipeline on the complete 150-sample table and
# score the separate prospective table.
# NOTE(review): both paths are hard-coded absolute paths.
data = pd.read_csv('D:/graph_data_150.csv')
y = data['Class']
X = data.drop(columns=['Class', 'ID'])

pro_data = pd.read_csv('D:/graph_data_pro.csv')
y_pro = pro_data['Class']
X_pro = pro_data.drop(columns=['Class', 'ID'])

# model name -> list of per-sample prediction records (converted to DataFrames below)
predictions_pro = defaultdict(list)

for model_name, config in model_configs.items():
    print(f"Processing {model_name}...")

    selector = SelectKBest(f_classif, k=config['n_features'])
    pipeline = Pipeline([('selector', selector), ('clf', config['model'])])
    pipeline.fit(X, y)

    y_pred_proba_pro = pipeline.predict_proba(X_pro)

    # assumes predict_proba columns are ordered NCSE, ME, BI — TODO confirm
    predictions_pro[model_name].extend(
        {
            'ID': actual_id,
            'True_Label': true_label,
            'NCSE_Prob': proba[0],
            'ME_Prob': proba[1],
            'BI_Prob': proba[2]
        }
        for actual_id, true_label, proba in zip(pro_data['ID'], y_pro.to_numpy(), y_pred_proba_pro)
    )

for model_name in model_configs:
    predictions_pro[model_name] = pd.DataFrame(predictions_pro[model_name])


In [None]:
# Write each model's prospective prediction table to its own CSV.
for model_name, preds in predictions_pro.items():
    preds.to_csv(f"graph_predictions_prospective_{model_name}.csv", index=False)

In [None]:

def plot_roc_curve(y_true, y_pred_proba, model_name, class_names=('NCSE', 'ME', 'BI')):
    """Plot one-vs-rest ROC curves for each class, save the figure as EPS and show it.

    Parameters
    ----------
    y_true : array-like
        True class codes. Class code ``i`` is scored against column ``i`` of
        ``y_pred_proba``.
    y_pred_proba : 2-D array
        Predicted scores with one column per class code.
    model_name : str
        Inserted into the plot title and the output file name.
    class_names : sequence of str, optional
        Legend name for each class code, starting at code 0. The default
        reproduces the three curves this function always drew.

    Returns
    -------
    None
        The figure is written to ``graph_prospective_ROC_{model_name}.eps`` in
        the current working directory and then shown.
    """
    # Coerce once so that `== class_idx` is element-wise even for plain lists.
    y_true = np.asarray(y_true)

    # Compute every curve before creating the figure, so a failing ROC
    # computation does not leave an empty figure behind.
    curves = []
    for class_idx, class_name in enumerate(class_names):
        fpr, tpr, _ = roc_curve(y_true == class_idx, y_pred_proba[:, class_idx])
        curves.append((class_name, fpr, tpr, auc(fpr, tpr)))

    fig, ax = plt.subplots(figsize=(8, 6))
    for class_name, fpr, tpr, roc_auc in curves:
        ax.plot(fpr, tpr, label=f'{class_name} (area = {roc_auc:.2f})')
    ax.plot([0, 1], [0, 1], 'k--')  # chance diagonal
    ax.set_xlabel('False Positive Rate')
    ax.set_ylabel('True Positive Rate')
    ax.set_title(f'ROC Curve - {model_name}')
    ax.legend(loc='lower right')
    fig.savefig(f'graph_prospective_ROC_{model_name}.eps', format='eps')
    plt.show()

def plot_confusion_matrix(y_true, y_pred, model_name):
    """Draw the confusion matrix of ``y_pred`` against ``y_true`` as an annotated heatmap.

    The figure is saved as ``graph_prospective_CM_{model_name}.eps`` and then
    shown. Nothing is returned.
    """
    counts = confusion_matrix(y_true, y_pred)

    fig, ax = plt.subplots(figsize=(6, 5))
    # Colour scale is pinned to the range 0..10 rather than derived from the data.
    heatmap_options = dict(annot=True, fmt='d', cmap='Blues', cbar=False, vmin=0, vmax=10)
    sns.heatmap(counts, ax=ax, **heatmap_options)
    ax.set_xlabel('Predicted Label')
    ax.set_ylabel('True Label')
    ax.set_title(f'Confusion Matrix - {model_name}')
    fig.savefig(f'graph_prospective_CM_{model_name}.eps', format='eps')
    plt.show()
    
# Evaluate each model's prospective predictions: draw the ROC curves and the
# confusion matrix, then collect one result row per class code (0, 1, 2) plus
# one 'Overall' row, each with point estimates and bootstrap intervals.
# Rows are gathered in a list and converted to a DataFrame once at the end.
performance_rows_pro = []

for model_name, preds in predictions_pro.items():
    y_true = preds["True_Label"]
    y_pred_proba = preds[["NCSE_Prob", "ME_Prob", "BI_Prob"]].values
    # Predicted class = index of the highest-probability column.
    y_pred = np.argmax(y_pred_proba, axis=1)

    plot_roc_curve(y_true, y_pred_proba, model_name)
    plot_confusion_matrix(y_true, y_pred, model_name)

    # Computed once per model and reused for both the class-wise counts and the
    # overall metrics.
    # NOTE(review): assumes compute_corrected_metrics_from_cm (defined outside
    # this cell) returns the same result for the same inputs — confirm.
    metrics_from_cm = compute_corrected_metrics_from_cm(y_true, y_pred, y_pred_proba)

    for label in range(3):  # For each class label
        # Intervals are computed first and in this fixed order; values are stored
        # exactly as the bootstrap helpers return them.
        accuracy_ci = bootstrap_ci_classwise_metric(y_true, y_pred, accuracy_score, label)
        f1_ci = bootstrap_ci_classwise_metric(y_true, y_pred, f1_score, label)
        precision_ci = bootstrap_ci_classwise_metric(y_true, y_pred, precision_score, label)
        recall_ci = bootstrap_ci_classwise_metric(y_true, y_pred, recall_score, label)
        auc_ci = bootstrap_ci_classwise_auc(y_true, y_pred_proba, label)

        # One-vs-rest view of this class, shared by all point estimates below.
        y_true_bin = (y_true == label).astype(int)
        y_pred_bin = (y_pred == label).astype(int)
        class_counts = metrics_from_cm['classwise'][label]

        performance_rows_pro.append({
            'Model': model_name,
            'Label': label,
            'Accuracy': accuracy_score(y_true_bin, y_pred_bin),
            'F1': f1_score(y_true_bin, y_pred_bin),
            'Precision': precision_score(y_true_bin, y_pred_bin),
            'Recall': recall_score(y_true_bin, y_pred_bin),
            'TP': class_counts['tp'],
            'TN': class_counts['tn'],
            'FP': class_counts['fp'],
            'FN': class_counts['fn'],
            'AUC': roc_auc_score(y_true_bin, y_pred_proba[:, label]),
            'Accuracy CI': accuracy_ci,
            'F1 CI': f1_ci,
            'Precision CI': precision_ci,
            'Recall CI': recall_ci,
            'AUC CI': auc_ci
        })

    overall_metrics = metrics_from_cm['overall']
    # Macro-averaged multi-class intervals; the AUC interval is computed from the
    # probability matrix, the others from the hard predictions.
    accuracy_ci = bootstrap_ci(y_true, y_pred, None, lambda yt, yp: accuracy_score(yt, yp))
    f1_ci = bootstrap_ci(y_true, y_pred, None, lambda yt, yp: f1_score(yt, yp, average='macro'))
    precision_ci = bootstrap_ci(y_true, y_pred, None, lambda yt, yp: precision_score(yt, yp, average='macro'))
    recall_ci = bootstrap_ci(y_true, y_pred, None, lambda yt, yp: recall_score(yt, yp, average='macro'))
    auc_ci = bootstrap_ci(y_true, None, y_pred_proba, lambda yt, yp: roc_auc_score(yt, yp, average='macro', multi_class='ovr'))

    performance_rows_pro.append({
        'Model': model_name,
        'Label': 'Overall',
        'Accuracy': overall_metrics['accuracy'],
        'F1': overall_metrics['f1'],
        'Precision': overall_metrics['precision'],
        'Recall': overall_metrics['recall'],
        'AUC': overall_metrics['auc'],
        'Accuracy CI': accuracy_ci,
        'F1 CI': f1_ci,
        'Precision CI': precision_ci,
        'Recall CI': recall_ci,
        'AUC CI': auc_ci
    })

# The 'Overall' rows have no TP/TN/FP/FN keys, so those cells are empty in the table.
performance_data_corrected_pro = pd.DataFrame(performance_rows_pro)
performance_data_corrected_pro.to_csv('graph_performance_results_prospective.csv', index=False)
