In [2]:
import pandas as pd
import numpy as np

from sklearn.metrics import roc_auc_score, accuracy_score, roc_curve

# Prediction CSVs, one per trained model: splits 0 through 9, run 0 of each.
# The paths differ only in the split index, so they are generated rather than
# written out by hand.
pred_files = [
    f"/home/mezher/Documents/Deauville_DeepLearning/pred/pred_split{split}_run0.csv"
    for split in range(10)
]

# STEP 1: Within-model aggregation of MIP view predictions
# Every prediction file must provide 'scan_id', 'probs' and 'target' columns
# (a missing column raises a KeyError during aggregation).
# Rows sharing a scan_id -- the individual MIP views of one scan -- are
# collapsed into a single row per scan in two ways:
#   * LARS-avg: mean of the view probabilities
#   * LARS-max: maximum of the view probabilities
# In both cases the scan's label is the first non-null 'target' of its group.
model_preds_avg = []  # one per-scan frame per model (LARS-avg)
model_preds_max = []  # one per-scan frame per model (LARS-max)

for csv_path in pred_files:
    view_preds = pd.read_csv(csv_path)
    # Group once and reuse the grouping for both aggregation variants.
    per_scan = view_preds.groupby('scan_id')
    model_preds_avg.append(
        per_scan.agg(probs=('probs', 'mean'), target=('target', 'first')).reset_index()
    )
    model_preds_max.append(
        per_scan.agg(probs=('probs', 'max'), target=('target', 'first')).reset_index()
    )

# STEP 2: Ensemble across models
def _ensemble_across_models(per_model_preds):
    """Merge per-model scan predictions and average them into 'ensemble_prob'.

    Args:
        per_model_preds: Non-empty list of DataFrames, one per model, each
            with 'scan_id', 'probs' and 'target' columns and one row per scan.

    Returns:
        A DataFrame with one row per scan_id seen by any model, holding
        'scan_id', 'probs_model0', 'target', 'probs_model1', ... and a final
        'ensemble_prob' column (mean over the models that scored the scan).
    """
    ensemble = per_model_preds[0].rename(columns={'probs': 'probs_model0'})
    for i, model_df in enumerate(per_model_preds[1:], start=1):
        prob_col = f'probs_model{i}'
        # Only the probability column is merged; 'target' is handled below.
        model_probs = model_df.rename(columns={'probs': prob_col})[['scan_id', prob_col]]
        ensemble = pd.merge(ensemble, model_probs, on='scan_id', how='outer')

    # The outer merge adds rows for scans that model 0 never scored, and those
    # rows carry no label because 'target' came from model 0 only. Recover
    # their labels from whichever model did see the scan, so that the metric
    # computations downstream do not choke on NaN targets.
    missing_target = ensemble['target'].isna()
    if missing_target.any():
        known_targets = (
            pd.concat([model_df[['scan_id', 'target']] for model_df in per_model_preds])
            .dropna(subset=['target'])
            .drop_duplicates(subset='scan_id')
            .set_index('scan_id')['target']
        )
        ensemble.loc[missing_target, 'target'] = (
            ensemble.loc[missing_target, 'scan_id'].map(known_targets)
        )

    # Final ensemble prediction per scan: mean across the per-model columns
    # (DataFrame.mean skips NaN, i.e. models that did not score the scan).
    model_cols = [col for col in ensemble.columns if col.startswith('probs_model')]
    ensemble['ensemble_prob'] = ensemble[model_cols].mean(axis=1)
    return ensemble


ensemble_avg = _ensemble_across_models(model_preds_avg)
ensemble_max = _ensemble_across_models(model_preds_max)

# Names of the per-model probability columns, kept at module level as before.
model_cols_avg = [col for col in ensemble_avg.columns if col.startswith('probs_model')]
model_cols_max = [col for col in ensemble_max.columns if col.startswith('probs_model')]

# Score each ensemble against the labels stored in its own frame, so labels and
# probabilities are guaranteed to be row-aligned (previously LARS-max was scored
# against ensemble_avg's labels, which silently relied on both frames having
# identical row order).
auc_avg = roc_auc_score(ensemble_avg['target'], ensemble_avg['ensemble_prob'])
auc_max = roc_auc_score(ensemble_max['target'], ensemble_max['ensemble_prob'])
print("Ensemble ROC AUC (LARS-avg):", auc_avg)
print("Ensemble ROC AUC (LARS-max):", auc_max)

# --- Decision thresholds maximising Youden's index (TPR - FPR) ---
# NOTE(review): each threshold is selected on the same scans the accuracy is
# then computed on, so the reported accuracy is likely optimistic -- confirm
# whether a held-out set should be used for threshold selection.

# LARS-avg: ROC curve -> Youden's index per threshold -> best operating point.
fpr_avg, tpr_avg, thresholds_avg = roc_curve(
    ensemble_avg['target'], ensemble_avg['ensemble_prob']
)
youden_index_avg = tpr_avg - fpr_avg
optimal_idx_avg = youden_index_avg.argmax()
optimal_threshold_avg = thresholds_avg[optimal_idx_avg]
# A scan is labelled positive (1) when its probability reaches the threshold.
ensemble_avg['pred_label'] = (
    ensemble_avg['ensemble_prob'].ge(optimal_threshold_avg).astype(int)
)
acc_avg_optimal = accuracy_score(ensemble_avg['target'], ensemble_avg['pred_label'])

# LARS-max: same procedure on the max-aggregated ensemble.
fpr_max, tpr_max, thresholds_max = roc_curve(
    ensemble_max['target'], ensemble_max['ensemble_prob']
)
youden_index_max = tpr_max - fpr_max
optimal_idx_max = youden_index_max.argmax()
optimal_threshold_max = thresholds_max[optimal_idx_max]
ensemble_max['pred_label'] = (
    ensemble_max['ensemble_prob'].ge(optimal_threshold_max).astype(int)
)
acc_max_optimal = accuracy_score(ensemble_max['target'], ensemble_max['pred_label'])

# Report thresholds first, then the accuracies obtained with them.
for label, value in (
    ("Optimal threshold for LARS-avg (Youden's index):", optimal_threshold_avg),
    ("Optimal threshold for LARS-max (Youden's index):", optimal_threshold_max),
    ("Ensemble Accuracy (LARS-avg) with optimal threshold:", acc_avg_optimal),
    ("Ensemble Accuracy (LARS-max) with optimal threshold:", acc_max_optimal),
):
    print(label, value)



# Write both per-scan ensemble tables (including the per-model probability
# columns) to the prediction directory, without the DataFrame index.
for csv_name, frame in (("ensemble_avg", ensemble_avg), ("ensemble_max", ensemble_max)):
    frame.to_csv(
        f"/home/mezher/Documents/Deauville_DeepLearning/pred/{csv_name}.csv",
        index=False,
    )


Ensemble ROC AUC (LARS-avg): 0.75
Ensemble ROC AUC (LARS-max): 0.7291666666666666
Optimal threshold for LARS-avg (Youden's index): 0.038389773065461104
Optimal threshold for LARS-max (Youden's index): 0.09677579814565
Ensemble Accuracy (LARS-avg) with optimal threshold: 0.7142857142857143
Ensemble Accuracy (LARS-max) with optimal threshold: 0.7142857142857143
