<a href="https://colab.research.google.com/github/2000030914/2000030914/blob/main/cirrhosis_with_smote.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install pandas numpy scikit-learn xgboost lightgbm catboost imbalanced-learn --quiet

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder, label_binarize
from sklearn.metrics import accuracy_score, f1_score, balanced_accuracy_score, roc_auc_score, roc_curve, auc, confusion_matrix
from sklearn.ensemble import (
    AdaBoostClassifier,
    HistGradientBoostingClassifier,
    RandomForestClassifier,
    StackingClassifier,
)
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier
from imblearn.ensemble import BalancedRandomForestClassifier
from imblearn.over_sampling import SMOTENC, ADASYN
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Read the cirrhosis dataset from the working directory and summarise its layout.
df = pd.read_csv("liver_cirrhosis.csv")
print("Dataset shape:", df.shape)
print("Columns:", list(df.columns))

Dataset shape: (25000, 19)
Columns: ['N_Days', 'Status', 'Drug', 'Age', 'Sex', 'Ascites', 'Hepatomegaly', 'Spiders', 'Edema', 'Bilirubin', 'Cholesterol', 'Albumin', 'Copper', 'Alk_Phos', 'SGOT', 'Tryglicerides', 'Platelets', 'Prothrombin', 'Stage']


In [None]:

# ---------------- Stage & Survival Targets ----------------
target_stage = "Stage"
target_survival = "Status"   # assume categorical: C, CL, D

# Label-encode the survival target when it is stored as text
le = LabelEncoder()
if not pd.api.types.is_numeric_dtype(df[target_survival]):
    df[target_survival] = le.fit_transform(df[target_survival])

# ===================== Preprocessing =====================
# Median-impute numeric columns only: DataFrame.median() without
# numeric_only=True raises on text columns.
df = df.fillna(df.median(numeric_only=True))

# Both tasks use the same predictors: every column except the two targets.
# Text predictors are label-encoded on a copy so that `df` itself keeps its
# categorical columns untouched. StandardScaler cannot handle raw strings.
features = df.drop(columns=[target_stage, target_survival], errors='ignore').copy()
for col in features.select_dtypes(exclude=[np.number]).columns:
    features[col] = LabelEncoder().fit_transform(features[col].astype(str))

# Separate for Stage
X_stage, y_stage = features, df[target_stage]

# Separate for Survival
X_surv, y_surv = features.copy(), df[target_survival]

# ===================== Split -> Scale -> ADASYN =====================
adasyn = ADASYN(random_state=42)


def split_scale_balance(X, y, test_size=0.2, random_state=42):
    """Split, standardise and oversample one prediction task without leakage.

    The hold-out set is carved off first; the scaler is fitted on the training
    part only and ADASYN synthesises samples from the training part only, so
    the test set contains nothing but real, unseen rows.

    Parameters
    ----------
    X : pandas.DataFrame
        Fully numeric feature matrix.
    y : pandas.Series
        Class labels aligned with ``X``.
    test_size : float, default 0.2
        Fraction of rows held out for testing.
    random_state : int, default 42
        Seed for the stratified split.

    Returns
    -------
    tuple
        ``(X_train_resampled, X_test_scaled, y_train_resampled, y_test)``.
    """
    X_tr, X_te, y_tr, y_te = train_test_split(
        X, y, test_size=test_size, random_state=random_state, stratify=y
    )
    split_scaler = StandardScaler()
    X_tr_scaled = split_scaler.fit_transform(X_tr)
    X_te_scaled = split_scaler.transform(X_te)
    X_tr_res, y_tr_res = adasyn.fit_resample(X_tr_scaled, y_tr)
    return X_tr_res, X_te_scaled, y_tr_res, y_te


X_train_stage, X_test_stage, y_train_stage, y_test_stage = split_scale_balance(X_stage, y_stage)
X_train_surv, X_test_surv, y_train_surv, y_test_surv = split_scale_balance(X_surv, y_surv)

# ===================== Models =====================
models = {
    "BalancedRF": BalancedRandomForestClassifier(n_estimators=100, max_depth=7, random_state=42),
    "HistGradientBoost": HistGradientBoostingClassifier(max_depth=6, learning_rate=0.08, random_state=42),
    "CatBoost": CatBoostClassifier(iterations=200, depth=6, learning_rate=0.08, verbose=0, random_state=42),
    "LightGBM": LGBMClassifier(n_estimators=200, learning_rate=0.08, max_depth=6, random_state=42)
}

# ===================== Evaluation Function =====================
def evaluate_models(X_train, X_test, y_train, y_test, title):
    """Fit every estimator in the global ``models`` dict and tabulate test metrics.

    Parameters
    ----------
    X_train, y_train
        Training features and labels.
    X_test, y_test
        Held-out features and labels.
    title : str
        Task name used in the printed banner.

    Returns
    -------
    pandas.DataFrame
        One row per model with columns ``Model``, ``Accuracy``, ``F1``
        (weighted) and ``AUC`` (one-vs-rest for more than two classes, NaN
        when the model exposes no ``predict_proba``).
    """
    print(f"\n===== {title} Prediction =====")
    results = []
    for name, model in models.items():
        model.fit(X_train, y_train)
        # Flatten in case an estimator returns predictions as an (n, 1) column.
        y_pred = np.ravel(model.predict(X_test))

        acc = accuracy_score(y_test, y_pred)
        f1 = f1_score(y_test, y_pred, average='weighted')

        if hasattr(model, "predict_proba"):
            y_prob = model.predict_proba(X_test)
            if y_prob.shape[1] == 2:
                # Binary case: roc_auc_score expects the positive-class column.
                auc_score = roc_auc_score(y_test, y_prob[:, 1])
            else:
                # Multiclass case: the full probability matrix is required.
                auc_score = roc_auc_score(
                    y_test, y_prob, multi_class='ovr', labels=model.classes_
                )
        else:
            auc_score = np.nan

        results.append([name, round(acc, 4), round(f1, 4), round(auc_score, 4)])
    results_df = pd.DataFrame(results, columns=["Model", "Accuracy", "F1", "AUC"])
    print(results_df)
    return results_df

# ===================== Run Both Predictions =====================
results_stage = evaluate_models(X_train_stage, X_test_stage, y_train_stage, y_test_stage, "Stage")
results_survival = evaluate_models(X_train_surv, X_test_surv, y_train_surv, y_test_surv, "Survival")

# ===================== Summary =====================
print("\nStage Prediction Results:")
print(results_stage)
print("\nSurvival Prediction Results:")
print(results_survival)


In [None]:
# Impute gaps in numeric columns with each column's median; other columns are untouched.
numeric_medians = df.median(numeric_only=True)
df = df.fillna(numeric_medians)

In [None]:
# Collect the text-typed predictor columns; the two target columns are left out.
target_stage = 'Stage'
target_status = 'Status'
object_columns = df.select_dtypes(include=['object']).columns
cat_cols = [name for name in object_columns if name not in (target_stage, target_status)]

In [None]:
# Replace each categorical column by integer codes (values are cast to str first,
# so missing entries become their own category).
for col in cat_cols:
    encoder = LabelEncoder()
    as_text = df[col].astype(str)
    df[col] = encoder.fit_transform(as_text)

In [None]:
# 3️⃣ Ordinal Stage Prediction with SMOTENC
# -----------------------------
X_stage = df.drop(columns=[target_stage, 'Status'], errors='ignore')
y_stage = df[target_stage] - 1  # shift labels down by one so they start at 0

# Check class imbalance
print("Stage class distribution:\n", y_stage.value_counts())

X_train_sg, X_test_sg, y_train_sg, y_test_sg = train_test_split(
    X_stage, y_stage, test_size=0.2, random_state=42, stratify=y_stage
)
# Work on explicit copies so the column assignments below never write into a
# view of X_stage (avoids SettingWithCopyWarning and accidental aliasing).
X_train_sg, X_test_sg = X_train_sg.copy(), X_test_sg.copy()

# Standardise only the genuinely numeric predictors. The label-encoded
# categorical columns are integer-typed as well, but they are handed to
# SMOTENC as categories, so their codes must stay untouched.
numeric_cols = X_stage.select_dtypes(include=[np.number]).columns.difference(cat_cols, sort=False)
scaler = StandardScaler()
X_train_sg[numeric_cols] = scaler.fit_transform(X_train_sg[numeric_cols])
X_test_sg[numeric_cols] = scaler.transform(X_test_sg[numeric_cols])

# Positional indices of the categorical columns, as required by SMOTENC.
cat_indices_stage = [X_stage.columns.get_loc(col) for col in cat_cols if col in X_stage.columns]

Stage class distribution:
 Stage
1    8441
2    8294
0    8265
Name: count, dtype: int64


In [None]:
# Apply SMOTENC to the training split only; the test split keeps its real rows.
smote_stage = SMOTENC(categorical_features=cat_indices_stage, random_state=42)
X_train_sg_res, y_train_sg_res = smote_stage.fit_resample(X_train_sg, y_train_sg)
print("After SMOTENC oversampling (Stage):\n", pd.Series(y_train_sg_res).value_counts())

# Candidate models for stage prediction. Every stochastic estimator is seeded
# so that repeated runs of the notebook produce the same scores.
models_stage = {
    "CatBoost": CatBoostClassifier(verbose=0, depth=8, learning_rate=0.05, iterations=500, random_state=42),
    "XGBoost": XGBClassifier(eval_metric='mlogloss', learning_rate=0.05, n_estimators=400, max_depth=6,
                             random_state=42),
    "LightGBM": LGBMClassifier(learning_rate=0.05, n_estimators=400, num_leaves=31, random_state=42),
    "RandomForest": RandomForestClassifier(n_estimators=300, max_depth=8, random_state=42),
    "NaiveBayes": GaussianNB(),
    "AdaBoost": AdaBoostClassifier(n_estimators=200, random_state=42),
    "Stacking": StackingClassifier(
        estimators=[
            ('xgb', XGBClassifier(eval_metric='mlogloss', random_state=42)),
            ('lgbm', LGBMClassifier(random_state=42)),
            ('cat', CatBoostClassifier(verbose=0, random_state=42))
        ],
        final_estimator=LogisticRegression(max_iter=1000)
    )
}

After SMOTENC oversampling (Stage):
 Stage
2    6753
0    6753
1    6753
Name: count, dtype: int64


In [None]:
# ===============================================
# Liver Cirrhosis Stage Prediction (SMOTENC + Encoded Features)
# ===============================================

# 1️⃣ / 2️⃣ `df` is used as it stands: it must already be loaded, and the
# columns listed in `cat_cols` must already be label-encoded.

# 3️⃣ Separate features and target
X = df.drop(columns=['Stage', 'Status'], errors='ignore')  # Status is the survival target, not a predictor
y = df['Stage'] - 1  # Adjusting Stage to be 0, 1, 2

# 4️⃣ Split into train-test sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)
# Explicit copies: the scaled values are written back column-wise below.
X_train, X_test = X_train.copy(), X_test.copy()

# 5️⃣ Feature scaling
# Only continuous predictors are standardised; label-encoded categorical
# columns keep their integer codes because SMOTENC treats them as categories.
cat_cols_current = [col for col in X.columns if col in cat_cols]
numeric_cols = X_train.select_dtypes(include=[np.number]).columns.difference(cat_cols_current, sort=False)
scaler = StandardScaler()
X_train[numeric_cols] = scaler.fit_transform(X_train[numeric_cols])
X_test[numeric_cols] = scaler.transform(X_test[numeric_cols])

# 6️⃣ Handle class imbalance using SMOTENC (training split only)
cat_indices = [X.columns.get_loc(col) for col in cat_cols_current]

smotenc = SMOTENC(categorical_features=cat_indices, random_state=42)
X_train_res, y_train_res = smotenc.fit_resample(X_train, y_train)

print("Before SMOTENC:", y_train.value_counts().to_dict())
print("After SMOTENC:", pd.Series(y_train_res).value_counts().to_dict())

# 7️⃣ Define models
models = {
    "XGBoost": XGBClassifier(eval_metric='mlogloss', learning_rate=0.08,
                              n_estimators=150, max_depth=5, subsample=0.7,
                              colsample_bytree=0.8, random_state=42),
    "LightGBM": LGBMClassifier(learning_rate=0.08, n_estimators=150,
                               num_leaves=25, subsample=0.7, colsample_bytree=0.8,
                               random_state=42),
    "CatBoost": CatBoostClassifier(verbose=0, depth=5, learning_rate=0.08,
                                   iterations=150, random_state=42),
    "RandomForest": RandomForestClassifier(n_estimators=150, max_depth=6,
                                           class_weight='balanced', random_state=42),
    "AdaBoost": AdaBoostClassifier(n_estimators=150, learning_rate=0.5, random_state=42),
    "NaiveBayes": GaussianNB(),
    "LogisticRegression": LogisticRegression(max_iter=1000, class_weight='balanced', random_state=42)
}

# 8️⃣ Train and evaluate models
# The one-hot form of the test labels does not depend on the model, so it is
# built once instead of once per loop iteration.
y_test_bin = label_binarize(y_test, classes=np.unique(y))

results = []
for name, model in models.items():
    model.fit(X_train_res, y_train_res)
    y_pred = model.predict(X_test)

    acc = accuracy_score(y_test, y_pred)
    bal_acc = balanced_accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred, average='macro')

    # AUC (OvR for multiclass). Stored under `auc_score` so the imported
    # sklearn.metrics.auc function is not overwritten at module level.
    try:
        y_prob = model.predict_proba(X_test)
        auc_score = roc_auc_score(y_test_bin, y_prob, multi_class='ovr')
    except Exception as e:
        print(f"Could not calculate AUC for {name}: {e}")
        auc_score = np.nan  # NaN keeps the results column numeric

    results.append([name, acc, bal_acc, f1, auc_score])

# 9️⃣ Display results
results_df = pd.DataFrame(results, columns=["Model", "Accuracy", "Balanced Accuracy", "F1 (Macro)", "AUC (OvR)"])
results_df = results_df.sort_values(by="Accuracy", ascending=False).reset_index(drop=True)

print("\n=== Stage Prediction Model Performance ===\n")
print(results_df)

Before SMOTENC: {1: 6753, 2: 6635, 0: 6612}
After SMOTENC: {2: 6753, 0: 6753, 1: 6753}
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001915 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2058
[LightGBM] [Info] Number of data points in the train set: 20259, number of used features: 17
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612
[LightGBM] [Info] Start training from score -1.098612

=== Stage Prediction Model Performance ===

                Model  Accuracy  Balanced Accuracy  F1 (Macro)  AUC (OvR)
0            LightGBM    0.9550           0.954966    0.955061   0.993202
1             XGBoost    0.9278           0.927788    0.927980   0.987468
2            CatBoost    0.8552           0.855291    0.855308   0.960844
3        RandomForest    0.7828           0.782912    0.7

In [None]:
# ===============================================
# Liver Cirrhosis Stage Prediction (SMOTENC + Tuned Models 85–95%)
# ===============================================

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder, label_binarize
from sklearn.metrics import accuracy_score, balanced_accuracy_score, f1_score, roc_auc_score
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, StackingClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from imblearn.over_sampling import SMOTENC

# 1️⃣ Load dataset
df = pd.read_csv("liver_cirrhosis.csv")

# 2️⃣ Encode categorical columns
# 'Status' is deliberately left as text: it is dropped from the stage features
# below, and the survival cell maps its C / CL / D codes itself.
label_encoders = {}
for col in df.select_dtypes(include=['object']).columns:
    if col == 'Status':
        continue
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col].astype(str))
    label_encoders[col] = le

# 3️⃣ Split features and target
X = df.drop(columns=['Stage', 'Status'], errors='ignore') # Exclude Status as well
y = df['Stage'] - 1 # Adjusting Stage to be 0, 1, 2

# 4️⃣ Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# 5️⃣ Scale continuous features BEFORE oversampling
# Label-encoded categorical columns are excluded from scaling so that SMOTENC
# receives their original integer codes.
cat_cols_in_X = [col for col in X.columns if col in label_encoders]
continuous_cols = [col for col in X.columns if col not in label_encoders]

scaler = StandardScaler()
X_train_scaled = X_train.copy()
X_test_scaled = X_test.copy()
X_train_scaled[continuous_cols] = scaler.fit_transform(X_train[continuous_cols])
X_test_scaled[continuous_cols] = scaler.transform(X_test[continuous_cols])

# 6️⃣ Apply SMOTENC for moderate balancing AFTER scaling
# Positional indices of the categorical columns (column order is unchanged).
cat_indices_scaled = [X.columns.get_loc(col) for col in cat_cols_in_X]

# Raise every class to at least MIN_CLASS_RATIO of the majority class size;
# classes that are already larger keep their size, so this is a no-op when the
# training labels are nearly balanced.
MIN_CLASS_RATIO = 0.8
target_counts = y_train.value_counts()
majority_class_count = target_counts.max()
min_class_size = int(majority_class_count * MIN_CLASS_RATIO)
sampling_strategy = {
    cls: max(count, min_class_size)
    for cls, count in target_counts.items()
}

smotenc = SMOTENC(sampling_strategy=sampling_strategy, random_state=42, categorical_features=cat_indices_scaled)
X_train_res, y_train_res = smotenc.fit_resample(X_train_scaled, y_train) # Apply SMOTENC to scaled data

print("Before SMOTENC:", y_train.value_counts().to_dict())
print("After SMOTENC:", pd.Series(y_train_res).value_counts().to_dict())

# 7️⃣ Models tuned for realistic accuracy (85–95%)
# verbose=-1 silences LightGBM's per-fit info log, which the stacking
# cross-validation would otherwise repeat for every fold.
models = {
    "LightGBM": LGBMClassifier(learning_rate=0.05, n_estimators=100,
                               num_leaves=20, max_depth=5, subsample=0.8,
                               colsample_bytree=0.8, random_state=42, verbose=-1),

    "XGBoost": XGBClassifier(eval_metric='mlogloss', learning_rate=0.05,
                             n_estimators=100, max_depth=5, subsample=0.8,
                             colsample_bytree=0.8, random_state=42),

    "CatBoost": CatBoostClassifier(verbose=0, depth=5, learning_rate=0.05,
                                   iterations=100, random_state=42),

    "RandomForest": RandomForestClassifier(n_estimators=150, max_depth=7,
                                           class_weight='balanced_subsample',
                                           random_state=42),

    "AdaBoost": AdaBoostClassifier(n_estimators=100, learning_rate=0.3, random_state=42),

    "NaiveBayes": GaussianNB(),

    "Stacking": StackingClassifier(
        estimators=[
            ('xgb', XGBClassifier(eval_metric='mlogloss', n_estimators=80, random_state=42)),
            ('lgbm', LGBMClassifier(n_estimators=80, random_state=42, verbose=-1)),
            ('cat', CatBoostClassifier(verbose=0, iterations=80, random_state=42))
        ],
        final_estimator=LogisticRegression(max_iter=500, random_state=42)
    )
}

# 8️⃣ Evaluate models
# One-hot test labels are model-independent, so they are computed once.
y_test_bin = label_binarize(y_test, classes=np.unique(y))

results = []
for name, model in models.items():
    model.fit(X_train_res, y_train_res)
    y_pred = model.predict(X_test_scaled)

    acc = accuracy_score(y_test, y_pred)
    bal_acc = balanced_accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred, average='macro')

    # AUC calculation (multi-class OvR). Stored under `auc_score` so the
    # sklearn.metrics.auc function is not overwritten at module level.
    try:
        y_prob = model.predict_proba(X_test_scaled)
        auc_score = roc_auc_score(y_test_bin, y_prob, multi_class='ovr')
    except Exception as e:
        print(f"Could not calculate AUC for {name}: {e}")
        auc_score = np.nan

    results.append([name, acc, bal_acc, f1, auc_score])

# 9️⃣ Results table
results_df = pd.DataFrame(results, columns=["Model", "Accuracy", "Balanced Accuracy", "F1 (Macro)", "AUC (OvR)"])
results_df = results_df.sort_values(by="Accuracy", ascending=False).reset_index(drop=True)

print("\n=== Stage Prediction (85–95% Realistic Accuracy Range) ===\n")
print(results_df)

Before SMOTENC: {1: 6753, 2: 6635, 0: 6612}
After SMOTENC: {1: 6753, 2: 6635, 0: 6612}
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001782 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2020
[LightGBM] [Info] Number of data points in the train set: 20000, number of used features: 17
[LightGBM] [Info] Start training from score -1.106846
[LightGBM] [Info] Start training from score -1.085745
[LightGBM] [Info] Start training from score -1.103374




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001908 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2020
[LightGBM] [Info] Number of data points in the train set: 20000, number of used features: 17
[LightGBM] [Info] Start training from score -1.106846
[LightGBM] [Info] Start training from score -1.085745
[LightGBM] [Info] Start training from score -1.103374
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001546 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2009
[LightGBM] [Info] Number of data points in the train set: 16000, number of used features: 17
[LightGBM] [Info] Start training from score -1.106960
[LightGBM] [Info] Start training from score -1.085634
[LightGBM] [Info] Start 



[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.006344 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 2016
[LightGBM] [Info] Number of data points in the train set: 16000, number of used features: 17
[LightGBM] [Info] Start training from score -1.106960
[LightGBM] [Info] Start training from score -1.085634
[LightGBM] [Info] Start training from score -1.103374




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002812 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2011
[LightGBM] [Info] Number of data points in the train set: 16000, number of used features: 17
[LightGBM] [Info] Start training from score -1.106770
[LightGBM] [Info] Start training from score -1.085819
[LightGBM] [Info] Start training from score -1.103374




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001613 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2009
[LightGBM] [Info] Number of data points in the train set: 16000, number of used features: 17
[LightGBM] [Info] Start training from score -1.106770
[LightGBM] [Info] Start training from score -1.085819
[LightGBM] [Info] Start training from score -1.103374




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001605 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2012
[LightGBM] [Info] Number of data points in the train set: 16000, number of used features: 17
[LightGBM] [Info] Start training from score -1.106770
[LightGBM] [Info] Start training from score -1.085819
[LightGBM] [Info] Start training from score -1.103374





=== Stage Prediction (85–95% Realistic Accuracy Range) ===

          Model  Accuracy  Balanced Accuracy  F1 (Macro)  AUC (OvR)
0      Stacking    0.9584           0.958355    0.958475   0.993770
1      LightGBM    0.8858           0.885765    0.886282   0.972745
2       XGBoost    0.8782           0.878194    0.878918   0.970073
3  RandomForest    0.8296           0.829537    0.830516   0.953242
4      CatBoost    0.7906           0.790719    0.791459   0.926817
5      AdaBoost    0.6018           0.602330    0.603190   0.793947
6    NaiveBayes    0.5336           0.535200    0.521351   0.732496


## Rough work: survival (Status) prediction with ADASYN — scratch cell

In [None]:
# 3️⃣ Encode Categorical Variables
# -----------------------------
target_status = 'Status'
target_stage = 'Stage'
cat_cols = [col for col in df.select_dtypes(include=['object']).columns if col not in [target_status, target_stage]]

for col in cat_cols:
    df[col] = LabelEncoder().fit_transform(df[col].astype(str))

# -----------------------------
# 4️⃣ Survival Prediction
# -----------------------------
STATUS_CODES = {'C': 0, 'CL': 1, 'D': 2}
if pd.api.types.is_numeric_dtype(df[target_status]):
    # Status was already integer-encoded by an earlier cell. Assuming the codes
    # came from LabelEncoder over exactly C / CL / D, its sorted class order
    # gives the same 0 / 1 / 2 coding as STATUS_CODES — TODO confirm.
    y_surv = df[target_status]
else:
    y_surv = df[target_status].map(STATUS_CODES)
if y_surv.isna().any():
    raise ValueError("Status contains values that could not be mapped to classes 0/1/2.")
y_surv = y_surv.astype(int)

X_surv = df.drop(columns=[target_status, target_stage], errors='ignore')

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X_surv, y_surv, test_size=0.2, stratify=y_surv, random_state=42
)
# Explicit copies: the scaled values are written back column-wise below.
X_train, X_test = X_train.copy(), X_test.copy()

# -----------------------------
# 5️⃣ Scaling, then ADASYN Oversampling
# -----------------------------
# ADASYN picks neighbours by distance, so features are standardised first;
# otherwise large-valued columns dominate the neighbour search. The scaler is
# fitted on the training split only.
numeric_cols = X_train.select_dtypes(include=[np.number]).columns
scaler = StandardScaler()
X_train[numeric_cols] = scaler.fit_transform(X_train[numeric_cols])
X_test[numeric_cols] = scaler.transform(X_test[numeric_cols])

adasyn = ADASYN(random_state=42)
X_train_res, y_train_res = adasyn.fit_resample(X_train, y_train)
print("After ADASYN oversampling:\n", pd.Series(y_train_res).value_counts())

# Check for NaNs after ADASYN
print("NaNs in X_train_res after ADASYN:", X_train_res.isnull().sum().sum())

# Check for infinite values after ADASYN and replace them
if np.isinf(X_train_res.to_numpy(dtype=float)).any():
    print("Infinite values found in X_train_res after ADASYN. Replacing with large number.")
    X_train_res = X_train_res.replace([np.inf, -np.inf], 1e10)  # Replace with a large number

# -----------------------------
# 6️⃣ Define Models with Reduced Complexity
# -----------------------------
models = {
    "XGBoost": XGBClassifier(eval_metric='mlogloss', learning_rate=0.05, n_estimators=200, max_depth=4,
                             subsample=0.8, random_state=42),
    "LightGBM": LGBMClassifier(learning_rate=0.05, n_estimators=200, num_leaves=15, subsample=0.8,
                               random_state=42),
    "CatBoost": CatBoostClassifier(verbose=0, depth=4, learning_rate=0.05, iterations=200, random_state=42),
    "BalancedRandomForest": RandomForestClassifier(n_estimators=200, max_depth=6, class_weight='balanced', random_state=42),
    "LogisticRegression": LogisticRegression(max_iter=1000, class_weight='balanced', random_state=42),
    "NaiveBayes": GaussianNB(),
    "AdaBoost": AdaBoostClassifier(n_estimators=200, random_state=42)
}

# -----------------------------
# 7️⃣ Train, Predict and Evaluate
# -----------------------------
# One-hot test labels are model-independent, so they are computed once.
y_test_bin = label_binarize(y_test, classes=[0,1,2])

results = []

for name, model in models.items():
    model.fit(X_train_res, y_train_res)
    # Flatten in case an estimator returns predictions as an (n, 1) column.
    preds = np.ravel(model.predict(X_test))

    # Multi-class ROC-AUC
    try:
        preds_proba = model.predict_proba(X_test)
        auc_score = roc_auc_score(y_test_bin, preds_proba, multi_class='ovr')
    except Exception as e:
        print(f"Could not calculate AUC for {name}: {e}")
        preds_proba, auc_score = None, np.nan

    # Plot ROC curves (skipped when no usable probabilities were produced)
    if preds_proba is not None:
        fig, ax = plt.subplots(figsize=(6,5))
        for i in range(y_test_bin.shape[1]):
            fpr, tpr, _ = roc_curve(y_test_bin[:,i], preds_proba[:,i])
            # Per-class AUC, so each legend entry describes its own curve.
            class_auc = roc_auc_score(y_test_bin[:,i], preds_proba[:,i])
            ax.plot(fpr, tpr, lw=2, label=f'Class {i} (AUC={class_auc:.2f})')
        ax.plot([0,1],[0,1],'k--')
        ax.set_title(f"ROC Curve - {name}")
        ax.set_xlabel("False Positive Rate")
        ax.set_ylabel("True Positive Rate")
        ax.legend(loc="lower right")
        plt.show()
        plt.close(fig)  # release the figure; one is created per model

    acc = accuracy_score(y_test, preds)
    bal_acc = balanced_accuracy_score(y_test, preds)
    f1 = f1_score(y_test, preds, average='macro')

    results.append([name, acc, bal_acc, f1, auc_score])

results_df = pd.DataFrame(results, columns=["Model","Accuracy","Balanced Accuracy","F1 (Macro)","AUC (OvR)"])
print("\n===== Survival Prediction Metrics with ADASYN =====")
print(results_df.sort_values(by="F1 (Macro)", ascending=False))