In [4]:
import os
from pathlib import Path

import numpy as np
import pandas as pd

from sklearn.metrics import (
    precision_recall_fscore_support,
    accuracy_score,
    balanced_accuracy_score,
    f1_score,
    confusion_matrix,
    roc_auc_score
)
from sklearn.utils import shuffle
import joblib

import xgboost as xgb
from xgboost import XGBClassifier

# ============================================================
# 1) Paths and basic settings (edit these to match your system)
# ============================================================

# Input: the fused dataset, stored as a CSV file.
DATA_PATH = Path(r"C:\Users\LENOVO\Downloads\Senior_Proj\Final\Data\FUSED_ALL_FINAL_FROM_DATA_ALL.csv")

# Output: folder that receives every saved XGBoost model and CSV summary.
MODELS_DIR = Path(r"C:\Users\LENOVO\Downloads\Senior_Proj\Final\output\XGBoost_Model")

# Dataframe columns holding the subject ID and the class label.
SUBJECT_COL = "subject"
LABEL_COL   = "label"

# Seed reused throughout the script so that runs are repeatable.
RANDOM_STATE = 42

# Fold counts of the nested N-LNSO scheme (outer loop / inner loop).
N_OUTER_FOLDS = 10
N_INNER_FOLDS = 10

# Make sure the output folder exists (parents included) and seed NumPy's
# global random generator.
MODELS_DIR.mkdir(parents=True, exist_ok=True)
np.random.seed(RANDOM_STATE)


# ============================================================
# 2) Helper functions
# ============================================================

def make_subject_folds(subject_ids, n_folds=10, random_state=42):
    """
    Create subject-wise folds.

    The unique subject IDs are shuffled with a seeded generator and split
    into ``n_folds`` groups of near-equal size. Each fold therefore contains
    a set of subjects (not windows), and every subject belongs to exactly
    one fold.

    Args:
        subject_ids: Array-like of subject IDs, one entry per sample
            (repeated IDs are expected).
        n_folds: Number of folds to create.
        random_state: Seed for the shuffle; the same seed and the same set
            of subjects always produce the same folds.

    Returns:
        A list of ``n_folds`` NumPy arrays of subject IDs.

    Raises:
        ValueError: If ``n_folds`` is smaller than 1 or larger than the
            number of unique subjects, which would leave some folds empty.
    """
    # np.unique already returns the IDs sorted, so the shuffle starts from
    # the same order no matter how the samples are ordered in the input.
    unique_subjects = np.unique(subject_ids)

    if not 1 <= n_folds <= len(unique_subjects):
        raise ValueError(
            f"n_folds must be between 1 and the number of unique subjects "
            f"({len(unique_subjects)}), got {n_folds}."
        )

    rng = np.random.RandomState(random_state)
    rng.shuffle(unique_subjects)
    return np.array_split(unique_subjects, n_folds)


def undersample_multiclass(X, y, max_ratio=3.0, random_state=42):
    """
    Randomly undersample the larger classes of a multi-class dataset.

    No class keeps more than ``int(max_ratio * n_smallest)`` samples, where
    ``n_smallest`` is the sample count of the rarest class in ``y``. Classes
    that are already at or below that cap are kept in full. The retained
    rows are returned in shuffled order.

    Returns:
        (X_kept, y_kept) as NumPy arrays restricted to the retained rows.
    """
    features = np.asarray(X)
    targets = np.asarray(y)
    sampler = np.random.RandomState(random_state)

    class_values, class_sizes = np.unique(targets, return_counts=True)
    # The same cap applies to every class.
    cap = int(class_sizes.min() * max_ratio)

    kept_per_class = []
    for value in class_values:
        members = np.flatnonzero(targets == value)
        n_take = min(len(members), cap)
        # Sampling without replacement: a row is never kept twice.
        kept_per_class.append(sampler.choice(members, size=n_take, replace=False))

    # Mix the classes so the output is not grouped class by class.
    kept = shuffle(np.concatenate(kept_per_class), random_state=random_state)

    return features[kept], targets[kept]


def get_xgb_model(params, n_classes, use_gpu=True):
    """
    Create an unfitted multi-class XGBClassifier from a hyperparameter dict.

    ``params`` must provide "n_estimators", "max_depth", "learning_rate",
    "subsample" and "colsample_bytree". Every estimator uses the "hist"
    tree method, the "multi:softprob" objective and the "mlogloss" metric.

    XGBoost >= 2.0 device selection:
      - GPU:  tree_method="hist", device="cuda"
      - CPU:  tree_method="hist"

    With ``use_gpu=True`` the GPU configuration is tried first; if building
    that estimator raises, the reason is printed and a CPU estimator is
    returned instead.

    NOTE(review): the estimator constructor presumably only stores its
    parameters, so an unavailable GPU would surface at fit() time rather
    than here - confirm whether the except branch below can ever fire.
    """
    tuned_names = (
        "n_estimators",
        "max_depth",
        "learning_rate",
        "subsample",
        "colsample_bytree",
    )
    shared_kwargs = {name: params[name] for name in tuned_names}
    shared_kwargs.update(
        objective="multi:softprob",
        num_class=n_classes,
        eval_metric="mlogloss",
        random_state=RANDOM_STATE,
        n_jobs=-1,
    )

    def build_cpu_model():
        return XGBClassifier(tree_method="hist", **shared_kwargs)

    if not use_gpu:
        # CPU only
        cpu_model = build_cpu_model()
        print("[XGBoost] Using CPU only (hist) 🧠")
        return cpu_model

    try:
        # GPU mode (the new-style device selection)
        gpu_model = XGBClassifier(
            tree_method="hist",
            device="cuda",
            **shared_kwargs,
        )
        print("[XGBoost] Using GPU (hist + device='cuda') ✅")
        return gpu_model
    except Exception as e:
        print("[XGBoost] GPU not available, falling back to CPU ❌")
        print("Reason:", e)
        return build_cpu_model()


def xgb_fit_predict_proba(X_train, y_train, X_val, params, n_classes):
    """
    Train XGBoost on (X_train, y_train) and predict class probabilities on X_val.

    The GPU configuration is tried first. If training fails with an
    XGBoost error, the reason is printed and the model is rebuilt and
    trained on the CPU instead.

    Returns:
        model       - trained XGBClassifier
        y_val_proba - numpy array of shape (n_samples_val, n_classes)

    Raises:
        xgboost.core.XGBoostError: If the CPU retry fails as well.
    """
    model = get_xgb_model(params, n_classes, use_gpu=True)
    try:
        model.fit(X_train, y_train)
    except xgb.core.XGBoostError as err:
        # An unusable GPU is typically reported when training starts rather
        # than when the estimator is constructed, so the CPU fallback has to
        # be applied here. A failure unrelated to the device simply fails
        # again on the CPU and propagates.
        print("[XGBoost] GPU training failed, retrying on CPU ❌")
        print("Reason:", err)
        model = get_xgb_model(params, n_classes, use_gpu=False)
        model.fit(X_train, y_train)

    y_val_proba = model.predict_proba(X_val)
    return model, y_val_proba


def xgb_fit_predict(X_train, y_train, X_test, params, n_classes):
    """
    Train XGBoost on (X_train, y_train) and predict class labels on X_test.

    The GPU configuration is tried first. If training fails with an
    XGBoost error, the reason is printed and the model is rebuilt and
    trained on the CPU instead.

    Returns:
        model       - trained XGBClassifier
        y_test_pred - numpy array of predicted class labels

    Raises:
        xgboost.core.XGBoostError: If the CPU retry fails as well.
    """
    model = get_xgb_model(params, n_classes, use_gpu=True)
    try:
        model.fit(X_train, y_train)
    except xgb.core.XGBoostError as err:
        # An unusable GPU is typically reported when training starts rather
        # than when the estimator is constructed, so the CPU fallback has to
        # be applied here. A failure unrelated to the device simply fails
        # again on the CPU and propagates.
        print("[XGBoost] GPU training failed, retrying on CPU ❌")
        print("Reason:", err)
        model = get_xgb_model(params, n_classes, use_gpu=False)
        model.fit(X_train, y_train)

    y_test_pred = model.predict(X_test)
    return model, y_test_pred


def evaluate_metrics(y_true, y_pred, label_set=None):
    """
    Score predictions against the ground truth.

    Returns a dict with:
    - "labels": the label order used for the per-class entries
      (``label_set``, or the unique values of ``y_true`` when it is None)
    - per-class precision, recall, f1 and support (zero_division=0)
    - accuracy, balanced accuracy, micro-F1, macro-F1, weighted-F1
    - the confusion matrix, laid out in the same label order
    """
    labels = np.unique(y_true) if label_set is None else label_set

    per_class = precision_recall_fscore_support(
        y_true, y_pred, labels=labels, zero_division=0
    )
    precision, recall, f1_by_class, support = per_class

    accuracy = accuracy_score(y_true, y_pred)
    balanced = balanced_accuracy_score(y_true, y_pred)

    # The averaged F1 variants are computed without an explicit label list.
    averaged_f1 = {
        kind: f1_score(y_true, y_pred, average=kind)
        for kind in ("micro", "macro", "weighted")
    }

    matrix = confusion_matrix(y_true, y_pred, labels=labels)

    return {
        "labels": labels,
        "precision_per_class": precision,
        "recall_per_class": recall,
        "f1_per_class": f1_by_class,
        "support_per_class": support,
        "accuracy": accuracy,
        "balanced_accuracy": balanced,
        "micro_f1": averaged_f1["micro"],
        "macro_f1": averaged_f1["macro"],
        "weighted_f1": averaged_f1["weighted"],
        "confusion_matrix": matrix,
    }


def print_metrics(metrics, header=""):
    """
    Print a metrics dict (as built by evaluate_metrics) in readable form:
    an optional banner, the per-class table, the global scores, the
    confusion matrix and the TP/FP/FN/TN counts derived from it per label.
    """
    class_labels = metrics["labels"]
    precision = metrics["precision_per_class"]
    recall = metrics["recall_per_class"]
    f1_scores = metrics["f1_per_class"]
    counts = metrics["support_per_class"]
    matrix = metrics["confusion_matrix"]

    if header:
        rule = "=" * 70
        print(f"\n{rule}\n{header}\n{rule}")

    print("\nPer-class metrics:")
    print("label\tprecision\trecall\t\tf1-score\tsupport")
    for pos, label in enumerate(class_labels):
        scores = "\t\t".join(
            f"{column[pos]:.4f}" for column in (precision, recall, f1_scores)
        )
        print(f"{label}\t{scores}\t\t{counts[pos]}")

    print("\nGlobal metrics:")
    # Titles are pre-padded so the colons line up.
    global_rows = (
        ("Accuracy          ", "accuracy"),
        ("Balanced Accuracy ", "balanced_accuracy"),
        ("Micro-F1          ", "micro_f1"),
        ("Macro-F1          ", "macro_f1"),
        ("Weighted-F1       ", "weighted_f1"),
    )
    for title, key in global_rows:
        print(f"{title}: {metrics[key]:.4f}")

    print("\nConfusion matrix (rows = true, cols = predicted):")
    print(matrix)

    grand_total = matrix.sum()
    print("\nPer-class confusion details (TP, FP, FN, TN):")
    for pos, label in enumerate(class_labels):
        hits = matrix[pos, pos]
        # Column total minus the diagonal: predicted as this label but wrong.
        false_pos = matrix[:, pos].sum() - hits
        # Row total minus the diagonal: this label predicted as something else.
        false_neg = matrix[pos, :].sum() - hits
        true_neg = grand_total - hits - false_pos - false_neg
        print(f"Label {label}: TP={hits}, FP={false_pos}, FN={false_neg}, TN={true_neg}")


def compute_inner_roc_auc_for_params(
    X, y, subjects, label_set, param_grid, n_inner_folds=10, max_ratio=3.0
):
    """
    Inner loop for nested N-LNSO:
    - Split train+val subjects into n_inner_folds (subject-wise).
    - For each inner fold, undersample the inner-train subjects once and
      then, for each hyperparameter combination:
        - Train on the undersampled inner-train data
        - Validate on the inner-val subjects
        - Compute macro ROC AUC (multi-class 'ovr')
    - Average the fold AUCs per combination and pick the highest mean.

    Folds are the outer loop here because the fold masks and the
    undersampled training set depend only on the fold, not on the
    hyperparameters; preparing them once per fold avoids repeating that
    work for every combination. As a consequence the per-combination
    summary lines are printed after all folds have been processed.

    A fold whose validation subjects do not cover every class in
    ``label_set`` cannot yield a one-vs-rest macro ROC AUC, so it is
    skipped (with a message) before any model is trained on it.

    Args:
        X, y, subjects: Sample-aligned NumPy arrays (features, labels,
            subject IDs) of the train+val portion.
        label_set: Ordered class labels; its length is the class count.
        param_grid: Sequence of hyperparameter dicts to compare.
        n_inner_folds: Number of subject-wise inner folds.
        max_ratio: Cap passed to undersample_multiclass.

    Returns:
        (best_params, best_auc): the winning hyperparameter dict and its
        mean macro ROC AUC over the usable inner folds. Ties go to the
        combination that appears first in ``param_grid``.

    Raises:
        RuntimeError: If no combination obtained a valid AUC on any fold.
    """
    inner_folds = make_subject_folds(
        subjects, n_folds=n_inner_folds, random_state=RANDOM_STATE
    )
    n_classes = len(label_set)

    # One list of fold AUCs per hyperparameter combination, in grid order.
    aucs_per_combo = [[] for _ in param_grid]

    print("\n------------------------------------------------------")
    print("Inner loop ROC AUC hyperparameter tuning (XGBoost)")
    print("------------------------------------------------------")

    for inner_idx, inner_val_subjects in enumerate(inner_folds):
        # Subject-wise split for inner validation
        is_inner_val = np.isin(subjects, inner_val_subjects)
        y_inner_val = y[is_inner_val]

        # ROC AUC is undefined for a class absent from the validation
        # labels, so do not spend training time on such a fold.
        if np.unique(y_inner_val).size < n_classes:
            print(f"Inner fold {inner_idx + 1}/{len(inner_folds)}: validation "
                  f"subjects do not cover every class, fold skipped.")
            continue

        X_inner_val = X[is_inner_val]
        is_inner_train = ~is_inner_val

        # Undersampling on inner training set (only); identical for every
        # hyperparameter combination, hence done once per fold.
        X_inner_train_bal, y_inner_train_bal = undersample_multiclass(
            X[is_inner_train],
            y[is_inner_train],
            max_ratio=max_ratio,
            random_state=RANDOM_STATE + inner_idx
        )

        for fold_aucs, params in zip(aucs_per_combo, param_grid):
            # Train XGBoost and get probabilities on validation set
            _, y_inner_val_proba = xgb_fit_predict_proba(
                X_inner_train_bal, y_inner_train_bal,
                X_inner_val,
                params,
                n_classes=n_classes
            )

            try:
                auc = roc_auc_score(
                    y_inner_val,
                    y_inner_val_proba,
                    labels=label_set,
                    multi_class="ovr",
                    average="macro"
                )
            except ValueError:
                # ROC AUC could not be computed for this fold => skip it
                continue

            # Guard against an undefined score reported as NaN instead of
            # an exception; a single NaN would poison the fold average.
            if not np.isnan(auc):
                fold_aucs.append(auc)

    # Mean ROC AUC across inner folds for each hyperparameter combination;
    # combinations without any valid fold are reported but not eligible.
    combo_scores = []
    for combo_idx, (params, fold_aucs) in enumerate(zip(param_grid, aucs_per_combo)):
        mean_auc = float(np.mean(fold_aucs)) if fold_aucs else np.nan
        print(f"Combo {combo_idx + 1}/{len(param_grid)}: "
              f"{params} --> mean macro ROC AUC = {mean_auc:.4f}")
        if not np.isnan(mean_auc):
            combo_scores.append((params, mean_auc))

    if not combo_scores:
        raise RuntimeError("All inner ROC AUC scores are NaN. Check your data splits or labels.")

    # Select the hyperparameters with the highest mean ROC AUC
    best_params, best_auc = max(combo_scores, key=lambda t: t[1])

    print("\nBest inner hyperparameters based on ROC AUC:")
    print(best_params)
    print(f"Best inner mean macro ROC AUC: {best_auc:.4f}")

    return best_params, best_auc


# ============================================================
# 3) Load data and define feature/meta columns
# ============================================================

print("Loading fused dataset...")
df = pd.read_csv(DATA_PATH)

if SUBJECT_COL not in df.columns or LABEL_COL not in df.columns:
    raise ValueError("Check SUBJECT_COL and LABEL_COL names. They are not found in the dataframe.")

print(f"Data shape: {df.shape}")
print("\nLabel distribution:")
print(df[LABEL_COL].value_counts())
print("\nLabel proportions (%):")
print(df[LABEL_COL].value_counts(normalize=True) * 100)

# Meta columns (not used as features)
meta_cols = [
    "subject", "run", "window_idx", "label", "run_base",
    "ecg_start_time_sec", "t_start", "t_end",
    "win_idx", "Unnamed: 0", "timestamp_center"
]
# The configured subject/label columns must never become features: if
# SUBJECT_COL or LABEL_COL is edited to a name that is not in the list
# above, a numeric label column would otherwise leak into X.
for configured_col in (SUBJECT_COL, LABEL_COL):
    if configured_col not in meta_cols:
        meta_cols.append(configured_col)
# Keep only those that actually exist in the dataframe
meta_cols = [c for c in meta_cols if c in df.columns]

print("\nMeta columns:")
print(meta_cols)

# Select only numeric columns as candidate features
numeric_cols = df.select_dtypes(include=["number"]).columns.tolist()

# Final feature columns = numeric columns minus meta columns
feature_cols = [c for c in numeric_cols if c not in meta_cols]

print(f"\nNumber of feature columns: {len(feature_cols)}")
print("Example feature columns:", feature_cols[:10])

# Build X, y, and subject arrays (row-aligned with the dataframe)
X_all = df[feature_cols].values
y_all = df[LABEL_COL].values
subjects_all = df[SUBJECT_COL].values

# Sorted unique class labels; their count is the number of classes.
label_set = np.unique(y_all)
print(f"Unique labels: {label_set}")

n_classes = len(label_set)


# ============================================================
# 4) Nested 10x10 N-LNSO with ROC AUC inner tuning (XGBoost)
# ============================================================

# Candidate values per hyperparameter for the inner ROC AUC tuning.
n_estimators_list      = [300, 600, 1000]
max_depth_list         = [4, 6, 8]
learning_rate_list     = [0.05, 0.1]
subsample_list         = [0.8]
colsample_bytree_list  = [0.8]

# Every combination of the candidate values, one dict per combination.
# n_estimators varies slowest and colsample_bytree fastest.
param_grid = [
    {
        "n_estimators": ne,
        "max_depth": md,
        "learning_rate": lr,
        "subsample": ss,
        "colsample_bytree": cs,
    }
    for ne in n_estimators_list
    for md in max_depth_list
    for lr in learning_rate_list
    for ss in subsample_list
    for cs in colsample_bytree_list
]

print(f"\nTotal hyperparameter combinations in inner loop: {len(param_grid)}")

# Outer folds (N-LNSO outer loop at subject level)
# Each entry of outer_folds is an array of subject IDs held out as the test
# set of one outer iteration.
outer_folds = make_subject_folds(
    subjects_all,
    n_folds=N_OUTER_FOLDS,
    random_state=RANDOM_STATE
)

# Per-fold test labels / predictions, collected so they can be pooled after
# the loop.
all_test_y_true = []
all_test_y_pred = []

# Store best hyperparameters and macro-F1 from each outer fold
# (plus the inner-loop mean ROC AUC that led to the selection).
outer_best_params_list = []
outer_macro_f1_list = []
outer_best_auc_list = []

for outer_idx, outer_test_subjects in enumerate(outer_folds):
    print("\n" + "#" * 80)
    print(f"OUTER Fold {outer_idx + 1}/{N_OUTER_FOLDS}")
    print("#" * 80)

    # Subject-wise split into Test vs Train+Val for this outer fold:
    # every window of a held-out subject goes to the test side.
    is_test = np.isin(subjects_all, outer_test_subjects)
    is_trainval = ~is_test

    X_trainval = X_all[is_trainval]
    y_trainval = y_all[is_trainval]
    subjects_trainval = subjects_all[is_trainval]

    X_test = X_all[is_test]
    y_test = y_all[is_test]
    subjects_test = subjects_all[is_test]

    print(f"Train+Val subjects: {len(np.unique(subjects_trainval))}, "
          f"Test subjects: {len(np.unique(subjects_test))}")

    # ============================
    # Inner loop: ROC AUC tuning
    # ============================
    # Only Train+Val data is passed in, so the outer test subjects play no
    # part in the hyperparameter selection.
    best_inner_params, best_inner_auc = compute_inner_roc_auc_for_params(
        X_trainval, y_trainval, subjects_trainval,
        label_set=label_set,
        param_grid=param_grid,
        n_inner_folds=N_INNER_FOLDS,
        max_ratio=3.0
    )

    outer_best_params_list.append(best_inner_params)
    outer_best_auc_list.append(best_inner_auc)

    # ============================
    # Train final model on all Train+Val with undersampling
    # ============================
    # The seed is offset by the fold index, so each outer fold draws a
    # different (but repeatable) undersample. The test set is left as is.
    X_trainval_bal, y_trainval_bal = undersample_multiclass(
        X_trainval, y_trainval,
        max_ratio=3.0,
        random_state=RANDOM_STATE + outer_idx
    )

    final_model, y_test_pred = xgb_fit_predict(
        X_trainval_bal, y_trainval_bal,
        X_test,
        best_inner_params,
        n_classes=n_classes
    )

    # Save outer fold model for external testing
    # (file name carries the 1-based, zero-padded fold number).
    model_path = MODELS_DIR / f"xgb_nested_outer_{outer_idx + 1:02d}.pkl"
    joblib.dump(final_model, model_path)
    print(f"Saved outer fold model to: {model_path}")

    # ============================
    # Evaluate on outer Test subjects
    # ============================
    fold_metrics = evaluate_metrics(y_test, y_test_pred, label_set)
    print_metrics(fold_metrics, header=f"OUTER Fold {outer_idx + 1} Test Metrics")

    all_test_y_true.append(y_test)
    all_test_y_pred.append(y_test_pred)
    outer_macro_f1_list.append(fold_metrics["macro_f1"])


# ============================================================
# 5) Global metrics across all outer folds (model as a whole)
# ============================================================

# Pool the held-out labels and predictions of every outer fold into one
# flat array each, then score them together.
all_test_y_true = np.concatenate(all_test_y_true)
all_test_y_pred = np.concatenate(all_test_y_pred)

global_metrics = evaluate_metrics(all_test_y_true, all_test_y_pred, label_set)
print_metrics(global_metrics, header="GLOBAL Test Metrics Across All OUTER Folds (XGBoost)")

print("\nNested 10×10 N-LNSO evaluation with XGBoost completed.")
print("All outer-fold models are saved and ready for external testing.")


# ============================================================
# 5b) Summary of best inner-loop ROC AUC per outer fold + CSV
# ============================================================

summary_rule = "=" * 70
print("\n" + summary_rule)
print("Summary of inner-loop ROC AUC used to select best hyperparameters (XGBoost)")
print(summary_rule)

# Hyperparameters copied into each CSV row, in column order.
tuned_names = ("n_estimators", "max_depth", "learning_rate", "subsample", "colsample_bytree")

# One row per outer fold: fold number (1-based), winning inner AUC and the
# hyperparameters that achieved it.
auc_rows = []
for fold_no, (chosen, fold_auc) in enumerate(
    zip(outer_best_params_list, outer_best_auc_list), start=1
):
    print(f"OUTER Fold {fold_no:02d}: best inner mean macro ROC AUC = {fold_auc:.4f}")
    print(f"  Selected params: {chosen}")
    auc_rows.append({
        "outer_fold": fold_no,
        "best_inner_mean_macro_roc_auc": fold_auc,
        **{name: chosen[name] for name in tuned_names},
    })

inner_auc_csv_path = MODELS_DIR / "xgb_inner_auc_summary.csv"
inner_auc_df = pd.DataFrame(auc_rows)
inner_auc_df.to_csv(inner_auc_csv_path, index=False)

print(summary_rule)
print(f"Inner-loop ROC AUC summary saved to: {inner_auc_csv_path}")
print(summary_rule)


# ============================================================
# 6) Train a single FINAL deployment model on all subjects
#    using the best hyperparameters discovered in nested CV
# ============================================================

from collections import defaultdict

# Per distinct hyperparameter combination: how many outer folds selected it
# and the summed outer-test macro-F1 of those folds.
combo_counts = defaultdict(int)
combo_f1_sum = defaultdict(float)

for fold_params, fold_macro_f1 in zip(outer_best_params_list, outer_macro_f1_list):
    # A dict is not hashable; its sorted (name, value) pairs are.
    combo_key = tuple(sorted(fold_params.items()))
    combo_counts[combo_key] += 1
    combo_f1_sum[combo_key] += float(fold_macro_f1)

# Pick the combination selected by the most outer folds; ties are broken by
# the larger summed macro-F1, and any remaining tie by first occurrence.
best_key = max(combo_counts, key=lambda k: (combo_counts[k], combo_f1_sum[k]))
best_count = combo_counts[best_key]
best_f1_sum = combo_f1_sum[best_key]

# Turn the hashable key back into a hyperparameter dict.
final_params = dict(best_key)

print("\n" + "=" * 70)
print("Selected FINAL deployment hyperparameters (based on outer folds, XGBoost):")
print(final_params)
print(f"Selected combo frequency across outer folds: {best_count}")
print("=" * 70)

# Balance the complete dataset (all subjects) with its own fixed seed.
X_full_bal, y_full_bal = undersample_multiclass(
    X_all,
    y_all,
    max_ratio=3.0,
    random_state=RANDOM_STATE + 999,
)

# Fit the deployment model on the balanced data. The helper also predicts,
# so the training rows are passed as a dummy test set and the predictions
# are discarded.
final_deployment_model, _ = xgb_fit_predict(
    X_full_bal,
    y_full_bal,
    X_full_bal,
    final_params,
    n_classes=n_classes,
)

deployment_model_path = MODELS_DIR / "xgb_final_deployment.pkl"
joblib.dump(final_deployment_model, deployment_model_path)

print("\nFinal deployment XGBoost model trained on all subjects and saved to:")
print(deployment_model_path)
print("Use this model for real-time or external testing on new patients.")


Loading fused dataset...
Data shape: (1622677, 86)

Label distribution:
label
0    1312916
1     289844
2      19917
Name: count, dtype: int64

Label proportions (%):
label
0    80.910495
1    17.862088
2     1.227416
Name: proportion, dtype: float64

Meta columns:
['subject', 'run', 'window_idx', 'label', 'run_base', 'ecg_start_time_sec', 't_start', 't_end', 'win_idx', 'Unnamed: 0', 'timestamp_center']

Number of feature columns: 75
Example feature columns: ['ch1_Delta Bandpower', 'ch1_Theta Bandpower', 'ch1_Alpha Bandpower', 'ch1_Beta Bandpower', 'ch1_Gamma Bandpower', 'ch1_Relative Delta Bandpower', 'ch1_Relative Theta Bandpower', 'ch1_Relative Alpha Bandpower', 'ch1_Relative Beta Bandpower', 'ch1_Relative Gamma Bandpower']
Unique labels: [0 1 2]

Total hyperparameter combinations in inner loop: 18

################################################################################
OUTER Fold 1/10
################################################################################
Train+Va

In [None]:
import os
from pathlib import Path

import numpy as np
import pandas as pd

from sklearn.metrics import (
    precision_recall_fscore_support,
    accuracy_score,
    balanced_accuracy_score,
    f1_score,
    confusion_matrix,
    roc_auc_score
)
from sklearn.utils import shuffle
import joblib

import xgboost as xgb
from xgboost import XGBClassifier

# ============================================================
# 1) Paths and basic settings (edit these to match your system)
# ============================================================

# Input: the fused dataset, stored as a CSV file.
DATA_PATH = Path(r"C:\Users\LENOVO\Downloads\Senior_Proj\Final\Data\FUSED_ALL_FINAL_FROM_DATA_ALL.csv")

# Output: folder that receives every saved XGBoost model and CSV summary.
MODELS_DIR = Path(r"C:\Users\LENOVO\Downloads\Senior_Proj\Final\output\XGBoost_Model")

# Dataframe columns holding the subject ID and the class label.
SUBJECT_COL = "subject"
LABEL_COL   = "label"

# Seed reused throughout the script so that runs are repeatable.
RANDOM_STATE = 42

# Fold counts of the nested N-LNSO scheme (outer loop / inner loop).
N_OUTER_FOLDS = 10
N_INNER_FOLDS = 10

# Make sure the output folder exists (parents included) and seed NumPy's
# global random generator.
MODELS_DIR.mkdir(parents=True, exist_ok=True)
np.random.seed(RANDOM_STATE)


# ============================================================
# 2) Helper functions
# ============================================================

def make_subject_folds(subject_ids, n_folds=10, random_state=42):
    """
    Create subject-wise folds.

    The unique subject IDs are shuffled with a seeded generator and split
    into ``n_folds`` groups of near-equal size. Each fold therefore contains
    a set of subjects (not windows), and every subject belongs to exactly
    one fold.

    Args:
        subject_ids: Array-like of subject IDs, one entry per sample
            (repeated IDs are expected).
        n_folds: Number of folds to create.
        random_state: Seed for the shuffle; the same seed and the same set
            of subjects always produce the same folds.

    Returns:
        A list of ``n_folds`` NumPy arrays of subject IDs.

    Raises:
        ValueError: If ``n_folds`` is smaller than 1 or larger than the
            number of unique subjects, which would leave some folds empty.
    """
    # np.unique already returns the IDs sorted, so the shuffle starts from
    # the same order no matter how the samples are ordered in the input.
    unique_subjects = np.unique(subject_ids)

    if not 1 <= n_folds <= len(unique_subjects):
        raise ValueError(
            f"n_folds must be between 1 and the number of unique subjects "
            f"({len(unique_subjects)}), got {n_folds}."
        )

    rng = np.random.RandomState(random_state)
    rng.shuffle(unique_subjects)
    return np.array_split(unique_subjects, n_folds)


def undersample_multiclass(X, y, max_ratio=3.0, random_state=42):
    """
    Randomly undersample the larger classes of a multi-class dataset.

    No class keeps more than ``int(max_ratio * n_smallest)`` samples, where
    ``n_smallest`` is the sample count of the rarest class in ``y``. Classes
    that are already at or below that cap are kept in full. The retained
    rows are returned in shuffled order.

    Returns:
        (X_kept, y_kept) as NumPy arrays restricted to the retained rows.
    """
    features = np.asarray(X)
    targets = np.asarray(y)
    sampler = np.random.RandomState(random_state)

    class_values, class_sizes = np.unique(targets, return_counts=True)
    # The same cap applies to every class.
    cap = int(class_sizes.min() * max_ratio)

    kept_per_class = []
    for value in class_values:
        members = np.flatnonzero(targets == value)
        n_take = min(len(members), cap)
        # Sampling without replacement: a row is never kept twice.
        kept_per_class.append(sampler.choice(members, size=n_take, replace=False))

    # Mix the classes so the output is not grouped class by class.
    kept = shuffle(np.concatenate(kept_per_class), random_state=random_state)

    return features[kept], targets[kept]


def get_xgb_model(params, n_classes, use_gpu=True):
    """
    Create an unfitted multi-class XGBClassifier from a hyperparameter dict.

    ``params`` must provide "n_estimators", "max_depth", "learning_rate",
    "subsample" and "colsample_bytree". Every estimator uses the "hist"
    tree method, the "multi:softprob" objective and the "mlogloss" metric.

    XGBoost >= 2.0 device selection:
      - GPU:  tree_method="hist", device="cuda"
      - CPU:  tree_method="hist"

    With ``use_gpu=True`` the GPU configuration is tried first; if building
    that estimator raises, the reason is printed and a CPU estimator is
    returned instead.

    NOTE(review): the estimator constructor presumably only stores its
    parameters, so an unavailable GPU would surface at fit() time rather
    than here - confirm whether the except branch below can ever fire.
    """
    tuned_names = (
        "n_estimators",
        "max_depth",
        "learning_rate",
        "subsample",
        "colsample_bytree",
    )
    shared_kwargs = {name: params[name] for name in tuned_names}
    shared_kwargs.update(
        objective="multi:softprob",
        num_class=n_classes,
        eval_metric="mlogloss",
        random_state=RANDOM_STATE,
        n_jobs=-1,
    )

    def build_cpu_model():
        return XGBClassifier(tree_method="hist", **shared_kwargs)

    if not use_gpu:
        # CPU only
        cpu_model = build_cpu_model()
        print("[XGBoost] Using CPU only (hist) 🧠")
        return cpu_model

    try:
        # GPU mode (the new-style device selection)
        gpu_model = XGBClassifier(
            tree_method="hist",
            device="cuda",
            **shared_kwargs,
        )
        print("[XGBoost] Using GPU (hist + device='cuda') ✅")
        return gpu_model
    except Exception as e:
        print("[XGBoost] GPU not available, falling back to CPU ❌")
        print("Reason:", e)
        return build_cpu_model()


def xgb_fit_predict_proba(X_train, y_train, X_val, params, n_classes):
    """
    Train XGBoost on (X_train, y_train) and predict class probabilities on X_val.

    The GPU configuration is tried first. If training fails with an
    XGBoost error, the reason is printed and the model is rebuilt and
    trained on the CPU instead.

    Returns:
        model       - trained XGBClassifier
        y_val_proba - numpy array of shape (n_samples_val, n_classes)

    Raises:
        xgboost.core.XGBoostError: If the CPU retry fails as well.
    """
    model = get_xgb_model(params, n_classes, use_gpu=True)
    try:
        model.fit(X_train, y_train)
    except xgb.core.XGBoostError as err:
        # An unusable GPU is typically reported when training starts rather
        # than when the estimator is constructed, so the CPU fallback has to
        # be applied here. A failure unrelated to the device simply fails
        # again on the CPU and propagates.
        print("[XGBoost] GPU training failed, retrying on CPU ❌")
        print("Reason:", err)
        model = get_xgb_model(params, n_classes, use_gpu=False)
        model.fit(X_train, y_train)

    y_val_proba = model.predict_proba(X_val)
    return model, y_val_proba


def xgb_fit_predict(X_train, y_train, X_test, params, n_classes):
    """
    Train XGBoost on (X_train, y_train) and predict class labels on X_test.

    The GPU configuration is tried first. If training fails with an
    XGBoost error, the reason is printed and the model is rebuilt and
    trained on the CPU instead.

    Returns:
        model       - trained XGBClassifier
        y_test_pred - numpy array of predicted class labels

    Raises:
        xgboost.core.XGBoostError: If the CPU retry fails as well.
    """
    model = get_xgb_model(params, n_classes, use_gpu=True)
    try:
        model.fit(X_train, y_train)
    except xgb.core.XGBoostError as err:
        # An unusable GPU is typically reported when training starts rather
        # than when the estimator is constructed, so the CPU fallback has to
        # be applied here. A failure unrelated to the device simply fails
        # again on the CPU and propagates.
        print("[XGBoost] GPU training failed, retrying on CPU ❌")
        print("Reason:", err)
        model = get_xgb_model(params, n_classes, use_gpu=False)
        model.fit(X_train, y_train)

    y_test_pred = model.predict(X_test)
    return model, y_test_pred


def evaluate_metrics(y_true, y_pred, label_set=None):
    """
    Score predictions against the ground truth.

    Returns a dict with:
    - "labels": the label order used for the per-class entries
      (``label_set``, or the unique values of ``y_true`` when it is None)
    - per-class precision, recall, f1 and support (zero_division=0)
    - accuracy, balanced accuracy, micro-F1, macro-F1, weighted-F1
    - the confusion matrix, laid out in the same label order
    """
    labels = np.unique(y_true) if label_set is None else label_set

    per_class = precision_recall_fscore_support(
        y_true, y_pred, labels=labels, zero_division=0
    )
    precision, recall, f1_by_class, support = per_class

    accuracy = accuracy_score(y_true, y_pred)
    balanced = balanced_accuracy_score(y_true, y_pred)

    # The averaged F1 variants are computed without an explicit label list.
    averaged_f1 = {
        kind: f1_score(y_true, y_pred, average=kind)
        for kind in ("micro", "macro", "weighted")
    }

    matrix = confusion_matrix(y_true, y_pred, labels=labels)

    return {
        "labels": labels,
        "precision_per_class": precision,
        "recall_per_class": recall,
        "f1_per_class": f1_by_class,
        "support_per_class": support,
        "accuracy": accuracy,
        "balanced_accuracy": balanced,
        "micro_f1": averaged_f1["micro"],
        "macro_f1": averaged_f1["macro"],
        "weighted_f1": averaged_f1["weighted"],
        "confusion_matrix": matrix,
    }


def print_metrics(metrics, header=""):
    """
    Print a metrics dict (as built by evaluate_metrics) in readable form:
    an optional banner, the per-class table, the global scores, the
    confusion matrix and the TP/FP/FN/TN counts derived from it per label.
    """
    class_labels = metrics["labels"]
    precision = metrics["precision_per_class"]
    recall = metrics["recall_per_class"]
    f1_scores = metrics["f1_per_class"]
    counts = metrics["support_per_class"]
    matrix = metrics["confusion_matrix"]

    if header:
        rule = "=" * 70
        print(f"\n{rule}\n{header}\n{rule}")

    print("\nPer-class metrics:")
    print("label\tprecision\trecall\t\tf1-score\tsupport")
    for pos, label in enumerate(class_labels):
        scores = "\t\t".join(
            f"{column[pos]:.4f}" for column in (precision, recall, f1_scores)
        )
        print(f"{label}\t{scores}\t\t{counts[pos]}")

    print("\nGlobal metrics:")
    # Titles are pre-padded so the colons line up.
    global_rows = (
        ("Accuracy          ", "accuracy"),
        ("Balanced Accuracy ", "balanced_accuracy"),
        ("Micro-F1          ", "micro_f1"),
        ("Macro-F1          ", "macro_f1"),
        ("Weighted-F1       ", "weighted_f1"),
    )
    for title, key in global_rows:
        print(f"{title}: {metrics[key]:.4f}")

    print("\nConfusion matrix (rows = true, cols = predicted):")
    print(matrix)

    grand_total = matrix.sum()
    print("\nPer-class confusion details (TP, FP, FN, TN):")
    for pos, label in enumerate(class_labels):
        hits = matrix[pos, pos]
        # Column total minus the diagonal: predicted as this label but wrong.
        false_pos = matrix[:, pos].sum() - hits
        # Row total minus the diagonal: this label predicted as something else.
        false_neg = matrix[pos, :].sum() - hits
        true_neg = grand_total - hits - false_pos - false_neg
        print(f"Label {label}: TP={hits}, FP={false_pos}, FN={false_neg}, TN={true_neg}")


def compute_inner_roc_auc_for_params(
    X, y, subjects, label_set, param_grid, n_inner_folds=10, max_ratio=3.0
):
    """
    Inner loop for nested N-LNSO: pick hyperparameters by mean ROC AUC.

    - Split the train+val subjects into n_inner_folds subject-wise groups.
    - For each hyperparameter combination in param_grid:
        - Train on the inner-train subjects (after undersampling)
        - Predict class probabilities for the inner-val subjects
        - Compute macro ROC AUC (multi-class 'ovr') on the inner-val rows
      and average the AUC over the folds where it is defined.
    - Folds where the AUC is undefined are skipped; combinations with no
      defined fold at all are discarded.

    Parameters
    ----------
    X, y, subjects : array-like, indexed by the same boolean row masks
        Features, labels and subject ID of every row of the train+val set.
    label_set : sequence
        All class labels; passed to roc_auc_score as `labels` and used to
        derive the number of classes.
    param_grid : sequence of dict
        Hyperparameter combinations to compare.
    n_inner_folds : int
        Number of subject-wise inner folds.
    max_ratio : float
        Forwarded to undersample_multiclass for the inner training set.

    Returns
    -------
    best_params, best_auc
        The combination with the highest mean macro ROC AUC and that mean.

    Raises
    ------
    RuntimeError
        If no combination obtained a defined ROC AUC on any inner fold.
    """
    inner_folds = make_subject_folds(
        subjects, n_folds=n_inner_folds, random_state=RANDOM_STATE
    )
    n_classes = len(label_set)

    # The subject-wise masks do not depend on the hyperparameters, so build
    # them once. Degenerate folds (no validation rows, or no training rows)
    # cannot be scored, so they are dropped here instead of being trained on.
    # The original fold index is kept because it seeds the undersampling.
    usable_folds = []
    for inner_idx, inner_val_subjects in enumerate(inner_folds):
        is_inner_val = np.isin(subjects, inner_val_subjects)
        if is_inner_val.any() and not is_inner_val.all():
            usable_folds.append((inner_idx, is_inner_val))

    combo_scores = []

    print("\n------------------------------------------------------")
    print("Inner loop ROC AUC hyperparameter tuning (XGBoost)")
    print("------------------------------------------------------")

    for combo_idx, params in enumerate(param_grid):
        fold_aucs = []

        for inner_idx, is_inner_val in usable_folds:
            # Subject-wise split for inner validation
            is_inner_train = ~is_inner_val

            X_inner_train = X[is_inner_train]
            y_inner_train = y[is_inner_train]
            X_inner_val = X[is_inner_val]
            y_inner_val = y[is_inner_val]

            # Undersampling on inner training set (only); the validation
            # rows keep their original class distribution.
            X_inner_train_bal, y_inner_train_bal = undersample_multiclass(
                X_inner_train,
                y_inner_train,
                max_ratio=max_ratio,
                random_state=RANDOM_STATE + inner_idx
            )

            # Train XGBoost and get probabilities on validation set
            _, y_inner_val_proba = xgb_fit_predict_proba(
                X_inner_train_bal, y_inner_train_bal,
                X_inner_val,
                params,
                n_classes=n_classes
            )

            # Compute macro ROC AUC if possible for this fold
            try:
                auc = roc_auc_score(
                    y_inner_val,
                    y_inner_val_proba,
                    labels=label_set,
                    multi_class="ovr",
                    average="macro"
                )
            except ValueError:
                # If the validation fold lacks a class, ROC AUC is undefined
                # => skip this fold for AUC calculation
                continue

            # Depending on the scikit-learn version, an undefined AUC may be
            # reported as NaN (with a warning) instead of a ValueError. A
            # single NaN would poison the mean and discard the whole
            # combination, so such folds are skipped as well.
            if np.isfinite(auc):
                fold_aucs.append(auc)

        # Mean ROC AUC across inner folds for this hyperparameter combination
        mean_auc = float(np.mean(fold_aucs)) if fold_aucs else np.nan

        combo_scores.append((params, mean_auc))
        print(f"Combo {combo_idx + 1}/{len(param_grid)}: "
              f"{params} --> mean macro ROC AUC = {mean_auc:.4f}")

    # Filter out combinations for which no inner fold produced a defined AUC
    combo_scores = [c for c in combo_scores if not np.isnan(c[1])]
    if not combo_scores:
        raise RuntimeError("All inner ROC AUC scores are NaN. Check your data splits or labels.")

    # Select the hyperparameters with the highest mean ROC AUC
    # (on ties max() keeps the combination that appears first in param_grid)
    best_params, best_auc = max(combo_scores, key=lambda t: t[1])

    print("\nBest inner hyperparameters based on ROC AUC:")
    print(best_params)
    print(f"Best inner mean macro ROC AUC: {best_auc:.4f}")

    return best_params, best_auc


# ============================================================
# 3) Load data and define feature/meta columns
# ============================================================

# Read the fused dataset and fail early if the ID / label columns are missing.
print("Loading fused dataset...")
df = pd.read_csv(DATA_PATH)

if SUBJECT_COL not in df.columns or LABEL_COL not in df.columns:
    raise ValueError("Check SUBJECT_COL and LABEL_COL names. They are not found in the dataframe.")

print(f"Data shape: {df.shape}")
print("\nLabel distribution:")
print(df[LABEL_COL].value_counts())
print("\nLabel proportions (%):")
print(df[LABEL_COL].value_counts(normalize=True) * 100)

# Meta columns (not used as features)
meta_cols = [
    "subject", "run", "window_idx", "label", "run_base",
    "ecg_start_time_sec", "t_start", "t_end",
    "win_idx", "Unnamed: 0", "timestamp_center"
]
# Keep only those that actually exist in the dataframe
meta_cols = [c for c in meta_cols if c in df.columns]

print("\nMeta columns:")
print(meta_cols)

# ========= ch1 / ch2 feature selection =========

# Select only numeric columns as candidate features
numeric_cols = df.select_dtypes(include=["number"]).columns.tolist()

# 1) Drop the meta columns from the numeric columns
all_feature_cols = [c for c in numeric_cols if c not in meta_cols]

# 2) Keep only the features whose name starts with ch1 or ch2
feature_cols = [c for c in all_feature_cols if c.startswith(("ch1", "ch2"))]

# Without this guard an empty selection would only fail much later, inside
# model training, with an error unrelated to the real cause.
if not feature_cols:
    raise ValueError(
        "No numeric feature columns starting with 'ch1' or 'ch2' were found in the dataframe."
    )

print(f"\nNumber of feature columns (ch1*/ch2* only): {len(feature_cols)}")
print("Example feature columns:", feature_cols[:10])

# Build X, y, and subject arrays (row-aligned views of the same dataframe)
X_all = df[feature_cols].values
y_all = df[LABEL_COL].values
subjects_all = df[SUBJECT_COL].values

# Sorted unique labels; reused as the class list for all metrics below
label_set = np.unique(y_all)
print(f"Unique labels: {label_set}")

n_classes = len(label_set)


# ============================================================
# 4) Nested 10x10 N-LNSO with ROC AUC inner tuning (XGBoost)
# ============================================================

# Hyperparameter grid for inner ROC AUC tuning.
# Candidate values for each tuned hyperparameter.
n_estimators_list = [300, 600, 1000]
max_depth_list = [4, 6, 8]
learning_rate_list = [0.05, 0.1]
subsample_list = [0.8]
colsample_bytree_list = [0.8]

# Full Cartesian product of the candidate lists. The first `for` clause
# varies slowest, so n_estimators changes last and colsample_bytree first.
param_grid = [
    {
        "n_estimators": ne,
        "max_depth": md,
        "learning_rate": lr,
        "subsample": ss,
        "colsample_bytree": cs,
    }
    for ne in n_estimators_list
    for md in max_depth_list
    for lr in learning_rate_list
    for ss in subsample_list
    for cs in colsample_bytree_list
]

print(f"\nTotal hyperparameter combinations in inner loop: {len(param_grid)}")

# Outer folds (N-LNSO outer loop at subject level)
# Each element of outer_folds is an array of subject IDs; the groups are
# disjoint, so every subject is held out as test data in exactly one fold.
outer_folds = make_subject_folds(
    subjects_all,
    n_folds=N_OUTER_FOLDS,
    random_state=RANDOM_STATE
)

# Per-fold true labels / predictions of the held-out subjects; they are
# pooled after the loop to compute the global metrics.
all_test_y_true = []
all_test_y_pred = []

# Store best hyperparameters and macro-F1 from each outer fold
# (plus the inner-loop mean ROC AUC that led to selecting those parameters)
outer_best_params_list = []
outer_macro_f1_list = []
outer_best_auc_list = []

for outer_idx, outer_test_subjects in enumerate(outer_folds):
    print("\n" + "#" * 80)
    print(f"OUTER Fold {outer_idx + 1}/{N_OUTER_FOLDS}")
    print("#" * 80)

    # Subject-wise split into Test vs Train+Val for this outer fold
    # (all rows of a subject go to the same side, so no subject leaks)
    is_test = np.isin(subjects_all, outer_test_subjects)
    is_trainval = ~is_test

    X_trainval = X_all[is_trainval]
    y_trainval = y_all[is_trainval]
    subjects_trainval = subjects_all[is_trainval]

    X_test = X_all[is_test]
    y_test = y_all[is_test]
    subjects_test = subjects_all[is_test]

    print(f"Train+Val subjects: {len(np.unique(subjects_trainval))}, "
          f"Test subjects: {len(np.unique(subjects_test))}")

    # ============================
    # Inner loop: ROC AUC tuning
    # ============================
    # Only the Train+Val rows are passed in, so the hyperparameters are
    # chosen without looking at this fold's test subjects.
    best_inner_params, best_inner_auc = compute_inner_roc_auc_for_params(
        X_trainval, y_trainval, subjects_trainval,
        label_set=label_set,
        param_grid=param_grid,
        n_inner_folds=N_INNER_FOLDS,
        max_ratio=3.0
    )

    outer_best_params_list.append(best_inner_params)
    outer_best_auc_list.append(best_inner_auc)

    # ============================
    # Train final model on all Train+Val with undersampling
    # ============================
    # The seed is offset by the fold index, so each outer fold draws its own
    # (but reproducible) undersample. The test rows are not resampled.
    X_trainval_bal, y_trainval_bal = undersample_multiclass(
        X_trainval, y_trainval,
        max_ratio=3.0,
        random_state=RANDOM_STATE + outer_idx
    )

    # xgb_fit_predict is defined elsewhere in this file; it is used here as
    # returning (fitted model, predicted labels for X_test).
    final_model, y_test_pred = xgb_fit_predict(
        X_trainval_bal, y_trainval_bal,
        X_test,
        best_inner_params,
        n_classes=n_classes
    )

    # Save outer fold model for external testing
    # (file names are 1-based and zero-padded: xgb_nested_outer_01.pkl, ...)
    model_path = MODELS_DIR / f"xgb_nested_outer_{outer_idx + 1:02d}.pkl"
    joblib.dump(final_model, model_path)
    print(f"Saved outer fold model to: {model_path}")

    # ============================
    # Evaluate on outer Test subjects
    # ============================
    # The global label_set is passed so that every fold reports the same
    # label order, even if a label is absent from this fold's test rows.
    fold_metrics = evaluate_metrics(y_test, y_test_pred, label_set)
    print_metrics(fold_metrics, header=f"OUTER Fold {outer_idx + 1} Test Metrics")

    all_test_y_true.append(y_test)
    all_test_y_pred.append(y_test_pred)
    outer_macro_f1_list.append(fold_metrics["macro_f1"])


# ============================================================
# 5) Global metrics across all outer folds (model as a whole)
# ============================================================

# Pool the held-out labels and predictions of all outer folds into one flat
# array each, then score them together as a single test set.
all_test_y_true, all_test_y_pred = map(
    np.concatenate, (all_test_y_true, all_test_y_pred)
)

global_metrics = evaluate_metrics(all_test_y_true, all_test_y_pred, label_set)
print_metrics(
    global_metrics,
    header="GLOBAL Test Metrics Across All OUTER Folds (XGBoost)",
)

print(
    "\nNested 10×10 N-LNSO evaluation with XGBoost completed.",
    "All outer-fold models are saved and ready for external testing.",
    sep="\n",
)


# ============================================================
# 5b) Summary of best inner-loop ROC AUC per outer fold + CSV
# ============================================================

print(
    "\n" + "=" * 70,
    "Summary of inner-loop ROC AUC used to select best hyperparameters (XGBoost)",
    "=" * 70,
    sep="\n",
)

# One row per outer fold: the fold number (1-based), the inner-loop score
# that won the tuning, and the hyperparameter values that were selected.
auc_rows = []
for i, (p, auc_val) in enumerate(zip(outer_best_params_list, outer_best_auc_list), start=1):
    print(f"OUTER Fold {i:02d}: best inner mean macro ROC AUC = {auc_val:.4f}")
    print(f"  Selected params: {p}")
    auc_rows.append({
        "outer_fold": i,
        "best_inner_mean_macro_roc_auc": auc_val,
        **{
            name: p[name]
            for name in (
                "n_estimators",
                "max_depth",
                "learning_rate",
                "subsample",
                "colsample_bytree",
            )
        },
    })

# Persist the summary next to the saved models.
inner_auc_csv_path = MODELS_DIR / "xgb_inner_auc_summary.csv"
inner_auc_df = pd.DataFrame(auc_rows)
inner_auc_df.to_csv(inner_auc_csv_path, index=False)

print(
    "=" * 70,
    f"Inner-loop ROC AUC summary saved to: {inner_auc_csv_path}",
    "=" * 70,
    sep="\n",
)


# ============================================================
# 6) Train a single FINAL deployment model on all subjects
#    using the best hyperparameters discovered in nested CV
# ============================================================

from collections import defaultdict

# For every distinct hyperparameter combination selected by an outer fold,
# tally how many folds selected it and the total macro-F1 those folds
# reached on their test subjects.
combo_counts = defaultdict(int)
combo_f1_sum = defaultdict(float)

for params, macro_f1 in zip(outer_best_params_list, outer_macro_f1_list):
    # A dict is not hashable; its sorted (name, value) pairs are.
    combo_key = tuple(sorted(params.items()))
    combo_counts[combo_key] += 1
    combo_f1_sum[combo_key] += float(macro_f1)

# Winner = most frequently selected combination; ties are broken by the
# larger macro-F1 sum. On a complete tie the first combination seen wins.
best_key = max(
    combo_counts,
    key=lambda combo: (combo_counts[combo], combo_f1_sum[combo]),
    default=None,
)
best_count = combo_counts.get(best_key, -1)
best_f1_sum = combo_f1_sum.get(best_key, -np.inf)

# Turn the hashable key back into a keyword dictionary.
final_params = dict(best_key)

print(
    "\n" + "=" * 70,
    "Selected FINAL deployment hyperparameters (based on outer folds, XGBoost):",
    final_params,
    f"Selected combo frequency across outer folds: {best_count}",
    "=" * 70,
    sep="\n",
)

# Undersampling on the full dataset (all subjects); the seed offset keeps
# this draw distinct from the ones used inside the outer folds.
X_full_bal, y_full_bal = undersample_multiclass(
    X_all, y_all,
    max_ratio=3.0,
    random_state=RANDOM_STATE + 999
)

# Train final deployment XGBoost model on all balanced data. The helper also
# predicts on its third argument; the training data is passed only to
# satisfy the signature and the predictions are discarded.
final_deployment_model, _ = xgb_fit_predict(
    X_full_bal, y_full_bal,
    X_full_bal,
    final_params,
    n_classes=n_classes
)

deployment_model_path = MODELS_DIR / "xgb_final_deployment.pkl"
joblib.dump(final_deployment_model, deployment_model_path)

print(
    "\nFinal deployment XGBoost model trained on all subjects and saved to:",
    deployment_model_path,
    "Use this model for real-time or external testing on new patients.",
    sep="\n",
)
