In [12]:
import os
import numpy as np
import pandas as pd
import joblib

from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, f1_score, classification_report


# ================================
# CONFIG
# ================================
# Where the per-fold CSV splits live, where fitted models go, and where the
# per-fold test metrics table is written.
SPLIT_DIR = r"C:\Users\ririh\OneDrive\المستندات\ecg_project_new\split_Data_5fold_subjects"
MODEL_SAVE_DIR = r"C:\Users\ririh\OneDrive\المستندات\ecg_project_new\saved_models_FULL"
OUTPUT = r"C:\Users\ririh\OneDrive\المستندات\ecg_project_new\FULL_results.csv"

# Create the model folder up front; a no-op when it already exists.
os.makedirs(MODEL_SAVE_DIR, exist_ok=True)

# Window bookkeeping used to turn a row count into hours.
# NOTE(review): WIN_SEC is presumably the window length in seconds — confirm.
WIN_SEC = 2
WIN_PER_HOUR = 3600 / WIN_SEC

# Fold names "fold01" .. "fold23" and the accumulator for per-fold result rows.
fold_ids = list(map(lambda idx: f"fold{idx:02d}", range(1, 24)))
results = list()


# ================================
# EVALUATION FUNCTION
# ================================
def evaluate(model, scaler, X, y):
    """Score a fitted classifier on one split and return a flat metrics dict.

    Parameters
    ----------
    model : fitted estimator exposing ``predict``.
    scaler : fitted transformer exposing ``transform``. It is applied to ``X``
        inside this function, so callers must pass the *unscaled* features.
    X : array-like feature matrix (unscaled).
    y : array-like of class labels. Labels 1 and 2 are pooled into a single
        positive class for the binary TP/FN/FP counts.

    Returns
    -------
    dict
        ``acc``, ``f1_macro``, ``recall_0``/``recall_1``/``recall_2``,
        ``TP``, ``FN``, ``FP``, ``sensitivity`` (TP / (TP + FN)) and ``fpr_h``
        (false positives per hour, taking each row of ``y`` as one window and
        ``WIN_PER_HOUR`` windows per hour).
    """
    Xs = scaler.transform(X)
    pred = model.predict(Xs)

    # Pin the label set so the report always contains "0", "1" and "2".
    # Without it, a split where some class never appears in y or pred has no
    # such key and the recall lookups below raise KeyError. A class with no
    # true samples gets recall 0.0 (zero_division=0).
    rep = classification_report(
        y, pred, labels=[0, 1, 2], output_dict=True, zero_division=0
    )

    # Collapse the three classes into a binary problem: {1, 2} vs 0.
    y_bin_true = np.isin(y, [1, 2]).astype(int)
    y_bin_pred = np.isin(pred, [1, 2]).astype(int)

    TP = np.sum((y_bin_true == 1) & (y_bin_pred == 1))
    FN = np.sum((y_bin_true == 1) & (y_bin_pred == 0))
    FP = np.sum((y_bin_true == 0) & (y_bin_pred == 1))

    # The small epsilon keeps the ratios finite when a denominator is zero.
    sensitivity = TP / (TP + FN + 1e-9)
    total_hours = len(y) / WIN_PER_HOUR
    fpr_h = FP / (total_hours + 1e-9)

    return {
        "acc": accuracy_score(y, pred),
        "f1_macro": f1_score(y, pred, average="macro"),
        "recall_0": rep["0"]["recall"],
        "recall_1": rep["1"]["recall"],
        "recall_2": rep["2"]["recall"],
        "TP": TP,
        "FN": FN,
        "FP": FP,
        "sensitivity": sensitivity,
        "fpr_h": fpr_h
    }


# ================================
# CLASS WEIGHTS
# ================================
# Candidate class-weight schemes. Each entry's "weights" value is what gets
# passed as `class_weight` to the classifier; "name" is used for reporting and
# in the saved model's file name.
WEIGHT_SET = [
    {"name": tag, "weights": {0: 1, 1: w_one, 2: w_two}}
    for tag, w_one, w_two in (
        ("w1", 5, 10),
        ("w2", 10, 15),
        ("w3", 15, 20),
        ("w4", 20, 30),
    )
] + [{"name": "balanced", "weights": "balanced"}]


# ================================
# MAIN LOOP
# ================================
for fold in fold_ids:

    train_path = os.path.join(SPLIT_DIR, f"{fold}_train.csv")
    val_path   = os.path.join(SPLIT_DIR, f"{fold}_val.csv")
    test_path  = os.path.join(SPLIT_DIR, f"{fold}_test.csv")

    # A fold is only usable when all three of its split files are present.
    if not all(os.path.exists(p) for p in (train_path, val_path, test_path)):
        print(f"Skipping {fold}")
        continue

    print(f"\n===== Processing {fold} =====\n")

    df_train = pd.read_csv(train_path)
    df_val   = pd.read_csv(val_path)
    df_test  = pd.read_csv(test_path)

    # Every training column that is not the label or one of the listed
    # identifier/timing columns is used as a feature.
    drop_cols = ["label", "subject", "patient", "session", "run", "start_time", "start_time_sec"]
    drop_cols = [c for c in drop_cols if c in df_train.columns]
    features = [c for c in df_train.columns if c not in drop_cols]

    # Missing feature values are replaced with 0 before scaling.
    X_train = df_train[features].fillna(0).to_numpy()
    y_train = df_train["label"].to_numpy()
    X_val   = df_val[features].fillna(0).to_numpy()
    y_val   = df_val["label"].to_numpy()
    X_test  = df_test[features].fillna(0).to_numpy()
    y_test  = df_test["label"].to_numpy()

    # The scaler is fitted on the training split only. evaluate() applies it
    # to the raw validation/test matrices itself, so they are not pre-scaled
    # here.
    scaler = StandardScaler()
    X_train_s = scaler.fit_transform(X_train)

    best_f1 = -1
    best_model = None
    best_weight = None

    # Pick the class-weight scheme with the highest validation macro-F1.
    # The comparison is strict, so ties keep the earlier entry of WEIGHT_SET.
    for w in WEIGHT_SET:
        model = LogisticRegression(max_iter=2000, n_jobs=-1,
                                   class_weight=w["weights"])
        model.fit(X_train_s, y_train)

        val_metrics = evaluate(model, scaler, X_val, y_val)

        if val_metrics["f1_macro"] > best_f1:
            best_f1 = val_metrics["f1_macro"]
            best_weight = w["name"]
            best_model = model

    print(f">>> Best Weight = {best_weight}")

    # The test split is scored once, with the model chosen on validation.
    test_metrics = evaluate(best_model, scaler, X_test, y_test)

    results.append({"fold": fold, "best_weight": best_weight, **test_metrics})

    # Persist the model together with the scaler it was trained with.
    joblib.dump(best_model, os.path.join(MODEL_SAVE_DIR, f"{fold}_{best_weight}.pkl"))
    joblib.dump(scaler,     os.path.join(MODEL_SAVE_DIR, f"{fold}_{best_weight}_scaler.pkl"))


# One row per processed fold.
pd.DataFrame(results).to_csv(OUTPUT, index=False)
print("\n===== DONE FULL FEATURES MODEL =====")



===== Processing fold01 =====

>>> Best Weight = w1

===== Processing fold02 =====

>>> Best Weight = w1

===== Processing fold03 =====

>>> Best Weight = balanced

===== Processing fold04 =====

>>> Best Weight = w1

===== Processing fold05 =====

>>> Best Weight = w1

===== Processing fold06 =====

>>> Best Weight = w1

===== Processing fold07 =====

>>> Best Weight = w1

===== Processing fold08 =====

>>> Best Weight = w1

===== Processing fold09 =====

>>> Best Weight = w1

===== Processing fold10 =====

>>> Best Weight = w1

===== Processing fold11 =====

>>> Best Weight = w1

===== Processing fold12 =====

>>> Best Weight = w1

===== Processing fold13 =====

>>> Best Weight = w1

===== Processing fold14 =====

>>> Best Weight = w1

===== Processing fold15 =====

>>> Best Weight = w1

===== Processing fold16 =====

>>> Best Weight = w1

===== Processing fold17 =====

>>> Best Weight = w1

===== Processing fold18 =====

>>> Best Weight = w1

===== Processing fold19 =====

>>> Best

In [15]:
import os
import numpy as np
import pandas as pd
import joblib

from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, f1_score, classification_report


# ================================
# CONFIG
# ================================
# Where the per-fold CSV splits live, where fitted models go, and where the
# per-fold test metrics table is written.
SPLIT_DIR = r"C:\Users\ririh\OneDrive\المستندات\ecg_project_new\split_Data_5fold_subjects"
MODEL_SAVE_DIR = r"C:\Users\ririh\OneDrive\المستندات\ecg_project_new\saved_models_EEG"
OUTPUT = r"C:\Users\ririh\OneDrive\المستندات\ecg_project_new\EEG_results.csv"

# Create the model folder up front; a no-op when it already exists.
os.makedirs(MODEL_SAVE_DIR, exist_ok=True)

# Window bookkeeping used to turn a row count into hours.
# NOTE(review): WIN_SEC is presumably the window length in seconds — confirm.
WIN_SEC = 2
WIN_PER_HOUR = 3600 / WIN_SEC

# Fold names "fold01" .. "fold23" and the accumulator for per-fold result rows.
fold_ids = list(map(lambda idx: f"fold{idx:02d}", range(1, 24)))
results = list()


def extract_eeg_features(columns):
    """Return the names in ``columns`` that start with ``ch1_`` or ``ch2_``.

    Input order is preserved. This notebook treats the selected columns as its
    EEG feature set.
    """
    eeg_prefixes = ("ch1_", "ch2_")
    return [name for name in columns if name.startswith(eeg_prefixes)]


def evaluate(model, scaler, X, y):
    """Score a fitted classifier on one split and return a flat metrics dict.

    Parameters
    ----------
    model : fitted estimator exposing ``predict``.
    scaler : fitted transformer exposing ``transform``. It is applied to ``X``
        inside this function, so callers must pass the *unscaled* features.
    X : array-like feature matrix (unscaled).
    y : array-like of class labels. Labels 1 and 2 are pooled into a single
        positive class for the binary TP/FN/FP counts.

    Returns
    -------
    dict
        ``acc``, ``f1_macro``, ``recall_0``/``recall_1``/``recall_2``,
        ``TP``, ``FN``, ``FP``, ``sensitivity`` (TP / (TP + FN)) and ``fpr_h``
        (false positives per hour, taking each row of ``y`` as one window and
        ``WIN_PER_HOUR`` windows per hour).
    """
    Xs = scaler.transform(X)
    pred = model.predict(Xs)

    # Pin the label set so the report always contains "0", "1" and "2".
    # Without it, a split where some class never appears in y or pred has no
    # such key and the recall lookups below raise KeyError. A class with no
    # true samples gets recall 0.0 (zero_division=0).
    rep = classification_report(
        y, pred, labels=[0, 1, 2], output_dict=True, zero_division=0
    )

    # Collapse the three classes into a binary problem: {1, 2} vs 0.
    y_bin_true = np.isin(y, [1, 2]).astype(int)
    y_bin_pred = np.isin(pred, [1, 2]).astype(int)

    TP = np.sum((y_bin_true == 1) & (y_bin_pred == 1))
    FN = np.sum((y_bin_true == 1) & (y_bin_pred == 0))
    FP = np.sum((y_bin_true == 0) & (y_bin_pred == 1))

    # The small epsilon keeps the ratios finite when a denominator is zero.
    sensitivity = TP / (TP + FN + 1e-9)
    total_hours = len(y) / WIN_PER_HOUR
    fpr_h = FP / (total_hours + 1e-9)

    return {
        "acc": accuracy_score(y, pred),
        "f1_macro": f1_score(y, pred, average="macro"),
        "recall_0": rep["0"]["recall"],
        "recall_1": rep["1"]["recall"],
        "recall_2": rep["2"]["recall"],
        "TP": TP,
        "FN": FN,
        "FP": FP,
        "sensitivity": sensitivity,
        "fpr_h": fpr_h
    }


# Candidate class-weight schemes. Each entry's "weights" value is what gets
# passed as `class_weight` to the classifier; "name" is used for reporting and
# in the saved model's file name.
WEIGHT_SET = [
    {"name": tag, "weights": {0: 1, 1: w_one, 2: w_two}}
    for tag, w_one, w_two in (
        ("w1", 5, 10),
        ("w2", 10, 15),
        ("w3", 15, 20),
        ("w4", 20, 30),
    )
] + [{"name": "balanced", "weights": "balanced"}]


# ================================
# MAIN LOOP
# ================================
for fold in fold_ids:

    train_path = os.path.join(SPLIT_DIR, f"{fold}_train.csv")
    val_path   = os.path.join(SPLIT_DIR, f"{fold}_val.csv")
    test_path  = os.path.join(SPLIT_DIR, f"{fold}_test.csv")

    # A fold is only usable when all three of its split files are present;
    # skip it instead of aborting the whole run with FileNotFoundError.
    if not all(os.path.exists(p) for p in (train_path, val_path, test_path)):
        print(f"Skipping {fold}")
        continue

    print(f"\n===== Processing {fold} =====")

    df_train = pd.read_csv(train_path)
    df_val   = pd.read_csv(val_path)
    df_test  = pd.read_csv(test_path)

    # Feature columns are chosen from the training frame and reused for the
    # validation and test frames.
    eeg_feats = extract_eeg_features(df_train.columns)

    # Missing feature values are replaced with 0 before scaling.
    X_train = df_train[eeg_feats].fillna(0).to_numpy()
    y_train = df_train["label"].to_numpy()

    X_val = df_val[eeg_feats].fillna(0).to_numpy()
    y_val = df_val["label"].to_numpy()

    X_test = df_test[eeg_feats].fillna(0).to_numpy()
    y_test = df_test["label"].to_numpy()

    # The scaler is fitted on the training split only. evaluate() applies it
    # to the raw validation/test matrices itself, so they are not pre-scaled
    # here.
    scaler = StandardScaler()
    X_train_s = scaler.fit_transform(X_train)

    best_f1 = -1
    best_model = None
    best_weight = None

    # Pick the class-weight scheme with the highest validation macro-F1.
    # The comparison is strict, so ties keep the earlier entry of WEIGHT_SET.
    for w in WEIGHT_SET:
        model = LogisticRegression(max_iter=2000, n_jobs=-1,
                                   class_weight=w["weights"])
        model.fit(X_train_s, y_train)

        val_metrics = evaluate(model, scaler, X_val, y_val)

        if val_metrics["f1_macro"] > best_f1:
            best_f1 = val_metrics["f1_macro"]
            best_model = model
            best_weight = w["name"]

    print(f">>> Best Weight = {best_weight}")

    # The test split is scored once, with the model chosen on validation.
    test_metrics = evaluate(best_model, scaler, X_test, y_test)

    results.append({"fold": fold, "best_weight": best_weight, **test_metrics})

    # Persist the model together with the scaler it was trained with.
    joblib.dump(best_model, os.path.join(MODEL_SAVE_DIR, f"{fold}_{best_weight}.pkl"))
    joblib.dump(scaler,     os.path.join(MODEL_SAVE_DIR, f"{fold}_{best_weight}_scaler.pkl"))

# One row per processed fold.
pd.DataFrame(results).to_csv(OUTPUT, index=False)
print("\n===== DONE EEG MODEL =====")



===== Processing fold01 =====
>>> Best Weight = balanced

===== Processing fold02 =====
>>> Best Weight = balanced

===== Processing fold03 =====
>>> Best Weight = balanced

===== Processing fold04 =====
>>> Best Weight = balanced

===== Processing fold05 =====
>>> Best Weight = balanced

===== Processing fold06 =====
>>> Best Weight = balanced

===== Processing fold07 =====
>>> Best Weight = w1

===== Processing fold08 =====
>>> Best Weight = balanced

===== Processing fold09 =====
>>> Best Weight = balanced

===== Processing fold10 =====
>>> Best Weight = balanced

===== Processing fold11 =====
>>> Best Weight = balanced

===== Processing fold12 =====
>>> Best Weight = balanced

===== Processing fold13 =====
>>> Best Weight = balanced

===== Processing fold14 =====
>>> Best Weight = balanced

===== Processing fold15 =====
>>> Best Weight = balanced

===== Processing fold16 =====
>>> Best Weight = balanced

===== Processing fold17 =====
>>> Best Weight = balanced

===== Processing fo

In [16]:
import pandas as pd

# Inputs: the per-fold result tables written by the two training cells.
# Output: the per-model average table.
EEG_CSV  = r"C:\Users\ririh\OneDrive\المستندات\ecg_project_new\EEG_results.csv"
FULL_CSV = r"C:\Users\ririh\OneDrive\المستندات\ecg_project_new\FULL_results.csv"
OUTPUT   = r"C:\Users\ririh\OneDrive\المستندات\ecg_project_new\Comparison_EEG_vs_FULL.csv"

# Load each table and tag its rows with the model they belong to.
df_eeg = pd.read_csv(EEG_CSV).assign(model="EEG_only")
df_full = pd.read_csv(FULL_CSV).assign(model="FULL_features")

# Stack both tables into one long frame with a fresh 0..n-1 index.
df = pd.concat((df_eeg, df_full), ignore_index=True)

# --- Per-model mean of each metric across folds ---
metrics = ["acc", "f1_macro", "recall_0", "recall_1", "recall_2", "FP", "sensitivity", "fpr_h"]
avg = df[["model"] + metrics].groupby("model").mean().reset_index()

print("\n===== Average Results =====\n")
print(avg)

# --- Side-by-side view: one row per fold, columns suffixed by model ---
merged = df_eeg.merge(df_full, on="fold", suffixes=("_EEG", "_FULL"))
print("\n===== Fold-by-Fold Comparison =====\n")
print(merged.head())

# Only the averaged table is written to disk.
avg.to_csv(OUTPUT, index=False)
print("\nSaved comparison to:", OUTPUT)



===== Average Results =====

           model       acc  f1_macro  recall_0  recall_1  recall_2  \
0       EEG_only  0.344024  0.265171  0.273787  0.609246  0.488907   
1  FULL_features  0.390311  0.291707  0.324827  0.690098  0.137884   

             FP  sensitivity        fpr_h  
0  42773.521739     0.744441  1043.384319  
1  38997.521739     0.692148   972.721121  

===== Fold-by-Fold Comparison =====

     fold best_weight_EEG   acc_EEG  f1_macro_EEG  recall_0_EEG  recall_1_EEG  \
0  fold01        balanced  0.321534      0.250028      0.252679      0.723566   
1  fold02        balanced  0.435664      0.288143      0.427230      0.481587   
2  fold03        balanced  0.410293      0.265727      0.412518      0.375788   
3  fold04        balanced  0.424416      0.300782      0.440372      0.317778   
4  fold05        balanced  0.309340      0.228133      0.264212      0.598170   

   recall_2_EEG  TP_EEG  FN_EEG  FP_EEG  ...  f1_macro_FULL  recall_0_FULL  \
0      0.310060    9421 