# Train and Apply Models

In [4]:
from ML.model_training import (
    random_train_test_split,
    train_random_forest,
    train_random_forest_regressor,
    omit_patient_video,
    train_knn_regressor,
    train_lstm,
    STSNet
)
from sklearn.metrics import (
    accuracy_score,
    f1_score,
    precision_score,
    recall_score,
    confusion_matrix,
    classification_report,
)
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
import numpy as np
import pandas as pd
import math, re, itertools
from ML import utils
import sys
from IPython.display import clear_output
from scipy.stats import pearsonr

Generate all subsets of columns for parameters.

In [5]:
features = utils.important_features_list(
    "datasets/USE_feature_importance/dreamer_feature_importance_arousal.csv"
)
unique = []
for i in range(1, len(features)):
    unique.append(features[:i])

print("Unique:", unique)
print(len(unique))

Unique: [['T8_T7_gamma_ra'], ['T8_T7_gamma_ra', 'T8_T7_gamma_da'], ['T8_T7_gamma_ra', 'T8_T7_gamma_da', 'O2_alpha'], ['T8_T7_gamma_ra', 'T8_T7_gamma_da', 'O2_alpha', 'FC5_gamma'], ['T8_T7_gamma_ra', 'T8_T7_gamma_da', 'O2_alpha', 'FC5_gamma', 'F8_alpha'], ['T8_T7_gamma_ra', 'T8_T7_gamma_da', 'O2_alpha', 'FC5_gamma', 'F8_alpha', 'P8_alpha'], ['T8_T7_gamma_ra', 'T8_T7_gamma_da', 'O2_alpha', 'FC5_gamma', 'F8_alpha', 'P8_alpha', 'O1_alpha'], ['T8_T7_gamma_ra', 'T8_T7_gamma_da', 'O2_alpha', 'FC5_gamma', 'F8_alpha', 'P8_alpha', 'O1_alpha', 'FC6_FC5_gamma_ra'], ['T8_T7_gamma_ra', 'T8_T7_gamma_da', 'O2_alpha', 'FC5_gamma', 'F8_alpha', 'P8_alpha', 'O1_alpha', 'FC6_FC5_gamma_ra', 'O1_gamma'], ['T8_T7_gamma_ra', 'T8_T7_gamma_da', 'O2_alpha', 'FC5_gamma', 'F8_alpha', 'P8_alpha', 'O1_alpha', 'FC6_FC5_gamma_ra', 'O1_gamma', 'AF4_AF3_gamma_ra'], ['T8_T7_gamma_ra', 'T8_T7_gamma_da', 'O2_alpha', 'FC5_gamma', 'F8_alpha', 'P8_alpha', 'O1_alpha', 'FC6_FC5_gamma_ra', 'O1_gamma', 'AF4_AF3_gamma_ra', 'FC6_FC5

In [6]:
X_train, X_test, arousal_train, arousal_test = omit_patient_video(
    target="arousal", random_state=42, trials=3
)
arousal_train = pd.Series(
    np.where(arousal_train > 3.8, "high", "low"),
    index=arousal_train.index,
    dtype="string",
)
arousal_test = pd.Series(
    np.where(arousal_test > 3.8, "high", "low"),
    index=arousal_test.index,
    dtype="string",
)

def balance(X, y, seed=5):
    c = y.value_counts()
    if c.get("high", 0) == c.get("low", 0):
        return X.reset_index(drop=True), y.reset_index(drop=True)
    maj = c.idxmax()
    m = c.min()
    keep = y[y != maj].index.union(y[y == maj].sample(m, random_state=seed).index)
    return X.loc[keep].reset_index(drop=True), y.loc[keep].reset_index(drop=True)


X_train, arousal_train = balance(X_train, arousal_train, seed=5)
X_test, arousal_test = balance(X_test, arousal_test, seed=5)

print("arousal_train counts:\n", arousal_train.value_counts(dropna=False))
print("arousal_test counts:\n", arousal_test.value_counts(dropna=False))

5 [1 5 3 4 2]
Held-out patient: 2 | Held-out (patient, video) trials: [(2, 7), (2, 11), (2, 12)]
arousal_train counts:
 low     13826
high    13826
Name: count, dtype: Int64
arousal_test counts:
 high    89
low     89
Name: count, dtype: Int64


## LSTM LOSO

In [7]:
best_model = None
best_acc = 0
best_keep = None

best_lr = 0
best_f1 = 0
bar_len = 30


def render(bar_str: str, status_str: str, curr):
    print(bar_str)
    print(curr)
    print(status_str, end="")
    sys.stdout.flush()


start_at = 49

total_full = len(unique)
iter_unique = unique[start_at - 1 :]

status = f"Best: index= size= | " f"acc= | f1= | prec= | rec="

results = []
for idx, keep in enumerate(iter_unique, start_at):
    filled = int(bar_len * idx / total_full) if total_full else 0
    bar = "█" * filled + "-" * (bar_len - filled)
    pct = (idx / total_full * 100) if total_full else 100
    bar_str = f"[{bar}] {idx}/{total_full} ({pct:5.1f}%)"

    X_train_sub = X_train.loc[:, keep]
    X_test_sub = X_test.loc[:, keep]

    n_low = (arousal_train == "low").sum()
    n_high = (arousal_train == "high").sum()
    for lr in [0.001]:
        for e in [10, 50]:
            for u in [256]:
                for b_s in [64]:
                    lstm, X_test_eval, y_test_eval = train_lstm(
                        X_train_sub,
                        X_test_sub,
                        arousal_train,
                        arousal_test,
                        lr=lr,
                        epochs=e,
                        units=u,
                        batch_size=b_s,
                        bidirectional=False,
                    )
                    y_prob = lstm.predict(X_test_eval).ravel()
                    arousal_pred = (y_prob >= 0.5).astype(int)

                    acc = accuracy_score(y_test_eval, arousal_pred)
                    f1 = f1_score(y_test_eval, arousal_pred, average="weighted")
                    prec = precision_score(y_test_eval, arousal_pred, average="weighted")
                    rec = recall_score(y_test_eval, arousal_pred, average="weighted")

                    if acc > best_acc:
                        best_acc = acc
                        best_model = lstm
                        best_keep = keep
                        best_lr = lr
                        best_e = e
                        best_u = u
                        best_b_s = b_s
                        best_f1 = f1
                        best_arousal_pred = arousal_pred
                        status = (
                            f"Best: index={idx} size={len(keep)} | "
                            f"acc={acc:.6f} | f1={f1:.6f} | prec={prec:.6f} | rec={rec:.6f} | lr={best_lr} | epochs={best_e} | units={best_u} | batch_size={best_b_s}"
                        )
                        print(status)

print("\nConfusion Matrix (pooled):")
print(confusion_matrix(y_test_eval, best_arousal_pred))

print("\nClassification Report (pooled):")
print(classification_report(y_test_eval, best_arousal_pred, zero_division=0))

print(status)

[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
Best: index=49 size=49 | acc=0.719101 | f1=0.716813 | prec=0.726419 | rec=0.719101 | lr=0.001 | epochs=10 | units=256 | batch_size=64
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step

Confusion Matrix (pooled):
[[72 17]
 [33 56]]

Classification Report (pooled):
              precision    recall  f1-score   support

         0.0       0.69      0.81      0.74        89
         1.0       0.77      0.63      0.69        89

    accuracy                           0.72       178
   macro avg       0.73      0.72      0.72       178
weighted avg       0.73      0.72      0.72       178

Best: index=49 size=49 | acc=0.719101 | f1=0.716813 | prec=0.726419 | rec=0.719101 | lr=0.001 | epochs=10 | units=256 | batch_size=64


In [None]:
acc_list = []
f1_list = []
precw_list = []
recw_list = []
n_list = []

num_folds = 4
y_test_full = []
y_pred_full = []

for fold in range(num_folds):

    while True:
        X_train, X_test, arousal_train, arousal_test = omit_patient_video(
            target="arousal", trials=3
        )
        arousal_train = pd.Series(
            np.where(arousal_train > 3.8, "high", "low"),
            index=arousal_train.index,
            dtype="string",
        )
        arousal_test = pd.Series(
            np.where(arousal_test > 3.8, "high", "low"),
            index=arousal_test.index,
            dtype="string",
        )

        c = arousal_test.value_counts()
        if c.get("high", 0) == 0 or c.get("low", 0) == 0:
            continue


        X_train, arousal_train = balance(X_train, arousal_train)
        X_test, arousal_test = balance(X_test, arousal_test)
        break

    print("arousal_train counts:\n", arousal_train.value_counts(dropna=False))
    print("arousal_test counts:\n", arousal_test.value_counts(dropna=False))


    lstm, X_test_eval, y_test_eval = train_lstm(
            X_train,
            X_test,
            arousal_train,
            arousal_test,
            lr=best_lr,
            epochs=best_e,
            units=best_u,
            batch_size=best_b_s,
            bidirectional=False,
    )
    y_prob = lstm.predict(X_test_eval).ravel()
    arousal_pred = (y_prob >= 0.5).astype(int)

    acc = accuracy_score(y_test_eval, arousal_pred)
    f1 = f1_score(y_test_eval, arousal_pred, average="weighted")
    prec = precision_score(y_test_eval, arousal_pred, average="weighted")
    rec = recall_score(y_test_eval, arousal_pred, average="weighted")

    print("\nConfusion Matrix (pooled):")
    print(confusion_matrix(y_test_eval, arousal_pred))

    acc_list.append(float(acc))
    print(acc)
    f1_list.append(float(f1))
    precw_list.append(float(prec))
    recw_list.append(float(rec))

    y_test_full.extend(y_test_eval)
    y_pred_full.extend(arousal_pred.tolist() if hasattr(arousal_pred, "tolist") else list(arousal_pred))

# averages across folds
avg_acc = float(np.mean(acc_list)) if acc_list else float("nan")
avg_f1w = float(np.mean(f1_list)) if f1_list else float("nan")
avg_prec = float(np.mean(precw_list)) if precw_list else float("nan")
avg_rec = float(np.mean(recw_list)) if recw_list else float("nan")

print("LSTM Classification Performance (cross-subject folds)")
print("---------------------------------------------------")
print(f"Accuracy: {avg_acc:.4f}")
print(f"F1 (weighted): {avg_f1w:.4f}")
print(f"Precision (weighted): {avg_prec:.4f}")
print(f"Recall (weighted): {avg_rec:.4f}")

print("\nConfusion Matrix (pooled):")
print(confusion_matrix(y_test_full, y_pred_full))

print("\nClassification Report (pooled):")
print(classification_report(y_test_full, y_pred_full, zero_division=0))

5 [1 5 3 4 2]
Held-out patient: 0 | Held-out (patient, video) trials: [(0, 6), (0, 7), (0, 15)]
5 [1 5 3 4 2]
Held-out patient: 9 | Held-out (patient, video) trials: [(9, 7), (9, 9), (9, 16)]
arousal_train counts:
 low     13788
high    13788
Name: count, dtype: Int64
arousal_test counts:
 high    127
low     127
Name: count, dtype: Int64
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step

Confusion Matrix (pooled):
[[ 17 110]
 [ 71  56]]
0.2874015748031496
5 [1 5 3 4 2]
Held-out patient: 17 | Held-out (patient, video) trials: [(17, 10), (17, 11), (17, 13)]
arousal_train counts:
 low     13779
high    13779
Name: count, dtype: Int64
arousal_test counts:
 low     84
high    84
Name: count, dtype: Int64
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step

Confusion Matrix (pooled):
[[45 39]
 [46 38]]
0.49404761904761907
5 [1 5 3 4 2]
Held-out patient: 4 | Held-out (patient, video) trials: [(4, 5), (4, 7), (4, 11)]
arousal_train counts:
 low     13

In [11]:
best_model = None
best_mae = math.inf
best_keep = None
best_metrics = {
    "PCC": 0.0,
    "MSE": math.inf,
    "RMSE": math.inf,
    "MAE": math.inf,
    "size": 0,
}


bar_len = 30


def safe_pcc(y_true, y_pred):
    yt = np.asarray(y_true, dtype=float).ravel()
    yp = np.asarray(y_pred, dtype=float).ravel()
    if yt.size < 2 or np.std(yt) == 0 or np.std(yp) == 0:
        return 0.0
    return float(np.corrcoef(yt, yp)[0, 1])


def render(bar_str: str, status_str: str, curr_mae):
    clear_output(wait=True)
    print(bar_str)
    print(curr_mae)
    print(status_str, end="")
    sys.stdout.flush()


status = "Best: size=0 | PCC=0.0000 | MSE=∞ | RMSE=∞ | MAE=∞"

start_at = 49

total_full = len(unique)
iter_unique = unique[start_at - 1 :]

for idx, keep in enumerate(iter_unique, start_at):
    filled = int(bar_len * idx / total_full) if total_full else 0
    bar = "█" * filled + "-" * (bar_len - filled)
    pct = (idx / total_full * 100) if total_full else 100
    bar_str = f"[{bar}] {idx}/{total_full} ({pct:5.1f}%)"


    # arousal_train = pd.Series(
    #         np.where(arousal_train > 3.33, "high", "low"),
    #         index=arousal_train.index,
    #         dtype="string",
    #     )
    # arousal_test = pd.Series(
    #     np.where(arousal_test > 3.33, "high", "low"),
    #     index=arousal_test.index,
    #     dtype="string",
    # )

    rf, X_test_eval, y_test_eval = train_random_forest(
        X_train, X_test, arousal_train, arousal_test
    )

    arousal_pred = rf.predict(X_test_eval)

    acc = accuracy_score(y_test_eval, arousal_pred)
    f1 = f1_score(y_test_eval, arousal_pred, average="weighted")
    prec = precision_score(y_test_eval, arousal_pred, average="weighted")
    rec = recall_score(y_test_eval, arousal_pred, average="weighted")

    best_acc = acc
    best_model = rf
    best_keep = keep
    best_f1 = f1
    best_arousal_pred = arousal_pred
    status = (
        f"Best: index={idx} size={len(keep)} | "
        f"acc={acc:.6f} | f1={f1:.6f} | prec={prec:.6f} | rec={rec:.6f}"
    )
    print(status)

print("\nConfusion Matrix (pooled):")
print(confusion_matrix(y_test_eval, best_arousal_pred))

print("\nClassification Report (pooled):")
print(classification_report(y_test_eval, best_arousal_pred, zero_division=0))

print(status)
# 40076!! low mse
# 40418!! low mae

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:    3.5s
[Parallel(n_jobs=-1)]: Done 184 tasks      | elapsed:   17.3s


Best: index=49 size=49 | acc=0.525581 | f1=0.519968 | prec=0.526837 | rec=0.525581

Confusion Matrix (pooled):
[[545 315]
 [501 359]]

Classification Report (pooled):
              precision    recall  f1-score   support

        high       0.52      0.63      0.57       860
         low       0.53      0.42      0.47       860

    accuracy                           0.53      1720
   macro avg       0.53      0.53      0.52      1720
weighted avg       0.53      0.53      0.52      1720

Best: index=49 size=49 | acc=0.525581 | f1=0.519968 | prec=0.526837 | rec=0.525581


[Parallel(n_jobs=-1)]: Done 300 out of 300 | elapsed:   27.3s finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 184 tasks      | elapsed:    0.0s
[Parallel(n_jobs=8)]: Done 300 out of 300 | elapsed:    0.0s finished
