# Train and Apply Models

In [None]:
from ML.model_training import (
    omit_patient_video,
    single_user_split,
    train_lstm,
    train_random_forest,
    build_lstm_sequences
)
from sklearn.metrics import (
    accuracy_score,
    f1_score,
    precision_score,
    recall_score,
    confusion_matrix,
    classification_report,
)
import numpy as np
import pandas as pd
from ML import utils
import sys
import random
from itertools import product

In [None]:
# User indices to leave out of the dataset; this list is passed as
# ``exclude_users`` to omit_patient_video in the LSTM CV cell.
# An earlier, non-empty exclusion list is kept here for reference:
# remove_list = [0, 1, 2, 4, 5, 8, 9, 10, 11, 14, 16, 17, 18, 22]
remove_list = []

def balance(X, y, seed=5):
    """Downsample the most frequent label so it matches the rarest label's count.

    Args:
        X: Feature table indexed like ``y``.
        y: Label series; the "high" and "low" labels are compared to decide
            whether any downsampling is needed.
        seed: Random state used when sampling rows of the majority label.

    Returns:
        A ``(X, y)`` pair with a fresh 0..n-1 index. When the "high" and "low"
        counts already match, the inputs are returned unchanged apart from the
        index reset.
    """
    counts = y.value_counts()
    n_high = counts.get("high", 0)
    n_low = counts.get("low", 0)

    if n_high != n_low:
        majority_label = counts.idxmax()
        target_size = counts.min()

        # Every row that is not in the majority class is kept as-is.
        other_rows = y[y != majority_label]
        # The majority class is randomly reduced to the size of the rarest class.
        majority_rows = y[y == majority_label]
        sampled_majority = majority_rows.sample(target_size, random_state=seed)

        kept_index = other_rows.index.union(sampled_majority.index)
        X = X.loc[kept_index]
        y = y.loc[kept_index]

    return X.reset_index(drop=True), y.reset_index(drop=True)

# subjects = [i for i in range(0, 23)]
# subjects = [3]

## LSTM CV Optimizer - PSD features only

This currently runs leave-one-subject-out (LOSO) cross-validation: for each selected user, all 18 of that user's videos are removed from the training data.

In [None]:
# Hyperparameter search for the LSTM arousal classifier.
#
# For every combination in ``param_grid`` and every subject in ``subjects``:
#   1. split the feature table with omit_patient_video (selected_user=subject,
#      trials=18),
#   2. turn the train/test tables into fixed-length sequences,
#   3. train an LSTM with train_lstm and score it with accuracy on the
#      evaluation data train_lstm returns.
# The combination with the highest mean accuracy across subjects is stored in
# ``best_params``; ``best_lstm`` and ``features`` are reused by the fine-tuning
# cell further down.

CV_SIZE = 5
# Upper bound on how often a subject's split is rebuilt when the training
# labels contain a single class. Without a bound, a split that always comes
# back the same would retry forever.
MAX_SPLIT_ATTEMPTS = 5

# Draw CV_SIZE distinct subject indices from 0..22. Sampling without
# replacement keeps a subject from being evaluated twice, which would skew the
# mean accuracy.
subjects = random.sample(range(23), CV_SIZE)
# NOTE: the random draw is currently overridden; only subject 20 is evaluated.
subjects = [20]

param_grid = {
    "lr": [0.0001],
    "epochs": [100],
    "units": [256],
    "batch_size": [64],
    "timesteps": [76],
}

best_params = None
best_lstm = None
best_mean_acc = -float("inf")

print("Starting global hyperparameter search...\n")

for lr, epochs, units, batch_size, timesteps in product(
    param_grid["lr"],
    param_grid["epochs"],
    param_grid["units"],
    param_grid["batch_size"],
    param_grid["timesteps"],
):
    combo_accs = []
    for i in subjects:
        # Relabelling is never enabled in this cell; the flag is only forwarded.
        relabel = False
        split_ok = False

        for _attempt in range(MAX_SPLIT_ATTEMPTS):
            X_train_df, X_test_df, arousal_train, arousal_test = omit_patient_video(
                target="arousal",
                selected_user=i,
                trials=18,
                # holdout_videos=[2, 10, 15],
                exclude_users=remove_list,
                filename="datasets/experiment_features_table.csv",
                relabel=relabel,
            )

            features = utils.filter_features(
                X_train_df.columns,
                remove_bands=["gamma"],
            )
            features = [c for c in features if c in X_train_df.columns]
            # Label and bookkeeping columns must not be fed to the model.
            # list.remove raises ValueError if one of them is unexpectedly absent.
            for meta_col in ("arousal", "valence", "patient_index", "video_index"):
                features.remove(meta_col)

            # Build sequence-level data (trials = (patient, video))
            X_train_seq, y_train_seq = build_lstm_sequences(
                X_train_df,
                features,
                target_col="arousal",
                thresh=3.8,
                fixed_T=timesteps,
            )
            X_test_seq, y_test_seq = build_lstm_sequences(
                X_test_df,
                features,
                target_col="arousal",
                thresh=3.8,
                fixed_T=timesteps,
            )

            n_pos = (y_train_seq == 1.0).sum()
            n_neg = (y_train_seq == 0.0).sum()
            # ``minority`` is True when the positive label (1.0) is the rarer one.
            minority = n_pos < n_neg
            maj_label = 0.0 if minority else 1.0
            min_label = 1.0 - maj_label

            idx_maj = np.where(y_train_seq == maj_label)[0]
            idx_min = np.where(y_train_seq == min_label)[0]

            if len(idx_min) == 0:
                # no samples of one class, redo split
                print("No minority class in this split, re-drawing...")
                continue

            # Minority upsampling is currently disabled:
            # reps = int(np.ceil(len(idx_maj) / len(idx_min)))
            # idx_min_upsampled = np.tile(idx_min, reps)[: len(idx_maj)]

            # idx_balanced = np.concatenate([idx_maj, idx_min_upsampled])
            # np.random.shuffle(idx_balanced)

            # X_train_bal = X_train_seq[idx_balanced]
            # y_train_bal = y_train_seq[idx_balanced]

            print("y_train counts:", np.bincount(y_train_seq.astype(int)))
            print("y_test counts:", np.bincount(y_test_seq.astype(int)))

            split_ok = True
            break

        if not split_ok:
            print(
                f"Skipping subject {i}: no split with both classes after "
                f"{MAX_SPLIT_ATTEMPTS} attempts."
            )
            continue

        lstm, X_test_eval, y_test_eval = train_lstm(
            X_train_seq,
            X_test_seq,
            y_train_seq,
            y_test_seq,
            lr=lr,
            epochs=epochs,
            units=units,
            batch_size=batch_size,
            dropout=0.4,
            recurrent_dropout=0,
            bidirectional=True,
        )
        y_prob = lstm.predict(X_test_eval).ravel()
        # TODO(review): the original TODO here had no description; presumably
        # it refers to the fixed 0.5 decision threshold - confirm.
        arousal_pred = (y_prob >= 0.5).astype(int)

        acc = accuracy_score(y_test_eval, arousal_pred)
        print(acc)

        combo_accs.append(float(acc))

        print("\nConfusion Matrix (subject):")
        print(confusion_matrix(y_test_eval, arousal_pred))

    if not combo_accs:
        # Every subject was skipped; there is nothing to average for this combo.
        print("No subject produced a usable split for this combination; skipping.")
        continue

    mean_acc = float(np.mean(combo_accs))
    print(
        f"Params lr={lr}, epochs={epochs}, units={units}, batch_size={batch_size}, timesteps={timesteps} "
        f"-> mean acc across subjects = {mean_acc:.4f}"
    )

    if mean_acc > best_mean_acc:
        # ``lstm`` is the model trained for the last evaluated subject of this
        # combination; it becomes the base model for per-user fine-tuning.
        best_lstm = lstm
        best_mean_acc = mean_acc
        best_params = {
            "lr": lr,
            "epochs": epochs,
            "units": units,
            "batch_size": batch_size,
            "timesteps": timesteps,
        }


print("\nBest universal hyperparameters:")
print(best_params)
print(f"Mean accuracy across subjects (tuning): {best_mean_acc:.4f}\n")

### Fine-tune for User

In [None]:
from tensorflow.keras.models import clone_model
from tensorflow.keras import optimizers
from sklearn.metrics import confusion_matrix, accuracy_score
import tensorflow as tf
import numpy as np

# Per-user fine-tuning hyperparameter search.
#
# For each combination in ``param_grid`` the base model ``best_lstm`` (from the
# LSTM CV cell) is cloned, all but its last ``layers`` layers are frozen, and
# the clone is trained on the selected user's training videos. Each
# combination is scored on the held-out videos listed in ``TRIALS``, so the
# reported accuracy reflects data the fine-tuned model has not been fitted on.
# ``features`` is also taken from the LSTM CV cell.
param_grid = {
    "lr": [0.0001],
    "epochs": [12, 25],
    "batch_size": [64],
    "patience": [8],
    "layers": [1, 2],
}

# NOTE(review): T_SIZE presumably has to match the ``timesteps`` the base model
# was trained with (76 in the LSTM CV cell) - confirm if that grid changes.
T_SIZE = 76
N_RUNS = 1
SELECTED_USER = 20
# Video indices passed as ``holdout_videos`` during the search.
TRIALS = list(range(8, 18))

best_params = None
best_mean_acc = -np.inf

print("Starting per-user fine-tuning hyperparameter search...\n")

for lr, epochs, batch_size, patience, layers in product(
    param_grid["lr"],
    param_grid["epochs"],
    param_grid["batch_size"],
    param_grid["patience"],
    param_grid["layers"],
):

    combo_accs = []
    combo_preds = []
    combo_eval = []

    print(
        f"Testing combo: lr={lr}, batch_size={batch_size}, "
        f"epochs={epochs}, patience={patience}, layers={layers}"
    )

    for run in range(N_RUNS):
        (
            X_user_train_df,
            X_user_test_df,
            arousal_user_train,
            arousal_user_test,
        ) = single_user_split(
            target="arousal",
            selected_user=SELECTED_USER,
            holdout_videos=TRIALS,
            # holdout_videos=[run],
            # k_holdouts=1,
            filename="datasets/experiment_features_table.csv",
        )

        X_user_train_seq, y_user_train_seq = build_lstm_sequences(
            X_user_train_df,
            feature_cols=features,
            target_col="arousal",
            thresh=3.8,
            fixed_T=T_SIZE,
        )
        X_user_test_seq, y_user_test_seq = build_lstm_sequences(
            X_user_test_df,
            feature_cols=features,
            target_col="arousal",
            thresh=3.8,
            fixed_T=T_SIZE,
        )

        # Start every run from an independent copy of the base model so that
        # fine-tuning never modifies ``best_lstm`` itself.
        user_model = clone_model(best_lstm)
        user_model.build(best_lstm.input_shape)
        user_model.set_weights(best_lstm.get_weights())

        # Freeze everything except the last ``layers`` layers.
        for layer in user_model.layers[:-layers]:
            layer.trainable = False

        user_model.compile(
            optimizer=optimizers.Adam(learning_rate=lr),
            loss="binary_crossentropy",
            metrics=["accuracy"],
        )

        history = user_model.fit(
            X_user_train_seq,
            y_user_train_seq,
            validation_split=0.2,
            verbose=False,
            batch_size=batch_size,
            epochs=epochs,
            callbacks=[
                tf.keras.callbacks.EarlyStopping(
                    patience=patience,
                    restore_best_weights=True,
                    monitor="val_accuracy",
                )
            ],
        )

        # Score on the held-out videos. Scoring on the training sequences
        # would reward memorisation and contradict the "test accuracy" that is
        # reported below.
        y_prob = user_model.predict(X_user_test_seq).ravel()
        arousal_pred = (y_prob >= 0.5).astype(int)
        y_user_test_int = y_user_test_seq.astype("int32")

        acc = accuracy_score(y_user_test_int, arousal_pred)
        # Fixed labels keep the matrix 2x2 even if one class is absent.
        cm = confusion_matrix(y_user_test_int, arousal_pred, labels=[0, 1])
        print(cm)

        combo_accs.append(acc)
        combo_preds.append(arousal_pred)
        combo_eval.append(y_user_test_seq)

    # print(confusion_matrix(combo_eval, combo_preds))
    mean_acc = np.mean(combo_accs)
    std_acc = np.std(combo_accs)
    print(f"  -> mean acc over {N_RUNS} runs: {mean_acc:.4f} " f"(std={std_acc:.4f})\n")
    if mean_acc > best_mean_acc:
        best_mean_acc = mean_acc
        best_params = {
            "lr": lr,
            "batch_size": batch_size,
            "epochs": epochs,
            "patience": patience,
            "layers": layers,
        }

print("\nBest hyperparameters for user", SELECTED_USER)
print(best_params)
print(f"Best mean test accuracy: {best_mean_acc:.4f}\n")


# Final evaluation: repeat the fine-tuning N_RUNS times with the selected
# hyperparameters, each time on a split requested with k_holdouts=1, and
# report per-run accuracy plus a confusion matrix pooled over all runs.
acc_list = []
all_cm = np.zeros((2, 2), dtype=int)

print("Running final evaluation with best hyperparameters...\n")

for run in range(N_RUNS):
    X_user_train_df, X_user_test_df, arousal_user_train, arousal_user_test = (
        single_user_split(
            target="arousal",
            selected_user=SELECTED_USER,
            k_holdouts=1,
            filename="datasets/experiment_features_table.csv",
        )
    )

    # Both tables are converted to sequences with identical settings.
    seq_options = {
        "feature_cols": features,
        "target_col": "arousal",
        "thresh": 3.8,
        "fixed_T": T_SIZE,
    }
    X_user_train_seq, y_user_train_seq = build_lstm_sequences(
        X_user_train_df, **seq_options
    )
    X_user_test_seq, y_user_test_seq = build_lstm_sequences(
        X_user_test_df, **seq_options
    )

    print(f"Run {run}")
    print("User train seq shape:", X_user_train_seq.shape)
    print("User test  seq shape:", X_user_test_seq.shape)
    print("y_train counts:", np.bincount(y_user_train_seq.astype(int)))
    print("y_test  counts:", np.bincount(y_user_test_seq.astype(int)))

    # Independent copy of the base model, initialised with its weights.
    user_model = clone_model(best_lstm)
    user_model.build(best_lstm.input_shape)
    user_model.set_weights(best_lstm.get_weights())

    # Only the last ``best_params["layers"]`` layers stay trainable.
    n_trainable = best_params["layers"]
    for layer in user_model.layers[:-n_trainable]:
        layer.trainable = False

    adam = optimizers.Adam(learning_rate=best_params["lr"])
    user_model.compile(
        optimizer=adam, loss="binary_crossentropy", metrics=["accuracy"]
    )

    early_stopping = tf.keras.callbacks.EarlyStopping(
        monitor="val_accuracy",
        patience=best_params["patience"],
        restore_best_weights=True,
    )
    history = user_model.fit(
        X_user_train_seq,
        y_user_train_seq,
        batch_size=best_params["batch_size"],
        epochs=best_params["epochs"],
        validation_split=0.2,
        callbacks=[early_stopping],
        verbose=False,
    )

    test_loss, test_acc = user_model.evaluate(
        X_user_test_seq, y_user_test_seq, verbose=1
    )
    acc_list.append(test_acc)

    # Threshold the predicted probabilities at 0.5 to obtain class labels.
    y_prob = user_model.predict(X_user_test_seq, verbose=0).ravel()
    y_pred = (y_prob >= 0.5).astype("int32")
    y_true = y_user_test_seq.astype("int32")

    # labels=[0, 1] keeps the matrix 2x2 so it can be added to the pooled total.
    cm = confusion_matrix(y_true, y_pred, labels=[0, 1])
    all_cm += cm

    print("\nOverall accuracy:", accuracy_score(y_true, y_pred))
    print("-" * 50)

acc_array = np.array(acc_list)
print(f"\nMean user test accuracy over {len(acc_array)} runs: {acc_array.mean():.4f}")
print(f"Std of user test accuracy over {len(acc_array)} runs: {acc_array.std():.4f}")

print("\nPooled confusion matrix over all runs:")
print(all_cm)