# In [None]:
# Evaluate all LOSO TFLite models (all configs) and generate summaries.

import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from pathlib import Path
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix
from datetime import datetime

from google.colab import drive
# Mount Google Drive at /content/drive; the model and data roots defined
# below are both located under this mount point.
# NOTE(review): force_remount=True presumably re-establishes the mount when
# the cell is re-run in an already-mounted session — confirm against the
# Colab documentation.
drive.mount('/content/drive', force_remount=True)

# GLOBAL CONSTANTS
# Root of the trained models. The evaluation loop expects the layout
#   <root>/W{window}_S{stride}/k{k1}-{k2}/batch{batch}/LOSO_sub*/<one>.tflite
BASE_STEP3_MODELS = Path(
    "/content/drive/MyDrive/Colab Notebooks/classifier_2026_locomotion_mode/step3_models"
)

# Root of the labelled recordings: one folder per subject, each holding CSV
# files whose stem ends in "_<class label>".
DATA_BASE = Path(
    "/content/drive/MyDrive/Colab Notebooks/classifier_2026_locomotion_mode/step1_labelled"
)

# Number of classes; fixes the label range (0..NUM_CLASSES-1) of every
# confusion matrix so matrices from different subjects can be stacked.
NUM_CLASSES = 7

# Fail early, and say which root is missing (e.g. Drive not mounted).
assert BASE_STEP3_MODELS.exists(), f"Model root not found: {BASE_STEP3_MODELS}"
assert DATA_BASE.exists(), f"Labelled data root not found: {DATA_BASE}"

# PARAMETRIC CONFIGS
# (window_size, step_distance) pairs: window length in rows and the hop
# between consecutive window starts when slicing the test recordings.
WINDOW_STRIDE_CONFIGS = [(100, 20), (100, 50), (50, 10), (50, 25)]

# (k1, k2) pairs; here they only select the "k{k1}-{k2}" model sub-folder.
KERNEL_CONFIGS = [(11, 7), (9, 5), (7, 5), (5, 5)]

# Batch sizes; here they only select the "batch{N}" model sub-folder.
BATCH_SIZES = [32, 64]

# Helper — build LOSO test tensor
def build_loso_test_tensor(subject_folder, window_size, step_distance):
    """Slice every CSV recording of one subject into fixed-length windows.

    Files are visited in sorted name order. The class label of a file is the
    integer after the last underscore of its stem, and every window cut from
    that file inherits the label.

    Args:
        subject_folder: ``pathlib.Path`` of the folder holding the ``*.csv`` files.
        window_size: Number of consecutive rows per window.
        step_distance: Row offset between the starts of successive windows.

    Returns:
        ``(X, y)``: ``X`` is a float32 array of stacked windows and ``y`` the
        matching array of labels. Recordings shorter than ``window_size``
        contribute no windows.
    """
    windows = []
    window_labels = []

    for recording in sorted(subject_folder.glob("*.csv")):
        samples = pd.read_csv(recording).values
        label = int(recording.stem.rsplit("_", 1)[-1])

        # Every start offset for which a full window still fits in the file.
        starts = range(0, len(samples) - window_size + 1, step_distance)
        windows.extend(samples[s:s + window_size] for s in starts)
        window_labels.extend([label] * len(starts))

    return np.asarray(windows, dtype=np.float32), np.asarray(window_labels)

# Helper — evaluate TFLite model
def evaluate_tflite(tflite_path, X, y_true):
    """Run a TFLite model over every window in ``X`` and score the predictions.

    Args:
        tflite_path: Path to the ``.tflite`` model file.
        X: Array of input windows; ``X[i:i+1]`` is fed to the model as one
            batch of size 1.
        y_true: Ground-truth class label for each window in ``X``.

    Returns:
        dict with keys
        ``"accuracy"``  - fraction of windows classified correctly,
        ``"f1_macro"``  - macro-averaged F1 as computed by scikit-learn,
        ``"cm_raw"``    - NUM_CLASSES x NUM_CLASSES count matrix
                          (rows = true, columns = predicted),
        ``"cm_norm"``   - ``cm_raw`` with every row divided by its sum; a
                          row is NaN when that class has no true samples.
    """
    interpreter = tf.lite.Interpreter(model_path=str(tflite_path))
    interpreter.allocate_tensors()

    in_det = interpreter.get_input_details()[0]
    out_det = interpreter.get_output_details()[0]

    in_dtype = in_det["dtype"]
    scale, zero = in_det["quantization"]

    X = np.asarray(X)
    if np.issubdtype(in_dtype, np.integer) and scale != 0:
        # Quantise once for the whole set: q = round(x / scale) + zero_point,
        # saturated to the input dtype's range. A bare astype() truncates
        # toward zero and wraps around on out-of-range values.
        limits = np.iinfo(in_dtype)
        X_in = np.clip(
            np.round(X / scale + zero), limits.min, limits.max
        ).astype(in_dtype)
    else:
        # Non-quantised input (scale == 0): feed the values in the model's
        # own dtype instead of dividing by a zero scale.
        X_in = X.astype(in_dtype)

    preds = []
    for i in range(len(X_in)):
        interpreter.set_tensor(in_det["index"], X_in[i:i + 1])
        interpreter.invoke()
        out = interpreter.get_tensor(out_det["index"])
        # argmax is unaffected by output dequantisation, so the raw output
        # tensor is used directly.
        preds.append(np.argmax(out))

    preds = np.array(preds)
    cm = confusion_matrix(y_true, preds, labels=np.arange(NUM_CLASSES))

    # Rows of classes absent from y_true sum to zero; 0/0 is left as NaN on
    # purpose (undefined recall) and the warning it would raise is silenced.
    with np.errstate(divide="ignore", invalid="ignore"):
        cm_norm = cm / cm.sum(axis=1, keepdims=True)

    return {
        "accuracy": accuracy_score(y_true, preds),
        "f1_macro": f1_score(y_true, preds, average="macro"),
        "cm_raw": cm,
        "cm_norm": cm_norm
    }

# Helper — plot confusion matrices
def plot_cm(cm, title, path, normalize=False):
    """Draw a confusion matrix as an annotated heat map and save it to ``path``.

    Args:
        cm: 2-D array; rows are true classes, columns are predicted classes.
        title: Text placed above the plot.
        path: Destination passed to ``savefig``.
        normalize: When True, ``cm`` is taken as fractions and shown as
            percentages on a fixed 0-100 colour scale with two decimals.
            Otherwise the values are shown as integer counts.
    """
    fig, ax = plt.subplots(figsize=(7, 6), dpi=450)

    if normalize:
        display_cm = cm * 100
        colour_max = 100
        bar_label = "Percentage (%)"
    else:
        display_cm = cm
        colour_max = None
        bar_label = "Count"

    im = ax.imshow(display_cm, cmap="Greens", vmin=0, vmax=colour_max)

    ax.set_title(title)
    ax.set_xlabel("Predicted")
    ax.set_ylabel("True")

    cbar = fig.colorbar(im, ax=ax, fraction=0.046)
    cbar.set_label(bar_label)

    # Annotate each cell; switch to white text on the darker half of the
    # colour scale so the numbers stay readable.
    for row, col in np.ndindex(cm.shape[0], cm.shape[1]):
        value = display_cm[row, col]
        if normalize:
            label = f"{value:.2f}"
            cutoff = 50
        else:
            label = f"{int(value)}"
            cutoff = display_cm.max() / 2

        text_colour = "white" if value > cutoff else "black"
        ax.text(col, row, label, ha="center", va="center", color=text_colour)

    fig.tight_layout()
    fig.savefig(path)
    plt.close(fig)


# MASTER NESTED LOOP
# For every (window/stride, kernel, batch) configuration: evaluate each LOSO
# model on its held-out subject, write a per-subject summary table with
# MEAN/STD rows, and plot the confusion matrices averaged over subjects.
for (WINDOW_SIZE, STEP_DISTANCE) in WINDOW_STRIDE_CONFIGS:
    for (k1, k2) in KERNEL_CONFIGS:
        for BATCH_SIZE in BATCH_SIZES:

            print("\n" + "=" * 100)
            print(
                f"REPEATED TEST → "
                f"W{WINDOW_SIZE}_S{STEP_DISTANCE} | "
                f"k{k1}-{k2} | batch{BATCH_SIZE}"
            )
            print("=" * 100)

            CONFIG_DIR = (
                BASE_STEP3_MODELS
                / f"W{WINDOW_SIZE}_S{STEP_DISTANCE}"
                / f"k{k1}-{k2}"
                / f"batch{BATCH_SIZE}"
            )

            if not CONFIG_DIR.exists():
                print("Skipping (not found):", CONFIG_DIR)
                continue

            # The evaluation is re-run into its own folder so the results can
            # be checked for repeatability rather than being a one-off.
            REPEATED_TEST_EVALUATION_DIR = (
                CONFIG_DIR / "repeated_test_evaluation"
            )
            REPEATED_TEST_EVALUATION_DIR.mkdir(parents=True, exist_ok=True)

            records = []
            cms_raw = []
            cms_norm = []

            print("\nScanning LOSO folders in:", CONFIG_DIR)

            for loso_dir in sorted(CONFIG_DIR.glob("LOSO_sub*")):
                # "LOSO_subXX" -> "subXX", which is also the data folder name.
                subject = loso_dir.name.replace("LOSO_", "")
                print(f"\nEvaluating {loso_dir.name}")

                tflite_files = list(loso_dir.glob("*.tflite"))
                assert len(tflite_files) == 1, f"Expected 1 tflite in {loso_dir}"
                tflite_path = tflite_files[0]

                subject_folder = DATA_BASE / subject
                X_test, y_test = build_loso_test_tensor(
                    subject_folder,
                    WINDOW_SIZE,
                    STEP_DISTANCE
                )

                # A missing subject folder or too-short recordings produce no
                # windows; there is nothing meaningful to score in that case.
                if len(X_test) == 0:
                    print("Skipping (no test windows):", subject_folder)
                    continue

                metrics = evaluate_tflite(tflite_path, X_test, y_test)

                records.append({
                    "loso_subject": subject,
                    "test_accuracy": metrics["accuracy"],
                    "test_f1_macro": metrics["f1_macro"]
                })

                cms_raw.append(metrics["cm_raw"])
                cms_norm.append(metrics["cm_norm"])

            # Without any evaluated subject the summary columns do not exist
            # and np.stack([]) raises, so move on to the next configuration.
            if not records:
                print("Skipping summary (no LOSO results):", CONFIG_DIR)
                continue

            # ================= SUMMARY TABLE =================
            df = pd.DataFrame(records)

            mean_row = {"loso_subject": "MEAN"}
            std_row  = {"loso_subject": "STD"}

            for col in ["test_accuracy", "test_f1_macro"]:
                mean_row[col] = df[col].mean()
                std_row[col]  = df[col].std()

            df_out = pd.concat(
                [df, pd.DataFrame([mean_row]), pd.DataFrame([std_row])],
                ignore_index=True
            )

            summary_path = (
                REPEATED_TEST_EVALUATION_DIR
                / "repeated_test_evaluation_results.xlsx"
            )
            df_out.to_excel(summary_path, index=False)
            print("\nSaved:", summary_path)

            # ================= AVERAGE CONFUSION MATRICES =================
            avg_cm_raw  = np.mean(np.stack(cms_raw), axis=0)
            # A subject lacking a class has a NaN row in its normalised
            # matrix; nanmean averages that row over the remaining subjects
            # instead of turning the whole averaged row into NaN.
            avg_cm_norm = np.nanmean(np.stack(cms_norm), axis=0)

            plot_cm(
                avg_cm_raw,
                "Average Confusion Matrix (Raw)",
                REPEATED_TEST_EVALUATION_DIR / "repeated_test_evaluation_avg_cm_raw.png",
                normalize=False
            )

            plot_cm(
                avg_cm_norm,
                "Average Confusion Matrix (Normalized)",
                REPEATED_TEST_EVALUATION_DIR / "repeated_test_evaluation_avg_cm_norm.png",
                normalize=True
            )

            print("Saved avg_cm_raw.png and avg_cm_norm.png")
            print("Completed at", datetime.now())



print("Test cell done at", datetime.now())
