In [1]:
import os
import joblib
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score

In [2]:
# --- Configuration: data/model locations and model file names ---
# The defaults below are the original author's local Windows paths. To run the
# notebook on another machine without editing code, set the environment
# variables BYTEBUZZ_DATASET_PATH and/or BYTEBUZZ_MODEL_DIR before starting
# the kernel; when they are unset the original paths are used unchanged.
DATASET_PATH = os.environ.get(
    "BYTEBUZZ_DATASET_PATH",
    r"C:\Users\LENOVO\Desktop\ByteBuzz\Data\final_dataset.csv",
)
MODEL_DIR = os.environ.get(
    "BYTEBUZZ_MODEL_DIR",
    "C:/Users/LENOVO/Desktop/ByteBuzz/Models/",
)

# File name (inside MODEL_DIR) of the pickled teacher model.
TEACHER_MODEL = "teacher_eeg_xgb.pkl"

# Student models (baseline trained in 02 notebook).
# Keys are lowercase modality names; values are file names inside MODEL_DIR.
STUDENT_MODELS = {
    "eye": "student_eye_rf.pkl",
    "gsr": "student_gsr_rf.pkl",
    "tiva": "student_tiva_rf.pkl"
}

In [3]:
# 3. Load dataset
# Read the whole CSV into memory and report its (rows, columns) shape.
df = pd.read_csv(DATASET_PATH)
print("✅ Dataset loaded:", df.shape)

# Feature matrix: every column except the two identifier columns and the target.
X = df.drop(["StudentID", "TrialID", "Label"], axis=1)
# Target vector: the "Label" column.
y = df["Label"]


✅ Dataset loaded: (1448, 28)


In [4]:
# 4. Group features by modality
# Columns are assigned to a modality by name prefix "<MODALITY>_PC"; a modality
# with no matching columns maps to an empty list.
modalities = {
    name: [col for col in X.columns if col.startswith(f"{name}_PC")]
    for name in ("EEG", "EYE", "GSR", "IVT", "TIVA")
}

print("Feature counts per modality:")
for m, cols in modalities.items():
    print("{}: {}".format(m, len(cols)))

# 5. Train/Test split
# Hold out 20% of the rows for testing, stratified on the label, with a fixed
# seed so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

Feature counts per modality:
EEG: 7
EYE: 3
GSR: 4
IVT: 6
TIVA: 5


In [5]:
# 6. Load Teacher (EEG model)
# NOTE: joblib.load unpickles arbitrary Python objects; only load model files
# that come from a trusted source.
teacher = joblib.load(os.path.join(MODEL_DIR, TEACHER_MODEL))

# The teacher only sees the EEG feature columns.
eeg_features = modalities["EEG"]
teacher_probs_train = teacher.predict_proba(X_train[eeg_features])
teacher_probs_test = teacher.predict_proba(X_test[eeg_features])
print("Teacher model loaded and soft predictions generated.")


def make_kd_labels(y_true, teacher_probs, class_labels, alpha):
    """Blend true labels with teacher probabilities and return hard KD labels.

    For every sample the score vector ``alpha * one_hot(y_true) +
    (1 - alpha) * teacher_probs`` is computed and the label of the
    highest-scoring class is returned.

    Parameters
    ----------
    y_true : array-like of shape (n_samples,)
        Ground-truth labels.
    teacher_probs : array-like of shape (n_samples, n_classes)
        Teacher class probabilities; column ``j`` must correspond to
        ``class_labels[j]``.
    class_labels : array-like of shape (n_classes,)
        Label value associated with each probability column.
    alpha : float
        Weight of the true label; ``1 - alpha`` is the teacher's weight.

    Returns
    -------
    numpy.ndarray of shape (n_samples,)
        Labels drawn from ``class_labels`` (not column indices).

    Raises
    ------
    ValueError
        If ``teacher_probs`` does not have shape (n_samples, n_classes) or if
        ``y_true`` contains a label that is missing from ``class_labels``.
    """
    y_true = np.asarray(y_true)
    class_labels = np.asarray(class_labels)
    teacher_probs = np.asarray(teacher_probs)

    expected_shape = (len(y_true), len(class_labels))
    if teacher_probs.shape != expected_shape:
        raise ValueError(
            f"teacher_probs has shape {teacher_probs.shape}, expected {expected_shape}"
        )

    # One-hot encode against the teacher's own class order so that the columns
    # of both terms in the mixture refer to the same classes.
    one_hot = (y_true[:, None] == class_labels[None, :]).astype(float)
    if not one_hot.any(axis=1).all():
        raise ValueError("y_true contains labels that are not in class_labels")

    mixed = alpha * one_hot + (1 - alpha) * teacher_probs
    # Map the winning column index back to the actual label value.
    return class_labels[np.argmax(mixed, axis=1)]


# 7. Knowledge Distillation for Students
# KD labels (mix of true labels + teacher soft labels).
# NOTE(review): the teacher can only override a true label when
# p_other - p_true > alpha / (1 - alpha). With alpha >= 0.5 that threshold is
# >= 1, so (exact ties aside) the KD labels are identical to the true labels
# and no distillation takes place. The value is kept at 0.5 so previously
# reported numbers stay reproducible; lower it to let the teacher have an
# effect. The count printed below makes the actual effect visible.
alpha = 0.5

# Probability columns follow the teacher's classes_ when it exposes them;
# otherwise fall back to the sorted unique training labels.
class_labels = getattr(teacher, "classes_", None)
if class_labels is None:
    class_labels = np.unique(y_train)

# The KD labels depend only on y_train and the teacher, not on the student,
# so they are computed once instead of once per modality.
y_soft = make_kd_labels(y_train, teacher_probs_train, class_labels, alpha)
n_overridden = int(np.sum(y_soft != np.asarray(y_train)))
print(
    f"KD labels: teacher overrode {n_overridden} of {len(y_soft)} "
    f"training labels (alpha={alpha})."
)

# Per-modality test metrics, keyed by the lowercase modality name.
results = {}

for modality, model_file in STUDENT_MODELS.items():
    feats = modalities[modality.upper()]
    if len(feats) == 0:
        print(f" No features found for {modality.upper()} → skipping.")
        continue

    print(f"\n--- Training Student ({modality.upper()}) with KD ---")

    # Each student only sees its own modality's columns.
    X_train_mod = X_train[feats]
    X_test_mod = X_test[feats]

    # NOTE: same trusted-source caveat as for the teacher pickle.
    student = joblib.load(os.path.join(MODEL_DIR, model_file))

    # Retrain student with KD labels
    student.fit(X_train_mod, y_soft)

    # Evaluate against the true (not KD) test labels
    y_pred = student.predict(X_test_mod)
    acc = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred, average="weighted")

    results[modality] = {"Acc": acc, "F1": f1}

    # Persist the retrained student next to the baseline models.
    kd_model_file = os.path.join(MODEL_DIR, f"student_{modality}_kd.pkl")
    joblib.dump(student, kd_model_file)

    print(f"Saved KD {modality.upper()} student → {kd_model_file}")


Teacher model loaded and soft predictions generated.

--- Training Student (EYE) with KD ---
Saved KD EYE student → C:/Users/LENOVO/Desktop/ByteBuzz/Models/student_eye_kd.pkl

--- Training Student (GSR) with KD ---
Saved KD GSR student → C:/Users/LENOVO/Desktop/ByteBuzz/Models/student_gsr_kd.pkl

--- Training Student (TIVA) with KD ---
Saved KD TIVA student → C:/Users/LENOVO/Desktop/ByteBuzz/Models/student_tiva_kd.pkl


In [6]:
# Print one aligned row per student modality with its test accuracy and
# weighted F1, both rounded to three decimals.
print("\n=== Final KD Student Results ===")
for mod, metrics in results.items():
    print(
        "{:<6} | Acc: {:.3f} | F1: {:.3f}".format(
            mod.upper(), metrics["Acc"], metrics["F1"]
        )
    )


=== Final KD Student Results ===
EYE    | Acc: 0.724 | F1: 0.659
GSR    | Acc: 0.748 | F1: 0.702
TIVA   | Acc: 0.734 | F1: 0.689
