In [1]:
import os
import joblib
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
from xgboost import XGBClassifier
from sklearn.ensemble import RandomForestClassifier
import warnings
warnings.filterwarnings("ignore")

In [2]:
# Input/output locations. The defaults reproduce the original machine-specific
# setup; set the BYTEBUZZ_DATASET_PATH / BYTEBUZZ_MODEL_DIR environment
# variables to run the notebook elsewhere without editing this cell.
DATASET_PATH = os.environ.get(
    "BYTEBUZZ_DATASET_PATH",
    "C:/Users/LENOVO/Desktop/ByteBuzz/Data/final_dataset.csv",
)
# Joining with "" guarantees a trailing path separator (and leaves a value that
# already ends with one untouched), so f"{MODEL_DIR}<file name>" style
# concatenation always yields a valid path.
MODEL_DIR = os.path.join(
    os.environ.get("BYTEBUZZ_MODEL_DIR", "C:/Users/LENOVO/Desktop/ByteBuzz/Models/"),
    "",
)
os.makedirs(MODEL_DIR, exist_ok=True)

# Load dataset
df = pd.read_csv(DATASET_PATH)
print(" Dataset loaded:", df.shape)

# Fail early, naming every absent column, instead of surfacing a KeyError for
# only the first missing one further down.
non_feature_columns = ["StudentID", "TrialID", "Label"]
missing_columns = [c for c in non_feature_columns if c not in df.columns]
if missing_columns:
    raise KeyError(f"Dataset is missing required columns: {missing_columns}")

# Target vector and feature matrix: identifiers and the label itself are not
# used as model inputs.
y = df["Label"]
X = df.drop(columns=non_feature_columns)

 Dataset loaded: (1448, 28)


Group features by modality

In [3]:
# Map each modality name to the feature columns whose names start with
# "<MODALITY>_PC". Dict order follows the tuple below.
modalities = {
    name: [col for col in X.columns if col.startswith(f"{name}_PC")]
    for name in ("EEG", "EYE", "GSR", "IVT", "TIVA")
}

# Report how many feature columns were found for each modality.
print("Feature counts per modality:")
for name in modalities:
    print(f"{name}: {len(modalities[name])}")

Feature counts per modality:
EEG: 7
EYE: 3
GSR: 4
IVT: 6
TIVA: 5


In [4]:
# Train/test split: hold out 20% of the rows, stratified on the label so both
# sets keep the same class proportions; the fixed seed makes the split repeatable.
# NOTE(review): rows are split individually. The dataset has a StudentID column,
# so if one student contributes several rows, that student may end up in both
# sets — confirm whether a grouped split is required.
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, test_size=0.2, random_state=42)


# Baseline models

In [5]:

# Test-set metrics of every baseline model, keyed by modality name.
# Each value has the form {"Acc": accuracy, "F1": weighted F1}.
results = {}

# Teacher (EEG → XGBoost)
if len(modalities["EEG"]) > 0:
    print("\n--- Training EEG Teacher (XGBoost) ---")
    # Restrict both splits to the EEG feature columns only.
    eeg_X_train, eeg_X_test = X_train[modalities["EEG"]], X_test[modalities["EEG"]]

    teacher = XGBClassifier(
        n_estimators=200, max_depth=5, learning_rate=0.05, random_state=42
    )
    teacher.fit(eeg_X_train, y_train)

    # Evaluate on the held-out rows.
    y_pred = teacher.predict(eeg_X_test)
    acc = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred, average="weighted")
    results["EEG"] = {"Acc": acc, "F1": f1}

    # Save model. The path is built once and reused for the message, so the
    # reported location is always the file that was actually written, even
    # when MODEL_DIR has no trailing separator.
    teacher_path = os.path.join(MODEL_DIR, "teacher_eeg_xgb.pkl")
    joblib.dump(teacher, teacher_path)
    print(f"EEG Teacher saved → {teacher_path}")
else:
    # Same skip message as the student models use, so a missing modality is
    # never silent.
    print(" No features found for EEG, skipping...")



--- Training EEG Teacher (XGBoost) ---
EEG Teacher saved → C:/Users/LENOVO/Desktop/ByteBuzz/Models/teacher_eeg_xgb.pkl


# Students (Random Forest for EYE, GSR, TIVA)

In [6]:
# Train one RandomForest "student" per modality listed here, record its
# test-set metrics in `results`, and persist the fitted model.
for modality in ["EYE", "GSR", "TIVA"]:
    feats = modalities[modality]
    if len(feats) == 0:
        print(f" No features found for {modality}, skipping...")
        continue

    print(f"\n--- Training {modality} Student (RandomForest) ---")
    # Restrict both splits to this modality's feature columns.
    X_train_mod, X_test_mod = X_train[feats], X_test[feats]

    student = RandomForestClassifier(n_estimators=200, random_state=42)
    student.fit(X_train_mod, y_train)

    # Evaluate on the held-out rows.
    y_pred = student.predict(X_test_mod)
    acc = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred, average="weighted")
    results[modality] = {"Acc": acc, "F1": f1}

    # Save model. The path is built once and reused for the message, so the
    # reported location is always the file that was actually written, even
    # when MODEL_DIR has no trailing separator.
    student_path = os.path.join(MODEL_DIR, f"student_{modality.lower()}_rf.pkl")
    joblib.dump(student, student_path)
    print(f" {modality} Student saved → {student_path}")


--- Training EYE Student (RandomForest) ---
 EYE Student saved → C:/Users/LENOVO/Desktop/ByteBuzz/Models/student_eye_rf.pkl

--- Training GSR Student (RandomForest) ---
 GSR Student saved → C:/Users/LENOVO/Desktop/ByteBuzz/Models/student_gsr_rf.pkl

--- Training TIVA Student (RandomForest) ---
 TIVA Student saved → C:/Users/LENOVO/Desktop/ByteBuzz/Models/student_tiva_rf.pkl


In [7]:
# Summary table: one row per trained model, in the order they were added to
# `results`, with accuracy and weighted F1 rounded to three decimals.
print("\n=== Baseline Results ===")
for name in results:
    scores = results[name]
    print(f"{name:<6} | Acc: {scores['Acc']:.3f} | F1: {scores['F1']:.3f}")


=== Baseline Results ===
EEG    | Acc: 0.748 | F1: 0.667
EYE    | Acc: 0.724 | F1: 0.659
GSR    | Acc: 0.748 | F1: 0.702
TIVA   | Acc: 0.734 | F1: 0.689
