In [None]:
import warnings
warnings.filterwarnings('ignore', category=FutureWarning)


In [None]:
from google.colab import drive
import sys

drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
data_path='/content/drive/MyDrive/IITB/project/data'
models_dir = "/content/drive/MyDrive/IITB/project/models"

In [None]:
import pandas as pd
import numpy as np



In [None]:
df_features=pd.read_csv(data_path+'/df_features_trials.csv')

## Multimodal Fusion (EEG + Facial)

#  Early Fusion Code
wealready have df_features = PSY + EEG features + TIVA features (aggregated).
This is early fusion — all features concatenated → single classifier.

In [None]:
# Early fusion is just using df_features as before
feature_cols = [c for c in df_features.columns
                if c not in ['Student_ID','routineStart','routineEnd','verdict','QuestionKey','label']]
X = df_features[feature_cols].replace([np.inf,-np.inf],np.nan).fillna(0).values
y = df_features['verdict'].astype(str).values


In [None]:
# pick only numeric columns explicitly
numeric_cols = df_features.select_dtypes(include=[np.number]).columns.tolist()

X_df = df_features[numeric_cols].copy()

# replace inf with NaN and fill
X_df = X_df.replace([np.inf, -np.inf], np.nan).fillna(0)

X = X_df.values
y = df_features['verdict'].astype(str).values  # or your label

from sklearn.model_selection import train_test_split
train_idx, test_idx = train_test_split(df_features.index, test_size=0.2, stratify=y, random_state=42)
X_train, y_train = X[train_idx], y[train_idx]
X_test, y_test = X[test_idx], y[test_idx]

In [None]:


from imblearn.over_sampling import SMOTE
sm = SMOTE(random_state=42)
X_train, y_train = sm.fit_resample(X_train, y_train)


In [None]:

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
rf_fusion = RandomForestClassifier(n_estimators=300, random_state=42)
rf_fusion.fit(X_train, y_train)
print(classification_report(y_test, rf_fusion.predict(X_test)))
print(confusion_matrix(y_test, rf_fusion.predict(X_test)))


              precision    recall  f1-score   support

     CORRECT       0.71      0.77      0.74       208
   INCORRECT       0.25      0.21      0.23        67
        SKIP       0.20      0.14      0.17         7
     Unknown       0.00      0.00      0.00         8

    accuracy                           0.60       290
   macro avg       0.29      0.28      0.28       290
weighted avg       0.57      0.60      0.59       290

[[160  42   4   2]
 [ 51  14   0   2]
 [  5   1   1   0]
 [  8   0   0   0]]


# Saving model

In [None]:
import os
from joblib import dump

def save_model_if_not_exists(model, model_name, models_dir):
    """
    Save a trained scikit-learn model if it doesn't already exist.
    """
    os.makedirs(models_dir, exist_ok=True)
    model_path = os.path.join(models_dir, model_name)
    if not os.path.exists(model_path):
        dump(model, model_path)
        print(f"✅ Saved model to {model_path}")
    else:
        print(f"⚠️ Model {model_path} already exists. Skipping save.")


In [None]:
save_model_if_not_exists(rf_fusion, "rf_earlyfusion.pkl",models_dir)

✅ Saved model to /content/drive/MyDrive/IITB/models/rf_earlyfusion.pkl


## Late Fusion Code

Here we train separate models on EEG-only and TIVA-only, then average their probabilities.

In [None]:
print([c for c in df_features.columns])

['Key', 'Category', 'QuestionNumber', 'matchOrnomatch', 'Difficulty', 'verdict', 'ResponseTime', 'routineStart', 'routineStamp', 'routineEnd', 'Cat2FeedbackTime', 'Student_ID', 'File_Type', 'delta_mean', 'delta_std', 'theta_mean', 'theta_std', 'alpha_mean', 'alpha_std', 'beta_mean', 'beta_std', 'gamma_mean', 'gamma_std', 'alpha_asymmetry', 'Anger_mean', 'Anger_std', 'Anger_max', 'Contempt_mean', 'Contempt_std', 'Contempt_max', 'Disgust_mean', 'Disgust_std', 'Disgust_max', 'Fear_mean', 'Fear_std', 'Fear_max', 'Joy_mean', 'Joy_std', 'Joy_max', 'Sadness_mean', 'Sadness_std', 'Sadness_max', 'Surprise_mean', 'Surprise_std', 'Surprise_max', 'Engagement_mean', 'Engagement_std', 'Engagement_max', 'Valence_mean', 'Valence_std', 'Valence_max', 'Sentimentality_mean', 'Sentimentality_std', 'Sentimentality_max', 'Confusion_mean', 'Confusion_std', 'Confusion_max', 'Neutral_mean', 'Neutral_std', 'Neutral_max', 'Attention_mean', 'Attention_std', 'Attention_max', 'Brow Furrow_mean', 'Brow Furrow_occurr

In [None]:
# 1. EEG and TIVA feature matrices as before
eeg_cols = [
    'delta_mean','delta_std','theta_mean','theta_std',
    'alpha_mean','alpha_std','beta_mean','beta_std',
    'gamma_mean','gamma_std','alpha_asymmetry'
]

tiva_cols = [c for c in df_features.columns if c.lower().startswith((
    'anger','joy','fear','sad','disgust','surprise','engagement','valence','sentimentality','confusion','neutral','attention',
    'brow','cheek','chin','dimpler','eye','inner','jaw','lip','lid','mouth','nose','smile','smirk','upper','blink','pitch','yaw','roll','interocular'
))]


In [None]:
# replace -,+inf
X_eeg = df_features[eeg_cols].replace([np.inf,-np.inf],np.nan).fillna(0).values
X_tiva = df_features[tiva_cols].replace([np.inf,-np.inf],np.nan).fillna(0).values
y = df_features['verdict'].astype(str).values


In [None]:
# 2. Split
train_idx, test_idx = train_test_split(df_features.index, test_size=0.2, stratify=y, random_state=42)
X_eeg_train, y_train = X_eeg[train_idx], y[train_idx]
X_tiva_train = X_tiva[train_idx]
X_eeg_test,  y_test  = X_eeg[test_idx],  y[test_idx]
X_tiva_test  = X_tiva[test_idx]

In [None]:
# 3. Train separate models
rf_eeg = RandomForestClassifier(n_estimators=200, random_state=42)
rf_tiva = RandomForestClassifier(n_estimators=200, random_state=42)

rf_eeg.fit(X_eeg_train, y_train)
rf_tiva.fit(X_tiva_train, y_train)

In [None]:
# Saving the models
save_model_if_not_exists(rf_eeg, "rf_eeg_latefusion.pkl",models_dir)
save_model_if_not_exists(rf_tiva, "rf_tiva_latefusion.pkl",models_dir)

✅ Saved model to /content/drive/MyDrive/IITB/models/rf_eeg_latefusion.pkl
✅ Saved model to /content/drive/MyDrive/IITB/models/rf_tiva_latefusion.pkl


In [None]:
# 4. Get probability predictions on test set
proba_eeg = rf_eeg.predict_proba(X_eeg_test)
proba_tiva = rf_tiva.predict_proba(X_tiva_test)

# 5. Average probabilities (simple late fusion)
proba_fused = (proba_eeg + proba_tiva) / 2.0
classes = rf_eeg.classes_  # same classes for both models
y_pred_fused = np.array(classes)[np.argmax(proba_fused, axis=1)]

In [None]:

print("Late Fusion Model:\n", classification_report(y_test, y_pred_fused))
print(confusion_matrix(y_test, y_pred_fused))


Late Fusion Model:
               precision    recall  f1-score   support

     CORRECT       0.72      1.00      0.84       208
   INCORRECT       0.00      0.00      0.00        67
        SKIP       0.00      0.00      0.00         7
     Unknown       0.00      0.00      0.00         8

    accuracy                           0.72       290
   macro avg       0.18      0.25      0.21       290
weighted avg       0.51      0.72      0.60       290

[[208   0   0   0]
 [ 67   0   0   0]
 [  7   0   0   0]
 [  8   0   0   0]]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


## Intermediate Fusion Code

If ywe train embeddings from CNN on faces + MLP on EEG, you can concatenate their last hidden layer outputs and feed into another classifier. In our case, because we have Affectiva features (no CNN), intermediate fusion = “take a compressed EEG feature vector (via PCA or an autoencoder) + compressed TIVA vector, concatenate, train RF/NN”.

In [None]:
from sklearn.decomposition import PCA
n_eeg_comp = min(10, X_eeg.shape[1])   # choose <= number of EEG features
n_tiva_comp = min(20, X_tiva.shape[1]) # choose <= number of TIVA features

pca_eeg = PCA(n_components=n_eeg_comp)
pca_tiva = PCA(n_components=n_tiva_comp)

X_eeg_pca = pca_eeg.fit_transform(X_eeg)
X_tiva_pca = pca_tiva.fit_transform(X_tiva)

X_concat = np.concatenate([X_eeg_pca, X_tiva_pca], axis=1)


In [None]:
from sklearn.neural_network import MLPClassifier

train_idx, test_idx = train_test_split(np.arange(len(y)), test_size=0.2, stratify=y, random_state=42)
X_train, y_train = X_concat[train_idx], y[train_idx]
X_test, y_test = X_concat[test_idx], y[test_idx]

mlp_fusion = MLPClassifier(hidden_layer_sizes=(128,), max_iter=300, random_state=42)
mlp_fusion.fit(X_train, y_train)

y_pred = mlp_fusion.predict(X_test)
print(classification_report(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))

              precision    recall  f1-score   support

     CORRECT       0.72      0.80      0.76       208
   INCORRECT       0.21      0.13      0.17        67
        SKIP       0.11      0.14      0.12         7
     Unknown       0.00      0.00      0.00         8

    accuracy                           0.61       290
   macro avg       0.26      0.27      0.26       290
weighted avg       0.57      0.61      0.58       290

[[166  30   6   6]
 [ 53   9   2   3]
 [  5   1   1   0]
 [  6   2   0   0]]


In [None]:
save_model_if_not_exists(mlp_fusion, "mlp_intermidatefusion.pkl",models_dir)

✅ Saved model to /content/drive/MyDrive/IITB/models/mlp_intermidatefusion.pkl
