STEP 4: Modeling Fusion

In [None]:
import pandas as pd
import numpy as np

# Load engineered features (already preprocessed)
df = pd.read_parquet("data_out/features_engineered.parquet").fillna(0)

# EEG / GSR / TIVA / SELFREPORT features
eeg_cols  = [c for c in df.columns if "eeg_" in c or any(b in c for b in ["Alpha","Beta","Theta","Gamma"])]
gsr_cols  = [c for c in df.columns if "gsr_" in c or "conductance" in c.lower()]
tiva_cols = [c for c in df.columns if "tiva_" in c or "AU" in c]
sr_cols   = [c for c in df.columns if "sr_" in c or "valence" in c.lower() or "arousal" in c.lower()]

# Features and labels
X_eeg  = df[eeg_cols].values
X_gsr  = df[gsr_cols].values
X_tiva = df[tiva_cols].values
X_sr   = df[sr_cols].values
y      = df["_y"].values

from sklearn.model_selection import train_test_split

# Stratified split for each modality
X_eeg_train, X_eeg_test, y_train, y_test = train_test_split(X_eeg, y, test_size=0.2, random_state=42, stratify=y)
X_gsr_train, X_gsr_test, _, _ = train_test_split(X_gsr, y, test_size=0.2, random_state=42, stratify=y)
X_tiva_train, X_tiva_test, _, _ = train_test_split(X_tiva, y, test_size=0.2, random_state=42, stratify=y)
X_sr_train, X_sr_test, _, _     = train_test_split(X_sr, y, test_size=0.2, random_state=42, stratify=y)

print("Feature shapes -> EEG:", X_eeg_train.shape, "GSR:", X_gsr_train.shape,
      "TIVA:", X_tiva_train.shape, "SELFREPORT:", X_sr_train.shape)

Feature shapes -> EEG: (3939, 34) GSR: (3939, 1) TIVA: (3939, 2) SELFREPORT: (3939, 5)


In [None]:
from sklearn.preprocessing import StandardScaler
import numpy as np

def clean_features(X):
    # Replace NaN, inf, -inf with 0
    return np.nan_to_num(X, nan=0.0, posinf=0.0, neginf=0.0)

# Clean
X_eeg_train  = clean_features(X_eeg_train)
X_gsr_train  = clean_features(X_gsr_train)
X_tiva_train = clean_features(X_tiva_train)
X_sr_train   = clean_features(X_sr_train)

X_eeg_test  = clean_features(X_eeg_test)
X_gsr_test  = clean_features(X_gsr_test)
X_tiva_test = clean_features(X_tiva_test)
X_sr_test   = clean_features(X_sr_test)

# Standardize
scaler_eeg  = StandardScaler()
scaler_gsr  = StandardScaler()
scaler_tiva = StandardScaler()
scaler_sr   = StandardScaler()

X_eeg_train_scaled  = scaler_eeg.fit_transform(X_eeg_train)
X_gsr_train_scaled  = scaler_gsr.fit_transform(X_gsr_train)
X_tiva_train_scaled = scaler_tiva.fit_transform(X_tiva_train)
X_sr_train_scaled   = scaler_sr.fit_transform(X_sr_train)

X_eeg_test_scaled  = scaler_eeg.transform(X_eeg_test)
X_gsr_test_scaled  = scaler_gsr.transform(X_gsr_test)
X_tiva_test_scaled = scaler_tiva.transform(X_tiva_test)
X_sr_test_scaled   = scaler_sr.transform(X_sr_test)

print("Features cleaned and standardized. Sample mean/std (first 5):")
print(np.mean(X_eeg_train_scaled, axis=0)[:5], np.std(X_eeg_train_scaled, axis=0)[:5])

Features cleaned and standardized. Sample mean/std (first 5):
[-5.39874483e-15  1.03954813e-16 -2.02839071e-15 -1.85149912e-15
 -1.24015773e-18] [1. 1. 1. 1. 1.]


In [None]:
import xgboost as xgb
from sklearn.metrics import accuracy_score, f1_score

def train_xgb(X_tr, y_tr, X_te, y_te, modality_name="modality"):
    clf = xgb.XGBClassifier(
        n_estimators=200, max_depth=5, learning_rate=0.05,
        subsample=0.8, colsample_bytree=0.8, random_state=42,
        use_label_encoder=False, eval_metric="mlogloss"
    )
    clf.fit(X_tr, y_tr)
    y_pred = clf.predict(X_te)
    print(f"{modality_name} Accuracy:", accuracy_score(y_te, y_pred))
    print(f"{modality_name} Macro F1:", f1_score(y_te, y_pred, average="macro"))
    return clf

# Train each modality
clf_eeg   = train_xgb(X_eeg_train_scaled, y_train, X_eeg_test_scaled, y_test, "EEG")
clf_gsr   = train_xgb(X_gsr_train_scaled, y_train, X_gsr_test_scaled, y_test, "GSR")
clf_tiva  = train_xgb(X_tiva_train_scaled, y_train, X_tiva_test_scaled, y_test, "TIVA")
clf_sr    = train_xgb(X_sr_train_scaled, y_train, X_sr_test_scaled, y_test, "SELFREPORT")

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


EEG Accuracy: 0.6964467005076143
EEG Macro F1: 0.32488146773861054


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


GSR Accuracy: 0.6873096446700507
GSR Macro F1: 0.3174396928998156
TIVA Accuracy: 0.6944162436548224
TIVA Macro F1: 0.2732174955062912


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


SELFREPORT Accuracy: 0.766497461928934
SELFREPORT Macro F1: 0.49965460611175844


In [None]:
# Get predicted probabilities (3 classes) for each modality
prob_eeg_train  = clf_eeg.predict_proba(X_eeg_train_scaled)
prob_gsr_train  = clf_gsr.predict_proba(X_gsr_train_scaled)
prob_tiva_train = clf_tiva.predict_proba(X_tiva_train_scaled)
prob_sr_train   = clf_sr.predict_proba(X_sr_train_scaled)

prob_eeg_test   = clf_eeg.predict_proba(X_eeg_test_scaled)
prob_gsr_test   = clf_gsr.predict_proba(X_gsr_test_scaled)
prob_tiva_test  = clf_tiva.predict_proba(X_tiva_test_scaled)
prob_sr_test    = clf_sr.predict_proba(X_sr_test_scaled)

# Stack horizontally → 3 classes × 4 modalities = 12 features
meta_X_train = np.hstack([prob_eeg_train, prob_gsr_train, prob_tiva_train, prob_sr_train])
meta_X_test  = np.hstack([prob_eeg_test, prob_gsr_test, prob_tiva_test, prob_sr_test])

print("Meta-classifier train shape:", meta_X_train.shape)
print("Meta-classifier test shape:", meta_X_test.shape)

Meta-classifier train shape: (3939, 12)
Meta-classifier test shape: (985, 12)


In [None]:
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
from sklearn.metrics import accuracy_score, f1_score, classification_report

# Convert stacked features to tensors
meta_X_train_t = torch.tensor(meta_X_train, dtype=torch.float32)
meta_y_train_t = torch.tensor(y_train, dtype=torch.long)
meta_X_test_t  = torch.tensor(meta_X_test, dtype=torch.float32)
meta_y_test_t  = torch.tensor(y_test, dtype=torch.long)

train_dataset = TensorDataset(meta_X_train_t, meta_y_train_t)
train_loader  = DataLoader(train_dataset, batch_size=32, shuffle=True)

# Define simple MLP meta-classifier
class MetaClassifier(nn.Module):
    def __init__(self, input_dim=12, hidden_dim=16, num_classes=3):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, num_classes)
    def forward(self, x):
        return self.fc2(self.relu(self.fc1(x)))

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
meta_model = MetaClassifier().to(device)
criterion  = nn.CrossEntropyLoss()
optimizer  = torch.optim.Adam(meta_model.parameters(), lr=0.01)

# Training loop
epochs = 30
for epoch in range(epochs):
    meta_model.train()
    for xb, yb in train_loader:
        xb, yb = xb.to(device), yb.to(device)
        optimizer.zero_grad()
        out = meta_model(xb)
        loss = criterion(out, yb)
        loss.backward()
        optimizer.step()
    if (epoch+1) % 10 == 0:
        print(f"Epoch {epoch+1}/{epochs} Loss: {loss.item():.4f}")

# Evaluation
meta_model.eval()
with torch.no_grad():
    preds = meta_model(meta_X_test_t.to(device))
    y_pred_meta = torch.argmax(preds, dim=1).cpu().numpy()

print("Stacking Meta-classifier Accuracy:", accuracy_score(y_test, y_pred_meta))
print("Macro F1:", f1_score(y_test, y_pred_meta, average="macro"))
print(classification_report(y_test, y_pred_meta))

Epoch 10/30 Loss: 0.0058
Epoch 20/30 Loss: 0.0455
Epoch 30/30 Loss: 0.8336
Stacking Meta-classifier Accuracy: 0.7147208121827411
Macro F1: 0.5111749561851939
              precision    recall  f1-score   support

           0       0.35      0.19      0.24        43
           1       0.78      0.85      0.81       684
           2       0.53      0.43      0.48       258

    accuracy                           0.71       985
   macro avg       0.55      0.49      0.51       985
weighted avg       0.69      0.71      0.70       985



In [None]:
fusion_path = "models/fusion_model.pt"
torch.save({
    "meta_model_state_dict": meta_model.state_dict(),
    "clf_eeg": clf_eeg,
    "clf_gsr": clf_gsr,
    "clf_tiva": clf_tiva,
    "clf_sr": clf_sr,
    "scaler_eeg": scaler_eeg,
    "scaler_gsr": scaler_gsr,
    "scaler_tiva": scaler_tiva,
    "scaler_sr": scaler_sr
}, fusion_path)

print("Saved fusion model to:", fusion_path)

Saved fusion model to: models/fusion_model.pt
