## STEP 3: Modeling Approaches


In [None]:
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, accuracy_score, roc_auc_score
import tensorflow as tf
from tensorflow.keras import layers, models
from pathlib import Path

# Load preprocessed data
RESULTS_DIR = Path("/content/results")
# Join RESULTS_DIR with a *relative* file name. Joining with an absolute path
# makes pathlib discard the left-hand side, so RESULTS_DIR would be ignored.
df = pd.read_csv(RESULTS_DIR / "preprocessed_trials.csv")
print(f"✅ Loaded preprocessed_trials.csv (Shape: {df.shape})")

# Define modality feature groups (adjust prefixes based on extraction).
# Each modality maps to the list of columns whose name starts with one of its
# prefixes; a modality with no matching column gets an empty list.
modalities = {
    'EEG': [col for col in df.columns if col.startswith(('EEG_', 'PCA_'))],  # Teacher
    'Eye': [col for col in df.columns if col.startswith(('EYE_', 'IVT_'))],  # Combined Eye-tracking
    'GSR': [col for col in df.columns if col.startswith('GSR_')],
    'Facial': [col for col in df.columns if col.startswith('TIVA_')]  # Facial expressions
}

y = df['Target_encoded']  # Binary target

# Function to evaluate model
def evaluate_model(y_true, y_pred, y_prob=None):
    """Score a set of predictions against the ground truth.

    Args:
        y_true: True labels.
        y_pred: Predicted hard labels.
        y_prob: Optional predicted scores/probabilities; when supplied,
            ROC-AUC is reported as well.

    Returns:
        dict with keys 'F1-score' and 'Accuracy', plus 'ROC-AUC' when
        ``y_prob`` is not None.
    """
    f1 = f1_score(y_true, y_pred)
    acc = accuracy_score(y_true, y_pred)
    if y_prob is None:
        return {'F1-score': f1, 'Accuracy': acc}
    return {
        'F1-score': f1,
        'Accuracy': acc,
        'ROC-AUC': roc_auc_score(y_true, y_prob),
    }

# -------------------------------
# 3.1 Baseline (Single-Modality Models)
# -------------------------------
# Train one XGBoost classifier per modality on an 80/20 stratified split,
# record its test metrics and save the fitted model under RESULTS_DIR.
baseline_results = {}
for modality, features in modalities.items():
    if not features:
        print(f"⚠️ No features for {modality}")
        continue
    X = df[features].select_dtypes(include=[np.number]).fillna(0)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y  # Stratify for imbalance
    )

    # Handle imbalance: XGBoost's suggested value for scale_pos_weight is
    # (#negative / #positive). Guard against a training split without any
    # positive sample, which would otherwise divide by zero.
    n_pos = int((y_train == 1).sum())
    n_neg = len(y_train) - n_pos
    scale_pos_weight = n_neg / n_pos if n_pos else 1.0

    model = xgb.XGBClassifier(random_state=42, scale_pos_weight=scale_pos_weight)
    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)
    y_prob = model.predict_proba(X_test)[:, 1]  # probability of the positive class
    metrics = evaluate_model(y_test, y_pred, y_prob)
    baseline_results[modality] = metrics
    print(f"✅ Baseline {modality}: {metrics}")

    # Save model
    model.save_model(RESULTS_DIR / f"{modality.lower()}_baseline.json")

# Save baseline results (one row per modality, one column per metric).
# The file name is relative so that RESULTS_DIR is actually honoured.
pd.DataFrame(baseline_results).T.to_csv(RESULTS_DIR / "baseline_results.csv")
print("✅ Baseline results saved")

# Load teacher model (EEG baseline) and generate soft labels
if modalities['EEG']:
    teacher_model = xgb.XGBClassifier()
    # Relative file name: an absolute right-hand side would make pathlib drop
    # RESULTS_DIR. This is the file written by the EEG baseline above.
    teacher_model.load_model(RESULTS_DIR / "eeg_baseline.json")
    X_eeg = df[modalities['EEG']].select_dtypes(include=[np.number]).fillna(0)
    # Positive-class probability for every row of df, in df's row order.
    # NOTE(review): this includes the rows the teacher was fitted on, so the
    # soft labels for those rows are likely over-confident — consider
    # out-of-fold predictions.
    teacher_probs = teacher_model.predict_proba(X_eeg)[:, 1]
else:
    print("❌ No EEG features to load teacher model")
    teacher_probs = None

# -------------------------------
# 3.2 Knowledge Transfer (Knowledge Distillation)
# -------------------------------
def create_student_nn(input_dim):
    """Build the student network: a small MLP ending in a sigmoid unit.

    Args:
        input_dim: Number of input features.

    Returns:
        An uncompiled Keras ``Sequential`` model with hidden ReLU layers of
        64 and 32 units followed by a single sigmoid output.
    """
    stack = [layers.Input(shape=(input_dim,))]
    for width in (64, 32):
        stack.append(layers.Dense(width, activation='relu'))
    stack.append(layers.Dense(1, activation='sigmoid'))
    return models.Sequential(stack)

def distillation_loss(y_true, y_pred, teacher_probs, temperature=2.0, alpha=0.5):
    """Per-sample knowledge-distillation loss for a binary sigmoid student.

    The loss is ``alpha * soft_loss + (1 - alpha) * hard_loss`` where

    * ``hard_loss`` is the binary cross-entropy between ``y_true`` and the
      student probability, and
    * ``soft_loss`` is the KL divergence between the temperature-softened
      teacher and student Bernoulli distributions.

    Args:
        y_true: Ground-truth binary labels for the batch; any shape that
            flattens to ``(batch,)``.
        y_pred: Student output *probabilities* (the student ends in a
            sigmoid); any shape that flattens to ``(batch,)``.
        teacher_probs: Teacher positive-class probabilities. Must be aligned
            element-wise with ``y_pred`` (same samples, same order).
        temperature: Softening temperature applied in logit space.
        alpha: Weight of the soft (teacher) term, in [0, 1].

    Returns:
        Tensor of shape ``(batch,)`` with one loss value per sample.
    """
    eps = 1e-7  # keeps log() and the logit transform finite

    def _as_prob_vector(values):
        # Flatten to (batch,) so (batch, 1) and (batch,) inputs never broadcast
        # against each other, and keep probabilities strictly inside (0, 1).
        flat = tf.reshape(tf.cast(values, tf.float32), [-1])
        return tf.clip_by_value(flat, eps, 1.0 - eps)

    def _soften(prob):
        # Temperature acts on logits, not on probabilities: recover the logit,
        # divide by T, and map back through the sigmoid.
        logit = tf.math.log(prob) - tf.math.log1p(-prob)
        return tf.clip_by_value(tf.math.sigmoid(logit / temperature), eps, 1.0 - eps)

    labels = tf.reshape(tf.cast(y_true, tf.float32), [-1])
    student = _as_prob_vector(y_pred)
    teacher = _as_prob_vector(teacher_probs)

    # Hard-label binary cross-entropy, per sample.
    hard_loss = -(labels * tf.math.log(student)
                  + (1.0 - labels) * tf.math.log1p(-student))

    # KL(teacher || student) between the two softened Bernoulli distributions.
    soft_student = _soften(student)
    soft_teacher = _soften(teacher)
    soft_loss = (
        soft_teacher * (tf.math.log(soft_teacher) - tf.math.log(soft_student))
        + (1.0 - soft_teacher)
        * (tf.math.log1p(-soft_teacher) - tf.math.log1p(-soft_student))
    )
    return alpha * soft_loss + (1 - alpha) * hard_loss

# Train one student network per non-EEG modality, distilling from the EEG
# teacher's soft labels, then evaluate it on the held-out 20 % split.
distillation_results = {}
for modality in ['Eye', 'GSR', 'Facial']:
    features = modalities[modality]
    if not features or teacher_probs is None:
        print(f"⚠️ No features or teacher for {modality}")
        continue
    X = df[features].select_dtypes(include=[np.number]).fillna(0)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

    # teacher_probs is ordered like df; select the training rows by index
    # *label* so the lookup stays correct even if df's index is not 0..n-1.
    teacher_probs_train = (
        pd.Series(teacher_probs, index=df.index).loc[X_train.index].to_numpy()
    )

    # Keras only slices x and y into mini-batches. Carry the teacher
    # probability as a second target column so that each batch sees the
    # teacher values belonging to exactly its own samples.
    train_targets = np.column_stack(
        [y_train.to_numpy(), teacher_probs_train]
    ).astype("float32")

    student_model = create_student_nn(X_train.shape[1])
    student_model.compile(
        optimizer='adam',
        # Column 0 = hard label, column 1 = teacher soft label.
        loss=lambda y_true, y_pred: distillation_loss(y_true[:, 0], y_pred, y_true[:, 1]),
    )
    student_model.fit(
        X_train.to_numpy(dtype="float32"), train_targets,
        epochs=20, batch_size=32, verbose=0,
    )

    # Predict once and derive the hard labels from the same probabilities.
    y_prob = student_model.predict(X_test.to_numpy(dtype="float32")).flatten()
    y_pred = (y_prob > 0.5).astype(int)
    metrics = evaluate_model(y_test, y_pred, y_prob)
    distillation_results[modality] = metrics
    print(f"✅ Distillation {modality}: {metrics}")

    student_model.save(RESULTS_DIR / f"{modality.lower()}_student.h5")

# Relative file name so that RESULTS_DIR is actually used by the join.
pd.DataFrame(distillation_results).T.to_csv(RESULTS_DIR / "distillation_results.csv")
print("✅ Distillation results saved")

# -------------------------------
# 3.3 Domain Adaptation Approaches
# -------------------------------
# Shared function for feature extractor
def create_feature_extractor(input_dim, embedding_dim=16):
    """Build an MLP that maps raw modality features to an embedding.

    Args:
        input_dim: Number of input features.
        embedding_dim: Width of the final layer (named ``'embedding'``).

    Returns:
        An uncompiled Keras ``Sequential`` model: ReLU layers of 64 and 32
        units followed by a ReLU layer of ``embedding_dim`` units.
    """
    hidden_layers = [layers.Dense(width, activation='relu') for width in (64, 32)]
    embedding_layer = layers.Dense(embedding_dim, activation='relu', name='embedding')
    return models.Sequential(
        [layers.Input(shape=(input_dim,)), *hidden_layers, embedding_layer]
    )

# 3.3.1 Adversarial Domain Adaptation (Fixed model definition)
# Two feature extractors (EEG = teacher domain, Eye = student domain) feed a
# shared domain discriminator through a gradient-reversal layer, while a label
# predictor is trained on the student embedding.
if modalities['EEG'] and modalities.get('Eye'):  # Example with Eye; extend for others
    X_teacher = df[modalities['EEG']].select_dtypes(include=[np.number]).fillna(0)
    X_student = df[modalities['Eye']].select_dtypes(include=[np.number]).fillna(0)
    X_teacher_train, X_teacher_test, X_student_train, X_student_test, y_train, y_test = train_test_split(
        X_teacher, X_student, y, test_size=0.2, random_state=42, stratify=y
    )

    embedding_dim = 16
    feature_extractor_teacher = create_feature_extractor(X_teacher_train.shape[1], embedding_dim)
    feature_extractor_student = create_feature_extractor(X_student_train.shape[1], embedding_dim)
    label_predictor = models.Sequential([layers.Dense(embedding_dim, activation='relu'), layers.Dense(1, activation='sigmoid')])
    domain_discriminator = models.Sequential([layers.Dense(embedding_dim, activation='relu'), layers.Dense(1, activation='sigmoid')])

    @tf.custom_gradient
    def gradient_reversal(x):
        """Identity in the forward pass; negates the gradient going backward."""
        def grad(dy):
            return -dy
        return x, grad

    class GradientReversalLayer(layers.Layer):
        """Keras layer wrapper around ``gradient_reversal``."""

        def call(self, x):
            return gradient_reversal(x)

    # Inputs: only the two feature matrices. Ground-truth and domain labels
    # are training *targets*; they must not be model inputs.
    teacher_input = layers.Input(shape=(X_teacher_train.shape[1],))
    student_input = layers.Input(shape=(X_student_train.shape[1],))

    # Embeddings
    teacher_embedding = feature_extractor_teacher(teacher_input)
    student_embedding = feature_extractor_student(student_input)

    # Predictions
    label_pred = label_predictor(student_embedding)
    domain_pred_teacher = domain_discriminator(GradientReversalLayer()(teacher_embedding))
    domain_pred_student = domain_discriminator(GradientReversalLayer()(student_embedding))

    # Model
    ada_model = models.Model(
        inputs=[teacher_input, student_input],
        outputs=[label_pred, domain_pred_teacher, domain_pred_student]
    )
    ada_model.compile(
        optimizer='adam',
        loss=['binary_crossentropy', 'binary_crossentropy', 'binary_crossentropy'],
        loss_weights=[1.0, 0.5, 0.5]
    )

    # Targets, shaped (n, 1) to match the sigmoid outputs.
    # Domain labels: 0 = teacher (EEG) domain, 1 = student (Eye) domain.
    label_targets = y_train.to_numpy(dtype="float32").reshape(-1, 1)
    domain_labels_teacher = np.zeros((len(X_teacher_train), 1), dtype="float32")
    domain_labels_student = np.ones((len(X_student_train), 1), dtype="float32")

    # Train
    ada_model.fit(
        [X_teacher_train.to_numpy(dtype="float32"), X_student_train.to_numpy(dtype="float32")],
        [label_targets, domain_labels_teacher, domain_labels_student],
        epochs=20, batch_size=32, verbose=1
    )

    # Evaluate the student path only (student extractor -> label predictor).
    # Pass a float32 array rather than a DataFrame, and run the label head
    # once so that y_pred and y_prob come from the same forward pass.
    student_embeddings_test = feature_extractor_student(
        X_student_test.to_numpy(dtype="float32"), training=False
    )
    y_prob = label_predictor(student_embeddings_test, training=False).numpy().flatten()
    y_pred = (y_prob > 0.5).astype(int)
    ada_metrics = evaluate_model(y_test, y_pred, y_prob)
    print(f"✅ Adversarial DA (Eye): {ada_metrics}")

    ada_model.save(RESULTS_DIR / "ada_eye.h5")
    pd.DataFrame({'ADA_Eye': ada_metrics}).T.to_csv(RESULTS_DIR / "ada_results.csv")
else:
    print("❌ Insufficient features for Adversarial DA")

# 3.3.2 Contrastive Learning (Skipped due to missing features)
print("❌ Contrastive Learning skipped due to missing student features")

print("✅ Step 3 Complete - Modeling done (with limitations)")

✅ Loaded preprocessed_trials.csv (Shape: (425096, 12))
✅ Baseline EEG: {'F1-score': 0.5263064971751412, 'Accuracy': 0.6213126323218067, 'ROC-AUC': np.float64(0.7594613012613253)}
⚠️ No features for Eye
⚠️ No features for GSR
⚠️ No features for Facial
✅ Baseline results saved
⚠️ No features or teacher for Eye
⚠️ No features or teacher for GSR
⚠️ No features or teacher for Facial
✅ Distillation results saved
❌ Insufficient features for Adversarial DA
❌ Contrastive Learning skipped due to missing student features
✅ Step 3 Complete - Modeling done (with limitations)


In [None]:
# Diagnostic cell: list every column of the loaded frame and the columns that
# were assigned to the EEG modality, to check which prefixes matched.
print(f"✅ Checking columns: {df.columns.tolist()}")
print(f"✅ EEG features found: {modalities['EEG']}")

✅ Checking columns: ['participant_id', 'Target_encoded', 'PCA_1', 'PCA_2', 'PCA_3', 'PCA_4', 'PCA_5', 'PCA_6', 'PCA_7', 'PCA_8', 'PCA_9', 'PCA_10']
✅ EEG features found: ['PCA_1', 'PCA_2', 'PCA_3', 'PCA_4', 'PCA_5', 'PCA_6', 'PCA_7', 'PCA_8', 'PCA_9', 'PCA_10']
