In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

# === Load and preprocess ===
df = pd.read_csv("combined_features.csv")

# Encode emotion labels
le = LabelEncoder()
df["emotion_encoded"] = le.fit_transform(df["emotion"])
emotion_labels = le.classes_

# Focus on 'happy' and 'angry'
happy_idx = np.where(emotion_labels == 'happy')[0][0]
angry_idx = np.where(emotion_labels == 'angry')[0][0]

df_filtered = df[df["emotion_encoded"].isin([happy_idx, angry_idx])]
X = df_filtered.drop(columns=["emotion", "emotion_encoded"])
y = df_filtered["emotion_encoded"]

# Split
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, test_size=0.2, random_state=42)

# Scale
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# === Step 1: MLP Grid Search ===
param_grid = {
    'hidden_layer_sizes': [(128,), (256,), (128, 64)],
    'activation': ['relu', 'tanh'],
    'alpha': [0.0001, 0.001],
    'learning_rate': ['constant', 'adaptive']
}

mlp = MLPClassifier(max_iter=500, random_state=42)
grid = GridSearchCV(mlp, param_grid, cv=3, n_jobs=-1, verbose=1)
grid.fit(X_train_scaled, y_train)

best_mlp = grid.best_estimator_
print("Best MLP Params:", grid.best_params_)

# === Step 2: Train directional classifiers ===

# A) Happy-focused model (happy = 1, all else = 0)
df_happy = df.copy()
df_happy["happy_label"] = df_happy["emotion"].apply(lambda x: 1 if x == "happy" else 0)
X_happy = df_happy.drop(columns=["emotion", "happy_label"])
y_happy = df_happy["happy_label"]

X_h_train, X_h_test, y_h_train, y_h_test = train_test_split(X_happy, y_happy, stratify=y_happy, test_size=0.2, random_state=42)
scaler_h = StandardScaler()
X_h_train_scaled = scaler_h.fit_transform(X_h_train)
X_h_test_scaled = scaler_h.transform(X_h_test)

happy_model = MLPClassifier(**grid.best_params_, max_iter=500, random_state=42)
happy_model.fit(X_h_train_scaled, y_h_train)

# B) Angry-focused model (angry = 1, all else = 0)
df_angry = df.copy()
df_angry["angry_label"] = df_angry["emotion"].apply(lambda x: 1 if x == "angry" else 0)
X_angry = df_angry.drop(columns=["emotion", "angry_label"])
y_angry = df_angry["angry_label"]

X_a_train, X_a_test, y_a_train, y_a_test = train_test_split(X_angry, y_angry, stratify=y_angry, test_size=0.2, random_state=42)
scaler_a = StandardScaler()
X_a_train_scaled = scaler_a.fit_transform(X_a_train)
X_a_test_scaled = scaler_a.transform(X_a_test)

angry_model = MLPClassifier(**grid.best_params_, max_iter=500, random_state=42)
angry_model.fit(X_a_train_scaled, y_a_train)

# === Step 3: Hybrid Prediction ===

hybrid_preds = []
true_labels = []

# Use same filtered happy/angry test set
for i in range(len(X_test)):
    x_sample = X_test.iloc[i:i+1]
    true_label = y_test.iloc[i]

    # Get the columns used to fit the happy scaler
    expected_cols_h = X_h_train.columns
    expected_cols_a = X_a_train.columns
    
    # Align x_sample to those columns
    x_sample_h = x_sample.reindex(columns=expected_cols_h, fill_value=0)
    x_sample_a = x_sample.reindex(columns=expected_cols_a, fill_value=0)
    
    # Now transform
    x_scaled_h = scaler_h.transform(x_sample_h)
    x_scaled_a = scaler_a.transform(x_sample_a)


    x_scaled_h = scaler_h.transform(x_sample)
    x_scaled_a = scaler_a.transform(x_sample)

    happy_conf = happy_model.predict_proba(x_scaled_h)[0][1]  # P(happy)
    angry_conf = angry_model.predict_proba(x_scaled_a)[0][1]  # P(angry)

    pred = happy_idx if happy_conf > angry_conf else angry_idx
    hybrid_preds.append(pred)
    true_labels.append(true_label)

# === Step 4: Evaluation ===
print("\n=== Hybrid Model Evaluation ===")
print(classification_report(true_labels, hybrid_preds, target_names=["angry", "happy"]))

cm = confusion_matrix(true_labels, hybrid_preds)
plt.figure(figsize=(6, 4))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=["angry", "happy"], yticklabels=["angry", "happy"])
plt.xlabel("Predicted")
plt.ylabel("True")
plt.title("Hybrid Classifier Confusion Matrix")
plt.tight_layout()
plt.show()


Fitting 3 folds for each of 24 candidates, totalling 72 fits
Best MLP Params: {'activation': 'relu', 'alpha': 0.0001, 'hidden_layer_sizes': (128, 64), 'learning_rate': 'constant'}


ValueError: The feature names should match those that were passed during fit.
Feature names seen at fit time, yet now missing:
- emotion_encoded
