# Emotion Detection Model Training

This notebook demonstrates how to train an emotion detection model using scikit-learn.
The pipeline is designed for the FER2013 dataset or a similar emotion dataset; the demo below runs on synthetic stand-in data.

In [None]:
# Import required libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import joblib
import cv2
import os

# Plot styling: apply seaborn's 'darkgrid' style to the figures drawn below
sns.set_style('darkgrid')
# IPython magic: render matplotlib figures inline in the cell output
%matplotlib inline

## 1. Load and Prepare Dataset

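For demonstration purposes, we'll create a synthetic dataset of random pixel values and random labels.
Because the features carry no signal, the accuracy reported below will be near chance (about 14% for 7 classes).
In production, you would load the FER2013 dataset or similar.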

In [None]:
# Emotion labels; the integer class id of an emotion is its index in this list
EMOTIONS = ['Angry', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral']

# For demo: Create synthetic data
# In practice, load from FER2013 CSV or image dataset
def create_demo_dataset(n_samples=1000, img_size=48, seed=42):
    """
    Create a synthetic demo dataset of flattened random "images".

    Replace this with actual dataset loading for real experiments.

    Parameters
    ----------
    n_samples : int
        Number of samples (rows) to generate.
    img_size : int
        Side length of the square image; each sample has
        ``img_size * img_size`` features.
    seed : int
        Seed for the private random generator. The default (42) reproduces
        the data this function has always produced.

    Returns
    -------
    X : numpy.ndarray
        Float array of shape ``(n_samples, img_size * img_size)`` with values
        drawn uniformly from [0, 1).
    y : numpy.ndarray
        Integer array of shape ``(n_samples,)`` with class ids in
        ``[0, len(EMOTIONS))``.
    """
    # A private RandomState yields the same stream as np.random.seed(seed)
    # followed by the global np.random.* calls, but leaves NumPy's global
    # RNG state untouched for the rest of the notebook.
    rng = np.random.RandomState(seed)

    # Generate random images and labels (features first, then labels, so the
    # draws match the historical order)
    X = rng.rand(n_samples, img_size * img_size)
    y = rng.randint(0, len(EMOTIONS), n_samples)

    return X, y

# Build the 5000-sample demo dataset and report the array shapes
print("Loading dataset...")
X, y = create_demo_dataset(n_samples=5000)
shape_summary = f"Dataset shape: X={X.shape}, y={y.shape}"
print(shape_summary)

## 2. Split Dataset

In [None]:
# Hold out 20% of the samples for testing; stratifying on y keeps the class
# proportions the same in both partitions
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

print(f"Training set: {X_train.shape}")
print(f"Test set: {X_test.shape}")

# Report how many training samples each emotion has
train_classes, train_counts = np.unique(y_train, return_counts=True)
for class_id, n_in_class in zip(train_classes, train_counts):
    emotion_name = EMOTIONS[class_id]
    print(f"{emotion_name}: {n_in_class} samples")

## 3. Feature Scaling

In [None]:
# Learn the per-feature mean/variance from the training split only, then
# apply that same transform to both splits (no test-set leakage)
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

print("Features scaled successfully!")

## 4. Train Model

In [None]:
# Fit a Random Forest on the scaled training data
print("Training Random Forest model...")

# Hyperparameters gathered in one place so they are easy to tweak
rf_params = {
    'n_estimators': 100,
    'max_depth': 20,
    'min_samples_split': 5,
    'min_samples_leaf': 2,
    'random_state': 42,  # reproducible forest
    'n_jobs': -1,        # use every available core
    'verbose': 1,
}
model = RandomForestClassifier(**rf_params)

model.fit(X_train_scaled, y_train)
print("Training completed!")

## 5. Evaluate Model

In [None]:
# Make predictions on the held-out test split
y_pred = model.predict(X_test_scaled)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"\nTest Accuracy: {accuracy * 100:.2f}%\n")

# Classification report.
# `labels` is pinned to every known class id so that `target_names` always
# lines up with the rows, even when a class (e.g. a rare emotion) is missing
# from the test split or is never predicted; without it scikit-learn infers
# the label set from the data and the name list can mismatch.
# `zero_division=0` reports 0 for metrics that are undefined for such classes
# instead of emitting a warning per class.
print("Classification Report:")
print(
    classification_report(
        y_test,
        y_pred,
        labels=np.arange(len(EMOTIONS)),
        target_names=EMOTIONS,
        zero_division=0,
    )
)

In [None]:
# Confusion Matrix.
# Pin `labels` to every known class id so the matrix is always
# len(EMOTIONS) x len(EMOTIONS); otherwise a class absent from both y_test and
# y_pred is dropped and the emotion tick labels no longer match the cells.
class_ids = np.arange(len(EMOTIONS))
cm = confusion_matrix(y_test, y_pred, labels=class_ids)

# Rows are true classes, columns are predicted classes
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=EMOTIONS, yticklabels=EMOTIONS, ax=ax)
ax.set_title('Confusion Matrix')
ax.set_ylabel('True Label')
ax.set_xlabel('Predicted Label')
fig.tight_layout()
plt.show()

## 6. Feature Importance

In [None]:
# Get feature importance (one value per input feature)
feature_importance = model.feature_importances_

# Plot the top features (at most 20; fewer if the model has fewer features,
# which would otherwise make the bar positions and values mismatch)
n_top = min(20, feature_importance.size)
# argsort is ascending, so the last n_top entries are the most important
# features, with the single most important one drawn at the top of the chart
top_features = np.argsort(feature_importance)[-n_top:]

bar_positions = np.arange(n_top)
fig, ax = plt.subplots(figsize=(10, 6))
ax.barh(bar_positions, feature_importance[top_features])
# Label each bar with the actual feature index it represents; without this
# the 'Feature Index' axis would only show the bar positions 0..n_top-1
ax.set_yticks(bar_positions)
ax.set_yticklabels(top_features)
ax.set_xlabel('Importance')
ax.set_ylabel('Feature Index')
ax.set_title(f'Top {n_top} Feature Importances')
fig.tight_layout()
plt.show()

## 7. Save Model

In [None]:
# Persist the fitted model and its scaler side by side; inference needs both,
# because the model was trained on scaled features
model_path = '../models/emotion_model.pkl'
scaler_path = '../models/scaler.pkl'
os.makedirs('../models', exist_ok=True)

for label, artifact, destination in (
    ("Model", model, model_path),
    ("Scaler", scaler, scaler_path),
):
    joblib.dump(artifact, destination)
    print(f"✓ {label} saved to {destination}")

## 8. Test Model Loading

In [None]:
# Load the saved model and scaler back from disk
loaded_model = joblib.load(model_path)
loaded_scaler = joblib.load(scaler_path)

# Test prediction.
# Start from a RAW (unscaled) test row and push it through the reloaded
# scaler, so this cell exercises both persisted artifacts end to end instead
# of reusing features scaled by the in-memory scaler.
test_sample = loaded_scaler.transform(X_test[0:1])
# The reloaded scaler must reproduce what the in-memory scaler produced
np.testing.assert_allclose(test_sample, X_test_scaled[0:1])

prediction = loaded_model.predict(test_sample)
probabilities = loaded_model.predict_proba(test_sample)

print(f"Predicted emotion: {EMOTIONS[prediction[0]]}")
print("\nProbabilities:")
# predict_proba columns follow loaded_model.classes_, which only contains the
# classes seen during training; map through it so each probability is paired
# with the right emotion name even if some class never appeared.
for class_id, prob in zip(loaded_model.classes_, probabilities[0]):
    print(f"{EMOTIONS[class_id]}: {prob*100:.2f}%")

## Next Steps

1. **Use Real Dataset**: Replace demo data with FER2013 or similar
2. **Try Deep Learning**: Use CNN with TensorFlow/Keras for better accuracy
3. **Hyperparameter Tuning**: Use GridSearchCV or RandomizedSearchCV
4. **Data Augmentation**: Increase dataset size with transformations
5. **Ensemble Methods**: Combine multiple models for better performance