In [None]:
# Cell 1: Imports and Setup
import os
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, f1_score
import tensorflow as tf
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.models import Model
import mediapipe as mp
import random
import warnings
warnings.filterwarnings("ignore")

# Seed every random number generator used in this notebook with one shared value
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Dataset root folder, emotion class names, and image size
DATA_DIR = "FER-2013"
CLASSES = ["angry", "disgust", "fear", "happy", "sad", "surprise", "neutral"]
IMG_SIZE = (48, 48)

Milestone 1 — Face Detection & Visual Check

In [None]:
# Cell 2: Verify dataset structure and display sample images
def verify_and_show_samples(data_type="train", samples_per_class=2):
    """Print per-class image counts for one split and plot a few samples per class.

    Args:
        data_type: Sub-folder of DATA_DIR to inspect (e.g. "train" or "test").
        samples_per_class: Number of images shown per emotion (grid columns).

    A missing class folder is reported and its grid row is left blank; files
    that OpenCV cannot decode are skipped.
    """
    base_path = os.path.join(DATA_DIR, data_type)
    # squeeze=False keeps `axes` two-dimensional even with a single column.
    # Without it, samples_per_class=1 yields a 1-D array and axes[i, j] raises
    # IndexError.
    fig, axes = plt.subplots(
        len(CLASSES), samples_per_class, figsize=(6, 15), squeeze=False
    )
    # Hide all panels up front so missing or unreadable samples do not leave
    # empty tick frames in the grid.
    for ax in axes.ravel():
        ax.axis('off')
    for i, emotion in enumerate(CLASSES):
        class_path = os.path.join(base_path, emotion)
        if not os.path.exists(class_path):
            print(f"⚠️ Missing folder: {class_path}")
            continue
        # os.listdir returns entries in arbitrary order; sort so the same
        # samples are displayed on every run.
        files = sorted(os.listdir(class_path))
        print(f"{emotion}: {len(files)} images")
        for j, file_name in enumerate(files[:samples_per_class]):
            img = cv2.imread(os.path.join(class_path, file_name), cv2.IMREAD_GRAYSCALE)
            if img is None:
                continue
            axes[i, j].imshow(img, cmap='gray')
            axes[i, j].set_title(emotion)
    plt.tight_layout()
    plt.show()

# Inspect both splits: two samples per class for train, one per class for test
verify_and_show_samples("train", 2)
verify_and_show_samples("test", 1)

In [None]:
# Cell 3: MediaPipe Face Detection + Bounding Box
# Short aliases for the MediaPipe face-detection solution and its drawing helpers
mp_face_detection = mp.solutions.face_detection
mp_drawing = mp.solutions.drawing_utils

def detect_and_draw_face(image, min_detection_confidence=0.5):
    """Run MediaPipe face detection on a grayscale image and draw any hits.

    The input is expanded to a 3-channel RGB copy, and each detection is drawn
    onto that copy. The copy is returned whether or not a face was found.
    """
    canvas = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
    detector_ctx = mp_face_detection.FaceDetection(
        model_selection=0,
        min_detection_confidence=min_detection_confidence,
    )
    with detector_ctx as detector:
        found = detector.process(canvas).detections
        # `detections` is falsy when nothing was found; loop over nothing then
        for hit in found or ():
            mp_drawing.draw_detection(canvas, hit)
    return canvas

# Test on a few samples
def test_face_detection():
    """Show the face-detection overlay for one training image of each emotion.

    Images are placed left-to-right, top-to-bottom in a 2x4 grid. Missing
    class folders are reported and skipped, undecodable files are passed over,
    and panels that receive no image stay hidden.
    """
    base_path = os.path.join(DATA_DIR, "train")
    fig, axes = plt.subplots(2, 4, figsize=(10, 6))
    panels = axes.ravel()
    # Hide every panel first: the grid has 8 slots, which may exceed the
    # number of images actually drawn.
    for ax in panels:
        ax.axis('off')
    idx = 0
    for emotion in CLASSES:
        if idx >= len(panels):
            break
        class_path = os.path.join(base_path, emotion)
        if not os.path.isdir(class_path):
            print(f"⚠️ Missing folder: {class_path}")
            continue
        # Take the first decodable file in sorted order (os.listdir order is
        # arbitrary, and cv2.imread returns None for unreadable files).
        img = None
        for file_name in sorted(os.listdir(class_path)):
            img = cv2.imread(os.path.join(class_path, file_name), cv2.IMREAD_GRAYSCALE)
            if img is not None:
                break
        if img is None:
            continue
        ax = panels[idx]
        ax.imshow(detect_and_draw_face(img))
        ax.set_title(emotion)
        idx += 1
    plt.tight_layout()
    plt.show()

# Visual check: draw detections on one training image per emotion
test_face_detection()

Milestone 2 — Feature Extraction

In [None]:
# Cell 4: Image loading utility
def load_image(path):
    """Read the file at `path` as a single-channel grayscale image.

    Raises:
        ValueError: if OpenCV returns None for the path.
    """
    gray = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    if gray is not None:
        return gray
    raise ValueError(f"Could not load {path}")

Track A — Landmark Features (MediaPipe FaceMesh)

In [None]:
# Cell 5: Landmark-based feature extraction
# Short alias for the MediaPipe FaceMesh solution used by the landmark track
mp_face_mesh = mp.solutions.face_mesh

def extract_landmark_features(image):
    """Return a 936-dim landmark vector (468 points x 2), or zeros if no face is found.

    Args:
        image: Single-channel grayscale image (2-D array).

    Returns:
        1-D float array of length 936. Landmark coordinates are converted to
        pixels, centred on landmark 1 (nose tip) and divided by the distance
        between landmarks 33 and 263 (outer eye corners). When FaceMesh finds
        no face, a zero vector of the same length is returned.
    """
    n_points = 468
    rgb = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
    with mp_face_mesh.FaceMesh(static_image_mode=True, max_num_faces=1, refine_landmarks=True) as face_mesh:
        results = face_mesh.process(rgb)
        if not results.multi_face_landmarks:
            return np.zeros(n_points * 2)  # fallback
        landmarks = results.multi_face_landmarks[0].landmark
        h, w = image.shape
        # refine_landmarks=True appends 10 iris points after the 468 mesh
        # points (478 in total). Keep only the mesh points so the vector has
        # the same length as the zero fallback; otherwise the feature matrix
        # would be ragged.
        mesh_points = list(landmarks)[:n_points]
        coords = np.array([[lm.x * w, lm.y * h] for lm in mesh_points])  # (468, 2)

        # Normalize: center on nose tip (landmark 1) and scale by the distance
        # between the outer eye corners
        left_eye = coords[33]   # left eye outer
        right_eye = coords[263] # right eye outer
        ipd = np.linalg.norm(left_eye - right_eye)
        if ipd == 0:
            ipd = 1.0  # avoid division by zero for degenerate detections
        nose = coords[1]
        coords = (coords - nose) / ipd
        return coords.flatten()

def prepare_landmark_dataset(data_type="train", max_per_class=500):
    """Build the landmark feature matrix and label vector for one dataset split.

    Args:
        data_type: Sub-folder of DATA_DIR to read (e.g. "train" or "test").
        max_per_class: Maximum number of files taken from each class folder,
            kept small for speed. Pass None to use every file.

    Returns:
        (features, labels) as numpy arrays; each label is the index of the
        image's emotion in CLASSES.
    """
    features, labels = [], []
    skipped = 0
    base_path = os.path.join(DATA_DIR, data_type)
    for label_idx, emotion in enumerate(CLASSES):
        class_path = os.path.join(base_path, emotion)
        # os.listdir order is arbitrary; sort so the capped subset is the same
        # on every run.
        files = sorted(os.listdir(class_path))[:max_per_class]
        for file in files:
            try:
                img = load_image(os.path.join(class_path, file))
                feat = extract_landmark_features(img)
            except Exception as e:
                # Best effort: a bad file must not abort the whole extraction,
                # but report it instead of dropping it silently.
                skipped += 1
                if skipped <= 5:
                    print(f"Skipping {file}: {e}")
                continue
            features.append(feat)
            labels.append(label_idx)
    if skipped:
        print(f"{data_type}: skipped {skipped} image(s) that could not be processed")
    return np.array(features), np.array(labels)

# Build landmark feature matrices for both splits, then print their shapes
print("Extracting landmark features (may take a few minutes)...")
X_train_land, y_train_land = prepare_landmark_dataset("train")
X_test_land, y_test_land = prepare_landmark_dataset("test")
print("Landmark shapes:", X_train_land.shape, X_test_land.shape)

Track B — CNN Deep Features (MobileNetV2)

In [None]:
# Cell 6: CNN feature extraction using MobileNetV2
# MobileNetV2 with ImageNet weights and no classification head (include_top=False),
# configured for 224x224 RGB input
base_model = MobileNetV2(input_shape=(224, 224, 3), include_top=False, weights='imagenet')
# Wrap the backbone as a Model from its input to its final output tensor
feature_extractor = Model(inputs=base_model.input, outputs=base_model.output)
# Freeze the weights: the network is only used to compute features
feature_extractor.trainable = False

def preprocess_for_cnn(image):
    """Turn a grayscale image into a MobileNetV2-ready batch of one.

    The image is resized to 224x224, replicated to three channels, and
    rescaled from [0, 255] to [-1, 1].

    Returns:
        float32 array of shape (1, 224, 224, 3).
    """
    resized = cv2.resize(image, (224, 224))
    rgb = cv2.cvtColor(resized, cv2.COLOR_GRAY2RGB)
    # The ImageNet MobileNetV2 weights expect inputs scaled to [-1, 1] (the
    # scaling applied by mobilenet_v2.preprocess_input), not [0, 1].
    # Assumes 8-bit pixel values, as returned by cv2.imread.
    rgb = rgb.astype(np.float32) / 127.5 - 1.0
    return np.expand_dims(rgb, axis=0)

def extract_cnn_features(image):
    """Return the spatially averaged MobileNetV2 feature vector for one image."""
    batch = preprocess_for_cnn(image)
    feature_map = feature_extractor(batch)
    # Global average pooling over the two spatial axes → (1280,) after flattening
    pooled = tf.reduce_mean(feature_map, axis=[1, 2])
    return pooled.numpy().flatten()

def prepare_cnn_dataset(data_type="train", max_per_class=500):
    """Build the CNN feature matrix and label vector for one dataset split.

    Args:
        data_type: Sub-folder of DATA_DIR to read (e.g. "train" or "test").
        max_per_class: Maximum number of files taken from each class folder,
            kept small for speed. Pass None to use every file.

    Returns:
        (features, labels) as numpy arrays; each label is the index of the
        image's emotion in CLASSES.
    """
    features, labels = [], []
    skipped = 0
    base_path = os.path.join(DATA_DIR, data_type)
    for label_idx, emotion in enumerate(CLASSES):
        class_path = os.path.join(base_path, emotion)
        # os.listdir order is arbitrary; sort so the capped subset is the same
        # on every run.
        files = sorted(os.listdir(class_path))[:max_per_class]
        for file in files:
            try:
                img = load_image(os.path.join(class_path, file))
                feat = extract_cnn_features(img)
            except Exception as e:
                # Best effort: a bad file must not abort the whole extraction,
                # but report it instead of dropping it silently.
                skipped += 1
                if skipped <= 5:
                    print(f"Skipping {file}: {e}")
                continue
            features.append(feat)
            labels.append(label_idx)
    if skipped:
        print(f"{data_type}: skipped {skipped} image(s) that could not be processed")
    return np.array(features), np.array(labels)

# Build CNN feature matrices for both splits, then print their shapes
print("Extracting CNN features...")
X_train_cnn, y_train_cnn = prepare_cnn_dataset("train")
X_test_cnn, y_test_cnn = prepare_cnn_dataset("test")
print("CNN shapes:", X_train_cnn.shape, X_test_cnn.shape)

Model Training (Both Tracks)

In [None]:
# Cell 7: Train models
def train_and_evaluate(X_train, y_train, X_test, y_test, track_name="Landmark"):
    """Fit an RBF SVM and a random forest on one feature track and score both.

    Returns a 3-tuple:
        - dict mapping 'SVM' / 'RF' to (fitted model, scaler or None, test predictions)
        - (accuracy, macro-F1) of the SVM on the test split
        - (accuracy, macro-F1) of the random forest on the test split
    """
    print(f"\n=== Training on {track_name} Features ===")

    # The SVM sees standardized features; the scaler is fitted on the training
    # split only and then applied to the test split.
    scaler = StandardScaler()
    train_std = scaler.fit_transform(X_train)
    test_std = scaler.transform(X_test)

    svm = SVC(kernel='rbf', C=1.0, gamma='scale', random_state=42).fit(train_std, y_train)
    svm_pred = svm.predict(test_std)

    # The random forest is trained on the raw, unscaled features
    rf = RandomForestClassifier(n_estimators=100, max_depth=10, random_state=42).fit(X_train, y_train)
    rf_pred = rf.predict(X_test)

    def score(label, predicted):
        """Print and return accuracy and macro-F1 against the test labels."""
        accuracy = accuracy_score(y_test, predicted)
        macro_f1 = f1_score(y_test, predicted, average='macro')
        print(f"{label} → Acc: {accuracy:.3f}, Macro-F1: {macro_f1:.3f}")
        return accuracy, macro_f1

    svm_scores = score("SVM", svm_pred)
    rf_scores = score("Random Forest", rf_pred)

    fitted = {
        'SVM': (svm, scaler, svm_pred),
        'RF': (rf, None, rf_pred),
    }
    return fitted, svm_scores, rf_scores

# Train and score SVM + random forest on each feature track.
# Each call returns the fitted-model dict plus (accuracy, macro-F1) for SVM and RF.
models_land, svm_land_metrics, rf_land_metrics = train_and_evaluate(
    X_train_land, y_train_land, X_test_land, y_test_land, "Landmark"
)

models_cnn, svm_cnn_metrics, rf_cnn_metrics = train_and_evaluate(
    X_train_cnn, y_train_cnn, X_test_cnn, y_test_cnn, "CNN"
)

Milestone 3 — Evaluation & Reflection

In [None]:
# Cell 8: Evaluation reports and confusion matrices
def plot_confusion_matrix(y_true, y_pred, title):
    """Plot a confusion matrix and print the per-class classification report.

    Args:
        y_true: True labels as indices into CLASSES.
        y_pred: Predicted labels as indices into CLASSES.
        title: Figure title.
    """
    # Pin the label set to every class index. Otherwise a class absent from
    # both y_true and y_pred shrinks the matrix, misaligns the tick labels and
    # makes target_names=CLASSES fail in classification_report.
    label_ids = np.arange(len(CLASSES))
    cm = confusion_matrix(y_true, y_pred, labels=label_ids)
    plt.figure(figsize=(8, 6))
    plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
    plt.title(title)
    plt.colorbar()
    plt.xticks(label_ids, CLASSES, rotation=45)
    plt.yticks(label_ids, CLASSES)
    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')
    plt.tight_layout()
    plt.show()
    # zero_division=0 reports 0 for classes without predictions or samples
    print(classification_report(
        y_true, y_pred, labels=label_ids, target_names=CLASSES, zero_division=0
    ))

# CNN + SVM is treated as the best model here; the choice is hard-coded,
# not derived from the metrics computed above.
best_model_name = "CNN_SVM"
# Each models_* entry is (fitted model, scaler or None, test-set predictions)
best_model, best_scaler, best_pred = models_cnn['SVM']
plot_confusion_matrix(y_test_cnn, best_pred, "CNN + SVM Confusion Matrix")

# Also show Landmark + RF for comparison (only its predictions are needed)
_, _, pred_land_rf = models_land['RF']
plot_confusion_matrix(y_test_land, pred_land_rf, "Landmark + RF Confusion Matrix")

Simple Deployment: predict Function

In [None]:
# Cell 9: Prediction pipeline
# Defaults are captured by name so predict_emotion can tell when the caller
# left the CNN scaler in place.
_DEFAULT_CNN_MODEL = models_cnn['SVM'][0]
_DEFAULT_CNN_SCALER = models_cnn['SVM'][1]


def predict_emotion(image_path, model=_DEFAULT_CNN_MODEL, scaler=_DEFAULT_CNN_SCALER, feature_type='cnn'):
    """
    Predict emotion from a single FER-style image (48x48 grayscale).

    Args:
        image_path: Path of the image file to classify.
        model: Fitted classifier with a `predict` method (default: CNN SVM).
        scaler: Fitted transformer applied to the features before prediction,
            or None for models trained on unscaled features (default: the
            scaler fitted alongside the CNN SVM).
        feature_type: 'cnn' for MobileNetV2 features; any other value selects
            landmark features.

    Returns: predicted label (string)

    Raises:
        ValueError: if the image cannot be read.
    """
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        raise ValueError("Image not found")

    if feature_type == 'cnn':
        feat = extract_cnn_features(img)
    else:  # landmark
        feat = extract_landmark_features(img)
        # The default scaler was fitted on CNN features and cannot transform
        # landmark vectors. Drop it when the caller did not override it, so
        # existing landmark calls keep working; an explicitly passed landmark
        # scaler is applied below.
        if scaler is _DEFAULT_CNN_SCALER:
            scaler = None

    # scikit-learn estimators expect a 2-D (n_samples, n_features) array
    feat = np.asarray(feat).reshape(1, -1)
    if scaler is not None:
        feat = scaler.transform(feat)
    pred = model.predict(feat)[0]

    return CLASSES[int(pred)]

# Test on unseen images (place 2 images in 'unseen/' folder)
unseen_dir = "unseen"
if os.path.isdir(unseen_dir):
    shown = 0
    # Sorted for a stable choice of files; stop after two successful predictions
    for img_file in sorted(os.listdir(unseen_dir)):
        if shown >= 2:
            break
        path = os.path.join(unseen_dir, img_file)
        img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        if img is None:
            # Hidden or non-image files must not abort the demo
            print(f"Skipping unreadable file: {path}")
            continue
        pred = predict_emotion(path)
        plt.imshow(img, cmap='gray')
        plt.title(f"Prediction: {pred} | File: {img_file}")
        plt.axis('off')
        plt.show()
        shown += 1
else:
    print("⚠️ Create 'unseen/' folder with test images for demo.")