# Facial Emotion Recognition on FER2013
This notebook implements a deep learning pipeline using ResNet50V2 with CBAM and Focal Loss for emotion classification on the FER2013 dataset.

Imports and configurations

In [0]:
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"

import numpy as np
import cv2
import glob
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf

from tensorflow.keras import models, layers, optimizers
from tensorflow.keras.applications.resnet_v2 import ResNet50V2, preprocess_input
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, Callback
from tensorflow.keras.optimizers.schedules import CosineDecay
from tensorflow.keras import backend as K

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.utils import class_weight
from sklearn.metrics import classification_report, confusion_matrix

# Directory containing this file. `__file__` is not defined when the code runs
# inside a notebook kernel, so fall back to the current working directory there.
try:
    BASE_DIR = os.path.dirname(os.path.abspath(__file__))
except NameError:
    BASE_DIR = os.getcwd()
# All data, plots and models live one level above BASE_DIR.
PROJECT_ROOT = os.path.dirname(BASE_DIR)
# Glob pattern (not a plain directory): every entry two levels below data/train.
DATA_DIR = os.path.join(PROJECT_ROOT, "data", "train", "*", "*")
PLOTS_DIR = os.path.join(PROJECT_ROOT, "results", "utils", "plots")
MODELS_CHECKPOINTS = os.path.join(PROJECT_ROOT, "models", "checkpoints", "best_model.keras")
# NOTE(review): "Models" is capitalised here but "models" is lower-case for the
# checkpoint path; on a case-sensitive filesystem these are different
# directories. Left unchanged -- confirm which spelling is intended.
MODELS_FINAL_MODEL = os.path.join(PROJECT_ROOT, "Models", "final", "final_model.keras")


# Constants
INPUT_SIZE = 144  # side length (pixels) images are resized to; also the model input size
BATCH_SIZE = 64  # batch size given to the augmentation generator in train_model
EPOCHS = 30  # maximum number of epochs passed to model.fit
NUM_CLASSES = 7  # width of the one-hot labels and of the softmax output layer

# Label encoder and learning rate log
le = LabelEncoder()  # fitted in load_data(); its classes_ label the evaluation report
lr_log = []  # the LogCosineDecay callback appends one learning rate per epoch


Attention Block

In [ ]:
class CBAMLayer(layers.Layer):
    """Convolutional Block Attention Module: channel attention, then spatial attention.

    Channel attention runs a shared two-layer MLP over the spatially
    average-pooled and max-pooled features, sums both outputs and squashes
    them with a sigmoid. Spatial attention applies a 7x7 sigmoid convolution
    to the channel-wise mean and max maps. The input is rescaled by each
    attention map in turn, so the output has the same shape as the input.

    Args:
        ratio: Reduction factor of the MLP bottleneck
            (``channels // ratio`` hidden units, at least 1).
        **kwargs: Standard ``Layer`` arguments (``name``, ``dtype``, ...).
            Accepting them lets Keras rebuild the layer from its config.

    The layer expects a 4-D channels-last input (the channel count is read
    from the last axis and pooling runs over axes 1 and 2).
    """

    def __init__(self, ratio=8, **kwargs):
        super().__init__(**kwargs)
        self.ratio = ratio

    def build(self, input_shape):
        # Sub-layers are created here, once, because their sizes depend on the
        # channel count of the incoming tensor.
        self.channel = int(input_shape[-1])

        # Channel attention: shared bottleneck MLP. Guard against a zero-width
        # bottleneck when there are fewer channels than `ratio`.
        hidden_units = max(1, self.channel // self.ratio)
        self.shared_dense_one = layers.Dense(hidden_units, activation='relu')
        self.shared_dense_two = layers.Dense(self.channel)

        # Spatial attention: single-filter 7x7 convolution producing a mask.
        self.spatial_conv = layers.Conv2D(1, (7, 7), padding='same', activation='sigmoid')

    def call(self, input_tensor):
        # --- Channel attention -------------------------------------------
        # keepdims=True keeps the pooled tensors at (batch, 1, 1, channels) so
        # the result broadcasts against the input without a reshape. Dense
        # acts on the last axis only, so the MLP weights are unaffected.
        avg_pool = tf.reduce_mean(input_tensor, axis=[1, 2], keepdims=True)
        max_pool = tf.reduce_max(input_tensor, axis=[1, 2], keepdims=True)

        avg_out = self.shared_dense_two(self.shared_dense_one(avg_pool))
        max_out = self.shared_dense_two(self.shared_dense_one(max_pool))

        channel_attention = tf.sigmoid(avg_out + max_out)
        x = input_tensor * channel_attention

        # --- Spatial attention -------------------------------------------
        avg_map = tf.reduce_mean(x, axis=-1, keepdims=True)
        max_map = tf.reduce_max(x, axis=-1, keepdims=True)
        spatial_attention = self.spatial_conv(tf.concat([avg_map, max_map], axis=-1))

        return x * spatial_attention

    def get_config(self):
        """Return the layer config so a saved model can re-create the layer."""
        config = super().get_config()
        config.update({"ratio": self.ratio})
        return config


Focal loss

In [ ]:
def focal_loss(gamma=2., alpha=0.25):
    """Build a focal-loss function for one-hot targets and softmax outputs.

    Args:
        gamma: Focusing exponent applied to ``(1 - p)``; larger values
            down-weight well-classified examples more strongly.
        alpha: Constant scale factor applied to every term.

    Returns:
        A ``loss(y_true, y_pred)`` callable that returns one loss value per
        sample (mean over the class axis).
    """
    def loss(y_true, y_pred):
        # Clip so that log() never sees exactly 0 or 1.
        epsilon = K.epsilon()
        y_pred = K.clip(y_pred, epsilon, 1. - epsilon)
        cross_entropy = -y_true * K.log(y_pred)
        weight = alpha * K.pow(1 - y_pred, gamma)
        # Reduce over the class axis only. Returning one value per sample
        # (instead of a single scalar) lets Keras apply per-sample weights,
        # e.g. those derived from `class_weight` in fit(), before averaging
        # over the batch. Without weights the batch mean of these values is
        # the same number as a mean over all elements.
        return K.mean(weight * cross_entropy, axis=-1)

    return loss


Learning Rate Scheduler

In [ ]:
# Cosine-decay schedule shared by the optimizer (build_model) and the
# learning-rate logging callback (train_model).
# NOTE(review): per the Keras CosineDecay docs, decay_steps counts optimizer
# steps (batches), not epochs, and alpha is the final fraction of the initial
# rate -- confirm that 1000 steps is the intended decay horizon.
learning_rate = CosineDecay(initial_learning_rate=0.001, decay_steps=1000, alpha=0.1)


class LogCosineDecay(Callback):
    """Record and print the scheduled learning rate at the end of every epoch.

    Args:
        lr_schedule: Callable mapping an optimizer step to a learning rate
            (the schedule object given to the optimizer).
        log_storage: List that receives one float per epoch.
    """

    def __init__(self, lr_schedule, log_storage):
        super().__init__()
        self.lr_schedule = lr_schedule
        self.log_storage = log_storage

    def on_epoch_end(self, epoch, logs=None):
        # The schedule is a function of the optimizer's own step counter, so
        # read that counter rather than estimating a step from epoch numbers.
        step = int(self.model.optimizer.iterations.numpy())
        current_lr = float(self.lr_schedule(step))
        self.log_storage.append(current_lr)
        print(f"Epoch {epoch + 1}: Learning Rate = {current_lr:.6f}")


Data Loader

In [ ]:
def load_data():
    """Load every image matched by DATA_DIR and split into train/validation.

    Each file is read as grayscale, expanded to three channels, resized to
    INPUT_SIZE x INPUT_SIZE and passed through ``preprocess_input``. The label
    is the name of the directory containing the file. Labels are encoded with
    the module-level ``le`` and one-hot encoded to NUM_CLASSES columns.

    Paths are sorted so that the seeded split is reproducible across runs and
    platforms. Files OpenCV cannot decode are skipped with a message.

    Returns:
        ``(x_train, x_val, y_train, y_val)`` from an 80/20 split with
        ``random_state=42``.

    Raises:
        FileNotFoundError: If no file matches DATA_DIR or none can be read.
    """
    paths = sorted(glob.glob(DATA_DIR))
    if not paths:
        raise FileNotFoundError(f"No image files matched {DATA_DIR!r}")

    total = len(paths)
    report_every = max(1, total // 10)  # roughly ten progress messages
    data, labels = [], []

    for i, path in enumerate(paths):
        img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        if img is None:
            # cv2.imread returns None instead of raising on unreadable files.
            print(f"Skipping unreadable file: {path}")
            continue

        img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
        img = cv2.resize(img, (INPUT_SIZE, INPUT_SIZE))
        data.append(preprocess_input(img))

        # Parent directory name is the class label; os.path handles both
        # "/" and "\\" separators.
        labels.append(os.path.basename(os.path.dirname(path)))

        if i % report_every == 0:
            print(f"{i}/{total} samples loaded")

    if not data:
        raise FileNotFoundError(f"No readable image files matched {DATA_DIR!r}")

    data = np.array(data)
    labels = le.fit_transform(labels)
    labels = to_categorical(labels, NUM_CLASSES)

    return train_test_split(data, labels, test_size=0.2, random_state=42)



Augmentation

In [ ]:
def augment_data(x_train):
    """Create the image augmentation generator used for training.

    Args:
        x_train: Training images. Kept for interface compatibility; it is not
            needed because none of the configured transforms depend on
            dataset statistics.

    Returns:
        An ``ImageDataGenerator`` applying random rotation, shifts, zoom and
        horizontal flips.
    """
    # ImageDataGenerator.fit() is only required for featurewise centering /
    # normalisation or ZCA whitening. None of those are enabled, so calling it
    # would only copy the whole training array without changing the generator.
    return ImageDataGenerator(
        rotation_range=15,
        width_shift_range=0.1,
        height_shift_range=0.1,
        zoom_range=0.1,
        horizontal_flip=True,
        fill_mode='nearest',
    )

Model Builder

In [ ]:
def build_model():
    """Assemble and compile the ResNet50V2 + CBAM emotion classifier.

    The ImageNet-pretrained backbone has all but its last 50 layers frozen and
    is called with ``training=False``. Its feature map goes through a
    CBAMLayer and global average pooling, then two Dense/Dropout/BatchNorm
    stages (2048 and 1024 units) and a softmax over NUM_CLASSES.

    Returns:
        The compiled model (RMSprop with the module-level ``learning_rate``
        schedule, focal loss, accuracy metric).
    """
    image_shape = (INPUT_SIZE, INPUT_SIZE, 3)

    backbone = ResNet50V2(weights="imagenet", include_top=False, input_shape=image_shape)
    # Freeze everything except the last 50 backbone layers.
    for frozen_layer in backbone.layers[:-50]:
        frozen_layer.trainable = False

    inputs = layers.Input(shape=image_shape)
    features = backbone(inputs, training=False)
    features = CBAMLayer()(features)
    features = layers.GlobalAveragePooling2D()(features)

    # Classification head: two identical stages of decreasing width.
    for units in (2048, 1024):
        features = layers.Dense(units, activation="relu")(features)
        features = layers.Dropout(0.5)(features)
        features = layers.BatchNormalization()(features)

    outputs = layers.Dense(NUM_CLASSES, activation="softmax")(features)

    model = models.Model(inputs, outputs)
    optimizer = optimizers.RMSprop(learning_rate=learning_rate)
    model.compile(optimizer=optimizer, loss=focal_loss(), metrics=["accuracy"])
    return model


Training

In [ ]:
def train_model(model, datagen, x_train, y_train, x_val, y_val):
    """Fit the model on augmented data, save it and report validation metrics.

    Args:
        model: Compiled Keras model.
        datagen: Generator whose ``flow`` yields augmented training batches.
        x_train, y_train: Training images and one-hot labels.
        x_val, y_val: Validation images and one-hot labels.

    Returns:
        The ``History`` object returned by ``model.fit``.

    Side effects:
        Writes the best checkpoint to MODELS_CHECKPOINTS and the final model
        to MODELS_FINAL_MODEL, creating their directories if needed.
    """
    # Saving fails if the target directories do not exist yet.
    for target in (MODELS_CHECKPOINTS, MODELS_FINAL_MODEL):
        os.makedirs(os.path.dirname(target), exist_ok=True)

    callbacks = [
        ModelCheckpoint(MODELS_CHECKPOINTS, save_best_only=True, monitor="val_loss", mode="min", verbose=1),
        EarlyStopping(monitor="val_loss", patience=5, restore_best_weights=True, verbose=1),
        LogCosineDecay(learning_rate, lr_log)
    ]

    # Balanced class weights from the integer class ids. `classes` and `y` are
    # keyword-only in current scikit-learn releases.
    class_ids = np.argmax(y_train, axis=1)
    present_classes = np.unique(class_ids)
    balanced = class_weight.compute_class_weight(
        class_weight="balanced", classes=present_classes, y=class_ids
    )
    # Key each weight by its real class id so a class missing from y_train
    # cannot shift the mapping.
    weights = dict(zip(present_classes.tolist(), balanced.tolist()))

    history = model.fit(
        datagen.flow(x_train, y_train, batch_size=BATCH_SIZE),
        validation_data=(x_val, y_val),
        epochs=EPOCHS,
        callbacks=callbacks,
        class_weight=weights
    )
    model.save(MODELS_FINAL_MODEL)

    # These metrics come from the validation split, not a held-out test set.
    loss, acc = model.evaluate(x_val, y_val, verbose=1)
    print(f"\nValidation Loss: {loss:.4f} | Validation Accuracy: {acc:.4f}")
    return history


Evaluation 

In [ ]:
def evaluate_model(model, x_val, y_val):
    """Print a classification report and show a confusion-matrix heatmap.

    Args:
        model: Trained model exposing ``predict``.
        x_val: Images to classify.
        y_val: One-hot ground-truth labels for ``x_val``.

    Class names for the report and the heatmap axes come from the
    module-level label encoder ``le``.
    """
    probabilities = model.predict(x_val)
    predicted = np.argmax(probabilities, axis=1)
    actual = np.argmax(y_val, axis=1)

    report = classification_report(actual, predicted, target_names=le.classes_)
    print(report)

    matrix = confusion_matrix(actual, predicted)
    plt.figure(figsize=(10, 8))
    sns.heatmap(
        matrix,
        annot=True,
        fmt='d',
        cmap='Blues',
        xticklabels=le.classes_,
        yticklabels=le.classes_,
    )
    plt.title("Confusion Matrix")
    plt.xlabel("Predicted")
    plt.ylabel("Actual")
    plt.show()


Visualization

In [ ]:
def _save_curve(history, metric, title, labels, colors, filename):
    """Plot one train/validation metric pair, save it under PLOTS_DIR and show it."""
    plt.figure(figsize=(12, 6))
    plt.plot(history[metric], label=labels[0], color=colors[0])
    plt.plot(history[f"val_{metric}"], label=labels[1], color=colors[1])
    plt.title(title)
    plt.legend()
    plt.savefig(os.path.join(PLOTS_DIR, filename))
    plt.show()


def plot_history(H):
    """Plot and save the training/validation accuracy and loss curves.

    Args:
        H: Keras ``History`` object; ``H.history`` must contain ``accuracy``,
            ``val_accuracy``, ``loss`` and ``val_loss``.

    Side effects:
        Writes ``accuracy.png`` and ``loss.png`` into PLOTS_DIR (created if
        missing) and displays both figures.
    """
    # savefig does not create missing directories.
    os.makedirs(PLOTS_DIR, exist_ok=True)

    # Titles match what was actually rendered before: the earlier
    # "Train vs Test ..." titles were immediately overwritten.
    _save_curve(H.history, "accuracy", "Accuracy",
                ("Train Accuracy", "Val Accuracy"), ("blue", "green"), "accuracy.png")
    _save_curve(H.history, "loss", "Loss",
                ("Train Loss", "Val Loss"), ("red", "orange"), "loss.png")


Main pipeline

In [ ]:
# End-to-end run: load and split the images, build the augmentation generator,
# build and train the model, then report metrics and plot the training curves.
x_train, x_val, y_train, y_val = load_data()
datagen = augment_data(x_train)
model = build_model()
history = train_model(model, datagen, x_train, y_train, x_val, y_val)
# Evaluation reuses the validation split; no separate test set is loaded here.
evaluate_model(model, x_val, y_val)
plot_history(history)
