The following is the training sequence of a lightweight model used for emotion detection on video frames. The model is kept small to increase inference speed while maintaining decent accuracy. It was further quantised from 8 MB to 2 MB after training. Here is the demo video link: https://drive.google.com/file/d/1atuPCgSzteMwL30KOVUDsYgNZSkelzgI/view?usp=drive_link

In [None]:
!pip install -q tensorflow
!pip install -q tensorflow-model-optimization
!pip install numpy
!pip install keras seaborn scikit-learn matplotlib

^C


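The following section is data preparation. To run this sequence, create a folder named 'images' in the root of the project. It should contain two folders, 'train' and 'validation', each with one subfolder per emotion ('neutral', 'happy', 'angry', 'surprise', 'sad') holding that emotion's images.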

In [None]:
import os
from keras.preprocessing.image import load_img, img_to_array

def _load_emotion_folder(folder, label, image_size, extensions=('.jpg', '.jpeg', '.png')):
    """Load every image file in *folder* as a normalised grayscale array.

    Args:
        folder: Directory containing the images of a single emotion.
        label: Integer label attached to every image found in *folder*.
        image_size: Size passed to ``load_img`` as ``target_size``.
        extensions: Lower-case file suffixes that count as images.

    Returns:
        A ``(images, labels)`` pair of equally long lists.

    Raises:
        FileNotFoundError: If *folder* does not exist.
    """
    images = []
    labels = []

    # os.listdir order is arbitrary; sorting keeps the sample order (and
    # therefore any seeded split done later) reproducible across machines.
    for filename in sorted(os.listdir(folder)):
        # Compare in lower case so '.PNG' / '.JPEG' are accepted like '.JPG'.
        if not filename.lower().endswith(extensions):
            continue

        image_path = os.path.join(folder, filename)

        # Load as single-channel grayscale, resized to image_size
        img = load_img(image_path, color_mode='grayscale', target_size=image_size)
        img_array = img_to_array(img)

        # Normalize the pixel values to the range [0, 1]
        img_array /= 255.0

        images.append(img_array)
        labels.append(label)

    return images, labels


def extract_dataset(main_folder, image_size=(48, 48)):
    """Read the training and validation images for the five emotions.

    Expects ``<main_folder>/train/<emotion>`` and
    ``<main_folder>/validation/<emotion>`` to exist for each of
    'neutral', 'happy', 'angry', 'surprise' and 'sad'.

    Args:
        main_folder: Root folder holding the 'train' and 'validation' folders.
        image_size: Size every image is resized to when loaded.

    Returns:
        ``(train_dataset, train_labels, val_dataset, val_labels)`` where the
        datasets are lists of image arrays scaled to [0, 1] and the labels
        are lists of the matching integer emotion labels.

    Raises:
        FileNotFoundError: If any expected emotion folder is missing.
    """
    # Define a dictionary to map each emotion to a unique label
    emotion_labels = {'neutral': 0, 'happy': 1, 'angry': 2, 'surprise': 3, 'sad': 4}

    train_dataset, train_labels = [], []
    val_dataset, val_labels = [], []

    for emotion, label in emotion_labels.items():
        # Training set
        images, labels = _load_emotion_folder(
            os.path.join(main_folder, 'train', emotion), label, image_size
        )
        train_dataset.extend(images)
        train_labels.extend(labels)

        # Validation set
        images, labels = _load_emotion_folder(
            os.path.join(main_folder, 'validation', emotion), label, image_size
        )
        val_dataset.extend(images)
        val_labels.extend(labels)

    return train_dataset, train_labels, val_dataset, val_labels
    
# Root folder that holds the 'train' and 'validation' image folders.
# NOTE(review): '/images' is an absolute path at the filesystem root; confirm
# this is intended rather than a project-relative 'images' folder.
main_folder_path = '/images'

# Size every image is resized to on load.
image_size = (48, 48)

train_dataset, train_labels, val_dataset, val_labels = extract_dataset(
    main_folder_path,
    image_size,
)

The following code defines the model. We are using a compact VGGNet16-style architecture with the FER2013 dataset. The inputs are grayscale 48 x 48 images.

In [None]:
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout,BatchNormalization
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping
from keras.regularizers import l2
from sklearn.metrics import confusion_matrix,ConfusionMatrixDisplay
from sklearn.metrics import classification_report, roc_curve, auc
from sklearn.utils import class_weight
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
import numpy as np
import tensorflow_model_optimization as tfmot
import tf_keras as keras
import tempfile

def create_vggnet16_model(input_shape=(48, 48, 1), num_classes=5):
    """Build the small VGG-style convolutional classifier.

    The network stacks four 3x3 convolutions with max pooling and dropout,
    followed by a 1024-unit dense layer and a softmax output. The model
    summary is printed before the (uncompiled) model is returned.

    Args:
        input_shape: Shape of one input image. Previously this argument was
            ignored and (48, 48, 1) was always used; it is now honoured.
        num_classes: Number of units in the softmax output layer.

    Returns:
        The uncompiled ``Sequential`` model.
    """
    model = Sequential()

    # First convolutional stage
    model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape))
    model.add(Conv2D(64, kernel_size=(3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    # Second convolutional stage
    model.add(Conv2D(128, kernel_size=(3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Conv2D(128, kernel_size=(3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    # Classifier head
    model.add(Flatten())
    model.add(Dense(1024, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(num_classes, activation='softmax'))
    model.summary()
    return model


These are some graph plotting functions. They have been taken as a sample from the internet.

In [None]:
def plot_confusion_matrix(y_true, y_pred, classes):
    """Draw the confusion matrix of *y_pred* against *y_true*.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels.
        classes: Passed to ``ConfusionMatrixDisplay`` as ``display_labels``.
            Note that the tick labels are replaced afterwards by the unique
            label values found in *y_true* and *y_pred*.
    """
    matrix = confusion_matrix(y_true, y_pred)

    # Label values that actually occur in either array
    observed = np.unique(np.concatenate([y_true, y_pred]))
    tick_positions = np.arange(len(observed))

    # Render the matrix with integer cell values
    display = ConfusionMatrixDisplay(confusion_matrix=matrix, display_labels=classes)
    display.plot(cmap='Blues', values_format='d')

    # Relabel both axes with the observed label values
    plt.xticks(tick_positions, observed, rotation=45)
    plt.yticks(tick_positions, observed)

    plt.show()
def plot_accuracy_curves(history):
    """Plot training and validation accuracy per epoch.

    Args:
        history: Object whose ``history`` mapping contains the 'accuracy'
            and 'val_accuracy' series.
    """
    recorded = history.history
    curves = (
        ('accuracy', 'Train Accuracy'),
        ('val_accuracy', 'Validation Accuracy'),
    )
    for key, legend_label in curves:
        plt.plot(recorded[key], label=legend_label)

    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.title('Training Vs Validation Accuracy')
    plt.show()

def plot_loss_curves(history):
    """Plot training and validation loss per epoch.

    Args:
        history: Object whose ``history`` mapping contains the 'loss' and
            'val_loss' series.
    """
    recorded = history.history
    curves = (
        ('loss', 'Train Loss'),
        ('val_loss', 'Validation Loss'),
    )
    for key, legend_label in curves:
        plt.plot(recorded[key], label=legend_label)

    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.title('Training Vs Validation Loss')
    plt.show()


This is the main training sequence. We use the Adam optimizer with sparse categorical cross-entropy as the loss function.

In [None]:
def train_vgg(dataset, labels, learning_rate=0.001, batch_size=32, test_size=0.2, epochs=10, patience=3):
    """Train the CNN on *dataset* with balanced class weights and early stopping.

    The data is split once (fixed seed) into a training part and a held-out
    part. The held-out part is used both as validation data for early
    stopping and for the final "Test" figures printed at the end.

    Args:
        dataset: Array of input images.
        labels: Array of integer class labels, one per image.
        learning_rate: Learning rate for the Adam optimizer.
        batch_size: Mini-batch size used by ``model.fit``.
        test_size: Fraction of the data held out from training.
        epochs: Maximum number of training epochs.
        patience: Epochs without ``val_loss`` improvement before stopping.

    Returns:
        The trained (non-quantized) Keras model, with the best weights
        restored by early stopping.
    """
    # Split the dataset into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(dataset, labels, test_size=test_size, random_state=42)

    # Compute class weights. Key each weight by its actual label value rather
    # than by position, so the mapping stays correct even when the labels
    # present are not exactly 0..k-1.
    classes = np.unique(labels)
    class_weights = class_weight.compute_class_weight('balanced', classes=classes, y=labels)
    class_weight_dict = {int(cls): float(weight) for cls, weight in zip(classes, class_weights)}
    print(class_weight_dict)

    # Create and compile the model
    model = create_vggnet16_model()
    optimizer = Adam(learning_rate=learning_rate)
    model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

    # Define early stopping
    early_stopping = EarlyStopping(monitor='val_loss', patience=patience, restore_best_weights=True)

    # Train the model with class weights
    history = model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, validation_data=(X_test, y_test),
                        callbacks=[early_stopping], class_weight=class_weight_dict)

    # No quantization happens in this function; the figures below describe
    # the float model on the held-out split.
    test_loss, test_accuracy = model.evaluate(X_test, y_test)
    print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}")

    # Same evaluation on the training split, for comparison
    train_loss, train_accuracy = model.evaluate(X_train, y_train)
    print(f"Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.4f}")

    return model


In the following section, the model is trained for up to 40 epochs (early stopping may end training sooner). After training, it is converted into a TFLite model and quantised. The quantised model is then saved as emotion_quantized.tflite in the root of the project.

In [None]:
import psutil

print("Memory Usage Before:", psutil.virtual_memory())

# Five emotions: {neutral, happy, angry, surprise, sad}
dataset = np.asarray(train_dataset)
labels = np.asarray(train_labels)

simple_model = train_vgg(
    dataset,
    labels,
    learning_rate=0.0001,
    batch_size=128,
    test_size=0.2,
    epochs=40,
    patience=7,
)

# Build a TensorFlow Lite converter from the trained Keras model
converter = tf.lite.TFLiteConverter.from_keras_model(simple_model)
# Enable the default post-training optimizations
converter.optimizations = [tf.lite.Optimize.DEFAULT]
# Restrict the converted graph to the INT8 builtin op set
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
# NOTE(review): `allow_flexible_interpreter` does not appear among the
# documented TFLiteConverter attributes, so this assignment presumably has no
# effect on conversion -- confirm and drop if so.
converter.allow_flexible_interpreter = True


def representative_dataset(samples=None, num_samples=100):
    """Yield calibration batches for TFLite post-training quantisation.

    Calibration previously used uniform random noise, which does not reflect
    the value distribution of real face images. A seeded random subset of
    actual input images is used instead.

    Args:
        samples: Array-like of input images, one image per leading index.
            When omitted (as when the converter calls this function), the
            notebook-level ``dataset`` array is used.
        num_samples: Maximum number of calibration batches to yield.

    Yields:
        One-element lists holding a float32 array with a leading batch
        dimension of 1.
    """
    if samples is None:
        samples = dataset
    samples = np.asarray(samples)

    count = min(num_samples, len(samples))
    if count <= 0:
        return

    # Fixed seed keeps the calibration subset reproducible between runs.
    rng = np.random.default_rng(42)
    for index in rng.choice(len(samples), size=count, replace=False):
        yield [samples[index][np.newaxis, ...].astype(np.float32)]

# Register the calibration generator and run the conversion
converter.representative_dataset = representative_dataset
tflite_quant_model = converter.convert()

# Write the converted model to disk
tflite_path = "emotion_quantized.tflite"
with open(tflite_path, "wb") as tflite_file:
    tflite_file.write(tflite_quant_model)

# Report the on-disk size in bytes, kilobytes and megabytes
model_size_bytes = os.path.getsize(tflite_path)
model_size_kb = model_size_bytes / 1024
model_size_mb = model_size_kb / 1024
print(f"Size of the quantized model: {model_size_bytes} bytes ({model_size_kb:.2f} KB, {model_size_mb:.2f} MB)")