# Emotion Classification in Video

## 1. Google Drive Mount

In [1]:
# Mount Google Drive into the Colab filesystem at /content/drive; the dataset
# archive, input video and detector weights used below are read from there.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import zipfile

# Location of the zipped image dataset on the mounted Drive.
path = '/content/drive/MyDrive/colab/Computer-Vision-Course/Data/Datasets/fer_images.zip'

# Unpack the archive into the current working directory. The context manager
# closes the archive even when extraction fails part-way through.
# NOTE(review): later cells read '/content/fer2013/...', so this presumably
# relies on the working directory being /content - confirm.
with zipfile.ZipFile(file=path, mode='r') as zip_object:
    zip_object.extractall('./')

## 2. Create Train and Test Datasets

In [3]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Training-time generator: every image is scaled by 1/255 and randomly
# augmented (rotation, zoom, horizontal flip) each time it is drawn.
train_generator = ImageDataGenerator(
    rescale=1/255,         # Multiply pixel values by 1/255 to normalize
    rotation_range=10,     # Degree range for random rotations
    zoom_range=0.2,        # A float z means a random zoom in [1 - z, 1 + z]
    horizontal_flip=True,  # Randomly mirror inputs left/right
)

# Stream labelled batches from the training folder (one sub-folder per class).
train_dataset = train_generator.flow_from_directory(
    directory='/content/fer2013/train',
    target_size=(48, 48),      # (height, width) every image is resized to
    batch_size=16,             # Images per batch
    class_mode='categorical',  # One-hot encoded labels
    shuffle=True,              # Shuffle the images between epochs
    seed=10,                   # Seed for shuffling and random transformations
)

Found 28709 images belonging to 7 classes.


In [4]:
# Evaluation-time generator: pixel values are scaled by 1/255 only, with no
# augmentation.
test_generator = ImageDataGenerator(rescale=1/255)

# Read the validation folder one image at a time, in a fixed (unshuffled) order.
test_dataset = test_generator.flow_from_directory(
    directory='/content/fer2013/validation',
    target_size=(48, 48),
    batch_size=1,
    class_mode='categorical',
    shuffle=False,
    seed=10,
)

Found 3589 images belonging to 7 classes.


## 3. Build a Convolutional Neural Network (CNN)

In [5]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Conv2D, MaxPooling2D, Flatten, BatchNormalization

num_classes = 7         # Number of output units (one per emotion class)
num_detectors = 32      # Filters in the first convolutional stage; later stages use multiples of it
width, height = 48, 48  # Input image size, same as target_size used by the data generators

network = Sequential()

# Four convolutional stages with 1x, 2x, 4x and 8x num_detectors filters.
# Each stage is Conv -> BatchNorm -> Conv -> BatchNorm -> MaxPool -> Dropout.
for stage_index, filter_multiplier in enumerate((1, 2, 4, 8)):
    stage_filters = filter_multiplier * num_detectors

    # Only the very first layer declares the input shape. Keras channels-last
    # tensors are (height, width, channels), so height must come first.
    first_layer_kwargs = {'input_shape': (height, width, 3)} if stage_index == 0 else {}

    network.add(Conv2D(filters=stage_filters, kernel_size=3, activation='relu', padding='same', **first_layer_kwargs))
    network.add(BatchNormalization())
    network.add(Conv2D(filters=stage_filters, kernel_size=3, activation='relu', padding='same'))
    network.add(BatchNormalization())
    network.add(MaxPooling2D(pool_size=(2, 2)))
    network.add(Dropout(0.2))

network.add(Flatten())

# Two fully connected stages (4x then 2x num_detectors units), each followed by
# batch normalization and dropout.
for dense_units in (4 * num_detectors, 2 * num_detectors):
    network.add(Dense(dense_units, activation='relu'))
    network.add(BatchNormalization())
    network.add(Dropout(0.2))

# Softmax output: one probability per class.
network.add(Dense(num_classes, activation='softmax'))

## 4. Train the Model

In [6]:
# Configure training: Adam optimizer, categorical cross-entropy loss (the
# generators yield one-hot labels), and accuracy reported per epoch.
network.compile(
    loss='categorical_crossentropy',
    optimizer='Adam',
    metrics=['accuracy'],
)

# Number of full passes over the training generator.
epochs = 70
network.fit(train_dataset, epochs=epochs)

Epoch 1/70
Epoch 2/70
Epoch 3/70
Epoch 4/70
Epoch 5/70
Epoch 6/70
Epoch 7/70
Epoch 8/70
Epoch 9/70
Epoch 10/70
Epoch 11/70
Epoch 12/70
Epoch 13/70
Epoch 14/70
Epoch 15/70
Epoch 16/70
Epoch 17/70
Epoch 18/70
Epoch 19/70
Epoch 20/70
Epoch 21/70
Epoch 22/70
Epoch 23/70
Epoch 24/70
Epoch 25/70
Epoch 26/70
Epoch 27/70
Epoch 28/70
Epoch 29/70
Epoch 30/70
Epoch 31/70
Epoch 32/70
Epoch 33/70
Epoch 34/70
Epoch 35/70
Epoch 36/70
Epoch 37/70
Epoch 38/70
Epoch 39/70
Epoch 40/70
Epoch 41/70
Epoch 42/70
Epoch 43/70
Epoch 44/70
Epoch 45/70
Epoch 46/70
Epoch 47/70
Epoch 48/70
Epoch 49/70
Epoch 50/70
Epoch 51/70
Epoch 52/70
Epoch 53/70
Epoch 54/70
Epoch 55/70
Epoch 56/70
Epoch 57/70
Epoch 58/70
Epoch 59/70
Epoch 60/70
Epoch 61/70
Epoch 62/70
Epoch 63/70
Epoch 64/70
Epoch 65/70
Epoch 66/70
Epoch 67/70
Epoch 68/70
Epoch 69/70
Epoch 70/70


<keras.callbacks.History at 0x7eff3ba59b20>

## 5. Classify Emotions in Video

In [7]:
import cv2

# Open the input video. cv2.VideoCapture does not raise on a missing or
# unreadable file, so check explicitly instead of silently processing 0 frames.
video_path = '/content/drive/MyDrive/colab/Computer-Vision-Course/Data/Videos/emotion_classification.mp4'
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    raise IOError(f'Could not open video: {video_path}')

# Capture properties are returned as floats; round/convert them where integers are needed.
video_width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)  # Video capture's frame width
video_height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)  # Video capture's frame height
video_size = (round(video_width), round(video_height))  # (width, height), reused for the output writer
video_fps = cap.get(cv2.CAP_PROP_FPS)  # FPS (Frames Per Second)
frame_cnt = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))  # Number of frames

print('Number of frames:', frame_cnt, '/ FPS:', round(video_fps), '/ Frame size:', video_size)

Number of frames: 408 / FPS: 24 / Frame size: (640, 360)


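- `connected` indicates whether a frame was read successfully; it becomes `False` once no more frames can be read.
- `frame` is the frame returned by that read; each call to `cap.read()` returns the next frame of the video.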

In [8]:
# Destination of the annotated video.
# Author's note: on Linux the output file extension must be .avi.
video_output_path = '/content/drive/MyDrive/colab/Computer-Vision-Course/Data/Videos/emotion_classification_result.avi'

# Four-character code selecting the XVID codec.
codec = cv2.VideoWriter_fourcc('X', 'V', 'I', 'D')

# The writer reuses the FPS and frame size read from the input capture.
video_writer = cv2.VideoWriter(
    video_output_path,
    codec,
    video_fps,
    video_size,
)

In [9]:
import dlib

# CNN-based face detector loaded from pre-trained weights on the mounted Drive.
cnn_face_detector = dlib.cnn_face_detection_model_v1('/content/drive/MyDrive/colab/Computer-Vision-Course/Data/Weights/mmod_human_face_detector.dat')

# Drawing colors, in OpenCV's BGR channel order.
green_color = (0, 255, 0)
red_color = (0, 0, 255)

# Display label for each output index of the classifier.
# NOTE(review): presumably matches the class order assigned by
# flow_from_directory - verify against train_dataset.class_indices.
emotions = ['Angry', 'Disgust', 'Fear', 'Happy', 'Neutral', 'Sad', 'Surprise']

# Process the video frame by frame: detect faces, classify each face crop,
# annotate the frame, and append it to the output video. The try/finally
# guarantees the writer and capture are released even if a frame fails.
try:
    # NOTE(review): no window is shown in this notebook, so waitKey presumably
    # never reports a key press; the loop normally ends when the frames run out.
    while (cv2.waitKey(1) < 0):
        connected, frame = cap.read()  # Read one frame from a VideoCapture object
        if not connected:
            break

        # OpenCV decodes frames as BGR, while dlib and the Keras training
        # generator work with RGB. Detect and crop on an RGB copy; this copy is
        # never drawn on, so annotations cannot leak into the classifier input.
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame_height, frame_width = rgb_frame.shape[:2]

        for face_detection in cnn_face_detector(rgb_frame, 1):
            rect = face_detection.rect
            left, top, right, bottom = rect.left(), rect.top(), rect.right(), rect.bottom()

            # Detections may extend past the frame border. Clamp the crop
            # window so slicing never sees negative or oversized indices.
            crop_left, crop_top = max(left, 0), max(top, 0)
            crop_right, crop_bottom = min(right, frame_width), min(bottom, frame_height)
            if crop_right <= crop_left or crop_bottom <= crop_top:
                continue  # Nothing of this detection lies inside the frame

            roi = rgb_frame[crop_top:crop_bottom, crop_left:crop_right]  # Extract region of interest from image
            roi = cv2.resize(roi, (48, 48))  # Match the network's input size
            roi = roi / 255  # Normalize
            roi = np.expand_dims(roi, axis=0)  # Add the batch dimension
            preds = network.predict(roi, verbose=0)  # verbose=0: no progress bar per face

            pred_emotion_index = int(np.argmax(preds))

            # Annotate the BGR frame that is written to the output video.
            cv2.rectangle(frame, (left, top), (right, bottom), green_color, 2)
            cv2.putText(frame, emotions[pred_emotion_index], (left, top-10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, red_color, 1)

        video_writer.write(frame)
finally:
    video_writer.release()
    cap.release()

