# Preprocessing

In [1]:
import cv2
import numpy as np
import os
import matplotlib.pyplot as plt
import mtcnn
from PIL import Image

## Extract face
<b>Source:</b> https://machinelearningmastery.com/how-to-train-a-progressive-growing-gan-in-keras-for-synthesizing-faces/

In [2]:
def extract_face(model, pixels, required_size=(128, 128)):
    """Crop the first detected face out of an image and resize it.

    Args:
        model: Face detector exposing ``detect_faces(pixels)``, which returns
            a sequence of detections, each a mapping with a ``'box'`` entry
            of the form ``(x, y, width, height)``.
        pixels: Image as a NumPy array indexed ``[row, column, ...]``.
        required_size: ``(width, height)`` passed to ``Image.resize``.

    Returns:
        The resized face as a NumPy array, or ``None`` when no face is
        detected or the detected box does not overlap the image.
    """
    # detect face in the image
    faces = model.detect_faces(pixels)
    # skip cases where we could not detect a face
    if len(faces) == 0:
        return None
    # only the first detection is used
    x, y, width, height = faces[0]['box']
    # Compute the far corner from the raw box first, then clamp the near
    # corner to the image. The detector can report negative coordinates;
    # taking abs() of them would shift the crop window away from the face.
    x2, y2 = x + width, y + height
    x1, y1 = max(x, 0), max(y, 0)
    # A non-positive far corner would be read as a from-the-end slice index.
    if x2 <= x1 or y2 <= y1:
        return None
    # retrieve face pixels
    face_pixels = pixels[y1:y2, x1:x2]
    # Box starts beyond the image bounds: nothing to crop.
    if face_pixels.size == 0:
        return None
    # convert() returns a new image, so its result must be kept
    image = Image.fromarray(face_pixels).convert('RGB')
    # resize pixels to the model size
    image = image.resize(required_size)
    return np.asarray(image)

## Preprocess video

In [3]:
# Fractions (0..1) of the reported frame count: the leading share of the video
# that is skipped, and the share that is then read for face extraction.
SKIP_PERCENTAGE = 0.6
READ_PERCENTAGE = 0.2


def preprocessVideo(model, video_path, target_path):
    """Extract face crops from a window of a video and save them as JPEGs.

    The first ``SKIP_PERCENTAGE`` of the frames is skipped, then the next
    ``READ_PERCENTAGE`` of the frames is read. Every frame in which
    ``extract_face`` finds a face is written to ``target_path`` as
    ``<video name>_<index>.jpg``, where ``index`` is the frame's position
    inside the read window.

    Args:
        model: Face detector forwarded to ``extract_face``.
        video_path: Path of the video file to read.
        target_path: Existing directory that receives the face images.

    Raises:
        RuntimeError: If the video ends while the leading part is still
            being skipped.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        frame_count = cap.get(cv2.CAP_PROP_FRAME_COUNT)
        video_name = os.path.splitext(os.path.basename(video_path))[0]

        # Skip first part of video. grab() advances the stream without
        # decoding the frame, which is all that is needed here.
        for _ in range(int(frame_count * SKIP_PERCENTAGE)):
            if not cap.grab():
                raise RuntimeError(
                    f'Video ended while skipping frames: {video_path}'
                )

        # Read second part of video
        for i in range(int(frame_count * READ_PERCENTAGE)):
            ret, frame = cap.read()
            if not ret:
                break
            face = extract_face(model, frame)
            if face is None:
                continue
            cv2.imwrite(os.path.join(target_path, f'{video_name}_{i}.jpg'), face)
    finally:
        # Release the capture on every exit path, including errors.
        cap.release()

## Create train dataset

In [4]:
def create_train_dataset(model, dataset_path, target_path):
    """Build the face-image training set from per-emotion video folders.

    ``dataset_path`` is expected to contain one sub-directory per emotion,
    each holding videos. The same sub-directory names are created under
    ``target_path`` and every video is passed to ``preprocessVideo`` with the
    matching target directory.

    Args:
        model: Face detector forwarded to ``preprocessVideo``.
        dataset_path: Root directory of the source videos.
        target_path: Root directory for the extracted images (created if
            missing).
    """
    # Create train dataset folder
    os.makedirs(target_path, exist_ok=True)
    for emotion in os.listdir(dataset_path):
        emotion_source_dir = os.path.join(dataset_path, emotion)
        # Stray files at the top level (e.g. OS metadata) are not emotion
        # folders; listing them would raise NotADirectoryError.
        if not os.path.isdir(emotion_source_dir):
            continue
        # Create emotion folder
        emotion_target_dir = os.path.join(target_path, emotion)
        os.makedirs(emotion_target_dir, exist_ok=True)
        for video in os.listdir(emotion_source_dir):
            video_path = os.path.join(emotion_source_dir, video)
            preprocessVideo(model, video_path, emotion_target_dir)

## Create test dataset

In [5]:
def create_test_dataset(model, dataset_path, target_path):
    """Build the face-image test set, one output folder per source video.

    For every entry in ``dataset_path`` a directory named after the entry
    (without its extension) is created under ``target_path``, and the entry
    is passed to ``preprocessVideo`` with that directory as its target.

    Args:
        model: Face detector forwarded to ``preprocessVideo``.
        dataset_path: Directory containing the test videos.
        target_path: Root directory for the extracted images (created if
            missing).
    """
    os.makedirs(target_path, exist_ok=True)
    for entry in os.listdir(dataset_path):
        # The output folder is named after the video, extension dropped.
        stem, _extension = os.path.splitext(entry)
        frames_dir = os.path.join(target_path, stem)
        os.makedirs(frames_dir, exist_ok=True)
        preprocessVideo(model, os.path.join(dataset_path, entry), frames_dir)

## Convert to grayscale

In [6]:
def _write_grayscale_copies(source_root, target_root):
    """Mirror a two-level image tree as grayscale JPEGs."""
    os.makedirs(target_root, exist_ok=True)
    for group in os.listdir(source_root):
        group_source_dir = os.path.join(source_root, group)
        # Stray top-level files are not image folders; listing them would
        # raise NotADirectoryError.
        if not os.path.isdir(group_source_dir):
            continue
        group_target_dir = os.path.join(target_root, group)
        os.makedirs(group_target_dir, exist_ok=True)
        for img_name in os.listdir(group_source_dir):
            img = cv2.imread(os.path.join(group_source_dir, img_name))
            # imread returns None for files it cannot decode; skip them
            # instead of failing inside cvtColor.
            if img is None:
                continue
            # imread yields BGR channel order, so the BGR conversion code is
            # the one that applies the intended per-channel weights.
            img_grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            # Replace the extension rather than appending a second one
            # ("frame.jpg" -> "frame.jpg", not "frame.jpg.jpg").
            stem = os.path.splitext(img_name)[0]
            cv2.imwrite(os.path.join(group_target_dir, f'{stem}.jpg'), img_grayscale)


def create_grayscale_train_dataset(dataset_path, target_path):
    """Write a grayscale copy of the training images.

    ``dataset_path`` is expected to contain one sub-directory per emotion.
    Each image is converted to grayscale and saved as a ``.jpg`` under the
    same sub-directory name in ``target_path``. Non-directory entries and
    unreadable images are skipped.

    Args:
        dataset_path: Root directory of the colour training images.
        target_path: Root directory for the grayscale images (created if
            missing).
    """
    _write_grayscale_copies(dataset_path, target_path)


def create_grayscale_test_dataset(dataset_path, target_path):
    """Write a grayscale copy of the test images.

    ``dataset_path`` is expected to contain one sub-directory per test video.
    Each image is converted to grayscale and saved as a ``.jpg`` under the
    same sub-directory name in ``target_path``. Non-directory entries and
    unreadable images are skipped.

    Args:
        dataset_path: Root directory of the colour test images.
        target_path: Root directory for the grayscale images (created if
            missing).
    """
    _write_grayscale_copies(dataset_path, target_path)

In [7]:
# Pipeline driver. The earlier stages are left commented out; only the
# grayscale conversion of the test set is currently executed.
# NOTE(review): the detector is bound to `detector` in the disabled lines,
# since assigning to `mtcnn` would shadow the imported `mtcnn` module.
# NOTE(review): the train grayscale call uses create_grayscale_train_dataset;
# no `create_grayscale_dataset` is defined in the visible part of this file.
#detector = mtcnn.MTCNN()
#create_test_dataset(detector, 'Dataset/test', 'processed/test')
#create_train_dataset(detector, 'Dataset/train', 'processed/train')
#create_grayscale_train_dataset('processed/train', 'processed/grayscale')
create_grayscale_test_dataset('processed/test', 'processed/grayscale_test')