##### Importing and Installing Main Libraries Used

In [None]:
# Use !pip or %pip depending on Jupyter Notebook version

%pip install opencv-python
%pip install imutils
%pip install pandas
%pip install matplotlib
%pip install numpy
%pip install split-folders

### Dataset Preparation and Image Pre-Processing


#### Data Augmentation

Rotate each image by 70 different angles from -10.5° to 10.5° (in 0.3° steps, excluding 0°)


In [1]:
import cv2
import imutils
import os
import math

# Change the path to the directory (one per facial-expression category) that the
# source images are read from and the augmented images are written to.

path = "YOUR DIRECTORY PATH HERE/CATEGORY"

# The listing is taken once, before anything is written, so files created below
# are not picked up again during this run. Because the output goes into the same
# directory, re-running this cell WILL re-augment previously generated files.
imgfiles = os.listdir(path)

# Rotation angles: 71 values from -10.5 to 10.5 degrees in 0.3-degree steps,
# minus the 0.0 entry (the unrotated original already exists) -> 70 angles.
start = -10.5
step = 0.3
myRange = [round(start + (i * step), 2) for i in range(71)]
myRange.remove(0.0)

# Rotate and write each image immediately instead of first collecting every
# rotated image in a list; this keeps only one rotated image in memory at a time.
written = 0
for file in imgfiles:
    img = cv2.imread(os.path.join(path, file))
    if img is None:
        # cv2.imread returns None for files it cannot decode (non-images, corrupt files)
        print("Skipping unreadable file: " + file)
        continue

    # splitext removes only the final extension, so names that contain extra dots
    # (such as previously augmented files like "name-0.3.png") are not truncated.
    stem = os.path.splitext(file)[0]
    for a in myRange:
        imgr = imutils.rotate(img, angle=a)
        newName = stem + str(a) + '.png'
        cv2.imwrite(os.path.join(path, newName), imgr)
        written += 1

# To ensure the augmentation is done properly, display the number of files
# before augmentation and the number of augmented files written.
print(len(imgfiles))
print(written)

415
29050


#### Preparing dataset for cross-validation
3 Different Datasets for Cross-Validation:
<br>Each dataset contains a different set of images in training and testing for cross-validation purposes
<br>The ratio of the split is the same for all datasets
<br>
<br>
Data split:
<li> 80% of total data as Training set (64% of total as Training and 16% as Validation set)
<li> 20% of total data as Testing set


In [1]:
#import necessary libraries
import splitfolders
import random

In [3]:
# Split with a ratio of (.64, .16, .2) for training, validation, and test sets.
#
# The seed must be an explicit integer. The previous `seed=random.seed(10)` did not
# pass 10: random.seed() returns None, so `seed=None` was what reached
# splitfolders.ratio (presumably leaving the shuffle unseeded and the split
# non-reproducible -- confirm against the split-folders documentation).
# Use a DIFFERENT seed value for each dataset so that each one contains a
# different collection of images in its test set.
# Destination directory name is FER_2013_Augmented, because the data augmentation will be done directly to each datasets later
SPLIT_SEED = 10

# change the path names in the following line of code to split and save data
splitfolders.ratio('Path of the Original Directory', output='Path of Destination Directory',
                   seed=SPLIT_SEED, ratio=(.64, .16, .20), group_prefix=None, move=False)


#Actual code used for the experiment
# NOTE(review): in the calls recorded below, `random.seed(10)` evaluates to None,
# so the seed argument actually passed in each of them was None, not 10.

#Dataset 1
# splitfolders.ratio('D:\Concordia\COMP473\CK+\CK+_Complete', output='D:\Concordia\COMP473\CK+\CKD1',
#                    seed=random.seed(10), ratio=(.64, .16, .20), group_prefix=None, move=False) 

# #Dataset 2
# splitfolders.ratio('D:\Concordia\COMP473\CK+\CK+_Complete', output='D:\Concordia\COMP473\CK+\CKD2',
#                    seed=random.seed(10), ratio=(.64, .16, .20), group_prefix=None, move=False) 

# #Dataset 3
# splitfolders.ratio('D:\Concordia\COMP473\CK+\CK+_Complete', output='D:\Concordia\COMP473\CK+\CKD3',
#                    seed=random.seed(10), ratio=(.64, .16, .20), group_prefix=None, move=False) 

Copying files: 109695 files [1:03:23, 28.84 files/s]


##### To ensure that there is no overlap of subjects between the test sets of each emotion category

In [None]:

import re

def create_dataset_without_duplicate_subject(input_directory, output_directory_test, output_directory_other,
                                             list_of_subjects, image_class):
    """Split one emotion class into a test set and a remainder set by subject identity.

    Every file in ``input_directory/image_class`` is assigned by the subject
    identity encoded in its file name (the text before the first underscore):
    images whose identity is in ``list_of_subjects`` are written to
    ``output_directory_test/image_class``, all others to
    ``output_directory_other/image_class``. Images are read and re-written
    with OpenCV under their original file name.

    Args:
        input_directory: Root directory holding one sub-directory per class.
        output_directory_test: Root directory for the test-subject images.
        output_directory_other: Root directory for the remaining images.
        list_of_subjects: Subject identities that belong to the test set.
        image_class: Name of the class sub-directory to process.

    Files whose name has no ``<identity>_`` prefix, or that OpenCV cannot
    decode, are reported and skipped instead of aborting the whole run.
    """
    class_dir = os.path.join(input_directory, image_class)

    # makedirs also creates missing parents and tolerates an existing directory.
    os.makedirs(output_directory_test, exist_ok=True)

    for image in os.listdir(class_dir):
        # get the identity of the test subject
        match = re.match(r'^[^_]+(?=_)', image)
        if match is None:
            print('Skipping file without a subject prefix: ' + image)
            continue
        identity = match.group(0)

        # check to see if the subject is in the test set
        if identity in list_of_subjects:
            destination_root = output_directory_test
        else:
            destination_root = output_directory_other
        destination_dir = os.path.join(destination_root, image_class)

        original_image_path = os.path.join(class_dir, image)
        image_to_save = cv2.imread(original_image_path)
        if image_to_save is None:
            # cv2.imread returns None when the file cannot be decoded
            print('Skipping unreadable image: ' + original_image_path)
            continue

        # Created lazily so that a class folder only appears once it receives an
        # image; this also creates the destination root itself when it is missing.
        os.makedirs(destination_dir, exist_ok=True)

        new_save_location = os.path.join(destination_dir, image)
        print(new_save_location)
        cv2.imwrite(new_save_location, image_to_save)

# Emotion classes, in the same order as the per-class subject lists below.
image_class_names = ['anger', 'disgust', 'fear', 'happy', 'sadness', 'surprise']

# Directory holding the complete data, one sub-directory per emotion class.
complete_data_dir = '../COMP_473_Project/CK+_Complete'

# ---- Dataset 1: test subjects per emotion class ----
dataset_1_test_anger = ['S133', 'S134', 'S136', 'S501', 'S502', 'S503', 'S504', 'S506', 'S999']
dataset_1_test_disgust = ['S108', 'S109', 'S111', 'S116', 'S124', 'S125', 'S128', 'S129', 'S130', 'S131', 'S132', 'S134']
dataset_1_test_fear = ['S501', 'S502', 'S504', 'S506', 'S999']
dataset_1_test_happy = ['S124', 'S125', 'S127', 'S128', 'S129', 'S130', 'S131', 'S132', 'S133', 'S134', 'S135', 'S136', 'S137', 'S138']
dataset_1_test_sad = ['S501', 'S502', 'S503', 'S504', 'S506']
dataset_1_test_surprise = ['S117', 'S119', 'S122', 'S124', 'S125', 'S126', 'S127', 'S129', 'S130', 'S131', 'S132', 'S133', 'S135', 'S136', 'S137', 'S138']

subject_name = [dataset_1_test_anger, dataset_1_test_disgust, dataset_1_test_fear,
                dataset_1_test_happy, dataset_1_test_sad, dataset_1_test_surprise]

for class_name, class_subjects in zip(image_class_names, subject_name):
    create_dataset_without_duplicate_subject(complete_data_dir, '../COMP_473_Project/Test_1',
                                             '../COMP_473_Project/CK_split_1', class_subjects, class_name)

# ---- Dataset 2: test subjects per emotion class ----
dataset_2_test_anger = ['S010', 'S011', 'S014', 'S022', 'S026', 'S028', 'S029', 'S032', 'S034']
dataset_2_test_disgust = ['S005', 'S011', 'S022', 'S032', 'S035', 'S044', 'S045', 'S046', 'S051', 'S052', 'S054', 'S055']
dataset_2_test_fear = ['S011', 'S032', 'S046', 'S050', 'S054']
dataset_2_test_happy = ['S010', 'S011', 'S014', 'S026', 'S032', 'S034', 'S035', 'S037', 'S042', 'S044', 'S050', 'S052', 'S053', 'S055']
dataset_2_test_sad = ['S011', 'S014', 'S026', 'S042', 'S046']
dataset_2_test_surprise = ['S010', 'S011', 'S014', 'S022', 'S026', 'S032', 'S034', 'S035', 'S037', 'S042', 'S044', 'S046', 'S050', 'S051', 'S052', 'S053']

subject_name = [dataset_2_test_anger, dataset_2_test_disgust, dataset_2_test_fear,
                dataset_2_test_happy, dataset_2_test_sad, dataset_2_test_surprise]

for class_name, class_subjects in zip(image_class_names, subject_name):
    create_dataset_without_duplicate_subject(complete_data_dir, '../COMP_473_Project/Test_2',
                                             '../COMP_473_Project/CK_split_2', class_subjects, class_name)

# ---- Dataset 3: test subjects per emotion class ----
dataset_3_test_anger = ['S067', 'S071', 'S072', 'S075', 'S082', 'S087', 'S089', 'S090', 'S092']
dataset_3_test_disgust = ['S077', 'S078', 'S079', 'S080', 'S081', 'S082', 'S085', 'S087', 'S088', 'S090', 'S095', 'S096']
dataset_3_test_fear = ['S068', 'S074', 'S084', 'S091', 'S102']
dataset_3_test_happy = ['S071', 'S072', 'S074', 'S075', 'S076', 'S078', 'S079', 'S083', 'S085', 'S086', 'S087', 'S089', 'S091', 'S092']
dataset_3_test_sad = ['S071', 'S080', 'S081', 'S093', 'S095']
dataset_3_test_surprise = ['S079', 'S080', 'S081', 'S082', 'S084', 'S085', 'S086', 'S087', 'S088', 'S089', 'S090', 'S092', 'S094', 'S095', 'S096', 'S097']

subject_name = [dataset_3_test_anger, dataset_3_test_disgust, dataset_3_test_fear,
                dataset_3_test_happy, dataset_3_test_sad, dataset_3_test_surprise]

for class_name, class_subjects in zip(image_class_names, subject_name):
    create_dataset_without_duplicate_subject(complete_data_dir, '../COMP_473_Project/Test_3',
                                             '../COMP_473_Project/CK_split_3', class_subjects, class_name)

#### Pre-Processing: Image Cropping
Effect: Delimiting the region of interest
<br>
<br>With OpenCV2, import image
<br>Detect the face of the subject with OpenCV2 and haarcascade_frontalface_default.xml
<br>Crop the image to the x, y, w, h box of the detected face, trimming a constant 45 pixels from both the left and right sides of the box

In [5]:
import cv2
import numpy as np
import os
import matplotlib.pyplot as plt

In [6]:
def face_preprocessing(input_directory, output_directory, constant_to_scale=45):
    """Crop every image under ``input_directory`` to its detected face region.

    ``input_directory`` is expected to contain one sub-directory per image
    class. Each image is run through the Haar cascade loaded from
    "haarcascade_frontalface_default.xml" (resolved relative to the current
    working directory) and the cropped result is written to
    ``output_directory/<class>/<same file name>``.

    The crop keeps the full height of the detected box and trims
    ``constant_to_scale`` pixels from both its left and right sides. The
    uncropped image is written instead when no face is detected or when the
    detected box is too narrow to leave any columns after trimming.

    Args:
        input_directory: Root directory with one sub-directory per class.
        output_directory: Root directory for the cropped images; created if missing.
        constant_to_scale: Pixels trimmed from each side of the face box (default 45).

    Raises:
        FileNotFoundError: If the Haar cascade file could not be loaded.
    """
    os.makedirs(output_directory, exist_ok=True)

    # Load the classifier once; it does not depend on the image being processed.
    face_cascade = cv2.CascadeClassifier("haarcascade_frontalface_default.xml")
    if face_cascade.empty():
        raise FileNotFoundError(
            "Could not load haarcascade_frontalface_default.xml from the current working directory")

    for image_class in os.listdir(input_directory):
        class_input_dir = os.path.join(input_directory, image_class)
        class_output_dir = os.path.join(output_directory, image_class)

        for image_name in os.listdir(class_input_dir):
            print(image_name)

            image = cv2.imread(os.path.join(class_input_dir, image_name))  # cv2.IMREAD_UNCHANGED
            if image is None:
                # cv2.imread returns None when the file cannot be decoded
                print('Skipping unreadable image: ' + image_name)
                continue

            faces = face_cascade.detectMultiScale(image, 1.3, 5)

            # Default to the uncropped image (no face found / unusable box).
            crop_img = image
            if len(faces) > 0:
                # When several faces are detected the last one is used, matching
                # the previous loop that overwrote the crop on every detection.
                x, y, w, h = faces[-1]
                left = x + constant_to_scale
                right = x + w - constant_to_scale
                # A box narrower than twice the trim would give an empty crop
                # (or a wrapped-around slice if `right` went negative).
                if right > left:
                    crop_img = image[y:y + h, left:right]

            # Created lazily so a class folder only appears once it receives an image.
            os.makedirs(class_output_dir, exist_ok=True)
            cv2.imwrite(os.path.join(class_output_dir, image_name), crop_img)

In [7]:
# Run the face cropping: replace both placeholder strings with the source directory
# (one sub-directory per class) and the directory the cropped images are written to.
face_preprocessing('PATH TO YOUR ORIGINAL DIRECTORY', 'PATH TO THE DESTINATION DIRECTORY')

# Actual code used for the experiment
# ck_augmented_directory_1 = 'D:\Concordia\COMP473\\Test1'
# ck_augmented_directory_2 = 'D:\Concordia\COMP473\\Test2'
# ck_augmented_directory_3 = 'D:\Concordia\COMP473\\Test3'

# ck_cropping_output_directory_1 = 'D:\Concordia\COMP473\\Test_crop1'
# ck_cropping_output_directory_2 = 'D:\Concordia\COMP473\\Test_crop2'
# ck_cropping_output_directory_3 = 'D:\Concordia\COMP473\\Test_crop3'


# face_preprocessing(ck_augmented_directory_1, ck_cropping_output_directory_1)

# face_preprocessing(ck_augmented_directory_2, ck_cropping_output_directory_2)

# face_preprocessing(ck_augmented_directory_3, ck_cropping_output_directory_3)


S133_003_00000043-0.3.png
S133_003_00000043-0.6.png
S133_003_00000043-0.9.png
S133_003_00000043-1.2.png
S133_003_00000043-1.5.png
S133_003_00000043-1.8.png
S133_003_00000043-10.2.png
S133_003_00000043-10.5.png
S133_003_00000043-2.1.png
S133_003_00000043-2.4.png
S133_003_00000043-2.7.png
S133_003_00000043-3.0.png
S133_003_00000043-3.3.png
S133_003_00000043-3.6.png
S133_003_00000043-3.9.png
S133_003_00000043-4.2.png
S133_003_00000043-4.5.png
S133_003_00000043-4.8.png
S133_003_00000043-5.1.png
S133_003_00000043-5.4.png
S133_003_00000043-5.7.png
S133_003_00000043-6.0.png
S133_003_00000043-6.3.png
S133_003_00000043-6.6.png
S133_003_00000043-6.9.png
S133_003_00000043-7.2.png
S133_003_00000043-7.5.png
S133_003_00000043-7.8.png
S133_003_00000043-8.1.png
S133_003_00000043-8.4.png
S133_003_00000043-8.7.png
S133_003_00000043-9.0.png
S133_003_00000043-9.3.png
S133_003_00000043-9.6.png
S133_003_00000043-9.9.png
S133_003_00000043.png
S133_003_000000430.3.png
S133_003_000000430.6.png
S133_003_0000004

#### Pre-Processing: intensity normalization 
Effect: Increase contrast of each input image file
<br>
<br>With OpenCV2, import image
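<br> Calculate the mean and the standard deviation (STD) of the pixel distribution of the input image
<br> Clip the image to the lower and upper bounds mean − 0.3·STD and mean + 0.3·STD (an offset of 0.3 standard deviations)
<br> Rescale the clipped image to the full 0–255 range (min-max normalization)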


In [8]:
import cv2
import numpy as np
import os

In [4]:
data_dir = 'Enter the path to your original directory here'
new_data_dir = 'Enter the path to your destination directory here'

# data_dir = 'D:\Concordia\COMP473\\Test3'
# new_data_dir = 'D:\Concordia\COMP473\\Test_IN3'

# Half-width of the clipping window, in standard deviations around the mean.
offset = 0.3

for image_class in os.listdir(data_dir):
    class_output_dir = os.path.join(new_data_dir, image_class)

    for image_name in os.listdir(os.path.join(data_dir, image_class)):
        image_path = os.path.join(data_dir, image_class, image_name)

        # load image as grayscale
        img = cv2.imread(image_path, 0)
        if img is None:
            # cv2.imread returns None when the file cannot be decoded
            print('Skipping unreadable image: ' + image_path)
            continue

        # Calculate mean and STD
        mean, STD = cv2.meanStdDev(img)

        # Clip frame to lower and upper Standard Deviation
        clipped = np.clip(img, mean - offset*STD, mean + offset*STD).astype('uint8')

        # Normalize to range: stretch the clipped values over the full 0-255 span
        result = cv2.normalize(clipped, clipped, 0, 255, norm_type=cv2.NORM_MINMAX)

        # Create the directory for the emotion category if needed; makedirs also
        # creates the destination root itself when it does not exist yet.
        os.makedirs(class_output_dir, exist_ok=True)

        cv2.imwrite(os.path.join(class_output_dir, image_name), result)
