In [2]:
import pathlib
import os
import cv2
import numpy as np
import pickle
import pandas as pd
from tqdm import tqdm

In [3]:
FER2013_images_train = pathlib.Path("C:/Users/cdr03/Documents/Thesis/dataset/FER2013/train")
FER2013_images_test = pathlib.Path("C:/Users/cdr03/Documents/Thesis/dataset/FER2013/test")
emotion_labels_number = {"neutral": 0, "angry": 1, "surprise": 2, "disgust": 3, "fear": 4, "happy": 5, "sad": 6}
IMG_size = 48

In [4]:
def list_files_recursive(path, format = ".txt"):
    txt_files = set()  # create an empty set to store unique txt file names
    for root, dirs, files in os.walk(path):
        for file in files:
            if file.endswith(format):
                file_path = os.path.join(root, file)
                txt_files.add(file_path)  # add the file path to the set
        for dir in dirs:
            dir_path = os.path.join(root, dir)
            list_files_recursive(dir_path)  # recursively call the function on each subdirectory
    return list(txt_files)  # return a list of unique txt file names

In [5]:
def image_to_array(file_path):
    # Read image using imread() function from opencv
    img = cv2.imread(file_path, cv2.IMREAD_GRAYSCALE)
    # Convert image to numpy array using np.array() function
    img_array = np.array(img , dtype="float32")/255
    # Return the numpy array
    return img_array

In [12]:
def readImages_FER2013(file_list):
    df = pd.DataFrame(columns=['Name','Data', 'Emotion'])
    for file_path in tqdm(file_list, desc="Processing Images"):
        data = image_to_array(file_path)
        dir_name = os.path.dirname(file_path)
        file_name = os.path.basename(file_path)
        emotion = emotion_labels_number[os.path.basename(dir_name)]
        temp_df = pd.DataFrame({'Name': [file_name], 'Data':[data], 'Emotion':emotion})
        df = pd.concat([df, temp_df], ignore_index=True)
    return df

In [13]:
FER2013_train_list = list_files_recursive(FER2013_images_train, ".jpg")
FER2013_test_list = list_files_recursive(FER2013_images_test, ".jpg")
Fer2013_df_train = readImages_FER2013(FER2013_train_list)
Fer2013_df_test = readImages_FER2013(FER2013_test_list)

Processing Images: 100%|██████████| 28709/28709 [03:53<00:00, 122.80it/s]
Processing Images: 100%|██████████| 7178/7178 [00:52<00:00, 136.50it/s]


In [14]:
Fer2013_df_train.head(10)

Unnamed: 0,Name,Data,Emotion
0,Training_2949880.jpg,"[[0.15686275, 0.101960786, 0.08627451, 0.08627...",0
1,Training_97259432.jpg,"[[0.08235294, 0.08627451, 0.07450981, 0.070588...",6
2,Training_67244109.jpg,"[[0.95686275, 0.9607843, 0.9647059, 0.96862745...",6
3,Training_31690617.jpg,"[[0.7019608, 0.44705883, 0.13725491, 0.2745098...",2
4,Training_21320400.jpg,"[[0.3764706, 0.18431373, 0.14117648, 0.1294117...",5
5,Training_38820602.jpg,"[[0.11764706, 0.03529412, 0.015686275, 0.03921...",1
6,Training_37076666.jpg,"[[0.2509804, 0.23529412, 0.2627451, 0.23921569...",5
7,Training_6257545.jpg,"[[0.078431375, 0.11372549, 0.10980392, 0.11764...",2
8,Training_91218539.jpg,"[[1.0, 1.0, 0.99607843, 1.0, 1.0, 0.99607843, ...",1
9,Training_99523432.jpg,"[[0.21960784, 0.3019608, 0.24705882, 0.2941176...",5


In [15]:
cv2.imshow("test",Fer2013_df_test['Data'].iloc[2])
cv2.waitKey(0)
cv2.destroyAllWindows()

In [16]:
Fer2013_df_train.to_pickle(f"./Fer2013_train_{IMG_size}")
Fer2013_df_test.to_pickle(f"./Fer2013_test_{IMG_size}")



In [7]:
Fer2013_df_test.shape

(28709, 3)

In [7]:
Fer2013_df_train = pd.read_pickle("./Fer2013_train_48")
Fer2013_df_test = pd.read_pickle("./Fer2013_test_48")

In [8]:
Fer2013_df_train.columns

Index(['Name', 'Data', 'Emotion'], dtype='object')

In [9]:
Fer2013_df_train = Fer2013_df_train[Fer2013_df_train.Emotion !=0 ]
Fer2013_df_test = Fer2013_df_test[Fer2013_df_test.Emotion !=0 ]

In [10]:
## Create training, validation and test datasets
from sklearn.model_selection import train_test_split

X_train, y_train = Fer2013_df_train['Data'], Fer2013_df_train['Emotion']
# split remaining data into validation and train sets
X_test, X_val, y_test, y_val = train_test_split(Fer2013_df_test.Data, Fer2013_df_test.Emotion, test_size=0.5, random_state=12, stratify=Fer2013_df_test.Emotion)
    


In [11]:
print(f'Distribution in training set {len(y_train)}: \n{y_train.value_counts().sort_index() / len(y_train)}\n\n'+
      f'Distribution in validation set {len(y_val)}: \n{y_val.value_counts().sort_index() / len(y_val)}\n\n'+
      f'Distribution in testing set {len(y_test)}: \n{y_test.value_counts().sort_index() / len(y_test)}')

Distribution in training set 23744: 
1    0.168253
2    0.133550
3    0.018363
4    0.172549
5    0.303866
6    0.203420
Name: Emotion, dtype: float64

Distribution in validation set 2973: 
1    0.161117
2    0.139926
3    0.018500
4    0.172217
5    0.298352
6    0.209889
Name: Emotion, dtype: float64

Distribution in testing set 2972: 
1    0.161171
2    0.139637
3    0.018843
4    0.172275
5    0.298452
6    0.209623
Name: Emotion, dtype: float64


In [14]:
X_train.to_pickle('./Clean_datasets/FER2013/FER2013_Emotion_X_train')
y_train.to_pickle('./Clean_datasets/FER2013/FER2013_Emotion_Y_train')
X_val.to_pickle('./Clean_datasets/FER2013/FER2013_Emotion_X_val')
y_val.to_pickle('./Clean_datasets/FER2013/FER2013_Emotion_Y_val')
X_test.to_pickle('./Clean_datasets/FER2013/FER2013_Emotion_X_test')
y_test.to_pickle('./Clean_datasets/FER2013/FER2013_Emotion_Y_test')