In [27]:
import os
import pandas as pd
import numpy as np
import shutil
import cv2
from sklearn.model_selection import train_test_split

In [28]:
# Every path in this notebook is built relative to the current working directory.
cwd = os.getcwd()

# Load the annotation table; later cells read its `image` and `emotion` columns.
all_data_csv = cwd + "/../data_csv/all_data.csv"
df = pd.read_csv(all_data_csv)

In [29]:
# Start from an empty output folder for the preprocessed images:
# wipe whatever a previous run left behind, then create it again.
dir_img = cwd + '/cleaned_images'
stale_output_present = os.path.exists(dir_img)
if stale_output_present:
    shutil.rmtree(dir_img)
os.makedirs(dir_img)

In [30]:
# One sub-folder per emotion class, so relative image paths of the form
# "<CLASS>/<file>" can be written under `dir_img`.
dictionary = ['ANGER', 'CONTEMPT', 'DISGUST', 'FEAR', 'HAPPINESS',  'NEUTRAL', 'SADNESS', 'SURPRISE']
for emo in dictionary:
    # exist_ok keeps this cell re-runnable without recreating `dir_img` first
    os.makedirs(os.path.join(dir_img, emo), exist_ok=True)

# [image, emotion] pairs for every image that was actually written
all_image = []
# relative paths of images that could not be read or written
skipped_images = []

# set size
dim = (224, 224)

# The CLAHE settings are the same for every image, so build the operator once
# instead of once per loop iteration.
clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))

for row in df.itertuples(index=False):
    imagePath = cwd + "/../images/" + row.image

    # cv2.imread signals a missing/corrupt file by returning None (no exception);
    # passing that on to cv2.resize would abort the whole run.
    image = cv2.imread(imagePath)
    if image is None:
        skipped_images.append(row.image)
        continue

    # resize
    image = cv2.resize(image, dim, interpolation = cv2.INTER_AREA)

    # change color
    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # apply clahe
    image = clahe.apply(image)

    # write preprocessed image; make sure the target folder exists because
    # cv2.imwrite only reports failure through its return value
    out_path = os.path.join(dir_img, row.image)
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    if not cv2.imwrite(out_path, image):
        skipped_images.append(row.image)
        continue

    # collect preprocessed data (only rows whose image exists on disk)
    all_image.append([row.image, row.emotion])

if skipped_images:
    print("Skipped " + str(len(skipped_images)) + " image(s) that could not be read or written:")
    for name in skipped_images:
        print("  " + str(name))

# export preprocessed data
new_df = pd.DataFrame(all_image, columns=["image", "emotion"])
new_df.to_csv(cwd + "/preprocess_data.csv", index=False)

In [45]:
df = pd.read_csv(cwd + "/preprocess_data.csv")

# Integer id for each emotion class.
emotion_ids = {'ANGER': 0, 'CONTEMPT': 1, 'DISGUST': 2, 'FEAR': 3,
               'HAPPINESS': 4, 'NEUTRAL': 5, 'SADNESS': 6, 'SURPRISE': 7}

# Accept both the upper-case and the lower-case spelling of every label.
label_to_id = dict(emotion_ids)
label_to_id.update({name.lower(): idx for name, idx in emotion_ids.items()})

# Replace string emotion labels with their integer id. Values that are not a
# known label are passed through unchanged. The result is assigned back to the
# column explicitly: calling an in-place method on `df["emotion"]` operates on
# a selection and is not guaranteed to update `df` itself.
df["emotion"] = df["emotion"].map(lambda label: label_to_id.get(label, label))

In [46]:
def create_state(df, split_fraction, file_name, random_state=None, stratify=False):
    """Split a fraction of `df` off into its own CSV and return the remainder.

    The held-out rows are written to ``<cwd>/state/<file_name>.csv`` and their
    per-emotion counts are printed.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``image`` and ``emotion``.
    split_fraction : float or int
        Passed to ``train_test_split`` as ``test_size``.
    file_name : str
        Base name (without extension) of the CSV holding the held-out rows.
    random_state : int or None, optional
        Seed forwarded to ``train_test_split``. The default ``None`` keeps the
        split different on every call.
    stratify : bool, optional
        When True, the split is stratified on the ``emotion`` column. The
        default False gives a plain random split.

    Returns
    -------
    pandas.DataFrame
        The rows that were not held out, with columns ``image`` and
        ``emotion`` and a fresh 0..n-1 index.
    """
    # arrange data
    # .tolist() reads the columns positionally, so the function also works when
    # `df` does not carry a default 0..n-1 index (df['image'][i] is a label
    # lookup and fails or picks the wrong row in that case).
    X = df['image'].tolist()
    y = df['emotion'].tolist()

    # train test split
    X_train, X_test, y_train, y_test = train_test_split(
        X, y,
        test_size=split_fraction,
        random_state=random_state,
        stratify=y if stratify else None,
    )

    # prepare data to export
    new_df = pd.DataFrame({'image': X_test, 'emotion': y_test})

    # add split data to new csv file (create the folder if it is not there yet)
    state_dir = cwd + '/state'
    os.makedirs(state_dir, exist_ok=True)
    new_df.to_csv(state_dir + '/' + file_name + '.csv', index=False)

    print(new_df.groupby('emotion').count())

    # everything that was not held out goes back to the caller
    return pd.DataFrame({'image': X_train, 'emotion': y_train})

In [47]:
# (Re)create an empty folder for the split CSVs. It gets its own variable so
# that `dir_img` keeps pointing at the image output folder; reusing `dir_img`
# here would make a later re-run of the preprocessing cell write into `state`.
dir_state = cwd + '/state'
if os.path.exists(dir_state):
    shutil.rmtree(dir_state)
os.makedirs(dir_state)

In [48]:
# Hold out 10% of all rows as the test set (saved as state/test.csv);
# `df` keeps the remaining rows.
df = create_state(df, split_fraction=0.1, file_name="test")

         image
emotion       
0          251
1          254
2          248
3          236
4          990
5         1158
6          347
7          391


In [49]:
# Display the rows that are left after the test split.
df

Unnamed: 0,image,emotion
0,NEUTRAL/neutral (507).jpg,5
1,HAPPINESS/happiness (399).png,4
2,NEUTRAL/neutral (2668).png,5
3,Yory_Boy_Campas_0001.jpg,4
4,SURPRISE/surprise (1342).png,7
...,...,...
34866,SADNESS/sadness (800).jpg,6
34867,SADNESS/sadness (5823).jpg,6
34868,SADNESS/sadness (1844).jpg,6
34869,HAPPINESS/happiness (4360).png,4


In [50]:
# Hold out 10% of the rows remaining after the test split as the validation
# set (saved as state/val.csv); `df` keeps what is left.
df = create_state(df, split_fraction=0.1, file_name="val")

         image
emotion       
0          232
1          221
2          224
3          225
4          947
5         1020
6          297
7          322


In [51]:
# The rows left over after the test and val splits form the training set.
train_csv = cwd + "/state/train.csv"
df.to_csv(train_csv, index=False)