In [1]:
import os

import numpy as np
import pandas as pd

# Manually annotated AffectNet file lists (paths are relative to this notebook).
path='../1_Datasets/AffectNet/Manually_Annotated_file_lists/training.csv'
path2='../1_Datasets/AffectNet/Manually_Annotated_file_lists/validation.csv'
df_train = pd.read_csv(path)
# validation.csv is read with header=None and given the training column names,
# so both frames end up with the same columns.
colnames = list(df_train.columns)
df_val = pd.read_csv(path2, names=colnames, header=None)


In [2]:
# Preview the first two rows of the training annotations.
df_train.head(2)

Unnamed: 0,subDirectory_filePath,face_x,face_y,face_width,face_height,facial_landmarks,expression,valence,arousal
0,689/737db2483489148d783ef278f43f486c0a97e140fc...,134,134,899,899,181.64;530.91;188.32;627.82;195.1;723.37;205.2...,1,0.785714,-0.055556
1,392/c4db2f9b7e4b422d14b6e038f0cdc3ecee239b5532...,20,20,137,137,28.82;77.52;29.12;93.25;31.04;108.51;33.03;123...,0,-0.017253,0.004313


In [3]:
# Preview the first two rows of the validation annotations.
df_val.head(2)

Unnamed: 0,subDirectory_filePath,face_x,face_y,face_width,face_height,facial_landmarks,expression,valence,arousal
0,459/81456263be241927c7a59a2646f88c2700ce4b7cba...,147,147,981,981,233.58;512.99;246.97;623.96;262.53;733.87;285....,7,-0.65331,0.65331
1,680/cfa0c679da3dbe9f01e92cdeda2da1065aa50e7bf0...,99,99,666,666,98.98;341.86;107.43;422.6;123.41;503.32;144.97...,0,-0.176846,-0.07764


In [4]:
# Row/column counts of both annotation tables.
print('Training shape: ',df_train.shape)
print('Validation shape:',df_val.shape)

Training shape:  (414799, 9)
Validation shape: (5500, 9)


In [5]:
# Number of training samples per expression code.
df_train['expression'].value_counts()

1     134415
10     82415
0      74874
8      33088
2      25459
6      24882
3      14090
9      11645
4       6378
5       3803
7       3750
Name: expression, dtype: int64

In [6]:
# Number of validation samples per expression code.
df_val['expression'].value_counts()

7     500
3     500
10    500
6     500
2     500
9     500
5     500
1     500
8     500
4     500
0     500
Name: expression, dtype: int64

In [7]:
# Fold the validation annotations into the training frame; ignore_index renumbers
# the combined rows 0..N-1.
# NOTE: this rebinds df_train, so re-running the cell appends df_val again.
df_train = pd.concat([df_train, df_val], ignore_index=True)
print('Training shape: ',df_train.shape)

Training shape:  (420299, 9)


# Adaptación del Dataset

In [8]:
# Expression codes 0-10 mapped to their label names.
emotions_dict = dict(enumerate([
    'neutral', 'happy', 'sad', 'surprise',
    'fear', 'disgust', 'anger', 'contempt',
    'none', 'uncertain', 'non-face',
]))

In [9]:
# Keep only the emotions defined by Ekman (Happy, Sad, Anger, Fear, Disgust/Contempt, Surprise)
emociones_tesis = [1,2,3,4,5,6,7]

In [10]:
# Keep the rows whose expression is one of the selected codes.
keepId_train = df_train[df_train['expression'].isin(emociones_tesis)]
# Merge code 7 into code 5, then shift every code down by one so labels start at 0.
df_affect_ekman = keepId_train.assign(
    expression=keepId_train['expression'].replace(7, 5) - 1
)
df_affect_ekman['expression'].value_counts()

0    134915
1     25959
5     25382
2     14590
4      8553
3      6878
Name: expression, dtype: int64

In [11]:
# Renumber the filtered rows 0..N-1 and preview the relabelled frame.
df_affect_ekman = df_affect_ekman.reset_index(drop=True)
df_affect_ekman.head(2)

Unnamed: 0,subDirectory_filePath,face_x,face_y,face_width,face_height,facial_landmarks,expression,valence,arousal
0,689/737db2483489148d783ef278f43f486c0a97e140fc...,134,134,899,899,181.64;530.91;188.32;627.82;195.1;723.37;205.2...,0,0.785714,-0.055556
1,944/06e9ae8d3b240eb68fa60534783eacafce2def60a8...,40,40,269,269,44.43;158.17;47.08;189.2;50.54;221.88;58.3;253...,0,0.153401,0.03889


In [12]:
# Split the frame into one sub-frame per relabelled emotion (codes 0-5). Each class
# is then randomly capped at 8600 samples; classes with fewer rows are kept whole.
emo0, emo1, emo2, emo3, emo4, emo5 = (
    df_affect_ekman[df_affect_ekman['expression'] == code] for code in range(6)
)

In [20]:
def sampling_emotions(df, n=8600, random_state=0):
    """Cap a per-emotion frame at ``n`` randomly chosen rows.

    Args:
        df: DataFrame holding the samples of one emotion.
        n: Maximum number of rows to keep.
        random_state: Seed forwarded to ``DataFrame.sample`` so the selection is
            the same on every run. Pass ``None`` for a fresh random draw.

    Returns:
        ``n`` sampled rows when ``df`` has at least ``n`` rows (original index
        labels are kept, row order is shuffled); otherwise ``df`` itself,
        unchanged.
    """
    # Too few rows to subsample: hand the frame back untouched.
    if len(df) < n:
        return df
    return df.sample(n=n, random_state=random_state)

In [21]:
# Apply the per-class sampling cap to each emotion frame, in class order.
(emo_process0, emo_process1, emo_process2,
 emo_process3, emo_process4, emo_process5) = (
    sampling_emotions(frame) for frame in (emo0, emo1, emo2, emo3, emo4, emo5)
)

In [25]:
# Stack the capped per-emotion frames into one table with a fresh 0..N-1 index.
df_train_2 = pd.concat(
    [emo_process0, emo_process1, emo_process2,
     emo_process3, emo_process4, emo_process5],
    ignore_index=True,
)
print('Training shape: ',df_train_2.shape)

Training shape:  (49831, 9)


## Data Division
* Train: 80%
* Val: 10%
* Test: 10%

In [46]:
# Label names for the six relabelled emotion codes (0-5).
new_emotions_dict = dict(enumerate(
    ['happy', 'sad', 'surprise', 'fear', 'disgust-contempt', 'anger']
))

In [27]:
from sklearn.model_selection import train_test_split

# Fixed seed so the split is repeatable for a given df_train_2.
rnd_seed = 0
# 80% train; the remaining 20% is then halved into test and validation (10% each).
# NOTE(review): no `stratify` argument is passed, so class proportions may differ
# slightly between the three splits.
df_train, df_test_val = train_test_split(df_train_2,test_size=0.2, random_state=rnd_seed)
df_test, df_val = train_test_split(df_test_val,test_size=0.5, random_state=rnd_seed)

In [28]:
# Row/column counts of the three splits.
print('Training shape: ',df_train.shape)
print('Validation shape:',df_val.shape)
print('Test shape:',df_test.shape)

Training shape:  (39864, 9)
Validation shape: (4984, 9)
Test shape: (4983, 9)


In [29]:
# Give each split a fresh 0..N-1 index after the shuffled split.
df_train, df_val, df_test = (
    split.reset_index(drop=True) for split in (df_train, df_val, df_test)
)

## Cropping image and saving cropped faces

In [30]:
# Scratch check: build a zero-initialised counter keyed by emotion code.
a = list(new_emotions_dict.keys())
print(len(a))
values = list(np.zeros(len(a), dtype=np.int32))
d = dict(zip(a,values))
d

6


{0: 0, 1: 0, 2: 0, 3: 0, 4: 0, 5: 0}

In [48]:
import matplotlib.pyplot as plt
import cv2

def crop_image_save(df, img_path, emotions_dict, name='train', out_root='./data'):
    """Crop the annotated face from every image listed in ``df`` and save it as a JPEG.

    Written for the AffectNet annotation tables; other data needs the same columns.
    Each crop is written to
    ``<out_root>/<name>/<expression>/<name>-<label>-<k>.jpg``, where ``k`` is a
    per-emotion running counter that starts at 0.

    Args:
        df: DataFrame with the columns ``subDirectory_filePath``, ``face_x``,
            ``face_y``, ``face_width``, ``face_height`` and ``expression``.
            Rows are visited by position, so the index does not have to be 0..N-1.
        img_path: Prefix prepended (plain string concatenation) to
            ``subDirectory_filePath`` to locate each source image, so it must
            include the trailing separator.
        emotions_dict: Maps every expression code found in ``df`` to the label
            used in the output file names.
        name: Split name; used as sub-directory and as file-name prefix.
        out_root: Directory under which the ``<name>/<expression>`` folders are
            created.

    Returns:
        dict mapping each key of ``emotions_dict`` to the number of crops written.

    Raises:
        FileNotFoundError: If a source image cannot be read.
        IOError: If a crop cannot be written.
    """
    # One counter per emotion; it also numbers the output files.
    emotions_counter = dict.fromkeys(emotions_dict, 0)

    # cv2.imwrite does not create missing folders, so make them up front.
    for emo in emotions_dict:
        os.makedirs(os.path.join(out_root, name, str(emo)), exist_ok=True)

    columns = ['subDirectory_filePath', 'face_x', 'face_y',
               'face_width', 'face_height', 'expression']
    # Iterate by position so the result does not depend on the frame's index labels.
    for img_dir, x, y, w, h, emo in df[columns].itertuples(index=False, name=None):
        # Use the img_path argument (not a notebook-level global) to find the image.
        source = img_path + img_dir
        img = cv2.imread(source)
        if img is None:
            # cv2.imread reports an unreadable or missing file by returning None.
            raise FileNotFoundError('Could not read image: ' + source)

        # Images are indexed [row, column]: rows span the height, columns the width.
        crop_img = img[y:y+h, x:x+w]

        file_name = name+'-'+emotions_dict[emo]+'-'+str(emotions_counter[emo])+'.jpg'
        save_path = os.path.join(out_root, name, str(emo), file_name)
        # cv2.imwrite returns False on failure instead of raising.
        if not cv2.imwrite(save_path, crop_img):
            raise IOError('Could not write image: ' + save_path)
        emotions_counter[emo] += 1

    return emotions_counter

#Root folder of the AffectNet images; it is prepended to each subDirectory_filePath
imgs_path = '../1_Datasets/AffectNet/Manually_Annotated_compressed/'

In [49]:
#df_train.head(20)

In [50]:
# Crop and save the training faces; the cell output is the per-emotion image count.
crop_image_save(df_train, imgs_path, new_emotions_dict, name='train')

{0: 6888, 1: 6865, 2: 6889, 3: 5497, 4: 6832, 5: 6893}

In [51]:
# Cross-check: per-emotion row counts of the training split.
df_train['expression'].value_counts()

5    6893
2    6889
0    6888
1    6865
4    6832
3    5497
Name: expression, dtype: int64

In [52]:
# Crop and save the test faces; the cell output is the per-emotion image count.
crop_image_save(df_test, imgs_path, new_emotions_dict, name='test')

{0: 887, 1: 870, 2: 842, 3: 653, 4: 826, 5: 905}

In [53]:
# Cross-check: per-emotion row counts of the test split.
df_test['expression'].value_counts()

5    905
0    887
1    870
2    842
4    826
3    653
Name: expression, dtype: int64

In [54]:
# Crop and save the validation faces; the cell output is the per-emotion image count.
crop_image_save(df_val, imgs_path, new_emotions_dict, name='validation')

{0: 825, 1: 865, 2: 869, 3: 728, 4: 895, 5: 802}

In [55]:
# Cross-check: per-emotion row counts of the validation split.
df_val['expression'].value_counts()

4    895
2    869
1    865
0    825
5    802
3    728
Name: expression, dtype: int64

In [None]:
# `emotions_counter` only exists inside crop_image_save, so evaluating that name
# here raises NameError on a fresh kernel. Show the per-split, per-emotion row
# counts from the data frames instead; they should equal the counts returned by
# the crop_image_save calls.
{
    split_name: split_df['expression'].value_counts().sort_index().to_dict()
    for split_name, split_df in (
        ('train', df_train), ('test', df_test), ('validation', df_val)
    )
}