In [1]:
import collections
import os
import random
from itertools import groupby

import numpy as np
from keras.preprocessing.image import ImageDataGenerator

Using TensorFlow backend.


In [2]:
# Load the training images and their label vectors from disk,
# printing each array's shape right after it is read.
images_path = './data/emotion/Train/final_data.npy'
labels_path = './data/emotion/Train/final_labels_data.npy'

data = np.load(images_path)
print(data.shape)

labels = np.load(labels_path)
print(labels.shape)

(27947, 64, 64, 1)
(27947, 8)


In [4]:
# look for images with multiple labels
# For every label vector, collect the positions that tie for the maximum
# value and join them into a key string: '3' means a single winner,
# '03' means positions 0 and 3 share the maximum.
res = []
for label_vector in labels:  # iterate the rows directly instead of a hard-coded row count
    winner = np.flatnonzero(label_vector == np.amax(label_vector))
    res.append(''.join(str(w) for w in winner))


# get the frequency of every category
# Some images have two or more labels (emotions)
# eg. 01 -> neutral + happy
counter = collections.Counter(res)
print(counter)

Counter({'0': 9272, '1': 7394, '3': 3361, '2': 3349, '4': 2360, '6': 642, '03': 450, '5': 165, '7': 163, '01': 119, '26': 97, '04': 96, '02': 70, '12': 58, '24': 40, '34': 32, '07': 28, '36': 26, '14': 25, '45': 23, '23': 16, '46': 15, '35': 13, '06': 12, '034': 12, '05': 11, '13': 10, '37': 9, '037': 8, '036': 7, '246': 6, '023': 5, '145': 4, '026': 4, '57': 4, '045': 4, '25': 3, '124': 3, '15': 3, '345': 3, '47': 2, '012': 2, '16': 2, '245': 2, '17': 2, '024': 2, '56': 1, '236': 1, '67': 1, '256': 1, '057': 1, '015': 1, '234': 1, '014': 1, '457': 1, '035': 1, '357': 1, '247': 1, '013': 1})


In [6]:
# image augmentation
# Settings for the random transformations (rotation, horizontal/vertical
# shift, zoom, horizontal flip) applied to every generated image.
augmentation_settings = {
    'rotation_range': 20,
    'width_shift_range': 0.1,
    'height_shift_range': 0.1,
    'zoom_range': .1,
    'horizontal_flip': True,
}
data_generator = ImageDataGenerator(**augmentation_settings)

In [174]:
# Augment the images and create a balanced dataset across all 8 emotions.
# Only images whose single winning label is the emotion (key '0'..'7' in
# `res`) are counted and augmented. Each of those classes is topped up with
# augmented copies until it holds TARGET_PER_CLASS samples
# (originals + augmentations).
TARGET_PER_CLASS = 10000
sample_shape = tuple(data.shape[1:])   # per-image shape, (64, 64, 1) for the array loaded above
label_shape = tuple(labels.shape[1:])  # per-image label vector shape

for emotion in ['0','1','2','3','4','5','6','7']:
    print(emotion)

    # find the indices with the emotion
    indices = [index for index, value in enumerate(res) if value == emotion]
    # same number as counter[emotion], since counter was built from res
    frequency = len(indices)
    if frequency == 0:
        # nothing to augment from; fail with a clear message instead of a
        # ZeroDivisionError further down
        raise ValueError('no single-label images found for emotion ' + emotion)

    # a class that already reaches the target needs no augmentation
    # (a negative count would make random.sample raise)
    number_of_augmentations = max(0, TARGET_PER_CLASS - frequency)

    # random.sample draws without replacement, so repeat the index list
    # often enough that it holds at least `number_of_augmentations` entries
    repeats = max(1, int(np.ceil(number_of_augmentations / frequency)))
    random_items = random.sample(indices * repeats, number_of_augmentations)

    # flow(save_to_dir=...) and np.save below both write into this directory
    out_dir = 'aug' + emotion
    os.makedirs(out_dir, exist_ok=True)

    aug = []
    lab = []
    for item in random_items:
        # data[item:item + 1] keeps the leading batch axis, giving a batch of one image
        batches = data_generator.flow(data[item:item + 1], save_to_dir=out_dir, save_format='jpg', batch_size=1)
        # take exactly one augmented batch and unwrap its single image
        X_batch = next(batches)[0]
        # NOTE(review): astype('uint') truncates the generator's output to
        # integers -- presumably pixel values are in 0..255; confirm, since
        # inputs scaled to 0..1 would collapse to 0/1 here.
        aug.append(X_batch.astype('uint'))
        lab.append(labels[item].tolist())

    # dtype=int is NumPy's default integer type, i.e. what np.array infers
    # from nested lists of Python ints; the reshape keeps the per-sample
    # shape even when nothing was generated for this class
    arr_aug = np.array(aug, dtype=int).reshape((-1,) + sample_shape)
    arr_lab = np.array(lab).reshape((-1,) + label_shape)

    np.save('./aug' + emotion + '/' + 'arr' + emotion, arr_aug)
    np.save('./aug' + emotion + '/' + 'lab' + emotion, arr_lab)

0
1
2
3
4
5
6
7


In [187]:
# Concatenate the arrays
# Load the per-emotion files written by the augmentation cell and append
# them to the original arrays with one concatenate call per array, so the
# growing result is not re-copied once per emotion.
# NOTE: this cell rebinds `data` and `labels`; run it once per kernel
# session -- re-running it appends the augmented samples a second time.
aug_data_parts = [np.load('aug{}/arr{}.npy'.format(i,i)) for i in range(8)]
aug_label_parts = [np.load('aug{}/lab{}.npy'.format(i,i)) for i in range(8)]
data = np.concatenate([data] + aug_data_parts, axis=0)
labels = np.concatenate([labels] + aug_label_parts, axis=0)

In [189]:
# Sanity check: shape of the image array after the augmented samples were appended
data.shape

(81241, 64, 64, 1)

In [190]:
# Sanity check: the label array must have the same number of rows as `data`
labels.shape

(81241, 8)

In [None]:
# Save the augmented train set
# Write the image array first, then the label array, to the training directory.
for target_path, array in (
    ('./data/emotion/Train/final_data_aug.npy', data),
    ('./data/emotion/Train/final_labels_data_aug.npy', labels),
):
    np.save(target_path, array)