In [None]:
import glob

path = "Users/masih/desktop/FER_2013" # path to all images

angry_images = glob.glob(path + 'train/angry/*', recursive=True)
fear_images = glob.glob(path + 'train/fear/*', recursive=True)
happy_images = glob.glob(path + 'train/happy/*', recursive=True)
neutral_images = glob.glob(path + 'train/neutral/*', recursive=True)
surprise_images = glob.glob(path + 'train/surprise/*', recursive=True)
sad_images = glob.glob(path + 'train/sad/*', recursive=True)

train_images = angry_images + fear_images + happy_images + neutral_images + surprise_images + sad_images

In [None]:
angry_images = glob.glob(path + 'test/angry/*', recursive=True)
fear_images = glob.glob(path + 'test/fear/*', recursive=True)
happy_images = glob.glob(path + 'test/happy/*', recursive=True)
neutral_images = glob.glob(path + 'test/neutral/*', recursive=True)
surprise_images = glob.glob(path + 'test/surprise/*', recursive=True)
sad_images = glob.glob(path + 'test/sad/*', recursive=True)

test_images = angry_images + fear_images + happy_images + neutral_images + surprise_images + sad_images

In [5]:
import numpy as np

np.random.shuffle(train_images)
np.random.shuffle(test_images)

# Using 80% of development data for training and 20% for validation
train_images = train_images[:int(len(train_images)*0.8)]
val_images = train_images[int(len(train_images)*0.8):]

print("Number of train images: ", len(train_images))
print("Number of val images: ", len(val_images))
print("Number of test images: ", len(test_images))

Number of train images:  19012
Number of val images:  3803
Number of test images:  5931


In [None]:
from PIL import Image
import os
import pickle
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder, OneHotEncoder


def preprocess(image_paths, cat, target_size, train = False, label_encoder = None):
    images, labels = [], []

    # Opening all images in image_paths, resizing them, saving as np arrays
    for path in image_paths:
        img = Image.open(path).convert('RGB')
        img = img.resize(target_size)
        img_array = np.array(img) / 255.0  
        images.append(img_array)

    # Saving all labels based on path name in list labels 
    for path in image_paths:
        label = os.path.basename(os.path.dirname(path))
        labels.append(label)
        
    # Fitting/ Creating a label encoder 
    if train:
        label_encoder = LabelEncoder()
        encoded_labels = label_encoder.fit_transform(labels)
    else:
        encoded_labels = label_encoder.transform(labels)

    # Fitting a One-Hot-Encoding to the encoded labels
    onehot_encoder = OneHotEncoder(sparse=False)
    encoded_labels = encoded_labels.reshape(len(encoded_labels), 1)
    onehot_labels = onehot_encoder.fit_transform(encoded_labels)

    images = np.array([img.reshape(target_size[0], target_size[1], 3) for img in images])
    labels = np.array(onehot_labels)
    
    save_path = "Users/masih/desktop/FER_CNN/Data" # path to saved NumPy arrays
    os.makedirs(save_path, exist_ok=True)
    
    # Saving the numpy arrays
    np.save(os.path.join(save_path, 'X_' + cat + '.npy'), images)
    np.save(os.path.join(save_path, 'y_' + cat + '.npy'), labels)

    return label_encoder


target_size = (224, 224)

# The Label Encoder is saved from training and then applied to validation and test data
label_encoder = preprocess(train_images, 'train', target_size, train=True)
preprocess(validation_images, 'validation',target_size, label_encoder=label_encoder)
preprocess(test_images, 'test', target_size, label_encoder=label_encoder)