In [35]:
from keras.utils import to_categorical 
from keras_preprocessing.image import load_img
from keras.models import Sequential
from keras.layers import Dense, Conv2D, Dropout, Flatten, MaxPooling2D
from tqdm.notebook import tqdm
from sklearn.preprocessing import LabelEncoder

import os
import pandas as pd
import numpy as np

# https://medium.com/@pheonixdiaz625/real-time-facial-emotion-recognition-using-deep-learning-and-opencv-30a331d39cf1

TRAIN_DIR = 'images/train'
TEST_DIR = 'images/test'

In [18]:
# putting the image paths + labels into an arraylist
def createdataframe(dir):
    image_paths = []
    labels = []
    for label in os.listdir(dir):
        for imagename in os.listdir(os.path.join(dir, label)):
            image_paths.append(os.path.join(dir, label, imagename))
            labels.append(label)
        print(label, "completed")
    return image_paths, labels

In [19]:
# putting the images pathes + labels into a panda dataframe
train = pd.DataFrame()
train['image'], train['label'] = createdataframe(TRAIN_DIR)

happy completed
sad completed
fear completed
surprise completed
neutral completed
angry completed
disgust completed


In [20]:
test = pd.DataFrame()
test['image'], test['label'] = createdataframe(TRAIN_DIR)

happy completed
sad completed
fear completed
surprise completed
neutral completed
angry completed
disgust completed


In [24]:
# turn the image pathes into grayscale pixels, and then rehape the array into a 48x48x1
def extract_features(images):
    features = []
    for image in tqdm(images):
        img = load_img(image, color_mode = "grayscale")
        img = np.array(img)
        features.append(img)
    features = np.array(features)
    features = features.reshape(len(features), 48, 48, 1)
    return features

In [25]:
train_features = extract_features(train['image'])
test_features = extract_features(test['image'])

# normalize the features (0-255 to 0-1)
x_train = train_features / 255.0
x_test = test_features / 255.0

  0%|          | 0/28821 [00:00<?, ?it/s]

  0%|          | 0/28821 [00:00<?, ?it/s]

In [26]:
# using sklearn.preprocessing's labelencoder
# turns categorical data (red, green, blue) into numbers. In our case it's emotions
le = LabelEncoder()
# turns the categorical into numbers (sad = 0, happy = 1, etc)
le.fit(train['label'])

# turns categorical into numbers 
y_train = le.transform(train['label'])
y_test = le.transform(test['label'])

# turns it into one-hot encoders
# an array is created with size = number of labels in the category
# 1 is on, 0 is off
y_train = to_categorical(y_train, num_classes = 7)
y_test = to_categorical(y_test, num_classes = 7)

In [None]:
# building the neural network

In [34]:
#sequential model 
model = Sequential()

#convolutional layers
# conv2d(number of filters, kernel size, activation function, expected input type of model)
# kernel: a odd x odd matrix (of weights) that is used for convolution, blurring, edge detection, etc
# - convolution: slide kernel window over area, and perform a element-wise multiplication with
#                the value of that pixel, and summing it up. It returns a feature map that 
#                highlights important features
# relu (Rectified Linear Unit): when the total input into the neuron is positive, it's "activated",
#                               else, it is off (the neuron doesn't transmit information)
#                               advantages: fixes vanishing gradient, and encourages sparse neuron activation (faster training)
#                               disadvantages: dying relu problem, neurons are stuck inactive
# input shape: height, width, channels 
#              height/width of image
#              channels are the different aspects of the image: for grayscale, 1. for RGB, 3

# pooling: grabbing the most important feature (maximum value) and makes the image more abstract
#          from that, another feature map is created.

# dropout: introduce randomness/chaos (think basketball game)
#          might be good at shooting with no defenders, in game more difficult
#          dropout prevents the network to be overreliant on one neuron
#          0.4 means 40% change neuron will drop out 
#          dropout occurs only during training; when in practice, it uses all neurons
model.add(Conv2D(128, kernel_size = (3,3), activation = 'relu', input_shape = (48, 48, 1)))
model.add(MaxPooling2D(pool_size = (2, 2)))
model.add(Dropout(0.4))

model.add(Conv2D(256, kernel_size = (3, 3), activation = 'relu'))
model.add(MaxPooling2D(pool_size = (2, 2)))
model.add(Dropout(0.4))

model.add(Conv2D(512, kernel_size = (3, 3), activation = 'relu'))
model.add(MaxPooling2D(pool_size = (2, 2)))
model.add(Dropout(0.4))

model.add(Conv2D(512, kernel_size = (3, 3), activation = 'relu'))
model.add(MaxPooling2D(pool_size = (2, 2)))
model.add(Dropout(0.4))
          

In [36]:
# flattening: turns 2d array into a vector
model.add(Flatten())

# fully connected layers
# In a dense layer, every neuron is connected to every other neuron in the layer before it
# Convolution has sparse connectivity
# 
# Convolution layers are specifically designed for grids. it preserves spatial structure, 
# and is good for learning spatial features
# Dense is more general purpose and is for more abstract, nonlinear learning
# 
model.add(Dense(512, activation = 'relu'))
model.add(Dropout(0.4))
model.add(Dense(512, activation = 'relu'))
model.add(Dropout(0.4))

# output layer
# 7 is for the different categories (happy, sad, angry, ...)
# softmax turns raw output values (logit) and turns it into a probability. It ensures sum is 1
model.add(Dense(7, activation='softmax'))

# configures learning process based on optimizer, loss function, and metrics
# doesn't actually train, just sets up the model
# optimizer: how the model's weights are updated during traning to minimize loss function
#           - adam: adapts learning rate based on gradients of the loss funciton
# loss function: measures how well the model is doing (target vs traning)
#              - categorical_crossentropy: used with targets are one-hot encoded
# Metrics: how we evaluate performance 
model.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = 'accuracy')


In [37]:
# training the model: 
# x_train: input data ("question")
# y_train: corresponding labels ("answer key")
# batch size: number of input datas used in each iteration of training 
# epoch: number of times the entire dataset will be ran
#       - each epoch contains a foward pass (making predictions) and a backwards pass (computing gradients and calculating weights)
# validation data: test model's performance during training (not used to train, but to test effectiveness).
model.fit(x = x_train, y = y_train, batch_size = 128, epochs = 100, validation_data = (x_test, y_test))


Epoch 1/100


2024-03-21 20:17:06.670237: W tensorflow/tsl/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


 25/226 [==>...........................] - ETA: 1:42 - loss: 1.8601 - accuracy: 0.2272

KeyboardInterrupt: 