In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import os

from keras.preprocessing.image import load_img, img_to_array, ImageDataGenerator
from keras.layers import Dense,Input,Dropout,GlobalAveragePooling2D,Flatten,Conv2D,BatchNormalization,Activation,MaxPooling2D
from keras.models import Model,Sequential
from tensorflow.keras.optimizers import Adam,SGD,RMSprop
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau

In [2]:
picture_size = 48
folder_path = "../input/face-expression-recognition-dataset/images/"

In [3]:
# Batch Size - How many Training images model will take in 1 iteration
batch_size  = 128

# Define Image Data Generator variables
datagen_train  = ImageDataGenerator()
datagen_val = ImageDataGenerator()

# Store images in folder path in the training and validation sets - 7 categories of expressions
train_set = datagen_train.flow_from_directory(folder_path+"train",
                                              target_size = (picture_size,picture_size),
                                              color_mode = "grayscale",
                                              batch_size=batch_size,
                                              class_mode='categorical',
                                              shuffle=True)
test_set = datagen_val.flow_from_directory(folder_path+"validation",
                                              target_size = (picture_size,picture_size),
                                              color_mode = "grayscale",
                                              batch_size=batch_size,
                                              class_mode='categorical',
                                              shuffle=False)

Found 28821 images belonging to 7 classes.
Found 7066 images belonging to 7 classes.


In [4]:
# There are 7 classes/ possible outcomes
no_of_classes = 7

model = Sequential()

# STEP 1: Conv2D(no_filters,kernel_size,padding,input_shape(picture_size,picture_size,grayscale=1))
# filters: Dimensionality of output space (number of output filters in the convolution).
# kernel_size: Tuple of 2 ints, height and width of the 2D convolution window.

# STEP 2: Batch normalization normalizes its inputs, that maintains the mean output close to 0 
# and the output standard deviation close to 1.

# STEP 3: Activation layer is present at end of neuron, it decides information fired to the next layer
# relu - Recitified Linear Unit - Chosen as it does not trigger all neurons at same time

# STEP 4: Max pooling operation for 2D spatial data.
# Downsamples the input along its spatial dimensions (height and width) by taking the maximum 
# value over an input window (pool_size) for each channel of the input.

# STEP 5: Dropout sets input to 0 with frequency of rate at each step during training, preventing overfitting. 
# Inputs not set to 0 are scaled up by 1/(1 - rate) such that the sum over all inputs is unchanged.

#1st CNN layer
model.add(Conv2D(64,(3,3),padding = 'same',input_shape = (picture_size,picture_size,1)))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size = (2,2)))
model.add(Dropout(0.25))

#2nd CNN layer
model.add(Conv2D(128,(5,5),padding = 'same'))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size = (2,2)))
model.add(Dropout (0.25))

#3rd CNN layer
model.add(Conv2D(512,(3,3),padding = 'same'))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size = (2,2)))
model.add(Dropout (0.25))

#4th CNN layer
model.add(Conv2D(512,(3,3), padding='same'))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

# STEP 6: Flatten Layer - Collapses input to 1D array which can be fed in model easily
model.add(Flatten())

# STEP 7: Use Dense Layer Connect all the layers
# units: Positive integer, dimensionality of the output space.
# Dense implements the operation: output = activation(dot(input, kernel) + bias) 
# where activation is the element-wise activation function passed as the activation argument, 
# kernel is a weights matrix created by the layer, and bias is a bias vector created by the layer 
# (only applicable if use_bias is True).
#Fully connected 1st layer
model.add(Dense(256))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Dropout(0.25))

# Fully connected layer 2nd layer
model.add(Dense(512))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Dropout(0.25))

# Softmax is used for categorical classification
model.add(Dense(no_of_classes, activation='softmax'))

# STEP 8: Optimize model use learning rate 0.0001
# Set Loss Function as cross entropy - used with softmax activation
# Optimizer that implements the Adam algorithm.
# Adam optimization is a stochastic gradient descent method that is based on 
# adaptive estimation of first-order and second-order moments.
opt = Adam(lr = 0.0001)
model.compile(optimizer=opt,loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

2022-11-14 15:55:33.102334: I tensorflow/core/common_runtime/process_util.cc:146] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.


Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 48, 48, 64)        640       
_________________________________________________________________
batch_normalization (BatchNo (None, 48, 48, 64)        256       
_________________________________________________________________
activation (Activation)      (None, 48, 48, 64)        0         
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 24, 24, 64)        0         
_________________________________________________________________
dropout (Dropout)            (None, 24, 24, 64)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 24, 24, 128)       204928    
_________________________________________________________________
batch_normalization_1 (Batch (None, 24, 24, 128)       5

  "The `lr` argument is deprecated, use `learning_rate` instead.")


In [5]:
# STEP 9: ModelCheckpoint callback is used in conjunction with training using model.fit() to save a model or 
# weights (in a checkpoint file) at some interval, so the model or weights can be loaded later 
# to continue the training from the state saved.
# Keras verbose defines the mode of verbosity, which will be auto 0, 1, or 2. 
# In this mode, 0 is defined as silent, 1 as a progress bar, and 2 as a single line per epoch.
# if save_best_only=True, it only saves when the model is considered the "best" 
# and the latest best model according to the quantity monitored will not be overwritten.
# mode: one of {'auto', 'min', 'max'}. If save_best_only=True, the decision to overwrite the current 
# save file is made based on either the maximization or the minimization of the monitored quantity.
checkpoint = ModelCheckpoint("./model.h5",monitor='val_acc',verbose=1,save_best_only=True,mode='max')

# STEP 10: Early Stopping - Stop training when a monitored metric has stopped improving.
# min_delta: Min change in monitored quantity to qualify as improvement, 
# less than min_delta is no improvement.
# patience: Number of epochs with no improvement after which training will be stopped.
# restore_best_weights: Restore model weights from epoch with best value of monitored quantity. 
early_stopping = EarlyStopping(monitor='val_loss',min_delta=0,patience=3,verbose=1,restore_best_weights=True)

# STEP 11: Reduce learning rate when a metric has stopped improving.
# Models often benefit from reducing the learning rate by a factor of 2-10 once learning stagnates.
reduce_learningrate = ReduceLROnPlateau(monitor='val_loss',factor=0.2,patience=3,verbose=1,min_delta=0.0001)

callbacks_list = [early_stopping,checkpoint,reduce_learningrate]
epochs = 48
# Compile Model
model.compile(loss='categorical_crossentropy',optimizer = Adam(lr=0.001),metrics=['accuracy'])

In [6]:
history = model.fit_generator(generator=train_set,
                                steps_per_epoch=train_set.n//train_set.batch_size,
                                epochs=epochs,
                                validation_data = test_set,
                                validation_steps = test_set.n//test_set.batch_size,
                                callbacks=callbacks_list)

2022-11-14 15:55:34.320760: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)


Epoch 1/48
Epoch 2/48
Epoch 3/48
Epoch 4/48
Epoch 5/48
Epoch 6/48
Epoch 7/48
Epoch 8/48
Epoch 9/48
Restoring model weights from the end of the best epoch.

Epoch 00009: ReduceLROnPlateau reducing learning rate to 0.00020000000949949026.
Epoch 00009: early stopping
