In [None]:
import numpy as np
import mxnet as mx
from mxnet import nd, autograd, gluon

![image.png](../assets/data_prep_wf.png)

# Raw Data - FER2013
* https://www.kaggle.com/c/challenges-in-representation-learning-facial-expression-recognition-challenge/data
* 48×48 grayscale images
* 28,709 training samples
* 3,589 validation samples
* 3,589 test samples
* 7 emotion labels (0=Angry, 1=Disgust, 2=Fear, 3=Happy, 4=Sad, 5=Surprise, 6=Neutral)
* ~60-65% human accuracy in labelling

![image.png](../assets/test_raw_data.png)

# Data Preparation Is Critical
* State of the Art accuracy using a Convolutional Neural Network (CNN) model directly on this raw data is **63%** [1].
* State of the Art accuracy using a slightly modified Convolutional Neural Network (CNN) model after processing the data is **83%** [2].

# Processed Data - FER+

Follow the instructions listed in the repository here - https://github.com/sandeep-krishnamurthy/facial-emotion-recognition-gluon#step-1---data-preparation

* FER+ has new corrected labels
* FER+ has 8 emotions - (0: 'neutral', 1: 'happiness', 2: 'surprise', 3: 'sadness', 4: 'anger', 5: 'disgust', 6: 'fear', 7: 'contempt')
* Image augmentations:
    * Crop faces in the images – bounding box in the FER+ dataset
    * Scale image size from 48×48 -> 64×64
    * Shift image
    * Flip image
    * Rotate (angle) image
    * Normalize the pixels in the image

![image.png](../assets/test_processed_data.png)

In [None]:
# Emotions we want to recognize, keyed by the integer class index.
# The position of each name in the list below is its class index.
emotion_table = dict(enumerate([
    'neutral',
    'happiness',
    'surprise',
    'sadness',
    'anger',
    'disgust',
    'fear',
    'contempt',
]))

In [None]:
# Load the pre-processed training images and labels from .npy files.
# Paths are relative to the notebook's working directory; the files are
# expected to exist already (see the data preparation instructions above).
processed_train_images = np.load('./data/fer_train_processed_images.npy')
processed_train_labels = np.load('./data/fer_train_processed_labels.npy')

In [None]:
# Sanity check: show the shapes of the training image and label arrays.
print(processed_train_images.shape, processed_train_labels.shape)

In [None]:
from matplotlib import pyplot as plt

In [None]:
# Display one training sample (index 987) as a 64x64 grayscale image.
plt.imshow(processed_train_images[987].reshape(64,64), cmap='gray')

In [None]:
# Load the pre-processed test and validation splits from .npy files.
# Paths are relative to the notebook's working directory.
processed_test_images = np.load('./data/fer_test_processed_images.npy')
processed_test_labels = np.load('./data/fer_test_processed_labels.npy')
processed_val_images = np.load('./data/fer_val_processed_images.npy')
processed_val_labels = np.load('./data/fer_val_processed_labels.npy')

In [None]:
# Sanity check: show the shapes of the test image and label arrays.
print(processed_test_images.shape, processed_test_labels.shape)

In [None]:
# Sanity check: show the shapes of the validation image and label arrays.
print(processed_val_images.shape, processed_val_labels.shape)

In [None]:
# Pick the compute context: use a GPU when at least one is available,
# otherwise fall back to the CPU so the notebook still runs end to end.
# Please note, training takes approx. 1.1 min/epoch on 1 GPU and can take
# considerably longer on CPU.
# To force a specific device, set ctx = mx.cpu() or ctx = mx.gpu(i) instead.

ctx = mx.gpu() if mx.context.num_gpus() > 0 else mx.cpu()

![image.png](../assets/model_training_prep_wf.png)

![image.png](../assets/network.png)

# Step 1 – Construct the Neural Network

In [None]:
# A HybridSequential container is used so that the trained model can later be
# saved as symbols and params.
# More Info - https://mxnet.incubator.apache.org/tutorials/gluon/save_load_params.html
net = gluon.nn.HybridSequential()

# 13-layer VGG-style network suggested in the paper: four convolutional stages
# (10 conv layers in total) followed by three fully connected layers.
# Each entry is (output channels, number of 3x3 conv layers in that stage).
conv_stages = [(64, 2), (128, 2), (256, 3), (256, 3)]

with net.name_scope():
    for stage_channels, num_convs in conv_stages:
        for _ in range(num_convs):
            net.add(gluon.nn.Conv2D(channels=stage_channels, kernel_size=3,
                                    padding=(1,1), activation='relu'))
        # Every stage ends with 2x2 max pooling followed by dropout.
        net.add(gluon.nn.MaxPool2D(pool_size=2, strides=2))
        net.add(gluon.nn.Dropout(0.25))

    # Classifier head: two 1024-unit hidden layers, each followed by dropout,
    # then 8 output units (one per entry of emotion_table).
    net.add(gluon.nn.Flatten())
    for _ in range(2):
        net.add(gluon.nn.Dense(1024, activation='relu'))
        net.add(gluon.nn.Dropout(0.5))
    net.add(gluon.nn.Dense(8))

In [None]:
# Hybridize the HybridSequential network so that the trained model can be
# saved as symbols and params (see the export step at the end of the notebook).
# More Info - https://mxnet.incubator.apache.org/tutorials/gluon/save_load_params.html
net.hybridize()

# Step 2 – Initialize the parameters in Neural Network

In [None]:
# Initialize all network parameters with the Xavier initializer on the
# selected context.
net.collect_params().initialize(mx.init.Xavier(), ctx=ctx)
# Block until all pending asynchronous MXNet operations have completed.
nd.waitall()

In [None]:
# Visualize the network graph with mx.viz.plot_network.
# NOTE(review): the original note here mentioned MXBoard, but this cell does not
# use it; the graph is drawn from the symbolic form of the network instead.
# Feed a symbolic placeholder named 'data' through the network to obtain its symbol.
x = mx.sym.var('data')
sym = net(x)
mx.viz.plot_network(sym)


# Step 3 – Prepare the Trainer with optimizer

In [None]:
# Loss function used during training: softmax cross-entropy.
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()

In [None]:
# Mini-batch size; also used by the data loaders created in Step 5.
batch_size = 32
# SGD with momentum, updating every parameter of the network.
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.0025, 'momentum': 0.9})

# Step 4 – Prepare the model evaluation strategy

In [None]:
def evaluate_accuracy(data_iterator, net):
    """Compute the classification accuracy of ``net`` over ``data_iterator``.

    Parameters
    ----------
    data_iterator : iterable
        Yields ``(data, label)`` batches, e.g. a ``gluon.data.DataLoader``.
    net : callable
        Network mapping a batch of data to per-class scores.

    Returns
    -------
    The accuracy value reported by ``mx.metric.Accuracy`` after all batches
    have been processed.

    Notes
    -----
    Each batch is moved to the notebook-level ``ctx`` before the forward pass.
    """
    metric = mx.metric.Accuracy()
    for batch_data, batch_label in data_iterator:
        batch_data = batch_data.as_in_context(ctx)
        batch_label = batch_label.as_in_context(ctx)
        # The predicted class is the index of the highest score in each row.
        predicted = nd.argmax(net(batch_data), axis=1)
        metric.update(preds=predicted, labels=batch_label)
    # metric.get() returns a (name, value) pair; only the value is needed.
    _, accuracy = metric.get()
    return accuracy

# Step 5 – Prepare data loaders

In [None]:
# Collapse each label vector to a single class index (position of its largest entry).
train_labels = np.argmax(processed_train_labels, axis=1)
val_labels = np.argmax(processed_val_labels, axis=1)

# Pair every image with its class index.
train_dataset = gluon.data.ArrayDataset(processed_train_images, train_labels)
val_dataset = gluon.data.ArrayDataset(processed_val_images, val_labels)

# Only the training loader shuffles; validation keeps the stored order.
train_data = gluon.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_data = gluon.data.DataLoader(val_dataset, batch_size=batch_size)


# Step 6 – Train the Neural Network

In [None]:
epochs = 25

# Per-epoch history; these lists are read by the plotting cell further down.
train_accuracies = []
losses = []
val_accuracies = []

for e in range(epochs):
    # Running totals used to report the mean loss over the whole epoch rather
    # than the loss of whichever mini-batch happened to be processed last.
    cumulative_loss = 0.0
    num_samples = 0
    for data, label in train_data:
        data = data.as_in_context(ctx)
        label = label.as_in_context(ctx)
        # Record the forward pass so that gradients can be computed from it.
        with autograd.record():
            output = net(data)
            loss = softmax_cross_entropy(output, label)

        loss.backward()
        # Step with the actual number of samples in this batch
        # (the final batch of an epoch can be smaller than batch_size).
        trainer.step(data.shape[0])
        cumulative_loss += float(nd.sum(loss).asscalar())
        num_samples += data.shape[0]

    # Mean per-sample loss for this epoch; the max() guards against an empty loader.
    curr_loss = cumulative_loss / max(num_samples, 1)

    val_accuracy = evaluate_accuracy(val_data, net)
    train_accuracy = evaluate_accuracy(train_data, net)

    losses.append(curr_loss)
    train_accuracies.append(train_accuracy)
    val_accuracies.append(val_accuracy)

    print("Epoch %s. Loss: %s, Train_acc %s, Val_acc %s" % (e, curr_loss, train_accuracy, val_accuracy))

# Step 7 - Evaluate on Test Data

In [None]:
# Test accuracy

# Collapse each test label vector to a single class index.
# test_labels is also read by the example-inference cells below.
test_labels = np.argmax(processed_test_labels, axis=1)
test_data = gluon.data.DataLoader(gluon.data.ArrayDataset(processed_test_images, test_labels), batch_size=batch_size)
# Use the evaluate_accuracy helper from Step 4 instead of repeating its loop here.
print("Test Accuracy - ", evaluate_accuracy(test_data, net))

In [None]:
# for plotting purposes
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt

epochs = range(len(train_accuracies))

f = plt.figure(figsize=(12,6))
fg1 = f.add_subplot(121)
fg2 = f.add_subplot(122)

fg1.set_xlabel('epoch',fontsize=14)  
fg1.set_title('Loss over Training')
fg1.grid(True, which="both")
fg1.plot(epochs, losses)

fg2.set_title('Comparing accuracy')
fg2.set_xlabel('epoch', fontsize=14)
fg2.grid(True, which="both")

p1, = fg2.plot(epochs, train_accuracies)
p2, = fg2.plot(epochs, val_accuracies)
fg2.legend([p1, p2], ['training accuracy', 'validation accuracy'],fontsize=14)

In [None]:
# Example Inference
idx = 139
plt.imshow(processed_test_images[idx].reshape(64,64), cmap='gray')

In [None]:
# Look up the emotion name for the selected sample's class index.
print("Actual Emotion - ", emotion_table[test_labels[idx]])

In [None]:
# Perform Inference
output = net(mx.nd.array(processed_test_images[idx].reshape(1,1,64,64)).as_in_context(ctx))

In [None]:
# The index of the highest score in the single output row is the predicted
# class; map it back to its emotion name.
predicted_idx = nd.argmax(output, axis=1).asnumpy()[0]
print("Predicted Emotion - ", emotion_table[predicted_idx])

![image.png](../assets/inference_wf.png)

# Step 8 - Export the model for Production

In [None]:
# Export the model for production deployment.
# There will be 2 files exported: 
# 1) gluon_ferplus-symbol.json => Contains the network definition
# 2) gluon_ferplus-0000.params => Contains the weights in the network
# NOTE(review): per the Gluon documentation, export expects the network to have
# been hybridized and run forward at least once - confirm when running this
# cell on its own.
net.export('gluon_ferplus')

# References
1. I. J. Goodfellow, D. Erhan, P. L. Carrier, A. Courville,
M. Mirza, B. Hamner, W. Cukierski, Y. Tang,
D. Thaler, D.-H. Lee, et al. Challenges in
representation learning: A report on three machine
learning contests. In Neural information processing,
pages 117–124. Springer, 2013

2. Emad Barsoum et al. Training Deep Networks for Facial Expression Recognition with Crowd-Sourced Label Distribution. https://arxiv.org/abs/1608.01041
