# Setup


In [None]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


# Seed the random number generators once, up front, so that the results are
# the same every time the notebook is run.
SEED = 42
keras.utils.set_random_seed(SEED)

# **Introduction**

Your goal in this exercise is to detect emotion from a facial image. To that end, we will use the 2013 Facial Expression Recognition (FER) dataset. 

The dataset consists of ~36,000 images, each annotated with one of seven labels:
* angry
* disgust
* fear
* happy
* sad
* surprise
* neutral 

We will do two things:

1) Build a Convolutional Neural Network (CNN) *from scratch* to detect emotion in facial images.  
2) Use transfer learning to customize a pretrained model to solve the same problem. 

But first, let's get the data.

In [None]:
# Download the dataset into the working directory. `-nc` (no-clobber) skips the
# download when the file is already present, so re-running this cell does not
# create duplicate copies such as fer2013.csv.1.
!wget -q -nc -P ./ https://www.dropbox.com/s/ia62dg6kpp3q8wb/fer2013.csv

In [None]:
# Read the CSV from the same relative location the download cell saved it to
# (`-P ./`), instead of a hard-coded absolute path that only exists on Colab.
data = pd.read_csv('./fer2013.csv')

In [None]:
# Display the (rows, columns) dimensions of the raw table.
data.shape

In [None]:
# Display the column names of the raw table.
data.columns

In [None]:
# Show the raw `pixels` entry of the first row to see how an image is stored.
first_pixels = data.loc[0, 'pixels']
print(first_pixels)

The pixel values for each image are provided as a space-separated list of numbers. How many pixels are there in an image?

In [None]:
# Split the first row's space-separated pixel string and count its values.
first_image_values = data.loc[0, 'pixels'].split(' ')
len(first_image_values)

So each (gray-scale) image is encoded as a list of 2304 pixels. We will reshape this into a 48x48 image next.

In [None]:
pixels = data['pixels'].tolist()
width, height = 48, 48

# Parse every whitespace-separated pixel string directly into a float32 row.
# `split()` (no argument) tolerates repeated or trailing whitespace, and
# `np.stack` raises if any row has a different number of pixels than the others.
flat_faces = np.stack(
    [np.array(pixel_sequence.split(), dtype='float32') for pixel_sequence in pixels]
)

# Reshape each length-2304 row into a (height, width) image; the reshape fails
# loudly if the rows do not contain exactly width * height values.
gray_faces = flat_faces.reshape(-1, height, width)

# Convert single channel to three channels simply by replicating the single
# channel we have along a new trailing axis.
faces = np.repeat(gray_faces[..., np.newaxis], 3, axis=-1)

Next, let's take a look at how emotion is encoded.

In [None]:
# List the distinct values that appear in the `emotion` column.
data['emotion'].unique()

Ok, so it is sparse encoded.

Just for practice, we will change the sparse coding to one-hot encoding.

In [None]:
emotions_names = ('angry', 'disgust', 'fear', 'happy', 'sad', 'surprise', 'neutral')

# Each emotion is 'one-hot' encoded as a 7-dim vector. The dtype is pinned to
# float32 because the default dtype of `get_dummies` differs between pandas
# versions (bool in pandas 2.x), and the labels are later fed to a float loss.
emotions = pd.get_dummies(data['emotion'], dtype='float32').to_numpy()

# One column per named emotion is required for `emotions_names[argmax]` lookups.
assert emotions.shape[1] == len(emotions_names), "unexpected number of emotion classes"

Let's take a look at some of these fun images!

In [None]:
# Show the first nine faces in a 3x3 grid, each titled with its emotion name.
fig, axes = plt.subplots(3, 3, figsize=(10, 10))
for idx, ax in enumerate(axes.flat):
    label = emotions_names[np.argmax(emotions[idx])]
    ax.set_title(f"{label}")
    ax.imshow(faces[idx].astype('uint8'))
    ax.axis('off')

As in the original dataset, we will reserve the first 28,709 images for training and the rest for testing. 

In [None]:
# The first `n_train` rows form the training set; everything after is the test set.
n_train = 28709
train_faces, test_faces = faces[:n_train], faces[n_train:]
train_emotions, test_emotions = emotions[:n_train], emotions[n_train:]

In [None]:
# Shapes of the training images and their one-hot labels.
print(f"{train_faces.shape} {train_emotions.shape}")

In [None]:
# Shapes of the test images and their one-hot labels.
print(f"{test_faces.shape} {test_emotions.shape}")

# Problem 1

We will try a simple CNN on this dataset with three convolutional blocks + one dense layer + output layer.

In [None]:
input = keras.Input(shape=train_faces.shape[1:])
x = keras.layers.Rescaling(1./255)(input)  # normalizing: multiply every pixel value by 1/255

# Three identical convolutional blocks: a 2x2 convolution with 16 filters
# followed by a max-pooling layer.
for conv_name in ("Conv_1", "Conv_2", "Conv_3"):
    x = keras.layers.Conv2D(16, kernel_size=(2, 2), activation="relu", name=conv_name)(x)
    x = keras.layers.MaxPool2D()(x)

# Classification head: flatten, one hidden dense layer, 7-way softmax output.
x = keras.layers.Flatten()(x)
x = keras.layers.Dense(256, activation="relu")(x)
output = keras.layers.Dense(7, activation="softmax", name="output")(x)

model = keras.Model(input, output, name='CNN_model')

In [None]:
# Print the layer-by-layer architecture with output shapes and parameter counts.
model.summary()

Model: "CNN_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 48, 48, 3)]       0         
                                                                 
 rescaling (Rescaling)       (None, 48, 48, 3)         0         
                                                                 
 Conv_1 (Conv2D)             (None, 47, 47, 16)        208       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 23, 23, 16)       0         
 )                                                               
                                                                 
 Conv_2 (Conv2D)             (None, 22, 22, 16)        1040      
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 11, 11, 16)       0         
 2D)                                                     

In [None]:
# The labels are one-hot vectors, so the (non-sparse) categorical
# cross-entropy loss is used.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

Since we one-hot-encoded the dependent variable, we use `categorical_crossentropy`, not `sparse_categorical_crossentropy`.👆.

In [None]:
epochs = 30
history = model.fit(
    train_faces,
    train_emotions,
    validation_split=0.2,  # hold out 20% of the training data for validation
    epochs=epochs,
    batch_size=64,
)

In [None]:
# Plot the per-epoch training and validation accuracy of the last `fit` call.
history_dict = history.history
acc = history_dict["accuracy"]
val_acc = history_dict["val_accuracy"]

# Use a dedicated name for the x-axis values so that the integer `epochs`
# hyperparameter set in the training cell is not overwritten with a range.
epoch_numbers = range(1, len(acc) + 1)

fig, ax = plt.subplots()
ax.plot(epoch_numbers, acc, "bo", label="Training acc")
ax.plot(epoch_numbers, val_acc, "b", label="Validation acc")
ax.set(title="Training and validation accuracy", xlabel="Epochs", ylabel="Accuracy")
ax.legend()
plt.show()

What's the test accuracy?

In [None]:
# Insert your code below and execute this cell


# Problem 2

Add an additional dense layer to the CNN from Problem 1.

In [None]:
input = keras.Input(shape=train_faces.shape[1:])
x = keras.layers.Rescaling(1./255)(input)  # normalizing: multiply every pixel value by 1/255

# Three identical convolutional blocks: a 2x2 convolution with 16 filters
# followed by a max-pooling layer.
for conv_name in ("Conv_1", "Conv_2", "Conv_3"):
    x = keras.layers.Conv2D(16, kernel_size=(2, 2), activation="relu", name=conv_name)(x)
    x = keras.layers.MaxPool2D()(x)

x = keras.layers.Flatten()(x)

#################################################################
### ADD A DENSE LAYER WITH 256 RELU NEURONS IN THE LINE BELOW ###

#################################################################

# Existing hidden dense layer and the 7-way softmax output.
x = keras.layers.Dense(256, activation="relu")(x)
output = keras.layers.Dense(7, activation="softmax", name="output")(x)

model2 = keras.Model(input, output, name='CNN_model2')

In [None]:
# Print the layer-by-layer architecture of the second model.
model2.summary()

In [None]:
# Same training configuration as the first model: Adam optimizer,
# categorical cross-entropy on the one-hot labels, accuracy as the metric.
model2.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
epochs = 30
history = model2.fit(
    train_faces,
    train_emotions,
    validation_split=0.2,  # hold out 20% of the training data for validation
    epochs=epochs,
    batch_size=64,
)

In [None]:
# Plot the per-epoch training and validation accuracy of the last `fit` call.
history_dict = history.history
acc = history_dict["accuracy"]
val_acc = history_dict["val_accuracy"]

# Use a dedicated name for the x-axis values so that the integer `epochs`
# hyperparameter set in the training cell is not overwritten with a range.
epoch_numbers = range(1, len(acc) + 1)

fig, ax = plt.subplots()
ax.plot(epoch_numbers, acc, "bo", label="Training acc")
ax.plot(epoch_numbers, val_acc, "b", label="Validation acc")
ax.set(title="Training and validation accuracy", xlabel="Epochs", ylabel="Accuracy")
ax.legend()
plt.show()

What's the test accuracy?

In [None]:
# Insert your code below and execute this cell


# Problem 3

***Data Augmentation:*** 

The basic idea of augmentation is to alter the image so slightly that the value of the dependent variable (i.e. the category that it belongs to) doesn't change.

In [None]:
# Augmentation pipeline: a random horizontal flip followed by a random zoom.
augmentation_layers = [
    keras.layers.RandomFlip("horizontal"),
    keras.layers.RandomZoom(0.2),
]
data_augmentation = keras.Sequential(augmentation_layers)

Let's quickly visualize what the augmentation does ...

In [None]:
# Run the first training image through the augmentation pipeline nine times.
# `training=True` is passed explicitly: the random layers only transform their
# input in training mode, and relying on the call-time default can silently
# produce nine identical, unaugmented images.
sample_batch = np.expand_dims(train_faces[0], axis=0)  # add a leading batch axis
augmented_images = [data_augmentation(sample_batch, training=True) for _ in range(9)]

fig = plt.figure(figsize=(10, 10))
for i in range(9):
    ax = fig.add_subplot(3, 3, i+1, xticks=[], yticks=[])
    ax.imshow(tf.keras.preprocessing.image.array_to_img(augmented_images[i][0]))

In [None]:
input = keras.Input(shape=train_faces.shape[1:])

# Augment first, then multiply every pixel value by 1/255.
x = data_augmentation(input)
x = keras.layers.Rescaling(1./255)(x)

# Three identical convolutional blocks: a 2x2 convolution with 32 filters
# followed by a max-pooling layer.
for conv_name in ("Conv_1", "Conv_2", "Conv_3"):
    x = keras.layers.Conv2D(32, kernel_size=(2, 2), activation="relu", name=conv_name)(x)
    x = keras.layers.MaxPool2D()(x)

# Classification head: flatten, one hidden dense layer, 7-way softmax output.
x = keras.layers.Flatten()(x)
x = keras.layers.Dense(512, activation="relu")(x)
output = keras.layers.Dense(7, activation="softmax")(x)

model = keras.Model(input, output, name='augmented_CNN_model')

In [None]:
# Print the layer-by-layer architecture of the augmented model.
model.summary()

In [None]:
# Adam optimizer, categorical cross-entropy on the one-hot labels,
# accuracy as the reported metric.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
epochs = 30
history = model.fit(
    train_faces,
    train_emotions,
    validation_split=0.2,  # hold out 20% of the training data for validation
    epochs=epochs,
    batch_size=64,
)

In [None]:
# Plot the per-epoch training and validation accuracy of the last `fit` call.
history_dict = history.history
acc = history_dict["accuracy"]
val_acc = history_dict["val_accuracy"]

# Use a dedicated name for the x-axis values so that the integer `epochs`
# hyperparameter set in the training cell is not overwritten with a range.
epoch_numbers = range(1, len(acc) + 1)

fig, ax = plt.subplots()
ax.plot(epoch_numbers, acc, "bo", label="Training acc")
ax.plot(epoch_numbers, val_acc, "b", label="Validation acc")
ax.set(title="Training and validation accuracy", xlabel="Epochs", ylabel="Accuracy")
ax.legend()
plt.show()

What's the test accuracy?

In [None]:
# Insert your code below and execute this cell


# Problem 4

Next, we apply transfer learning to our problem. We will take a slightly different approach from what we saw in class: instead of running each image through a pre-trained "headless" model to get smart representations and then using them as the input to our own "little" NN, we will do the following: we will (a) remove the top from an existing pre-trained model to get a "headless" model, (b) "attach" our little NN to this "headless" model, and (c) train this **entire** model.


Why? So that we can 'fine-tune' the weights of the original pre-trained model (along with the weights of our "little" NN) to better minimize our loss function.

***Overall Approach*** 

In class, we used ResNet50. In this problem, we will use another pretrained model called [VGG19](https://keras.io/api/applications/vgg/).
 

1.   We will remove the top of VGG19 to make it "headless". We will refer to this as the "base model".
2.   We will create a little NN and connect the output of the base model to this little NN.
3. **We will unfreeze the last 10 layers of the base model so that SGD/Adam can optimize those weights as well.**



In [None]:
# We will define a function that will build a model per the approach above

def construct_model(no_classes, input_shape, metrics=None,
                    num_unfrozen_layers=10, learning_rate=0.2*1e-4):
  """Build and compile a VGG19-based classifier with a partially unfrozen base.

  A headless, ImageNet-pretrained VGG19 is used as the base model. Only its
  last `num_unfrozen_layers` layers are left trainable; a small dense network
  is attached on top and the whole model is compiled with Adam.

  Relies on the notebook-level `data_augmentation` pipeline being defined.

  Args:
    no_classes: number of output classes (size of the softmax layer).
    input_shape: shape of a single input image, e.g. (48, 48, 3).
    metrics: list of metrics passed to `compile`; defaults to ['accuracy'].
    num_unfrozen_layers: how many of the base model's last layers to fine-tune.
    learning_rate: learning rate for the Adam optimizer.

  Returns:
    The compiled `keras.Model`.
  """
  # Avoid a mutable default argument being shared between calls.
  if metrics is None:
    metrics = ['accuracy']

  # `classes` and `classifier_activation` only apply when include_top=True, so
  # they are not passed; the remaining arguments are left at their defaults.
  base_model = keras.applications.VGG19(
    include_top=False,   # this makes VGG19 headless
    weights="imagenet",
  )

  # Unfreeze only the last `num_unfrozen_layers` layers of the *base model* so
  # that their weights are optimized along with the weights of the little NN.
  # The base model itself must be trainable for its sub-layers' flags to take
  # effect, so we enable it and then freeze every layer outside the tail.
  # (Iterating over the outer model's layers instead would flip the base model
  # as a whole and unfreeze all of its layers.)
  # Batch normalization layers, if the base model has any, always stay frozen.
  base_model.trainable = True
  first_unfrozen = len(base_model.layers) - num_unfrozen_layers
  for index, layer in enumerate(base_model.layers):
    is_batchnorm = isinstance(layer, keras.layers.BatchNormalization)
    layer.trainable = index >= first_unfrozen and not is_batchnorm

  inputs = keras.Input(shape=input_shape)

  # NOTE(review): the ImageNet weights were presumably trained with
  # `keras.applications.vgg19.preprocess_input` rather than 1/255 scaling —
  # confirm that this rescaling is intentional.
  x = keras.layers.Rescaling(1./255)(inputs) #normalizing

  # Apply random data augmentation
  x = data_augmentation(x)

  # Run the base model in inference mode. This matters for base models that
  # contain batchnorm layers: it keeps them in inference mode even though part
  # of the base model is unfrozen for fine-tuning.
  x = base_model(x, training=False)

  # Next we connect the output from our headless model to our little NN:
  # flatten the base model's output, then two dense layers and a softmax.
  x = keras.layers.Flatten()(x)
  x = keras.layers.Dense(1024, activation='relu')(x)
  x = keras.layers.Dense(1024, activation='relu')(x)
  outputs = keras.layers.Dense(no_classes, activation='softmax')(x)

  model = keras.Model(inputs, outputs)

  model.compile(loss='categorical_crossentropy',
                optimizer=keras.optimizers.Adam(learning_rate), #a smaller rate than Adam's default for better convergence
                metrics=metrics)

  # Print the summary only after the trainable flags are final, so that the
  # trainable / non-trainable parameter counts reflect what will be trained.
  model.summary()

  return model

***Training the Overall Model***

In [None]:
no_classes = 7
NUM_EPOCHS = 30

# Build the transfer-learning model for 48x48 three-channel inputs.
model = construct_model(no_classes, input_shape=(48, 48, 3))

history = model.fit(
    train_faces,
    train_emotions,
    validation_split=0.2,  # hold out 20% of the training data for validation
    epochs=NUM_EPOCHS,
)

In [None]:
# Plot the per-epoch training and validation accuracy of the last `fit` call.
history_dict = history.history
acc = history_dict["accuracy"]
val_acc = history_dict["val_accuracy"]

# Use a dedicated name for the x-axis values instead of rebinding `epochs`,
# which earlier cells use as an integer hyperparameter.
epoch_numbers = range(1, len(acc) + 1)

fig, ax = plt.subplots()
ax.plot(epoch_numbers, acc, "bo", label="Training acc")
ax.plot(epoch_numbers, val_acc, "b", label="Validation acc")
ax.set(title="Training and validation accuracy", xlabel="Epochs", ylabel="Accuracy")
ax.legend()
plt.show()

Let's calc the accuracy on the test set.

In [None]:
# Insert your code below and execute this cell


The accuracy for the testing set has improved significantly. Note, however, that the state-of-the-art for this dataset is around 73.3%.

Let's calculate the confusion matrix.

In [None]:
# Class index with the highest predicted probability for every test image,
# and the true class index recovered from the one-hot labels.
predicted_probabilities = model.predict(test_faces)
predictions_index = np.argmax(predicted_probabilities, axis=1)
actuals_index = np.argmax(test_emotions, axis=1)

# Translate class indices into emotion names.
predictions = [emotions_names[i] for i in predictions_index]
actuals = [emotions_names[i] for i in actuals_index]




In [None]:
# Confusion matrix: rows are predicted emotions, columns are actual emotions.
df = pd.DataFrame({'Predictions': predictions, 'Actuals': actuals})

# `crosstab` only emits labels that actually occur and sorts them
# alphabetically. Reindexing on the full list of emotion names keeps the matrix
# square (a class that is never predicted becomes a row of zeros) and puts the
# rows and columns in the same order as `emotions_names`.
emotion_order = list(emotions_names)
a = pd.crosstab(df.Predictions, df.Actuals).reindex(
    index=emotion_order, columns=emotion_order, fill_value=0
)
a

To help with interpretability, we can plot a heatmap of the confusion matrix as well.

In [None]:
# numpy is already imported in the setup cell, and the random seed is set there
# as well; re-seeding here would override the notebook-wide seed.
import seaborn as sns; sns.set_theme()

# Annotate every cell with its count so the heatmap can be read on its own.
ax = sns.heatmap(a, annot=True, fmt="d")
ax.set_title("Confusion matrix (rows: predicted, columns: actual)")

# Problem 5
It is now your turn. 

Take the code for Problem 4 and modify it so that it uses ResNet50 as the base model and not VGG19. Answer the questions in the Homework document.