**[Google Colab Link](https://colab.research.google.com/drive/1UgfCVsx0Be2GQ4oQZiGihGBwteJRHFHE?usp=sharing)**

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!pip install keras-vggface
!pip install keras-applications

Collecting keras-vggface
  Downloading https://files.pythonhosted.org/packages/2f/7d/5f0319ebdc09ac1a2272364fa9583f5067b6f8aff93fbbf8835d81cbaad7/keras_vggface-0.6-py3-none-any.whl
Installing collected packages: keras-vggface
Successfully installed keras-vggface-0.6
Collecting keras-applications
[?25l  Downloading https://files.pythonhosted.org/packages/71/e3/19762fdfc62877ae9102edf6342d71b28fbfd9dea3d2f96a882ce099b03f/Keras_Applications-1.0.8-py3-none-any.whl (50kB)
[K     |████████████████████████████████| 51kB 3.0MB/s 
Installing collected packages: keras-applications
Successfully installed keras-applications-1.0.8


In [3]:
import pandas as pd
import numpy as np
import os
from PIL import Image
import matplotlib.pyplot as plt

import tensorflow as tf

from tensorflow import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.layers import Conv2D, MaxPooling2D, BatchNormalization, Dropout, GlobalAveragePooling2D
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from keras_vggface.vggface import VGGFace
from keras_vggface import utils
from tensorflow.keras.models import Model

In [4]:
import sys
sys.path.insert(0, '/content/drive/MyDrive/Colab Notebooks/')
from datagen import csv2img, get_datagenerator

In [5]:
# !rm -r '/content/csvs'
!unzip -q /content/drive/MyDrive/dataset/csvs.zip

[datagen.py file](https://drive.google.com/file/d/1oe6ou94stmeOYq-3iCrgahmzeLBBpVln/view?usp=sharing)
 and this notebook are in the same directory

---



In [10]:
# !rm -r '/content/data'

df = csv2img('/content/csvs')

--> icml_face_data_1.csv
--> icml_face_data_2.csv
--> icml_face_data_3.csv
--> icml_face_data_4.csv
--> icml_face_data_5.csv
--> icml_face_data_6.csv


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))


Training Data size: 28709
Validation Data size: 3589
Test Data size: 3589


In [11]:
df.head()

Unnamed: 0,emotion,usage,pixels
0,0,Training,70 80 82 72 58 58 60 63 54 58 60 48 89 115 121...
1,0,Training,151 150 147 155 148 133 111 140 170 174 182 15...
2,2,Training,231 212 156 164 174 138 161 173 182 200 106 38...
3,4,Training,24 32 36 30 32 23 19 20 30 41 21 22 32 34 21 1...
4,6,Training,4 0 0 0 0 0 0 0 0 0 0 0 3 15 23 28 48 50 58 84...


In [12]:
df.iloc[0:,1]

0           Training
1           Training
2           Training
3           Training
4           Training
            ...     
35882    PrivateTest
35883    PrivateTest
35884    PrivateTest
35885    PrivateTest
35886    PrivateTest
Name: usage, Length: 35887, dtype: object

In [13]:
EPOCHS = 100
BS = 64
FROZEN_LAYER_NUM = 19
DROPOUT_RATE = 0.5
ADAM_LEARNING_RATE = 0.0001
SGD_LEARNING_RATE = 0.0001
SGD_DECAY = 0.0001

resize = 128

In [14]:
pf = utils.preprocess_input

train_generator  = get_datagenerator('/content/data/train',preprocessing_func=pf, img_size=(resize, resize), batch_size=BS, img_color='rgb', aug=True)
val_generator    = get_datagenerator('/content/data/test-public',preprocessing_func=pf, img_size=(resize, resize), batch_size=BS, img_color='rgb')
test_generator  = get_datagenerator('/content/data/test-private',preprocessing_func=pf, img_size=(resize, resize), batch_size=BS, img_color='rgb')
# img_size=(resize, resize), img_color='rgb'

Found 28709 images belonging to 7 classes.
Found 3589 images belonging to 7 classes.
Found 3589 images belonging to 7 classes.


In [15]:
from sklearn.utils import class_weight
data_x  = df[df['usage']=='PublicTest']['pixels']
data_y = df[df['usage']=='PublicTest']['emotion'].values
data_x = data_x.apply(lambda q: [int(pixel) for pixel in q.split()])
X, Y = data_x.tolist(), data_y
class_weights = class_weight.compute_class_weight('balanced', np.unique(Y), Y)
class_weights = {i:j for i,j in enumerate(class_weights)}
print(class_weights)

{0: 1.0978892627714898, 1: 9.15561224489796, 2: 1.0336981566820276, 3: 0.5728651237031125, 4: 0.7851673594399475, 5: 1.2354561101549053, 6: 0.844669333960932}


In [16]:
vgg_notop = VGGFace(include_top=False, input_shape=(resize, resize, 3), pooling='avg')
print(vgg_notop.summary())
for layer in vgg_notop.layers:
  layer.trainable = False

last_layer = vgg_notop.get_layer('pool5').output
# x = Conv2D(filters=64, kernel_size=1, activation='relu')(last_layer)
# x = BatchNormalization()(x)
# x = MaxPooling2D(pool_size=(2, 2), strides=1)(x)
# x = Conv2D(filters=64, kernel_size=2, activation='relu')(x)
# x = BatchNormalization()(x)
# x = Flatten(name='flatten')(x)
# x = Dense(1024, activation='relu', name='fc6')(x)
# x = Dense(2048, activation='relu', name='fc7')(x)

x = Flatten(name='flatten')(last_layer)
x = Dropout(DROPOUT_RATE)(x)
x = Dense(4096, activation='relu', name='fc6')(x)
x = Dropout(DROPOUT_RATE)(x)
x = Dense(1024, activation='relu', name='fc7')(x)

out = Dense(7, activation='softmax', name='classifier')(x)

custom_vgg_model = Model(vgg_notop.input, out)

adam = keras.optimizers.Adam(lr=0.0001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0)

custom_vgg_model.compile(optimizer=adam, loss='categorical_crossentropy', metrics=['accuracy'])

# checkpoint
cp_filepath='/content/drive/MyDrive/fer/cps/vgg16-weights-best.hdf5'
plateau= keras.callbacks.ReduceLROnPlateau(monitor='val_accuracy', factor=0.1, patience=10, verbose=0, mode='max', epsilon=0.0001, cooldown=0, min_lr=0)
checkpoint = ModelCheckpoint(cp_filepath, monitor='val_accuracy', verbose=1, save_best_only=True, mode='max')
callbacks_list = [plateau, checkpoint]

Downloading data from https://github.com/rcmalli/keras-vggface/releases/download/v2.0/rcmalli_vggface_tf_notop_vgg16.h5
Model: "vggface_vgg16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 128, 128, 3)]     0         
_________________________________________________________________
conv1_1 (Conv2D)             (None, 128, 128, 64)      1792      
_________________________________________________________________
conv1_2 (Conv2D)             (None, 128, 128, 64)      36928     
_________________________________________________________________
pool1 (MaxPooling2D)         (None, 64, 64, 64)        0         
_________________________________________________________________
conv2_1 (Conv2D)             (None, 64, 64, 128)       73856     
_________________________________________________________________
conv2_2 (Conv2D)             (None, 64, 64, 128)       147584    

In [17]:
custom_vgg_model.load_weights(cp_filepath)

In [29]:
history = custom_vgg_model.fit(
  train_generator,
  validation_data = val_generator, 
  # class_weight=class_weights,
  steps_per_epoch=28709//BS,
  validation_steps=3589//BS,
  shuffle=True,
  epochs=EPOCHS,
  callbacks=callbacks_list,
  # use_multiprocessing=True,
  # initial_epoch=99,
) 

Epoch 100/100
  2/448 [..............................] - ETA: 1:06:51 - loss: 0.4868 - accuracy: 0.8281

KeyboardInterrupt: ignored

In [20]:
print('\n# Evaluate on validation data')
results_val = custom_vgg_model.evaluate(val_generator)
print('val loss, val_accuracy:', results_val)


# Evaluate on validation data
val loss, val_accuracy: [1.105054259300232, 0.6781833171844482]


In [21]:
print('\n# Evaluate on test data')
results_test = custom_vgg_model.evaluate(test_generator)
print('test loss, test accuracy:', results_test)


# Evaluate on test data
test loss, test accuracy: [1.052651047706604, 0.6901643872261047]


In [28]:
# list all data in history
print(history.history.keys())

# summarize history for accuracy
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'val'], loc='upper left')
plt.show()

# summarize history for loss
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'val'], loc='upper left')
plt.show()

dict_keys([])


KeyError: ignored

In [30]:
epoch_str = '-EPOCHS_' + str(EPOCHS)
test_acc = '-test_acc_%.3f' % results_test[1]
custom_vgg_model.save('/content/drive/MyDrive/fer/saved_model/' + 'vgg16' + epoch_str + test_acc + '.h5')

In [33]:
from sklearn.metrics import confusion_matrix
from seaborn import heatmap

emotions = {0:'Angry', 1:'Disgust', 2:'Fear', 3:'Happy', 4:'Sad', 5:'Surprise', 6:'Neutral'}

y_pred = custom_vgg_model.predict(val_generator).argmax(axis=1)
y_true = val_generator.classes

cmat_df_test=pd.DataFrame(
  confusion_matrix(y_true, y_pred, normalize='true').round(2),
  index=emotions.values(), 
  columns=emotions.values()
  )

plt.figure(figsize=(5,5))
heatmap(cmat_df_test,annot=True,cmap=plt.cm.Blues)
plt.tight_layout()
plt.title('Confusion Matrix on Private Test Set')
plt.ylabel('True label')
plt.xlabel('Predicted label')
plt.show()

KeyboardInterrupt: ignored

In [34]:
print(f'tensorflow {tf.__version__}')
print(f'keras {keras.__version__}')
!python --version

tensorflow 2.4.1
keras 2.4.0
Python 3.7.10
