In [3]:
# Model with 61% Accuracy !

import pandas as pd  
import numpy as np

from keras.models import Sequential
from keras.utils import np_utils

from keras.layers import Dropout,Activation,Flatten,Dense
from keras.layers import Conv2D, MaxPooling2D,BatchNormalization

from keras.losses import categorical_crossentropy
from keras.optimizers import Adam


# I previously explored the dataset in a separate file: Dataset_overview

data = pd.read_csv('fer2013.csv')


# Split the data into train and test sets using the 'Usage' column.
# Rows whose usage is neither 'Training' nor 'PublicTest' are skipped.

X_train, train_y , X_test , test_y = [],[],[],[]

for index,row in data.iterrows():
    try:
        # Pick the destination lists first so rows of other usages are never parsed.
        usage = row['Usage']
        if 'Training' in usage:
            features, labels = X_train, train_y
        elif 'PublicTest' in usage:
            features, labels = X_test, test_y
        else:
            continue

        # Pixels are separated by a space; convert them to a float32 numpy array
        # (needed for keras and normalization).
        pixls = np.array(row['pixels'].split(" "),'float32')
        emotion = row['emotion'] # our target

        # Append only after everything that can fail has succeeded, so the
        # feature and label lists can never get out of step on a bad row.
        features.append(pixls)
        labels.append(emotion)
    except Exception:
        # Best effort: report the malformed row and keep going.
        print(f'Error found: index {index} row :{row}')


# Turn the four Python lists (train/test pixels and labels) into float32
# numpy arrays, the form needed for normalization and for keras.

X_train, X_test, train_y, test_y = (
    np.array(samples, 'float32')
    for samples in (X_train, X_test, train_y, test_y)
)

# Standardization: subtract the per-pixel mean and divide by the per-pixel standard
# deviation, giving each pixel position zero mean and unit variance on the training set
# (this is NOT a rescale to the 0-1 range).
# The statistics are computed on the training set only and reused for the test set, so both
# sets go through exactly the same transformation and no test information leaks in.

pixel_mean = np.mean(X_train, axis=0)
pixel_std = np.std(X_train, axis=0)
pixel_std[pixel_std == 0] = 1.0 # a constant pixel would otherwise give 0/0 -> NaN

X_train = (X_train - pixel_mean) / pixel_std
X_test = (X_test - pixel_mean) / pixel_std

# Settings shared by the steps below

# NOTE(review): num_features is not referenced in the visible code; presumably the
# base number of convolution filters - confirm.
num_features = 64

num_labels = 7   # the emotions that we want to predict

# Training schedule
batch_size = 32  # Number of samples processed before the model is updated
epochs = 30      # Number of complete passes through the training dataset

# Image size used when reshaping the flat pixel vectors
width = 48
height = 48

# Reshape "X" for keras: every flat pixel row becomes a (width, height, 1) image.
# The first axis keeps one entry per image; the trailing 1 is the single channel.

X_train = np.reshape(X_train, (len(X_train), width, height, 1))
X_test = np.reshape(X_test, (len(X_test), width, height, 1))


# One-hot encode "Y" (required by categorical_crossentropy later).
# Each label becomes a row with num_labels columns, one column per class
# (similar to dummy variables); the number of rows stays the same.

train_y, test_y = (
    np_utils.to_categorical(labels, num_classes=num_labels)
    for labels in (train_y, test_y)
)

# Choose model

model= Sequential() # a linear stack of layers (no shared layers or multiple inputs or outputs)

# Add convolutional and pooling layers
# In a convolutional layer, neurons receive input from only a restricted subarea of the previous layer.
# The spatial sizes noted below assume 48x48 inputs and the default 'valid' padding.

# Block 1: 48 -> 46 -> 44 -> pooled to 22
model.add(Conv2D(64,kernel_size=(3,3),input_shape=(width,height,1),activation='relu')) # shape of ONE image (width,height,channels), without the batch axis
model.add(Conv2D(64,kernel_size=(3,3),activation='relu'))
model.add(BatchNormalization()) # standardizes the outputs of the previous layer
model.add(MaxPooling2D(pool_size=(2,2),strides=(2,2))) # Downsamples the input representation
model.add(Dropout(0.5))   # randomly drops half of the activations during training to limit overfitting


# Block 2 (128 filters per convolution): 22 -> 20 -> 18 -> pooled to 9
model.add(Conv2D(128,(3,3), activation='relu'))
model.add(BatchNormalization())
model.add(Conv2D(128,(3,3), activation='relu'))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2,2),strides=(2,2)))
model.add(Dropout(0.5))


# Block 3 (128 filters per convolution): 9 -> 7 -> 5 -> pooled to 2
model.add(Conv2D(128,(3,3), activation='relu'))
model.add(BatchNormalization())
model.add(Conv2D(128,(3,3), activation='relu'))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2,2),strides=(2,2)))
model.add(Dropout(0.5))

# Block 4 (256 filters per convolution): the feature map is only 2x2 here, smaller than the
# 3x3 kernel, so an unpadded convolution would raise an error.
# 'same' padding pads the input so that, for stride 1, the output has the same size as the input.
# 2 -> 2 -> 2 -> pooled to 1

model.add(Conv2D(256,(3,3), activation='relu',padding='same'))
model.add(BatchNormalization())
model.add(Conv2D(256,(3,3), activation='relu',padding='same'))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2,2),strides=(2,2)))
model.add(Dropout(0.5))

# Flatten the feature maps into one vector per image so the dense layers can consume them

model.add(Flatten())

# Add two fully connected layers
# In a dense layer (fully connected layer), each neuron receives input from every element of the previous layer.

model.add(Dense(1024,activation='relu'))  # 1024 units
model.add(Dropout(0.4))
model.add(Dense(1024,activation='relu'))  # 1024 units
model.add(Dropout(0.5))

# Final layer: softmax because this is multi-class classification, one output per emotion.
# Sized from num_labels so it always matches the one-hot encoded targets.

model.add(Dense(num_labels, activation="softmax"))

# Compile the model.
# The loss is categorical cross-entropy, which measures the difference between the
# one-hot label distribution and the predicted probability distribution.
# Adam is used with its default settings and accuracy is tracked as the metric.

model.compile(
    optimizer=Adam(),
    loss=categorical_crossentropy,
    metrics=['accuracy'],
)

# Train the model
#
# verbose: 0 = silent, 1 = progress bar plus one line per epoch, 2 = one line per epoch only.
# validation_data: the test set is evaluated after every epoch to track val_loss / val_accuracy.
# shuffle: reshuffle the training samples each epoch so updates are not biased by a fixed order.

model.fit(
    x=X_train,
    y=train_y,
    batch_size=batch_size,
    epochs=epochs,
    verbose=2,
    validation_data=(X_test, test_y),
    shuffle=True,
)

# Persist this model version for the video tester:
# the architecture goes to a JSON file and the trained weights to an HDF5 file.

emotions_json = model.to_json()
with open("model_17.json", "w") as architecture_file:
    architecture_file.write(emotions_json)

model.save_weights("model_17.h5")

Epoch 1/30
898/898 - 236s - loss: 1.8818 - accuracy: 0.2325 - val_loss: 1.7566 - val_accuracy: 0.2901
Epoch 2/30
898/898 - 232s - loss: 1.6790 - accuracy: 0.3305 - val_loss: 1.4838 - val_accuracy: 0.4166
Epoch 3/30
898/898 - 234s - loss: 1.5209 - accuracy: 0.4152 - val_loss: 1.3992 - val_accuracy: 0.4609
Epoch 4/30
898/898 - 258s - loss: 1.4317 - accuracy: 0.4561 - val_loss: 1.3186 - val_accuracy: 0.5001
Epoch 5/30
898/898 - 251s - loss: 1.3739 - accuracy: 0.4799 - val_loss: 1.2789 - val_accuracy: 0.5266
Epoch 6/30
898/898 - 250s - loss: 1.3366 - accuracy: 0.4974 - val_loss: 1.2560 - val_accuracy: 0.5233
Epoch 7/30
898/898 - 250s - loss: 1.3141 - accuracy: 0.5068 - val_loss: 1.2360 - val_accuracy: 0.5450
Epoch 8/30
898/898 - 250s - loss: 1.2851 - accuracy: 0.5184 - val_loss: 1.2419 - val_accuracy: 0.5277
Epoch 9/30
898/898 - 244s - loss: 1.2531 - accuracy: 0.5316 - val_loss: 1.1777 - val_accuracy: 0.5634
Epoch 10/30
898/898 - 231s - loss: 1.2317 - accuracy: 0.5414 - val_loss: 1.1841 - 