# In [None]:
import sys
import os
import pandas as pd  
import numpy as np
from keras.models import Sequential
from keras.layers import Dense,Dropout,Activation,Flatten
from keras.layers import Conv2D, MaxPooling2D,BatchNormalization
from keras.losses import categorical_crossentropy
from keras.optimizers import Adam
from keras.regularizers import l2
from keras.utils import np_utils
import sklearn
from sklearn import preprocessing

# I previously explored the dataset in a Jupyter notebook called Dataset_overview

# Read the fer2013 CSV file into a DataFrame
fer_csv_path = '/Users/nelly/Facial_emotion_recogniton/fer2013.csv'
data = pd.read_csv(fer_csv_path)


# Split the data into train and test
#
# Rows whose 'Usage' contains 'Training' go to the train lists, rows whose
# 'Usage' contains 'PublicTest' go to the test lists; every other row is
# skipped. A malformed row is reported and skipped without touching any list.

X_train, train_y, X_test, test_y = [], [], [], []

for index, row in data.iterrows():
    try:
        usage = row['Usage']
        if 'Training' in usage:  # Assign values to train sets
            features, labels = X_train, train_y
        elif 'PublicTest' in usage:  # Assign values to test sets
            features, labels = X_test, test_y
        else:
            continue  # rows with any other Usage value are not used
        # Pixels are separated by a space; they are converted to a float
        # numpy array (needed for keras and normalization).
        pixls = np.array(row['pixels'].split(" "), 'float32')
    except (AttributeError, TypeError, ValueError):
        # Only data problems are handled here (non-string Usage/pixels or a
        # non-numeric pixel value). Anything else, such as a missing column
        # or a keyboard interrupt, is allowed to propagate.
        print(f'Error found: index {index} row :{row}')
        continue
    # Append the image and its label together, and only after parsing
    # succeeded, so features and labels can never get out of step.
    features.append(pixls)
    labels.append(row['emotion'])  # our target



# Turn the four Python lists into float32 numpy arrays (needed for keras and normalization)

X_train = np.asarray(X_train, dtype=np.float32)
X_test = np.asarray(X_test, dtype=np.float32)
train_y = np.asarray(train_y, dtype=np.float32)
test_y = np.asarray(test_y, dtype=np.float32)

# Normalization: standardize every pixel position by subtracting its mean and
# dividing by its standard deviation, which gives zero mean and unit variance
# per pixel (the values are NOT confined to the 0-1 range).
# The statistics are computed on the training set only and reused for the
# test set, so both sets go through exactly the same transformation and no
# information from the test data leaks into the preprocessing.

pixel_mean = np.mean(X_train, axis=0)
pixel_std = np.std(X_train, axis=0)
# A pixel that is constant across the training set has zero deviation;
# divide by 1 there instead of producing inf/NaN values.
pixel_std = np.where(pixel_std == 0, np.ones_like(pixel_std), pixel_std)

X_train = (X_train - pixel_mean) / pixel_std
X_test = (X_test - pixel_mean) / pixel_std

# Model

# Hyperparameters and image geometry used by the rest of the script
num_features = 64  # number of filters
num_labels = 7     # how many emotions we want to predict
batch_size = 32    # samples processed before the model is updated
epochs = 1         # complete passes through the training dataset
width = 48         # image width used when reshaping
height = 48        # image height used when reshaping

# Reshape "X" for keras into 4-D arrays of shape (samples, width, height, 1).
# The first axis keeps one entry per image; the trailing axis of size 1 matches
# the input_shape=(48,48,1) declared on the first Conv2D layer (presumably the
# single grayscale channel).

X_train = np.reshape(X_train, (len(X_train), width, height, 1))
X_test = np.reshape(X_test, (len(X_test), width, height, 1))


# Change "Y" to categorical (required to use categorical_crossentropy later).
# Each label array becomes a matrix with one column per class, similar to
# dummy variables; the number of rows stays the same.

train_y, test_y = (
    np_utils.to_categorical(labels, num_classes=num_labels)
    for labels in (train_y, test_y)
)

# Choose model

model = Sequential()  # a linear stack of layers (no shared layers or multiple inputs or outputs)

# Add convolutional and pooling layers: four blocks of
# Conv2D -> MaxPooling2D -> BatchNormalization with 64, 128, 128 and 256
# filters respectively, all using 3x3 kernels and ReLU activations.

for block_number, filters in enumerate((64, 128, 128, 256), start=1):
    if block_number == 1:
        # Only the first layer declares the shape of ONE sample (not of the
        # whole data set). It is built from width/height, in the same order
        # used when reshaping X, so it cannot drift from the data shape.
        conv = Conv2D(filters, kernel_size=(3, 3), input_shape=(width, height, 1), activation='relu')
    else:
        conv = Conv2D(filters, kernel_size=(3, 3), activation='relu')
    model.add(conv)
    model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))  # Downsamples the input representation
    model.add(BatchNormalization())  # Used instead of Dropout; standardizes the outputs of a hidden layer

# Flattening and adding two fully connected layers

model.add(Flatten())  # to make one big vector for the dense layers

model.add(Dense(1024, activation='relu'))  # 1024 units
model.add(BatchNormalization())

# Final layer: softmax because we're doing multi-class classification. It has
# one output per emotion label (num_labels), so it always matches the width of
# the categorical targets.
model.add(Dense(num_labels, activation="softmax"))

# Compile model: the loss is the categorical cross-entropy between the labels
# and the predictions (cross-entropy is the difference between two probability
# distributions), the optimizer is Adam with its default arguments, and
# accuracy is the reported metric.

model.compile(
    optimizer=Adam(),
    loss=categorical_crossentropy,
    metrics=['accuracy'],
)

# Fit the model

model.fit(
    X_train,
    train_y,
    validation_data=(X_test, test_y),
    epochs=epochs,
    batch_size=batch_size,
    # shuffle so that each data point creates an "independent" change on the
    # model, without being biased by the same points before it
    shuffle=True,
    # verbose: 0 = silent, 1 = progress bar plus one line per epoch,
    # 2 = one line per epoch only (epoch no. / total no. of epochs)
    verbose=2,
)

 # Save model

# Write the model architecture to emotions12.json and the weights to emotions12.h5
emotions_json = model.to_json()
with open("emotions12.json", "w") as json_file:
    json_file.write(emotions_json)
model.save_weights("emotions12.h5")