## FER2023 - Mohamed Uzair Anees
### Still a work in progress. Trying to optimize the recognition accuracy as much as possible.

In [1]:
#git check
import tensorflow as tf
import cv2
import os
import matplotlib.pyplot as plt
import numpy as np

In [2]:
# Root folder of the training dataset.
data_directory = "Training2/"

# Class labels. Every entry has to match the name of a training folder
# exactly, because the label is joined onto data_directory to find the images.
classes = [str(label) for label in range(7)]

In [3]:
# this is going to read all the images and we are going to resize them 
# into imageNet size and put them in an array called training_data

# training_data collects [resized_image, class_index] pairs;
# img_size is the edge length (in pixels) every image is resized to.
training_data, img_size = [], 224

def create_training_data():
    """Load every training image into the module-level ``training_data`` list.

    For each entry of ``classes`` the folder ``data_directory/<class>`` is
    listed, each file is read with OpenCV, resized to
    ``img_size`` x ``img_size`` and appended to ``training_data`` as
    ``[image, class_index]``, where ``class_index`` is the position of the
    folder name inside ``classes``.

    Files that OpenCV cannot decode are skipped and counted, and a summary
    line is printed at the end, so a broken dataset is noticed instead of
    silently shrinking.

    Returns:
        None. The result is accumulated in ``training_data``.

    Raises:
        OSError: if a class folder is missing or cannot be listed.
    """
    skipped = 0
    for class_num, category in enumerate(classes):
        path = os.path.join(data_directory, category)
        for img in os.listdir(path):
            img_array = cv2.imread(os.path.join(path, img))
            if img_array is None:
                # cv2.imread does not raise on an unreadable or non-image
                # file; it returns None. Skip it explicitly rather than
                # letting cv2.resize fail and hiding that failure.
                skipped += 1
                continue
            new_array = cv2.resize(img_array, (img_size, img_size))
            training_data.append([new_array, class_num])
    if skipped:
        print(f"Skipped {skipped} file(s) that could not be read as images")

In [4]:
create_training_data()

In [5]:
# We shuffle the data because the samples were loaded class by class, and
# the deep learning model should not learn from the order of the samples.
import random

random.shuffle(training_data)

In [6]:
# Split the [image, label] pairs into two parallel sequences.
# Y stays a plain Python list here.
Y = [label for _, label in training_data]

# Stack the images into one 4-D array: (n_images, img_size, img_size, 3).
X = np.array([features for features, _ in training_data]).reshape(-1, img_size, img_size, 3)

In [7]:
X.shape # now we changed it into 4 dimensions where we have (images, size, size, 3 color channel rgb)

(21387, 224, 224, 3)

In [8]:
# Normalize the pixel values from the 0-255 range down to 0-1.
#
# The conversion goes through float32 and the division is done in place:
# a plain `X / 255.0` promotes the whole uint8 array to float64, which
# takes 8 bytes per value instead of 4 and needs an extra full-size
# temporary.
X = X.astype(np.float32)
X /= 255.0

# Why normalize at all:
# - it helps the model converge faster during training;
# - it puts all input data on the same scale, which can improve the
#   accuracy of the model;
# - it helps to avoid numerical overflow or underflow issues that can
#   occur when working with large or small values.

In [9]:
Y[0]    # randomized label based on the class numbers. Y currently is a list

Y = np.array(Y)
Y.shape

(21387,)

### We will now make the deep learning model for training our data - This is a type of learning called Transfer Learning

In [10]:
import tensorflow as tf
from tensorflow import keras
from keras import layers

In [11]:
model = tf.keras.applications.MobileNetV2() # This is a pre-trained model
#model = tf.keras.applications.ResNet50V2() # This is a pre-trained model

#base_model = tf.keras.applications.ResNet50V2(weights="imagenet", include_top=False, input_shape=(224,224,3)) # This is a pre-trained model
#model = tf.keras.applications.VGG19() # This is a pre-trained model

In [12]:
model.summary() # this allows us to check the model (MobileNetV2)
#base_model.summary()

Model: "mobilenetv2_1.00_224"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 224, 224, 3  0           []                               
                                )]                                                                
                                                                                                  
 Conv1 (Conv2D)                 (None, 112, 112, 32  864         ['input_1[0][0]']                
                                )                                                                 
                                                                                                  
 bn_Conv1 (BatchNormalization)  (None, 112, 112, 32  128         ['Conv1[0][0]']                  
                                )                                              

##### From this point onwards, we will be fine tuning the layers of the 
##### pre trained model to make sure it works BETTER with our data set

In [13]:
based_input = model.layers[0].input
based_output = model.layers[-2].output

In [14]:
based_output

<KerasTensor: shape=(None, 1280) dtype=float32 (created by layer 'global_average_pooling2d')>

In [15]:
# Build a new classification head on top of the pre-trained network.
# `based_output` is the output of the global average pooling layer, i.e. the
# layer just before the original classifier (model.layers[-2]).
final_output = layers.Dense(128)(based_output) # new fully connected layer fed by the global pooling output
final_output = layers.Activation('relu')(final_output) # non-linearity for the 128-unit layer
final_output = layers.Dense(64)(final_output) # second, narrower fully connected layer
final_output = layers.Activation('relu')(final_output) # non-linearity for the 64-unit layer
final_output = layers.Dense(7, activation='softmax')(final_output)  # classification layer: one probability per emotion class
# We have now successfully cut down the classes of the pre-trained model from 1000 to 7.

In [16]:
final_output

<KerasTensor: shape=(None, 7) dtype=float32 (created by layer 'dense_2')>

In [17]:
new_model = keras.Model(inputs = based_input, outputs = final_output)

In [18]:
new_model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 224, 224, 3  0           []                               
                                )]                                                                
                                                                                                  
 Conv1 (Conv2D)                 (None, 112, 112, 32  864         ['input_1[0][0]']                
                                )                                                                 
                                                                                                  
 bn_Conv1 (BatchNormalization)  (None, 112, 112, 32  128         ['Conv1[0][0]']                  
                                )                                                             

In [19]:
new_model.compile(loss="sparse_categorical_crossentropy", optimizer="adam", metrics= ["accuracy"])

## PLEASE EXECUTE TILL THIS CELL. Skip the next two cells! -------------------------------------------------------------------------------------------------

In [20]:
#new_model.fit(X, Y, epochs=20) # training the model. Higher the epoch(cycles of training) higher the accuracy

In [21]:
#new_model.save('RESNETmy_model_epoch.h5') # save the ungodly 3.4 hours of model training we just did into a file

## PLEASE START AFTER THIS CELL FOR PROGRAM. DONT RUN THE ABOVE 2 CELLS!!!!! --------------------------------------------------------------------------

In [22]:
new_model = tf.keras.models.load_model('my_model_epoch40.h5') # load the newly trained model

In [23]:
#frame = cv2.imread("UziSelfie.jpg") # loading sample image to check model

#frame.shape

In [26]:
import cv2 as cv
faceCascade = cv.CascadeClassifier(cv.data.haarcascades + 'haarcascade_frontalface_default.xml')

### Now let's implement live emotion recognition on a webcam video feed.

In [27]:
# This code is used to detect faces in an image or a
# video frame using OpenCV's face detection algorithm,
# specifically the detectMultiScale method.

import cv2
import matplotlib.pyplot as plt
import numpy as np

# Open the webcam: try camera index 1 first and fall back to index 0.
cap = cv2.VideoCapture(1)
if not cap.isOpened():
    cap.release()  # drop the failed handle before trying the next index
    cap = cv2.VideoCapture(0)
if not cap.isOpened():
    raise IOError("Cannot open webcam")

WINDOW_NAME = 'Face Emotion Recognition'

# Emotion name for each class index predicted by the model.
EMOTION_LABELS = ("Angry", "Disgust", "Fear", "Joy", "Neutral", "Sad", "Surprised")

# Colour (BGR order) used for the emotion text and the face box.
LABEL_COLOR = (0, 0, 225)

# Load the Haar cascade once; it does not change between frames.
faceCascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')


def _find_face(frame, cascade):
    """Detect a face in ``frame`` and return its crop and bounding box.

    Every face found in the full frame is outlined in green (``frame`` is
    modified in place). Detection is then repeated inside each face region,
    and the last successful inner detection is used as the crop.

    Returns:
        ``(face_crop, (x, y, w, h))`` where the box is the outer detection
        in frame coordinates, or ``None`` when no face could be cropped.
    """
    gray_image = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    found = None
    for (x, y, w, h) in cascade.detectMultiScale(gray_image, 1.1, 4):
        roi_gray = gray_image[y:y + h, x:x + w]
        roi_color = frame[y:y + h, x:x + w]
        inner = cascade.detectMultiScale(roi_gray)
        # detectMultiScale returns an empty tuple or an ndarray, so test the
        # length; truth-testing an ndarray would raise.
        if len(inner) == 0:
            print("Face not detected")
        else:
            ex, ey, ew, eh = inner[-1]
            # Copy the crop before drawing so the green outline can never
            # end up inside the image handed to the model.
            found = (roi_color[ey:ey + eh, ex:ex + ew].copy(), (x, y, w, h))
        cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
    return found


def _annotate(frame, status, box):
    """Draw the emotion label and the face bounding box onto ``frame``."""
    x, y, w, h = (int(v) for v in box)

    # Black text box in the top-left corner with the emotion label inside.
    x1, y1, w1, h1 = 0, 0, 175, 75
    cv2.rectangle(frame, (x1, y1), (x1 + w1, y1 + h1), (0, 0, 0), -1)
    cv2.putText(frame, status, (x1 + int(w1 / 10), y1 + int(h1 / 2)),
                cv2.FONT_HERSHEY_SIMPLEX, 0.7, LABEL_COLOR, 2)

    # Larger copy of the label drawn directly on the picture.
    cv2.putText(frame, status, (100, 150), cv2.FONT_HERSHEY_PLAIN, 3,
                LABEL_COLOR, 2, cv2.LINE_4)

    cv2.rectangle(frame, (x, y), (x + w, y + h), LABEL_COLOR)


def _window_closed(name):
    """Return True when the window was closed with its close (x) button."""
    try:
        return cv2.getWindowProperty(name, cv2.WND_PROP_VISIBLE) < 1
    except cv2.error:
        # Some GUI backends raise for a window that no longer exists.
        return True


try:
    while True:
        # Read the video frame by frame
        ret, frame = cap.read()
        if not ret:
            # The camera stopped delivering frames; `frame` is unusable.
            print("Failed to read a frame from the webcam")
            break

        detection = _find_face(frame, faceCascade)
        if detection is not None:
            face_roi, face_box = detection

            # Resize and normalize the face image
            final_image = cv2.resize(face_roi, (224, 224))
            final_image = np.expand_dims(final_image, axis=0)   # need fourth dimension
            final_image = final_image / 255.0   # normalizing the image

            # Make emotion predictions (verbose=0: no progress bar per frame)
            Predictions = new_model.predict(final_image, verbose=0)
            class_index = int(np.argmax(Predictions))

            # Fall back to "Neutral" for an index without a known label.
            if class_index < len(EMOTION_LABELS):
                status = EMOTION_LABELS[class_index]
            else:
                status = "Neutral"

            _annotate(frame, status, face_box)
        # With no usable face in this frame, the raw frame is shown and no
        # prediction is made (instead of reusing a crop from an old frame).

        # Display the frame with emotion and bounding box
        cv2.imshow(WINDOW_NAME, frame)

        # Break the loop if 'q' is pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

        # Break the loop if the window was closed with the x button
        if _window_closed(WINDOW_NAME):
            break
finally:
    # Release the webcam and close windows, even if an error occurred
    cap.release()
    cv2.destroyAllWindows()

Face not detected
Face not detected
Face not detected
Face not detected
Face not detected
Face not detected
Face not detected
Face not detected
Face not detected
Face not detected
Face not detected
Face not detected
Face not detected
Face not detected
Face not detected
Face not detected
Face not detected
Face not detected
Face not detected
Face not detected
Face not detected
Face not detected
Face not detected
Face not detected
Face not detected
Face not detected
Face not detected
Face not detected
Face not detected
Face not detected
Face not detected
Face not detected
Face not detected
Face not detected
Face not detected
Face not detected
Face not detected
Face not detected
Face not detected
Face not detected
Face not detected
Face not detected
Face not detected
Face not detected
Face not detected
Face not detected
Face not detected
Face not detected
Face not detected
Face not detected
Face not detected
Face not detected
Face not detected
Face not detected
Face not detected
Face not d