# DSA Deep Learning [3] - Hypertuning Our CNN

In [1]:
# Import statements
import os
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import Dense, Dropout, GlobalAveragePooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping, ModelCheckpoint
from tensorflow.keras.utils import to_categorical
from sklearn.utils import class_weight
from sklearn.model_selection import train_test_split
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image, UnidentifiedImageError
import io
import base64
from google.colab import files
from google.colab.patches import cv2_imshow
from IPython.display import display, HTML, Javascript
from google.colab import output, files
import zipfile

##5 Different Approach to Improve Accuracy and Low Functionality
1. Bigger Dataset
2. More Algorithms
3. Augmentation
4. Pre-processing
5. More training

In [2]:
#Load CSV files
train_csv_path = "train.csv"
val_csv_path = "fer2013.csv"

train_df = pd.read_csv(train_csv_path)
val_df = pd.read_csv(val_csv_path)

In [3]:
#Data Info
print(train_df.head())
print(train_df.columns)
print(val_df.head())
print(val_df.columns)

   emotion                                             pixels
0        0  70 80 82 72 58 58 60 63 54 58 60 48 89 115 121...
1        0  151 150 147 155 148 133 111 140 170 174 182 15...
2        2  231 212 156 164 174 138 161 173 182 200 106 38...
3        4  24 32 36 30 32 23 19 20 30 41 21 22 32 34 21 1...
4        6  4 0 0 0 0 0 0 0 0 0 0 0 3 15 23 28 48 50 58 84...
Index(['emotion', 'pixels'], dtype='object')
   emotion                                             pixels     Usage
0        0  70 80 82 72 58 58 60 63 54 58 60 48 89 115 121...  Training
1        0  151 150 147 155 148 133 111 140 170 174 182 15...  Training
2        2  231 212 156 164 174 138 161 173 182 200 106 38...  Training
3        4  24 32 36 30 32 23 19 20 30 41 21 22 32 34 21 1...  Training
4        6  4 0 0 0 0 0 0 0 0 0 0 0 3 15 23 28 48 50 58 84...  Training
Index(['emotion', 'pixels', 'Usage'], dtype='object')


In [4]:
#Function for Data Injestion
def load_data(csv_path):
  df = pd.read_csv(csv_path)
  X = np.array([np.fromstring(pixel_str, dtype=float, sep=' ') for pixel_str in df['pixels']])
  X = X.reshape(-1,48,48,1)
  X = np.repeat(X, 3, axis=-1)
  X = X / 255.0
  y = to_categorical(df['emotion'])
  return X, y

In [5]:
#Data Injestion
X_train, y_train = load_data(train_csv_path)
X_val, y_val = load_data(val_csv_path)

## **#1 Issue on Machine Learning: Overfitting**
Overspecialing on more weighted attributes that creates a model that cannot generalise the data and fits too closely to the training dataset

In [6]:
#Data Generator - Selecting a standardized collection for balance class weight
def create_generator(X, y, batch_size = 64, shuffle=True):
  dataset = tf.data.Dataset.from_tensor_slices((X, y))
  if shuffle:
    dataset = dataset.shuffle(buffer_size=1024)
  dataset = dataset.batch(batch_size)
  dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)
  return dataset

In [7]:
batch_size = 64
train_generator = create_generator(X_train, y_train, batch_size=batch_size, shuffle=True)
val_generator = create_generator(X_val, y_val, batch_size=batch_size, shuffle=False)

In [8]:
#Model Generalisation
y_train_labels = np.argmax(y_train, axis=1)

class_weights = class_weight.compute_class_weight(
    class_weight='balanced',
    classes=np.unique(y_train_labels),
    y=y_train_labels
)

class_weight_dict = dict(enumerate(class_weights))

#Remove irrelevant data (i.e. data on hair features that does not contribute to facial emotions)
train_datagen = ImageDataGenerator(
    rotation_range = 30,
    width_shift_range = 0.2,
    height_shift_range = 0.2,
    shear_range = 0.2,
    zoom_range = 0.2,
    horizontal_flip = True,
    fill_mode = 'nearest'
)

In [18]:
base_model = MobileNetV2(input_shape=(48, 48, 3), include_top=False, weights="imagenet")

for layers in base_model.layers:
  layers.trainable = False

x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(128, activation="relu", kernel_regularizer=l2(0.01))(x)
#Kernel regularizer, an algorithm/technique
x = Dropout(0.5)(x)
#Dropout, regularization technique, randomly drops out certain percentage of neurons(data)
#in each layer during training process to achieve better generalisation on unseen data
predictions = Dense(7, activation="softmax")(x)
model = Model(inputs=base_model.input, outputs=predictions)

#Fine Tunning for Precision
for layers in base_model.layers[-20:]:
  layers.trainable = True

model.compile(optimizer=Adam(learning_rate=1e-4), loss="categorical_crossentropy", metrics=["accuracy"])


  base_model = MobileNetV2(input_shape=(48, 48, 3), include_top=False, weights="imagenet")


In [19]:
#Reduces learning rate when no improvement in validation loss, validation loss plateau
reduce_lr = ReduceLROnPlateau(monitor="val_loss", factor=0.2, patience=3, min_lr=5)

#Stops training process when validation loss plateau
early_stopping = EarlyStopping(monitor="val_loss", patience=5, restore_best_weights=True)

#Saves model with best validation loss
checkpoint = ModelCheckpoint("emotion_recognition_model_advanced.keras", monitor="val_loss", save_best_only=True)

#Combines the 3 callbacks above
callbacks = [reduce_lr, early_stopping, checkpoint]

In [20]:
history = model.fit(
    train_generator,
    epochs=20,
    validation_data=val_generator,
    class_weight=class_weight_dict,
    callbacks=callbacks
)

Epoch 1/20
[1m449/449[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 55ms/step - accuracy: 0.2034 - loss: 4.3894 - val_accuracy: 0.3113 - val_loss: 3.7211 - learning_rate: 1.0000e-04
Epoch 2/20
[1m449/449[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 22ms/step - accuracy: 0.3051 - loss: 3.4849 - val_accuracy: 0.3597 - val_loss: 3.1308 - learning_rate: 1.0000e-04
Epoch 3/20
[1m449/449[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 19ms/step - accuracy: 0.3522 - loss: 2.9421 - val_accuracy: 0.4283 - val_loss: 2.6304 - learning_rate: 1.0000e-04
Epoch 4/20
[1m449/449[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 22ms/step - accuracy: 0.3976 - loss: 2.4856 - val_accuracy: 0.4536 - val_loss: 2.3240 - learning_rate: 1.0000e-04
Epoch 5/20
[1m449/449[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 23ms/step - accuracy: 0.4348 - loss: 2.1581 - val_accuracy: 0.4967 - val_loss: 2.0463 - learning_rate: 1.0000e-04
Epoch 6/20
[1m449/449[0m [32m━━━━━━━━━

In [21]:
#Standardized Model
model_path="emotion_recognition_model_advanceed.keras"

In [22]:
# Define a dictionary for emotion labels based on FER2013 class order
emotion_labels = {
    0: "Angry",
    1: "Disgust",
    2: "Fear",
    3: "Happy",
    4: "Sad",
    5: "Surprise",
    6: "Neutral"
}

# Updated predict_emotion function to handle multiple faces
def predict_emotion(frame, model):
    # Convert to grayscale for face detection
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    # Load the face detection model (Haar Cascade)
    face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')

    # Detect multiple faces in the frame
    faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5, minSize=(48, 48))

    # Process each detected face
    for (x, y, w, h) in faces:
        # Extract the face region from the frame
        face = frame[y:y+h, x:x+w]

        # Resize face region to 48x48, the input size expected by the model
        face_resized = cv2.resize(face, (48, 48))

        # Preprocess face (normalize and add batch dimension)
        face_array = np.expand_dims(face_resized, axis=0) / 255.0  # Scale pixel values to [0, 1]

        # Predict emotion
        emotion_prediction = model.predict(face_array)
        emotion = np.argmax(emotion_prediction)  # Get the emotion class with the highest probability

        # Draw a circle around the face and add the emotion label
        cv2.rectangle(frame, (x, y), (x+w, y+h), (255, 0, 0), 2)
        emotion_label = emotion_labels[emotion]  # Map the predicted emotion index to label
        cv2.putText(frame, emotion_label, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    return frame


In [23]:
# JavaScript code to start the live webcam feed and capture image upon button click
def start_webcam_feed():
    js = """
    <script>
        let videoElement = null;
        let stream = null;

        async function startVideo() {
            if (!videoElement) {
                videoElement = document.createElement('video');
                videoElement.setAttribute('autoplay', '');
                videoElement.setAttribute('playsinline', '');
                document.body.appendChild(videoElement);
                stream = await navigator.mediaDevices.getUserMedia({ video: true })
                  .catch(err => {
                      console.error('Webcam not accessible:', err);
                      alert('Webcam not accessible. You can upload an image instead.');
                  });
                if (stream) {
                    videoElement.srcObject = stream;
                }
            }
        }

        async function capturePhoto() {
            if (!videoElement) {
                alert("Webcam is not active!");
                return;
            }
            const canvas = document.createElement('canvas');
            canvas.width = videoElement.videoWidth;
            canvas.height = videoElement.videoHeight;
            canvas.getContext('2d').drawImage(videoElement, 0, 0);

            // Stop video feed
            stream.getTracks().forEach(track => track.stop());
            videoElement.remove();
            videoElement = null;

            // Convert the photo to base64 and send to Python
            const dataUrl = canvas.toDataURL('image/jpeg');
            google.colab.kernel.invokeFunction('notebook.get_webcam_image', [dataUrl], {});
        }

        // Add the start and capture buttons to the DOM
        const startButton = document.createElement('button');
        startButton.innerHTML = 'Start Webcam Feed';
        startButton.onclick = startVideo;
        document.body.appendChild(startButton);

        const captureButton = document.createElement('button');
        captureButton.innerHTML = 'Capture Photo';
        captureButton.onclick = capturePhoto;
        document.body.appendChild(captureButton);
    </script>
    """
    display(HTML(js))

# Callback function to receive the captured image in Python
def get_webcam_image(dataUrl):
    img_data = base64.b64decode(dataUrl.split(",")[1])
    img = Image.open(io.BytesIO(img_data))
    img = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
    processed_img = predict_emotion(img, model)
    cv2_imshow(processed_img)

# Register the callbacks
output.register_callback('notebook.get_webcam_image', get_webcam_image)

# Initialize the webcam feed, buttons, and file upload option
start_webcam_feed()

