# DSA Deep Learning [2] - Advanced CNNs

In [4]:
# Import Libraries
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
import pandas as pd
import matplotlib.pyplot as plt
from google.colab.patches import cv2_imshow
from IPython.display import display, HTML
from PIL import Image
from IPython.display import display, HTML, Javascript
import cv2
import numpy as np
from PIL import Image
import base64
import io
from google.colab.patches import cv2_imshow
from google.colab import output
import os
from tensorflow.keras.models import load_model

In [None]:
# Load the FER2013 CSV file from Google Colab's local storage
csv_path = 'fer2013.csv'
df = pd.read_csv(csv_path)

# Show the first few rows to understand the dataset's structure.
# Ending the cell with the bare expression lets the notebook render the DataFrame
# as a rich table instead of the plain-text dump produced by print().
df.head()

In [None]:
# Block 2: Convert Pixel Data to Images
# The FER2013 dataset typically has 'emotion' and 'pixels' columns.
# Each 'pixels' entry starts out as a whitespace-separated string of 48*48 values.
# Only entries that are still strings are parsed, so re-running this cell after
# df['pixels'] already holds arrays is a no-op instead of failing on `.split()`.
df['pixels'] = df['pixels'].apply(
    lambda x: np.array(x.split(), dtype='float32') if isinstance(x, str) else x
)
X = np.stack(df['pixels'].values)  # 2D array: one flattened image per row
X = X.reshape(-1, 48, 48, 1)  # Reshape to (n_samples, 48, 48, 1) for grayscale images
X = np.repeat(X, 3, axis=-1)  # Duplicate the single channel 3x so the model gets 3-channel input

# Normalize the pixel values to [0, 1]
X = X / 255.0

# One-hot encode the labels. The class count is pinned to 7 so the label width always
# matches the 7-unit softmax output of the model, even if the highest class id were
# absent from the loaded data.
y = to_categorical(df['emotion'], num_classes=7)

# Split the data into training and validation sets (80% / 20%, fixed seed for reproducibility)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)


In [None]:
# Block 3: Building the Model
# Transfer learning: an ImageNet-pretrained MobileNetV2 backbone (without its
# classification head) followed by a small trainable classifier.
# NOTE(review): Keras documents MobileNetV2 as expecting inputs scaled to [-1, 1]
# (mobilenet_v2.preprocess_input); the data prepared in this notebook is scaled
# to [0, 1] — confirm this is intentional.
base_model = MobileNetV2(
    weights='imagenet',
    include_top=False,
    input_shape=(48, 48, 3),
)

# Keep the pretrained backbone fixed; only the new head will be trained
for layer in base_model.layers:
    layer.trainable = False

# Classifier head: global pooling -> 128-unit hidden layer -> 7-way softmax
x = GlobalAveragePooling2D()(base_model.output)
x = Dense(units=128, activation='relu')(x)
predictions = Dense(units=7, activation='softmax')(x)  # FER2013 has 7 emotion classes

model = Model(inputs=base_model.input, outputs=predictions)

# Compile for one-hot multi-class classification and print the architecture
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
model.summary()


In [None]:
# Block 4: Training the Model
# Fit on the training split for 15 epochs, reporting metrics on the validation split.
# The returned History object is kept in `history` for later inspection.
history = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_val, y_val),
    epochs=15,
)


In [None]:
# Persist the trained model to an HDF5 (.h5) file in the working directory.
# `model` is expected to be the fitted model produced by the training cell.
model.save(filepath='emotion_recognition_model.h5')
print("Model saved as 'emotion_recognition_model.h5'")


In [None]:
# Reuse a previously saved model when one is available on disk
model_path = 'emotion_recognition_model.h5'

if not os.path.exists(model_path):
    # Nothing to load: `model` is left untouched here and has to come from the
    # building/training cells earlier in the notebook.
    print("No saved model found. Please train the model.")
else:
    print("Loading saved model...")
    model = load_model(model_path)
    print("Model loaded successfully.")

In [6]:
# Emotion names keyed by FER2013 class id; the position in the tuple is the class id
emotion_labels = dict(enumerate((
    "Angry",
    "Disgust",
    "Fear",
    "Happy",
    "Sad",
    "Surprise",
    "Neutral",
)))

# Detect every face in a frame, classify its emotion and annotate the frame
def predict_emotion(frame, model):
    """Detect faces in ``frame``, classify each one's emotion and draw the result.

    Args:
        frame: Colour image array in OpenCV BGR channel order. It is modified in
            place: a rectangle and a text label are drawn for every detected face.
        model: Object exposing ``predict(batch)`` that returns one row of class
            scores per input image; the arg-max index is translated to a name
            through the module-level ``emotion_labels`` mapping.

    Returns:
        The same ``frame`` object that was passed in, annotated. It is returned
        unchanged when no face is detected.
    """
    # Convert to grayscale: used both for face detection and as the model input
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    # Load the face detection model (Haar Cascade)
    face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')

    # Detect multiple faces in the frame
    faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5, minSize=(48, 48))

    # Process each detected face
    for (x, y, w, h) in faces:
        # Crop from the GRAYSCALE image, not the colour frame: the training data in
        # this notebook is single-channel FER2013 replicated to 3 identical channels,
        # so inference has to feed the model the same kind of input.
        face_gray = gray[y:y+h, x:x+w]

        # Resize face region to 48x48, the input size expected by the model
        face_resized = cv2.resize(face_gray, (48, 48))

        # Replicate the single channel 3x, scale to [0, 1] and add the batch dimension
        face_rgb = np.repeat(face_resized[..., np.newaxis], 3, axis=-1)
        face_array = np.expand_dims(face_rgb, axis=0).astype('float32') / 255.0

        # Predict emotion
        emotion_prediction = model.predict(face_array)
        emotion = int(np.argmax(emotion_prediction))  # Class with the highest score

        # Draw a rectangle around the face
        cv2.rectangle(frame, (x, y), (x+w, y+h), (255, 0, 0), 2)

        # Put the label just above the box; when the face touches the top edge there
        # is no room above it, so draw the label just inside the box instead of
        # letting it be clipped off the image.
        emotion_label = emotion_labels[emotion]
        label_y = y - 10 if y - 10 >= 15 else y + 15
        cv2.putText(frame, emotion_label, (int(x), int(label_y)), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    return frame


In [None]:
# JavaScript code to start the live webcam feed and capture image upon button click
def start_webcam_feed():
    """Render "Start Webcam Feed" and "Capture Photo" buttons in the cell output.

    The injected script opens the webcam on "Start Webcam Feed". On "Capture Photo"
    it grabs the current video frame, stops the feed and sends the frame to Python
    as a JPEG data URL through the 'notebook.get_webcam_image' kernel callback.

    Capture is ignored (with a console warning) until the feed is running and has
    delivered a frame, and a failed webcam request is rolled back so that
    "Start Webcam Feed" can be clicked again.

    Returns:
        None. The HTML/JavaScript is displayed as a side effect.
    """
    js = """
    <script>
        let videoElement = null;
        let stream = null;

        async function startVideo() {
            if (videoElement) {
                return;  // feed already running or still starting
            }
            videoElement = document.createElement('video');
            videoElement.setAttribute('autoplay', '');
            videoElement.setAttribute('playsinline', '');
            document.body.appendChild(videoElement);
            try {
                stream = await navigator.mediaDevices.getUserMedia({ video: true });
                videoElement.srcObject = stream;
            } catch (err) {
                // Permission denied or no camera: undo the setup so Start works again
                console.error('Could not access the webcam:', err);
                videoElement.remove();
                videoElement = null;
                stream = null;
            }
        }

        async function capturePhoto() {
            // Nothing to capture until the feed is running and has produced a frame;
            // a 0x0 canvas would yield an empty data URL that Python cannot decode.
            if (!videoElement || !stream || !videoElement.videoWidth) {
                console.warn('Start the webcam feed and wait for the video before capturing.');
                return;
            }

            const canvas = document.createElement('canvas');
            canvas.width = videoElement.videoWidth;
            canvas.height = videoElement.videoHeight;
            canvas.getContext('2d').drawImage(videoElement, 0, 0);

            // Stop video feed
            stream.getTracks().forEach(track => track.stop());
            stream = null;
            videoElement.remove();
            videoElement = null;

            // Convert the photo to base64 and send to Python
            const dataUrl = canvas.toDataURL('image/jpeg');
            google.colab.kernel.invokeFunction('notebook.get_webcam_image', [dataUrl], {});
        }

        // Add the start and capture buttons to the DOM
        const startButton = document.createElement('button');
        startButton.innerHTML = 'Start Webcam Feed';
        startButton.onclick = startVideo;
        document.body.appendChild(startButton);

        const captureButton = document.createElement('button');
        captureButton.innerHTML = 'Capture Photo';
        captureButton.onclick = capturePhoto;
        document.body.appendChild(captureButton);
    </script>
    """
    display(HTML(js))

# Callback function to receive the captured image in Python
def get_webcam_image(dataUrl):
    """Decode a captured webcam frame, run emotion detection on it and display it.

    Args:
        dataUrl: Data URL string whose base64 payload follows the first comma
            (e.g. ``data:image/jpeg;base64,<payload>``).

    Returns:
        None. The annotated image is shown with ``cv2_imshow``. When the payload
        is empty (for example the browser captured a 0x0 canvas and sent
        ``data:,``) a message is printed and nothing else happens.
    """
    # Everything after the first comma is the base64 payload; partition() yields an
    # empty payload instead of raising when the comma is missing.
    _, _, payload = dataUrl.partition(",")
    img_data = base64.b64decode(payload)
    if not img_data:
        print("No image data received from the webcam. Start the feed, wait for the video, then capture again.")
        return

    # Force 3-channel RGB so the RGB->BGR conversion below is valid for any image
    # mode the decoder may return (grayscale, RGBA, palette, ...).
    img = Image.open(io.BytesIO(img_data)).convert("RGB")

    # Convert the image to OpenCV format
    img = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)

    # Perform emotion detection (uses the notebook-level `model`) and display the result
    processed_img = predict_emotion(img, model)
    cv2_imshow(processed_img)  # Display the processed image with predictions

# Register the callback function in Google Colab.
# The name 'notebook.get_webcam_image' must match the name the injected JavaScript
# passes to google.colab.kernel.invokeFunction. Registering first ensures the Python
# handler exists by the time the buttons can be clicked.
output.register_callback('notebook.get_webcam_image', get_webcam_image)

# Initialize the webcam feed and buttons (renders the HTML/JS in this cell's output)
start_webcam_feed()
