# DSA Deep Learning [3] - Hypertuning Our CNN

In [1]:
# Import statements
import os
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import Dense, Dropout, GlobalAveragePooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping, ModelCheckpoint
from tensorflow.keras.utils import to_categorical
from sklearn.utils import class_weight
from sklearn.model_selection import train_test_split
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image, UnidentifiedImageError
import io
import base64
from google.colab import files
from google.colab.patches import cv2_imshow
from IPython.display import display, HTML, Javascript
from google.colab import output, files
import zipfile

In [3]:
# Lookup table from predicted class index to a readable emotion name.
# The names are listed in FER2013 class order, so position == class index.
emotion_labels = dict(enumerate((
    "Angry",
    "Disgust",
    "Fear",
    "Happy",
    "Sad",
    "Surprise",
    "Neutral",
)))

# Updated predict_emotion function to handle multiple faces
def predict_emotion(frame, model):
    """Detect faces in ``frame``, classify each one's emotion, and annotate the frame.

    Faces are located with OpenCV's frontal-face Haar cascade on a grayscale
    copy of the frame. Every detected face is cropped from the original frame,
    resized to 48x48, scaled to [0, 1], and classified by ``model``. A
    rectangle and the predicted emotion label are then drawn for each face.

    Args:
        frame: Colour image array in OpenCV BGR channel order. It is
            annotated in place.
        model: Object exposing a Keras-style ``predict(batch)`` method.
            NOTE(review): assumed to accept a ``(n, 48, 48, channels)`` batch
            and return one score vector per face whose indices are keys of
            ``emotion_labels`` -- confirm against the trained model.

    Returns:
        The same ``frame`` object, with annotations drawn on it. If no face
        is detected the frame is returned unchanged.
    """
    # Haar cascades operate on single-channel images.
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    # Load the face detection model (Haar Cascade)
    face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')

    # Detect multiple faces in the frame
    faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5, minSize=(48, 48))

    # detectMultiScale may return an empty tuple or an array; len() handles both.
    if len(faces) == 0:
        return frame

    # Crop every face BEFORE drawing anything, so that rectangles/labels of one
    # face can never leak into the pixels fed to the model for an overlapping face.
    face_crops = [
        cv2.resize(frame[y:y + h, x:x + w], (48, 48))
        for (x, y, w, h) in faces
    ]

    # One batched prediction instead of one predict() call per face.
    face_batch = np.stack(face_crops) / 255.0  # Scale pixel values to [0, 1]
    emotion_predictions = model.predict(face_batch)
    emotion_indices = np.argmax(emotion_predictions, axis=1)

    for (x, y, w, h), emotion in zip(faces, emotion_indices):
        # Draw a rectangle around the face and add the emotion label
        cv2.rectangle(frame, (x, y), (x + w, y + h), (255, 0, 0), 2)
        emotion_label = emotion_labels[int(emotion)]  # Map the predicted emotion index to label

        # Keep the label inside the image when the face touches the top edge.
        label_y = max(y - 10, 15)
        cv2.putText(frame, emotion_label, (x, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    return frame


In [None]:
# JavaScript code to start the live webcam feed and capture image upon button click
def start_webcam_feed():
    """Render "Start Webcam Feed" and "Capture Photo" buttons via ``display(HTML(...))``.

    The injected JavaScript:
      * ``startVideo`` creates a ``<video>`` element and attaches the stream
        obtained from ``navigator.mediaDevices.getUserMedia``. If access
        fails, the user is alerted and the element is removed again so a
        later click can retry.
      * ``capturePhoto`` draws the current video frame onto a canvas, stops
        the stream, and sends the frame as a JPEG data URL to the Python
        callback registered as ``'notebook.get_webcam_image'``.

    Returns:
        None. The only effect is the HTML/JS displayed in the notebook output.
    """
    js = """
    <script>
        let videoElement = null;
        let stream = null;

        async function startVideo() {
            if (videoElement) {
                return;
            }
            videoElement = document.createElement('video');
            videoElement.setAttribute('autoplay', '');
            videoElement.setAttribute('playsinline', '');
            document.body.appendChild(videoElement);
            try {
                stream = await navigator.mediaDevices.getUserMedia({ video: true });
                videoElement.srcObject = stream;
            } catch (err) {
                console.error('Webcam not accessible:', err);
                alert('Webcam not accessible. You can upload an image instead.');
                // Roll back so a dead <video> is not left behind and a retry is possible.
                videoElement.remove();
                videoElement = null;
                stream = null;
            }
        }

        async function capturePhoto() {
            // Both must exist: the element is created before the stream is granted.
            if (!videoElement || !stream) {
                alert("Webcam is not active!");
                return;
            }
            // Before the first frame arrives the video reports 0x0; capturing
            // then would send an empty image to Python.
            if (!videoElement.videoWidth || !videoElement.videoHeight) {
                alert("Webcam is still starting. Please try again in a moment.");
                return;
            }
            const canvas = document.createElement('canvas');
            canvas.width = videoElement.videoWidth;
            canvas.height = videoElement.videoHeight;
            canvas.getContext('2d').drawImage(videoElement, 0, 0);

            // Stop video feed
            stream.getTracks().forEach(track => track.stop());
            stream = null;
            videoElement.remove();
            videoElement = null;

            // Convert the photo to base64 and send to Python
            const dataUrl = canvas.toDataURL('image/jpeg');
            google.colab.kernel.invokeFunction('notebook.get_webcam_image', [dataUrl], {});
        }

        // Add the start and capture buttons to the DOM
        const startButton = document.createElement('button');
        startButton.innerHTML = 'Start Webcam Feed';
        startButton.onclick = startVideo;
        document.body.appendChild(startButton);

        const captureButton = document.createElement('button');
        captureButton.innerHTML = 'Capture Photo';
        captureButton.onclick = capturePhoto;
        document.body.appendChild(captureButton);
    </script>
    """
    display(HTML(js))

# Callback function to receive the captured image in Python
def get_webcam_image(dataUrl):
    """Decode a captured webcam frame, annotate emotions on it, and display it.

    Args:
        dataUrl: Data URL string of the form ``"<header>,<base64 payload>"``,
            as sent by the page's ``capturePhoto`` JavaScript.

    Returns:
        None. On success the annotated image is shown with ``cv2_imshow``;
        if the payload cannot be decoded as an image, a message is printed
        and nothing is displayed.

    Note:
        Uses the module-level ``model``, which is not defined in this part of
        the notebook -- presumably loaded in another cell; verify before running.
    """
    # partition() tolerates a URL with no comma (payload becomes "") instead
    # of raising IndexError the way split(",")[1] would.
    _, _, payload = dataUrl.partition(",")
    try:
        img_data = base64.b64decode(payload)
        # Force 3-channel RGB so the RGB->BGR conversion below is always valid
        # (e.g. for grayscale or RGBA inputs).
        img = Image.open(io.BytesIO(img_data)).convert("RGB")
    except (ValueError, OSError):
        # ValueError covers malformed base64 (binascii.Error); OSError covers
        # PIL's UnidentifiedImageError and truncated image data.
        print("Could not decode the captured image. Please capture again.")
        return

    img = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
    processed_img = predict_emotion(img, model)
    cv2_imshow(processed_img)

# Register the Python callback under the name that the page's JavaScript
# passes to google.colab.kernel.invokeFunction when a photo is captured.
output.register_callback('notebook.get_webcam_image', get_webcam_image)

# Render the "Start Webcam Feed" and "Capture Photo" buttons.
# NOTE(review): no file-upload control is created here, even though the
# webcam-failure alert tells the user they can upload an image instead.
start_webcam_feed()

