# In [1]:  (notebook cell marker)
# TODO: finalize the import list.
import cv2
import numpy as np
from tensorflow.keras.preprocessing import image
from tensorflow.keras.models import load_model
from tensorflow.keras.layers import DepthwiseConv2D

def custom_depthwise_conv2d(**kwargs):
    """Build a DepthwiseConv2D layer, ignoring any 'groups' keyword.

    Every keyword argument except 'groups' is forwarded unchanged to
    DepthwiseConv2D.

    NOTE(review): presumably the saved model config carries a 'groups' entry
    that the installed DepthwiseConv2D does not accept -- confirm against the
    Keras version in use.
    """
    layer_kwargs = {key: value for key, value in kwargs.items() if key != 'groups'}
    return DepthwiseConv2D(**layer_kwargs)

# Load the fine-tuned emotion classifier. DepthwiseConv2D layers are built
# through custom_depthwise_conv2d, which discards the 'groups' argument.
model_best = load_model('mobilenet_face_ft.h5', custom_objects={'DepthwiseConv2D': custom_depthwise_conv2d})

# Emotions the model was trained on:
# ['Anger', 'Disgust', 'Fear', 'Happiness', 'Neutral', 'Sadness', 'Surprise']
#
# Display label for each trained emotion, position by position. The seven
# trained emotions collapse into four displayed states: Happiness -> 'Happy',
# Neutral -> 'Neutral', Sadness -> 'Sad', and everything else -> 'Confused'.
class_names = ['Confused', 'Confused', 'Confused', 'Happy', 'Neutral', 'Sad', 'Confused']

# Load the Haar cascade used for frontal face detection.
face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
if face_cascade.empty():
    # Fail here with a clear message instead of later inside detectMultiScale.
    raise RuntimeError('Could not load haarcascade_frontalface_default.xml')

# Connect to the camera. Index 0 is used here.
# NOTE(review): a phone or external camera may need a different index or a
# stream URL -- confirm for the target device.
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    # Fail here with a clear message instead of crashing on the first frame.
    cap.release()
    raise RuntimeError('Could not open camera at index 0')

def select_emotion_label(predictions, labels, threshold=0.4, fallback="Neutral"):
    """Return the display label of the highest-scoring class.

    Args:
        predictions: Array-like of class scores; it is flattened before use,
            so a (1, n) batch of one works the same as a flat vector.
        labels: Sequence mapping a class index to its display label.
        threshold: Minimum top score required to trust the prediction.
        fallback: Label returned when the top score is below ``threshold``
            or when ``predictions`` is empty.

    Returns:
        ``labels[argmax]`` if the top score reaches ``threshold``, otherwise
        ``fallback``.
    """
    scores = np.asarray(predictions).ravel()
    if scores.size == 0:
        return fallback
    best = int(np.argmax(scores))
    # A NaN top score compares False here, so it also yields the fallback.
    if scores[best] >= threshold:
        return labels[best]
    return fallback


def run_emotion_detection(threshold=0.4):
    """Run the webcam loop: detect faces, classify emotion, draw the overlay.

    Reads frames from the module-level ``cap`` until a frame cannot be read
    or the user presses 'q' in the preview window. Each detected face is
    classified with ``model_best`` and annotated on the frame. The camera and
    all OpenCV windows are released on exit, even if an error occurs.

    Args:
        threshold: Minimum top score required to show the predicted label;
            below it the label defaults to "Neutral".
    """
    try:
        while True:
            # Capture frame by frame; stop cleanly when the camera yields nothing.
            ret, frame = cap.read()
            if not ret or frame is None:
                break

            # The cascade is run on a grayscale copy of the frame.
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

            # Detect faces in the frame
            faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5, minSize=(90, 90))

            # Process each detected face
            for (x, y, w, h) in faces:
                # Extract the face region from the colour frame
                face_roi = frame[y:y + h, x:x + w]

                # Build a (1, 224, 224, 3) RGB batch for the model.
                # NOTE(review): no scaling/mean subtraction is applied here --
                # confirm this matches the preprocessing used in training.
                face_image = cv2.resize(face_roi, (224, 224))
                face_image = cv2.cvtColor(face_image, cv2.COLOR_BGR2RGB)
                face_image = image.img_to_array(face_image)
                face_image = np.expand_dims(face_image, axis=0)

                # verbose=0 keeps Keras from printing a progress bar per face.
                predictions = model_best.predict(face_image, verbose=0)
                emotion_label = select_emotion_label(predictions, class_names, threshold)

                # Display the emotion label on the frame
                cv2.putText(frame, f'Emotion: {emotion_label}', (x, y - 10), cv2.FONT_HERSHEY_COMPLEX,
                            0.9, (255, 0, 255), 2)

                # Draw a rectangle around the detected face
                cv2.rectangle(frame, (x, y), (x + w, y + h), (255, 0, 255), 2)

            # Display the resulting frame
            cv2.imshow('Emotion Detection', frame)

            # Break the loop if the 'q' key is pressed
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release the webcam and close the window, whatever ended the loop.
        cap.release()
        cv2.destroyAllWindows()


if __name__ == "__main__":
    run_emotion_detection()

