In [None]:
import cv2
import numpy as np
import time
import keyboard
from tensorflow.keras.models import load_model
from PIL import Image, ImageDraw, ImageFont, ImageFilter
import mediapipe as mp

In [None]:
# Load the three trained Keras models in one ordered unpacking.
# The names are positional: model1 = age, model2 = emotion, model3 = gender.
model1, model2, model3 = (
    load_model(model_path)
    for model_path in (
        '../models/best_age_model.keras',
        '../models/best_emotion_model.keras',
        '../models/best_gender_model.keras',
    )
)

In [None]:
# MediaPipe solution handles: the face-detection module and the drawing helpers.
mp_face_detection, mp_drawing = (
    mp.solutions.face_detection,
    mp.solutions.drawing_utils,
)

In [None]:
# Gender captions keyed by the rounded output of the gender model (0 or 1).
gender_dict = dict(enumerate(('Male \u2642', 'Female \u2640')))

# Emotion display table, one row per class: (key, caption with emoji, colour).
# Row order is significant: the prediction code picks the key at the position
# given by the emotion model's argmax, so rows must follow the model's class order.
# Colours are passed to Pillow as the text fill on an RGB image.
_EMOTION_ROWS = (
    ('Angry', 'Angry 😠', (255, 0, 0)),        # Red
    ('Happy', 'Happy 😃', (0, 255, 0)),        # Green
    ('Neutral', 'Neutral 😐', (255, 255, 255)),  # White
    ('Sad', 'Sad 😢', (0, 0, 255)),            # Blue
    ('Surprise', 'Surprised 😲', (255, 255, 0)),  # Yellow
)
emotion_labels = {
    emotion_key: (caption, colour)
    for emotion_key, caption, colour in _EMOTION_ROWS
}

In [None]:
# Function to preprocess the image for prediction
def preprocess_image(img, bbox):
    """Crop a face out of a frame and turn it into a model-ready tensor.

    Args:
        img: Frame array indexed ``[row, column, channel]``. It is converted
            with ``cv2.COLOR_BGR2GRAY``, so BGR channel order is expected.
        bbox: ``(x, y, w, h)`` face box in pixel coordinates. The box may
            extend past the frame edges; the part outside is discarded.

    Returns:
        Array of shape ``(1, 128, 128, 1)``: the grayscale crop resized to
        128x128 with pixel values divided by 255.0.

    Raises:
        ValueError: If the box does not overlap the frame at all.
    """
    x, y, w, h = bbox
    frame_h, frame_w = img.shape[:2]

    # Clamp the box to the frame. Detectors can report boxes that start at a
    # negative coordinate; used directly as a slice start, a negative value is
    # read by NumPy as "count from the end" and produces an empty or wrong crop.
    left, top = max(0, x), max(0, y)
    right, bottom = min(frame_w, x + w), min(frame_h, y + h)
    if right <= left or bottom <= top:
        raise ValueError(
            f"Bounding box {tuple(bbox)} does not overlap the {frame_w}x{frame_h} frame"
        )

    face = img[top:bottom, left:right]  # Crop the face
    face = cv2.cvtColor(face, cv2.COLOR_BGR2GRAY)  # Convert to grayscale
    face = cv2.resize(face, (128, 128))  # Resize to model input size
    face = face / 255.0  # Normalize
    face = np.expand_dims(face, axis=-1)  # Add channel dimension
    face = np.expand_dims(face, axis=0)  # Add batch dimension
    return face

In [None]:
# Function to make predictions for gender, age, and emotion
def predict_gender_age_emotion(img, bbox):
    """Run the age, emotion and gender models on one face and format the results.

    Args:
        img: Frame containing the face; forwarded to ``preprocess_image``.
        bbox: ``(x, y, w, h)`` face box; forwarded to ``preprocess_image`` and
            returned unchanged as the last element of the result.

    Returns:
        ``(gender_label, (age_lower, age_upper), emotion_caption, emotion_color, bbox)``
        where the labels come from ``gender_dict`` and ``emotion_labels``.

    Raises:
        IndexError: If the emotion model's winning class index has no entry
            in ``emotion_labels``.
    """
    face_img = preprocess_image(img, bbox)

    # Predictions from the three models. verbose=0 keeps Keras from printing a
    # progress bar for every call, which would otherwise flood the notebook
    # output when this runs once per face per frame.
    pred_age = model1.predict(face_img, verbose=0)
    pred_emotion = model2.predict(face_img, verbose=0)
    pred_gender = model3.predict(face_img, verbose=0)

    # Process gender prediction: round the scalar to a class index and keep it
    # inside {0, 1} so an output slightly outside [0, 1] cannot raise KeyError.
    pred_gender_prob = pred_gender[0][0].item()
    gender_index = min(max(round(pred_gender_prob), 0), 1)
    pred_gender_label = gender_dict[gender_index]

    # Process age prediction: report a range of +/-10% around the estimate
    # (at least +/-2). A regression output can dip below zero, so floor it at 0
    # to keep both bounds non-negative and ordered.
    pred_age_value = max(0.0, pred_age[0][0].item())
    range_width = max(2, int(0.1 * pred_age_value))
    pred_age_lower = max(0, round(pred_age_value - range_width))
    pred_age_upper = round(pred_age_value + range_width)

    # Process emotion prediction: the winning class index selects a key by its
    # position in emotion_labels.
    emotion_keys = list(emotion_labels.keys())
    emotion_index = int(np.argmax(pred_emotion[0]))
    if emotion_index >= len(emotion_keys):
        raise IndexError(
            f"Emotion model predicted class {emotion_index}, but emotion_labels "
            f"only defines {len(emotion_keys)} classes"
        )
    pred_emotion_label, emotion_color = emotion_labels[emotion_keys[emotion_index]]

    return pred_gender_label, (pred_age_lower, pred_age_upper), pred_emotion_label, emotion_color, bbox

In [None]:
# Path to the Segoe UI Emoji font (on Windows)
font_path = 'C:/Windows/Fonts/seguiemj.ttf'

# Fonts already loaded, keyed by (path, size), so the font file is not re-read
# from disk for every label of every frame.
_font_cache = {}


def _load_label_font(path, size):
    """Return the font for (path, size), using Pillow's built-in font if the file can't be read."""
    import warnings

    key = (path, size)
    if key not in _font_cache:
        try:
            _font_cache[key] = ImageFont.truetype(path, size)
        except OSError:
            # The default path only exists on Windows; degrade instead of crashing.
            warnings.warn(
                f"Could not load font {path!r}; falling back to Pillow's default font "
                "(emoji will not be rendered)."
            )
            try:
                _font_cache[key] = ImageFont.load_default(size)
            except TypeError:
                # Older Pillow releases do not accept a size argument.
                _font_cache[key] = ImageFont.load_default()
    return _font_cache[key]


# Function to draw text with Pillow
def draw_text_with_pillow(frame, text, position, font_path=font_path, font_size=30, text_color=(255, 255, 255), padding=10):
    """Render ``text`` onto an OpenCV frame using Pillow and return the new frame.

    Args:
        frame: BGR image array as used by OpenCV; it is not modified.
        text: Text to draw (may contain emoji when the font supports them).
        position: ``(x, y)`` passed to Pillow's ``ImageDraw.text``.
        font_path: TrueType font file to use. If it cannot be read, Pillow's
            built-in default font is used instead.
        font_size: Font size passed to Pillow.
        text_color: Fill colour in RGB order (the text is drawn on an RGB image).
        padding: Currently unused; kept so existing callers keep working.

    Returns:
        A new BGR image array with the text drawn on it.
    """
    # Convert OpenCV image to Pillow Image
    pil_image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    canvas = ImageDraw.Draw(pil_image)

    # Load a font with emoji support (cached after the first call)
    font = _load_label_font(font_path, font_size)
    if not isinstance(font, ImageFont.FreeTypeFont):
        # Pillow's legacy bitmap fallback can only encode Latin-1 text, so drop
        # emoji and other unsupported characters rather than raise.
        text = text.encode('latin-1', 'ignore').decode('latin-1')

    # Draw the text with Pillow
    canvas.text(position, text, font=font, fill=text_color)  # Colored text

    # Convert Pillow Image back to OpenCV
    return cv2.cvtColor(np.array(pil_image), cv2.COLOR_RGB2BGR)

In [None]:
# Open a connection to the webcam
cap = cv2.VideoCapture(0)

# Vertical space (pixels) taken by the three stacked labels: they are drawn at
# offsets 0, 40 and 80 from the top of the stack.
LABEL_STACK_HEIGHT = 120

try:
    if not cap.isOpened():
        print("Could not open the webcam (device index 0).")

    # Initialize the MediaPipe face detection model
    with mp_face_detection.FaceDetection(model_selection=0, min_detection_confidence=0.5) as face_detection:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            frame_h, frame_w = frame.shape[:2]

            # Convert the image color format from BGR to RGB
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            # Perform face detection using MediaPipe
            results = face_detection.process(rgb_frame)

            # Draw on a copy so every face is predicted from the untouched
            # camera frame, not from one already covered by earlier overlays.
            annotated = frame.copy()

            if results.detections:
                for detection in results.detections:
                    # Get bounding box of the detected face (relative -> pixels)
                    rel_box = detection.location_data.relative_bounding_box
                    x, y = int(rel_box.xmin * frame_w), int(rel_box.ymin * frame_h)
                    w, h = int(rel_box.width * frame_w), int(rel_box.height * frame_h)

                    # Clamp the box to the frame: detections near an edge can
                    # start at negative coordinates or run past the border.
                    left, top = max(0, x), max(0, y)
                    right, bottom = min(frame_w, x + w), min(frame_h, y + h)
                    if right <= left or bottom <= top:
                        continue  # nothing of this face is inside the frame
                    x, y, w, h = left, top, right - left, bottom - top

                    # Make predictions using the cropped face
                    pred_gender, (pred_age_lower, pred_age_upper), pred_emotion, emotion_color, _ = \
                        predict_gender_age_emotion(frame, (x, y, w, h))

                    # Draw a rectangle around the face
                    cv2.rectangle(annotated, (x, y), (x + w, y + h), (0, 255, 0), 2)

                    # Labels normally sit above the box; when the face is too
                    # close to the top edge, move them below it (kept on-screen).
                    label_top = y - LABEL_STACK_HEIGHT
                    if label_top < 0:
                        label_top = max(0, min(y + h + 10, frame_h - LABEL_STACK_HEIGHT))

                    # Draw the predictions using Pillow
                    annotated = draw_text_with_pillow(annotated, f'Emotion: {pred_emotion}', (x, label_top), text_color=emotion_color, font_size=30)
                    annotated = draw_text_with_pillow(annotated, f'Gender: {pred_gender}', (x, label_top + 40), text_color=(255, 165, 0), font_size=28)
                    annotated = draw_text_with_pillow(annotated, f'Age: {pred_age_lower}-{pred_age_upper}', (x, label_top + 80), text_color=(255, 250, 205), font_size=28)

            # Display the frame
            cv2.imshow('Gender, Age, and Emotion Prediction', annotated)

            if keyboard.is_pressed('ctrl+q'):
                print("Ctrl + Q pressed, exiting...")
                break

            # Break the loop on 'q' key press
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
finally:
    # Always free the camera and close the preview window, including when the
    # loop is interrupted or a prediction raises; otherwise the webcam stays
    # locked until the kernel is restarted.
    cap.release()
    cv2.destroyAllWindows()

In [None]:
# When everything is done, release the capture and close windows.
# Teardown cell: frees the webcam handle held by `cap` and closes every OpenCV
# window (the 'Gender, Age, and Emotion Prediction' preview).
# NOTE(review): because this is a separate cell, it does not run automatically
# if the capture cell stops with an error - run it by hand in that case.
cap.release()
cv2.destroyAllWindows()