# A real-time Facial Keypoint Detection Project

In [1]:
import cv2
import numpy as np
import tensorflow as tf
import time

In [2]:
# Load the pre-trained Haar Cascade face detection classifier
face_cascade = cv2.CascadeClassifier("./detector_architectures/haarcascade_frontalface_default.xml")

# Load the trained model to detect Facial keypoints
model = tf.keras.models.load_model("./KeypointsDetector.keras")

In [3]:
# Initialize the webcam video capture
cap = cv2.VideoCapture(0)   # 0 indicates the default camera

# Loop to capture video frames until the user presses 'q'
while cap.isOpened():
    # Read a frame from the camera
    _, img = cap.read()

    # Convert the captured frame to grayscale for face detection
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Use the face_cascade to detect faces in the grayscale image
    faces = face_cascade.detectMultiScale(gray, 1.1, 4)

    # Check if faces are detected
    if len(faces) != 0:
        
        for (x, y, w, h) in faces:
            # Extract the Region of Interest (ROI) around the face with added margin for better landmarks detection
            roi_gray = gray[y-100: y + h+100, x-100: x + w+100]

            # Get the dimensions of the ROI for scaling back landmarks
            roi_h, roi_w = roi_gray.shape[:2]

            # Resize the ROI to the input size expected by the trained model (224x224)
            roi_resized = cv2.resize(roi_gray, (224, 224))

            # Add a batch dimension to the image for model input
            roi_resized = np.expand_dims(roi_resized, 0)

            # Normalize the image (pixel values between 0 and 1)
            roi_scaled = roi_resized / 255.0

            # Predict facial landmarks using the trained model
            predictions = model.predict(roi_scaled)

            # Undo the normalization of predicted keypoints, std = 30 and mean = 118, the values from the original projec but adjusted for better
            un_norm_predictions = predictions * 30 + 118

            # Reshape the landmark to (N, 2)
            landmarks  = un_norm_predictions.reshape(-1, 2)

            
            w_sf = roi_w / 224  # Scaling factor for the width: Converts from the model's input size (224px) to the width of the ROI (region of interest) in the original image
            h_sf = roi_h / 224  # Scaling factor for the height: Converts from the model's input size (224px) to the height of the ROI in the original image
            x_roi_gray = x - 100  # x-coordinate of the ROI in the original image, adjusting for the margin (100px) subtracted around the detected face in the x-direction
            y_roi_gray = y - 100  # y-coordinate of the ROI in the original image, adjusting for the margin (100px) subtracted around the detected face in the y-direction

            # Scale the landmarks back to the original image size and draw them on the image
            for (pts_x, pts_y) in landmarks:
                x_center = int(pts_x * w_sf + x_roi_gray) 
                y_center = int(pts_y * h_sf + y_roi_gray)   

                # Draw a circle for each predicted landmark point on the original image
                cv2.circle(img, (x_center, y_center), 1, (0, 255, 0), 3)  # Green circle for landmarks
                
                    
        # Display the image with landmarks drawn    
        cv2.imshow("img", img)

    # Exit the loop if 'q' is pressed
    if cv2.waitKey(1) == ord('q'):
        cv2.destroyAllWindows()
        break

# Release the video capture object and close all OpenCV windows
cap.release()    


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 638ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 257ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 117ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 114ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 104ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 116ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 113ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 108ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 94ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 91ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 83ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 86ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 82ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0