In [9]:
"""This module contains pipeline for extrating EAR and MAR from camera images
using dlib or mediapipe libraries.

mediapipe has the better performance and accuracy than dlib, please find the attached research paper
for more details: https://ieeexplore.ieee.org/document/10039811
"""

import mediapipe as mp
import cv2
import numpy as np
from typing import Tuple, Union


class CameraBasedFeatures:
    """Class for extracting Eye Aspect Ratio (EAR) and Mouth Aspect Ratio (MAR) from
    camera images using MediaPipe."""

    def __init__(self):
        """Initialize the MediaPipe Face Mesh solution."""

        self.RIGHT_EYE_EAR = [33, 159, 158, 133, 153, 145]
        self.LEFT_EYE_EAR = [362, 380, 374, 263, 386, 385]

        self.min_ear = None
        self.max_ear = None
        self.ear_threshold = 0.2  # Default EAR threshold for blink detection
        self.blink_counter = 0
        self.frame_counter = 0

        self.mp_face_mesh = mp.solutions.face_mesh
        self.ear_values = []
        self.frame_numbers = []
        self.face_mesh = self.mp_face_mesh.FaceMesh(
            static_image_mode=True,
            max_num_faces=1,
            refine_landmarks=True,
            min_detection_confidence=0.5,
            min_tracking_confidence=0.5,
        )
        self.mp_Draw = mp.solutions.drawing_utils
        self.drawSpecs = self.mp_Draw.DrawingSpec(thickness=1, circle_radius=2)

    def extract_landmarks(
        self, image: np.ndarray
    ) -> Union[mp.solutions.face_mesh.FaceMesh, None]:
        """Extract face landmarks from the image using MediaPipe.

        Args:
            image (np.ndarray): Input image in BGR format.

        Returns:
            Union[mp.solutions.face_mesh.FaceMesh, None]: Extracted landmarks or None if no face is detected.
        """
        results = self.face_mesh.process(image)
        if results.multi_face_landmarks:
            return results.multi_face_landmarks[0]
        return None

    def eye_aspect_ratio(self, eye_landmarks, landmarks):
        """
        Calculate the Eye Aspect Ratio (EAR) for a given eye.

        The EAR is calculated using the formula:
        EAR = (||p2-p6|| + ||p3-p5||) / (2||p1-p4||)
        where p1, p2, p3, p4, p5, p6 are 2D landmark points.

        Args:
            eye_landmarks (list): List of indices for the eye landmarks
            landmarks (list): List of all facial landmarks coordinates

        Returns:
            float: The calculated Eye Aspect Ratio
        """
        A = np.linalg.norm(
            np.array(landmarks[eye_landmarks[1]])
            - np.array(landmarks[eye_landmarks[5]])
        )
        B = np.linalg.norm(
            np.array(landmarks[eye_landmarks[2]])
            - np.array(landmarks[eye_landmarks[4]])
        )
        C = np.linalg.norm(
            np.array(landmarks[eye_landmarks[0]])
            - np.array(landmarks[eye_landmarks[3]])
        )
        ear = (A + B) / (2.0 * C)
        return ear

    def calculate_avg_ear(self, face_landmarks):

        ear = 0.0  # Initialize with default value
        # Calculate EAR for both eyes and average them
        if len(face_landmarks) > 0:
            right_ear = self.eye_aspect_ratio(self.RIGHT_EYE_EAR, face_landmarks)
            left_ear = self.eye_aspect_ratio(self.LEFT_EYE_EAR, face_landmarks)
            ear = (right_ear + left_ear) / 2.0
            return ear
        return ear

    def update_blink_count(self, ear):
        """
        Update blink counter based on current eye aspect ratio.

        This method implements the blink detection logic:
        - If EAR is below threshold, increment frame counter
        - If EAR returns above threshold and enough consecutive frames were counted,
          increment blink counter

        Args:
            ear (float): Current eye aspect ratio

        Returns:
            bool: True if a new blink was detected, False otherwise
        """
        blink_detected = False

        if ear < 0.2:
            self.frame_counter += 1
        else:
            self.blink_counter += 1
            blink_detected = True
            self.frame_counter = 0

        return blink_detected

    @staticmethod
    def perclos_metrics(number_of_frames_with_ear: int, perclos_window: int) -> float:
        """Calculate the PERCLOS (Percentage of Eye Closure) metric.

        Args:
            number_of_frames_with_ear (int): Number of frames where EAR is below the threshold.
            perclos_window (int): Number of frames in the window to calculate PERCLOS.

        Returns:
            float: The PERCLOS value as a percentage.
        """

        return (number_of_frames_with_ear / perclos_window) * 100.0

    def update_ear_threeshold(self, ear: float) -> float:
        """Update the EAR threshold based on the current EAR value.

        Args:
            ear (float): Current eye aspect ratio.

        Returns:
            float: Updated EAR threshold.
        """
        if self.min_ear is None or ear < self.min_ear:
            self.min_ear = ear
        if self.max_ear is None or ear > self.max_ear:
            self.max_ear = ear
        self.ear_threeshold = (self.min_ear + self.max_ear) / 2.0

In [20]:
def main():
    """Main function to demonstrate the usage of CameraBasedFeatures class."""
    cap = cv2.VideoCapture(0)
    feature_extractor = CameraBasedFeatures()

    fps = int(cap.get(cv2.CAP_PROP_FPS))
    perclos_window = fps
    frames = 0
    eyes_closed = 0
    blinks = 0
    print("Starting camera feed... Press 'q' to quit.")
    while cap.isOpened():
        print("camera feed")
        ret, frame = cap.read()
        if not ret:
            break

        landmarks = feature_extractor.extract_landmarks(frame)
        if landmarks:
            landmark_array = np.array(
                [[landmark.x, landmark.y] for landmark in landmarks.landmark]
            )

            ear = feature_extractor.calculate_avg_ear(landmark_array)
            feature_extractor.update_ear_threeshold(ear)

            if feature_extractor.update_blink_count(ear):
                blinks += 1
                print(f"Blink detected! Total blinks: {blinks}")

            if ear < 0.2:
                eyes_closed += 1
                print(f"Eyes closed for {eyes_closed} frames.")

            if frames > perclos_window:
                perclos_value = feature_extractor.perclos_metrics(eyes_closed, frames)
                print(
                    "===================================================================="
                    )
                print(f"perclos value: {perclos_value:.2f}")
                eyes_closed = 0
                blinks = 0

            frames += 1

        cv2.imshow("Camera Feed", frame)

        if cv2.waitKey(1) & 0xFF == ord("q"):
            break

    cap.release()
    cv2.destroyAllWindows()

In [21]:
main()

Starting camera feed... Press 'q' to quit.
camera feed
Blink detected! Total blinks: 1
camera feed
Blink detected! Total blinks: 2
camera feed
Blink detected! Total blinks: 3
camera feed
Blink detected! Total blinks: 4
camera feed
Blink detected! Total blinks: 5
camera feed
Blink detected! Total blinks: 6
camera feed
Blink detected! Total blinks: 7
camera feed
Blink detected! Total blinks: 8
camera feed
Blink detected! Total blinks: 9
camera feed
Blink detected! Total blinks: 10
camera feed
Blink detected! Total blinks: 11
camera feed
Blink detected! Total blinks: 12
camera feed
Blink detected! Total blinks: 13
camera feed
Blink detected! Total blinks: 14
camera feed
Blink detected! Total blinks: 15
camera feed
Blink detected! Total blinks: 16
camera feed
Blink detected! Total blinks: 17
camera feed
Blink detected! Total blinks: 18
camera feed
Blink detected! Total blinks: 19
camera feed
Blink detected! Total blinks: 20
camera feed
Blink detected! Total blinks: 21
camera feed
Blink det

In [17]:
cap = cv2.VideoCapture(0)
feature_extractor = CameraBasedFeatures()

fps = int(cap.get(cv2.CAP_PROP_FPS))
perclos_window = fps * 30
frames = 0
eyes_closed = 0
blinks = 0
print("Starting camera feed... Press 'q' to quit.")
# while cap.isOpened():
#     print("camera feed")
ret, frame = cap.read()

landmarks = feature_extractor.extract_landmarks(frame)
if landmarks:
    landmark_array = np.array(
        [[landmark.x, landmark.y] for landmark in landmarks.landmark]
    )

    ear = feature_extractor.calculate_avg_ear(landmark_array)
    print(f"Average EAR: {ear}")
    feature_extractor.update_ear_threeshold(ear)

    if feature_extractor.update_blink_count(ear):
        blinks += 1
        print(f"Blink detected! Total blinks: {blinks}")

    if ear < feature_extractor.ear_threshold:
        eyes_closed += 1
        print(f"Eyes closed for {eyes_closed} frames.")

    if frames > perclos_window:
        perclos_value = feature_extractor.perclos_metrics(eyes_closed, frames)
        print(f"PERCLOS: {perclos_value:.2f}%")
        eyes_closed = 0
        blinks = 0

    frames += 1

cv2.imshow("Camera Feed", frame)

if cv2.waitKey(1) & 0xFF == ord("q"):
    break

cap.release()
cv2.destroyAllWindows()

Starting camera feed... Press 'q' to quit.


SyntaxError: 'break' outside loop (2314539872.py, line 43)

In [11]:
landmarks.landmark[10]

x: 0.5542974472045898
y: 0.4416799545288086
z: -0.0013952141162008047

In [7]:
frame.shape

NameError: name 'frame' is not defined

In [16]:
landmark_array = np.array([[landmark.x, landmark.y] for landmark in landmarks.landmark])
print(landmark_array)


[[0.55705756 0.54557145]
 [0.55178964 0.53027225]
 [0.55591381 0.53632545]
 [0.55001038 0.50926763]
 [0.55123466 0.5236997 ]
 [0.55205929 0.51507396]
 [0.55508685 0.49424151]
 [0.53238344 0.49797752]
 [0.55541337 0.48055205]
 [0.55468911 0.47258222]
 [0.55429745 0.44167995]
 [0.55719769 0.54812002]
 [0.55756742 0.55085629]
 [0.55807692 0.55288523]
 [0.55781811 0.55261308]
 [0.55751121 0.55488223]
 [0.55743557 0.55808383]
 [0.55763346 0.56180859]
 [0.56141591 0.57829905]
 [0.55313486 0.53368801]
 [0.55069613 0.53382415]
 [0.52291518 0.47645929]
 [0.54487908 0.50124407]
 [0.54064429 0.5025121 ]
 [0.53675836 0.50288832]
 [0.53232288 0.49979737]
 [0.54801995 0.49929824]
 [0.53459591 0.48459136]
 [0.53979522 0.48444548]
 [0.53068316 0.48661315]
 [0.52902877 0.48973984]
 [0.53086209 0.50386047]
 [0.54817295 0.58933735]
 [0.53121871 0.49608991]
 [0.52426851 0.50263935]
 [0.52853799 0.4996179 ]
 [0.53947306 0.52648807]
 [0.55246192 0.54561883]
 [0.55377907 0.55165994]
 [0.54867935 0.54953045]
