In [3]:
import cv2
import matplotlib.pyplot as plt
import mediapipe as mp
import numpy as np
import pandas as pd

In [4]:
mp_face_mesh = mp.solutions.face_mesh.FaceMesh(
    static_image_mode=False,
    max_num_faces=1,
    refine_landmarks=True,
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5,
)

In [5]:
LEFT_EYE_LANDMARKS = [463, 398, 384, 385, 386, 387, 388, 466, 263, 249, 390, 373, 374,380, 381, 382, 362]
RIGHT_EYE_LANDMARKS = [33, 246, 161, 160, 159, 158, 157, 173, 133, 155, 154, 153, 145, 144, 163, 7]
LEFT_EYEBROW_LANDMARKS = [70, 63, 105, 66, 107]
RIGHT_EYEBROW_LANDMARKS = [336, 296, 334, 293, 300]
MOUTH_LANDMARKS = [0, 267, 269, 270, 409, 306, 375, 321, 405, 314, 17, 84, 181, 91, 146, 61, 185, 40, 39, 37]
HEAD_POSE_LANDMARK = [1,152,263,33,61,291]

In [6]:
def extract_landmarks(img):
    landmarks = {}

    imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    results = mp_face_mesh.process(imgRGB)

    if results.multi_face_landmarks:
        all_landmark = results.multi_face_landmarks[0]
        landmarks["left_eye_landmarks"] = []
        landmarks["right_eye_landmarks"] = []
        landmarks["left_eyebrow_landmarks"] = []
        landmarks["right_eyebrow_landmarks"] = []
        landmarks["mouth_landmarks"] = []
        landmarks["head_pose_landmarks"] = []

        for i, lm in enumerate(all_landmark.landmark):
            h, w, ic = img.shape
            x, y = int(lm.x * w), int(lm.y * h)  # convert normalized coor to pixel

            if i in LEFT_EYE_LANDMARKS:
                landmarks["left_eye_landmarks"].append((x, y))
            if i in RIGHT_EYE_LANDMARKS:
                landmarks["right_eye_landmarks"].append((x, y))
            if i in LEFT_EYEBROW_LANDMARKS:
                landmarks["left_eyebrow_landmarks"].append((x, y))
            if i in RIGHT_EYEBROW_LANDMARKS:
                landmarks["right_eyebrow_landmarks"].append((x, y))
            if i in MOUTH_LANDMARKS:
                landmarks["mouth_landmarks"].append((x, y))
            if i in HEAD_POSE_LANDMARK:
                landmarks["head_pose_landmarks"].append((x, y))

    return landmarks

In [7]:
def draw_landmarks_on_frame(frame, landmarks, metrics=None):
    GREEN = (0, 255, 0)
    BLUE = (255, 0, 0)
    RED = (0, 0, 255)
    YELLOW = (0, 255, 255)

    for point in landmarks.get("left_eye_landmarks", []) + landmarks.get("right_eye_landmarks", []):
        cv2.circle(frame, point, 2, GREEN, -1)

    for point in landmarks.get("mouth_landmarks", []):
        cv2.circle(frame, point, 2, BLUE, -1)

    for point in landmarks.get("left_eyebrow_landmarks", []) + landmarks.get("right_eyebrow_landmarks", []):
        cv2.circle(frame, point, 2, RED, -1)

    for point in landmarks.get("head_pose_landmarks", []):
        cv2.circle(frame, point, 2, YELLOW, -1)

    if metrics:
        ear, mor, eyebrow_dist, pitch, yaw, roll = metrics
        y_offset = 30
        text_lines = [
            f"EAR: {ear:.3f}" if ear is not None else "EAR: N/A",
            f"MOR: {mor:.3f}" if mor is not None else "MOR: N/A",
            f"Eyebrow-Eye Dist: {eyebrow_dist:.2f}" if eyebrow_dist is not None else "Eyebrow-Eye Dist: N/A",
            f"Pitch: {pitch:.2f}, Yaw: {yaw:.2f}, Roll: {roll:.2f}" if pitch is not None else "Head Pose: N/A"
        ]
        for i, line in enumerate(text_lines):
            cv2.putText(frame, line, (10, y_offset + 25 * i), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 0), 2)


In [11]:
def compute_facial_metrics(landmarks, img):
    metrics = []

    def euclidean(p1, p2):
        return np.linalg.norm(np.array(p1) - np.array(p2))

    #  EAR
    def eye_aspect_ratio(eye_landmarks):
        if len(eye_landmarks) < 6:
            return None
        A = euclidean(eye_landmarks[1], eye_landmarks[5])
        B = euclidean(eye_landmarks[2], eye_landmarks[4])
        C = euclidean(eye_landmarks[0], eye_landmarks[3])
        ear = (A + B) / (2.0 * C) if C != 0 else 0
        return ear

    # MOR
    def mouth_opening_ratio(mouth_landmarks):
        if len(mouth_landmarks) < 2:
            return None
        top_lip = mouth_landmarks[13]
        bottom_lip = mouth_landmarks[14]
        left = mouth_landmarks[0]
        right = mouth_landmarks[5]
        vertical = euclidean(top_lip, bottom_lip)
        horizontal = euclidean(left, right)
        ratio = vertical / horizontal if horizontal != 0 else 0
        return ratio

    # eyebrow to eye distance
    def eyebrow_eye_distance(eyebrow_pts, eye_pts):
        if not eyebrow_pts or not eye_pts:
            return None
        distances = [
            abs(eyebrow[1] - eye[1]) for eyebrow in eyebrow_pts for eye in eye_pts
        ]
        return np.mean(distances) if distances else None

    # head pose
    def estimate_head_pose(head_pose_landmarks, img_shape):
        model_points = np.array(
            [
                (0.0, 0.0, 0.0),
                (0.0, -63.6, -12.5),
                (-43.3, 32.7, -26.0),
                (43.3, 32.7, -26.0),
                (-28.9, -28.9, -24.1),
                (28.9, -28.9, -24.1),
            ]
        )

        image_points = np.array(head_pose_landmarks, dtype="double")
        h, w = img_shape[:2]
        focal_length = w
        center = (w / 2, h / 2)
        camera_matrix = np.array(
            [[focal_length, 0, center[0]], [0, focal_length, center[1]], [0, 0, 1]],
            dtype="double",
        )
        dist_coeffs = np.zeros((4, 1))

        success, rotation_vector, _ = cv2.solvePnP(
            model_points, image_points, camera_matrix, dist_coeffs
        )

        if not success:
            pass

        rmat, _ = cv2.Rodrigues(rotation_vector)
        angles, _, _, _, _, _ = cv2.RQDecomp3x3(rmat)
        return angles  # pitch, yaw, roll

    # metrics
    left_eye_ear = eye_aspect_ratio(landmarks["left_eye_landmarks"])
    right_eye_ear = eye_aspect_ratio(landmarks["right_eye_landmarks"])
    if left_eye_ear is not None and right_eye_ear is not None:
        average_ear = (left_eye_ear + right_eye_ear) / 2.0
    elif left_eye_ear is None and right_eye_ear is not None:
        average_ear = right_eye_ear
    elif left_eye_ear is not None and right_eye_ear is None:
        average_ear = left_eye_ear
    else:
        average_ear = None

    mouth_ratio = mouth_opening_ratio(landmarks["mouth_landmarks"])

    left_eyebrow_eye_dist = eyebrow_eye_distance(
        landmarks["left_eyebrow_landmarks"], landmarks["left_eye_landmarks"]
    )
    right_eyebrow_eye_dist = eyebrow_eye_distance(
        landmarks["right_eyebrow_landmarks"], landmarks["right_eye_landmarks"]
    )
    if left_eyebrow_eye_dist is not None and right_eyebrow_eye_dist is not None:
        eyebrow_eye_avg = (left_eyebrow_eye_dist + right_eyebrow_eye_dist) / 2.0
    else:
        eyebrow_eye_avg = None

    angles = estimate_head_pose(landmarks["head_pose_landmarks"], img.shape)

    metrics.append(average_ear)
    metrics.append(mouth_ratio)
    metrics.append(eyebrow_eye_avg)
    metrics.extend(angles)  # flatten angles

    return metrics

In [12]:
def extract_features_and_draw(cap, output_path="output_visualized.avi"):
    features = []

    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS)

    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    while cap.isOpened():
        success, frame = cap.read()
        if not success:
            break

        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        landmarks = extract_landmarks(frame_rgb)

        if not landmarks:
            out.write(frame)
            continue

        # Compute metrics (optional if just drawing)
        metrics = compute_facial_metrics(landmarks, frame_rgb)
        if metrics is not None:
            features.append(metrics)
            draw_landmarks_on_frame(frame, landmarks, metrics)
        else:
            draw_landmarks_on_frame(frame, landmarks)

        # Write the frame with landmarks
        out.write(frame)

    cap.release()
    out.release()

    return features


In [13]:
cap = cv2.VideoCapture("my_video.mp4")
features = extract_features_and_draw(cap, output_path="highlighted_output.mp4")
features

[[np.float64(2.6653026810347766),
  np.float64(0.28122253820234633),
  np.float64(31.441544117647055),
  -78.06536468166021,
  -48.851563267355914,
  87.23569589327246],
 [np.float64(2.7365728952861144),
  np.float64(0.242535625036333),
  np.float64(31.398529411764706),
  -77.42834611477159,
  -48.81999901427643,
  86.5378641939046],
 [np.float64(2.713838116433915),
  np.float64(0.23148086130253556),
  np.float64(30.94669117647059),
  -76.57506721473693,
  -48.6901189018234,
  85.07968608501915],
 [np.float64(2.854137318789257),
  np.float64(0.24388430433987696),
  np.float64(31.069485294117648),
  -76.80503251435002,
  -48.91534750985932,
  86.46638734803773],
 [np.float64(3.1170991729508817),
  np.float64(0.23861999450875743),
  np.float64(31.045955882352942),
  -76.87595579114571,
  -48.41816180125906,
  87.26428057231692],
 [np.float64(2.782744216266932),
  np.float64(0.2698570414549578),
  np.float64(31.321323529411764),
  -76.60559219061129,
  -48.41157967150702,
  87.00858983830

In [17]:
array_2d = np.array(features)
print(array_2d)

[[  2.66530268   0.28122254  31.44154412 -78.06536468 -48.85156327
   87.23569589]
 [  2.7365729    0.24253563  31.39852941 -77.42834611 -48.81999901
   86.53786419]
 [  2.71383812   0.23148086  30.94669118 -76.57506721 -48.6901189
   85.07968609]
 [  2.85413732   0.2438843   31.06948529 -76.80503251 -48.91534751
   86.46638735]
 [  3.11709917   0.23861999  31.04595588 -76.87595579 -48.4181618
   87.26428057]
 [  2.78274422   0.26985704  31.32132353 -76.60559219 -48.41157967
   87.00858984]
 [  3.05828932   0.23456041  31.5625     -77.08024104 -48.29791409
   87.2375757 ]
 [  2.87377962   0.22587698  31.83088235 -77.02500216 -48.512763
   87.10354925]
 [  2.9274519    0.2492137   31.34705882 -76.22045443 -48.6750752
   87.1618063 ]
 [  2.91387523   0.22724418  32.08198529 -76.57665967 -48.85503884
   88.01709544]
 [  2.81728818   0.23627724  32.08051471 -76.1889295  -48.83196226
   88.06557086]
 [  2.8899175    0.2438843   32.11066176 -75.94947077 -48.90288223
   87.39387029]
 [  2.778