In [None]:
%pip install mediapipe opencv-python numpy

In [1]:
!wget -q -O detector.tflite -q https://storage.googleapis.com/mediapipe-models/face_detector/blaze_face_short_range/float16/1/blaze_face_short_range.tflite

In [None]:
import cv2
import mediapipe as mp
import numpy as np
import math

# Short aliases for the MediaPipe "solutions" sub-modules used throughout this
# file: the FaceMesh model itself and the helper that draws landmark graphs.
mp_face_mesh = mp.solutions.face_mesh
mp_drawing = mp.solutions.drawing_utils

# Shared drawing styles for the mesh overlay rendered in main():
# one spec for the landmark dots, one for the lines connecting them.
# NOTE(review): the overlay is drawn onto the OpenCV capture frame, which is
# presumably BGR, so (255, 0, 0) would render as blue rather than red - confirm.
drawing_spec_landmark = mp_drawing.DrawingSpec(thickness=1, circle_radius=1, color=(0, 255, 0))
drawing_spec_connection = mp_drawing.DrawingSpec(thickness=1, color=(255, 0, 0))

def extract_face_landmark_points(image, face_landmarks):
    """
    Convert the normalized landmarks of one face into an (N, 3) float32 array.

    Args:
        image: Frame the landmarks belong to. Only its first two shape
            entries (height, width) are used, so both colour (H, W, C) and
            grayscale (H, W) arrays are accepted.
        face_landmarks: Object exposing a ``landmark`` iterable whose items
            have ``x``, ``y`` and ``z`` attributes (x and y normalized to the
            image size).

    Returns:
        np.ndarray of shape (N, 3) and dtype float32. Columns are
        ``[x_px, y_px, z]`` where x_px / y_px are the normalized coordinates
        scaled by the image width / height and truncated to whole pixels,
        and z is the landmark's depth value passed through unscaled.
        An empty landmark list yields an array of shape (0, 3).
    """
    # Slice instead of 3-way unpacking so 2-D (grayscale) frames work too.
    h, w = image.shape[:2]

    landmark_points = [
        [int(landmark.x * w), int(landmark.y * h), landmark.z]
        for landmark in face_landmarks.landmark
    ]

    # reshape(-1, 3) keeps the column layout even when there are no rows,
    # so callers can always index ``[:, 0]`` / ``[:, 1]`` safely.
    return np.array(landmark_points, dtype=np.float32).reshape(-1, 3)


def calculate_distances_and_angles(landmarks_array):
    """
    Derive a small set of demonstration metrics from one face's landmarks.

    Args:
        landmarks_array: Array with one row per landmark whose first two
            columns are the x and y coordinates.

    Returns:
        dict with two entries, or an empty dict when the array has fewer
        than 400 rows:
          - "mouth_width":   Euclidean (x, y) distance between landmark 61
                             and landmark 291 (the two mouth corners).
          - "eyebrow_slope": y of landmark 159 minus y of landmark 386,
                             i.e. a vertical offset rather than a true slope.

    NOTE(review): 159 and 386 are treated here as approximate eyebrow
    points; they may actually lie on the upper eyelids in the MediaPipe
    mesh - confirm against the landmark map before relying on the name:
    https://github.com/tensorflow/tfjs-models/tree/master/face-landmarks-detection#keypoints
    """
    # Guard clause: the highest index read below is 386, and anything
    # under 400 rows is treated as an incomplete mesh.
    if landmarks_array.shape[0] < 400:
        return {}

    # Horizontal extent of the mouth, measured corner to corner in (x, y).
    corner_delta = landmarks_array[61, :2] - landmarks_array[291, :2]

    # Vertical offset between the two brow reference points.
    brow_offset = landmarks_array[159, 1] - landmarks_array[386, 1]

    return {
        "mouth_width": np.linalg.norm(corner_delta),
        "eyebrow_slope": brow_offset,
    }


def _rotation_matrix_to_euler_degrees(rmat):
    """Decompose a 3x3 rotation matrix into (pitch, yaw, roll) in degrees.

    Uses the X = pitch, Y = yaw, Z = roll convention. When the matrix is
    (numerically) in gimbal lock, pitch and roll are no longer independent;
    roll is then pinned to 0 and the whole in-plane rotation is reported as
    pitch instead of feeding near-zero values into atan2.
    """
    sy = math.sqrt(rmat[0, 0] * rmat[0, 0] + rmat[1, 0] * rmat[1, 0])
    yaw = math.atan2(-rmat[2, 0], sy)

    if sy >= 1e-6:
        pitch = math.atan2(rmat[2, 1], rmat[2, 2])
        roll = math.atan2(rmat[1, 0], rmat[0, 0])
    else:
        # Degenerate case: yaw is +/-90 degrees.
        pitch = math.atan2(-rmat[1, 2], rmat[1, 1])
        roll = 0.0

    return math.degrees(pitch), math.degrees(yaw), math.degrees(roll)


def calculate_head_pose(image, landmarks_array):
    """
    Estimate head pose (pitch, yaw, roll) with cv2.solvePnP.

    Six 2D face landmarks are matched against a rough, hand-written 3D face
    template and a pinhole camera whose focal length is assumed to equal the
    image width, with the principal point at the image centre and no lens
    distortion. This is an approximation: a calibrated camera and a better
    3D reference model would be needed for accurate angles.

    Args:
        image: Frame the landmarks belong to; only its height and width are
            used, so colour and grayscale arrays are both accepted.
        landmarks_array: Per-landmark rows whose first two columns are pixel
            x / y. Must contain at least indices 1, 33, 61, 199, 263, 291.

    Returns:
        dict with float entries "pitch", "yaw" and "roll" in degrees. If
        solvePnP reports failure, all three values are NaN.
    """
    h, w = image.shape[:2]

    # (landmark index, 3D template point) pairs. Keeping each pair on one
    # line guarantees that the 2D and 3D point lists stay in the same order.
    # NOTE(review): the chin template point is 30 units off the x = 0
    # midline while every other point is symmetric - possibly a typo for
    # 0.0; left unchanged here because it alters the numeric output.
    correspondences = [
        (1,   (0.0,    0.0,    0.0)),   # nose tip
        (199, (-30.0, -65.0, -20.0)),   # chin
        (263, (-60.0,   0.0, -30.0)),   # left eye outer corner
        (33,  (60.0,    0.0, -30.0)),   # right eye outer corner
        (291, (-40.0, -30.0, -30.0)),   # left mouth corner
        (61,  (40.0,  -30.0, -30.0)),   # right mouth corner
    ]
    indices = [index for index, _ in correspondences]
    model_points = np.array(
        [point for _, point in correspondences], dtype=np.float32
    )

    # Fancy indexing may return a non-contiguous view layout; OpenCV wants a
    # plain contiguous (N, 2) float array.
    image_points = np.ascontiguousarray(
        np.asarray(landmarks_array)[indices, :2], dtype=np.float32
    )

    # Camera internals (approximation for a typical webcam).
    focal_length = w
    center = (w / 2, h / 2)
    camera_matrix = np.array([
        [focal_length, 0,            center[0]],
        [0,            focal_length, center[1]],
        [0,            0,            1],
    ], dtype=np.float32)

    # Assume no lens distortion.
    dist_coeffs = np.zeros((4, 1))

    success, rotation_vector, _translation_vector = cv2.solvePnP(
        model_points,
        image_points,
        camera_matrix,
        dist_coeffs,
        flags=cv2.SOLVEPNP_ITERATIVE
    )

    if not success:
        # Do not derive angles from an unsolved pose; keep the dict shape so
        # callers that read the three keys keep working.
        nan = float("nan")
        return {"pitch": nan, "yaw": nan, "roll": nan}

    # Rotation vector -> rotation matrix -> Euler angles.
    rmat, _ = cv2.Rodrigues(rotation_vector)
    pitch, yaw, roll = _rotation_matrix_to_euler_degrees(rmat)

    return {
        "pitch": pitch,
        "yaw": yaw,
        "roll": roll
    }


def calculate_average_facial_color(image, landmarks_array):
    """
    Compute the mean colour inside the landmarks' bounding rectangle.

    The rectangle spans the minimum to maximum landmark x / y (both ends
    inclusive) and is clamped to the image. This is a placeholder approach:
    it averages everything in the box, not only skin pixels.

    Args:
        image: Frame to sample, shaped (H, W) or (H, W, C). Channel order is
            whatever the caller passes in (BGR for OpenCV frames).
        landmarks_array: Per-landmark rows whose first two columns are pixel
            x / y coordinates.

    Returns:
        Tuple of three floats: the per-channel means of the first three
        channels, in the image's own channel order. Images with fewer than
        three channels are padded with 0.0. Returns (0.0, 0.0, 0.0) when
        there are no landmarks or the rectangle lies entirely outside the
        image.
    """
    no_color = (0.0, 0.0, 0.0)
    if landmarks_array.shape[0] == 0:
        # np.min / np.max raise on empty input.
        return no_color

    h, w = image.shape[:2]
    x_coords = landmarks_array[:, 0]
    y_coords = landmarks_array[:, 1]

    # Half-open slice bounds [start, stop). Clamping stop to [.., w] / [.., h]
    # (not w-1 / h-1) keeps the last row/column reachable, and the "+ 1"
    # makes the maximum landmark pixel part of the region.
    x_start = max(0, int(np.min(x_coords)))
    x_stop = min(w, int(np.max(x_coords)) + 1)
    y_start = max(0, int(np.min(y_coords)))
    y_stop = min(h, int(np.max(y_coords)) + 1)

    # Explicit emptiness test: a negative stop must never reach the slice,
    # where it would be read as "count from the end" and select the wrong
    # part of the image.
    if x_stop <= x_start or y_stop <= y_start:
        return no_color

    face_roi = image[y_start:y_stop, x_start:x_stop]
    channels = 1 if face_roi.ndim == 2 else face_roi.shape[2]
    if channels == 0:
        return no_color

    # Accumulate in float64 regardless of the image dtype.
    channel_means = face_roi.reshape(-1, channels).mean(axis=0, dtype=np.float64)

    mean_color = [float(value) for value in channel_means[:3]]  # drop alpha
    mean_color += [0.0] * (3 - len(mean_color))
    return tuple(mean_color)


def main():
    """
    Run the live webcam demo.

    Opens camera 0, runs MediaPipe FaceMesh on every frame, prints the
    landmark metrics, head pose and average face colour for each detected
    face, and shows the frame with the mesh overlay in a window. Press Esc
    in that window to quit. The camera and the window are released even if
    an error interrupts the loop.
    """
    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        print("Error: Could not open webcam.")
        return

    try:
        with mp_face_mesh.FaceMesh(
            static_image_mode=False,
            max_num_faces=1,
            refine_landmarks=True,
            min_detection_confidence=0.5,
            min_tracking_confidence=0.5
        ) as face_mesh:

            while True:
                success, frame = cap.read()
                if not success:
                    print("Ignoring empty camera frame.")
                    continue

                # Convert to RGB for MediaPipe
                frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                results = face_mesh.process(frame_rgb)

                for face_landmarks in results.multi_face_landmarks or ():
                    # Extract landmarks as array
                    landmarks_array = extract_face_landmark_points(frame, face_landmarks)

                    # All measurements are taken BEFORE the overlay is drawn:
                    # the mesh is painted directly onto `frame`, and its
                    # coloured lines would otherwise leak into the average
                    # face colour.

                    # 1) Distances/offsets between certain facial landmarks
                    face_metrics = calculate_distances_and_angles(landmarks_array)

                    # 2) Head pose (pitch, yaw, roll)
                    head_pose = calculate_head_pose(frame, landmarks_array)

                    # 3) Average facial colour (rough approximation)
                    avg_color = calculate_average_facial_color(frame, landmarks_array)

                    # Draw the face mesh onto the frame that gets displayed
                    mp_drawing.draw_landmarks(
                        image=frame,
                        landmark_list=face_landmarks,
                        connections=mp_face_mesh.FACEMESH_TESSELATION,
                        landmark_drawing_spec=drawing_spec_landmark,
                        connection_drawing_spec=drawing_spec_connection
                    )

                    # Print or log data (in real use, you'd store or analyze
                    # these values). For demonstration, print to the console.
                    print("Face Metrics:", face_metrics)
                    print("Head Pose:", head_pose)
                    print("Average Facial Color (BGR):", avg_color)
                    print("----")

                cv2.imshow("Enhanced Face Mesh Analysis", frame)

                # Press 'Esc' to exit
                if cv2.waitKey(5) & 0xFF == 27:
                    break
    finally:
        # Always give the camera back and close the preview window, including
        # when an exception (or Ctrl+C) aborts the loop.
        cap.release()
        cv2.destroyAllWindows()

# Start the webcam demo only when this code is executed directly (script or
# notebook cell), not when it is imported as a module.
if __name__ == "__main__":
    main()