In [1]:
# Install the required libraries
# NOTE(review): no versions are pinned on this line, so a fresh environment may
# resolve to different releases than the ones this notebook was last run with.
%pip install opencv-python opencv-contrib-python dlib numpy flask jupyter matplotlib mediapipe

Collecting opencv-python
  Using cached opencv_python-4.10.0.84-cp37-abi3-macosx_11_0_arm64.whl.metadata (20 kB)
Collecting opencv-contrib-python
  Using cached opencv_contrib_python-4.10.0.84-cp37-abi3-macosx_11_0_arm64.whl.metadata (20 kB)
Collecting dlib
  Using cached dlib-19.24.6-cp312-cp312-macosx_14_0_arm64.whl
Collecting numpy
  Using cached numpy-2.2.0-cp312-cp312-macosx_14_0_arm64.whl.metadata (62 kB)
Collecting flask
  Using cached flask-3.1.0-py3-none-any.whl.metadata (2.7 kB)
Collecting jupyter
  Using cached jupyter-1.1.1-py2.py3-none-any.whl.metadata (2.0 kB)
Collecting matplotlib
  Using cached matplotlib-3.9.3-cp312-cp312-macosx_11_0_arm64.whl.metadata (11 kB)
Collecting mediapipe
  Using cached mediapipe-0.10.18-cp312-cp312-macosx_11_0_universal2.whl.metadata (9.7 kB)
Collecting Werkzeug>=3.1 (from flask)
  Using cached werkzeug-3.1.3-py3-none-any.whl.metadata (3.7 kB)
Collecting Jinja2>=3.1.2 (from flask)
  Using cached jinja2-3.1.4-py3-none-any.whl.metadata (2.6 kB)

In [2]:
import cv2
import dlib
import numpy as np
import matplotlib.pyplot as plt
import mediapipe as mp
import math
import time
from collections import deque


## Calibration (to personalise the neutral values for your camera setup)

In [9]:
# MediaPipe FaceMesh instance used by the calibration pass.
mp_face_mesh = mp.solutions.face_mesh
calib_face_mesh = mp_face_mesh.FaceMesh(
    max_num_faces=1,
    static_image_mode=False,
    refine_landmarks=True,
    min_tracking_confidence=0.7,
    min_detection_confidence=0.7,
)

# FaceMesh landmark ids sampled around each eye and on each iris.
left_eye_indices, right_eye_indices = (
    [33, 133, 159, 145, 153, 144, 160, 161],
    [263, 362, 386, 374, 380, 373, 387, 388],
)
left_iris_indices, right_iris_indices = (
    [468, 469, 470, 471],
    [473, 474, 475, 476],
)

# Named landmark ids; the head-pose helpers look up the eye corners and nose tip here.
face_key_landmarks = dict(
    nose_tip=1,
    chin=152,
    left_eye_outer=33,
    right_eye_outer=263,
    left_mouth=61,
    right_mouth=291,
)

# Calibration state: completion flag plus the per-frame samples that get averaged.
calibration_done = False
pitch_values, yaw_values, iris_focus_values = [], [], []

def get_landmark_points(face_landmarks, frame_width, frame_height, indices):
    """Map the selected landmarks to integer pixel coordinates.

    Each landmark's ``x``/``y`` is scaled by the frame size. Returns a list of
    ``(x, y)`` tuples in the order of ``indices``, or ``None`` as soon as one
    scaled landmark falls outside ``[0, width) x [0, height)``.
    """
    pixel_points = []
    for landmark_id in indices:
        landmark = face_landmarks.landmark[landmark_id]
        px = landmark.x * frame_width
        py = landmark.y * frame_height
        inside_frame = 0 <= px < frame_width and 0 <= py < frame_height
        if not inside_frame:
            return None
        pixel_points.append((int(px), int(py)))
    if len(pixel_points) != len(indices):
        return None
    return pixel_points

def crop_eye(frame, eye_coords):
    """Cut the axis-aligned bounding box of ``eye_coords`` out of ``frame``.

    Returns ``(crop, (min_x, min_y, max_x, max_y))`` with the box clamped to the
    frame size, or ``(None, (0, 0, 0, 0))`` when no coordinates are given.
    """
    if eye_coords is None or not len(eye_coords):
        return None, (0, 0, 0, 0)

    xs = [point[0] for point in eye_coords]
    ys = [point[1] for point in eye_coords]
    frame_h, frame_w = frame.shape[0], frame.shape[1]

    left = max(0, min(xs))
    right = min(frame_w, max(xs))
    top = max(0, min(ys))
    bottom = min(frame_h, max(ys))

    box = (left, top, right, bottom)
    return frame[top:bottom, left:right], box

def get_iris_center(face_landmarks, frame_width, frame_height, iris_indices):
    """Average the iris landmarks into one integer pixel position.

    Returns ``(x, y)``, or ``None`` if any iris landmark, once scaled by the
    frame size, lies outside the frame.
    """
    xs, ys = [], []
    for landmark_id in iris_indices:
        landmark = face_landmarks.landmark[landmark_id]
        px, py = landmark.x * frame_width, landmark.y * frame_height
        if not (0 <= px < frame_width and 0 <= py < frame_height):
            return None
        xs.append(px)
        ys.append(py)

    if len(xs) != len(iris_indices):
        return None

    mean_x = sum(xs) / len(xs)
    mean_y = sum(ys) / len(ys)
    return (int(mean_x), int(mean_y))

def compute_iris_focus(iris_point, eye_box):
    """Score how centred the iris is inside its eye box, from 0 to 100.

    ``eye_box`` is ``(min_x, min_y, max_x, max_y)``. The horizontal and vertical
    scores are each 100 at the box centre and 0 at (or beyond) the box edge;
    their mean is returned. Returns ``None`` when there is no iris point or the
    box has zero width or height.
    """
    if iris_point is None:
        return None

    left, top, right, bottom = eye_box
    width = right - left
    height = bottom - top
    if width == 0 or height == 0:
        return None

    # Integer box centre (floor division), as in the rest of the pixel math.
    center_x = (left + right) // 2
    center_y = (top + bottom) // 2

    axis_scores = []
    for coord, center, extent in (
        (iris_point[0], center_x, width),
        (iris_point[1], center_y, height),
    ):
        # Offset as a fraction of the half-extent: 0 at the centre, 1 at the edge.
        offset = abs(coord - center) / (extent / 2.0)
        axis_scores.append(max(0, min(100, (1 - offset) * 100)))

    return (axis_scores[0] + axis_scores[1]) / 2.0

def get_face_vectors(face_landmarks):
    """Return the eye-to-eye vector and a unit face normal.

    ``LR`` runs from the left outer eye corner to the right outer eye corner.
    The normal is the normalised cross product of ``LR`` with the vector from
    the left eye corner to the nose tip, negated if needed so that its z
    component is not positive. A degenerate cross product falls back to
    ``[0, 0, -1]``.
    """
    def _xyz(name):
        # Landmark coordinates for one named key landmark, as a numpy vector.
        point = face_landmarks.landmark[face_key_landmarks[name]]
        return np.array([point.x, point.y, point.z])

    left_eye = _xyz("left_eye_outer")
    right_eye = _xyz("right_eye_outer")
    nose = _xyz("nose_tip")

    LR = right_eye - left_eye
    left_to_nose = nose - left_eye

    normal = np.cross(LR, left_to_nose)
    length = np.linalg.norm(normal)
    if length >= 1e-7:
        normal /= length
    else:
        normal = np.array([0.0, 0.0, -1.0])

    # Keep the normal on the negative-z side.
    if normal[2] > 0:
        normal = -normal

    return LR, normal

def get_pitch_yaw_roll(LR, normal):
    """Derive head pitch and yaw, in degrees, from the face vectors.

    Args:
        LR: 3-vector from the left eye corner to the right eye corner.
        normal: 3-vector face normal.

    Returns:
        ``(pitch, yaw, roll)`` in degrees. Yaw is the angle of ``LR`` projected
        onto the XZ plane (Y is ignored, so a head tilt does not leak into
        yaw). Pitch is the elevation of ``normal`` above the XZ plane. Roll is
        not estimated and is always 0.0.
    """
    lr_x, _, lr_z = LR
    yaw_rad = math.atan2(lr_x, lr_z)

    n_x, n_y, n_z = normal
    horizontal_len = math.sqrt(n_x * n_x + n_z * n_z)
    # atan2 is well defined for a zero second argument, so no special case is
    # needed: a vertical normal yields +/-90 degrees instead of being forced to 0.
    pitch_rad = math.atan2(n_y, horizontal_len)

    roll_rad = 0.0  # Not used

    return math.degrees(pitch_rad), math.degrees(yaw_rad), math.degrees(roll_rad)

def _calibration_sample(face_landmarks, frame):
    """Measure one face for calibration.

    Returns ``(pitch, yaw, iris_focus)``, or ``None`` when the eye or iris
    helpers reject the landmarks for this frame.
    """
    fh, fw = frame.shape[:2]
    left_eye = get_landmark_points(face_landmarks, fw, fh, left_eye_indices)
    right_eye = get_landmark_points(face_landmarks, fw, fh, right_eye_indices)
    if not left_eye or not right_eye:
        return None

    _, left_box = crop_eye(frame, left_eye)
    _, right_box = crop_eye(frame, right_eye)
    left_iris_center = get_iris_center(face_landmarks, fw, fh, left_iris_indices)
    right_iris_center = get_iris_center(face_landmarks, fw, fh, right_iris_indices)
    if left_iris_center is None or right_iris_center is None:
        return None

    left_if = compute_iris_focus(left_iris_center, left_box)
    right_if = compute_iris_focus(right_iris_center, right_box)
    if left_if is None or right_if is None:
        return None

    LR, normal = get_face_vectors(face_landmarks)
    pitch, yaw, _ = get_pitch_yaw_roll(LR, normal)
    return pitch, yaw, (left_if + right_if) / 2.0


print("Ensure you are looking straight at the camera.")
print("Press 'c' to start calibration...")

cap_calib = cv2.VideoCapture(0)
calibration_duration = 5  # seconds of samples averaged into the neutral values

# try/finally so the camera and windows are released even if the cell is
# interrupted or a frame raises.
try:
    if not cap_calib.isOpened():
        print("Could not open the camera; calibration skipped.")

    while cap_calib.isOpened():
        ret, frame = cap_calib.read()
        if not ret:
            break

        # The result is not used on the setup screen.
        # NOTE(review): presumably this keeps the tracker warm before sampling
        # starts - confirm, otherwise this call can be dropped.
        calib_face_mesh.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

        cv2.putText(frame, "Look straight at the camera.", (50, 50),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0,255,0), 2)
        cv2.putText(frame, "Press 'c' to start calibration (5s).", (50, 80),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0,255,0), 2)
        if calibration_done:
            cv2.putText(frame, "Calibration done! Press 'q' to quit.", (50,110),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255,0,0), 2)
        cv2.imshow('Calibration Setup', frame)

        # Poll the keyboard after imshow so a window exists to receive the key.
        key = cv2.waitKey(1) & 0xFF
        if key == ord('q'):
            break
        if key != ord('c'):
            continue

        print("Calibration started. Hold still for about 5 seconds...")
        # Start from empty sample lists so a repeated calibration is not
        # averaged together with an earlier one.
        for samples in (pitch_values, yaw_values, iris_focus_values):
            samples.clear()
        start_time = time.monotonic()

        while True:
            ret2, frame_calib = cap_calib.read()
            if not ret2:
                break

            remaining = calibration_duration - (time.monotonic() - start_time)
            if remaining <= 0:
                break

            results_calib = calib_face_mesh.process(
                cv2.cvtColor(frame_calib, cv2.COLOR_BGR2RGB))
            for face_landmarks in results_calib.multi_face_landmarks or []:
                sample = _calibration_sample(face_landmarks, frame_calib)
                if sample is not None:
                    pitch, yaw, iris_f = sample
                    pitch_values.append(pitch)
                    yaw_values.append(yaw)
                    iris_focus_values.append(iris_f)

            cv2.putText(frame_calib, f"Calibrating... {int(remaining)}s left", (50,50),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0,0,255), 2)
            cv2.imshow('Calibration', frame_calib)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

        if pitch_values and yaw_values and iris_focus_values:
            neutral_pitch = sum(pitch_values) / len(pitch_values)
            neutral_yaw = sum(yaw_values) / len(yaw_values)
            neutral_iris_focus = sum(iris_focus_values) / len(iris_focus_values)
        else:
            print("No usable face samples were captured; using default neutral values.")
            neutral_pitch = 0.0
            neutral_yaw = 0.0
            neutral_iris_focus = 50.0

        print("Calibration complete!")
        print(f"Neutral Pitch: {neutral_pitch:.2f}, Neutral Yaw: {neutral_yaw:.2f}, Neutral Iris Focus: {neutral_iris_focus:.2f}")
        print("Press 'q' to quit calibration.")

        calibration_done = True
finally:
    cap_calib.release()
    cv2.destroyAllWindows()

# Only report values when a calibration actually ran; quitting early used to
# hit a NameError here because the neutral_* names were never assigned.
if calibration_done:
    print("Neutral values stored and ready to use in the main code:")
    print(f"Neutral Pitch: {neutral_pitch:.2f}")
    print(f"Neutral Yaw: {neutral_yaw:.2f}")
    print(f"Neutral Iris Focus: {neutral_iris_focus:.2f}")
else:
    print("Calibration was not completed; no new neutral values were stored.")

Ensure you are looking straight at the camera.
Press 'c' to start calibration...


I0000 00:00:1733728662.577435  484637 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 88), renderer: Apple M1 Pro
W0000 00:00:1733728662.582552  503999 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1733728662.593619  503998 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


Calibration started. Hold still for about 5 seconds...
Calibration complete!
Neutral Pitch: -39.40, Neutral Yaw: 94.19, Neutral Iris Focus: 79.16
Press 'q' to quit calibration.
Neutral values stored and ready to use in the main code:
Neutral Pitch: -39.40
Neutral Yaw: 94.19
Neutral Iris Focus: 79.16


## Testing of Focus

In [10]:
import cv2
import mediapipe as mp
import math
import numpy as np
from collections import deque

# Reuse the neutral values left behind by the calibration cell. If any of the
# three names is missing, all three are reset to the uncalibrated defaults.
try:
    neutral_pitch, neutral_yaw, neutral_iris_focus
except NameError:
    neutral_pitch, neutral_yaw, neutral_iris_focus = 0.0, 0.0, 50.0

# MediaPipe FaceMesh instance used by the live focus loop.
mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh(
    max_num_faces=1,
    static_image_mode=False,
    refine_landmarks=True,
    min_tracking_confidence=0.7,
    min_detection_confidence=0.7,
)

# FaceMesh landmark ids sampled around each eye and on each iris.
left_eye_indices, right_eye_indices = (
    [33, 133, 159, 145, 153, 144, 160, 161],
    [263, 362, 386, 374, 380, 373, 387, 388],
)
left_iris_indices, right_iris_indices = (
    [468, 469, 470, 471],
    [473, 474, 475, 476],
)

# Named landmark ids; the head-pose helpers look up the eye corners and nose tip here.
face_key_landmarks = dict(
    nose_tip=1,
    chin=152,
    left_eye_outer=33,
    right_eye_outer=263,
    left_mouth=61,
    right_mouth=291,
)

# Rolling window of the most recent final focus scores, averaged for display.
focus_scores_buffer = deque(maxlen=10)

def get_landmark_points(face_landmarks, frame_width, frame_height, indices):
    """Scale the requested landmarks to pixels.

    Returns one integer ``(x, y)`` tuple per entry of ``indices``, or ``None``
    if any scaled landmark is outside ``[0, width) x [0, height)``.
    """
    scaled = []
    for landmark_id in indices:
        entry = face_landmarks.landmark[landmark_id]
        col = entry.x * frame_width
        row = entry.y * frame_height
        if 0 <= col < frame_width and 0 <= row < frame_height:
            scaled.append((int(col), int(row)))
            continue
        # One landmark off-frame invalidates the whole set.
        return None
    complete = len(scaled) == len(indices)
    return scaled if complete else None

def crop_eye(frame, eye_coords):
    """Return the eye region of ``frame`` and its bounding box.

    The box ``(min_x, min_y, max_x, max_y)`` spans ``eye_coords`` and is clamped
    to the frame size. With no coordinates the result is
    ``(None, (0, 0, 0, 0))``.
    """
    if eye_coords is None or not len(eye_coords):
        return None, (0, 0, 0, 0)

    columns = [pt[0] for pt in eye_coords]
    rows = [pt[1] for pt in eye_coords]

    x_lo = max(0, min(columns))
    x_hi = min(frame.shape[1], max(columns))
    y_lo = max(0, min(rows))
    y_hi = min(frame.shape[0], max(rows))

    region = frame[y_lo:y_hi, x_lo:x_hi]
    return region, (x_lo, y_lo, x_hi, y_hi)

def get_iris_center(face_landmarks, frame_width, frame_height, iris_indices):
    """Return the mean pixel position of the iris landmarks.

    The result is an integer ``(x, y)`` tuple, or ``None`` when any iris
    landmark scales to a position outside the frame.
    """
    cols, rows = [], []
    for landmark_id in iris_indices:
        entry = face_landmarks.landmark[landmark_id]
        col = entry.x * frame_width
        row = entry.y * frame_height
        on_frame = 0 <= col < frame_width and 0 <= row < frame_height
        if not on_frame:
            return None
        cols.append(col)
        rows.append(row)

    if len(cols) != len(iris_indices):
        return None

    count = len(cols)
    return (int(sum(cols) / count), int(sum(rows) / len(rows)))

def compute_iris_focus(iris_point, eye_box):
    """Score iris centring from 0 to 100, normalised by the calibrated neutral.

    Args:
        iris_point: ``(x, y)`` pixel position of the iris centre, or ``None``.
        eye_box: ``(min_x, min_y, max_x, max_y)`` bounding box of the eye.

    Returns:
        0.0 when there is no iris point or the box has zero width or height.
        Otherwise the raw centring score (100 at the box centre, 0 at or beyond
        the edge, averaged over both axes) is rescaled so that a raw score
        equal to ``neutral_iris_focus`` maps to 90, then clamped to 0..100.
        If ``neutral_iris_focus`` is not positive the rescaling is skipped and
        the raw score is returned, instead of dividing by zero.
    """
    if iris_point is None:
        return 0.0
    min_x, min_y, max_x, max_y = eye_box
    eye_width, eye_height = max_x - min_x, max_y - min_y
    if eye_width == 0 or eye_height == 0:
        return 0.0
    eye_center_x, eye_center_y = (min_x + max_x) // 2, (min_y + max_y) // 2

    # Offsets as a fraction of the half-extent: 0 at the centre, 1 at the edge.
    h_offset = abs(iris_point[0] - eye_center_x) / (eye_width / 2.0)
    v_offset = abs(iris_point[1] - eye_center_y) / (eye_height / 2.0)

    h_focus = max(0, min(100, (1 - h_offset) * 100))
    v_focus = max(0, min(100, (1 - v_offset) * 100))
    raw_focus = (h_focus + v_focus) / 2.0

    # A calibration taken with the iris at the eye-box edge can store a neutral
    # of 0, which would otherwise raise ZeroDivisionError on every frame.
    if neutral_iris_focus <= 0:
        return raw_focus

    neutral_score = 90  # score assigned to a gaze that matches the calibrated neutral
    adjusted_focus = (raw_focus / neutral_iris_focus) * neutral_score
    return min(max(adjusted_focus, 0), 100)

def get_face_vectors(face_landmarks):
    """Compute the left-to-right eye vector and a unit face normal.

    The normal is the normalised cross product of the eye-to-eye vector with
    the left-eye-to-nose vector, negated when its z component is positive. A
    near-zero cross product is replaced by ``[0, 0, -1]``.
    """
    landmarks = face_landmarks.landmark
    left_pt = landmarks[face_key_landmarks["left_eye_outer"]]
    right_pt = landmarks[face_key_landmarks["right_eye_outer"]]
    nose_pt = landmarks[face_key_landmarks["nose_tip"]]

    origin = np.array([left_pt.x, left_pt.y, left_pt.z])
    LR = np.array([right_pt.x, right_pt.y, right_pt.z]) - origin
    to_nose = np.array([nose_pt.x, nose_pt.y, nose_pt.z]) - origin

    normal = np.cross(LR, to_nose)
    magnitude = np.linalg.norm(normal)
    if magnitude >= 1e-7:
        normal /= magnitude
    else:
        normal = np.array([0, 0, -1], dtype=float)

    # Flip so the normal never has a positive z component.
    if normal[2] > 0:
        normal = -normal

    return LR, normal

def get_pitch_yaw_roll(LR, normal):
    """Return head pitch and yaw relative to the calibrated neutral pose.

    Args:
        LR: 3-vector from the left eye corner to the right eye corner.
        normal: 3-vector face normal.

    Returns:
        ``(pitch, yaw, roll)`` in degrees. Yaw is the angle of ``LR`` projected
        onto the XZ plane and pitch is the elevation of ``normal`` above that
        plane. ``neutral_yaw`` and ``neutral_pitch`` are subtracted, and the yaw
        difference is wrapped into (-180, 180]. Roll is not estimated and is
        always 0.0.
    """
    lr_x, _, lr_z = LR
    yaw_rad = math.atan2(lr_x, lr_z)

    n_x, n_y, n_z = normal
    horizontal_len = math.sqrt(n_x * n_x + n_z * n_z)
    # atan2 is well defined for a zero second argument, so no special case is
    # needed: a vertical normal yields +/-90 degrees instead of being forced to 0.
    pitch_rad = math.atan2(n_y, horizontal_len)
    roll_rad = 0.0

    pitch = math.degrees(pitch_rad) - neutral_pitch
    yaw = math.degrees(yaw_rad) - neutral_yaw
    roll = math.degrees(roll_rad)

    # The difference of two angles can leave (-180, 180]; wrap it so that, for
    # example, -170 deg measured against a 170 deg neutral reads as 20 deg.
    if yaw > 180.0:
        yaw -= 360.0
    elif yaw <= -180.0:
        yaw += 360.0

    return pitch, yaw, roll

def compute_angle_focus(angle, max_angle):
    """Convert an angular deviation into a 0-100 focus score.

    Args:
        angle: Deviation in degrees; only its magnitude matters.
        max_angle: Deviation at which the score reaches 0. Must be positive.

    Returns:
        100.0 at zero deviation, falling linearly to 0.0 at ``max_angle`` and
        staying at 0.0 beyond it.

    Raises:
        ValueError: If ``max_angle`` is not positive. Zero would divide by
            zero, and a negative value would produce scores above 100.
    """
    if max_angle <= 0:
        raise ValueError(f"max_angle must be positive, got {max_angle!r}")
    return max(0.0, (1 - (abs(angle) / max_angle)) * 100.0)

# Number of recent frames averaged to steady the head-angle readings.
angle_smoothing_window = 10
pitch_buffer, yaw_buffer, roll_buffer = (
    deque(maxlen=angle_smoothing_window) for _ in range(3)
)

# Blend of the iris score and the head-orientation score in the final focus.
iris_weight, orientation_weight = 0.7, 0.3

# Angular deviation (degrees) at which the yaw / pitch score reaches zero.
max_yaw_angle, max_pitch_angle = 20.0, 15.0

# Share of yaw vs. pitch inside the orientation score (constant for every frame).
yaw_weight = 0.6
pitch_weight = 0.4


def _show_unfocused(frame):
    """Record a zero score for this frame and draw the smoothed focus in red."""
    focus_scores_buffer.append(0.0)
    smoothed = sum(focus_scores_buffer) / len(focus_scores_buffer)
    cv2.putText(frame, f"Final Focus: {int(smoothed)}%", (50, 50),
                cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 255), 2)
    return smoothed


cap = cv2.VideoCapture(0)
# try/finally so the camera and windows are released even when the cell is
# stopped with a kernel interrupt instead of the 'q' key.
try:
    if not cap.isOpened():
        print("Could not open the camera; focus measurement skipped.")

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = face_mesh.process(rgb_frame)
        frame_height, frame_width = frame.shape[:2]

        final_focus = 0.0
        face_scored = False
        for face_landmarks in results.multi_face_landmarks or []:
            left_eye = get_landmark_points(face_landmarks, frame_width, frame_height, left_eye_indices)
            right_eye = get_landmark_points(face_landmarks, frame_width, frame_height, right_eye_indices)
            if left_eye is None or right_eye is None:
                continue

            _, left_box = crop_eye(frame, left_eye)
            _, right_box = crop_eye(frame, right_eye)

            left_iris_center = get_iris_center(face_landmarks, frame_width, frame_height, left_iris_indices)
            right_iris_center = get_iris_center(face_landmarks, frame_width, frame_height, right_iris_indices)
            if left_iris_center is None or right_iris_center is None:
                continue

            left_iris_focus = compute_iris_focus(left_iris_center, left_box)
            right_iris_focus = compute_iris_focus(right_iris_center, right_box)
            iris_focus = (left_iris_focus + right_iris_focus) / 2.0

            LR, normal = get_face_vectors(face_landmarks)
            pitch, yaw, roll = get_pitch_yaw_roll(LR, normal)

            # Smooth the angles over the last few frames before scoring them.
            pitch_buffer.append(pitch)
            yaw_buffer.append(yaw)
            roll_buffer.append(roll)
            pitch = sum(pitch_buffer) / len(pitch_buffer)
            yaw = sum(yaw_buffer) / len(yaw_buffer)
            roll = sum(roll_buffer) / len(roll_buffer)

            yaw_focus = compute_angle_focus(yaw, max_yaw_angle)
            pitch_focus = compute_angle_focus(pitch, max_pitch_angle)
            orientation_focus = (yaw_focus * yaw_weight) + (pitch_focus * pitch_weight)

            final_focus = (iris_focus * iris_weight) + (orientation_focus * orientation_weight)

            focus_scores_buffer.append(final_focus)
            smoothed_focus = sum(focus_scores_buffer) / len(focus_scores_buffer)
            face_scored = True

            cv2.putText(frame, f"Iris Focus: {int(iris_focus)}%", (50, 80),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 0), 2)
            cv2.putText(frame, f"Yaw: {int(yaw)} deg (Focus: {int(yaw_focus)}%)", (50, 110),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 0), 2)
            cv2.putText(frame, f"Pitch: {int(pitch)} deg (Focus: {int(pitch_focus)}%)", (50, 140),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 0), 2)
            cv2.putText(frame, f"Orientation Focus: {int(orientation_focus)}%", (50, 170),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 0), 2)

            cv2.putText(frame, f"Final Focus: {int(smoothed_focus)}%", (50, 50),
                        cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 255, 0), 2)

            for (x, y) in (left_eye + right_eye):
                cv2.circle(frame, (x, y), 2, (0, 255, 0), -1)
            cv2.circle(frame, left_iris_center, 3, (255, 0, 0), -1)
            cv2.circle(frame, right_iris_center, 3, (255, 0, 0), -1)

            print(f"Iris Focus: {iris_focus:.2f}, Yaw: {yaw:.2f}, Pitch: {pitch:.2f}, "
                  f"Orientation: {orientation_focus:.2f}, Final Focus: {smoothed_focus:.2f}")

        # No face, or a face whose eye/iris landmarks were unusable, counts as an
        # unfocused frame. Previously the second case skipped both the smoothing
        # buffer and the on-screen score.
        if not face_scored:
            smoothed_focus = _show_unfocused(frame)

        cv2.imshow('Focus Measurement', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    cap.release()
    cv2.destroyAllWindows()

I0000 00:00:1733728679.131389  484637 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 88), renderer: Apple M1 Pro
W0000 00:00:1733728679.138519  504658 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1733728679.146930  504657 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


Iris Focus: 84.09, Yaw: -1.07, Pitch: 2.91, Orientation: 89.04, Final Focus: 85.57
Iris Focus: 86.55, Yaw: -0.46, Pitch: 1.92, Orientation: 93.49, Final Focus: 87.10
Iris Focus: 88.67, Yaw: -0.14, Pitch: 1.46, Orientation: 95.70, Final Focus: 88.33
Iris Focus: 85.39, Yaw: 0.03, Pitch: 1.33, Orientation: 96.37, Final Focus: 88.42
Iris Focus: 86.40, Yaw: 0.22, Pitch: 1.17, Orientation: 96.22, Final Focus: 88.60
Iris Focus: 82.78, Yaw: 0.33, Pitch: 1.18, Orientation: 95.84, Final Focus: 88.29
Iris Focus: 89.51, Yaw: 0.41, Pitch: 1.17, Orientation: 95.63, Final Focus: 88.72
Iris Focus: 82.81, Yaw: 0.46, Pitch: 1.14, Orientation: 95.57, Final Focus: 88.46
Iris Focus: 89.88, Yaw: 0.43, Pitch: 1.19, Orientation: 95.56, Final Focus: 88.81
Iris Focus: 83.69, Yaw: 0.42, Pitch: 1.18, Orientation: 95.60, Final Focus: 88.66
Iris Focus: 86.50, Yaw: 0.56, Pitch: 1.03, Orientation: 95.57, Final Focus: 89.02
Iris Focus: 84.71, Yaw: 0.67, Pitch: 1.10, Orientation: 95.07, Final Focus: 88.94
Iris Focus: 8

KeyboardInterrupt: 