In [1]:
import cv2
import numpy as np
import mediapipe as mp

In [2]:
# Short alias for MediaPipe's face_mesh solution module. Nothing is instantiated
# here; the FaceMesh detector itself is created later via mp_face_mesh.FaceMesh(...).
mp_face_mesh = mp.solutions.face_mesh

# Face Mesh landmark indices used for gaze estimation.
# Iris points: four consecutive landmarks per eye; get_iris_position averages
# them to obtain a single iris centre.
# NOTE(review): iris landmarks are presumably only emitted when FaceMesh is
# created with refine_landmarks=True - confirm against the MediaPipe docs.
left_iris_indices = list(range(474, 478))
right_iris_indices = list(range(469, 473))

# Eye-corner landmarks, written as (inner, outer) pairs per eye.
left_eye_inner, left_eye_outer = 362, 263
right_eye_inner, right_eye_outer = 133, 33

# Function to calculate iris position
def get_iris_position(face_landmarks, iris_indices, frame_width, frame_height):
    """Return the iris centre as an (x, y) pair scaled to the frame size.

    The centre is the mean of the landmarks selected by ``iris_indices``.

    Args:
        face_landmarks: object exposing an indexable ``landmark`` collection
            whose items have ``x`` and ``y`` attributes.
        iris_indices: indices into ``face_landmarks.landmark`` to average.
        frame_width: factor applied to the mean x coordinate.
        frame_height: factor applied to the mean y coordinate.

    Returns:
        Tuple ``(iris_x, iris_y)``.
    """
    iris_points = [face_landmarks.landmark[idx] for idx in iris_indices]
    mean_x = np.mean([point.x for point in iris_points])
    mean_y = np.mean([point.y for point in iris_points])
    return (mean_x * frame_width, mean_y * frame_height)

# Function to calculate the relative position of the iris within the eye
def calculate_relative_position(iris_pos, eye_inner_pos, eye_outer_pos):
    """Return the horizontal iris position as a fraction of the eye width.

    The value is 0.0 when the iris x equals the inner-corner x, 1.0 when it
    equals the outer-corner x, and 0.5 midway between them (treated by the
    caller as "looking straight"). Only the x component (index 0) of each
    point is used.

    Args:
        iris_pos: (x, y) of the iris centre.
        eye_inner_pos: (x, y) of the inner eye corner.
        eye_outer_pos: (x, y) of the outer eye corner.

    Returns:
        The relative x position, or NaN when both corners share the same x
        (zero eye width), so the ratio is undefined.
    """
    eye_width = eye_outer_pos[0] - eye_inner_pos[0]
    if eye_width == 0:
        # Degenerate detection: previously this raised ZeroDivisionError and
        # aborted the capture loop. NaN fails every range comparison, so the
        # caller falls through to its "not looking straight" branch.
        return float("nan")
    return (iris_pos[0] - eye_inner_pos[0]) / eye_width

# Head pose estimation
def get_head_pose(face_landmarks, frame_width, frame_height, camera_matrix, dist_coeffs):
    """Estimate head orientation from six facial landmarks with cv2.solvePnP.

    Six landmarks are scaled to pixel coordinates, matched against a fixed 3D
    reference face, and the resulting rotation is decomposed into Euler angles.

    Args:
        face_landmarks: object exposing an indexable ``landmark`` collection
            whose items have ``x`` and ``y`` attributes.
        frame_width: factor applied to landmark x coordinates.
        frame_height: factor applied to landmark y coordinates.
        camera_matrix: camera intrinsic matrix passed to cv2.solvePnP.
        dist_coeffs: distortion coefficients passed to cv2.solvePnP.

    Returns:
        Tuple ``(pitch, yaw, roll)`` taken from the Euler-angle output of
        cv2.decomposeProjectionMatrix.
        NOTE(review): each item is presumably a length-1 NumPy array (one row
        of a 3x1 vector) holding degrees - confirm against the OpenCV docs.
    """
    # 3D reference face; one row per landmark id in `landmark_ids` below.
    reference_points = np.array([
        (0.0, 0.0, 0.0),           # Nose tip
        (0.0, -330.0, -65.0),      # Chin
        (-225.0, 170.0, -135.0),   # Left eye corner
        (225.0, 170.0, -135.0),    # Right eye corner
        (-150.0, -150.0, -125.0),  # Left mouth corner
        (150.0, -150.0, -125.0),   # Right mouth corner
    ])

    # Landmark ids in the same order as the rows of `reference_points`:
    # nose tip, chin, left eye corner, right eye corner, left/right mouth corner.
    landmark_ids = (1, 152, 33, 263, 61, 291)
    landmarks = face_landmarks.landmark
    image_points = np.array(
        [(landmarks[idx].x * frame_width, landmarks[idx].y * frame_height)
         for idx in landmark_ids],
        dtype='float64')

    # The solver's success flag is not inspected.
    _, rotation_vector, translation_vector = cv2.solvePnP(
        reference_points, image_points, camera_matrix, dist_coeffs)

    # Rotation vector -> 3x3 matrix -> 3x4 [R | t] -> Euler angles (item 6).
    rotation_matrix, _ = cv2.Rodrigues(rotation_vector)
    projection = np.hstack((rotation_matrix, translation_vector))
    pitch, yaw, roll = cv2.decomposeProjectionMatrix(projection)[6]

    return pitch, yaw, roll

# Approximate pinhole intrinsics for a 640x480 frame: the focal length (in
# pixels) is taken equal to the frame width and the principal point is the
# frame centre.
frame_width, frame_height = 640, 480
focal_length = frame_width
center = tuple(dimension / 2 for dimension in (frame_width, frame_height))

# Start from the identity (keeps the bottom-right 1) and fill in the focal
# length on the diagonal and the principal point in the last column.
camera_matrix = np.eye(3, dtype='float64')
camera_matrix[0, 0] = camera_matrix[1, 1] = focal_length
camera_matrix[0, 2], camera_matrix[1, 2] = center

# All-zero 4x1 distortion vector, i.e. assume no distortion.
dist_coeffs = np.zeros(4).reshape(4, 1)

In [3]:
def _landmark_to_pixels(face_landmarks, index, width, height):
    """Return landmark ``index`` as an (x, y) pair scaled by width/height."""
    point = face_landmarks.landmark[index]
    return (point.x * width, point.y * height)


cap = cv2.VideoCapture(0)

# Intrinsics actually handed to get_head_pose. They are rebuilt from the real
# frame size (same approximation as the configuration cell: focal length =
# frame width, principal point = frame centre) because the module-level
# camera_matrix is hard-wired to 640x480 and the camera may deliver another size.
frame_camera_matrix = None
intrinsics_size = None

# try/finally guarantees that the camera and the preview window are released
# even when the loop exits through an exception or a kernel interrupt.
try:
    with mp_face_mesh.FaceMesh(
            max_num_faces=1,
            refine_landmarks=True,
            min_detection_confidence=0.5,
            min_tracking_confidence=0.5) as face_mesh:

        while cap.isOpened():
            success, image = cap.read()
            if not success:
                print("Empty camera frame.")
                continue

            # Get frame dimensions
            frame_height, frame_width, _ = image.shape

            # Rebuild the intrinsics only when the frame size changes.
            if (frame_width, frame_height) != intrinsics_size:
                frame_camera_matrix = np.array(
                    [[frame_width, 0, frame_width / 2],
                     [0, frame_width, frame_height / 2],
                     [0, 0, 1]], dtype='float64')
                intrinsics_size = (frame_width, frame_height)

            # Face Mesh is fed an RGB copy of the BGR frame from OpenCV.
            image.flags.writeable = False
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            results = face_mesh.process(image)

            # Convert the image back to BGR for OpenCV drawing/display
            image.flags.writeable = True
            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

            if results.multi_face_landmarks:
                for face_landmarks in results.multi_face_landmarks:
                    # Iris centres in pixel coordinates
                    left_iris_pos = get_iris_position(
                        face_landmarks, left_iris_indices, frame_width, frame_height)
                    right_iris_pos = get_iris_position(
                        face_landmarks, right_iris_indices, frame_width, frame_height)

                    # Eye corners in pixel coordinates
                    left_eye_inner_pos = _landmark_to_pixels(
                        face_landmarks, left_eye_inner, frame_width, frame_height)
                    left_eye_outer_pos = _landmark_to_pixels(
                        face_landmarks, left_eye_outer, frame_width, frame_height)
                    right_eye_inner_pos = _landmark_to_pixels(
                        face_landmarks, right_eye_inner, frame_width, frame_height)
                    right_eye_outer_pos = _landmark_to_pixels(
                        face_landmarks, right_eye_outer, frame_width, frame_height)

                    # Relative horizontal iris position inside each eye
                    left_iris_relative_x = calculate_relative_position(
                        left_iris_pos, left_eye_inner_pos, left_eye_outer_pos)
                    right_iris_relative_x = calculate_relative_position(
                        right_iris_pos, right_eye_inner_pos, right_eye_outer_pos)

                    # Head pose estimation (pitch, yaw, roll)
                    pitch, yaw, roll = get_head_pose(
                        face_landmarks, frame_width, frame_height,
                        frame_camera_matrix, dist_coeffs)

                    # get_head_pose may hand back length-1 arrays; reduce yaw to
                    # a plain float so the range checks below yield real bools.
                    yaw_value = float(np.ravel(yaw)[0])

                    # Widen the accepted iris range as the head turns.
                    horizontal_adjustment = 0.1 * abs(yaw_value / 70)

                    # NOTE(review): the left eye gets half the tolerance of the
                    # right eye. Kept as in the original - confirm the asymmetry
                    # is intentional.
                    left_centered = (
                        (0.4 - horizontal_adjustment / 2)
                        < left_iris_relative_x
                        < (0.6 + horizontal_adjustment / 2))
                    right_centered = (
                        (0.4 - horizontal_adjustment)
                        < right_iris_relative_x
                        < (0.6 + horizontal_adjustment))

                    if left_centered and right_centered:
                        gaze_text = "Concentrated"
                        color = (0, 255, 0)
                    else:
                        gaze_text = "Not Concentrated"
                        color = (0, 0, 255)

                # Draw the verdict of the (last) detected face on the frame.
                cv2.putText(image, gaze_text, (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2)

            # Show the image with gaze and head pose estimation
            cv2.imshow('Eyeball & Head Pose Detection', image)

            # Press 'q' in the preview window to stop.
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
finally:
    cap.release()
    cv2.destroyAllWindows()