Parts of this code are adapted from 'https://blog.roboflow.com/gaze-direction-position/#bonus-using-gaze-points'

In [None]:
pip install inference supervision

In [None]:
from inference import get_model

# Load the YOLOv8n model (640 input variant) through the `inference` package.
model = get_model(model_id="yolov8n-640")

# TODO: If feasible, also experiment with models other than v8n that are both
# small (they should be easier to implement/deploy) and good at detecting objects.
# Things to consider:
# - Study environment
#   * Likely to be strongly affected by lighting -> color correction may be needed
# - Accuracy and sensitivity tuning:
#   * How well specific objects are detected

In [None]:
import cv2
import mediapipe as mp
import numpy as np
import time

# Initialize Mediapipe FaceMesh.
# FaceMesh is used here to obtain more precise facial landmark coordinates.
mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh(refine_landmarks=True, min_detection_confidence=0.5)

# FaceMesh landmark indices used to outline each eye. They never change, so
# they are defined once instead of being rebuilt on every frame.
LEFT_EYE = [33, 160, 158, 133, 153, 144]
RIGHT_EYE = [362, 385, 387, 263, 373, 380]
EYES = (("Left Eye", LEFT_EYE), ("Right Eye", RIGHT_EYE))

# Open the camera and request a 1280x720 capture size.
cap = cv2.VideoCapture(0)
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)

# Screen resolution for mapping
screen_width, screen_height = 1920, 1080

try:
    while cap.isOpened():
        success, frame = cap.read()
        if not success:
            break

        # Mirror the image horizontally so the preview behaves like a mirror,
        # which is more convenient when demonstrating.
        frame = cv2.flip(frame, 1)
        frame_h, frame_w = frame.shape[:2]

        # Mediapipe is fed an RGB copy; `frame` itself stays BGR so that all
        # OpenCV drawing and cv2.imshow below use the correct channel order.
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = face_mesh.process(rgb_frame)

        if results.multi_face_landmarks:
            # Only the first detected face is tracked.
            face_landmarks = results.multi_face_landmarks[0]

            for eye_name, eye_landmarks in EYES:
                # Normalized landmark coordinates -> pixel coordinates.
                # dtype is forced to int32 because cv2.boundingRect only accepts
                # 32-bit integer/float point sets, while NumPy's default integer
                # type is 64-bit on most platforms.
                eye_points = np.array(
                    [
                        [int(face_landmarks.landmark[idx].x * frame_w),
                         int(face_landmarks.landmark[idx].y * frame_h)]
                        for idx in eye_landmarks
                    ],
                    dtype=np.int32,
                )

                # Eye region, clamped to the frame: landmarks can fall outside
                # the image, and an unclamped slice would then be empty (or wrap
                # around for negative indices) and crash the colour conversion.
                x, y, w, h = cv2.boundingRect(eye_points)
                x0, y0 = max(x, 0), max(y, 0)
                x1, y1 = min(x + w, frame_w), min(y + h, frame_h)
                if x1 <= x0 or y1 <= y0:
                    continue
                roi = frame[y0:y1, x0:x1]
                roi_h, roi_w = roi.shape[:2]

                # Process the eye region *before* drawing anything on the frame,
                # so the landmark dots do not end up in the thresholded image.
                gray_eye = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
                blurred_eye = cv2.GaussianBlur(gray_eye, (5, 5), 0)
                binary_eye = cv2.adaptiveThreshold(
                    blurred_eye, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
                    cv2.THRESH_BINARY_INV, 11, 2
                )
                contours, _ = cv2.findContours(
                    binary_eye, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE
                )

                # Draw eye landmarks (plain Python ints for the drawing API).
                for px, py in eye_points.tolist():
                    cv2.circle(frame, (px, py), 2, (0, 255, 0), -1)

                if not contours:
                    continue

                # The largest dark blob in the eye region is taken as the pupil.
                largest_contour = max(contours, key=cv2.contourArea)
                (cx, cy), radius = cv2.minEnclosingCircle(largest_contour)
                cx, cy = int(cx), int(cy)

                # Pupil position in the ROI (coordinates are ROI-local; `roi` is
                # a view into `frame`, so drawing on it marks the frame).
                cv2.circle(roi, (cx, cy), int(radius), (255, 0, 0), 2)
                cv2.putText(frame, f"{eye_name} Pupil: ({cx},{cy})", (x0, y0 - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)

                # Map pupil to screen coordinates: the pupil's relative position
                # inside the eye box is scaled to the screen resolution.
                screen_x = int(screen_width * (cx / roi_w))
                screen_y = int(screen_height * (cy / roi_h))

                # Display screen mapping.
                # NOTE(review): the marker is drawn at *screen* coordinates on the
                # camera frame, so it can fall outside the visible image when the
                # screen is larger than the frame - confirm this is intended.
                cv2.circle(frame, (screen_x, screen_y), 10, (0, 255, 255), -1)
                cv2.putText(frame, f"Screen: ({screen_x},{screen_y})", (x0, y1 + 20),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)

        # Show the frame. It is already BGR, so no colour conversion is needed
        # (converting here would swap the red and blue channels).
        cv2.imshow("Eye Tracking", frame)

        # Exit condition: press 'q' in the preview window.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always release the camera, the FaceMesh graph and the window, even if
    # processing raised part-way through.
    cap.release()
    face_mesh.close()
    cv2.destroyAllWindows()