In [1]:
import cv2
import mediapipe as mp
import pyautogui
import numpy as np

# --- MediaPipe solution modules -------------------------------------------
mp_face_mesh = mp.solutions.face_mesh    # face-mesh model
mp_hands = mp.solutions.hands            # hand-tracking model
mp_drawing = mp.solutions.drawing_utils  # helpers for drawing landmarks on frames

# --- Video source ----------------------------------------------------------
# Open the capture device at index 1.
cap = cv2.VideoCapture(1)

# --- Screen geometry -------------------------------------------------------
# Width and height of the screen as reported by PyAutoGUI.
screen_width, screen_height = pyautogui.size()

# --- Face landmark indices -------------------------------------------------
# Face-mesh landmark indices that are highlighted as eye and lip markers.
EYE_LANDMARKS = [33, 133, 362, 263]
LIP_LANDMARKS = [61, 146, 291, 308]

# --- Cursor smoothing state ------------------------------------------------
# Divisor used by the cursor-smoothing step, and the cursor position carried
# over from the previous frame (starts at the origin).
smooth_factor = 5
prev_x = prev_y = 0

# Run face-mesh + hand tracking on the webcam feed and drive the mouse cursor
# with the index fingertip; pinching index finger and thumb together clicks.
#
# The whole loop lives inside try/finally so the camera and the preview window
# are released even when the loop exits through an exception (for example
# PyAutoGUI's fail-safe, or a KeyboardInterrupt from the notebook kernel).

# Index-to-thumb distance, in frame pixels, below which the hand counts as pinching.
PINCH_THRESHOLD_PX = 30

# Whether a pinch was seen on the previous frame; used so that one pinch
# produces one click instead of one click per frame while the pinch is held.
was_pinching = False

try:
    with mp_face_mesh.FaceMesh(min_detection_confidence=0.5, min_tracking_confidence=0.5) as face_mesh, \
         mp_hands.Hands(min_detection_confidence=0.5, min_tracking_confidence=0.5) as hands:

        while cap.isOpened():
            ret, frame = cap.read()  # Read frame
            if not ret:
                break

            frame = cv2.flip(frame, 1)  # Flip the frame for a mirror effect
            h, w, _ = frame.shape  # Get frame dimensions
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)  # MediaPipe expects RGB

            # Process face and hands
            face_results = face_mesh.process(rgb_frame)
            hand_results = hands.process(rgb_frame)

            # Draw Face Mesh Landmarks
            if face_results.multi_face_landmarks:
                for face_landmarks in face_results.multi_face_landmarks:
                    # Draw face mesh
                    mp_drawing.draw_landmarks(frame, face_landmarks, mp_face_mesh.FACEMESH_TESSELATION)

                    # Draw Eye Landmarks
                    for landmark_idx in EYE_LANDMARKS:
                        landmark = face_landmarks.landmark[landmark_idx]
                        x, y = int(landmark.x * w), int(landmark.y * h)
                        cv2.circle(frame, (x, y), 3, (0, 255, 255), -1)  # Yellow for eyes

                    # Draw Lip Landmarks
                    for landmark_idx in LIP_LANDMARKS:
                        landmark = face_landmarks.landmark[landmark_idx]
                        x, y = int(landmark.x * w), int(landmark.y * h)
                        cv2.circle(frame, (x, y), 3, (0, 0, 255), -1)  # Red for lips

            # Process Hand Landmarks
            pinching_now = False  # becomes True if any hand is pinching in this frame
            if hand_results.multi_hand_landmarks:
                for hand_landmarks in hand_results.multi_hand_landmarks:
                    mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

                    # Get index finger tip and thumb tip
                    index_finger_tip = hand_landmarks.landmark[8]  # Index finger tip
                    thumb_tip = hand_landmarks.landmark[4]  # Thumb tip

                    # Convert normalized landmark coordinates to frame pixels
                    x_index = int(index_finger_tip.x * w)
                    y_index = int(index_finger_tip.y * h)
                    x_thumb = int(thumb_tip.x * w)
                    y_thumb = int(thumb_tip.y * h)

                    # Map frame pixels to screen resolution
                    screen_x = np.interp(x_index, [0, w], [0, screen_width])
                    screen_y = np.interp(y_index, [0, h], [0, screen_height])

                    # Smooth cursor movement: step 1/smooth_factor of the way
                    # from the previous position towards the target, so the
                    # cursor converges on the fingertip position.
                    curr_x = prev_x + (screen_x - prev_x) / smooth_factor
                    curr_y = prev_y + (screen_y - prev_y) / smooth_factor
                    prev_x, prev_y = curr_x, curr_y

                    # Move mouse cursor
                    pyautogui.moveTo(int(curr_x), int(curr_y), duration=0.1)

                    # Check for clicking gesture (index and thumb close together)
                    distance = np.linalg.norm(np.array([x_index, y_index]) - np.array([x_thumb, y_thumb]))

                    if distance < PINCH_THRESHOLD_PX:
                        # Click only on the frame where the pinch begins.
                        if not was_pinching and not pinching_now:
                            pyautogui.click()
                        pinching_now = True
                        cv2.putText(frame, "Click", (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

            # Remember the pinch state for the next frame (resets when no hand is pinching)
            was_pinching = pinching_now

            # Display the frame
            cv2.imshow('Face & Hand Landmarks with Virtual Mouse', frame)

            # Quit when 'q' is pressed in the preview window
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
finally:
    # Always free the camera and close the preview window
    cap.release()
    cv2.destroyAllWindows()


I0000 00:00:1738912943.857397  276061 gl_context.cc:369] GL version: 2.1 (2.1 Metal - 89.3), renderer: Apple M2
I0000 00:00:1738912943.861701  276061 gl_context.cc:369] GL version: 2.1 (2.1 Metal - 89.3), renderer: Apple M2
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
W0000 00:00:1738912943.904217  276299 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1738912943.905592  276305 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1738912943.908461  276299 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1738912943.911073  276305 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback t