# In [1]:
import cv2
import mediapipe as mp
import pyautogui
import numpy as np

# hand tracking module
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(max_num_hands=1, min_detection_confidence=0.7)
mp_draw = mp.solutions.drawing_utils
cap = cv2.VideoCapture(0)

# Get the screen size
screen_width, screen_height = pyautogui.size()

# margins are defined for making the frame region appear at top corner
top_margin = 50
right_margin = 50
frame_width = 300
frame_height = 200

while True:
    success, frame = cap.read()
    if not success:
        break

    # Get the frame dimensions
    frame_height_cam, frame_width_cam, _ = frame.shape

    frame_region = {
        "start_point": (frame_width_cam - frame_width - right_margin, top_margin),
        "end_point": (frame_width_cam - right_margin, top_margin + frame_height),
        "color": (255, 0, 0), 
        "thickness": 1, 
    }

    # Flip the frame horizontally and convert the color from BGR to RGB - generated by GPT because it was having some error
    frame = cv2.flip(frame, 1)
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    # Draw the detection frame region on the webcam feed
    cv2.rectangle(frame, frame_region["start_point"], frame_region["end_point"], frame_region["color"], frame_region["thickness"])

    # Process the frame and detect hands
    results = hands.process(frame_rgb)

    # Check if any hand is detected
    if results.multi_hand_landmarks:
        for hand_landmarks in results.multi_hand_landmarks:
            # Get the tip of the index finger
            tip_x = hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP].x
            tip_y = hand_landmarks.landmark[mp_hands.HandLandmark.INDEX_FINGER_TIP].y

            # Convert hand landmark position to relative frame coordinates
            relative_x = int(tip_x * frame_width_cam)
            relative_y = int(tip_y * frame_height_cam)

            # Check if the index finger tip is within the detection frame region
            if (frame_region["start_point"][0] < relative_x < frame_region["end_point"][0]) and (frame_region["start_point"][1] < relative_y < frame_region["end_point"][1]):
                # Draw hand landmarks
                mp_draw.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

                # Map the hand position to the screen size
                screen_x = np.interp(relative_x, [frame_region["start_point"][0], frame_region["end_point"][0]], [0, screen_width])
                screen_y = np.interp(relative_y, [frame_region["start_point"][1], frame_region["end_point"][1]], [0, screen_height])

                # Move the cursor
                pyautogui.moveTo(screen_x, screen_y)

    # Display the frame
    cv2.imshow("Hand Tracking", frame)

    if cv2.waitKey(1) & 0xFF == 27:  # ESC to break
        break

cap.release()
cv2.destroyAllWindows()