In [1]:
import cv2
import pyautogui
import mediapipe as mp

In [2]:
class HandDetector:
    """Controls the mouse pointer from hand landmarks found by MediaPipe Hands.

    The middle-finger tip steers the cursor. A click is issued when the
    index-finger tip is at (nearly) the same screen height as the
    middle-finger tip.
    """

    # Landmark indices in the MediaPipe Hands model.
    INDEX_FINGER_TIP = 8
    MIDDLE_FINGER_TIP = 12
    # Maximum vertical gap (in screen pixels) between both tips for a click.
    CLICK_THRESHOLD = 10

    def __init__(self):
        self.hand = mp.solutions.hands.Hands()
        self.hand_drawing_utils = mp.solutions.drawing_utils
        self.hand_landmarks = self.create_hand_landmarks()

    def create_hand_landmarks(self) -> dict:
        """Return a fresh ``{finger: {'x': 0, 'y': 0}}`` mapping for all five fingers."""
        fingers = ('thumb', 'index', 'middle', 'ring', 'pinkie')
        return {finger: {'x': 0, 'y': 0} for finger in fingers}

    def _update_finger(self, frame, landmark, finger_name, screen_size) -> dict:
        """Mark one landmark on ``frame`` and store its screen position."""
        height, width, _ = frame.shape
        s_width, s_height = screen_size
        # Landmark coordinates are multiplied by the frame size to get pixels.
        x = int(landmark.x * width)
        y = int(landmark.y * height)
        cv2.circle(frame, (x, y), 10, (0, 255, 255))
        finger = self.hand_landmarks[finger_name]
        finger['x'] = s_width / width * x
        finger['y'] = s_height / height * y
        return finger

    def detect_hands(self, frame) -> None:
        """Detect hands in ``frame``, draw them, and move/click the mouse.

        Args:
            frame: image whose ``shape`` is ``(height, width, channels)``.
                It is drawn on in place. The notebook passes an RGB frame.

        Side effects:
            Updates ``self.hand_landmarks['middle']`` and ``['index']`` with
            screen coordinates, moves the pointer, and may click.
        """
        # pyautogui.size() yields (width, height), in that order.
        screen_size = pyautogui.size()
        detection = self.hand.process(frame)
        hands = detection.multi_hand_landmarks
        if not hands:
            return
        for hand in hands:
            self.hand_drawing_utils.draw_landmarks(frame, hand)
            landmarks = hand.landmark
            # Refresh both fingertips first so the click test compares
            # positions taken from the same frame.
            middle = self._update_finger(
                frame, landmarks[self.MIDDLE_FINGER_TIP], 'middle', screen_size)
            index = self._update_finger(
                frame, landmarks[self.INDEX_FINGER_TIP], 'index', screen_size)
            pyautogui.moveTo(middle['x'], middle['y'])
            if abs(middle['y'] - index['y']) < self.CLICK_THRESHOLD:
                pyautogui.click()

In [3]:
class FaceDetector:
    """Moves the mouse pointer from a MediaPipe Face Mesh landmark."""

    def __init__(self):
        # refine_landmarks=True is needed for indices 474-477 used below
        # (presumably the iris landmarks; confirm against the Face Mesh docs).
        self.face = mp.solutions.face_mesh.FaceMesh(refine_landmarks=True)

    def detect_eyes(self, frame) -> None:
        """Detect a face in ``frame``, mark landmarks 474-477 and move the pointer.

        Args:
            frame: image whose ``shape`` is ``(height, width, channels)``.
                It is drawn on in place. The notebook passes an RGB frame.

        Only the first detected face is used. The pointer follows the second
        of the four marked landmarks (index 475).
        """
        height, width, _ = frame.shape
        # pyautogui.size() yields (width, height), in that order.
        s_width, s_height = pyautogui.size()
        detection = self.face.process(frame)
        features = detection.multi_face_landmarks
        if not features:
            return
        landmarks = features[0].landmark
        for offset, landmark in enumerate(landmarks[474:478]):
            # Landmark coordinates are multiplied by the frame size to get pixels.
            x = int(landmark.x * width)
            y = int(landmark.y * height)
            cv2.circle(frame, (x, y), 3, (0, 255, 0))
            if offset == 1:
                cursor_x = s_width / width * x
                cursor_y = s_height / height * y
                pyautogui.moveTo(cursor_x, cursor_y)

In [4]:
# Open camera device 0 and fail immediately if it is unavailable, rather
# than discovering the problem on the first frame read.
cam = cv2.VideoCapture(0)
if not cam.isOpened():
    cam.release()
    raise RuntimeError('Could not open camera 0')
hand_detector = HandDetector()
face_detector = FaceDetector()

In [5]:
# Capture loop: read a frame, run detection, show the annotated frame.
# Press 'q' in the preview window to stop.
try:
    while True:
        ret, frame = cam.read()
        if not ret:
            raise RuntimeError('Could not read a frame from the camera')
        # Mirror the image so on-screen motion matches the user's motion.
        frame = cv2.flip(frame, 1)
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        hand_detector.detect_hands(rgb_frame)
        #face_detector.detect_eyes(rgb_frame)
        # The detectors draw on rgb_frame (a separate array from frame), so
        # convert that one back to BGR for display to make the overlay visible.
        cv2.imshow('', cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR))
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the window, even on an error or a
    # kernel interrupt, so the cell can be re-run.
    cam.release()
    cv2.destroyAllWindows()

