In [1]:
%pip install pynput

Note: you may need to restart the kernel to use updated packages.


In [2]:
import os
import random

import cv2
import mediapipe as mp
import pyautogui as pag # using this library for mouse moving
from pynput.mouse import Button, Controller

import  utils


In [3]:
# Screen size as reported by pyautogui; moving_mouse multiplies the landmark
# x/y values by these to obtain the pointer target.
screen_width, screen_height = pag.size()

# MediaPipe Hands solution module and the tracker instance shared by the
# notebook. The tracker is limited to one hand, and main() only ever reads
# the first entry of multi_hand_landmarks.
mpHands = mp.solutions.hands
hands = mpHands.Hands(
    max_num_hands=1,
    static_image_mode=False,
    model_complexity=1,
    min_detection_confidence=0.7,
    min_tracking_confidence=0.7,
)

# pynput controller used by detect_gesture to press/release mouse buttons.
mouse = Controller()

In [4]:
def find_finger_tip(processed):
    """Return the index-finger-tip landmark of the first detected hand.

    Args:
        processed: Result object exposing ``multi_hand_landmarks`` (as
            returned by ``hands.process`` in ``main``).

    Returns:
        The entry of the first hand's ``landmark`` sequence selected by
        ``mpHands.HandLandmark.INDEX_FINGER_TIP``, or ``None`` when
        ``multi_hand_landmarks`` is empty or ``None``.
    """
    detected_hands = processed.multi_hand_landmarks
    if not detected_hands:
        return None

    first_hand = detected_hands[0]
    tip_index = mpHands.HandLandmark.INDEX_FINGER_TIP
    return first_hand.landmark[tip_index]

In [5]:
def moving_mouse(index_finger_tip):
    """Move the pointer to the screen position matching a fingertip landmark.

    Args:
        index_finger_tip: Object with ``x`` and ``y`` attributes, or a falsy
            value (e.g. ``None``) when no fingertip is available. The values
            are presumably normalized to [0, 1] -- TODO confirm against the
            MediaPipe landmark documentation.

    Returns:
        None. Does nothing when ``index_finger_tip`` is falsy.
    """
    if not index_finger_tip:
        return

    # Scale the landmark coordinates by the screen size and truncate to
    # whole pixels before handing them to pyautogui.
    target_x = int(index_finger_tip.x * screen_width)
    target_y = int(index_finger_tip.y * screen_height)
    pag.moveTo(target_x, target_y)

In [10]:
def _label_gesture(frame, text):
    """Overlay ``text`` at position (50, 50) on ``frame`` to name the gesture."""
    cv2.putText(frame, text, (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)


def detect_gesture(frame, landmarks_list, processed):
    """Classify the current hand pose and trigger the matching mouse action.

    The first matching rule wins, checked in this order:

    1. Move pointer: ``thumb_index_dist < 50`` and the index-finger angle
       (landmarks 5, 6, 8) ``> 90``.
    2. Left click: ``is_left_click`` is truthy.
    3. Right click: ``is_right_click`` is truthy.
    4. Double click: ``is_double_click`` is truthy.
    5. Screenshot: ``is_screenshot`` is truthy; the image is saved as
       ``screenshot/image<N>.png`` (``N`` random in 1..1000) relative to the
       current working directory.

    Args:
        frame: Image the gesture label is drawn onto (modified in place by
            ``cv2.putText``).
        landmarks_list: Sequence of landmark coordinates; nothing happens
            unless it holds at least 21 entries.
        processed: MediaPipe result passed on to ``find_finger_tip``.

    Returns:
        None.
    """
    if len(landmarks_list) < 21:
        return

    index_finger_tip = find_finger_tip(processed)
    # Landmarks 4 and 5 are the thumb tip and the base of the index finger in
    # MediaPipe's hand landmark numbering; the distance is in whatever units
    # utils.get_distance returns.
    thumb_index_dist = utils.get_distance([landmarks_list[4], landmarks_list[5]])
    thumb_index_angle = utils.get_angle(landmarks_list[5], landmarks_list[6], landmarks_list[8])

    if thumb_index_dist < 50 and thumb_index_angle > 90:
        moving_mouse(index_finger_tip)

    elif is_left_click(landmarks_list, thumb_index_dist):
        mouse.press(Button.left)
        mouse.release(Button.left)
        _label_gesture(frame, "Left click")

    elif is_right_click(landmarks_list, thumb_index_dist):
        mouse.press(Button.right)
        mouse.release(Button.right)
        _label_gesture(frame, "Right click")

    # The predicate's second parameter is the thumb-index distance. Passing
    # the index-finger angle instead made the condition self-contradictory
    # (same angle both < 50 and > 50), so double click could never fire.
    elif is_double_click(landmarks_list, thumb_index_dist):
        pag.doubleClick()
        _label_gesture(frame, "Double click")

    elif is_screenshot(landmarks_list, thumb_index_dist):
        img = pag.screenshot()
        label = random.randint(1, 1000)
        # Build the path portably and make sure the folder exists; saving
        # into a missing directory would raise and abort the capture loop.
        screenshot_dir = "screenshot"
        os.makedirs(screenshot_dir, exist_ok=True)
        img.save(os.path.join(screenshot_dir, "image" + str(label) + ".png"))
        _label_gesture(frame, "Screenshot")

In [7]:
def is_left_click(landmarks_list, thumb_index_dist):
    """Tell whether the pose matches the left-click gesture.

    Conditions, evaluated in order and stopping at the first failure:
    the angle over landmarks (5, 6, 8) is below 50, the angle over landmarks
    (9, 10, 12) is above 90, and ``thumb_index_dist`` is above 50.

    Returns:
        The result of the last comparison evaluated (truthy only when all
        three conditions hold).
    """
    index_ok = utils.get_angle(landmarks_list[5], landmarks_list[6], landmarks_list[8]) < 50
    if not index_ok:
        return index_ok

    middle_ok = utils.get_angle(landmarks_list[9], landmarks_list[10], landmarks_list[12]) > 90
    if not middle_ok:
        return middle_ok

    return thumb_index_dist > 50

def is_right_click(landmarks_list, thumb_index_dist):
    """Tell whether the pose matches the right-click gesture.

    Conditions, evaluated in order and stopping at the first failure:
    the angle over landmarks (5, 6, 8) is above 90, the angle over landmarks
    (9, 10, 12) is below 50, and ``thumb_index_dist`` is above 50.

    Returns:
        The result of the last comparison evaluated (truthy only when all
        three conditions hold).
    """
    index_ok = utils.get_angle(landmarks_list[5], landmarks_list[6], landmarks_list[8]) > 90
    if not index_ok:
        return index_ok

    middle_ok = utils.get_angle(landmarks_list[9], landmarks_list[10], landmarks_list[12]) < 50
    if not middle_ok:
        return middle_ok

    return thumb_index_dist > 50

def is_double_click(landmarks_list, thumb_index_dist):
    """Tell whether the pose matches the double-click gesture.

    Conditions, evaluated in order and stopping at the first failure:
    the angle over landmarks (5, 6, 8) is below 50, the angle over landmarks
    (9, 10, 12) is below 50, and ``thumb_index_dist`` is above 50.

    Returns:
        The result of the last comparison evaluated (truthy only when all
        three conditions hold).
    """
    index_ok = utils.get_angle(landmarks_list[5], landmarks_list[6], landmarks_list[8]) < 50
    if not index_ok:
        return index_ok

    middle_ok = utils.get_angle(landmarks_list[9], landmarks_list[10], landmarks_list[12]) < 50
    if not middle_ok:
        return middle_ok

    return thumb_index_dist > 50

def is_screenshot(landmarks_list, thumb_index_dist):
    """Tell whether the pose matches the screenshot gesture.

    Conditions, evaluated in order and stopping at the first failure:
    the angle over landmarks (5, 6, 8) is below 50, the angle over landmarks
    (9, 10, 12) is below 50, and ``thumb_index_dist`` is below 50.

    Returns:
        The result of the last comparison evaluated (truthy only when all
        three conditions hold).
    """
    index_ok = utils.get_angle(landmarks_list[5], landmarks_list[6], landmarks_list[8]) < 50
    if not index_ok:
        return index_ok

    middle_ok = utils.get_angle(landmarks_list[9], landmarks_list[10], landmarks_list[12]) < 50
    if not middle_ok:
        return middle_ok

    return thumb_index_dist < 50


In [11]:
def main():
    """Run the webcam loop: track a hand, act on gestures, show the preview.

    Frames are read from capture device 0 until the device closes, a read
    fails, or the user presses ``q`` in the preview window. The capture
    device and all OpenCV windows are released on exit, even if an
    exception is raised inside the loop.
    """
    cap = cv2.VideoCapture(0)
    draw = mp.solutions.drawing_utils
    try:
        while cap.isOpened():
            grabbed, frame = cap.read()
            if not grabbed:
                break

            # Flip around the vertical axis, then convert a copy from BGR to
            # RGB for the hand tracker.
            frame = cv2.flip(frame, 1)
            processed = hands.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

            detected_hands = processed.multi_hand_landmarks
            if detected_hands:
                hand_landmarks = detected_hands[0]
                draw.draw_landmarks(frame, hand_landmarks, mpHands.HAND_CONNECTIONS)
                landmarks_list = [(lm.x, lm.y) for lm in hand_landmarks.landmark]
            else:
                # No hand this frame: detect_gesture ignores short lists.
                landmarks_list = []

            detect_gesture(frame, landmarks_list, processed)

            cv2.imshow('Frame', frame)
            pressed_key = cv2.waitKey(1) & 0xFF
            if pressed_key == ord('q'):
                break
    finally:
        cap.release()
        cv2.destroyAllWindows()
        
# Start the capture loop when this code runs as the main module (which is the
# case when the cell is executed in a notebook kernel or the file is run as a
# script); importing it from elsewhere does not start the loop.
if __name__ == '__main__':
    main()