In [1]:
import cv2
import numpy as np
import mediapipe as mp
import pyttsx3 as ts
import pyautogui
import math
import time
from IPython.display import clear_output
import time

In [2]:
# Short aliases for the MediaPipe solution modules referenced throughout the notebook.
mp_holistic = mp.solutions.holistic  # provides Holistic and HAND_CONNECTIONS, both used below
mp_drawing = mp.solutions.drawing_utils  # provides draw_landmarks and DrawingSpec, both used below

In [3]:
def mediapipe_detection(image, model):
    """Run ``model.process`` on a mirrored RGB version of ``image``.

    The input is converted BGR -> RGB and mirrored (``cv2.flip`` with code 1)
    before being handed to the model, so the returned result refers to the
    mirrored frame.

    Args:
        image: Frame to analyse; converted with ``cv2.COLOR_BGR2RGB``.
        model: Object exposing ``process(image)``.

    Returns:
        A ``(image, result)`` tuple: the mirrored frame converted back with
        ``cv2.COLOR_RGB2BGR`` and whatever ``model.process`` returned.
    """
    mirrored_rgb = cv2.flip(cv2.cvtColor(image, cv2.COLOR_BGR2RGB), 1)

    # The buffer is read-only only for the duration of the model call.
    mirrored_rgb.flags.writeable = False
    detections = model.process(mirrored_rgb)
    mirrored_rgb.flags.writeable = True

    return cv2.cvtColor(mirrored_rgb, cv2.COLOR_RGB2BGR), detections

In [4]:
def draw_landmarks_left(image, results):
    """Draw ``results.left_hand_landmarks`` and the hand connections on ``image``.

    Delegates to ``mp_drawing.draw_landmarks``; nothing is returned.
    """
    # Style for the landmark circles.
    circle_style = mp_drawing.DrawingSpec(color=(121, 22, 76), thickness=2, circle_radius=4)
    # Style for the connecting lines.
    line_style = mp_drawing.DrawingSpec(color=(121, 44, 250), thickness=2, circle_radius=2)

    mp_drawing.draw_landmarks(
        image,
        results.left_hand_landmarks,
        mp_holistic.HAND_CONNECTIONS,
        circle_style,
        line_style,
    )


In [5]:
def draw_landmarks_right(image, results):
    """Draw ``results.right_hand_landmarks`` and the hand connections on ``image``.

    Delegates to ``mp_drawing.draw_landmarks``; nothing is returned.
    """
    # Style for the landmark circles.
    circle_style = mp_drawing.DrawingSpec(color=(0, 255, 0), thickness=2, circle_radius=4)
    # Style for the connecting lines.
    line_style = mp_drawing.DrawingSpec(color=(0, 255, 255), thickness=2, circle_radius=1)

    mp_drawing.draw_landmarks(
        image,
        results.right_hand_landmarks,
        mp_holistic.HAND_CONNECTIONS,
        circle_style,
        line_style,
    )


In [6]:
def extract_keypoints(results):
    """Flatten the left-hand landmarks into a 1-D NumPy array.

    Args:
        results: Object exposing ``left_hand_landmarks``; when truthy, its
            ``landmark`` entries provide ``x``, ``y`` and ``z``.

    Returns:
        ``[x0, y0, z0, x1, y1, z1, ...]`` as a flat array, or ``21 * 3`` zeros
        when ``results.left_hand_landmarks`` is falsy.
    """
    hand = results.left_hand_landmarks
    if not hand:
        return np.zeros(21 * 3)

    coords = np.array([[point.x, point.y, point.z] for point in hand.landmark])
    return coords.flatten()

In [7]:
def extract_thumb_keypoints(results):
    """Return the left-hand landmarks as a flat ``[x0, y0, z0, x1, ...]`` list.

    Despite the name, every entry of ``results.left_hand_landmarks.landmark``
    is included, not only thumb points.

    Args:
        results: Object exposing ``left_hand_landmarks``.

    Returns:
        A plain Python list of coordinates. When ``left_hand_landmarks`` is
        falsy (no hand available) a list of ``21 * 3`` zeros is returned,
        mirroring the fallback used by ``extract_keypoints`` instead of
        raising ``AttributeError``.
    """
    hand = results.left_hand_landmarks
    if not hand:
        return [0.0] * (21 * 3)

    return [
        coord
        for point in hand.landmark
        for coord in (point.x, point.y, point.z)
    ]

In [8]:
def left_move_mouse(landmarks, image, frame, is_clicked, prev_ind_finger_x, prev_ind_finger_y, initial_time):
    """Track the index finger and, while ``is_clicked`` is truthy, move the cursor with it.

    The cursor is moved relative to its current position by the finger's pixel
    displacement since the previous call, scaled by a speed factor derived from
    how fast the finger travelled.

    Args:
        landmarks: Hand landmark container; ``landmarks.landmark[8]`` is the
            point tracked as the index finger. Its ``x``/``y`` are multiplied
            by the frame size (presumably normalised coordinates - confirm).
        image: Image on which the finger marker is drawn with ``cv2.circle``.
        frame: Captured frame; only ``frame.shape`` is read.
        is_clicked: When truthy the OS cursor is moved, otherwise the finger is
            only tracked.
        prev_ind_finger_x: Finger x (pixels) returned by the previous call.
        prev_ind_finger_y: Finger y (pixels) returned by the previous call.
        initial_time: ``time.time()`` value returned by the previous call.

    Returns:
        ``(finger_x, finger_y, timestamp)`` to pass into the next call. If the
        landmark list is too short to contain the index finger, the previous
        position is returned unchanged and the cursor is not moved.
    """
    INDEX_FINGER = 8          # landmark id tracked as the index finger
    MIN_XY = 2                # keep the cursor off the top/left screen edge
    MAX_X, MAX_Y = 1920, 1075  # NOTE(review): assumes a 1920x1080 display - confirm

    clear_output(wait=True)
    time_spent = (time.time() - initial_time) * 1000  # elapsed milliseconds

    points = landmarks.landmark
    if len(points) <= INDEX_FINGER:
        # Nothing to track: report the old position so the next delta stays sane.
        return prev_ind_finger_x, prev_ind_finger_y, time.time()

    frameH, frameW, _ = frame.shape
    tip = points[INDEX_FINGER]
    finger_x = int(tip.x * frameW)
    finger_y = int(tip.y * frameH)

    dist_x = finger_x - prev_ind_finger_x
    dist_y = finger_y - prev_ind_finger_y
    dist = math.dist([prev_ind_finger_x, prev_ind_finger_y], [finger_x, finger_y])

    # Two calls can land on the same clock tick; avoid dividing by zero and
    # fall back to the minimum speed in that case.
    speed = int((dist / time_spent) * 10) if time_spent > 0 else 0
    speed = max(speed, 1)
    print(speed)

    cv2.circle(image, (finger_x, finger_y), 10, (0, 34, 234), thickness=2)

    if is_clicked:
        mouse_x, mouse_y = pyautogui.position()
        x = min(max(mouse_x + dist_x * speed, MIN_XY), MAX_X)
        y = min(max(mouse_y + dist_y * speed, MIN_XY), MAX_Y)
        pyautogui.moveTo(x, y)

    return finger_x, finger_y, time.time()

In [9]:
def right_move_mouse(landmarks, image, frame):
    """Interpret pinch gestures of one hand as mouse clicks and a drag toggle.

    Pixel distances between the thumb (landmark 4) and the index (8),
    middle (12) and ring (16) finger landmarks are measured:

    * thumb-ring distance below 30 px   -> right click at the cursor position
    * thumb-middle distance below 30 px -> left click
    * thumb-index distance below 80 px  -> the function returns ``True``

    Args:
        landmarks: Hand landmark container; ``landmarks.landmark[i].x`` / ``.y``
            are multiplied by the frame size (presumably normalised
            coordinates - confirm).
        image: Image on which the thumb and index markers are drawn.
        frame: Captured frame; only ``frame.shape`` is read.

    Returns:
        ``True`` when the thumb and index finger are closer than 80 px,
        otherwise ``False``. ``False`` is also returned, without clicking or
        drawing, when the landmark list is too short to contain all four points.
    """
    THUMB, INDEX_FINGER, MIDDLE_FINGER, RING_FINGER = 4, 8, 12, 16
    CLICK_DIST = 30  # px: thumb-to-finger distance that fires a click
    DRAG_DIST = 80   # px: thumb-to-index distance that enables cursor movement

    clear_output(wait=True)

    points = landmarks.landmark
    if len(points) <= RING_FINGER:
        return False

    frameH, frameW, _ = frame.shape

    def to_pixels(idx):
        # Scale one landmark to integer pixel coordinates of the frame.
        point = points[idx]
        return int(point.x * frameW), int(point.y * frameH)

    thumb = to_pixels(THUMB)
    ind_finger = to_pixels(INDEX_FINGER)
    mid_finger = to_pixels(MIDDLE_FINGER)
    ring_finger = to_pixels(RING_FINGER)

    cv2.circle(image, thumb, 10, (0, 255, 0), thickness=2)
    cv2.circle(image, ind_finger, 10, (0, 34, 234), thickness=2)

    if math.dist(thumb, ring_finger) < CLICK_DIST:
        mouse_x, mouse_y = pyautogui.position()
        pyautogui.rightClick(mouse_x, mouse_y)
    if math.dist(thumb, mid_finger) < CLICK_DIST:
        pyautogui.click()

    return math.dist(thumb, ind_finger) < DRAG_DIST



    

In [10]:
# Detection loop state

screenH, screenW = 1080, 1980
ind_finger_x, ind_finger_y = 0, 0   # last index-finger position, fed back into left_move_mouse
thumb_x, thumb_y = 0, 0
cap = cv2.VideoCapture(0)
initial_time = time.time()          # timestamp of the previous left_move_mouse call

# The camera is opened before the model, so the try/finally wraps everything:
# the capture is released even if building the Holistic model fails.
try:
    # access mediapipe model
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
        while cap.isOpened():

            # read feed
            ret, frame = cap.read()
            if not ret:
                # No frame was grabbed (camera unplugged / stream ended);
                # passing the empty frame on would crash in cv2.cvtColor.
                break

            # make detection
            image, results = mediapipe_detection(frame, holistic)

            # NOTE(review): the names are swapped relative to the result
            # attributes - presumably because the frame is mirrored before
            # detection; confirm.
            left_landmarks = results.right_hand_landmarks
            right_landmarks = results.left_hand_landmarks

            # One hand decides whether dragging is active, the other moves the cursor.
            is_clicked = False

            if right_landmarks:
                is_clicked = right_move_mouse(right_landmarks, image, frame)

            if left_landmarks:
                ind_finger_x, ind_finger_y, initial_time = left_move_mouse(
                    left_landmarks, image, frame, is_clicked,
                    ind_finger_x, ind_finger_y, initial_time)

            cv2.imshow('feed', image)

            # break on condition
            if (cv2.waitKey(10) & 0xff) == ord('q'):
                break
finally:
    cap.release()
    cv2.destroyAllWindows()

1
