In [270]:
import cv2
import mediapipe as mp
import utils
import pyautogui
import keyboard
import numpy as np
import time

In [271]:
# Screen resolution as reported by pyautogui.
screen_w, screen_h = pyautogui.size()

# MediaPipe hand-tracking solution; one shared tracker instance is reused for
# every frame and limited to a single hand.
mpHands = mp.solutions.hands
hands = mpHands.Hands(
    static_image_mode=False,
    max_num_hands=1,
    model_complexity=1,
    min_detection_confidence=0.7,
    min_tracking_confidence=0.7,
)

In [272]:
def find_landmark_cordinates(processed,index,frame):
    """Return the pixel position of one landmark of the first detected hand.

    Args:
        processed: Hand-tracking result exposing ``multi_hand_landmarks``.
        index: Position of the wanted landmark inside the hand's ``landmark``
            sequence.
        frame: Image whose ``shape`` unpacks as (height, width, channels).

    Returns:
        An ``(x, y)`` tuple of ints obtained by scaling the landmark's ``x`` by
        the frame width and its ``y`` by the frame height, or ``None`` when no
        hand was detected.
    """
    # Unpacked before the detection check, so a frame whose shape does not
    # have exactly three entries raises whether or not a hand is present.
    height, width, _ = frame.shape

    detected_hands = processed.multi_hand_landmarks
    if not detected_hands:
        return None

    point = detected_hands[0].landmark[index]
    return (int(point.x * width), int(point.y * height))

In [273]:
def draw_landmarks(processed,frame):
    """Outline the tracked key points of the detected hand on ``frame``.

    Draws one circle (radius 10, thickness 2) per key point, directly on
    ``frame``. Key points that cannot be located (no hand detected) are
    skipped. Returns nothing.

    Args:
        processed: Hand-tracking result passed through to
            ``find_landmark_cordinates``.
        frame: Image that is drawn on in place.
    """
    radius, thickness = 10, 2

    # (landmark index, circle colour), listed in drawing order.
    markers = (
        (8, (0,255,0)),       # index finger tip
        (4, (255,125,0)),     # thumb tip
        (0, (0,255,255)),     # wrist
        (12, (255,255,86)),   # middle finger tip
        (20, (20,20,20)),     # pinky finger tip
    )

    # Resolve every position before any drawing happens.
    centres = [find_landmark_cordinates(processed, idx, frame) for idx, _ in markers]

    for centre, (_, colour) in zip(centres, markers):
        if centre:
            cv2.circle(frame, (centre[0], centre[1]), radius, colour, thickness)



def draw_triangle(middle_cordinates,frame,size=30,color=(210,0,255),thickness=2):
    """Draw an upward-pointing triangle outline centred on a point.

    Args:
        middle_cordinates: ``(x, y)`` centre of the triangle.
        frame: Image that is drawn on in place.
        size: Width and height of the triangle's bounding box (default 30).
        color: Colour tuple passed to ``cv2.drawContours``.
        thickness: Outline thickness passed to ``cv2.drawContours``.

    Returns:
        None. ``frame`` is modified in place.
    """
    cx, cy = middle_cordinates
    half_size = size / 2

    apex = (int(cx), int(cy - half_size))
    bottom_left = (int(cx - half_size), int(cy + half_size))
    bottom_right = (int(cx + half_size), int(cy + half_size))

    # OpenCV needs 32-bit integer contour points. NumPy's default integer is
    # 64-bit on Linux/macOS (and on Windows from NumPy 2.0), which makes
    # drawContours fail, so the dtype is fixed explicitly.
    triangle_cnt = np.array([apex, bottom_left, bottom_right], dtype=np.int32)
    cv2.drawContours(frame, [triangle_cnt], 0, color, thickness)

In [274]:
# ---- Gesture state carried from one frame to the next by detect_gestures ----

# Gestures are only acted on while this is True.
can_gesture = False

# Swipe anchor: whether a starting index-fingertip position has been recorded,
# and that position (empty tuple when none is stored).
first_cord_captured = False
initial_cord = tuple()

# Volume mode is switched on and off by detect_gestures.
volume_mode = False

# time.time() stamps used as timers/debounces; None means "not running".
hand_closed_start_time = None
play_pause_start_time = None
volume_up_start_time = None
volume_mode_start_time = None


def detect_gestures(frame,landmarks_list,processed):
    """Interpret the current hand pose and send the matching media key.

    Called once per frame. Does nothing unless ``landmarks_list`` holds at
    least 21 entries. State that must survive between frames (armed flag,
    swipe anchor, timers, volume mode) lives in module-level globals.

    Gestures handled, all via ``keyboard.send``:
      * swipe of the index fingertip across the vertical centre line of the
        frame -> "next track" / "previous track"
      * thumb/index pinch -> toggles volume mode; while volume mode is on the
        thumb/index distance selects "Volume Up" or "Volume Down"
      * thumb/pinky pinch -> "play/pause media"

    Args:
        frame: Image the status text and anchor triangle are drawn on; its
            ``shape`` must unpack as (height, width, channels).
        landmarks_list: Sequence of per-landmark points; entries 0, 4, 8, 12
            and 20 are read (wrist, thumb tip, index tip, middle tip, pinky
            tip, matching the names used in ``draw_landmarks``).
        processed: Hand-tracking result, forwarded to
            ``find_landmark_cordinates`` to get the index fingertip in pixels.

    Returns:
        None.
    """
    global can_gesture
    global initial_cord
    global first_cord_captured
    global hand_closed_start_time
    global play_pause_start_time
    global volume_up_start_time
    global volume_mode_start_time
    global volume_mode
    
    # Thumb/index distance range mapped onto a 0-100 volume level below.
    min_volume = 30
    max_volume = 250
    # Seconds the hand may stay "closed" before gestures are disarmed.
    max_hand_closed_timer = 1


    if len(landmarks_list) >= 21:
        # NOTE(review): utils.get_distance is defined outside this file; the
        # thresholds below (290, 35, 45, 250) depend on the unit it returns --
        # presumably a scaled pixel-like distance, TODO confirm.
        middle_wrist_dist = utils.get_distance([landmarks_list[12],landmarks_list[0]])
        thumb_index_dist = utils.get_distance([landmarks_list[8],landmarks_list[4]])
        pinky_thum_dist = utils.get_distance([landmarks_list[20],landmarks_list[4]])

        current_cord = ()
        if middle_wrist_dist > 290:
            # Middle fingertip far from the wrist: arm gestures and cancel any
            # running "hand closed" timer.
            can_gesture = True
            hand_closed_start_time = None

            if not first_cord_captured:
                # First armed frame: remember where the index fingertip started.
                initial_cord = find_landmark_cordinates(processed,8,frame)            
                first_cord_captured = True
            else:
                current_cord = find_landmark_cordinates(processed,8,frame)
        else:
            # Hand not open: start the timer, and only disarm (and drop the
            # swipe anchor) once it has run longer than max_hand_closed_timer.
            if hand_closed_start_time is None:
                hand_closed_start_time = time.time()
            elif (time.time() - hand_closed_start_time) > max_hand_closed_timer: 
                can_gesture = False
                first_cord_captured = False

        # Overlay the three measured distances and the volume-mode flag.
        frame_h, frame_w, _ = frame.shape
        string = "MW:{0} TI:{1} TP:{2} Volume Mode:{3}".format(
            str(int(middle_wrist_dist)),
            str(int(thumb_index_dist)),
            str(int(pinky_thum_dist)),
            str(volume_mode))
        cv2.putText(frame, string, (int(frame_w / 2) - 300, 40), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (167,191,62), 2)

        # Mark the stored swipe anchor with a triangle.
        if first_cord_captured:
            draw_triangle(initial_cord,frame)
        
        if can_gesture:
            
            # Swipe: anchor and current fingertip on opposite halves of the
            # frame. The anchor is cleared after firing so one swipe sends one
            # key press.
            if first_cord_captured and current_cord:
                if initial_cord[0] > frame_w / 2 and current_cord[0] < frame_w / 2:
                    keyboard.send("next track")
                    initial_cord = ()
                    first_cord_captured = False
                elif initial_cord[0] < frame_w / 2 and current_cord[0] > frame_w / 2:
                    keyboard.send("previous track")
                    initial_cord = ()
                    first_cord_captured = False

            # Linear map of the thumb/index distance onto 0-100 (clamped at
            # both ends by np.interp).
            volume_level = np.interp(thumb_index_dist, [min_volume, max_volume], [0, 100])


            # Volume-mode toggle: a pinch (< 35) flips the mode and stamps the
            # time. The stamp is cleared only when a pinch is seen again more
            # than 2 s later, so that frame re-arms the toggle without
            # flipping it; a pinch on a subsequent frame flips it.
            if volume_mode_start_time is None:
                if thumb_index_dist < 35:
                    volume_mode = not volume_mode
                    volume_mode_start_time = time.time()
            elif (time.time() - volume_mode_start_time) > 2:
                if thumb_index_dist < 35:
                    volume_mode_start_time = None
                



            if thumb_index_dist < max_volume and volume_mode:

                # Rate limit: send one key, then wait for more than 0.05 s to
                # pass before the timer is cleared and the next key may go out.
                if volume_up_start_time is None:
                    # Level above 50 raises, below 50 lowers; exactly 50 sends
                    # nothing but still starts the timer.
                    if volume_level > 50:
                        keyboard.send("Volume Up")
                    elif volume_level < 50:
                        keyboard.send("Volume Down")
                    volume_up_start_time = time.time()
                elif (time.time() - volume_up_start_time > 0.05):
                    volume_up_start_time = None

            # Play/pause on a thumb/pinky pinch. The timer is only cleared
            # while the pinch is observed and more than 1 s has elapsed, so a
            # held pinch can re-send after that.
            if pinky_thum_dist < 45:
                if play_pause_start_time is None:
                    keyboard.send("play/pause media")
                    play_pause_start_time = time.time()
                elif (time.time() - play_pause_start_time) > 1:
                    play_pause_start_time = None

            

In [275]:
# Open capture device 0 and run the gesture loop until 'q' is pressed in the
# preview window or a frame can no longer be read.
cap = cv2.VideoCapture(0)
draw = mp.solutions.drawing_utils

try:
    while cap.isOpened():
        ret, frame = cap.read()

        # Check the read result first: on a failed read there is no usable
        # frame, and reading frame.shape would raise instead of ending the
        # loop cleanly.
        if not ret:
            break
        frame_h, frame_w, _ = frame.shape

        # Mirror the image horizontally, then hand MediaPipe an RGB copy.
        frame = cv2.flip(frame,1)
        frameRGB = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        processed = hands.process(frameRGB)

        # (x, y) of every landmark of the first detected hand, as reported by
        # MediaPipe; stays empty when no hand was found.
        landmarks_list = []
        if processed.multi_hand_landmarks:
            hand_landmarks = processed.multi_hand_landmarks[0]
            # Optional debug overlay of the full hand skeleton:
            # draw.draw_landmarks(frame, hand_landmarks, mpHands.HAND_CONNECTIONS)
            landmarks_list = [(lm.x, lm.y) for lm in hand_landmarks.landmark]

        draw_landmarks(processed,frame)
        detect_gestures(frame,landmarks_list,processed)

        cv2.imshow('Frame',frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

finally:
    # Always release the capture device and close the preview window, even if
    # a frame raised.
    cap.release()
    cv2.destroyAllWindows()

