In [1]:
import cv2
import mediapipe as mp
import pyautogui
import speech_recognition as sr
import time
import numpy as np
from collections import deque

# MediaPipe hand-tracking objects: the solution module, a tracker limited to a
# single hand, and the helper used to draw landmarks onto frames.
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(max_num_hands=1)
mp_draw = mp.solutions.drawing_utils

# Screen size as reported by PyAutoGUI; the control loop maps camera
# coordinates onto this range.
screen_width, screen_height = pyautogui.size()
mode = 'gesture'  # default mode

scroll_buffer = deque(maxlen=5)
scroll_up_queue = deque(maxlen=3)  # stores last 3 gesture states


# Gesture timing state.
# NOTE: the camera is deliberately NOT opened in this cell. The control-loop
# cell creates its own cv2.VideoCapture(0); opening the device here as well
# meant the same camera was opened a second time while this first handle was
# still open, which several capture backends reject.
prev_y_index = 0
prev_y_middle = 0
prev_right_click_time = 0


In [2]:
def get_finger_status(hand_landmarks):
    """Return five 0/1 flags describing one hand's landmark positions.

    Args:
        hand_landmarks: Object exposing an indexable ``landmark`` sequence
            whose items have ``x`` and ``y`` attributes (presumably a
            MediaPipe hand-landmark result — confirm against the caller).

    Returns:
        list[int]: ``[thumb, f8, f12, f16, f20]``. Each finger flag is 1 when
        the landmark at that tip index has a smaller ``y`` than the landmark
        two indices before it. The thumb flag is 1 when landmark 4 has a
        smaller ``x`` than landmark 3.
    """
    points = hand_landmarks.landmark

    # Non-thumb fingers: compare each tip with the joint two indices below it.
    finger_flags = [
        int(points[tip_idx].y < points[tip_idx - 2].y)
        for tip_idx in (8, 12, 16, 20)
    ]

    # Thumb is judged along the x axis instead of y.
    thumb_flag = int(points[4].x < points[3].x)

    return [thumb_flag, *finger_flags]


def listen_voice_command():
    """Capture one short phrase from the microphone and return it as text.

    Records from ``sr.Microphone()`` (phrase limited to 3 seconds), then
    passes the audio to ``recognizer.recognize_google``. The microphone is
    closed before recognition starts so the device is not held open while
    waiting on the recognizer.

    Returns:
        str: The recognized phrase in lowercase, or ``""`` when nothing could
        be recognized or the recognizer failed.
    """
    recognizer = sr.Recognizer()
    with sr.Microphone() as source:
        print("Listening for voice command...")
        audio = recognizer.listen(source, phrase_time_limit=3)

    try:
        command = recognizer.recognize_google(audio).lower()
    except sr.UnknownValueError:
        # Speech was captured but could not be understood.
        print("Could not recognize.")
        return ""
    except Exception as exc:
        # Stay best-effort for service/network problems, but report the cause.
        # Unlike a bare ``except:``, this lets KeyboardInterrupt/SystemExit
        # propagate so the notebook kernel can still be interrupted.
        print(f"Could not recognize. ({type(exc).__name__}: {exc})")
        return ""

    print("Heard:", command)
    return command


def calculate_scroll_direction(buffer, min_samples=5, threshold=0.02):
    """Classify vertical movement across a buffer of points.

    Compares the second component (index 1) of the newest point with that of
    the oldest point in ``buffer``.

    Args:
        buffer: Sized, indexable sequence of points (e.g. a ``deque`` of
            ``(x, y)`` pairs); only element ``[1]`` of the first and last
            points is read.
        min_samples: Minimum number of points required before a direction is
            reported. Defaults to 5, the previously hard-coded value.
        threshold: Minimum absolute change in y that counts as movement.
            Defaults to 0.02, the previously hard-coded value.

    Returns:
        int: ``-1`` when y decreased, ``1`` when y increased, and ``0`` when
        there are too few samples or the change is below ``threshold``.
    """
    # Never index into an empty buffer, even if the caller passes min_samples=0.
    if len(buffer) < max(min_samples, 1):
        return 0

    diff = buffer[-1][1] - buffer[0][1]
    if abs(diff) < threshold:
        return 0
    return -1 if diff < 0 else 1


In [3]:
# Click debounce: timestamp of the most recent click and the minimum number
# of seconds that must pass between clicks to avoid repeats.
last_click_time, click_delay = 0, 0.7

# Mode-switch debounce: timestamp of the most recent switch and the minimum
# number of seconds between switches to avoid accidental toggling.
mode_switch_time, mode_switch_delay = 0, 1.5


In [4]:
# 4th cell — GESTURE CONTROL ONLY (No imports here)
#
# Main control loop.
#   * 'gesture' mode: each webcam frame is mirrored, run through the hand
#     tracker, and the finger pattern [thumb, index, middle, ring, pinky]
#     (1 = extended) is mapped to a mouse / scroll / volume action.
#   * 'voice' mode: the camera is released and spoken commands are executed.
# The loop ends on the spoken command "quit", on pressing 'q' in the preview
# window, or when the camera is no longer available.

# If an earlier cell (or an interrupted run of this cell) left a capture open,
# release it first so the device is not opened twice at the same time.
_stale_cap = globals().get('cap')
if _stale_cap is not None:
    _stale_cap.release()

cap = cv2.VideoCapture(0)
prev_right_click_time = 0
prev_left_click_time = 0
prev_scroll_time = 0
scroll_cooldown = 1  # seconds between scroll gestures
click_cooldown = 1   # seconds between click gestures

# Spoken phrases and the action each triggers. Checked in order; the first
# phrase found inside the recognized command wins.
voice_actions = [
    ('left click', pyautogui.click),
    ('right click', pyautogui.rightClick),
    ('scroll up', lambda: pyautogui.scroll(300)),
    ('scroll down', lambda: pyautogui.scroll(-300)),
    ('volume up', lambda: pyautogui.press('volumeup')),
    ('volume down', lambda: pyautogui.press('volumedown')),
    ('move up', lambda: pyautogui.moveRel(0, -50)),
    ('move down', lambda: pyautogui.moveRel(0, 50)),
    ('move left', lambda: pyautogui.moveRel(-100, 0)),
    ('move right', lambda: pyautogui.moveRel(100, 0)),
]

try:
    while True:
        if mode == 'voice':
            # The camera is not needed while listening; release it once.
            if cap.isOpened():
                cap.release()
            command = listen_voice_command()

            # ——— Quit command ———
            if 'quit' in command:
                print("Quitting program...")
                break

            for phrase, action in voice_actions:
                if phrase in command:
                    action()
                    break
            else:
                # No action phrase matched: check for the mode switch.
                if 'switch to gesture' in command:
                    mode = 'gesture'
                    cap = cv2.VideoCapture(0)

            continue

        # Gesture Mode
        success, img = cap.read()
        if not success:
            if not cap.isOpened():
                # Without this check a missing camera would spin forever.
                print("Camera is not available; stopping.")
                break
            continue

        img = cv2.flip(img, 1)
        h, w = img.shape[:2]  # frame size is the same for every landmark
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        result = hands.process(img_rgb)

        if result.multi_hand_landmarks:
            for hand_landmark in result.multi_hand_landmarks:
                mp_draw.draw_landmarks(img, hand_landmark, mp_hands.HAND_CONNECTIONS)

                # Landmark positions in pixel coordinates of the mirrored frame.
                lm_list = [
                    (int(lm.x * w), int(lm.y * h))
                    for lm in hand_landmark.landmark
                ]
                if not lm_list:
                    continue

                # Thumb: tip (4) to the right of joint 3 counts as extended.
                fingers = [1 if lm_list[4][0] > lm_list[3][0] else 0]
                # Other fingers: tip above the joint two indices below it.
                fingers += [
                    1 if lm_list[tip][1] < lm_list[tip - 2][1] else 0
                    for tip in (8, 12, 16, 20)
                ]

                # GESTURE: Mouse Move — index finger only
                if fingers == [0, 1, 0, 0, 0]:
                    x, y = lm_list[8]
                    screen_x = np.interp(x, [0, w], [0, screen_width])
                    screen_y = np.interp(y, [0, h], [0, screen_height])
                    pyautogui.moveTo(screen_x, screen_y)
                    cv2.putText(img, "Mouse Move", (10, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)

                # GESTURE: Left Click — index + middle + ring up
                elif fingers == [0, 1, 1, 1, 0]:
                    current_time = time.time()
                    if current_time - prev_left_click_time > click_cooldown:
                        pyautogui.click()
                        prev_left_click_time = current_time
                        cv2.putText(img, "Left Click", (10, 80), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

                # GESTURE: Right Click — index + middle up
                elif fingers == [0, 1, 1, 0, 0]:
                    current_time = time.time()
                    if current_time - prev_right_click_time > click_cooldown:
                        pyautogui.rightClick()
                        prev_right_click_time = current_time
                        cv2.putText(img, "Right Click", (10, 110), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)

                # GESTURE: Scroll Up — 4 fingers up (thumb down)
                elif fingers == [0, 1, 1, 1, 1]:
                    current_time = time.time()
                    if current_time - prev_scroll_time > scroll_cooldown:
                        pyautogui.scroll(300)  # Scroll Up
                        prev_scroll_time = current_time
                        cv2.putText(img, "Scroll Up", (10, 140), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 100), 2)

                # GESTURE: Scroll Down — all 5 fingers up
                elif fingers == [1, 1, 1, 1, 1]:
                    current_time = time.time()
                    if current_time - prev_scroll_time > scroll_cooldown:
                        pyautogui.scroll(-300)  # Scroll Down
                        prev_scroll_time = current_time
                        cv2.putText(img, "Scroll Down", (10, 170), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 100), 2)

                # GESTURE: Volume Up — thumb only
                elif fingers == [1, 0, 0, 0, 0]:
                    pyautogui.press('volumeup')
                    time.sleep(0.2)
                    cv2.putText(img, "Volume Up", (10, 200), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0), 2)

                # GESTURE: Volume Down — all fingers down
                # (original note: back of the palm must be ensured via hand orientation)
                elif fingers == [0, 0, 0, 0, 0]:
                    pyautogui.press('volumedown')
                    time.sleep(0.3)
                    cv2.putText(img, "Volume Down", (10, 230), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0), 2)

                # GESTURE: Switch to Voice — thumb + index + pinky
                elif fingers == [1, 1, 0, 0, 1]:
                    mode = 'voice'
                    cv2.putText(img, "Switched to Voice", (10, 260), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
                    time.sleep(1)

        cv2.imshow("Gesture Mode", img)
        if cv2.waitKey(1) == ord('q'):
            break
finally:
    # Always free the camera and close the preview window, including when the
    # loop is left through an exception or a kernel interrupt.
    cap.release()
    cv2.destroyAllWindows()


Listening for voice command...
Heard: switch
Listening for voice command...
Heard: switch to gesture
Listening for voice command...
Heard: scroll up
Listening for voice command...
Heard: scroll up
Listening for voice command...
Could not recognize.
Listening for voice command...
Heard: scroll
Listening for voice command...
Heard: scroll up
Listening for voice command...
Heard: move left
Listening for voice command...
Heard: right click
Listening for voice command...
Heard: move right
Listening for voice command...
Could not recognize.
Listening for voice command...
Heard: move
Listening for voice command...
Heard: move up
Listening for voice command...
Heard: left click
Listening for voice command...
Could not recognize.
Listening for voice command...
Heard: iske liye kyon hai iske liye
Listening for voice command...
Could not recognize.
Listening for voice command...
Heard: quit
Quitting program...
