In [38]:
pip install opencv-python mediapipe pyautogui





[notice] A new release of pip is available: 24.1 -> 24.2
[notice] To update, run: python.exe -m pip install --upgrade pip


Import Libraries and Set Up Hand Detection

In [19]:
import cv2
import mediapipe as mp
import pyautogui
import time
import numpy as np
from collections import deque

# --- MediaPipe hand tracking -------------------------------------------------
# Track at most one hand; detection and tracking confidence are both set to 0.8.
mp_hands = mp.solutions.hands
mp_draw = mp.solutions.drawing_utils
hands = mp_hands.Hands(
    max_num_hands=1,
    min_detection_confidence=0.8,
    min_tracking_confidence=0.8,
)

# --- Cooldowns (seconds) between repeated triggers of the same action --------
play_pause_delay = 1         # play/pause toggle
volume_change_delay = 0.2    # individual volume steps
gesture_delay = 1            # shared by the index-finger-up and thumb-down gestures

# Minimum frame-to-frame x displacement of the fist reference point that is
# treated as deliberate movement (same units as the landmark x coordinates).
movement_threshold = 0.005

# --- Time of the most recent trigger of each action --------------------------
# Initialised to "now", so no action can fire before its cooldown has elapsed
# once after start-up.
last_play_pause_time = time.time()
last_volume_change_time = time.time()
last_gesture_time = time.time()

# --- Gesture state carried from one frame to the next ------------------------
is_open_palm = False
is_fist = False
previous_fist_position = None
debug_text = ""  # overlay text drawn on the video feed

# --- Frame pacing ------------------------------------------------------------
target_fps = 24
frame_duration = 1.0 / target_fps  # time budget per loop iteration, in seconds

# --- Landmark smoothing ------------------------------------------------------
# Rolling buffer holding the landmark arrays of the most recent frames.
smoothing_window_size = 5
landmark_queue = deque(maxlen=smoothing_window_size)

# --- Video source ------------------------------------------------------------
# Device index 0 (typically the default webcam).
cap = cv2.VideoCapture(0)

def is_open_palm_landmarks(landmarks):
    """Report whether the index, middle, ring and pinky fingers are all extended.

    Args:
        landmarks: Indexable sequence of per-landmark coordinates where
            ``landmarks[i][1]`` is the y value of landmark ``i``. The indices
            used follow MediaPipe's hand model (8/12/16/20 are fingertips,
            6/10/14/18 the corresponding PIP joints).

    Returns:
        Truthy when every fingertip has a strictly smaller y than its PIP
        joint (i.e. is higher in the image, where y grows downward).
        The thumb is not considered.
    """
    # (fingertip, PIP joint) index pairs: index, middle, ring, pinky.
    tip_and_pip = ((8, 6), (12, 10), (16, 14), (20, 18))
    return all(landmarks[tip][1] < landmarks[pip][1] for tip, pip in tip_and_pip)

def is_fist_landmarks(landmarks):
    """Report whether the index, middle, ring and pinky fingers are all curled.

    Args:
        landmarks: Indexable sequence of per-landmark coordinates where
            ``landmarks[i][1]`` is the y value of landmark ``i``.

    Returns:
        True-like when each of the four fingertips (8, 12, 16, 20) has a
        strictly larger y than its PIP joint (6, 10, 14, 18); False-like as
        soon as one finger fails that test. The thumb is ignored.
    """
    for tip, pip in ((8, 6), (12, 10), (16, 14), (20, 18)):
        # Written as "not >" (rather than "<=") so unordered comparisons
        # are rejected exactly like in a plain chained `and` of `>` tests.
        if not landmarks[tip][1] > landmarks[pip][1]:
            return False
    return True

def is_index_finger_pointing_up(landmarks):
    """Report whether only the index finger is extended.

    Args:
        landmarks: Indexable sequence of per-landmark coordinates where
            ``landmarks[i][1]`` is the y value of landmark ``i``.

    Returns:
        Truthy when the index fingertip (8) has a smaller y than its PIP
        joint (6) while the middle, ring and pinky fingertips (12, 16, 20)
        each have a larger y than their PIP joints (10, 14, 18). The thumb
        is not considered.
    """
    index_extended = landmarks[8][1] < landmarks[6][1]
    curled_pairs = ((12, 10), (16, 14), (20, 18))
    # The remaining fingers are only inspected when the index finger qualifies.
    return index_extended and all(
        landmarks[tip][1] > landmarks[pip][1] for tip, pip in curled_pairs
    )

def is_thumb_down(landmarks):
    """Report whether the thumb tip is below its IP joint with the other fingers extended.

    Args:
        landmarks: Indexable sequence of per-landmark coordinates where
            ``landmarks[i][1]`` is the y value of landmark ``i``.

    Returns:
        Truthy when the thumb tip (4) has a larger y than the thumb IP joint
        (3) and the index, middle, ring and pinky fingertips (8, 12, 16, 20)
        each have a smaller y than their PIP joints (6, 10, 14, 18).

    NOTE(review): the four-finger condition is the same one used by
    is_open_palm_landmarks, so every pose accepted here is also an open palm
    according to that function -- confirm this overlap is intended.
    """
    thumb_tip_y = landmarks[4][1]
    thumb_ip_y = landmarks[3][1]
    if not thumb_tip_y > thumb_ip_y:
        return False

    extended_pairs = ((8, 6), (12, 10), (16, 14), (20, 18))
    return all(landmarks[tip][1] < landmarks[pip][1] for tip, pip in extended_pairs)

def get_fist_direction(current_position, previous_position):
    """Return the signed x displacement between two positions.

    Args:
        current_position: Indexable whose element 0 is the current x value,
            or None.
        previous_position: Indexable whose element 0 is the earlier x value,
            or None.

    Returns:
        ``current_position[0] - previous_position[0]`` (positive when x grew),
        or None when either position is None.
    """
    if current_position is None or previous_position is None:
        return None
    return current_position[0] - previous_position[0]

def smooth_landmarks(landmarks):
    """Add a landmark set to the rolling buffer and return the buffer's mean.

    Side effect: appends ``landmarks`` to the module-level ``landmark_queue``
    (a bounded deque, so the oldest entry is dropped once it is full).

    Args:
        landmarks: Landmark coordinates for the current frame; must have the
            same shape as the entries already in the buffer.

    Returns:
        Element-wise average, taken along axis 0, of everything currently in
        ``landmark_queue`` (including the entry just added).
    """
    landmark_queue.append(landmarks)
    return np.mean(landmark_queue, axis=0)

# Main capture loop: read webcam frames, classify the smoothed hand pose and
# translate gestures into key presses until 'q' is pressed or a frame cannot
# be read.
#
# The try/finally guarantees that the camera and the preview window are
# released even if the loop is interrupted (e.g. a kernel interrupt) or a
# call inside it raises; otherwise the webcam would stay locked.
try:
    while cap.isOpened():
        start_time = time.time()

        success, frame = cap.read()
        if not success:
            print("Failed to capture frame")
            break

        # MediaPipe processes RGB images, OpenCV delivers BGR.
        image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        # Mark the frame read-only while it is being processed
        # (presumably lets MediaPipe avoid a copy -- TODO confirm).
        image.flags.writeable = False

        # Process the image and detect hands
        results = hands.process(image)
        image.flags.writeable = True
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

        if results.multi_hand_landmarks:
            for hand_landmarks in results.multi_hand_landmarks:
                # Convert landmarks to numpy array for smoothing
                landmarks_array = np.array([(lm.x, lm.y, lm.z) for lm in hand_landmarks.landmark])

                # Average with the previous frames to reduce jitter
                smoothed_landmarks = smooth_landmarks(landmarks_array)

                # Open palm: toggle play/pause once per palm appearance,
                # rate-limited by play_pause_delay.
                if is_open_palm_landmarks(smoothed_landmarks):
                    if not is_open_palm and (time.time() - last_play_pause_time) > play_pause_delay:
                        debug_text = "Open palm detected: Play/Pause triggered"
                        print(debug_text)
                        pyautogui.press('space')
                        last_play_pause_time = time.time()
                        is_open_palm = True
                    is_fist = False  # Reset fist status
                else:
                    is_open_palm = False

                # Fist: horizontal movement between consecutive fist frames
                # adjusts the volume.
                if is_fist_landmarks(smoothed_landmarks):
                    debug_text = "Fist detected"
                    print(debug_text)
                    current_position = smoothed_landmarks[9]  # Use landmark 9 as reference for fist position
                    if is_fist and previous_fist_position is not None:
                        x_movement = get_fist_direction(current_position, previous_fist_position)
                        if x_movement:
                            debug_text = f"X movement: {x_movement}"
                            print(debug_text)
                            if x_movement < -movement_threshold and (time.time() - last_volume_change_time) > volume_change_delay:
                                debug_text = "Volume Down triggered"
                                print(debug_text)
                                pyautogui.press('volumedown')
                                last_volume_change_time = time.time()
                            elif x_movement > movement_threshold and (time.time() - last_volume_change_time) > volume_change_delay:
                                debug_text = "Volume Up triggered"
                                print(debug_text)
                                pyautogui.press('volumeup')
                                last_volume_change_time = time.time()
                    is_fist = True
                    previous_fist_position = current_position  # Update the previous fist position
                else:
                    is_fist = False

                # Index finger up: press the right-arrow key (rate-limited by gesture_delay)
                if is_index_finger_pointing_up(smoothed_landmarks) and (time.time() - last_gesture_time) > gesture_delay:
                    debug_text = "Index finger up detected: Fast forward 5 secs"
                    print(debug_text)
                    pyautogui.press('right')
                    last_gesture_time = time.time()

                # Thumb down: press the left-arrow key (shares the gesture cooldown)
                if is_thumb_down(smoothed_landmarks) and (time.time() - last_gesture_time) > gesture_delay:
                    debug_text = "Thumb down detected: Rewind 5 secs"
                    print(debug_text)
                    pyautogui.press('left')
                    last_gesture_time = time.time()

                # Draw hand landmarks
                mp_draw.draw_landmarks(image, hand_landmarks, mp_hands.HAND_CONNECTIONS)

                # Display the debug text on the video feed
                cv2.putText(image, debug_text, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)
        else:
            # No hand in view: discard history that belongs to the hand that
            # left. Otherwise the next hand to appear would be averaged with
            # stale landmarks, and a returning fist would be compared against
            # its pre-exit position, producing a spurious volume change.
            landmark_queue.clear()
            previous_fist_position = None
            is_fist = False

        # Display the video feed
        cv2.imshow('Hand Gesture Control', image)

        # Break loop with 'q' key
        if cv2.waitKey(1) & 0xFF == ord('q'):
            print("Quitting program")
            break

        # Frame rate control: sleep away whatever is left of this frame's time budget
        end_time = time.time()
        elapsed_time = end_time - start_time
        if elapsed_time < frame_duration:
            time.sleep(frame_duration - elapsed_time)
finally:
    cap.release()
    cv2.destroyAllWindows()

Open palm detected: Play/Pause triggered
Index finger up detected: Fast forward 5 secs
Open palm detected: Play/Pause triggered
Fist detected
Fist detected
X movement: -0.0009413897991180087
Fist detected
X movement: -0.0005737662315368763
Fist detected
X movement: -2.567768096922718e-05
Fist detected
X movement: 0.0002780795097350741
Open palm detected: Play/Pause triggered
Fist detected
Fist detected
X movement: -0.002808868885040283
Fist detected
X movement: -0.0022542536258697288
Fist detected
X movement: -0.0012466788291931041
Fist detected
X movement: -0.0002993762493133656
Fist detected
X movement: -0.0004285275936126709
Fist detected
X movement: -0.000398099422454834
Fist detected
X movement: -0.00043759942054749645
Fist detected
X movement: -0.0003159463405609242
Fist detected
X movement: -1.5985965728770868e-05
Fist detected
X movement: -0.00039495229721064895
Fist detected
X movement: 9.664297103878505e-05
Fist detected
X movement: 0.0015637993812561146
Fist detected
X movem