In [1]:
import cv2
import mediapipe as mp
import numpy as np
import os
import pyautogui
import joblib
from time import sleep

In [2]:
# MediaPipe entry points: the hand-landmark solution and its drawing helpers.
mp_hands = mp.solutions.hands
mp_draw = mp.solutions.drawing_utils

# Streaming (non-static) detector limited to a single hand; detections below
# 0.7 confidence are discarded.
hands = mp_hands.Hands(
    max_num_hands=1,
    static_image_mode=False,
    min_detection_confidence=0.7,
)

# Default webcam (device index 0).
cap = cv2.VideoCapture(0)

In [2]:
# Live preview: draw detected hand landmarks on the webcam feed until 'q' is pressed.
# Fail fast when the capture is not open (e.g. the cell is re-run after the
# camera was released) instead of spinning forever on failed reads.
if not cap.isOpened():
    raise RuntimeError("Webcam is not open; re-run the cell that creates `cap`.")

# Give up after this many consecutive failed reads rather than busy-looping.
MAX_FAILED_READS = 30
failed_reads = 0

try:
    while True:
        success, img = cap.read()
        if not success:
            failed_reads += 1
            if failed_reads >= MAX_FAILED_READS:
                print("Error: could not read frames from the camera.")
                break
            continue
        failed_reads = 0

        # MediaPipe expects RGB input, OpenCV delivers BGR.
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        results = hands.process(img_rgb)

        if results.multi_hand_landmarks:
            for hand_landmarks in results.multi_hand_landmarks:
                mp_draw.draw_landmarks(img, hand_landmarks, mp_hands.HAND_CONNECTIONS)

        cv2.imshow("Hand Tracking", img)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the window, even on KeyboardInterrupt
    # or an unexpected exception inside the loop.
    cap.release()
    cv2.destroyAllWindows()

In [None]:
# Set up MediaPipe: the hands solution module, its landmark-drawing helpers,
# and a streaming single-hand detector with a 0.7 detection-confidence floor.
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
hands = mp_hands.Hands(
    max_num_hands=1,
    static_image_mode=False,
    min_detection_confidence=0.7,
)

In [None]:
# Create dataset directory
DATA_DIR = 'gesture_data'
if not os.path.exists(DATA_DIR):
    os.makedirs(DATA_DIR)

In [None]:
# Define your gestures
gestures = ['index_up', 'fist', 'palm_open']
samples_per_gesture = 100  # Number of samples to collect per gesture

In [3]:
# Initialize webcam
cap = cv2.VideoCapture(0)

for gesture in gestures:
    # Create subdirectory for each gesture
    gesture_dir = os.path.join(DATA_DIR, gesture)
    if not os.path.exists(gesture_dir):
        os.makedirs(gesture_dir)
    
    print(f'Collecting data for {gesture}. Press "q" to skip this gesture.')
    print('Get ready in 3 seconds...')
    sleep(3)
    
    sample_count = 0
    while sample_count < samples_per_gesture:
        ret, frame = cap.read()
        if not ret:
            continue
            
        # Flip frame horizontally for a mirror effect
        frame = cv2.flip(frame, 1)
        
        # Convert to RGB and process with MediaPipe
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = hands.process(frame_rgb)
        
        # Display countdown on screen
        cv2.putText(frame, f'Collecting {gesture}: Sample {sample_count+1}/{samples_per_gesture}', 
                   (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
        
        if results.multi_hand_landmarks:
            hand_landmarks = results.multi_hand_landmarks[0]
            
            # Draw hand landmarks
            mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)
            
            # Extract landmarks and save
            landmarks = []
            for landmark in hand_landmarks.landmark:
                landmarks.extend([landmark.x, landmark.y, landmark.z])
            
            np.save(os.path.join(gesture_dir, f'{sample_count}.npy'), np.array(landmarks))
            sample_count += 1
            
            # Small delay between samples
            sleep(0.1)
        
        cv2.imshow('Data Collection', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

cap.release()
cv2.destroyAllWindows()

Collecting data for index_up. Press "q" to skip this gesture.
Get ready in 3 seconds...
Collecting data for fist. Press "q" to skip this gesture.
Get ready in 3 seconds...
Collecting data for palm_open. Press "q" to skip this gesture.
Get ready in 3 seconds...


In [None]:
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split

In [6]:
# Train a linear SVM on the recorded landmark samples.
# The class label of a sample is the index of its gesture in `gestures`.
gestures = ["index_up", "fist", "palm_open"]
X, y = [], []

for idx, gesture in enumerate(gestures):
    gesture_dir = os.path.join("gesture_data", gesture)
    # sorted() makes the sample order independent of the OS directory listing;
    # the suffix filter skips stray entries (.DS_Store, .ipynb_checkpoints, ...)
    # that np.load cannot read.
    for file in sorted(os.listdir(gesture_dir)):
        if not file.endswith(".npy"):
            continue
        data = np.load(os.path.join(gesture_dir, file))
        X.append(data)
        y.append(idx)

# Fixed seed so the train/test split, and therefore the reported accuracy,
# is reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

model = SVC(kernel='linear')
model.fit(X_train, y_train)
print("Accuracy:", model.score(X_test, y_test))

Accuracy: 0.9666666666666667


In [7]:
# Persist the fitted classifier to disk; the call's return value (the list of
# written file names) is the cell's displayed output.
import joblib

joblib.dump(value=model, filename="gesture_model.pkl")

['gesture_model.pkl']

In [8]:
# Real-time virtual mouse: classify the hand gesture in each webcam frame and
# translate it into a mouse action.
#   class 0 (index up)  -> left click
#   class 1 (fist)      -> right click
#   class 2 (palm open) -> move cursor to the index fingertip

# Load the trained gesture recognition model BEFORE opening the camera, so a
# missing model never leaves the webcam locked.
# NOTE: joblib.load unpickles the file; only load model files you created yourself.
try:
    model = joblib.load("gesture_model.pkl")
except FileNotFoundError as err:
    # Raise instead of calling exit(1): exit() is meant for interactive shells
    # and would tear down the interpreter/kernel rather than just stop this cell.
    raise FileNotFoundError("Error: Model file 'gesture_model.pkl' not found.") from err

# Initialize MediaPipe Hands
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(
    static_image_mode=False,
    max_num_hands=1,
    min_detection_confidence=0.7,
    min_tracking_confidence=0.5
)
mp_drawing = mp.solutions.drawing_utils

# Get screen dimensions
screen_width, screen_height = pyautogui.size()

# Set pyautogui failsafe (move mouse to corner to abort)
pyautogui.FAILSAFE = True

# For smoothing cursor movement. Seed with the current pointer position so the
# first "move" frame does not blend towards (0, 0), which is a fail-safe corner.
prev_x, prev_y = pyautogui.position()
smoothing_factor = 0.5

# Gesture seen on the previous frame; clicks fire only on a gesture transition
# so holding a pose does not click on every frame.
prev_prediction = None

# Give up after this many consecutive failed reads rather than busy-looping.
MAX_FAILED_READS = 30
failed_reads = 0

# Initialize video capture
cap = cv2.VideoCapture(0)  # 0 for default camera

try:
    while True:
        success, img = cap.read()
        if not success:
            failed_reads += 1
            if failed_reads >= MAX_FAILED_READS:
                print("Error: could not read frames from the camera.")
                break
            continue
        failed_reads = 0

        # Flip image horizontally for mirror effect
        img = cv2.flip(img, 1)

        # Convert to RGB for MediaPipe
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # Process hand landmarks
        results = hands.process(img_rgb)

        prediction = None
        if results.multi_hand_landmarks:
            hand_landmarks = results.multi_hand_landmarks[0]

            # Draw hand landmarks on image
            mp_drawing.draw_landmarks(
                img, hand_landmarks, mp_hands.HAND_CONNECTIONS)

            # Flatten the landmarks into the same (x, y, z, ...) layout used for training
            features = []
            for landmark in hand_landmarks.landmark:
                features.extend([landmark.x, landmark.y, landmark.z])

            # Only the classifier call is guarded: pyautogui calls stay outside
            # so its FailSafeException is not swallowed as a "prediction error".
            try:
                prediction = model.predict([features])[0]
            except Exception as e:
                print(f"Prediction error: {e}")

            # Perform actions based on prediction
            if prediction == 0:  # Index up -> left click
                if prev_prediction != 0:
                    pyautogui.click()
                cv2.putText(img, "Left Click", (10, 50),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

            elif prediction == 1:  # Fist -> right click
                if prev_prediction != 1:
                    pyautogui.rightClick()
                cv2.putText(img, "Right Click", (10, 50),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

            elif prediction == 2:  # Palm open -> move cursor
                # Get index finger tip coordinates (landmark 8)
                cursor_x = int(hand_landmarks.landmark[8].x * screen_width)
                cursor_y = int(hand_landmarks.landmark[8].y * screen_height)

                # Smooth cursor movement
                cursor_x = int(prev_x * (1 - smoothing_factor) + cursor_x * smoothing_factor)
                cursor_y = int(prev_y * (1 - smoothing_factor) + cursor_y * smoothing_factor)

                # Landmark coordinates can fall slightly outside the frame;
                # keep the target on-screen.
                cursor_x = min(max(cursor_x, 0), screen_width - 1)
                cursor_y = min(max(cursor_y, 0), screen_height - 1)
                prev_x, prev_y = cursor_x, cursor_y

                pyautogui.moveTo(cursor_x, cursor_y)
                cv2.putText(img, "Moving", (10, 50),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

        # No hand (or a failed prediction) resets the state, so showing the
        # same gesture again produces a fresh click.
        prev_prediction = prediction

        # Display the image
        cv2.imshow("Virtual Mouse Gesture Control", img)

        # Exit on 'q' key press
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
except pyautogui.FailSafeException:
    # The user moved the pointer into a screen corner: stop controlling the mouse.
    print("PyAutoGUI fail-safe triggered; stopping gesture control.")
finally:
    # Release resources even when the loop ends through an exception.
    cap.release()
    cv2.destroyAllWindows()