In [7]:
# dataset_collector.py
import cv2
import mediapipe as mp
import csv

mp_hands = mp.solutions.hands
mp_draw = mp.solutions.drawing_utils

# Ask for the label first so the webcam is not held open while waiting for
# input. Surrounding whitespace is stripped and a blank name is rejected,
# because every collected row is tagged with this exact string.
gesture_name = input("Enter the gesture name: ").strip()
if not gesture_name:
    raise ValueError("Gesture name must not be empty.")

# One row per detected hand: x, y, z for every landmark, then the label.
data = []

print("Collecting data... Press 'q' to quit.")

hands = mp_hands.Hands(static_image_mode=False, max_num_hands=1)
cap = cv2.VideoCapture(0)
try:
    while True:
        success, img = cap.read()
        if not success:
            # No frame available (camera missing or stream ended).
            break

        # MediaPipe expects RGB input; OpenCV delivers BGR frames.
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        results = hands.process(img_rgb)

        if results.multi_hand_landmarks:
            for hand_landmarks in results.multi_hand_landmarks:
                mp_draw.draw_landmarks(img, hand_landmarks, mp_hands.HAND_CONNECTIONS)

                landmarks = []
                for lm in hand_landmarks.landmark:
                    landmarks.extend([lm.x, lm.y, lm.z])
                landmarks.append(gesture_name)
                data.append(landmarks)

        cv2.imshow("Collecting Data", img)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera, the preview window and the MediaPipe graph,
    # even when the loop is interrupted by an exception.
    cap.release()
    cv2.destroyAllWindows()
    hands.close()

# Save to CSV (append mode: earlier recordings are kept)
with open('hand_gesture_data.csv', 'a', newline='') as f:
    writer = csv.writer(f)
    writer.writerows(data)

print(f"{len(data)} samples collected for '{gesture_name}' gesture.")


NameError: name 'audio_classifier' is not defined

In [None]:
import pandas as pd

# Label whose rows should be removed from the dataset.
label_to_delete = ""  # replace this with your target label

# Read every sample; rows the parser cannot handle are skipped.
data = pd.read_csv("hand_gesture_data.csv", header=None, on_bad_lines='skip')

# The gesture label lives in the last column.
label_column = data.iloc[:, -1]
print("Before deletion:\n", label_column.value_counts())

# Keep only the rows whose label differs from the target.
keep_mask = label_column != label_to_delete
data = data[keep_mask]

print("After deletion:\n", data.iloc[:, -1].value_counts())

# Overwrite the dataset file with the filtered rows (no index, no header).
data.to_csv("hand_gesture_data.csv", index=False, header=False)


Before deletion:
 63
screenshot      1068
zoom_in          583
click            545
drag             502
right_click      491
move             363
scroll_up        332
scroll_down      325
double_click     231
Name: count, dtype: int64
After deletion:
 63
screenshot      1068
zoom_in          583
click            545
drag             502
right_click      491
move             363
scroll_up        332
scroll_down      325
double_click     231
Name: count, dtype: int64


In [None]:
import pandas as pd

# Reload the dataset (malformed lines skipped) and tally the samples per gesture.
data = pd.read_csv("hand_gesture_data.csv", header=None, on_bad_lines='skip')
labels = data.iloc[:, -1]  # last column holds the gesture label
label_counts = labels.value_counts()
print("Label counts:\n", label_counts)


Label counts:
 63
screenshot      1068
zoom_in          583
click            545
zoom_out         517
drag             502
right_click      491
move             363
scroll_up        332
scroll_down      325
double_click     231
Name: count, dtype: int64


In [2]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report
import joblib

# Read the samples: every column but the last is a feature, the last is the label.
df = pd.read_csv("hand_gesture_data.csv", header=None)
X, y = df.iloc[:, :-1].values, df.iloc[:, -1].values

# Turn gesture names into integer class ids.
encoder = LabelEncoder()
y_encoded = encoder.fit_transform(y)

# Stratified 80/20 hold-out split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_encoded,
    test_size=0.2,
    random_state=42,
    stratify=y_encoded,
)

# Fit the random forest on the training portion.
model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Score the held-out portion.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred, target_names=encoder.classes_)
print(f"✅ Model Accuracy: {accuracy * 100:.2f}%")
print("\n📊 Classification Report:\n", report)

# Persist the classifier and the label encoder for the live-control cell.
for artifact, path in ((model, 'gesture_rf_model.pkl'), (encoder, 'gesture_label_encoder.pkl')):
    joblib.dump(artifact, path)

print("🎉 Model and encoder saved.")


✅ Model Accuracy: 99.09%

📊 Classification Report:
               precision    recall  f1-score   support

       click       0.99      0.98      0.99       109
double_click       1.00      1.00      1.00        46
        drag       0.99      1.00      1.00       100
        move       0.96      0.99      0.97        73
 right_click       0.97      0.98      0.97        98
  screenshot       1.00      1.00      1.00       214
 scroll_down       1.00      1.00      1.00        65
   scroll_up       1.00      0.98      0.99        66
     zoom_in       1.00      0.98      0.99       117
    zoom_out       1.00      1.00      1.00       104

    accuracy                           0.99       992
   macro avg       0.99      0.99      0.99       992
weighted avg       0.99      0.99      0.99       992

🎉 Model and encoder saved.


In [1]:
# Compare accuracy on seen vs. held-out data.
# Needs `model`, the train/test split and `accuracy_score` from the training cell.
train_pred, test_pred = model.predict(X_train), model.predict(X_test)

train_acc = accuracy_score(y_train, train_pred)
print(f"🧠 Training Accuracy: {train_acc * 100:.2f}%")

test_acc = accuracy_score(y_test, test_pred)
print(f"🧪 Test Accuracy: {test_acc * 100:.2f}%")


NameError: name 'model' is not defined

In [10]:
import tkinter as tk
from threading import Thread
import cv2
import mediapipe as mp
import joblib
import pyautogui
import numpy as np
import time
import math
import os
from pynput.mouse import Controller, Button
from ctypes import cast, POINTER
from comtypes import CLSCTX_ALL
from pycaw.pycaw import AudioUtilities, IAudioEndpointVolume

# Trained classifier and its label encoder.
# NOTE: joblib.load unpickles the file contents; only load artifacts you created yourself.
model, encoder = (joblib.load(path) for path in ("gesture_rf_model.pkl", "gesture_label_encoder.pkl"))

# Mouse controller and screen dimensions
mouse = Controller()
screen_width, screen_height = pyautogui.size()

# Speaker endpoint used for volume control
devices = AudioUtilities.GetSpeakers()
interface = devices.Activate(IAudioEndpointVolume._iid_, CLSCTX_ALL, None)
volume = cast(interface, POINTER(IAudioEndpointVolume))
volume_range = volume.GetVolumeRange()
minVol, maxVol = volume_range[0], volume_range[1]

# Mediapipe hand tracker and drawing helper
mp_hands = mp.solutions.hands
mp_draw = mp.solutions.drawing_utils
hands = mp_hands.Hands(max_num_hands=1, min_detection_confidence=0.7)

# State shared between the GUI callbacks and the gesture thread
running = False
mode = "mouse"
current_gesture = "None"
prev_x = prev_y = 0

# Folder that receives screenshots; created on first run.
screenshot_dir = "screenshots"
os.makedirs(screenshot_dir, exist_ok=True)

# Gesture functions
def get_angle(a, b, c):
    """Return the angle at vertex ``b`` (in degrees) between the rays b->a and b->c.

    Only the ``x`` and ``y`` attributes of the three points are used. A small
    epsilon is added to the denominator so a zero-length ray does not divide
    by zero, and the cosine is clipped to [-1, 1] before ``arccos``.
    """
    ray_to_a = np.array([a.x - b.x, a.y - b.y])
    ray_to_c = np.array([c.x - b.x, c.y - b.y])
    length_product = np.linalg.norm(ray_to_a) * np.linalg.norm(ray_to_c)
    cos_theta = np.dot(ray_to_a, ray_to_c) / (length_product + 1e-6)
    cos_theta = np.clip(cos_theta, -1.0, 1.0)
    return np.degrees(np.arccos(cos_theta))

def get_distance(a, b):
    """Return the Euclidean distance between ``a`` and ``b`` using their ``x``/``y`` attributes."""
    dx = a.x - b.x
    dy = a.y - b.y
    return math.hypot(dx, dy)

def classify_gesture(lm):
    """Predict the gesture label for one hand's landmark sequence.

    The feature vector always starts with x, y, z of every landmark, which is
    the layout the dataset collector writes to the training CSV. Previously
    three engineered values (two finger angles and the thumb-index distance)
    were appended unconditionally, so a model trained on the raw coordinates
    never matched the length check and every frame came back as "Unknown".
    The layout is now chosen from ``model.n_features_in_``:

    * raw coordinates only, when the model expects exactly that many values;
    * raw coordinates plus the three engineered values, when the model
      expects three more (the previous behaviour);
    * otherwise "Unknown" is returned.

    Args:
        lm: indexable sequence of landmarks exposing ``x``, ``y`` and ``z``.

    Returns:
        The decoded gesture label, or the string "Unknown" when no feature
        layout matches the loaded model.
    """
    raw = [v for pt in lm for v in (pt.x, pt.y, pt.z)]
    expected = model.n_features_in_

    if len(raw) == expected:
        features = raw
    elif len(raw) + 3 == expected:
        # Model was trained with the extra angle/distance features.
        features = raw + [
            get_angle(lm[5], lm[6], lm[8]),
            get_angle(lm[9], lm[10], lm[12]),
            get_distance(lm[4], lm[8]),
        ]
    else:
        return "Unknown"

    predicted_id = model.predict([features])[0]
    return encoder.inverse_transform([predicted_id])[0]

def move_mouse_smooth(tip):
    """Move the cursor toward the screen position of ``tip`` with simple smoothing.

    ``tip.x`` / ``tip.y`` are scaled by the screen size to get the target
    pixel. When the target is more than 60 px from the last position the
    cursor jumps straight to it; otherwise it advances by the floor-divided
    third of the remaining offset. The last position is kept in the globals
    ``prev_x`` / ``prev_y`` and updated after the move.
    """
    global prev_x, prev_y
    target_x = int(tip.x * screen_width)
    target_y = int(tip.y * screen_height)
    delta_x, delta_y = target_x - prev_x, target_y - prev_y

    if math.hypot(delta_x, delta_y) > 60:
        divisor = 1
    else:
        divisor = 3

    next_x = prev_x + delta_x // divisor
    next_y = prev_y + delta_y // divisor
    pyautogui.moveTo(next_x, next_y)
    prev_x, prev_y = next_x, next_y

# Real-time gesture detection loop
def gesture_loop():
    """Capture webcam frames and act on the detected gesture until stopped.

    Runs while the global ``running`` flag is true, and ends early when a
    frame cannot be read or 'q' is pressed in the preview window. For each
    detected hand the gesture is classified and published through
    ``current_gesture``; depending on ``mode`` the index fingertip drives the
    mouse, or the thumb-index distance sets the master volume.

    Changes from the earlier version:

    * Cleanup runs in a ``finally`` block, so the camera and window are
      released and ``running`` is reset even if an exception escapes the loop
      (previously the flag stayed true and Start could not be used again).
    * The "screenshot" gesture captures at most one image per whole second.
      The filename only has one-second resolution, so the per-frame captures
      were just overwriting the same file.
    """
    global running, mode, current_gesture
    cap = cv2.VideoCapture(0)
    pTime = time.time()
    last_shot_stamp = None  # second-resolution timestamp of the last screenshot

    try:
        while running:
            ret, frame = cap.read()
            if not ret:
                break

            # Mirror the frame so on-screen motion follows the hand.
            frame = cv2.flip(frame, 1)
            rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            results = hands.process(rgb)

            if results.multi_hand_landmarks:
                for hand_landmarks in results.multi_hand_landmarks:
                    mp_draw.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)
                    lm = hand_landmarks.landmark
                    gesture = classify_gesture(lm)
                    current_gesture = gesture

                    if gesture == "screenshot":
                        stamp = int(time.time())
                        if stamp != last_shot_stamp:
                            filename = f"screenshot_{stamp}.png"
                            pyautogui.screenshot(os.path.join(screenshot_dir, filename))
                            last_shot_stamp = stamp
                        cv2.putText(frame, "Screenshot Taken", (20, 70), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 255, 255), 2)

                    if mode == "mouse":
                        move_mouse_smooth(lm[8])
                    elif mode == "volume":
                        # Pixel distance between landmarks 4 and 8, mapped from
                        # [50, 300] px onto the device's volume range.
                        x1, y1 = int(lm[4].x * frame.shape[1]), int(lm[4].y * frame.shape[0])
                        x2, y2 = int(lm[8].x * frame.shape[1]), int(lm[8].y * frame.shape[0])
                        length = math.hypot(x2 - x1, y2 - y1)
                        vol = np.interp(length, [50, 300], [minVol, maxVol])
                        volume.SetMasterVolumeLevel(vol, None)

            cTime = time.time()
            fps = 1 / (cTime - pTime + 1e-6)  # epsilon guards against a zero interval
            pTime = cTime
            cv2.putText(frame, f'FPS: {int(fps)}', (10, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)

            cv2.imshow("Gesture Control", frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        cap.release()
        cv2.destroyAllWindows()
        running = False

# GUI control functions
def start_gesture_control():
    """Start the gesture loop on a daemon thread unless it is already flagged as running.

    Sets the global ``running`` flag, launches ``gesture_loop`` and kicks off
    the periodic label refresh. Does nothing when ``running`` is already true.
    """
    global running
    if running:
        return
    running = True
    worker = Thread(target=gesture_loop, daemon=True)
    worker.start()
    update_labels()

def stop_gesture_control():
    """Clear the global ``running`` flag that the gesture loop checks on each iteration."""
    global running
    running = False

def update_labels():
    """Refresh the status, mode and gesture labels from the shared globals.

    While ``running`` is true the labels are updated and the function
    re-schedules itself through ``root.after`` every 200 ms. Once ``running``
    is false the status label is set to "Stopped" and no further refresh is
    scheduled.
    """
    if not running:
        lbl_status.config(text="Stopped")
        return

    lbl_status.config(text="Running")
    lbl_mode.config(text=f"Mode: {mode.upper()}")
    lbl_gesture.config(text=f"Gesture: {current_gesture}")
    root.after(200, update_labels)

def switch_mode():
    """Toggle the global ``mode``: "mouse" becomes "volume", anything else becomes "mouse"."""
    global mode
    if mode == "mouse":
        mode = "volume"
    else:
        mode = "mouse"

# --- Tkinter GUI Layout ---
# Main window
root = tk.Tk()
root.title("Gesture Control GUI")
root.geometry("400x300")
root.configure(bg="#222")

# Heading
heading = tk.Label(root, text="Gesture Control System", font=("Helvetica", 16, "bold"), bg="#222", fg="white")
heading.pack(pady=10)

# Live labels; their text is refreshed by update_labels()
lbl_status = tk.Label(root, text="Stopped", font=("Helvetica", 14), bg="#222", fg="red")
lbl_status.pack()

lbl_mode = tk.Label(root, text="Mode: MOUSE", font=("Helvetica", 12), bg="#222", fg="white")
lbl_mode.pack(pady=5)

lbl_gesture = tk.Label(root, text="Gesture: None", font=("Helvetica", 12), bg="#222", fg="lightgreen")
lbl_gesture.pack(pady=5)

# Buttons: (caption, callback, width, background, font size, vertical padding)
for caption, callback, width, color, size, pad in (
    ("Start", start_gesture_control, 10, "green", 12, 10),
    ("Stop", stop_gesture_control, 10, "red", 12, 5),
    ("Switch Mode", switch_mode, 15, "blue", 11, 10),
):
    tk.Button(root, text=caption, command=callback, width=width, bg=color, fg="white", font=("Helvetica", size)).pack(pady=pad)

# Blocks until the window is closed.
root.mainloop()


NameError: name 'audio_classifier' is not defined