In [4]:
import cv2
import numpy as np
import mediapipe as mp
import pickle
import os

# MediaPipe hand-tracking setup: keep handles to the drawing helpers and the
# hands solution module, then build one shared detector that requires 0.7
# confidence for both initial detection and frame-to-frame tracking.
mp_drawing = mp.solutions.drawing_utils
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(
    min_detection_confidence=0.7,
    min_tracking_confidence=0.7,
)

# File that persists the gesture database between runs
db_filename = "gestures.pkl"


def load_gestures_db(path):
    """Load the saved gesture dictionary from *path*.

    Args:
        path: Location of the pickle file written by a previous run.

    Returns:
        The stored ``{gesture_name: landmarks}`` dict, or an empty dict when
        the file is missing, unreadable as a pickle, or does not contain a
        dict.
    """
    if not os.path.exists(path):
        return {}

    try:
        with open(path, "rb") as f:
            # SECURITY NOTE: unpickling can execute arbitrary code. Only load
            # a database file that this program itself wrote.
            loaded = pickle.load(f)
    except (pickle.UnpicklingError, EOFError, AttributeError,
            ImportError, IndexError) as exc:
        # An empty/truncated/corrupt file should not prevent start-up.
        print(f"Could not read '{path}' ({exc}); starting with an empty gesture database.")
        return {}

    if not isinstance(loaded, dict):
        print(f"'{path}' does not contain a gesture dictionary; starting with an empty gesture database.")
        return {}
    return loaded


# In-memory gesture database, pre-filled from disk when a saved file exists
gestures_db = load_gestures_db(db_filename)

def extract_hand_landmarks(image, bbox=None):
    """Detect a hand in a BGR frame and return its landmarks and bounding box.

    Args:
        image: BGR image array; it is converted to RGB before detection.
        bbox: Optional ``(x_min, y_min, x_max, y_max)`` pixel region of
            ``image`` to search in. The region is clamped to the image
            bounds before cropping.

    Returns:
        ``(landmarks, box)`` where ``landmarks`` is a flat 1D array of the
        ``x, y, z`` values reported for each landmark of the first detected
        hand (relative to the processed image, i.e. the crop when ``bbox``
        is given) and ``box`` is ``(x_min, y_min, x_max, y_max)`` in pixel
        coordinates of the original image. Returns ``(None, None)`` when no
        hand is found or the clamped ``bbox`` is empty.
    """
    img_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Offset of the processed image inside the original frame
    offset_x = offset_y = 0

    # Crop the image to the hand bounding box if available
    if bbox:
        frame_h, frame_w = img_rgb.shape[:2]
        x_min, y_min, x_max, y_max = (int(v) for v in bbox)

        # Clamp to the frame: negative indices would wrap around in NumPy
        # slicing and out-of-range boxes would yield an empty crop.
        x_min, x_max = max(0, x_min), min(frame_w, x_max)
        y_min, y_max = max(0, y_min), min(frame_h, y_max)
        if x_min >= x_max or y_min >= y_max:
            return None, None

        img_rgb = img_rgb[y_min:y_max, x_min:x_max]  # Crop to hand area
        offset_x, offset_y = x_min, y_min

    results = hands.process(img_rgb)
    if not results.multi_hand_landmarks:
        return None, None

    # Only the first detected hand is used
    hand_landmarks = results.multi_hand_landmarks[0]
    proc_h, proc_w = img_rgb.shape[:2]

    # Landmark values exactly as reported by the detector
    landmarks = [[lm.x, lm.y, lm.z] for lm in hand_landmarks.landmark]

    # Pixel positions, shifted back into original image space
    x_list = [int(lm.x * proc_w) + offset_x for lm in hand_landmarks.landmark]
    y_list = [int(lm.y * proc_h) + offset_y for lm in hand_landmarks.landmark]
    box = (min(x_list), min(y_list), max(x_list), max(y_list))

    return np.array(landmarks).flatten(), box  # Convert to 1D array

def add_new_gesture(name, landmarks):
    """Register a gesture under ``name`` and persist the whole database.

    The in-memory ``gestures_db`` entry for ``name`` is set (or overwritten)
    with ``landmarks``, after which the complete dictionary is pickled to
    ``db_filename`` and a confirmation line is printed.
    """
    gestures_db.update({name: landmarks})

    # Rewrite the database file so the new gesture survives a restart
    with open(db_filename, "wb") as db_file:
        pickle.dump(gestures_db, db_file)

    print(f"Gesture '{name}' saved successfully!")

def recognize_gesture(landmarks, threshold=0.1, db=None):
    """Recognize the closest matching stored gesture.

    Args:
        landmarks: Landmark vector of the hand to classify.
        threshold: Maximum Euclidean distance for a stored gesture to count
            as a match. Defaults to 0.1.
        db: Optional ``{gesture_name: landmarks}`` mapping to search.
            Defaults to the module-level ``gestures_db``.

    Returns:
        The name of the nearest stored gesture when its distance is below
        ``threshold``; ``"Unknown gesture"`` when nothing is close enough;
        ``"No gestures saved"`` when the database is empty.
    """
    if db is None:
        db = gestures_db
    if not db:
        return "No gestures saved"

    query = np.asarray(landmarks, dtype=float)
    min_distance = float("inf")
    recognized_gesture = None

    for gesture, stored_landmarks in db.items():
        stored = np.asarray(stored_landmarks, dtype=float)
        # A stored vector of a different shape cannot be compared
        # element-wise; skip it instead of failing on the subtraction.
        if stored.shape != query.shape:
            continue

        distance = np.linalg.norm(query - stored)
        if distance < min_distance:
            min_distance = distance
            recognized_gesture = gesture

    return recognized_gesture if min_distance < threshold else "Unknown gesture"

# Start webcam
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    print("Could not open webcam.")

mode = "recognize"  # Default mode; "add" stores the next detected hand
bbox = None  # Most recent hand bounding box

# Controls: 'n' switches to add mode, 'x' quits.
try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        frame = cv2.flip(frame, 1)  # Flip for better usability
        landmarks, new_bbox = extract_hand_landmarks(frame)

        if landmarks is not None:
            bbox = new_bbox  # Update bounding box

            # Draw bounding box around the hand, with a 20 px margin
            x_min, y_min, x_max, y_max = bbox
            cv2.rectangle(frame, (x_min - 20, y_min - 20), (x_max + 20, y_max + 20), (255, 0, 0), 2)

            if mode == "recognize":
                gesture_name = recognize_gesture(landmarks)
                cv2.putText(frame, f"Gesture: {gesture_name}", (50, 50),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
            elif mode == "add":
                # input() blocks this loop until a name is typed in the
                # console, so the video window does not refresh meanwhile.
                gesture_name = input("Enter gesture name: ").strip()
                if gesture_name:
                    add_new_gesture(gesture_name, landmarks)
                else:
                    print("Empty gesture name; nothing saved.")
                mode = "recognize"  # Switch back after adding

        cv2.imshow("Hand Gesture Recognition", frame)

        key = cv2.waitKey(1) & 0xFF
        if key == ord("x"):  # Quit when 'x' is pressed
            print("Exiting program...")
            break
        elif key == ord("n"):  # Switch to add mode
            mode = "add"
finally:
    # Always free the camera and close the window, even if the loop raised
    cap.release()
    cv2.destroyAllWindows()


Exiting program...
