# In [12]:
import cv2
import numpy as np
import mediapipe as mp
import pickle
import os

# Initialize MediaPipe Hands
# `mp_hands` and `mp_drawing` are aliases for the MediaPipe hands solution
# and its drawing utilities. `mp_drawing` is bound here but is not
# referenced by the code visible in this file.
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
# Single module-level detector instance, reused for every frame by
# `extract_hand_landmarks`. Both the detection and the tracking confidence
# thresholds are set to 0.7.
hands = mp_hands.Hands(min_detection_confidence=0.7, min_tracking_confidence=0.7)

db_filename = "gestures.pkl"


def load_gestures_db(path):
    """Load the gesture database stored at ``path``.

    Args:
        path: Location of the pickle file holding a ``{name: landmarks}``
            dictionary.

    Returns:
        The stored dictionary, or an empty dictionary when the file does not
        exist, cannot be unpickled (empty, truncated or corrupt file), or
        does not contain a dictionary. A warning is printed in the latter
        two cases so the problem is not silently hidden.
    """
    try:
        with open(path, "rb") as f:
            # NOTE(security): pickle can execute arbitrary code while
            # loading. Only open database files this application wrote.
            loaded = pickle.load(f)
    except FileNotFoundError:
        # First run: nothing has been saved yet.
        return {}
    except (pickle.UnpicklingError, EOFError, AttributeError,
            ImportError, IndexError) as exc:
        # An interrupted write or foreign file must not prevent startup.
        print(f"Warning: could not read {path!r} ({exc!r}); "
              "starting with an empty gesture database.")
        return {}

    if not isinstance(loaded, dict):
        print(f"Warning: {path!r} does not contain a gesture dictionary; "
              "starting with an empty gesture database.")
        return {}
    return loaded


# In-memory gesture database shared by the functions in this file.
gestures_db = load_gestures_db(db_filename)

def normalize_landmarks(landmarks):
    """Express every landmark as an offset from the wrist (landmark 0).

    Takes an indexable sequence of ``(x, y, z)`` points and returns a flat
    1-D NumPy array ``[dx0, dy0, dz0, dx1, dy1, dz1, ...]`` in which the
    first point's coordinates have been subtracted from every point.
    """
    origin = landmarks[0]
    offsets = []
    for point in landmarks:
        # Append the three coordinate differences directly to the flat list.
        offsets.extend(point[axis] - origin[axis] for axis in range(3))
    return np.array(offsets)

def extract_hand_landmarks(image):
    """Detect hands in a BGR frame and return their normalized landmarks.

    The frame is converted to RGB and passed to the shared ``hands``
    detector. The result is a list with one wrist-relative landmark vector
    (see ``normalize_landmarks``) per detected hand; it is empty when no
    hand is found.
    """
    detection = hands.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    detected_hands = detection.multi_hand_landmarks
    if not detected_hands:
        return []

    return [
        normalize_landmarks([(pt.x, pt.y, pt.z) for pt in hand.landmark])
        for hand in detected_hands
    ]

def add_new_gesture(name, landmarks, db=None, filename=None):
    """Store a hand gesture and persist the whole database to disk.

    Args:
        name: Label under which the gesture is stored. An existing entry
            with the same name is overwritten.
        landmarks: Landmark vector describing the gesture.
        db: Dictionary to update. Defaults to the module-level
            ``gestures_db``.
        filename: Pickle file to write. Defaults to the module-level
            ``db_filename``.

    The database is first written to a temporary file next to the target and
    then swapped in with ``os.replace``. An interrupted write therefore
    cannot leave a truncated database file behind.
    """
    store = gestures_db if db is None else db
    target = db_filename if filename is None else filename

    store[name] = landmarks

    tmp_path = f"{target}.tmp"
    try:
        with open(tmp_path, "wb") as f:
            pickle.dump(store, f)
        os.replace(tmp_path, target)
    finally:
        # Only still present if dumping or replacing failed.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)

def recognize_gesture(landmarks, db=None, ratio=0.1):
    """Return the name of the stored gesture closest to ``landmarks``.

    Args:
        landmarks: Flat landmark vector of the hand to classify.
        db: ``{name: landmark_vector}`` dictionary to search. Defaults to
            the module-level ``gestures_db``.
        ratio: Fraction used to derive the acceptance threshold (see below).

    Returns:
        ``"No gestures saved"`` when the database is empty, the name of the
        nearest stored gesture when its Euclidean distance is below the
        threshold, and ``"Unknown"`` otherwise. Ties go to the gesture that
        was stored first.

    Threshold:
        With two or more stored gestures the threshold is ``ratio`` times
        the mean distance from ``landmarks`` to all stored gestures.
        With a single stored gesture that mean equals the best distance
        itself, so the rule could never accept a match. In that case the
        threshold is ``ratio`` times the norm of the stored vector instead.
        This fallback is a heuristic scale reference, not a calibrated
        value.
    """
    if db is None:
        db = gestures_db
    if not db:
        return "No gestures saved"

    query = np.asarray(landmarks, dtype=float)
    # Each distance is computed once and reused for the best match and the
    # threshold.
    distances = {
        name: float(np.linalg.norm(query - np.asarray(stored, dtype=float)))
        for name, stored in db.items()
    }
    best_name = min(distances, key=distances.get)
    best_distance = distances[best_name]

    if len(distances) > 1:
        threshold = ratio * float(np.mean(list(distances.values())))
    else:
        template = np.asarray(db[best_name], dtype=float)
        threshold = ratio * float(np.linalg.norm(template))

    return best_name if best_distance < threshold else "Unknown"

# Webcam Setup
cap = cv2.VideoCapture(0)
mode = "recognize"  # "recognize" (default) or "add" (typing a new gesture name)
gesture_name = ""

# Key codes as returned by `cv2.waitKey(...) & 0xFF`. The exact codes for
# Enter and Backspace differ between platforms/GUI backends, so the common
# variants are accepted.
NO_KEY = 255  # waitKey returns -1 when nothing was pressed; -1 & 0xFF == 255
ENTER_KEYS = (10, 13)
BACKSPACE_KEYS = (8, 127)
ESC_KEY = 27

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Mirror the frame horizontally so the preview behaves like a mirror.
        frame = cv2.flip(frame, 1)
        hand_data = extract_hand_landmarks(frame)

        for landmarks in hand_data:
            gesture = recognize_gesture(landmarks)
            color = (0, 255, 0) if gesture != "Unknown" else (0, 0, 255)
            cv2.putText(frame, f"Gesture: {gesture}", (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, color, 2)

        # Show the prompt whenever a name is being typed, even if no hand is
        # currently detected, so the user can always see their input.
        if mode == "add":
            cv2.putText(frame, f"Enter name: {gesture_name}", (50, 100), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0), 2)

        cv2.imshow("Hand Gesture Recognition", frame)

        key = cv2.waitKey(1) & 0xFF
        if mode == "add":
            # Text entry takes priority over the shortcuts; otherwise the
            # letters "x" and "n" could never be part of a gesture name.
            if key == NO_KEY:
                pass
            elif key in ENTER_KEYS:
                # Save only when there is a name and a hand in this frame;
                # otherwise stay in add mode and keep the typed name.
                if gesture_name and hand_data:
                    add_new_gesture(gesture_name, hand_data[0])
                    mode = "recognize"
            elif key == ESC_KEY:
                # Cancel name entry without saving.
                mode = "recognize"
                gesture_name = ""
            elif key in BACKSPACE_KEYS:
                gesture_name = gesture_name[:-1]
            elif 32 <= key <= 126:
                # Accept printable ASCII only; ignore other control codes.
                gesture_name += chr(key)
        elif key == ord("x"):
            break
        elif key == ord("n"):
            mode = "add"
            gesture_name = ""
finally:
    # Release the camera and close the window even if the loop raised.
    cap.release()
    cv2.destroyAllWindows()