In [None]:
pip install pygame


: 

In [None]:
import cv2
import mediapipe as mp
from gtts import gTTS
import pygame
import threading
import time
import os
import tempfile

# Initialize MediaPipe Hands
# A single Hands instance is shared by the main loop; it is configured to
# report at most two hands per frame.
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
hands = mp_hands.Hands(
    max_num_hands=2,
    min_detection_confidence=0.7,
    min_tracking_confidence=0.5
)

# Initialize pygame mixer for audio playback
pygame.mixer.init()

# Variables
# Name of the gesture that was most recently spoken; the main loop clears it
# whenever the recognized gesture changes so the new gesture can be spoken.
last_spoken_gesture = ""
# Seconds a gesture must stay recognized before its sentence is spoken.
gesture_hold_time = 2.0
# True while speech is being generated/played; checked to avoid starting a
# second utterance on top of the current one.
is_speaking = False

# --------------------- SPEAK FUNCTION ---------------------
def speak(text):
    """Speak *text* aloud on a background daemon thread.

    The text is synthesized to an MP3 file in the system temp directory with
    gTTS and played through ``pygame.mixer.music``. If an utterance is already
    in progress (``is_speaking`` is true) the call returns immediately and
    *text* is dropped.

    Args:
        text: The sentence to synthesize and play.

    Errors raised while synthesizing or playing are caught inside the worker
    thread and printed as ``Speech Error: ...``; they never propagate to the
    caller.
    """
    global is_speaking

    if is_speaking:
        return

    # Claim the flag in the caller's thread, *before* the worker starts.
    # Setting it only inside the worker leaves a window in which a second
    # call still sees False and launches an overlapping worker that fights
    # over the same temp file and mixer channel.
    is_speaking = True

    def run_speech():
        global is_speaking
        temp_file = os.path.join(tempfile.gettempdir(), "gesture_speech.mp3")
        try:
            print(f"Speaking: {text}")

            tts = gTTS(text=text, lang='en')
            tts.save(temp_file)

            pygame.mixer.music.load(temp_file)
            pygame.mixer.music.play()

            # Block this worker (not the caller) until playback finishes.
            while pygame.mixer.music.get_busy():
                time.sleep(0.1)

        except Exception as e:
            print(f"Speech Error: {e}")

        finally:
            # Always release the file and the flag, even when synthesis or
            # playback failed part-way through.
            try:
                # Unload first so the file is no longer held by the mixer
                # when it is removed.
                pygame.mixer.music.unload()
            except Exception as e:
                print(f"Speech Error: {e}")
            try:
                os.remove(temp_file)
            except FileNotFoundError:
                # Synthesis failed before the file was written.
                pass
            except OSError as e:
                print(f"Speech Error: {e}")
            is_speaking = False

    thread = threading.Thread(target=run_speech, daemon=True)
    try:
        thread.start()
    except RuntimeError:
        # The worker never ran, so nothing else will clear the flag.
        is_speaking = False
        raise

# --------------------- FINGER STATE ---------------------
def get_finger_states(hand_landmarks, handedness):
    """Classify each finger of one hand as raised (1) or not (0).

    Args:
        hand_landmarks: Object whose ``landmark`` sequence is indexable by
            landmark id and whose items expose ``x`` and ``y`` attributes.
        handedness: Hand label; ``"Right"`` selects the right-hand thumb
            rule, any other value selects the opposite rule.

    Returns:
        A list of five ints ``[thumb, index, middle, ring, little]``.
    """
    points = hand_landmarks.landmark

    # (tip, pip) landmark ids for index, middle, ring and little finger.
    # A finger counts as raised when its tip's y is less than its PIP's y.
    tip_pip_pairs = ((8, 6), (12, 10), (16, 14), (20, 18))
    raised = [
        1 if points[tip_id].y < points[pip_id].y else 0
        for tip_id, pip_id in tip_pip_pairs
    ]

    # Thumb: compare the x of the tip (4) with the IP joint (3); the
    # direction of the comparison flips with the hand label.
    tip_x = points[4].x
    joint_x = points[3].x
    if handedness == "Right":
        thumb_out = tip_x < joint_x
    else:
        thumb_out = tip_x > joint_x

    return [1 if thumb_out else 0] + raised

# --------------------- CLEANED GESTURE DICTIONARY ---------------------
# Lookup table from finger-state pattern to gesture description.
#
# Keys are tuples of 0/1 flags in the order produced by get_finger_states
# (thumb, index, middle, ring, little). A 5-value key describes one hand; a
# 10-value key is the states of two hands concatenated. Each value is a dict
# with the gesture's display "name" and the "sentence" to be spoken.
gesture_dict = {
    pattern: {"name": name, "sentence": sentence}
    for pattern, name, sentence in (
        # -------- Single-hand gestures (5 values) --------
        ((0, 0, 0, 0, 0), "Fist", "I am showing a closed fist"),
        ((1, 1, 1, 1, 1), "Open Hand", "stop"),
        ((1, 0, 0, 0, 0), "Thumbs Up", "Thumbs up, everything is good"),
        ((0, 1, 0, 0, 0), "Pointing", "I am pointing at something"),
        ((0, 1, 1, 0, 0), "Peace", "Peace sign, victory gesture"),
        ((0, 0, 0, 0, 1), "Little Finger", "Get lost from this place"),
        ((1, 1, 0, 0, 0), "Two Fingers", "Had Dinner"),
        ((1, 1, 1, 0, 0), "Three Fingers", "Three fingers are up"),
        ((0, 0, 1, 1, 1), "OK", "OK sign, everything is okay"),
        ((0, 1, 0, 1, 0), "Rock", "Rock and roll hand sign"),
        ((1, 0, 1, 1, 1), "Hi", "Hello, I am greeting you"),
        ((1, 0, 0, 1, 0), "Love You", "I love you sign language"),
        ((1, 1, 1, 0, 1), "Stop", "Stop, please wait"),
        ((1, 1, 1, 1, 0), "Thank You", "Thank you very much"),

        # -------- Multi-hand gestures (10 values) --------
        ((0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
         "Both Fists", "I am showing closed fists with both hands"),
        ((1, 1, 1, 1, 1, 1, 1, 1, 1, 1),
         "Prayer ", " Thank You Namaste"),
        ((1, 0, 0, 0, 0, 1, 0, 0, 0, 0),
         "Double Thumbs Up", "Thumbs up with both hands"),
        ((0, 1, 0, 0, 0, 0, 1, 0, 0, 0),
         "Both Pointing", "Pointing with both hands"),
        ((0, 1, 1, 0, 0, 0, 1, 1, 0, 0),
         "Double Peace Sign", "Peace sign with both hands"),
        ((1, 1, 0, 0, 1, 0, 0, 0, 0, 0),
         "I Love You (Left)", "I Love You"),
        ((0, 0, 0, 0, 0, 1, 1, 0, 0, 1),
         "I Love You (Right)", "I Love You"),
    )
}

# --------------------- MAIN LOOP ---------------------
# Gesture currently being held and the time it was first seen; together they
# implement the "hold for gesture_hold_time seconds" rule below.
current_gesture = None
gesture_start_time = None

cap = cv2.VideoCapture(0)

print("\n===============================================")
print("Hand Gesture Recognition - Google TTS Enabled")
print("Hold gesture for 2 seconds for speech output")
print("===============================================\n")

# The loop is wrapped in try/finally so the camera, the preview window and the
# audio mixer are released even if a frame raises or the user interrupts.
try:
    while cap.isOpened():
        success, frame = cap.read()
        if not success:
            break

        # Mirror the frame so the preview behaves like a mirror, then convert
        # to RGB for MediaPipe.
        frame = cv2.flip(frame, 1)
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = hands.process(rgb)

        current_time = time.time()
        gesture_info = None
        fingers_combined = []

        if results.multi_hand_landmarks:
            detected_hands = []
            for hand_landmarks, hand_class in zip(results.multi_hand_landmarks,
                                                  results.multi_handedness):
                handedness = hand_class.classification[0].label
                mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)

                states = get_finger_states(hand_landmarks, handedness)
                detected_hands.append((handedness, states))

            # Order hands by label ("Left" sorts before "Right") so a two-hand
            # key does not depend on the order in which the hands happened to
            # be detected; asymmetric entries such as "I Love You (Left)"
            # rely on the left hand's states coming first. The sort is stable,
            # so two hands with the same label keep their detection order.
            detected_hands.sort(key=lambda labelled: labelled[0])
            for _, states in detected_hands:
                fingers_combined.extend(states)

            # 5 values = one hand, 10 values = two hands; both are plain
            # lookups in the same dictionary.
            if len(fingers_combined) in (5, 10):
                gesture_info = gesture_dict.get(tuple(fingers_combined))

            if gesture_info:
                gesture_name = gesture_info["name"]
                sentence = gesture_info["sentence"]

                cv2.putText(frame, f"Gesture: {gesture_name}", (10, 40),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255,0), 2)

                # A different gesture restarts the hold timer and re-arms
                # speech for it.
                if current_gesture != gesture_name:
                    current_gesture = gesture_name
                    gesture_start_time = current_time
                    last_spoken_gesture = ""

                if current_time - gesture_start_time >= gesture_hold_time:
                    # Speak each held gesture once; if speech is busy, retry
                    # on a later frame instead of marking it as spoken.
                    if last_spoken_gesture != gesture_name and not is_speaking:
                        speak(sentence)
                        last_spoken_gesture = gesture_name
        else:
            current_gesture = None
            gesture_start_time = None

            cv2.putText(frame, "No hand detected", (10,40),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0,0,255), 2)

        cv2.imshow("Gesture Recognition - TTS", frame)

        # Key code 27 is ESC: quit the loop.
        if cv2.waitKey(1) & 0xFF == 27:
            break
finally:
    cap.release()
    cv2.destroyAllWindows()
    hands.close()
    pygame.mixer.quit()


pygame 2.6.1 (SDL 2.28.4, Python 3.12.7)
Hello from the pygame community. https://www.pygame.org/contribute.html

Hand Gesture Recognition - Google TTS Enabled
Hold gesture for 2 seconds for speech output

