In [1]:
pip install gtts

Collecting gtts
  Downloading gTTS-2.5.3-py3-none-any.whl.metadata (4.1 kB)
Downloading gTTS-2.5.3-py3-none-any.whl (29 kB)
Installing collected packages: gtts
Successfully installed gtts-2.5.3
Note: you may need to restart the kernel to use updated packages.


In [5]:
pip install pygame

Collecting pygame
  Downloading pygame-2.6.0-cp312-cp312-win_amd64.whl.metadata (13 kB)
Downloading pygame-2.6.0-cp312-cp312-win_amd64.whl (10.8 MB)
   ---------------------------------------- 0.0/10.8 MB ? eta -:--:--
   ---------------------------------------- 0.0/10.8 MB ? eta -:--:--
   ---------------------------------------- 0.0/10.8 MB 435.7 kB/s eta 0:00:25
   ---------------------------------------- 0.1/10.8 MB 939.4 kB/s eta 0:00:12
    --------------------------------------- 0.2/10.8 MB 1.2 MB/s eta 0:00:10
    --------------------------------------- 0.2/10.8 MB 1.3 MB/s eta 0:00:09
   - -------------------------------------- 0.3/10.8 MB 1.3 MB/s eta 0:00:09
   - -------------------------------------- 0.4/10.8 MB 1.3 MB/s eta 0:00:08
   - -------------------------------------- 0.5/10.8 MB 1.3 MB/s eta 0:00:08
   - -------------------------------------- 0.5/10.8 MB 1.4 MB/s eta 0:00:08
   -- ------------------------------------- 0.6/10.8 MB 1.5 MB/s eta 0:00:07
   -- --------

In [9]:
import cv2
import mediapipe as mp
import numpy as np
import pandas as pd
import string
from tensorflow import keras
import itertools
import os
from gtts import gTTS
import pygame
import tempfile

# Initialize Pygame for audio playback
pygame.init()

# Load the pre-trained model
try:
    asl_model = keras.models.load_model("model_v2.h5")
except Exception as e:
    print(f"Error loading model: {e}")
    exit()

# Initialize MediaPipe Hands and Drawing modules
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
mp_styles = mp.solutions.drawing_styles

# Define the alphabet list for predictions
asl_alphabet = ['1', '2', '3', '4', '5', '6', '7', '8', '9'] + list(string.ascii_uppercase)

# Function to calculate landmark positions in the image
def extract_landmarks(img, hand_landmarks):
    img_width, img_height = img.shape[1], img.shape[0]
    landmarks = [[min(int(landmark.x * img_width), img_width - 1),
                  min(int(landmark.y * img_height), img_height - 1)]
                 for landmark in hand_landmarks.landmark]
    return landmarks

# Function to preprocess landmarks for model input
def preprocess_landmarks(landmarks):
    base_x, base_y = landmarks[0][0], landmarks[0][1]
    relative_landmarks = [[x - base_x, y - base_y] for x, y in landmarks]
    flattened_landmarks = list(itertools.chain.from_iterable(relative_landmarks))
    max_val = max(map(abs, flattened_landmarks))
    normalized_landmarks = [val / max_val for val in flattened_landmarks] if max_val != 0 else flattened_landmarks
    return normalized_landmarks

# Function to generate and play speech
def speak(text):
    try:
        # Create a temporary file for the speech output
        with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as temp_file:
            filename = temp_file.name
            tts = gTTS(text=text, lang='en')
            tts.save(filename)
        
        # Play the generated speech
        pygame.mixer.music.load(filename)
        pygame.mixer.music.play()
        while pygame.mixer.music.get_busy():  # Wait for the playback to finish
            pygame.time.Clock().tick(10)
        
        # Clean up the temporary file
        os.remove(filename)
    except Exception as e:
        print(f"Error during speech playback: {e}")

# Start capturing from webcam
video_capture = cv2.VideoCapture(0)
last_predicted_label = ""

with mp_hands.Hands(model_complexity=0, max_num_hands=2, 
                    min_detection_confidence=0.5, min_tracking_confidence=0.5) as hand_tracker:

    while video_capture.isOpened():
        ret, frame = video_capture.read()
        if not ret:
            print("No frame captured from webcam.")
            continue
        
        # Flip the image and convert color format
        flipped_frame = cv2.flip(frame, 1)
        rgb_frame = cv2.cvtColor(flipped_frame, cv2.COLOR_BGR2RGB)
        
        # Process the frame for hand landmarks
        hand_results = hand_tracker.process(rgb_frame)

        # Reconvert the image to BGR for display
        annotated_frame = cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR).copy()

        # Display the annotated frame
        cv2.imshow('ISL Detector', annotated_frame)
        
        # Check for key press to evaluate gesture
        key = cv2.waitKey(1)
        if key == ord('e'):  # Press 'e' to evaluate gesture
            if hand_results.multi_hand_landmarks:
                for hand_landmarks in hand_results.multi_hand_landmarks:
                    # Extract and preprocess landmarks
                    landmark_coords = extract_landmarks(annotated_frame, hand_landmarks)
                    model_input = preprocess_landmarks(landmark_coords)
                    model_input_df = pd.DataFrame([model_input])

                    # Predict the gesture and annotate the frame
                    try:
                        pred_probs = asl_model.predict(model_input_df, verbose=0)
                        predicted_class = np.argmax(pred_probs, axis=1)[0]
                        predicted_label = asl_alphabet[predicted_class]

                        # Speak if there is a valid gesture
                        if predicted_label:
                            speak(predicted_label)

                        # Draw landmarks and label
                        mp_drawing.draw_landmarks(annotated_frame, hand_landmarks, mp_hands.HAND_CONNECTIONS,
                                                  mp_styles.get_default_hand_landmarks_style(),
                                                  mp_styles.get_default_hand_connections_style())
                        cv2.putText(annotated_frame, predicted_label, (50, 50), 
                                    cv2.FONT_HERSHEY_SIMPLEX, 1.5, (0, 0, 255), 2)
                        
                        print(predicted_label)
                        print("------------------------")
                    except Exception as e:
                        print(f"Error during prediction: {e}")

        # Exit the loop when 'ESC' is pressed
        if key == 27:
            break

# Release resources
video_capture.release()
cv2.destroyAllWindows()
pygame.quit()


pygame 2.6.0 (SDL 2.28.4, Python 3.12.4)
Hello from the pygame community. https://www.pygame.org/contribute.html




Error during speech playback: [WinError 32] The process cannot access the file because it is being used by another process: 'C:\\Users\\91977\\AppData\\Local\\Temp\\tmpr1ha1vdg.mp3'
F
------------------------
Error during speech playback: [WinError 32] The process cannot access the file because it is being used by another process: 'C:\\Users\\91977\\AppData\\Local\\Temp\\tmp196u65ch.mp3'
1
------------------------




Error during speech playback: [WinError 32] The process cannot access the file because it is being used by another process: 'C:\\Users\\91977\\AppData\\Local\\Temp\\tmpp006mgqw.mp3'
F
------------------------
Error during speech playback: [WinError 32] The process cannot access the file because it is being used by another process: 'C:\\Users\\91977\\AppData\\Local\\Temp\\tmps_waq6dk.mp3'
2
------------------------




Error during speech playback: [WinError 32] The process cannot access the file because it is being used by another process: 'C:\\Users\\91977\\AppData\\Local\\Temp\\tmp_fqtauuj.mp3'
A
------------------------
Error during speech playback: [WinError 32] The process cannot access the file because it is being used by another process: 'C:\\Users\\91977\\AppData\\Local\\Temp\\tmpfx7kqj86.mp3'
A
------------------------




Error during speech playback: [WinError 32] The process cannot access the file because it is being used by another process: 'C:\\Users\\91977\\AppData\\Local\\Temp\\tmpre64uqj3.mp3'
Z
------------------------
Error during speech playback: [WinError 32] The process cannot access the file because it is being used by another process: 'C:\\Users\\91977\\AppData\\Local\\Temp\\tmpjasj5neu.mp3'
Z
------------------------




Error during speech playback: [WinError 32] The process cannot access the file because it is being used by another process: 'C:\\Users\\91977\\AppData\\Local\\Temp\\tmp6ft1qao4.mp3'
P
------------------------
Error during speech playback: [WinError 32] The process cannot access the file because it is being used by another process: 'C:\\Users\\91977\\AppData\\Local\\Temp\\tmpixbrq8yx.mp3'
O
------------------------




Error during speech playback: [WinError 32] The process cannot access the file because it is being used by another process: 'C:\\Users\\91977\\AppData\\Local\\Temp\\tmpyykg7cj3.mp3'
1
------------------------
Error during speech playback: [WinError 32] The process cannot access the file because it is being used by another process: 'C:\\Users\\91977\\AppData\\Local\\Temp\\tmpgcyj8wta.mp3'
A
------------------------




Error during speech playback: [WinError 32] The process cannot access the file because it is being used by another process: 'C:\\Users\\91977\\AppData\\Local\\Temp\\tmpv65b3_l7.mp3'
F
------------------------
Error during speech playback: [WinError 32] The process cannot access the file because it is being used by another process: 'C:\\Users\\91977\\AppData\\Local\\Temp\\tmply_js6q3.mp3'
F
------------------------




Error during speech playback: [WinError 32] The process cannot access the file because it is being used by another process: 'C:\\Users\\91977\\AppData\\Local\\Temp\\tmplegr2vaw.mp3'
D
------------------------
Error during speech playback: [WinError 32] The process cannot access the file because it is being used by another process: 'C:\\Users\\91977\\AppData\\Local\\Temp\\tmpawes323w.mp3'
P
------------------------




Error during speech playback: [WinError 32] The process cannot access the file because it is being used by another process: 'C:\\Users\\91977\\AppData\\Local\\Temp\\tmpyuzadwq_.mp3'
I
------------------------
Error during speech playback: [WinError 32] The process cannot access the file because it is being used by another process: 'C:\\Users\\91977\\AppData\\Local\\Temp\\tmpwyvg0w09.mp3'
I
------------------------




Error during speech playback: [WinError 32] The process cannot access the file because it is being used by another process: 'C:\\Users\\91977\\AppData\\Local\\Temp\\tmp19wh7z9j.mp3'
9
------------------------


