# SIGN TO TEXT/AUDIO

In [5]:
import cv2
import mediapipe as mp
import numpy as np
from keras.models import load_model
import os

# Load the models
model_one_hand = load_model(r'SIGN_LANGUAGE\model_CNN_ONE_HAND_FINAL.h5')  # One-hand model
model_two_hand = load_model(r'SIGN_LANGUAGE\hand_landmark_model_6.h5')      # Two-hand model

# Load labels from label1.txt (one-hand gestures)
labels_one_hand = {}
with open(r'"C:\Users\Athira\OneDrive\Documents\Desktop\SIGN_LANGUAGE\label_one_hand.txt"', 'r') as file:
    for line in file.readlines():
        index, label = line.strip().split(":")
        labels_one_hand[int(index)] = label

# Load labels from label2.txt (two-hand gestures)
labels_two_hand = {}
with open(r'"C:\Users\Athira\OneDrive\Documents\Desktop\SIGN_LANGUAGE\label_two_hand.txt"', 'r') as file:
    for line in file.readlines():
        index, label = line.strip().split(":")
        labels_two_hand[int(index)] = label

# Initialize MediaPipe Hands
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(static_image_mode=False, max_num_hands=2, min_detection_confidence=0.5)

# Function to extract raw landmark features for one-hand model
def extract_raw_features(hand_landmarks, handedness):
    features = []
    for lm in hand_landmarks.landmark:
        features.extend([lm.x, lm.y, lm.z])
    features.extend([1 if handedness == "Right" else 0, 1 if handedness == "Left" else 0])  # Handedness
    return np.array(features).reshape(1, -1)  # Shape (1, 65)

# Function to extract normalized distance/angle features for two-hand model
def extract_distance_features(hand_landmarks, handedness):
    features = []
    wrist = hand_landmarks[0]  # Wrist landmark
    normalized_landmarks = [(lm.x - wrist.x, lm.y - wrist.y, lm.z - wrist.z) for lm in hand_landmarks]
    
    # Compute distances
    for i, lm1 in enumerate(normalized_landmarks):
        for j, lm2 in enumerate(normalized_landmarks):
            if i < j:  # Avoid duplicate pairs
                distance = np.linalg.norm(np.array(lm1) - np.array(lm2))
                features.append(distance)
    
    # Add handedness as a binary feature
    features.append(1 if handedness == "Right" else 0)
    return features

# Testing loop
cap = cv2.VideoCapture(0)
print("Press 'q' to quit.")

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    # Flip and process frame
    frame = cv2.flip(frame, 1)
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    results = hands.process(rgb_frame)

    # Initialize text to display
    prediction_text = "No prediction"

    if results.multi_hand_landmarks:
        hand_features_list = []  # Collect features for each hand
        for idx, hand_landmarks in enumerate(results.multi_hand_landmarks):
            handedness = results.multi_handedness[idx].classification[0].label  # "Right" or "Left"

            # Extract raw features (for one-hand model)
            raw_features = extract_raw_features(hand_landmarks, handedness)
            hand_features_list.append(raw_features)

        # Make predictions
        if len(hand_features_list) == 1:
            # Prediction for one hand
            prediction_one_hand = model_one_hand.predict(hand_features_list[0])
            predicted_class = np.argmax(prediction_one_hand)
            prediction_text = f"One-hand Prediction: {labels_one_hand.get(predicted_class, 'Unknown')}"
        elif len(hand_features_list) == 2:
            # Combine features for two-hand model
            hand_features_combined = []
            for idx, (hand_landmarks, handedness) in enumerate(
                zip(results.multi_hand_landmarks, results.multi_handedness)
            ):
                hand_features_combined.extend(
                    extract_distance_features(hand_landmarks.landmark, handedness.classification[0].label)
                )
            hand_features_combined = np.array(hand_features_combined).reshape(1, -1)  # Shape (1, 422)

            # Prediction for two hands
            prediction_two_hand = model_two_hand.predict(hand_features_combined)
            predicted_class = np.argmax(prediction_two_hand)
            prediction_text = f"Two-hand Prediction: {labels_two_hand.get(predicted_class, 'Unknown')}"

    # Display the prediction on the screen
    cv2.putText(frame, prediction_text, (10, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,0, 255), 2)

    # Display the frame
    cv2.imshow("Sign Language Detection", frame)

    # Break loop on 'q'
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release resources
cap.release()
cv2.destroyAllWindows()
hands.close()




Press 'q' to quit.
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 72ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 80ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 76ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37

# WITH VOICE

In [1]:
import cv2
import mediapipe as mp
import numpy as np
from keras.models import load_model
import os
import pyttsx3
import time

# Initialize pyttsx3 engine
engine = pyttsx3.init()
engine.setProperty('rate', 150)  # Set speech speed
engine.setProperty('volume', 1)  # Set volume level (0.0 to 1.0)

# Load the models
model_one_hand = load_model(r'SIGN_LANGUAGE\model_CNN_ONE_HAND_FINAL.h5')  # One-hand model
model_two_hand = load_model(r'SIGN_LANGUAGE\hand_landmark_model_6.h5')      # Two-hand model

# Load labels from label1.txt (one-hand gestures)
labels_one_hand = {}
with open(r'SIGN_LANGUAGE\label_one_hand.txt', 'r') as file:
    for line in file.readlines():
        index, label = line.strip().split(":")
        labels_one_hand[int(index)] = label

# Load labels from label2.txt (two-hand gestures)
labels_two_hand = {}
with open(r'SIGN_LANGUAGE\label_two_hand.txt', 'r') as file:
    for line in file.readlines():
        index, label = line.strip().split(":")
        labels_two_hand[int(index)] = label

# Initialize MediaPipe Hands
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(static_image_mode=False, max_num_hands=2, min_detection_confidence=0.5)

# Function to extract raw landmark features for one-hand model
def extract_raw_features(hand_landmarks, handedness):
    features = []
    for lm in hand_landmarks.landmark:
        features.extend([lm.x, lm.y, lm.z])
    features.extend([1 if handedness == "Right" else 0, 1 if handedness == "Left" else 0])  # Handedness
    return np.array(features).reshape(1, -1)  # Shape (1, 65)

# Function to extract normalized distance/angle features for two-hand model
def extract_distance_features(hand_landmarks, handedness):
    features = []
    wrist = hand_landmarks[0]  # Wrist landmark
    normalized_landmarks = [(lm.x - wrist.x, lm.y - wrist.y, lm.z - wrist.z) for lm in hand_landmarks]
    
    # Compute distances
    for i, lm1 in enumerate(normalized_landmarks):
        for j, lm2 in enumerate(normalized_landmarks):
            if i < j:  # Avoid duplicate pairs
                distance = np.linalg.norm(np.array(lm1) - np.array(lm2))
                features.append(distance)
    
    # Add handedness as a binary feature
    features.append(1 if handedness == "Right" else 0)
    return features

# Function to speak the text using pyttsx3
def speak_text(text):
    engine.say(text)
    engine.runAndWait()

# Testing loop
cap = cv2.VideoCapture(0)
print("Press 'q' to quit.")

last_prediction = ""  # Variable to track the last prediction

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    # Flip and process frame
    frame = cv2.flip(frame, 1)
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    results = hands.process(rgb_frame)

    # Initialize text to display
    prediction_text = "No prediction"

    if results.multi_hand_landmarks:
        hand_features_list = []  # Collect features for each hand
        for idx, hand_landmarks in enumerate(results.multi_hand_landmarks):
            handedness = results.multi_handedness[idx].classification[0].label  # "Right" or "Left"

            # Extract raw features (for one-hand model)
            raw_features = extract_raw_features(hand_landmarks, handedness)
            hand_features_list.append(raw_features)

        # Make predictions
        if len(hand_features_list) == 1:
            # Prediction for one hand
            prediction_one_hand = model_one_hand.predict(hand_features_list[0])
            predicted_class = np.argmax(prediction_one_hand)
            prediction_text = f"One-hand Prediction: {labels_one_hand.get(predicted_class, 'Unknown')}"

            # Speak the prediction if it has changed
            if prediction_text != last_prediction:
                speak_text(prediction_text)
                last_prediction = prediction_text

        elif len(hand_features_list) == 2:
            # Combine features for two-hand model
            hand_features_combined = []
            for idx, (hand_landmarks, handedness) in enumerate(
                zip(results.multi_hand_landmarks, results.multi_handedness)
            ):
                hand_features_combined.extend(
                    extract_distance_features(hand_landmarks.landmark, handedness.classification[0].label)
                )
            hand_features_combined = np.array(hand_features_combined).reshape(1, -1)  # Shape (1, 422)

            # Prediction for two hands
            prediction_two_hand = model_two_hand.predict(hand_features_combined)
            predicted_class = np.argmax(prediction_two_hand)
            prediction_text = f"Two-hand Prediction: {labels_two_hand.get(predicted_class, 'Unknown')}"

            # Speak the prediction if it has changed
            if prediction_text != last_prediction:
                speak_text(prediction_text)
                last_prediction = prediction_text

    # Display the prediction on the screen
    cv2.putText(frame, prediction_text, (10, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255, 255), 2)

    # Display the frame
    cv2.imshow("Sign Language Detection", frame)

    # Break loop on 'q'
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release resources
cap.release()
cv2.destroyAllWindows()
hands.close()


FileNotFoundError: [Errno 2] Unable to open file (unable to open file: name = 'SIGN_LANGUAGE\model_CNN_ONE_HAND_FINAL.h5', errno = 2, error message = 'No such file or directory', flags = 0, o_flags = 0)