In [1]:
import time
import cv2
import numpy as np
import mediapipe as mp
import os
from tensorflow.keras.models import load_model

In [4]:
# Initialize MediaPipe Holistic
mp_holistic = mp.solutions.holistic
mp_drawing = mp.solutions.drawing_utils
holistic = mp_holistic.Holistic(min_detection_confidence=0.8, min_tracking_confidence=0.8)

I0000 00:00:1726415456.940058  658930 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 88.1), renderer: Apple M1
W0000 00:00:1726415457.023662  659948 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1726415457.030231  659948 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1726415457.030835  659946 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1726415457.030886  659951 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1726415457.031178  659950 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support 

In [5]:
# Load the trained model
model = load_model('sign_language_model_test.h5')



In [7]:
# Load gesture labels
gesture_labels =sorted(os.listdir('adjectives'))# Add all 59 gesture labels here
len(gesture_labels)

59

In [8]:
max_frames = 90
max_sequence = 90
sequence = []
predictions = []

In [9]:
def preprocess_frame(frame):
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    #rgb_frame = cv2.flip(rgb_frame, 1)
    return rgb_frame

In [10]:
def extract_landmarks(results):
    lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21*3)
    rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21*3)
    pose = np.array([[res.x, res.y, res.z] for res in results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(33*3)
    # face = np.array([[res.x, res.y, res.z] for res in results.face_landmarks.landmark]).flatten() if results.face_landmarks else np.zeros(468*3)
    return np.concatenate([lh, rh, pose])

In [11]:
def recognize_gesture(sequence):
    global predictions
    threshold = 0.8
    sequence=sequence[:max_sequence]
    
    if len(sequence) == max_sequence:
        res = model.predict(np.expand_dims(sequence, axis=0))[0]
        # predictions.append(gesture_labels[np.argmax(res)])
        
        if res[np.argmax(res)]>threshold:
            if len(predictions) > 0:
                if gesture_labels[np.argmax(res)] != predictions[-1]:
                    predictions.append(gesture_labels[np.argmax(res)][3:])
            else:
                predictions.append(gesture_labels[np.argmax(res)])
                
        if len(predictions) > 5:
            predictions = predictions[-5:]

        return predictions
    return ""

In [12]:
def draw_landmarks(image, results):
    # mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACEMESH_CONTOURS)
    mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS)
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS)
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS)

In [14]:
def main():
    global sequence
    cap = cv2.VideoCapture(0)
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            print("Failed to grab frame")
            break

        processed_frame = preprocess_frame(frame)
        results = holistic.process(processed_frame)
        landmarks = extract_landmarks(results)
        sequence.append(landmarks)
        sequence = sequence[-max_frames:]
        gesture = recognize_gesture(sequence)
        draw_landmarks(frame, results)

        cv2.putText(frame, f"Gesture: {gesture}", (10, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)
        cv2.imshow('Sign Language to Text', frame)

        if cv2.waitKey(10) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()

In [11]:
# import cv2
# import time
# from collections import deque

# # Assuming preprocess_frame, extract_landmarks, recognize_gesture, draw_landmarks are defined elsewhere

# def main():
#     global sequence
#     cap = cv2.VideoCapture(0)
#     max_frames = 120
#     sequence = deque(maxlen=max_frames)
#     gesture_display_time = 1.5  # Duration to display the gesture in seconds
#     last_gesture_time = None
#     current_gesture = ""

#     while cap.isOpened():
#         ret, frame = cap.read()
#         if not ret:
#             print("Failed to grab frame")
#             break

#         processed_frame = preprocess_frame(frame)
#         results = holistic.process(processed_frame)
#         landmarks = extract_landmarks(results)
#         sequence.append(landmarks)

#         # Once 120 frames are collected, recognize gesture and display it
#         if len(sequence) == max_frames and last_gesture_time is None:
#             current_gesture = recognize_gesture(sequence)
#             last_gesture_time = time.time()
#             sequence.clear()  # Reset sequence for the next set of frames

#         # Draw the pose landmarks
#         draw_landmarks(frame, results)

#         # If a gesture has been recognized, display it for 1.5 seconds
#         if last_gesture_time:
#             if time.time() - last_gesture_time <= gesture_display_time:
#                 cv2.putText(frame, f"Gesture: {current_gesture}", (10, 50), 
#                             cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)
#             else:
#                 last_gesture_time = None  # Reset timer for the next gesture

#         cv2.imshow('Sign Language to Text', frame)

#         if cv2.waitKey(10) & 0xFF == ord('q'):
#             break

#     cap.release()
#     cv2.destroyAllWindows()


In [12]:
def main1():
    global sequence
    gesture_display_time = 1.5
    last_gesture_time = None
    cap = cv2.VideoCapture(0)
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            print("Failed to grab frame")
            break


        processed_frame = preprocess_frame(frame)
        results = holistic.process(processed_frame)
        landmarks = extract_landmarks(results)
        sequence.append(landmarks)
        sequence = sequence[-max_frames:]
        gesture = recognize_gesture(sequence)
        last_gesture_time = time.time()
        draw_landmarks(frame, results)

        if last_gesture_time:
            if time.time() - last_gesture_time <= gesture_display_time:
                cv2.putText(frame, f"Gesture: {gesture}", (10, 50), 
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)
            else:
                last_gesture_time = None
        cv2.imshow('Sign Language to Text', frame)


        if cv2.waitKey(10) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()

In [13]:
def main2():
    global sequence
    sentence = []
    cap = cv2.VideoCapture(0)
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            print("Failed to grab frame")
            break

        processed_frame = preprocess_frame(frame)
        results = holistic.process(processed_frame)
        draw_landmarks(frame, results)
        landmarks = extract_landmarks(results)
        sequence.append(landmarks)
        sequence = sequence[-max_frames:]
        if len(sequence)==max_frames:
            gesture = recognize_gesture(sequence)
        
        
        cv2.rectangle(frame, (0,0), (640, 40), (245, 117, 16), -1)
        cv2.putText(frame, f"Gesture: {predictions}", (10, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 9, 0), 5)
        cv2.imshow('Sign Language to Text', frame)


        if cv2.waitKey(10) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()

In [None]:
if __name__ == "__main__":
    main2()



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 135ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1

In [11]:
model.summary()