In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
import cv2
import joblib
import mediapipe as mp
from tensorflow.keras.preprocessing.sequence import pad_sequences
import matplotlib.pyplot as plt

from keras.initializers import Orthogonal


In [2]:
custom_objects = {'Orthogonal': Orthogonal}
loaded_model = tf.keras.models.load_model('my_model.h5', custom_objects=custom_objects)


In [3]:
mp_drawing = mp.solutions.drawing_utils
mp_hands = mp.solutions.hands
mp_pose = mp.solutions.pose
mp_face_mesh = mp.solutions.face_mesh
max_sequence_length = 100 

In [4]:

def capture_video():
    cap = cv2.VideoCapture(0)  # Open default camera (index 0)

    while True:
        ret, frame = cap.read()  # Read frame from camera
        cv2.imshow('Live Video', frame)  # Display frame
        if cv2.waitKey(1) & 0xFF == ord('q'):  # Press 'q' to quit
            break

    cap.release()
    cv2.destroyAllWindows()

In [5]:
def extract_landmarks_mediapipe(frame):
    with mp_hands.Hands(static_image_mode=False, max_num_hands=2, min_detection_confidence=0.5) as hands:
        with mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5) as pose:
            with mp_face_mesh.FaceMesh(min_detection_confidence=0.5, min_tracking_confidence=0.5) as face_mesh:
                # Convert BGR to RGB
                frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                
                # Process hand landmarks
                hands_results = hands.process(frame_rgb)
                left_hand_landmarks, right_hand_landmarks = [],[]
                if hands_results.multi_hand_landmarks:
                    for hand_landmarks, handedness in zip(hands_results.multi_hand_landmarks, hands_results.multi_handedness):
                        if handedness.classification[0].label == 'Left':
                            left_hand_landmarks = hand_landmarks
                        elif handedness.classification[0].label == 'Right':
                            right_hand_landmarks = hand_landmarks

                # Process pose landmarks
                pose_results = pose.process(frame_rgb)
                pose_landmarks = pose_results.pose_landmarks


                # Process face landmarks
                face_results = face_mesh.process(frame_rgb)
                face_landmarks = face_results.multi_face_landmarks

                

    return left_hand_landmarks, right_hand_landmarks, pose_landmarks, face_landmarks

In [6]:
# import numpy as np
# from tensorflow.keras.preprocessing.sequence import pad_sequences  # Import pad_sequences from Keras

def preprocess_landmarks(left_hand_landmarks, right_hand_landmarks, pose_landmarks, face_landmarks):
    # Convert landmarks to arrays
    if face_landmarks:
        face = np.array([[lm.x, lm.y] for landmark_list in face_landmarks for lm in landmark_list.landmark])
    else:
        face = np.zeros((0, 2))  # Empty array if no face landmarks
    
    if left_hand_landmarks:
        left_hand = np.array([[lm.x, lm.y] for lm in left_hand_landmarks.landmark])
    else:
        left_hand = np.zeros((0, 2))  # Empty array if no left hand landmarks
    
    if pose_landmarks:
        pose = np.array([[lm.x, lm.y] for lm in pose_landmarks.landmark])
    else:
        pose = np.zeros((0, 2))  # Empty array if no pose landmarks
    
    if right_hand_landmarks:
        right_hand = np.array([[lm.x, lm.y] for lm in right_hand_landmarks.landmark])
    else:
        right_hand = np.zeros((0, 2))  # Empty array if no right hand landmarks
    
    # Ensure the landmarks have consistent lengths
    max_landmarks = max(len(face), len(left_hand), len(pose), len(right_hand))
    face = np.pad(face, ((0, max_landmarks - len(face)), (0, 0)), mode='constant')
    left_hand = np.pad(left_hand, ((0, max_landmarks - len(left_hand)), (0, 0)), mode='constant')
    pose = np.pad(pose, ((0, max_landmarks - len(pose)), (0, 0)), mode='constant')
    right_hand = np.pad(right_hand, ((0, max_landmarks - len(right_hand)), (0, 0)), mode='constant')
    
    # Concatenate the landmarks
    sequence = np.concatenate([face, left_hand, pose, right_hand], axis=0)
    
    # Calculate the maximum sequence length (if needed)
    max_sequence_length = 500  # Set a maximum sequence length
    
    # Pad the sequences to ensure they all have the same length
    sequences_padded = pad_sequences([sequence], maxlen=max_sequence_length, padding='post', dtype='float32')
    
    return sequences_padded





In [7]:
def display_frame(frame):
    plt.imshow(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    plt.axis('off')  # Turn off axis labels
    plt.show()


In [8]:
label_encoder = joblib.load('label_encoder.pkl')


In [9]:
def live_sign_recognition(model, max_sequence_length):
    cap = cv2.VideoCapture(0)  # Open default camera

    while True:
        ret, frame = cap.read()  
        frame = cv2.flip(frame, 1)
# Read frame from camera
        left_hand, right_hand, pose, face = extract_landmarks_mediapipe(frame)  # Extract landmarks using MediaPipe

        if left_hand or right_hand or pose or face:
            padded_landmarks = preprocess_landmarks(left_hand, right_hand, pose, face)  # Preprocess landmarks
            
            # padded_landmarks = np.expand_dims(padded_landmarks, axis=0)

# Check the shape before passing to model.predict
            print(padded_landmarks.shape)
            
            prediction = model.predict(padded_landmarks)# Make prediction
            print(prediction)  

            # Convert prediction indices to sign labels using label encoder
            sign_labels = label_encoder.inverse_transform(prediction.flatten().astype(int))
            sign_label = sign_labels[0]  # Assuming single prediction per frame

            # Display sign label on frame
            cv2.putText(frame, sign_label, (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

        # Display the frame
        cv2.imshow('Frame', frame)
        
        # Press 'q' to exit the loop
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()



In [10]:
live_sign_recognition(loaded_model, max_sequence_length)


(1, 500, 2)
[[0.20220973 0.18514434 0.20427456 0.20381045 0.20456097]]
(1, 500, 2)
[[0.20220973 0.18514434 0.20427456 0.20381045 0.20456097]]
(1, 500, 2)
[[0.20220973 0.18514434 0.20427456 0.20381045 0.20456097]]
(1, 500, 2)
[[0.20220973 0.18514434 0.20427456 0.20381045 0.20456097]]
(1, 500, 2)
[[0.20220973 0.18514434 0.20427456 0.20381045 0.20456097]]
(1, 500, 2)
[[0.20220973 0.18514434 0.20427456 0.20381045 0.20456097]]
(1, 500, 2)
[[0.20220973 0.18514434 0.20427456 0.20381045 0.20456097]]
(1, 500, 2)
[[0.20076953 0.18790464 0.2058035  0.20975766 0.19576469]]
(1, 500, 2)
[[0.20220973 0.18514434 0.20427456 0.20381045 0.20456097]]
(1, 500, 2)
[[0.20220973 0.18514434 0.20427456 0.20381045 0.20456097]]
(1, 500, 2)
[[0.2006756  0.18783547 0.20577116 0.21025977 0.19545794]]
(1, 500, 2)
[[0.20220973 0.18514434 0.20427456 0.20381045 0.20456097]]
(1, 500, 2)
[[0.20054707 0.1872834  0.20581956 0.21114497 0.19520505]]
(1, 500, 2)
[[0.20064661 0.18700477 0.20569408 0.21110569 0.1955488 ]]
(1, 50

: 

In [None]:
# from mediapipe.framework.formats import landmark_pb2  # Import the protobuf definition

# # Example list of NormalizedLandmarkList (replace with your actual data or how you receive it)
# normalized_landmark_lists = [
#     landmark_pb2.NormalizedLandmarkList(
#         landmark=[
#             landmark_pb2.NormalizedLandmark(x=0.1, y=0.2, z=0.3),
#             landmark_pb2.NormalizedLandmark(x=0.4, y=0.5, z=0.6),
#         ]
#     ),
#     landmark_pb2.NormalizedLandmarkList(
#         landmark=[
#             landmark_pb2.NormalizedLandmark(x=0.7, y=0.8, z=0.9),
#             landmark_pb2.NormalizedLandmark(x=0.2, y=0.3, z=0.4),
#         ]
#     )
# ]

# # Iterate through each NormalizedLandmarkList
# for landmark_list in normalized_landmark_lists:
#     # Iterate through each NormalizedLandmark in the current list
#     for landmark in landmark_list.landmark:
#         print(f"x: {landmark.x}")  

x: 0.10000000149011612
x: 0.4000000059604645
x: 0.699999988079071
x: 0.20000000298023224
