In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
import cv2
import joblib
import mediapipe as mp
from tensorflow.keras.preprocessing.sequence import pad_sequences
import matplotlib.pyplot as plt
from keras.initializers import Orthogonal
import pickle


In [2]:
# Map the 'Orthogonal' name to the initializer class so that load_model can
# resolve it while deserializing the saved model.
custom_objects = dict(Orthogonal=Orthogonal)
loaded_model = tf.keras.models.load_model(
    'Models/my_model.h5',
    custom_objects=custom_objects,
)


In [3]:
# Short aliases for the MediaPipe solution modules used in this notebook.
mp_drawing, mp_hands, mp_pose, mp_face_mesh = (
    mp.solutions.drawing_utils,
    mp.solutions.hands,
    mp.solutions.pose,
    mp.solutions.face_mesh,
)

In [4]:

def capture_video():
    """Show the default camera's live feed in a window until 'q' is pressed.

    The loop also stops when a frame cannot be read (camera missing, busy or
    disconnected). The capture device and the OpenCV window are released even
    if an exception is raised while the loop is running.
    """
    cap = cv2.VideoCapture(0)  # Open default camera (index 0)

    try:
        while True:
            ret, frame = cap.read()  # Read frame from camera
            if not ret:
                # `frame` is not usable when the read fails; passing it to
                # imshow would raise, so stop cleanly instead.
                print("Failed to grab frame")
                break

            cv2.imshow('Live Video', frame)  # Display frame
            if cv2.waitKey(1) & 0xFF == ord('q'):  # Press 'q' to quit
                break
    finally:
        cap.release()
        cv2.destroyAllWindows()

In [5]:
def extract_landmarks_mediapipe(frame, hands=None, pose=None, face_mesh=None):
    """Run MediaPipe hands, pose and face-mesh on one BGR frame.

    Args:
        frame: Image in OpenCV BGR channel order.
        hands: Optional already-constructed ``mp_hands.Hands`` instance to reuse.
        pose: Optional already-constructed ``mp_pose.Pose`` instance to reuse.
        face_mesh: Optional already-constructed ``mp_face_mesh.FaceMesh``
            instance to reuse.

        Any solution that is not supplied is created for this call with the
        same settings as before and closed again before returning. Building
        these objects is costly, so callers that process many frames should
        create them once and pass them in; supplied instances are NOT closed
        here and remain owned by the caller.

    Returns:
        Tuple ``(left_hand_landmarks, right_hand_landmarks, pose_landmarks,
        face_landmarks)``:
        - each hand entry is the landmark list of the hand MediaPipe labelled
          'Left' / 'Right', or ``[]`` when no such hand was reported;
        - ``pose_landmarks`` is ``pose_results.pose_landmarks`` as returned by
          MediaPipe;
        - ``face_landmarks`` is ``face_results.multi_face_landmarks`` as
          returned by MediaPipe.
    """
    from contextlib import ExitStack

    # ExitStack closes only the solutions created here, in reverse order of
    # creation, exactly like the nested `with` blocks it replaces.
    with ExitStack() as stack:
        if hands is None:
            hands = stack.enter_context(
                mp_hands.Hands(static_image_mode=False, max_num_hands=2, min_detection_confidence=0.5)
            )
        if pose is None:
            pose = stack.enter_context(
                mp_pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5)
            )
        if face_mesh is None:
            face_mesh = stack.enter_context(
                mp_face_mesh.FaceMesh(min_detection_confidence=0.5, min_tracking_confidence=0.5)
            )

        # Convert BGR to RGB
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # Process hand landmarks
        hands_results = hands.process(frame_rgb)
        left_hand_landmarks, right_hand_landmarks = [], []
        if hands_results.multi_hand_landmarks:
            detected_hands = zip(hands_results.multi_hand_landmarks, hands_results.multi_handedness)
            for hand_landmarks, handedness in detected_hands:
                label = handedness.classification[0].label
                if label == 'Left':
                    left_hand_landmarks = hand_landmarks
                elif label == 'Right':
                    right_hand_landmarks = hand_landmarks

        # Process pose landmarks
        pose_landmarks = pose.process(frame_rgb).pose_landmarks

        # Process face landmarks
        face_landmarks = face_mesh.process(frame_rgb).multi_face_landmarks

    return left_hand_landmarks, right_hand_landmarks, pose_landmarks, face_landmarks

In [6]:
# Highest landmark index kept for each landmark group (indices start at 0).
max_face_index = 467
max_left_hand_index = 20
max_right_hand_index = 20
max_pose_index = 32

# Per-landmark base names, e.g. "face_0" ... "face_467".
face_columns = ["face_%d" % idx for idx in range(max_face_index + 1)]
left_hand_columns = ["left_hand_%d" % idx for idx in range(max_left_hand_index + 1)]
right_hand_columns = ["right_hand_%d" % idx for idx in range(max_right_hand_index + 1)]
pose_columns = ["pose_%d" % idx for idx in range(max_pose_index + 1)]

# Feature column order: face, left hand, right hand, pose; for every landmark
# its x column comes immediately before its y column.
header = [
    f"{base}_{axis}"
    for group in (face_columns, left_hand_columns, right_hand_columns, pose_columns)
    for base in group
    for axis in ('x', 'y')
]

In [7]:


def _store_xy(row, prefix, landmark_list):
    """Write each landmark's x/y from `landmark_list.landmark` into `row` as `{prefix}_{i}_x` / `{prefix}_{i}_y`."""
    for i, lm in enumerate(landmark_list.landmark):
        row[f"{prefix}_{i}_x"] = lm.x
        row[f"{prefix}_{i}_y"] = lm.y


def landmarks_to_df(left_hand_landmarks, right_hand_landmarks, pose_landmarks, face_landmarks, header):
    """Flatten one frame's landmarks into a single feature row ordered by `header`.

    Args:
        left_hand_landmarks: Object exposing a `.landmark` sequence of items
            with `.x` / `.y`, or a falsy value when the left hand is missing.
        right_hand_landmarks: Same as above for the right hand.
        pose_landmarks: Same as above for the pose.
        face_landmarks: Sequence of such objects (one per face) or a falsy
            value. When several faces are present the last one is used.
        header: Column names of the form `{group}_{index}_{x|y}` that define
            the content and order of the returned row.

    Returns:
        A float64 `pandas.Series` indexed by `header` (name ``0``). Every
        column that has no matching detected landmark is 0.0, so the result
        never contains NaN; landmarks with no column in `header` are dropped.
    """
    # Start from an all-zero row so that anything not detected stays 0.0,
    # driven by the header actually passed in rather than by module globals.
    row = dict.fromkeys(header, 0.0)

    if face_landmarks:
        _store_xy(row, "face", face_landmarks[-1])
    if left_hand_landmarks:
        _store_xy(row, "left_hand", left_hand_landmarks)
    if right_hand_landmarks:
        _store_xy(row, "right_hand", right_hand_landmarks)
    if pose_landmarks:
        _store_xy(row, "pose", pose_landmarks)

    # `index=header` keeps only the header columns, in header order.
    return pd.Series(row, index=header, dtype="float64", name=0)


In [8]:
def display_frame(frame):
    """Render a BGR OpenCV frame with matplotlib, without axes."""
    # matplotlib expects RGB channel order, OpenCV frames are BGR.
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    plt.imshow(rgb_frame)
    plt.axis('off')  # Turn off axis labels
    plt.show()


In [9]:
# Load the saved label encoder; live_sign_recognition uses it to turn a
# predicted class index back into a label.
# SECURITY: pickle.load can execute arbitrary code while unpickling, so only
# load files from Models/ that come from a trusted source.
with open('Models/label_encoder.pkl', 'rb') as file:
    label_encoder = pickle.load(file)

# Load the saved OneHotEncoder (same trust requirement as above).
# NOTE(review): `onehot_encoder` is not referenced by the cells shown in this
# notebook - confirm whether it is still needed.
with open('Models/onehot_encoder.pkl', 'rb') as file:
    onehot_encoder = pickle.load(file)




In [10]:
import cv2
import numpy as np

def live_sign_recognition(model, header, confidence_threshold=0.95):
    """Run sign recognition on the default camera until 'q' is pressed.

    Each frame is mirrored, passed through `extract_landmarks_mediapipe`,
    flattened with `landmarks_to_df` and fed to `model.predict` as an array of
    shape (1, 1, len(header)). The predicted label is drawn on the frame and,
    when its confidence exceeds `confidence_threshold`, also printed.

    Args:
        model: Object with a Keras-style `predict(x, verbose=0)` method whose
            output is indexed as (1, n_classes).
        header: Column names passed to `landmarks_to_df`.
        confidence_threshold: Minimum confidence for a prediction to be
            printed. Defaults to 0.95.

    Note:
        The class index is decoded with the notebook-level `label_encoder`,
        which must be loaded before this function is called.

    The loop also stops when a frame cannot be read. The camera and the
    OpenCV window are released even if an exception is raised mid-loop.
    """
    cap = cv2.VideoCapture(0)  # Open default camera

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                print("Failed to grab frame")
                break

            frame = cv2.flip(frame, 1)  # Flip the frame horizontally

            # Extract landmarks using MediaPipe
            left_hand, right_hand, pose, face = extract_landmarks_mediapipe(frame)

            if left_hand or right_hand or pose or face:
                # Flatten to one feature row, then add two leading axes.
                features = landmarks_to_df(left_hand, right_hand, pose, face, header)
                model_input = np.expand_dims(np.expand_dims(features, axis=0), axis=0)

                # verbose=0: avoid one progress bar per frame in the output.
                prediction = model.predict(model_input, verbose=0)

                if not np.isnan(prediction).any():
                    # Reduce everything to scalars so the label prints and
                    # draws as plain text instead of a 1-element array.
                    predicted_class_index = int(np.argmax(prediction, axis=1)[0])
                    predicted_class_label = label_encoder.inverse_transform([predicted_class_index])[0]
                    confidence = float(prediction[0, predicted_class_index])

                    # Print the class name only for confident predictions
                    if confidence > confidence_threshold:
                        print(f"Predicted sign: {predicted_class_label} with confidence {confidence:.2f}")

                    # Display sign label on frame (putText requires a str)
                    cv2.putText(frame, str(predicted_class_label), (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
                else:
                    print("Prediction contains NaN values")

            # Display the frame
            cv2.imshow('Frame', frame)

            # Press 'q' to exit the loop
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        cap.release()
        cv2.destroyAllWindows()

# Example usage
# Assuming `model` is your trained model and `header` is the list of column names used in preprocessing; the fitted `label_encoder` is read from the notebook's global scope, not passed as an argument.
# live_sign_recognition(model, header)



In [11]:
# Start the webcam recognition loop with the loaded model; press 'q' in the
# video window to stop.
live_sign_recognition(loaded_model,header)




error: OpenCV(4.10.0) :-1: error: (-5:Bad argument) in function 'putText'
> Overload resolution failed:
>  - Can't convert object to 'str' for 'text'
>  - Can't convert object to 'str' for 'text'


In [None]:
# from mediapipe.framework.formats import landmark_pb2  # Import the protobuf definition

# # Example list of NormalizedLandmarkList (replace with your actual data or how you receive it)
# normalized_landmark_lists = [
#     landmark_pb2.NormalizedLandmarkList(
#         landmark=[
#             landmark_pb2.NormalizedLandmark(x=0.1, y=0.2, z=0.3),
#             landmark_pb2.NormalizedLandmark(x=0.4, y=0.5, z=0.6),
#         ]
#     ),
#     landmark_pb2.NormalizedLandmarkList(
#         landmark=[
#             landmark_pb2.NormalizedLandmark(x=0.7, y=0.8, z=0.9),
#             landmark_pb2.NormalizedLandmark(x=0.2, y=0.3, z=0.4),
#         ]
#     )
# ]

# # Iterate through each NormalizedLandmarkList
# for landmark_list in normalized_landmark_lists:
#     # Iterate through each NormalizedLandmark in the current list
#     for landmark in landmark_list.landmark:
#         print(f"x: {landmark.x}")  