In [1]:
import json
import os
import pickle
from itertools import groupby

import cv2
import mediapipe as mp
import numpy as np

In [3]:
# Load pre-trained model.
# NOTE(security): unpickling can execute arbitrary code, so './model.p' must
# come from a trusted source.
# The context manager closes the file handle as soon as the model is loaded.
with open('./model.p', 'rb') as model_file:
    model_dict = pickle.load(model_file)
model = model_dict['model']

# Initialize mediapipe
mp_hands = mp.solutions.hands

def read_dict(file_path):
    """Load a JSON file and return its decoded content.

    Args:
        file_path: Path to the JSON file. A leading ``~`` is expanded to the
            current user's home directory.

    Returns:
        The object produced by ``json.load`` (a ``dict`` when the file holds
        a JSON object).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    path = os.path.expanduser(file_path)
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale encoding.
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)

# Shared hand detector: non-static image mode, at most two hands per frame,
# detection confidence threshold of 0.3.
hands = mp_hands.Hands(
    max_num_hands=2,
    min_detection_confidence=0.3,
    static_image_mode=False,
)

# JSON object keys are always strings; convert them to ints so the mapping
# can be indexed with the integer class index derived from the model output.
sign_path = './sign_to_prediction_index_map.json'
sign_mapping = dict(
    (int(index), label) for index, label in read_dict(sign_path).items()
)

In [8]:
def _landmarks_to_features(hand_landmarks):
    """Flatten one hand's landmarks into ``[x0-min_x, y0-min_y, x1-min_x, ...]``."""
    xs = [point.x for point in hand_landmarks.landmark]
    ys = [point.y for point in hand_landmarks.landmark]
    # Compute each minimum once instead of once per landmark. The default only
    # matters for an empty landmark list, where the result is an empty vector.
    min_x = min(xs, default=0.0)
    min_y = min(ys, default=0.0)

    features = []
    for x, y in zip(xs, ys):
        features.append(x - min_x)
        features.append(y - min_y)
    return features


def translate_from_sign_to_text(video_path: str) -> str:
    """Translate the signs detected in a video into a space-separated string.

    Every frame of the video is passed to the module-level ``hands`` detector.
    For each detected hand, the landmark coordinates are expressed relative to
    that hand's minimum x / y, fed to the module-level ``model``, and the
    predicted class index is looked up in ``sign_mapping``. Runs of identical
    consecutive predictions are collapsed into a single occurrence.

    Args:
        video_path: Path of the video passed to ``cv2.VideoCapture``.

    Returns:
        The predicted labels joined by single spaces. An empty string is
        returned when the capture cannot be opened or no hand is detected.

    Raises:
        KeyError: If the model predicts an index missing from ``sign_mapping``.
    """
    # NOTE(review): `hands` is shared across calls; if it keeps tracking state
    # between frames, that state may carry over from a previous video - confirm.
    cap = cv2.VideoCapture(video_path)
    predictions = []

    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            results = hands.process(frame_rgb)

            # `multi_hand_landmarks` is falsy when no hand was found in the frame.
            for hand_landmarks in results.multi_hand_landmarks or ():
                features = _landmarks_to_features(hand_landmarks)
                prediction = model.predict([np.asarray(features)])
                predictions.append(sign_mapping[int(prediction[0])])
    finally:
        # Always free the capture, even if detection or prediction raised.
        cap.release()

    # groupby yields one key per run of equal consecutive items.
    return " ".join(label for label, _ in groupby(predictions))


In [9]:
# Translate the example video and print the resulting text.
text = translate_from_sign_to_text("./hello.mp4")  # Example video file path
print(text)

Hello thanks Hello
