# Predicción en tiempo real con cámara usando modelo LSTM y landmarks de MediaPipe

Este notebook utiliza la cámara del dispositivo para detectar la mano, extraer los landmarks y predecir el número en lenguaje de señas usando el modelo entrenado.

In [None]:
import cv2
import numpy as np
import mediapipe as mp
import tensorflow as tf
import joblib
import os

# Locate the trained artifacts inside the sibling "Modelos" directory
# (the paths are relative to the current working directory).
_modelos_dir = os.path.join("..", "Modelos")
modelo_path = os.path.join(_modelos_dir, "modelo_lstm_landmarks.keras")
encoder_path = os.path.join(_modelos_dir, "label_encoder_landmarks.pkl")

# Load the Keras model and the label encoder used to map predicted class
# indices back to their original labels.
model = tf.keras.models.load_model(modelo_path)
le = joblib.load(encoder_path)

# Short aliases for the MediaPipe drawing helpers and hand-tracking solution.
mp_drawing = mp.solutions.drawing_utils
mp_hands = mp.solutions.hands

# Hershey font used for the on-screen text; chosen as the closest OpenCV
# option to Times New Roman.
FONT = cv2.FONT_HERSHEY_TRIPLEX

def extract_landmarks_from_frame(frame, hands):
    """Detect a hand in a BGR frame and return its landmark coordinates.

    Args:
        frame: Image in OpenCV's BGR channel order.
        hands: Detector object exposing ``process(rgb_image)``; the result
            must provide a ``multi_hand_landmarks`` attribute.

    Returns:
        A ``(coords, hand)`` tuple. ``coords`` is a flat NumPy array holding
        the x, y, z values of every landmark of the first detected hand, and
        ``hand`` is that hand's landmark object (usable for drawing).
        Returns ``(None, None)`` when no hand is detected.
    """
    # The detector is fed an RGB image, while OpenCV delivers frames as BGR.
    detection = hands.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    detected_hands = detection.multi_hand_landmarks
    if not detected_hands:
        return None, None

    # Only the first detected hand is used.
    first_hand = detected_hands[0]
    coords = np.array(
        [[point.x, point.y, point.z] for point in first_hand.landmark]
    ).flatten()
    return coords, first_hand

# Open the default camera (device index 0) and fail fast if it is unavailable.
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    raise RuntimeError("No se pudo acceder a la cámara.")

# Real-time loop: grab a frame, detect the hand, classify its landmarks and
# overlay the result, until no frame can be read or ESC is pressed.
# The try/finally encloses the MediaPipe context manager as well, so the
# camera is released even when creating the Hands detector itself fails.
try:
    # Create the MediaPipe Hands detector once and reuse it for every frame.
    with mp_hands.Hands(static_image_mode=False, max_num_hands=1) as hands:
        while True:
            ret, frame = cap.read()
            if not ret:
                # The capture did not deliver a frame; stop the loop.
                break

            landmarks, hand_landmarks_obj = extract_landmarks_from_frame(frame, hands)
            if landmarks is not None:
                # Reshape the flat vector into the (1, 21, 3) input passed to
                # the model: one sample, 21 landmarks, x/y/z per landmark.
                X_pred = landmarks.reshape(1, 21, 3)
                pred = model.predict(X_pred, verbose=0)
                # Take the highest-scoring class and map it back to its label.
                pred_class = np.argmax(pred, axis=1)[0]
                label = le.inverse_transform([pred_class])[0]
                # Draw the hand landmarks and the predicted label on the frame.
                mp_drawing.draw_landmarks(frame, hand_landmarks_obj, mp_hands.HAND_CONNECTIONS)
                cv2.putText(frame, f"Prediccion: {label}", (10, 40), FONT, 1.2, (0,255,0), 3)
            else:
                cv2.putText(frame, "Mano no detectada", (10, 40), FONT, 1.2, (0,0,255), 3)

            cv2.imshow("Predicción en tiempo real", frame)
            if cv2.waitKey(1) & 0xFF == 27:  # ESC key exits the loop
                break
finally:
    # Always free the camera and close any OpenCV windows, including on
    # errors or a keyboard interrupt.
    cap.release()
    cv2.destroyAllWindows()