## Libraries


In [None]:
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
import time
from datetime import timedelta, datetime
import collections
from tkinter import *
import mediapipe as mp
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

In [None]:
#import threading

In [None]:
# Install YOLO (provided by the ultralytics package)
%pip install ultralytics

In [None]:
import ultralytics
from ultralytics import YOLO
import sys
import torch

## Functions

In [None]:
# Short aliases for the MediaPipe solution modules used by the helpers below.
mp_holistic = mp.solutions.holistic # Holistic model
mp_drawing = mp.solutions.drawing_utils # Drawing utilities

In [None]:
def mediapipe_detection(image, model):
    """Run ``model`` on a BGR frame and hand back the frame plus the results.

    Args:
        image: Frame in BGR channel order (it is converted with
            ``cv2.COLOR_BGR2RGB`` before being processed).
        model: Object exposing ``process(rgb_image)``.

    Returns:
        A ``(image, results)`` tuple: the frame converted back to BGR and
        whatever ``model.process`` returned.
    """
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # The frame is flagged read-only only for the duration of the inference
    # call and made writeable again right after it.
    rgb_frame.flags.writeable = False
    results = model.process(rgb_frame)
    rgb_frame.flags.writeable = True

    return cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR), results

In [None]:
def draw_landmarks(image, results):
    """Draw face, pose and both hands on ``image`` with default styling.

    Args:
        image: Image that ``mp_drawing.draw_landmarks`` draws onto.
        results: Object exposing ``face_landmarks``, ``pose_landmarks``,
            ``left_hand_landmarks`` and ``right_hand_landmarks``.
    """
    # (landmark list, connection set) pairs, in drawing order.
    layers = (
        (results.face_landmarks, mp_holistic.FACEMESH_CONTOURS),
        (results.pose_landmarks, mp_holistic.POSE_CONNECTIONS),
        (results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS),
        (results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS),
    )
    for landmark_list, connections in layers:
        mp_drawing.draw_landmarks(image, landmark_list, connections)

In [None]:
def draw_styled_landmarks(image, results):
    """Draw face, pose and both hands on ``image`` with per-part styling.

    Each body part gets its own pair of ``DrawingSpec`` objects: the first is
    passed as the landmark style, the second as the connection style.

    Args:
        image: Image that ``mp_drawing.draw_landmarks`` draws onto.
        results: Object exposing ``face_landmarks``, ``pose_landmarks``,
            ``left_hand_landmarks`` and ``right_hand_landmarks``.
    """
    spec = mp_drawing.DrawingSpec

    # (landmark list, connection set, landmark style, connection style),
    # listed in drawing order.
    styled_layers = (
        # Face
        (results.face_landmarks, mp_holistic.FACEMESH_CONTOURS,
         spec(color=(80, 110, 10), thickness=1, circle_radius=1),
         # The middle channel used to be 256, which is outside the 0-255
         # range of an 8-bit colour channel; 255 is the valid maximum.
         spec(color=(80, 255, 121), thickness=1, circle_radius=1)),
        # Pose
        (results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
         spec(color=(80, 22, 10), thickness=2, circle_radius=4),
         spec(color=(80, 44, 121), thickness=2, circle_radius=2)),
        # Left hand
        (results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
         spec(color=(121, 22, 76), thickness=2, circle_radius=4),
         spec(color=(121, 44, 250), thickness=2, circle_radius=2)),
        # Right hand
        (results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
         spec(color=(245, 117, 66), thickness=2, circle_radius=4),
         spec(color=(245, 66, 230), thickness=2, circle_radius=2)),
    )

    for landmark_list, connections, landmark_style, connection_style in styled_layers:
        mp_drawing.draw_landmarks(image, landmark_list, connections,
                                  landmark_style, connection_style)

In [None]:
def _flatten_landmarks(landmark_list, count, with_visibility=False):
    """Flatten one landmark list into a 1-D array, or zeros when it is absent."""
    if not landmark_list:
        return np.zeros(count * (4 if with_visibility else 3))
    if with_visibility:
        rows = [[p.x, p.y, p.z, p.visibility] for p in landmark_list.landmark]
    else:
        rows = [[p.x, p.y, p.z] for p in landmark_list.landmark]
    return np.array(rows).flatten()


def extract_keypoints(results, *, include_face=False):
    """Flatten the detected landmarks of one frame into a single 1-D vector.

    Missing body parts are replaced by zeros so the vector length does not
    depend on what was detected.

    Args:
        results: Object exposing ``pose_landmarks``, ``face_landmarks``,
            ``left_hand_landmarks`` and ``right_hand_landmarks``; each is
            either falsy or has a ``landmark`` sequence of points with
            ``x``, ``y``, ``z`` (and ``visibility`` for the pose).
        include_face: When False (default, the historical behaviour) the
            vector is ``pose + left hand + right hand`` = 33*4 + 21*3 + 21*3
            = 258 values and the face landmarks are not even read. When True
            the face block (468*3 values) is inserted after the pose, giving
            ``pose + face + left hand + right hand`` = 1662 values.

    Returns:
        1-D ``numpy.ndarray`` of length 258, or 1662 with ``include_face``.
    """
    parts = [_flatten_landmarks(results.pose_landmarks, 33, with_visibility=True)]
    if include_face:
        # Only pay for the 468-point face conversion when it is requested;
        # it used to be computed on every frame and then thrown away.
        parts.append(_flatten_landmarks(results.face_landmarks, 468))
    parts.append(_flatten_landmarks(results.left_hand_landmarks, 21))
    parts.append(_flatten_landmarks(results.right_hand_landmarks, 21))
    return np.concatenate(parts)

In [None]:
# Path for exported data, numpy arrays (currently disabled)
#DATA_PATH = os.path.join('MP_Data')

# Actions that we try to detect; the model's softmax layer has one unit per entry.
# TODO: the author marked this alternative, which also contains 'Ñ', as the one that should be used:
#   actions = np.array(['G', 'J', 'Ñ', 'S', 'X', 'Z'])
actions = np.array(['G', 'J','S', 'X', 'Z'])

# Thirty videos worth of data
no_sequences = 30

# Videos are going to be 30 frames in length
sequence_length = 30

## Loading models

In [None]:
# LSTM classifier: three stacked LSTM layers followed by two dense layers and
# a softmax over the entries of `actions`.
#
# The window length comes from `sequence_length` instead of a second
# hard-coded 30, so the two cannot drift apart.
#
# NOTE(review): 1662 features per frame equals 33*4 (pose) + 468*3 (face)
# + 2*21*3 (hands), but extract_keypoints() in this file leaves the face out
# of its default output (258 values). Confirm which layout the saved weights
# were trained with and make the feature extractor and this width agree.
model = Sequential([
    LSTM(64, return_sequences=True, activation='relu',
         input_shape=(sequence_length, 1662)),
    LSTM(128, return_sequences=True, activation='relu'),
    LSTM(64, return_sequences=False, activation='relu'),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(actions.shape[0], activation='softmax'),
])

In [None]:
# Compile the network, print its layer summary, then load the saved weights.
model.compile(
    optimizer='Adam',
    loss='categorical_crossentropy',
    metrics=['categorical_accuracy'],
)
model.summary()

# NOTE: verify the file name of the dynamic-gesture model weights to load.
model.load_weights('GJSXZ.h5')

In [None]:
# Load the YOLO model from its weights file.
# NOTE: verify the file name of the YOLO model to load.
model_YOLO = YOLO('best.pt')
#model_YOLO.model.to(torch.device('cpu')) # remove this line if a GPU is used

## Testing in real time

In [None]:
# Real-time interpreter.
#
# Two recognisers share one webcam feed:
#   * model_state == 0: YOLO detects a letter per frame; the most frequent
#     letter seen during each `window` is appended to `word` and shown in
#     the Tk window.
#   * model_state == 1: MediaPipe Holistic keypoints are accumulated into a
#     rolling window that is classified by the LSTM `model`.
# Keys (read from the OpenCV window): 'd' switches recogniser, 'q' quits.

sequence = []           # rolling window of keypoint vectors for the LSTM
sentence = []
predictions = []        # argmax index of every LSTM prediction made so far
predictionsArray = []   # letters YOLO detected during the current time window
word = []               # letters accepted so far
threshold = 0.8
model_state = 0  # 0 for YOLO, 1 for mediapipe

# Letter for each YOLO class index.
className = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J',
             'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T',
             'U', 'V', 'W', 'X', 'Y', 'Z']

# Tk status window: current letter on top, accumulated word below.
root = Tk()
root.title("Interpreter")
prediction_label = Label(root, text="No hay letra", padx=1, pady=1, justify=CENTER, font=("Arial", 40))
prediction_label.pack()
word_label = Label(root, text="Palabra", padx=1, pady=1, justify=CENTER, font=("Arial", 40))
word_label.pack()

window = timedelta(seconds=1)  # length of the YOLO voting window

# Open the camera only after the UI exists, so a Tk start-up failure cannot
# leave the capture device open.
cap = cv2.VideoCapture(0)
initTime = datetime.now()

try:
    # Build the Holistic graph once for the whole session: constructing it
    # on every frame is expensive and discards its tracking state.
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # No frame was delivered (camera unplugged / stream ended);
                # `frame` is unusable, so stop instead of crashing below.
                break

            if model_state == 0:
                # YOLO returns bounding boxes, labels and probabilities.
                results = model_YOLO.predict(frame, conf=0.5)

                # Record every letter detected in this frame.
                for r in results:
                    for box in r.boxes:
                        predictionsArray.append(className[int(box.cls[0])])

                # The voting window has elapsed: publish the winner.
                if datetime.now() - initTime >= window:
                    if predictionsArray:
                        # Most frequently detected letter in the window.
                        most_common = collections.Counter(predictionsArray).most_common(1)[0][0]
                        word.append(most_common)
                        prediction_label.config(text=f"Letra : {most_common}")
                    else:
                        prediction_label.config(text="No hay letra")
                    word_label.config(text=f"{''.join(word)}")
                    predictionsArray.clear()
                    initTime = datetime.now()

                cv2.imshow("YOLOv8 Inference", results[0].plot())

            else:  # model_state == 1: MediaPipe + LSTM
                # Make detections and draw them on the frame.
                image, results = mediapipe_detection(frame, holistic)
                draw_styled_landmarks(image, results)

                # Keep only the most recent `sequence_length` keypoint vectors.
                sequence.append(extract_keypoints(results))
                sequence = sequence[-sequence_length:]

                if len(sequence) == sequence_length:
                    # NOTE(review): extract_keypoints() returns pose + hands
                    # by default, while the LSTM in this file is declared
                    # with 1662 features per frame. Keras is expected to
                    # reject the window unless the two agree; confirm which
                    # layout the weights use.
                    res = model.predict(np.expand_dims(sequence, axis=0))[0]
                    print(actions[np.argmax(res)])
                    cv2.putText(image, '{}'.format(res), (200, 120),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 255), 3, cv2.LINE_AA)
                    predictions.append(np.argmax(res))

                cv2.imshow('OpenCV Feed', image)

            # Service the Tk window in both modes so it never freezes; stop
            # cleanly if the user has closed it.
            try:
                root.update()
            except TclError:
                break

            key = cv2.waitKey(1) & 0xFF
            if key == ord('d'):
                # Switch to the other recogniser.
                cv2.destroyAllWindows()
                model_state = 1 - model_state
                # Drop data gathered before the switch so stale frames or
                # votes are not mixed into the next prediction.
                sequence = []
                predictionsArray.clear()
                time.sleep(2)
                initTime = datetime.now()
            elif key == ord('q'):
                # Leave the loop when 'q' is pressed.
                break
finally:
    # Always give the camera and the windows back, even after an error.
    cap.release()
    cv2.destroyAllWindows()
    try:
        root.destroy()
    except TclError:
        pass  # the window was already closed by the user