In [1]:
import cv2
import numpy as np
import mediapipe as mp
from tensorflow.keras.models import load_model
import os

In [2]:
# Short aliases for the MediaPipe solution modules used by the cells below.
mp_drawing = mp.solutions.drawing_utils  # helpers that draw landmarks and their connections
mp_holistic = mp.solutions.holistic      # Holistic solution: tracks pose, hand and face landmarks
mp_hands = mp.solutions.hands            # provides HAND_CONNECTIONS for the hand drawing
mp_face_mesh = mp.solutions.face_mesh    # only referenced by the disabled face drawing
mp_pose = mp.solutions.pose              # only referenced by the disabled pose drawing

In [3]:
def mediapipe_detection(image, model):
    """Run a MediaPipe model on a single OpenCV frame.

    OpenCV loads images in BGR channel order, while MediaPipe expects RGB, so
    the frame is converted to RGB for the prediction and converted back to BGR
    before it is returned.

    Args:
        image: Frame in BGR order, as produced by OpenCV.
        model: Object exposing ``process(rgb_image)``; the notebook passes a
            MediaPipe Holistic instance.

    Returns:
        A ``(image, results)`` tuple: the frame converted back to BGR and the
        value returned by ``model.process``.
    """
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    # The frame is flagged read-only only for the duration of the prediction.
    rgb_frame.flags.writeable = False
    results = model.process(rgb_frame)
    rgb_frame.flags.writeable = True
    bgr_frame = cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR)
    return bgr_frame, results

In [31]:
def draw_styled_landmarks(image,results):
    """Draw the detected hand landmarks and their connections onto ``image``.

    Only the left and right hands are drawn, each with its own colour pair.
    Pose and face-mesh drawing were disabled (commented out) in the original
    cell and are not performed here.

    Args:
        image: Frame to draw on; it is passed straight to
            ``mp_drawing.draw_landmarks``.
        results: Detection results exposing ``left_hand_landmarks`` and
            ``right_hand_landmarks``.

    Returns:
        None.
    """
    # (attribute on ``results``, landmark colour, connection colour)
    hand_styles = (
        ('left_hand_landmarks', (121, 22, 76), (121, 44, 250)),
        ('right_hand_landmarks', (245, 117, 66), (245, 66, 230)),
    )
    for attr_name, point_color, line_color in hand_styles:
        mp_drawing.draw_landmarks(
            image,
            getattr(results, attr_name),
            mp_hands.HAND_CONNECTIONS,
            mp_drawing.DrawingSpec(color=point_color, thickness=2, circle_radius=4),
            mp_drawing.DrawingSpec(color=line_color, thickness=2, circle_radius=2),
        )

In [32]:
def extract_keypoints(results):
    """Flatten the hand landmarks of one frame into a single feature vector.

    Each hand contributes its landmarks' ``(x, y, z)`` values; a hand that was
    not detected contributes ``21 * 3`` zeros instead. Pose and face landmarks
    are not included.

    Args:
        results: Detection results exposing ``left_hand_landmarks`` and
            ``right_hand_landmarks``; each is either falsy or an object with a
            ``landmark`` iterable whose items have ``x``, ``y`` and ``z``.

    Returns:
        1-D NumPy array: left-hand values followed by right-hand values.
    """
    def _hand_vector(hand):
        # Missing hand -> zero padding so the vector keeps a fixed length.
        if not hand:
            return np.zeros(21*3)
        return np.array([[point.x, point.y, point.z] for point in hand.landmark]).flatten()

    right_hand = _hand_vector(results.right_hand_landmarks)
    left_hand = _hand_vector(results.left_hand_landmarks)
    return np.concatenate([left_hand, right_hand])

In [6]:
# Capture settings shared by the cells below.
no_frames = 30  # frames per sequence; the prediction cell requires exactly this many
no_videos = 1   # not referenced in the visible cells; presumably videos per recording session - TODO confirm

In [7]:
# Sign labels grouped by category.
category = {'VOCALES': ['A', 'E', 'I', 'O', 'U']}

# Flatten every category's labels, in dict order, into one array; the
# prediction cell indexes it with the model's argmax.
actions = np.array([
    label
    for labels in category.values()
    for label in labels
])

In [29]:
# Load the trained classifier from disk (path is relative to the notebook's working directory).
model = load_model(filepath='models/train16.h5')



In [9]:
# Rebuild one previously recorded sequence from disk, one .npy file per frame.
# Path layout: <data directory>/<sign>/<video number>/<frame index>.npy
sequence = [
    np.load(os.path.join('MP_Data_Webcam', 'YO', '8', "{}.npy".format(frame_idx)))
    for frame_idx in range(no_frames)
]

In [43]:
# Live capture: preview the webcam until 'q' is pressed, then record up to
# `no_frames` frames of hand keypoints into `sequence`.
#
# Raises RuntimeError if the camera cannot be opened or stops delivering
# frames. The camera and the OpenCV window are always released, even on error.

# Reset up front so a failed capture never leaves a stale sequence behind for
# the prediction cell.
sequence = []
cap = cv2.VideoCapture(0)
try:
    if not cap.isOpened():
        raise RuntimeError('No se pudo abrir la cámara (índice 0).')

    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:

        # Phase 1 - preview only: lets the user get into position; 'q' starts the recording.
        while True:
            ret, frame = cap.read()
            if not ret:
                raise RuntimeError('No se pudo leer un frame de la cámara.')
            image, results = mediapipe_detection(frame, holistic)
            draw_styled_landmarks(image, results)
            cv2.imshow('Recolectando Datos', image)
            if cv2.waitKey(10) & 0xFF == ord('q'):
                break

        # One-second pause between the preview and the recording.
        cv2.waitKey(1000)

        # Phase 2 - record one keypoint vector per frame.
        for frame_num in range(no_frames):
            ret, frame = cap.read()
            if not ret:
                raise RuntimeError('No se pudo leer un frame de la cámara.')
            image, results = mediapipe_detection(frame, holistic)
            draw_styled_landmarks(image, results)
            keypoints = extract_keypoints(results)
            sequence.append(keypoints)

            is_last_frame = frame_num + 1 == no_frames
            if is_last_frame:
                cv2.putText(image,'PRESIONE CUALQUIERA TECLA PARA SALIR',(15,12),cv2.FONT_HERSHEY_SIMPLEX,0.5,(0,0,255),2,cv2.LINE_AA)
                cv2.putText(image,'RECOLECTANDO VIDEO...',(15,35),cv2.FONT_HERSHEY_SIMPLEX,0.5,(0,0,255),1,cv2.LINE_AA)
                cv2.putText(image,'FRAME CAPTURADO #{}'.format(frame_num+1),(15,60),cv2.FONT_HERSHEY_SIMPLEX,0.5,(0,0,255),1,cv2.LINE_AA)
            else:
                cv2.putText(image,'RECOLECTANDO VIDEO...',(15,12),cv2.FONT_HERSHEY_SIMPLEX,0.5,(0,0,255),1,cv2.LINE_AA)
                cv2.putText(image,'FRAME CAPTURADO #{}'.format(frame_num+1),(15,30),cv2.FONT_HERSHEY_SIMPLEX,0.5,(0,0,255),1,cv2.LINE_AA)

            # Show the annotated frame BEFORE waiting for a key; otherwise the
            # "press any key" prompt on the last frame is never displayed.
            cv2.imshow('Recolectando Datos', image)

            if is_last_frame:
                cv2.waitKey(0)  # block until any key is pressed
            elif cv2.waitKey(10) & 0xFF == ord('q'):
                # 'q' aborts the recording; `sequence` is then shorter than
                # `no_frames`, which the prediction cell checks for.
                break
finally:
    # Always free the camera and close the window, even if something above failed.
    cap.release()
    cv2.destroyAllWindows()

In [34]:
# Inspect the captured data: a list with one NumPy keypoint array per recorded frame.
sequence

[array([ 8.67709816e-01,  5.28169334e-01, -4.42385840e-07,  8.19542229e-01,
         5.07775724e-01, -1.59757808e-02,  7.77627707e-01,  4.51206654e-01,
        -2.17071641e-02,  7.49675393e-01,  4.07339066e-01, -2.80907191e-02,
         7.22881019e-01,  3.87721062e-01, -3.09350863e-02,  8.06773603e-01,
         4.00418997e-01, -2.21232325e-03,  8.05199802e-01,  3.66570830e-01,
        -2.65253298e-02,  8.13996673e-01,  4.14994955e-01, -3.84919532e-02,
         8.17780495e-01,  4.36008900e-01, -4.37187925e-02,  8.38902712e-01,
         3.95097733e-01, -4.72862739e-03,  8.37706149e-01,  3.62495035e-01,
        -3.03803366e-02,  8.40000749e-01,  4.21110183e-01, -3.55841890e-02,
         8.38114083e-01,  4.33171123e-01, -3.40543203e-02,  8.70071173e-01,
         3.97431672e-01, -1.17275389e-02,  8.71519744e-01,  3.68128777e-01,
        -3.81804146e-02,  8.68372500e-01,  4.22694743e-01, -3.00392993e-02,
         8.63342464e-01,  4.33792889e-01, -1.87199786e-02,  8.99324298e-01,
         4.0

In [35]:
# Sanity check: a full capture has shape (no_frames, 126), i.e. 2 hands x 21 landmarks x 3 coordinates per frame.
np.shape(sequence)

(30, 126)

In [44]:
# Classify the captured sequence, but only when it holds exactly `no_frames` frames.
if len(sequence) != no_frames:
    print("La secuencia no tiene la longitud requerida.")
else:
    # Add a leading batch axis, predict, and keep the scores of the single sample.
    res = model.predict(np.expand_dims(sequence, axis=0))[0]
    predicted_index = np.argmax(res)
    print(predicted_index)            # index of the highest-scoring class
    print(actions[predicted_index])   # corresponding sign label

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 82ms/step
4
U
