# Dependencias requeridas

In [1]:
# !pip install scikit-learn mediapipe opencv-python

In [1]:
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
import time
import mediapipe as mp

from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Conv2D, MaxPooling2D, Flatten
from tensorflow.keras.callbacks import TensorBoard

2024-04-06 10:28:25.734116: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2024-04-06 10:28:26.095530: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-04-06 10:28:26.095581: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-04-06 10:28:26.164325: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-04-06 10:28:26.309823: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2024-04-06 10:28:26.312295: I tensorflow/core/platform/cpu_feature_guard.cc:1

### Atajos para utilidades

In [2]:
mp_holistic = mp.solutions.holistic # Holistic model
mp_drawing = mp.solutions.drawing_utils # Drawing utilities


## Utilidades para detección y visualización de puntos clave

In [3]:
def mediapipe_detection(image, model):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) # COLOR CONVERSION BGR 2 RGB
    image.flags.writeable = False                  # Image is no longer writeable
    results = model.process(image)                 # Make prediction
    image.flags.writeable = True                   # Image is now writeable 
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) # COLOR COVERSION RGB 2 BGR
    return image, results

In [4]:
def draw_landmarks(image, results):
    # mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACE_CONNECTIONS) # Draw face connections
    mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS) # Draw pose connections
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS) # Draw left hand connections
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS) # Draw right hand connectio

In [5]:
def draw_styled_landmarks(image, results):
    # Draw face connections
    # mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACE_CONNECTIONS, 
    #                          mp_drawing.DrawingSpec(color=(80,110,10), thickness=1, circle_radius=1), 
    #                          mp_drawing.DrawingSpec(color=(80,256,121), thickness=1, circle_radius=1)
    #                          ) 
    # Draw pose connections
    mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
                             mp_drawing.DrawingSpec(color=(80,22,10), thickness=2, circle_radius=4), 
                             mp_drawing.DrawingSpec(color=(80,44,121), thickness=2, circle_radius=2)
                             ) 
    # Draw left hand connections
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS, 
                             mp_drawing.DrawingSpec(color=(121,22,76), thickness=2, circle_radius=4), 
                             mp_drawing.DrawingSpec(color=(121,44,250), thickness=2, circle_radius=2)
                             ) 
    # Draw right hand connections  
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS, 
                             mp_drawing.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=4), 
                             mp_drawing.DrawingSpec(color=(245,66,230), thickness=2, circle_radius=2)
                             ) 

In [7]:
def extract_keypoints(results):
    pose = np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(33*4)
    face = np.array([[res.x, res.y, res.z] for res in results.face_landmarks.landmark]).flatten() if results.face_landmarks else np.zeros(468*3)
    lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21*3)
    rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21*3)
    return np.concatenate([pose, face, lh, rh])  

In [None]:
from scipy import stats

colors = [
    (78, 55, 100),
    (13, 250, 85),
    (52, 170, 11),
    (128, 166, 153),
    (252, 160, 11),
    (241, 86, 54),
    (246, 94, 126),
    (69, 241, 201),
    (226, 32, 231), 
    (84, 102, 226)
]

def prob_viz(res, actions, input_frame, colors):
    output_frame = input_frame.copy()
    for num, prob in enumerate(res):
        cv2.rectangle(output_frame, (0,60+num*40), (int(prob*100), 90+num*40), colors[num], -1)
        cv2.putText(output_frame, actions[num], (0, 85+num*40), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 2, cv2.LINE_AA)
        
    return output_frame

### Prueba de las utilidades

In [6]:
%%script true
W=640
H=480

cap = cv2.VideoCapture(0)
cap.set(cv2.CAP_PROP_FOURCC, cv2.VideoWriter_fourcc('M','J','P','G'))
#cap.set(cv2.CAP_PROP_FOURCC, cv2.VideoWriter_fourcc('Y','U','Y','V'))
cap.set(cv2.CAP_PROP_FRAME_WIDTH, W)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, H)
cap.set(cv2.CAP_PROP_FPS, 30)

with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            continue
    
        image, results = mediapipe_detection(frame, holistic)
    
        draw_landmarks(image, results)
        
        cv2.imshow('usb cam test', image)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

cap.release()
cv2.destroyAllWindows()

## Definición de directorios y parámetros de video

In [9]:
# Path for exported data, numpy arrays
DATA_PATH = os.path.join('v3', 'MP_Data') 

# Actions that we try to detect
actions = np.array([
    'aceleracion',
    'calculo',
    'computadora',
    'constante',
    'derivada',
    'ecuacion',
    'fisica',
    'formula',
    'inversa',
    'software',
])

# Thirty videos worth of data
n_sequences = 90

# Videos are going to be 30 frames in length
sequence_length = 29

# Folder start
start_folder = 0


### Etiquetas usadas en las predicciones

In [12]:
label_map = {label:num for num, label in enumerate(actions)}

In [13]:
label_map

{'aceleracion': 0,
 'calculo': 1,
 'computadora': 2,
 'constante': 3,
 'derivada': 4,
 'ecuacion': 5,
 'fisica': 6,
 'formula': 7,
 'inversa': 8,
 'software': 9}

### Carga del dataset en formato numpy a memoria

In [14]:
sequences, labels = [], []

# These sequences were found not suitable for training
excluded = {
  "computadora": [58],
  "aceleracion": [62, 75],
  "constante": [42]
}

for action in actions:
    for sequence in np.array(os.listdir(os.path.join(DATA_PATH, action))).astype(int):
        if sequence in excluded.get(action, []):        
            print(f"sequence {sequence} for action {action} is excluded")
            continue
        
        window = []
        for frame_num in range(sequence_length):
            res = np.load(os.path.join(DATA_PATH, action, str(sequence), "{}.npy".format(frame_num)))
            window.append(res)
        sequences.append(window)
        labels.append(label_map[action])

sequence 75 for action aceleracion is excluded
sequence 62 for action aceleracion is excluded
sequence 58 for action computadora is excluded
sequence 42 for action constante is excluded


### Inspección de los datos

In [15]:
np.array(sequences).shape

(896, 29, 1662)

In [16]:
X = np.array(sequences)

In [17]:
X.shape

(896, 29, 1662)

In [18]:
y = to_categorical(labels).astype(int)

## Partición del dataset para entrenamiento y pruebas

In [19]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.05)

In [20]:
y_train.shape

(851, 10)

In [21]:
y_test.shape

(45, 10)

#### Parámetros de TensorBoard

In [22]:
log_dir = os.path.join('Logs')
tb_callback = TensorBoard(log_dir=log_dir)

## Definición del primer modelo evaluado: LSTM

In [23]:
modelLSTM = Sequential()
modelLSTM.add(LSTM(128, return_sequences=True, activation='relu', input_shape=(29,1662)))
modelLSTM.add(LSTM(256, return_sequences=True, activation='relu'))
modelLSTM.add(LSTM(128, return_sequences=False, activation='relu'))
modelLSTM.add(Dense(256, activation='relu'))
modelLSTM.add(Dense(32, activation='relu'))
modelLSTM.add(Dense(actions.shape[0], activation='softmax'))

2024-04-06 10:28:57.451211: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-04-06 10:28:57.628090: W tensorflow/core/common_runtime/gpu/gpu_device.cc:2256] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


In [24]:
modelLSTM.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['categorical_accuracy'])

In [55]:
modelLSTM.fit(X_train, y_train, epochs=100, callbacks=[tb_callback])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.src.callbacks.History at 0x7f1edabd1600>

In [25]:
modelLSTM.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 29, 128)           916992    
                                                                 
 lstm_1 (LSTM)               (None, 29, 256)           394240    
                                                                 
 lstm_2 (LSTM)               (None, 128)               197120    
                                                                 
 dense (Dense)               (None, 256)               33024     
                                                                 
 dense_1 (Dense)             (None, 32)                8224      
                                                                 
 dense_2 (Dense)             (None, 10)                330       
                                                                 
Total params: 1549930 (5.91 MB)
Trainable params: 154993

In [37]:
res = modelLSTM.predict(X_test)

10
10


In [39]:
print(actions[np.argmax(res[4])])
print(actions[np.argmax(y_test[4])] )


inversa
inversa


In [56]:
#modelLSTM.save('LSTM v2.h5')

In [26]:
modelLSTM.load_weights('models/LSTM v2.h5')

In [60]:
yhat = modelLSTM.predict(X_test)



In [61]:
ytrue = np.argmax(y_test, axis=1).tolist()
yhat = np.argmax(yhat, axis=1).tolist()

In [61]:
from sklearn.metrics import multilabel_confusion_matrix, accuracy_score

multilabel_confusion_matrix(ytrue, yhat)

array([[[39,  0],
        [ 2,  4]],

       [[42,  0],
        [ 1,  2]],

       [[37,  4],
        [ 0,  4]],

       [[42,  0],
        [ 1,  2]],

       [[36,  3],
        [ 6,  0]],

       [[28, 10],
        [ 0,  7]],

       [[41,  0],
        [ 0,  4]],

       [[39,  1],
        [ 5,  0]],

       [[39,  1],
        [ 4,  1]],

       [[43,  0],
        [ 0,  2]]])

In [62]:
accuracy_score(ytrue, yhat)

0.8888888888888888

### Creando una copia con otras dimensiones para el segundo modelo utilizado: CNN

In [39]:
print(X_train.shape)
print(y_train.shape)

(851, 29, 1662)
(851, 10)


In [64]:
X_train2 = X_train.copy()  # replace this with your actual array
X_train2 = X_train2.reshape(851, 29, 1662, 1)

y_train2 = y_train.copy()
# print(y_train2)
y_train2 = np.argmax(y_train2, axis=1)
# y_train2 = y_train2.reshape(85, 3, 1)


print(X_train2.shape)
print(y_train2.shape)

(851, 29, 1662, 1)
(851,)


## Definición del modelo CNN utilizado

In [14]:
modelCNN = Sequential()
modelCNN.add(Conv2D(32, kernel_size=(3, 3), padding='valid', activation='relu', input_shape=(29, 1662, 1)))
modelCNN.add(MaxPooling2D(pool_size=(2, 2), strides=2))
modelCNN.add(Conv2D(64, kernel_size=(3, 3), activation='relu'))
modelCNN.add(MaxPooling2D(pool_size=(2, 2), strides=2))
modelCNN.add(Conv2D(32, kernel_size=(3, 3), activation='relu'))
modelCNN.add(MaxPooling2D(pool_size=(2, 2), strides=2))
modelCNN.add(Flatten())
modelCNN.add(Dense(32, activation='relu'))
modelCNN.add(Dense(actions.shape[0], activation='softmax'))


2024-04-05 16:51:26.903249: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-04-05 16:51:27.118558: W tensorflow/core/common_runtime/gpu/gpu_device.cc:2256] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


In [15]:
modelCNN.compile(optimizer='Adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [67]:
modelCNN.fit(X_train2, y_train2, epochs=60)

Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
Epoch 18/60
Epoch 19/60
Epoch 20/60
Epoch 21/60
Epoch 22/60
Epoch 23/60
Epoch 24/60
Epoch 25/60
Epoch 26/60
Epoch 27/60
Epoch 28/60
Epoch 29/60
Epoch 30/60
Epoch 31/60
Epoch 32/60
Epoch 33/60
Epoch 34/60
Epoch 35/60
Epoch 36/60
Epoch 37/60
Epoch 38/60
Epoch 39/60
Epoch 40/60
Epoch 41/60
Epoch 42/60

KeyboardInterrupt: 

In [16]:
modelCNN.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 27, 1660, 32)      320       
                                                                 
 max_pooling2d (MaxPooling2  (None, 13, 830, 32)       0         
 D)                                                              
                                                                 
 conv2d_1 (Conv2D)           (None, 11, 828, 64)       18496     
                                                                 
 max_pooling2d_1 (MaxPoolin  (None, 5, 414, 64)        0         
 g2D)                                                            
                                                                 
 conv2d_2 (Conv2D)           (None, 3, 412, 32)        18464     
                                                                 
 max_pooling2d_2 (MaxPoolin  (None, 1, 206, 32)        0

In [69]:
modelCNN.predict(X_test)



array([[1.47613298e-06, 2.08142090e-08, 2.88250381e-13, 8.43532471e-05,
        1.10454776e-03, 5.11684000e-01, 5.57877800e-10, 1.92804430e-02,
        3.06039292e-04, 4.67539102e-01],
       [2.10855138e-08, 1.23223674e-15, 1.04352964e-16, 2.63383509e-10,
        4.58700011e-09, 6.72736633e-07, 2.28620296e-22, 9.99999225e-01,
        4.55630840e-08, 4.59102239e-18],
       [2.97422071e-06, 8.84541154e-01, 1.23411110e-06, 4.25771152e-09,
        2.31484866e-11, 1.66040044e-12, 5.23057320e-10, 3.21313028e-12,
        5.80880222e-12, 1.15454569e-01],
       [3.58954918e-13, 2.72541906e-10, 3.71886480e-11, 2.70868267e-12,
        1.71413466e-01, 1.70494978e-08, 2.88050627e-17, 3.42190149e-03,
        8.25163245e-01, 1.34676122e-06],
       [9.84136328e-09, 3.51067092e-10, 1.13190346e-10, 1.38425111e-12,
        9.95505571e-01, 3.31888965e-04, 1.35243368e-18, 3.84214683e-03,
        1.60244628e-04, 1.60151278e-04],
       [6.05156134e-11, 3.17259466e-17, 8.22030440e-16, 3.53512705e-11,
   

In [78]:
#modelCNN.save('CNN model v1.h5')

  saving_api.save_model(


In [17]:
modelCNN.load_weights('models/CNN model v1.h5')

## Ejecución del Motor de Reconocimiento de la LSV

In [44]:
# 1. New detection variables
sequence = []
sentence = []
predictions = []
threshold = 0.5


W=640
H=480
cap = cv2.VideoCapture(0)
cap.set(cv2.CAP_PROP_FOURCC, cv2.VideoWriter_fourcc('M','J','P','G'))

cap.set(cv2.CAP_PROP_FRAME_WIDTH, W)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, H)
cap.set(cv2.CAP_PROP_FPS, 30)

# Set mediapipe model 
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    while cap.isOpened():

        # Read feed
        ret, frame = cap.read()

        # Make detections
        image, results = mediapipe_detection(frame, holistic)
        # print(results)
        
        # Draw landmarks
        draw_styled_landmarks(image, results)
        
        # 2. Prediction logic
        keypoints = extract_keypoints(results)
        sequence.append(keypoints)
        sequence = sequence[-29:]
        
        if len(sequence) == 29:
            res = modelLSTM.predict(np.expand_dims(sequence, axis=0))[0]
            print(actions[np.argmax(res)])
            predictions.append(np.argmax(res))
            
            
        #3. Viz logic
            if np.unique(predictions[-10:])[0]==np.argmax(res): 
                if res[np.argmax(res)] > threshold: 
                    
                    if len(sentence) > 0: 
                        if actions[np.argmax(res)] != sentence[-1]:
                            sentence.append(actions[np.argmax(res)])
                    else:
                        sentence.append(actions[np.argmax(res)])

            if len(sentence) > 5: 
                sentence = sentence[-5:]

            # Viz probabilities
            image = prob_viz(res, actions, image, colors)
            
        cv2.rectangle(image, (0,0), (640, 40), (245, 117, 16), -1)
        cv2.putText(image, ' '.join(sentence), (3,30), 
                       cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        
        # Show to screen
        cv2.imshow('OpenCV Feed', image)

        # Break gracefully
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()

E0000 00:00:1712414197.198087    2028 gl_context.cc:408] INTERNAL: ; RET_CHECK failure (mediapipe/gpu/gl_context_egl.cc:303) successeglMakeCurrent() returned error 0x3008;  (entering GL context)
E0000 00:00:1712414197.198157    2028 gl_context.cc:408] INTERNAL: ; RET_CHECK failure (mediapipe/gpu/gl_context_egl.cc:303) successeglMakeCurrent() returned error 0x3008;  (entering GL context)
E0000 00:00:1712414197.198164    2028 gl_context.cc:408] INTERNAL: ; RET_CHECK failure (mediapipe/gpu/gl_context_egl.cc:303) successeglMakeCurrent() returned error 0x3008;  (entering GL context)
E0000 00:00:1712414197.198170    2028 gl_context.cc:408] INTERNAL: ; RET_CHECK failure (mediapipe/gpu/gl_context_egl.cc:303) successeglMakeCurrent() returned error 0x3008;  (entering GL context)
E0000 00:00:1712414197.198176    2028 gl_context.cc:408] INTERNAL: ; RET_CHECK failure (mediapipe/gpu/gl_context_egl.cc:303) successeglMakeCurrent() returned error 0x3008;  (entering GL context)
E0000 00:00:1712414197.19

formula
formula
formula
formula
formula
formula
formula
formula
formula
formula
formula
formula
formula
formula
formula
formula
formula
formula
formula
formula
formula
formula
formula
formula
formula
formula
formula
formula
formula
formula
formula
formula
formula
formula
formula
formula
formula
formula
ecuacion
ecuacion
inversa
inversa
inversa
inversa
inversa
ecuacion
ecuacion
ecuacion
ecuacion
ecuacion
ecuacion
ecuacion
ecuacion
ecuacion
ecuacion
aceleracion
aceleracion
aceleracion
aceleracion
aceleracion
aceleracion
aceleracion
aceleracion
aceleracion
aceleracion
aceleracion
aceleracion
aceleracion
aceleracion
aceleracion
aceleracion
aceleracion
aceleracion
aceleracion
aceleracion
aceleracion
aceleracion
aceleracion
aceleracion
aceleracion
aceleracion
aceleracion
aceleracion
aceleracion
aceleracion
aceleracion
aceleracion
aceleracion
aceleracion
aceleracion
aceleracion
aceleracion
aceleracion
aceleracion
aceleracion
aceleracion
aceleracion
aceleracion
aceleracion
aceleracion
acelerac

In [41]:
cap.release()
cv2.destroyAllWindows()