In [121]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
import cv2 
import numpy as np
import os 
from matplotlib import pyplot as plt
import time
import mediapipe as mp 
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, BatchNormalization
from tensorflow.keras.callbacks import TensorBoard

mp_holistic = mp.solutions.holistic
mp_drawing = mp.solutions.drawing_utils

def mediapipe_detection(image, model):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) 
    image.flags.writeable = False 
    results = model.process(image)
    image.flags.writeable = True 
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    return image, results 

def draw_landmarks(image, results):
    if results.face_landmarks:
        mp_drawing.draw_landmarks(
            image, 
            results.face_landmarks, 
            mp_holistic.FACEMESH_TESSELATION, 
            mp_drawing.DrawingSpec(color=(209, 12, 163), thickness=1, circle_radius=1), 
            mp_drawing.DrawingSpec(color=(0, 0, 0), thickness=1, circle_radius=1) 
        )
    if results.pose_landmarks:
        mp_drawing.draw_landmarks(
            image,
            results.pose_landmarks,
            mp_holistic.POSE_CONNECTIONS,
            mp_drawing.DrawingSpec(color=(209,12,163), thickness=2, circle_radius=4),
            mp_drawing.DrawingSpec(color=(0,0,0), thickness=2, circle_radius=2)
        )
    if results.left_hand_landmarks:
        mp_drawing.draw_landmarks(
            image,
            results.left_hand_landmarks,
            mp_holistic.HAND_CONNECTIONS,
            mp_drawing.DrawingSpec(color=(209,12,163), thickness=2, circle_radius=4),
            mp_drawing.DrawingSpec(color=(0,0,0), thickness=2, circle_radius=2)
        )
    if results.right_hand_landmarks:
        mp_drawing.draw_landmarks(
            image,
            results.right_hand_landmarks,
            mp_holistic.HAND_CONNECTIONS,
            mp_drawing.DrawingSpec(color=(209,12,163), thickness=2, circle_radius=4),
            mp_drawing.DrawingSpec(color=(0,0,0), thickness=2, circle_radius=2)
        )

def normalize_keypoints(keypoints):
    min_val = np.min(keypoints, axis=0)
    max_val = np.max(keypoints, axis=0)
    normalized_keypoints = (keypoints - min_val) / (max_val - min_val + 1e-6)
    return normalized_keypoints

def smooth_keypoints(sequence, window_size=5):
    smoothed_sequence = []
    for i in range(len(sequence)):
        start_idx = max(0, i - window_size // 2)
        end_idx = min(len(sequence), i + window_size // 2 + 1)
        smoothed_keypoints = np.mean(sequence[start_idx:end_idx], axis=0)
        smoothed_sequence.append(smoothed_keypoints)
    return np.array(smoothed_sequence)

DATA_PATH = os.path.join('trainTest') 
actions = np.array(['hola', 'mi_nombre_es', 'como_estas', 'chao', 'buenas_noches', 'por_favor', 'parado'])
no_sequences = 50
sequence_length = 30

label_map = {label:num for num, label in enumerate(actions)}

sequences, labels = [], []
for action in actions:
    for sequence in range(no_sequences):
        window = []
        for frame_num in range(sequence_length):
            res = np.load(os.path.join(DATA_PATH, action, str(sequence), "{}.npy".format(frame_num)))
            res = normalize_keypoints(res)  # Normalizar
            window.append(res)
        window = smooth_keypoints(window)  # Suavizado
        sequences.append(window)
        labels.append(label_map[action])

In [122]:
np.array(labels).shape #50*11

(350,)

In [123]:
labels

[0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,


In [124]:
X = np.array(sequences)
X.shape # (50 Videos * 11 Labels, 30 frames cada uno, 1662 keypoints)

(350, 30, 1662)

In [125]:
y = to_categorical(labels).astype(int) # Convertir enteros a categorical info
y

array([[1, 0, 0, ..., 0, 0, 0],
       [1, 0, 0, ..., 0, 0, 0],
       [1, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 1],
       [0, 0, 0, ..., 0, 0, 1],
       [0, 0, 0, ..., 0, 0, 1]])

In [126]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.05)


In [127]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import TensorBoard
log_dir = os.path.join("Logs")
tb_callback = TensorBoard(log_dir = log_dir)

model = Sequential()
model.add(LSTM(64, return_sequences=True, activation="relu", input_shape=(30, 1662)))
model.add(BatchNormalization())  # Normalización
model.add(LSTM(128, return_sequences=True, activation="relu"))
model.add(BatchNormalization())  # Normalización
model.add(LSTM(64, return_sequences=False, activation="relu"))
model.add(Dense(64, activation="relu"))
model.add(Dense(32, activation="relu"))
model.add(Dense(actions.shape[0], activation="softmax"))

model.compile(optimizer="Adam", loss="categorical_crossentropy", metrics=["categorical_accuracy"])
model.fit(X_train, y_train, epochs=1600, callbacks=[tb_callback])

2024-11-13 00:44:59.308788: I tensorflow/core/profiler/lib/profiler_session.cc:136] Profiler session initializing.
2024-11-13 00:44:59.308825: I tensorflow/core/profiler/lib/profiler_session.cc:155] Profiler session started.
2024-11-13 00:44:59.308870: I tensorflow/core/profiler/lib/profiler_session.cc:172] Profiler session tear down.


Epoch 1/1600
 2/11 [====>.........................] - ETA: 1s - loss: 2.0739 - categorical_accuracy: 0.1953 

2024-11-13 00:45:02.774591: I tensorflow/core/profiler/lib/profiler_session.cc:136] Profiler session initializing.
2024-11-13 00:45:02.774630: I tensorflow/core/profiler/lib/profiler_session.cc:155] Profiler session started.




2024-11-13 00:45:03.032966: I tensorflow/core/profiler/lib/profiler_session.cc:71] Profiler session collecting data.
2024-11-13 00:45:03.089435: I tensorflow/core/profiler/lib/profiler_session.cc:172] Profiler session tear down.
2024-11-13 00:45:03.151199: I tensorflow/core/profiler/rpc/client/save_profile.cc:137] Creating directory: Logs/train/plugins/profile/2024_11_13_00_45_03
2024-11-13 00:45:03.224163: I tensorflow/core/profiler/rpc/client/save_profile.cc:143] Dumped gzipped tool data for trace.json.gz to Logs/train/plugins/profile/2024_11_13_00_45_03/saur.trace.json.gz
2024-11-13 00:45:03.260811: I tensorflow/core/profiler/rpc/client/save_profile.cc:137] Creating directory: Logs/train/plugins/profile/2024_11_13_00_45_03
2024-11-13 00:45:03.260967: I tensorflow/core/profiler/rpc/client/save_profile.cc:143] Dumped gzipped tool data for memory_profile.json.gz to Logs/train/plugins/profile/2024_11_13_00_45_03/saur.memory_profile.json.gz
2024-11-13 00:45:03.262447: I tensorflow/core/p

Epoch 2/1600
Epoch 3/1600
Epoch 4/1600
Epoch 5/1600
Epoch 6/1600
Epoch 7/1600
Epoch 8/1600
Epoch 9/1600
Epoch 10/1600
Epoch 11/1600
Epoch 12/1600
Epoch 13/1600
Epoch 14/1600
Epoch 15/1600
Epoch 16/1600
Epoch 17/1600
Epoch 18/1600
Epoch 19/1600
Epoch 20/1600
Epoch 21/1600
Epoch 22/1600
Epoch 23/1600
Epoch 24/1600
Epoch 25/1600
Epoch 26/1600
Epoch 27/1600
Epoch 28/1600
Epoch 29/1600
Epoch 30/1600
Epoch 31/1600
Epoch 32/1600
Epoch 33/1600
Epoch 34/1600
Epoch 35/1600
Epoch 36/1600
Epoch 37/1600
Epoch 38/1600
Epoch 39/1600
Epoch 40/1600
Epoch 41/1600
Epoch 42/1600
Epoch 43/1600
Epoch 44/1600
Epoch 45/1600
Epoch 46/1600
Epoch 47/1600
Epoch 48/1600
Epoch 49/1600
Epoch 50/1600
Epoch 51/1600
Epoch 52/1600
Epoch 53/1600
Epoch 54/1600
Epoch 55/1600
Epoch 56/1600
Epoch 57/1600
Epoch 58/1600
Epoch 59/1600
Epoch 60/1600
Epoch 61/1600
Epoch 62/1600
Epoch 63/1600
Epoch 64/1600
Epoch 65/1600
Epoch 66/1600
Epoch 67/1600
Epoch 68/1600
Epoch 69/1600
Epoch 70/1600
Epoch 71/1600
Epoch 72/1600
Epoch 73/1600


<tensorflow.python.keras.callbacks.History at 0x7fafe452edc0>

In [128]:
model.summary()

Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_18 (LSTM)               (None, 30, 64)            442112    
_________________________________________________________________
batch_normalization (BatchNo (None, 30, 64)            256       
_________________________________________________________________
lstm_19 (LSTM)               (None, 30, 128)           98816     
_________________________________________________________________
batch_normalization_1 (Batch (None, 30, 128)           512       
_________________________________________________________________
lstm_20 (LSTM)               (None, 64)                49408     
_________________________________________________________________
dense_18 (Dense)             (None, 64)                4160      
_________________________________________________________________
dense_19 (Dense)             (None, 32)               

In [129]:
res = model.predict(X_test)


In [130]:
actions[np.argmax(y_test[4])]


'por_favor'

In [131]:
model.save("pruebota.h5")

In [14]:
#model.load_weights("refined1.h5")


In [132]:
from sklearn.metrics import multilabel_confusion_matrix, accuracy_score


In [133]:
yhat = model.predict(X_test)


In [134]:
ytrue = np.argmax(y_test, axis=1).tolist()
yhat = np.argmax(yhat, axis=1).tolist()

In [135]:
multilabel_confusion_matrix(ytrue, yhat)


array([[[16,  0],
        [ 2,  0]],

       [[15,  0],
        [ 3,  0]],

       [[ 9,  2],
        [ 0,  7]],

       [[11,  7],
        [ 0,  0]],

       [[16,  0],
        [ 1,  1]],

       [[15,  0],
        [ 3,  0]],

       [[17,  0],
        [ 0,  1]]])

In [136]:
accuracy_score(ytrue, yhat)


0.5

In [137]:
colors = [(245,117,16), (117,245,16), (16,117,245), (117,245,16),(117,245,16),(117,245,16),(117,245,16),(117,245,16),(117,245,16),(117,245,16),(117,245,16)]
def prob_viz(res, actions, input_frame, colors):
    output_frame = input_frame.copy()
    for idx, prob in enumerate(res):
        cv2.rectangle(output_frame, (0, 60 + idx*40), (int(prob*100), 90 + idx*40), colors[idx], -1)
        cv2.putText(output_frame, f'{actions[idx]}: {prob:.2f}', (5, 85 + idx*40), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
    return output_frame

def extract_keypoints(results):
    pose = np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(33*4)
    face = np.array([[res.x, res.y, res.z] for res in results.face_landmarks.landmark]).flatten() if results.face_landmarks else np.zeros(468*3)
    lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21*3)
    rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21*3)
    keypoints = np.concatenate([pose, face, lh, rh])
    return normalize_keypoints(keypoints)

In [138]:
# 1. New detection variables
sequence = []
sentence = []
predictions = []
threshold = 0.5

cap = cv2.VideoCapture(0)
# Set mediapipe model 
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    while cap.isOpened():
        # Leer video
        ret, frame = cap.read()
        if not ret:
            break

        # Realizar detecciones
        image, results = mediapipe_detection(frame, holistic)
        draw_landmarks(image, results)

        # Extraer y normalizar keypoints
        keypoints = extract_keypoints(results)
        sequence.append(keypoints)
        sequence = sequence[-30:]

        # Suavizar la secuencia
        smoothed_sequence = smooth_keypoints(sequence)

        if len(smoothed_sequence) == 30:
            res = model.predict(np.expand_dims(smoothed_sequence, axis=0))[0]
            print(actions[np.argmax(res)])
            predictions.append(np.argmax(res))

            # Lógica de visualización
            if np.unique(predictions[-30:])[0] == np.argmax(res):
                if res[np.argmax(res)] > threshold:
                    if len(sentence) > 0:
                        if actions[np.argmax(res)] != sentence[-1]:
                            sentence.append(actions[np.argmax(res)])
                    else:
                        sentence.append(actions[np.argmax(res)])

            if len(sentence) > 5:
                sentence = sentence[-5:]

            # Visualización de probabilidades
            image = prob_viz(res, actions, image, colors)

        cv2.rectangle(image, (0, 0), (640, 40), (245, 117, 16), -1)
        cv2.putText(image, ' '.join(sentence), (3, 30), 
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)

        # Mostrar en pantalla
        cv2.imshow('OpenCV Feed', image)

        # Cierre del bucle
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()























































chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
parado
parado
parado
parado
parado
parado
parado
parado
parado
parado
parado
parado
parado
parado
parado
parado
parado
parado
parado
parado
parado
parado
parado
parado
parado
parado
parado
parado
parado
parado
parado
parado
parado
parado
parado
parado
parado
parado
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao
chao

In [45]:
cap.release()
cv2.destroyAllWindows() #Stop all CASE