# 1. Import and Install Dependecies  

In [16]:
%pip install tensorflow opencv-python mediapipe matplotlib scikit-learn

Note: you may need to restart the kernel to use updated packages.


In [17]:
import cv2
import numpy as np
import os
import matplotlib.pyplot as pl
import time
import mediapipe as mp
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D, Dropout, LSTM, Embedding
from tensorflow.keras.utils import to_categorical



# 2. Key Points using MP Holistic


In [19]:
mp_holistic = mp.solutions.holistic # Modelo Holistico
mp_drawing = mp.solutions.drawing_utils # Desenhando Utilidades

In [20]:
def mediapipe_detection(image, model):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)              # Conversão de Cores
    image.flags.writeable = False                               # Imagem is no longer writeable
    results = model.process(image)                              # Fazer Predição
    image.flags.writeable = True                                # Imagem is now writeable
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)              # Conversão de Cores
    return image, results

In [21]:
def draw_landmarks(image, results):
    mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACEMESH_TESSELATION)  # Desenho no Rosto
    mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS)  # Desenho no Corpo
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS) # Desenho na Mão Esquerda
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS) # Desenho na Mão Direita

In [22]:
def draw_styled_landmarks(image, results):
    # Desenho no Rosto
    mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACEMESH_TESSELATION,
                              mp_drawing.DrawingSpec(color=(80, 110, 10),  thickness=1, circle_radius=1),
                              mp_drawing.DrawingSpec(color=(80, 256, 121),  thickness=1, circle_radius=1)          
                              )
    # Desenho no Corpo
    mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=(80, 22, 10),  thickness=2, circle_radius=4),
                              mp_drawing.DrawingSpec(color=(80, 44, 121),  thickness=2, circle_radius=2)  
                              )
    # Desenho na Mão Esquerda
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=(121, 22, 76),  thickness=2, circle_radius=4),
                              mp_drawing.DrawingSpec(color=(121, 44, 250),  thickness=2, circle_radius=2)  
                              ) 
    # Desenho na Mão Direita
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=(245, 117, 66),  thickness=2, circle_radius=4),
                              mp_drawing.DrawingSpec(color=(245, 66, 230),  thickness=2, circle_radius=2)  
                              )

In [36]:
cap = cv2.VideoCapture(0)
# Acessar o Modelo do MediaPipe
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    while cap.isOpened():
    
        # Lendo o Feed
        ret, frame = cap.read()
    
        # Fazer Detecções
        image, results = mediapipe_detection(frame, holistic)
        
        # Desenhos de Pontos de Detecção
        draw_styled_landmarks(image, results)
        
        # Mostrando a Tela
        cv2.imshow('Opencv Feed', image)
    
        # Parada de Feed
        if cv2.waitKey(10) & 0XFF ==ord('q'):
            break

cap.release()
cv2.destroyAllWindows()

# 3. Extract Keypoint Values 

In [37]:
pose = []
for res in results.pose_landmarks.landmark:
   test = np.array([res.x, res.y, res.z, res.visibility])
   pose.append(test)

In [82]:
pose = np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(132)
face = np.array([[res.x, res.y, res.z] for res in results.face_landmarks.landmark]).flatten() if results.face_landmarks else np.zeros(1404)
lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21*3)
rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21*3)

In [83]:
def extract_keypoints(results):
   pose = np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(33*4)
   face = np.array([[res.x, res.y, res.z] for res in results.face_landmarks.landmark]).flatten() if results.face_landmarks else np.zeros(468*3)
   lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21*3)
   rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21*3)
   return np.concatenate([pose,face, lh, rh]) 

In [84]:
result_test = extract_keypoints(results)

In [146]:
keypoints_dim = result_test.shape[0]

In [147]:
result_test

array([ 0.50751984,  0.38934931, -0.53029358, ...,  0.        ,
        0.        ,  0.        ])

In [148]:
np.save('0', result_test)

In [149]:
np.load('0.npy')

array([ 0.50751984,  0.38934931, -0.53029358, ...,  0.        ,
        0.        ,  0.        ])

# 4. Setup Folders for Collection

In [123]:
# Caminho para exportar dados, numpy arrays
DATA_PATH = os.path.join('MP_Data')

# Ações que vão ser detectadas
actions_sequence1 = np.array(['Oi', 'Tudo Bem', 'Quanto Tempo', 'Saudade', 'Tchau'])
actions_sequence2 = np.array(['Casa', 'Por Favor', 'Mais ou Menos', 'Ver', 'Hora'])

# Combine as sequências de ações em uma lista
actions = [actions_sequence1, actions_sequence2]

# trinta videos para valor de dados
no_sequences = 30

# os vídeos que terão 30 quadros de duração
sequence_length = 30

# Crie um dicionário para armazenar os rótulos de ação
action_labels = {}

# Itere sobre os elementos dos arrays
for action_sequence in actions:
  for action in action_sequence:
    # Atribua um índice à ação
    action_labels[action] = len(action_labels)

In [124]:
for actions_sequence in actions:
    for action in actions_sequence:
        for sequence in range(no_sequences):
            try:
                os.makedirs(os.path.join(DATA_PATH, action, str(sequence)))
            except:
                pass

# 5. Collect KeyPoint Value for Training and Testing

In [47]:
cap = cv2.VideoCapture(0)
# Acessar o Modelo do MediaPipe
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    
        # Loop através das sequências de ações
    for actions_sequence in actions:
        
        # Loop através das ações dentro da sequência
        for action in actions_sequence:
            
            # Loop através das sequências de vídeo
            for sequence in range(no_sequences):
                
                # loop atráves do tamanho do vídeo ou tamanho da sequência
                for frame_num in range(sequence_length):
    
                 # Lendo o Feed
                 ret, frame = cap.read()
    
                 # Fazer Detecções
                 image, results = mediapipe_detection(frame, holistic)
        
                 # Desenhos de Pontos de Detecção
                 draw_styled_landmarks(image, results)
        
                 # Lógica para esperar a iniciar a coleção
                 if frame_num == 0: 
                    cv2.putText(image, 'Start the Collection', (120,200), 
                               cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255, 0), 4, cv2.LINE_AA)
                    cv2.putText(image, 'Coletando frames para {} Video Numero {}'.format(action, sequence), (15,12), 
                               cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)
                    # Show to screen
                    cv2.imshow('OpenCV Feed', image)
                    cv2.waitKey(1000)
                else: 
                    cv2.putText(image, 'Collecting frames for {} Video Numero {}'.format(action, sequence), (15,12), 
                               cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)
                    # Show to screen
                    cv2.imshow('OpenCV Feed', image)
                
                # Export keypoints
                keypoints = extract_keypoints(results)
                npy_path = os.path.join(DATA_PATH, action, str(sequence), str(frame_num))
                np.save(npy_path, keypoints)

                # Break gracefully
                if cv2.waitKey(10) & 0xFF == ord('q'):
                    break

cap.release()
cv2.destroyAllWindows()

In [35]:
cap.release()
cv2.destroyAllWindows()

# 6. Preprocess Data and Create Labels and Features

In [125]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

In [132]:
# Criar o dicionário label_map
label_map = action_labels

In [133]:
label_map

{'Oi': 0,
 'Tudo Bem': 1,
 'Quanto Tempo': 2,
 'Saudade': 3,
 'Tchau': 4,
 'Casa': 5,
 'Por Favor': 6,
 'Mais ou Menos': 7,
 'Ver': 8,
 'Hora': 9}

In [150]:
sequences, labels = [], []

for idx, actions_sequence in enumerate(actions):
    for action in actions_sequence:
        for sequence in range(no_sequences):
            sequence_path = os.path.join(DATA_PATH, action, str(sequence))
            
            window = []
            for frame_num in range(sequence_length):
                file_path = os.path.join(sequence_path, "{}.npy".format(frame_num))
                
                # Verifique se o arquivo existe antes de tentar carregar
                if os.path.exists(file_path):
                    res = np.load(file_path)
                    window.append(res)

            sequences.append(window)
            labels.append(idx)

In [135]:
# Organizar em arrays numpy
X = np.array(sequences)
y = to_categorical(labels).astype(int)

In [137]:
# Dividir os dados em conjuntos de treinamento e teste
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [139]:
# Verificar as formas dos conjuntos de treinamento e teste
print("Shape of X:", X.shape)
print("Shape of y:", y.shape)

Shape of X: (300, 1, 1662)
Shape of y: (300, 2)


In [140]:
# Verificar as formas dos conjuntos de treinamento e teste
print("Shape of X_train:", X_train.shape)
print("Shape of y_train:", y_train.shape)
print("Shape of X_test:", X_test.shape)
print("Shape of y_test:", y_test.shape)

Shape of X_train: (240, 1, 1662)
Shape of y_train: (240, 2)
Shape of X_test: (60, 1, 1662)
Shape of y_test: (60, 2)


In [141]:
np.array(sequences).shape

(300, 1, 1662)

In [142]:
np.array(labels).shape

(300,)

In [143]:
X = np.array(sequences)

In [145]:
print("Shape of X_train:", X_train.shape)

Shape of X_train: (240, 1, 1662)


In [103]:
np.array(sequences).shape

(300, 1, 1662)

In [104]:
y = to_categorical(labels).astype(int)

In [105]:
y

array([[1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1,

In [106]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.05)

In [107]:
X_train.shape

(285, 1, 1662)

# 7. Build and Train LTSTM(RNN) Neural Network

In [108]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import TensorBoard

In [109]:
log_dir = os.path.join('Logs')
tb_callback = TensorBoard(log_dir=log_dir)

In [110]:
model = tf.keras.models.Sequential([
    tf.keras.layers.LSTM(64, return_sequences=True, activation='relu', input_shape=(sequence_length, keypoints_dim)),
    tf.keras.layers.LSTM(128, return_sequences=True, activation='relu'),
    tf.keras.layers.LSTM(64, return_sequences=False, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(len(actions), activation='softmax'),
])

In [111]:
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['categorical_accuracy'])

In [112]:
# Reshape dos dados de treinamento e teste
X_train = X_train.reshape((X_train.shape[285], sequence_length, keypoints_dim))
X_test = X_test.reshape((X_test.shape[285], sequence_length, keypoints_dim))

IndexError: tuple index out of range

In [113]:
# Treinar o modelo
model.fit(X_train, y_train, epochs=2000, validation_data=(X_test, y_test), callbacks=[tb_callback])

Epoch 1/2000


ValueError: in user code:

    File "C:\Users\User\anaconda3\lib\site-packages\keras\src\engine\training.py", line 1401, in train_function  *
        return step_function(self, iterator)
    File "C:\Users\User\anaconda3\lib\site-packages\keras\src\engine\training.py", line 1384, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "C:\Users\User\anaconda3\lib\site-packages\keras\src\engine\training.py", line 1373, in run_step  **
        outputs = model.train_step(data)
    File "C:\Users\User\anaconda3\lib\site-packages\keras\src\engine\training.py", line 1150, in train_step
        y_pred = self(x, training=True)
    File "C:\Users\User\anaconda3\lib\site-packages\keras\src\utils\traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "C:\Users\User\anaconda3\lib\site-packages\keras\src\engine\input_spec.py", line 298, in assert_input_compatibility
        raise ValueError(

    ValueError: Input 0 of layer "sequential_5" is incompatible with the layer: expected shape=(None, 30, 1662), found shape=(None, 1, 1662)


# 8. Make Predictions 

# 9. Make Predictions 