
# **<center><font style="color:rgb(100,109,254)">Entrenamiento de modelo (CNN + LSTM)</font> </center>**



### **<font style="color:rgb(134,19,348)"> Importar librerías</font>**



In [None]:
# Discard the output of this cell.
#%%capture

# Install the required libraries.
!pip install tensorflow opencv-contrib-python youtube-dl moviepy pydot graphviz
!pip install git+https://github.com/TahaAnwar/pafy.git#egg=pafy

In [3]:
# Import the required libraries.
import os
import cv2
import pafy
import math
import random
import numpy as np
import datetime as dt
import tensorflow as tf
from collections import deque
import matplotlib.pyplot as plt
import time
import mediapipe as mp

from moviepy.editor import *
%matplotlib inline

from sklearn.model_selection import train_test_split

from tensorflow.keras.layers import *
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.utils import plot_model

Semillas consistentes para todas las funciones

In [4]:
# Seed every random-number source used by this notebook (NumPy, the stdlib
# `random` module and TensorFlow) with the same constant.
seed_constant = 27
for _set_seed in (np.random.seed, random.seed, tf.random.set_seed):
    _set_seed(seed_constant)

## **<font style="color:rgb(134,19,348)">1. Preprocesamiento de Información</font>**

### **<font style="color:rgb(134,19,348)">1.1 Asignación de variables</font>**

In [5]:
# Size (in pixels) every captured frame is resized to before being stored.
IMAGE_HEIGHT = 64
IMAGE_WIDTH = 64

# Signs (actions) to detect.
CLASSES_LIST = [
    'Buenos dias',
    'Buenas tardes',
    'Amigo',
    'Lo siento',
    'Casa',
    'z',
]

# Number of videos recorded per sign.
NO_SEQUENCE = 5

# Number of frames per video.
SEQUENCE_LENGTH = 30

# Directory where the recorded videos are written.
DATASET_DIR = "D:/Work/Aplicada CNN+RNN"


### **<font style="color:rgb(134,19,348)">1.2. Identificación de Keypoints usando Mediapipe Holistic</font>**

In [6]:
# MediaPipe module handles used by the drawing helpers and the capture cells.
mp_holistic = mp.solutions.holistic # Holistic solution module (Holistic class and the *_CONNECTIONS sets)
mp_drawing = mp.solutions.drawing_utils # Drawing utilities (draw_landmarks, DrawingSpec)

In [7]:
def mediapipe_detection(image, model):
    """Run ``model`` on a BGR frame and return the frame together with the result.

    Args:
        image: Frame in BGR channel order (it is converted with COLOR_BGR2RGB).
        model: Object exposing ``process(rgb_image)``; the capture cells pass a
            ``mp_holistic.Holistic`` instance.

    Returns:
        Tuple ``(image, results)``: the frame converted back to BGR and whatever
        ``model.process`` returned.
    """
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    # The frame is flagged read-only only while the model processes it
    # (presumably so the model can use it without copying -- TODO confirm).
    rgb_frame.flags.writeable = False
    results = model.process(rgb_frame)
    rgb_frame.flags.writeable = True
    return cv2.cvtColor(rgb_frame, cv2.COLOR_RGB2BGR), results

In [8]:
def draw_landmarks(image, results):
    """Draw face, pose and both hands' landmarks on ``image`` with default styling.

    Args:
        image: Frame handed to ``mp_drawing.draw_landmarks``.
        results: Holistic output exposing ``face_landmarks``, ``pose_landmarks``,
            ``left_hand_landmarks`` and ``right_hand_landmarks``.
    """
    # (landmarks, connection set) pairs, drawn in this order.
    body_parts = (
        (results.face_landmarks, mp_holistic.FACEMESH_TESSELATION),   # face
        (results.pose_landmarks, mp_holistic.POSE_CONNECTIONS),       # pose
        (results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS),  # left hand
        (results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS), # right hand
    )
    for landmark_list, connections in body_parts:
        mp_drawing.draw_landmarks(image, landmark_list, connections)

In [9]:
def draw_styled_landmarks(image, results):
    """Draw the holistic landmarks on ``image`` with one colour scheme per body part.

    Args:
        image: Frame handed to ``mp_drawing.draw_landmarks``.
        results: Holistic output exposing ``face_landmarks``, ``pose_landmarks``,
            ``left_hand_landmarks`` and ``right_hand_landmarks``.
    """
    spec = mp_drawing.DrawingSpec
    # (landmarks, connection set, landmark style, connection style), drawn in order.
    styled_parts = (
        # Face. The second colour previously had a channel value of 256, which is
        # outside the valid 0-255 range; 255 is the maximum intensity.
        (results.face_landmarks, mp_holistic.FACEMESH_TESSELATION,
         spec(color=(80, 110, 10), thickness=1, circle_radius=1),
         spec(color=(80, 255, 121), thickness=1, circle_radius=1)),
        # Pose.
        (results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
         spec(color=(80, 22, 10), thickness=2, circle_radius=4),
         spec(color=(80, 44, 121), thickness=2, circle_radius=2)),
        # Left hand.
        (results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
         spec(color=(121, 22, 76), thickness=2, circle_radius=4),
         spec(color=(121, 44, 250), thickness=2, circle_radius=2)),
        # Right hand.
        (results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
         spec(color=(245, 117, 66), thickness=2, circle_radius=4),
         spec(color=(245, 66, 230), thickness=2, circle_radius=2)),
    )
    for landmark_list, connections, landmark_spec, connection_spec in styled_parts:
        mp_drawing.draw_landmarks(image, landmark_list, connections,
                                  landmark_spec, connection_spec)

### **<font style="color:rgb(134,19,348)">1.3. Obtención de datos</font>**
Creación del dataset a partir de frames tomados con la cámara del dispositivo

In [41]:

# Build the dataset from webcam frames: for every sign in CLASSES_LIST, record
# NO_SEQUENCE clips of SEQUENCE_LENGTH frames each. Every clip is written to
# DATASET_DIR/<sign>/<n>.avi and its resized, normalized frames are kept in memory.
#
# Pressing 'q' or a failed camera read stops the whole collection. A clip that
# did not reach SEQUENCE_LENGTH frames is discarded so `features` stays a
# regular array of shape (clips, SEQUENCE_LENGTH, IMAGE_HEIGHT, IMAGE_WIDTH, 3).

# Frames extracted for each clip.
features = []
# Label of each clip (index of its sign in CLASSES_LIST).
labels = []
stop_requested = False

cap = cv2.VideoCapture(0)
cv2.waitKey(1000)
try:
    # Load the MediaPipe Holistic model.
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
        for num, signo in enumerate(CLASSES_LIST):  # For every sign in the list
            if stop_requested:
                break
            # exist_ok only tolerates an existing folder; other OS errors surface.
            os.makedirs(os.path.join(DATASET_DIR, signo), exist_ok=True)
            print(f'Generando datos del signo: {signo}')

            for secuencia in range(NO_SEQUENCE):  # For every clip of the sign
                if stop_requested:
                    break
                # Name of the video file.
                video_filename = os.path.join(DATASET_DIR, signo, str(secuencia) + '.avi')
                # Define the codec and create the VideoWriter object.
                fourcc = cv2.VideoWriter_fourcc(*'XVID')
                out = cv2.VideoWriter(video_filename, fourcc, 20.0, (640, 480))
                frames_list = []
                try:
                    for n_frame in range(SEQUENCE_LENGTH):  # For every frame of the clip
                        ret, frame = cap.read()
                        if not ret:
                            # No frame available (camera missing or disconnected).
                            print('No se pudo leer un frame de la cámara; se detiene la recolección.')
                            stop_requested = True
                            break

                        image, results = mediapipe_detection(frame, holistic)
                        draw_styled_landmarks(image, results)
                        # The frame is written to disk before the status text is drawn.
                        out.write(image)

                        if n_frame == 0:
                            cv2.putText(image, 'Empezando Recolección', (120,200), cv2.FONT_HERSHEY_SIMPLEX, 1, (0,255, 0), 4, cv2.LINE_AA)
                        # NOTE(review): cv2.putText draws a single line, so the '\n'
                        # in this label is likely not rendered as a line break.
                        cv2.putText(image, 'Recolectando frames para {}: \n Video Numero: {}'.format(signo, secuencia), (15,12),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)
                        # Show on screen.
                        cv2.imshow('OpenCV Feed', image)
                        if n_frame == 0:
                            # Pause on the first frame of each clip.
                            cv2.waitKey(1000)

                        if cv2.waitKey(10) & 0xFF == ord('q'):
                            stop_requested = True
                            break

                        # cv2.resize takes dsize as (width, height).
                        resized_frame = cv2.resize(image, (IMAGE_WIDTH, IMAGE_HEIGHT))
                        # Scale pixel values to the [0, 1] range.
                        normalized_frame = resized_frame / 255
                        frames_list.append(normalized_frame)
                finally:
                    out.release()

                # Keep only complete clips.
                if len(frames_list) == SEQUENCE_LENGTH:
                    features.append(frames_list)
                    labels.append(num)
finally:
    # Always free the camera and close the preview window, even on errors.
    cap.release()
    cv2.destroyAllWindows()

features = np.asarray(features)
labels = np.array(labels)

print('Dataset Finalizado')

Generando datos del signo: Buenos dias
Generando datos del signo: Buenas tardes
Generando datos del signo: Amigo
Generando datos del signo: Lo siento
Generando datos del signo: Casa
Generando datos del signo: z
Dataset Finalizado


### **<font style="color:rgb(134,19,348)">1.4. Guardar datos de Labels y Features</font>**

In [None]:
# Persist the collected samples, appending them to previously saved data.
# np.save adds the ".npy" suffix, so the arrays live in Labels.npy / Features.npy
# (the old code tried to read 'Features.nyp', which np.save never writes).
try:
    previous_labels = np.load('Labels.npy')
    previous_features = np.load('Features.npy')
except FileNotFoundError:
    # No earlier dataset (or one of the two files is missing): store only
    # the samples collected in this session.
    new_labels, new_features = labels, features
else:
    # Any failure here (e.g. mismatched shapes) is raised instead of being
    # swallowed, so existing files are never silently overwritten.
    new_labels = np.concatenate((previous_labels, labels))
    new_features = np.concatenate((previous_features, features))

np.save('Labels', new_labels)
np.save('Features', new_features)

## **<font style="color:rgb(134,19,348)">2. Entrenamiento del modelo</font>**

### **<font style="color:rgb(134,19,348)">2.1. Recuperar datos de Labels y Features</font>**

In [11]:
# Load the stored dataset. np.save('Features', ...) writes 'Features.npy',
# so that is the name to read back (it was misspelled as 'Features.nyp').
labels = np.load('Labels.npy')
features = np.load('Features.npy')

Ahora convertimos los labels obtenidos en vectores one-hot (vectores binarios).

In [12]:
# Use the Keras utility to convert the integer labels into one-hot vectors.
one_hot_encoded_labels = to_categorical(labels)

### **<font style="color:rgb(134,19,348)">2.2. Separación de datos de entrenamiento</font>**


In [13]:
# Split the data into training (75%) and test (25%) sets, shuffling with the
# notebook-wide seed.
features_train, features_test, labels_train, labels_test = train_test_split(
    features,
    one_hot_encoded_labels,
    test_size=0.25,
    shuffle=True,
    random_state=seed_constant,
)

## **<font style="color:rgb(134,19,348)">3. Modelo ConvLSTM</font>**

### **<font style="color:rgb(134,19,348)">3.1. Construir el modelo ConvLSTM</font>**

In [14]:
def create_convlstm_model():
    """Build the ConvLSTM classifier and print its summary.

    The network stacks four ConvLSTM2D stages (4, 8, 14 and 16 filters), each
    followed by spatial max pooling; every stage except the last is also
    followed by dropout. The result is flattened and fed to a softmax layer
    with one unit per entry of CLASSES_LIST.

    Returns:
        The constructed (not yet compiled) Sequential model.
    """
    input_shape = (SEQUENCE_LENGTH, IMAGE_HEIGHT, IMAGE_WIDTH, 3)
    filter_counts = (4, 8, 14, 16)
    last_stage = len(filter_counts) - 1

    model = Sequential()

    for stage, n_filters in enumerate(filter_counts):
        # Only the first layer declares the input shape.
        first_layer_kwargs = {'input_shape': input_shape} if stage == 0 else {}
        model.add(ConvLSTM2D(filters=n_filters, kernel_size=(3, 3), activation='tanh',
                             data_format='channels_last', recurrent_dropout=0.2,
                             return_sequences=True, **first_layer_kwargs))

        # A pool size of 1 on the first axis leaves the number of frames unchanged.
        model.add(MaxPooling3D(pool_size=(1, 2, 2), padding='same',
                               data_format='channels_last'))

        # No dropout after the final stage.
        if stage != last_stage:
            model.add(TimeDistributed(Dropout(0.2)))

    model.add(Flatten())
    model.add(Dense(len(CLASSES_LIST), activation='softmax'))

    # Display the model summary.
    model.summary()

    return model

In [None]:
# Build the ConvLSTM model.
convlstm_model = create_convlstm_model()


### **<font style="color:rgb(134,19,348)">3.2. Entrenar el modelo ConvLSTM</font>**


In [15]:
# Early stopping on the validation loss (patience of 10 epochs), restoring
# the best weights when it triggers.
early_stopping_callback = EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=10,
    restore_best_weights=True,
)

# Loss function, optimizer and evaluation metric of the model.
convlstm_model.compile(
    optimizer='Adam',
    loss='categorical_crossentropy',
    metrics=["accuracy"],
)

# Start training; validation_split=0.2 reserves part of the training data
# for validation.
convlstm_model_training_history = convlstm_model.fit(
    x=features_train,
    y=labels_train,
    batch_size=4,
    epochs=50,
    shuffle=True,
    validation_split=0.2,
    callbacks=[early_stopping_callback],
)

Epoch 1/50
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 242ms/step - accuracy: 0.1817 - loss: 1.8171 - val_accuracy: 0.1688 - val_loss: 1.7904
Epoch 2/50
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 208ms/step - accuracy: 0.1791 - loss: 1.7912 - val_accuracy: 0.1299 - val_loss: 1.7898
Epoch 3/50
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 212ms/step - accuracy: 0.2287 - loss: 1.7886 - val_accuracy: 0.1818 - val_loss: 1.7793
Epoch 4/50
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 207ms/step - accuracy: 0.2179 - loss: 1.7726 - val_accuracy: 0.1688 - val_loss: 1.7720
Epoch 5/50
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 491ms/step - accuracy: 0.3101 - loss: 1.6353 - val_accuracy: 0.2338 - val_loss: 1.5760
Epoch 6/50
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 217ms/step - accuracy: 0.4905 - loss: 1.2768 - val_accuracy: 0.3766 - val_loss: 1.7209
Epoch 7/50
[1m77/77[

### **<font style="color:rgb(134,19,348)">3.3. Evaluar el modelo entrenado</font>**


In [16]:
# Evaluate the trained ConvLSTM model on the test split.
model_evaluation_history = convlstm_model.evaluate(features_test, labels_test)

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 282ms/step - accuracy: 0.7969 - loss: 0.5756


In [None]:
def plot_metric(model_training_history, metric_name_1, metric_name_2, plot_name):
    """Plot two recorded training metrics against the epoch index.

    Args:
        model_training_history: Object whose ``history`` attribute maps metric
            names to per-epoch value sequences (e.g. the value returned by
            ``model.fit``).
        metric_name_1: Key of the first metric; drawn in blue.
        metric_name_2: Key of the second metric; drawn in red.
        plot_name: Title of the graph (converted with ``str``).
    """
    recorded = model_training_history.history
    first_series = recorded[metric_name_1]
    second_series = recorded[metric_name_2]

    # The x-axis runs over the epochs of the first metric.
    x_axis = range(len(first_series))

    curves = (
        (first_series, 'blue', metric_name_1),
        (second_series, 'red', metric_name_2),
    )
    for values, colour, legend_label in curves:
        plt.plot(x_axis, values, colour, label=legend_label)

    plt.title(str(plot_name))
    plt.legend()

In [None]:
# Training vs. validation loss of the ConvLSTM model.
plot_metric(convlstm_model_training_history, 'loss', 'val_loss', 'Total Loss vs Total Validation Loss')

In [None]:
# Training vs. validation accuracy of the ConvLSTM model.
plot_metric(convlstm_model_training_history, 'accuracy', 'val_accuracy', 'Total Accuracy vs Total Validation Accuracy') 

### **<font style="color:rgb(134,19,348)">3.4. Guardar el modelo</font>**


In [17]:
# Save the model.
convlstm_model.save('modelo_convlstm.h5')
# Save the model weights.
convlstm_model.save_weights('convlstm-pesos.weights.h5')



## **<font style="color:rgb(134,19,348)">4. Modelo LRCN</font>**

### **<font style="color:rgb(134,19,348)">4.1. Construcción del modelo</font>**


In [17]:
def create_LRCN_model():
    """Build the LRCN classifier (per-frame CNN followed by an LSTM) and print its summary.

    Four TimeDistributed Conv2D stages (16, 32, 64 and 64 filters) are each
    followed by max pooling (4x4, 4x4, 2x2, 2x2); every stage except the last
    is also followed by dropout. The per-frame features are flattened, passed
    through an LSTM with 32 units and classified by a softmax layer with one
    unit per entry of CLASSES_LIST.

    Returns:
        The constructed (not yet compiled) Sequential model.
    """
    input_shape = (SEQUENCE_LENGTH, IMAGE_HEIGHT, IMAGE_WIDTH, 3)
    # (number of filters, pooling window) of each convolutional stage.
    conv_stages = ((16, (4, 4)), (32, (4, 4)), (64, (2, 2)), (64, (2, 2)))
    final_stage = len(conv_stages) - 1

    model = Sequential()

    for stage, (n_filters, pool_size) in enumerate(conv_stages):
        conv = Conv2D(n_filters, (3, 3), padding='same', activation='relu')
        if stage == 0:
            # Only the first layer declares the input shape.
            model.add(TimeDistributed(conv, input_shape=input_shape))
        else:
            model.add(TimeDistributed(conv))

        model.add(TimeDistributed(MaxPooling2D(pool_size)))

        # No dropout after the final stage.
        if stage != final_stage:
            model.add(TimeDistributed(Dropout(0.25)))

    model.add(TimeDistributed(Flatten()))
    model.add(LSTM(32))
    model.add(Dense(len(CLASSES_LIST), activation='softmax'))

    # Display the model summary.
    model.summary()

    return model

In [None]:

# Build the LRCN model.
LRCN_model = create_LRCN_model()


### **<font style="color:rgb(134,19,348)">4.2. Entrenar el modelo</font>**




In [19]:
# Early stopping on the validation loss (patience of 15 epochs), restoring
# the best weights when it triggers.
early_stopping_callback = EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=15,
    restore_best_weights=True,
)

# Loss function, optimizer and evaluation metric of the model.
LRCN_model.compile(
    optimizer='Adam',
    loss='categorical_crossentropy',
    metrics=["accuracy"],
)

# Start training; validation_split=0.2 reserves part of the training data
# for validation.
LRCN_model_training_history = LRCN_model.fit(
    x=features_train,
    y=labels_train,
    batch_size=4,
    epochs=50,
    shuffle=True,
    validation_split=0.2,
    callbacks=[early_stopping_callback],
)

Epoch 1/50
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 163ms/step - accuracy: 0.1517 - loss: 1.8254 - val_accuracy: 0.1948 - val_loss: 1.7902
Epoch 2/50
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 163ms/step - accuracy: 0.1479 - loss: 1.7908 - val_accuracy: 0.2987 - val_loss: 1.7883
Epoch 3/50
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 166ms/step - accuracy: 0.1399 - loss: 1.7818 - val_accuracy: 0.3766 - val_loss: 1.7330
Epoch 4/50
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 200ms/step - accuracy: 0.2187 - loss: 1.6725 - val_accuracy: 0.4286 - val_loss: 1.3296
Epoch 5/50
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 156ms/step - accuracy: 0.4470 - loss: 1.3300 - val_accuracy: 0.6623 - val_loss: 0.9188
Epoch 6/50
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 126ms/step - accuracy: 0.6724 - loss: 0.9996 - val_accuracy: 0.7662 - val_loss: 0.6956
Epoch 7/50
[1m77/77[

### **<font style="color:rgb(134,19,348)">4.3. Evaluar el modelo entrenado</font>**



In [20]:
# Evaluate the trained LRCN model on the test split.
model_evaluation_history = LRCN_model.evaluate(features_test, labels_test)

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 70ms/step - accuracy: 0.9667 - loss: 0.1511


In [None]:
# Loss values recorded during training (training vs. validation).
plot_metric(LRCN_model_training_history, 'loss', 'val_loss', 'Total Loss vs Total Validation Loss')

In [None]:
# Accuracy values recorded during training (training vs. validation).
plot_metric(LRCN_model_training_history, 'accuracy', 'val_accuracy', 'Total Accuracy vs Total Validation Accuracy')

### **<font style="color:rgb(134,19,348)">4.4. Guardar el modelo</font>**

In [None]:
# Save the weights of the trained LRCN model.
LRCN_model.save_weights('LRCN-pesos.weights.h5')

## **<font style="color:rgb(134,19,348)">5. Probar el modelo en tiempo real</font>**

In [None]:

# Real-time test: repeatedly capture SEQUENCE_LENGTH webcam frames, classify
# the clip with LRCN_model and show the predicted sign on the last frame.
# Pressing 'q' or a failed camera read ends the loop.
cap = cv2.VideoCapture(0)
try:
    # Set up the MediaPipe Holistic model.
    with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
        keep_running = True
        while cap.isOpened() and keep_running:
            frames_list = []
            for n_frame in range(SEQUENCE_LENGTH):  # For every frame of the clip
                ret, frame = cap.read()
                if not ret:
                    # No frame available (camera missing or disconnected).
                    print('No se pudo leer un frame de la cámara; se detiene la prueba.')
                    keep_running = False
                    break

                image, results = mediapipe_detection(frame, holistic)
                draw_styled_landmarks(image, results)
                # cv2.resize takes dsize as (width, height).
                resized_frame = cv2.resize(image, (IMAGE_WIDTH, IMAGE_HEIGHT))
                # Scale pixel values to the [0, 1] range.
                normalized_frame = resized_frame / 255
                frames_list.append(normalized_frame)

                if len(frames_list) == SEQUENCE_LENGTH:
                    # Pass the normalized frames to the model (as a batch of one
                    # clip) and get the predicted probabilities.
                    predicted_labels_probabilities = LRCN_model.predict(np.expand_dims(frames_list, axis = 0))[0]

                    # Index of the class with the highest probability.
                    predicted_label = np.argmax(predicted_labels_probabilities)

                    # Class name for that index.
                    predicted_class_name = CLASSES_LIST[predicted_label]

                    # Visualization: filled banner with the predicted sign.
                    cv2.rectangle(image, (0,0), (250, 40), (245, 117, 16), -1)
                    cv2.putText(image, predicted_class_name, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

                    # Show on screen and hold the prediction for a moment.
                    cv2.imshow('OpenCV Feed', image)
                    cv2.waitKey(1000)

                else:
                    cv2.putText(image, 'Recolectando frames....', (15,12), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1, cv2.LINE_AA)
                    # Show on screen.
                    cv2.imshow('OpenCV Feed', image)

                if cv2.waitKey(10) & 0xFF == ord('q'):
                    keep_running = False
                    break
finally:
    # Always free the camera and close the preview window, even on errors.
    cap.release()
    cv2.destroyAllWindows()