# Consignador RNN

## Contenido
1. [Importar librerias](#Importar-librerias)
2. [Cargar datos](#Cargar-datos)
3. [Preparar datos](#Preparar-datos)
4. [Modelo](#Modelo)
5. [Entrenamiento](#Entrenamiento) (#TODO: agregar grafico de perdida y guardar mejor modelo en cada epoca)
6. [Validacion](#Validacion)
7. [Prediccion](#Prediccion)
8. [Guardar modelo](#Guardar-modelo)


## Importar librerias

In [35]:
# Importar librerias
import utilities as ut
import os
import pandas as pd
import numpy as np
from keras import backend as K
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import sklearn.metrics as metrics
from keras.models import load_model, Sequential
from keras.layers import Dense, SimpleRNN
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint
from keras.metrics import AUC, Precision, Recall
from matplotlib import pyplot as plt
from imblearn.over_sampling import SMOTE
from collections import Counter



## Cargar datos

In [36]:
data = pd.read_pickle(ut.INCIDENTES_CAMARAS_FILENAME)
# convertir el id_camara a int
data['id_camara'] = data['id_camara'].astype(int)
# data = data.drop(["latitud", "longitud"], axis=1)
data = data[["fecha_creacion", "id_camara"]]

data

Unnamed: 0,fecha_creacion,id_camara
0,2022-01-12 08:49:38,1
1,2022-01-18 00:49:47,1
2,2023-01-08 20:47:51,1
3,2023-01-22 05:10:07,1
4,2022-02-02 01:19:04,1
...,...,...
31972,2021-07-27 10:06:22,17290
31973,2022-08-14 00:07:55,17290
31974,2021-09-06 11:43:53,17290
31975,2022-09-02 10:11:05,17290


#### Cargar datos de puntos de interes

In [38]:
DIR_PATH = '\\\\C4wadpninv004\\ANALISIS II-DGGE\\02. SME\\GUSTAVO\\'
# DIR_PATH = '..\\data\\'
FILE_PATH = DIR_PATH + 'B200m_CONSIGNADAS.csv'
FILE_PATH = DIR_PATH + 'B200m_CONSIGNADAS-v3.csv'
data_interest_points = pd.read_csv(FILE_PATH, sep=',', encoding='latin-1')
# Renombrar la columna 'Etiquetas' a 'id_camara'
data_interest_points.rename(columns={'id': 'id_camara'}, inplace=True)
# Ordenar los datos por la columna 'id_camara'
data_interest_points.sort_values(by=['id_camara'], inplace=True)
# Resetear los índices
data_interest_points.reset_index(drop=True, inplace=True)

data_interest_points

Unnamed: 0,id_camara,latitud,longitud,sector,tipo,C01-BANCOS,C05-CENTRALES CAMIONERAS,C06-CENTROS COMERCIALES,C09-CUARTELES DE LA POLICÍA AUXILIAR,C10-CUARTELES PBI,...,C62-OXXO,C67-CENTROS PILARES,C72-ACCESOS METRO,C73-ATRACTIVOS TURISTICOS,C75-FONOTECAS,C76-FOTOTECA,C77-GALERIAS,C78-ZONAS ARQUEOLOGICAS,C81-ESTACIONES DE CABLEBUS,C82-ALCALDIAS
0,1,19.435283,-99.147152,ALAMEDA,9m,25,0,1,0,0,...,4,0,1,3,0,0,2,0,0,0
1,2,19.435098,-99.145820,ALAMEDA,9m,27,0,1,0,0,...,1,0,0,4,0,0,1,0,0,0
2,3,19.434573,-99.143858,ALAMEDA,9m,15,0,1,0,0,...,0,0,0,1,0,0,0,0,0,0
3,4,19.434374,-99.142704,ALAMEDA,9m,16,0,1,0,0,...,2,0,1,2,0,0,1,0,0,0
4,5,19.434242,-99.141880,ALAMEDA,9m,13,0,1,0,0,...,2,0,2,3,0,0,2,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
441,12318,19.435075,-99.119716,CONGRESO,9m,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
442,12578,19.439129,-99.139482,CENTRO,9m,0,0,0,0,0,...,1,0,1,1,0,0,0,0,0,0
443,13795,19.436069,-99.121176,CONGRESO,9m,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
444,16948,19.434749,-99.144808,ALAMEDA,9m,21,0,0,0,0,...,1,0,0,2,0,0,0,0,0,0


#### Agrupar los puntos de interes

In [39]:
IP_groups = [['C09-CUARTELES DE LA POLICÍA AUXILIAR','C10-CUARTELES PBI','C11-CUARTELES PGJ','C16-JUZGADOS CIVILES Y PENALES','C20-MINISTERIOR PUBLICOS','C21-MODULOS SSP',],    # P01 - Seguridad

['C05-CENTRALES CAMIONERAS','C19-METROBUS','C31-TREN LIGERO','C42-TROLEBUS','C43-TURIBUS','C72-ACCESOS METRO','C81-ESTACIONES DE CABLEBUS',],                                        # P02 - Transporte

['C06-CENTROS COMERCIALES','C38-MERCADOS PUBLICOS','C49-TIENDAS DEPARTAMENTALES','C53-CINES','C62-OXXO',],#'C46-ESTABLECIMIENTOS MERCANTILES',],                                    # P03 - Comercio

['C22-MONUMENTOS HISTORICOS', 'C57-EVENTOS MASIVOS', 'C73-ATRACTIVOS TURISTICOS', 'C78-ZONAS ARQUEOLOGICAS',],                                                                        # P04 - Turismo

['C58-CASAS Y CENTROS DE CULTURA','C59-MUSEOS Y TEATROS','C75-FONOTECAS','C76-FOTOTECA','C77-GALERIAS', 'C26-PLAZAS Y PARQUES',],                                                    # P05 - Cultura

['C23-NOTARIAS','C24-OFICINAS DE GOBIERNO','C27-RECLUSORIOS','C30-TESORERIAS','C82-ALCALDIAS', 'C01-BANCOS'],                                                                        # P06 - Gobierno

['C15-HOSPITALES','C45-CENTROS DE SALUD Y CLINICAS',],                                                                                                                                # P07 - Hospitales

['C44-GUARDERIAS', 'C67-CENTROS PILARES', 'C13-EDUCACION',],#'C13-CAM', 'C13-CPARA EL TRABAJO', 'C13_PREESC', 'C13_PRIM', 'C13_SEC', 'C13_BACH', 'C13_SUP',],                                            # P08 - Escuelas

['C33-IGLESIAS Y TEMPLOS',],                                                                                                                                                        # P09 - Iglesias

['C51-EDIFICIOS',],]                                                                                                                                    # P10 - Edificios

# Cada nueva columna es la suma de las columnas que se encuentran en la lista de agrupaciones con nombre GRUPO-k
for i in range(len(IP_groups)):
    data_interest_points['P.Interes-'+str(i+1)] = data_interest_points[IP_groups[i]].sum(axis=1)
    # # Eliminamos las columnas que ya no se van a utilizar
    data_interest_points = data_interest_points.drop(IP_groups[i], axis=1)
data_interest_points = data_interest_points.drop(["C46-ESTABLECIMIENTOS MERCANTILES"], axis=1)

# Estandarizamos los datos
scaler = StandardScaler(with_mean=False, with_std=False)
data_interest_points[data_interest_points.filter(regex='^P.Interes-\d+').columns] = scaler.fit_transform(data_interest_points[data_interest_points.filter(regex='^P.Interes-\d+').columns])
# data_interest_points[data_interest_points.filter(regex='^P.Interes-\d+').columns] = data_interest_points[data_interest_points.filter(regex='^P.Interes-\d+').columns].copy()
data_interest_points

Unnamed: 0,id_camara,latitud,longitud,sector,tipo,P.Interes-1,P.Interes-2,P.Interes-3,P.Interes-4,P.Interes-5,P.Interes-6,P.Interes-7,P.Interes-8,P.Interes-9,P.Interes-10
0,1,19.435283,-99.147152,ALAMEDA,9m,1.0,4.0,9.0,14.0,7.0,25.0,0.0,3.0,0.0,0.0
1,2,19.435098,-99.145820,ALAMEDA,9m,1.0,2.0,5.0,12.0,6.0,27.0,0.0,3.0,0.0,0.0
2,3,19.434573,-99.143858,ALAMEDA,9m,1.0,1.0,4.0,11.0,4.0,16.0,0.0,1.0,1.0,0.0
3,4,19.434374,-99.142704,ALAMEDA,9m,0.0,2.0,7.0,8.0,4.0,17.0,0.0,1.0,1.0,0.0
4,5,19.434242,-99.141880,ALAMEDA,9m,0.0,4.0,8.0,8.0,7.0,14.0,0.0,1.0,2.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
441,12318,19.435075,-99.119716,CONGRESO,9m,0.0,3.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
442,12578,19.439129,-99.139482,CENTRO,9m,0.0,6.0,1.0,1.0,4.0,0.0,1.0,3.0,1.0,0.0
443,13795,19.436069,-99.121176,CONGRESO,9m,0.0,2.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0
444,16948,19.434749,-99.144808,ALAMEDA,9m,1.0,1.0,3.0,13.0,4.0,22.0,0.0,1.0,1.0,0.0


In [40]:
data['date'] = data['fecha_creacion'].dt.strftime('%Y-%m-%d %H:00').astype('datetime64[ns]')
camaras_fechas_frecuencias = data.groupby(['id_camara', 'date']).size().reset_index(name='n_incidentes')

# Convertir la columna de fecha a datetime si aún no lo es
camaras_fechas_frecuencias['date'] = pd.to_datetime(camaras_fechas_frecuencias['date'])

# Crear un DataFrame con todas las fechas posibles por hora
fechas = pd.date_range(start='2021-01-01', end=(camaras_fechas_frecuencias['date'].max().date() + pd.Timedelta(days=1)).isoformat(), freq='H')

# Obtener los id únicos de camara
camaras = camaras_fechas_frecuencias['id_camara'].unique()

# Crear un DataFrame con todas las combinaciones posibles de id_camara y fechas
df_total = pd.DataFrame(index=pd.MultiIndex.from_product([camaras, fechas], names=['id_camara', 'date'])).reset_index()

# Unir el DataFrame total con el DataFrame original
data = pd.merge(df_total, camaras_fechas_frecuencias, how='left', on=['id_camara', 'date'])

# Llenar los valores nulos con 0
data['n_incidentes'] = data['n_incidentes'].replace(np.nan, 0)

# Ahora desagregamos la fecha
data['anio'] = data['date'].dt.year
data['mes'] = data['date'].dt.month
data['dia_semana'] = data['date'].dt.dayofweek+1
data['dia'] = data['date'].dt.day
data['hora'] = data['date'].dt.hour

# data_final.drop(['date'], axis=1, inplace=True)

data

Unnamed: 0,id_camara,date,n_incidentes,anio,mes,dia_semana,dia,hora
0,1,2021-01-01 00:00:00,0.0,2021,1,5,1,0
1,1,2021-01-01 01:00:00,0.0,2021,1,5,1,1
2,1,2021-01-01 02:00:00,0.0,2021,1,5,1,2
3,1,2021-01-01 03:00:00,0.0,2021,1,5,1,3
4,1,2021-01-01 04:00:00,0.0,2021,1,5,1,4
...,...,...,...,...,...,...,...,...
8767017,17290,2023-03-30 20:00:00,0.0,2023,3,4,30,20
8767018,17290,2023-03-30 21:00:00,0.0,2023,3,4,30,21
8767019,17290,2023-03-30 22:00:00,0.0,2023,3,4,30,22
8767020,17290,2023-03-30 23:00:00,0.0,2023,3,4,30,23


#### Asignar los puntos de interes a la cada registro de la base de datos de 'data'

In [41]:
# Recorrer los registros del df data y agregar las columnas de P.Interes de acuerdo a la columna id_camara
interest_points_columns = ["id_camara", "latitud", "longitud"] + data_interest_points.filter(regex='^P.Interes-\d+').columns.to_list()

data = data.merge(data_interest_points[interest_points_columns], 
                              left_on='id_camara', 
                              right_on='id_camara', 
                              how='left')

data

Unnamed: 0,id_camara,date,n_incidentes,anio,mes,dia_semana,dia,hora,latitud,longitud,P.Interes-1,P.Interes-2,P.Interes-3,P.Interes-4,P.Interes-5,P.Interes-6,P.Interes-7,P.Interes-8,P.Interes-9,P.Interes-10
0,1,2021-01-01 00:00:00,0.0,2021,1,5,1,0,19.435283,-99.147152,1.0,4.0,9.0,14.0,7.0,25.0,0.0,3.0,0.0,0.0
1,1,2021-01-01 01:00:00,0.0,2021,1,5,1,1,19.435283,-99.147152,1.0,4.0,9.0,14.0,7.0,25.0,0.0,3.0,0.0,0.0
2,1,2021-01-01 02:00:00,0.0,2021,1,5,1,2,19.435283,-99.147152,1.0,4.0,9.0,14.0,7.0,25.0,0.0,3.0,0.0,0.0
3,1,2021-01-01 03:00:00,0.0,2021,1,5,1,3,19.435283,-99.147152,1.0,4.0,9.0,14.0,7.0,25.0,0.0,3.0,0.0,0.0
4,1,2021-01-01 04:00:00,0.0,2021,1,5,1,4,19.435283,-99.147152,1.0,4.0,9.0,14.0,7.0,25.0,0.0,3.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8767017,17290,2023-03-30 20:00:00,0.0,2023,3,4,30,20,19.435700,-99.116500,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
8767018,17290,2023-03-30 21:00:00,0.0,2023,3,4,30,21,19.435700,-99.116500,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
8767019,17290,2023-03-30 22:00:00,0.0,2023,3,4,30,22,19.435700,-99.116500,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
8767020,17290,2023-03-30 23:00:00,0.0,2023,3,4,30,23,19.435700,-99.116500,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0


In [42]:
# # obtener el id_camara con mayor número de incidentes
data[data["n_incidentes"]>0].groupby("id_camara").size().sort_values(ascending=False).head(10)
data[data["id_camara"].isin([ 2469, 2525, 2532, 10534, 2422, 6, 6756, 2407, 12578, 6691, ])]

Unnamed: 0,id_camara,date,n_incidentes,anio,mes,dia_semana,dia,hora,latitud,longitud,P.Interes-1,P.Interes-2,P.Interes-3,P.Interes-4,P.Interes-5,P.Interes-6,P.Interes-7,P.Interes-8,P.Interes-9,P.Interes-10
98285,6,2021-01-01 00:00:00,0.0,2021,1,5,1,0,19.433729,-99.146751,1.0,2.0,6.0,4.0,2.0,27.0,0.0,3.0,0.0,0.0
98286,6,2021-01-01 01:00:00,0.0,2021,1,5,1,1,19.433729,-99.146751,1.0,2.0,6.0,4.0,2.0,27.0,0.0,3.0,0.0,0.0
98287,6,2021-01-01 02:00:00,0.0,2021,1,5,1,2,19.433729,-99.146751,1.0,2.0,6.0,4.0,2.0,27.0,0.0,3.0,0.0,0.0
98288,6,2021-01-01 03:00:00,0.0,2021,1,5,1,3,19.433729,-99.146751,1.0,2.0,6.0,4.0,2.0,27.0,0.0,3.0,0.0,0.0
98289,6,2021-01-01 04:00:00,0.0,2021,1,5,1,4,19.433729,-99.146751,1.0,2.0,6.0,4.0,2.0,27.0,0.0,3.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8708046,12578,2023-03-30 20:00:00,0.0,2023,3,4,30,20,19.439129,-99.139482,0.0,6.0,1.0,1.0,4.0,0.0,1.0,3.0,1.0,0.0
8708047,12578,2023-03-30 21:00:00,0.0,2023,3,4,30,21,19.439129,-99.139482,0.0,6.0,1.0,1.0,4.0,0.0,1.0,3.0,1.0,0.0
8708048,12578,2023-03-30 22:00:00,0.0,2023,3,4,30,22,19.439129,-99.139482,0.0,6.0,1.0,1.0,4.0,0.0,1.0,3.0,1.0,0.0
8708049,12578,2023-03-30 23:00:00,0.0,2023,3,4,30,23,19.439129,-99.139482,0.0,6.0,1.0,1.0,4.0,0.0,1.0,3.0,1.0,0.0


In [43]:
date_split = '2023-01-01'
# date_split = '2022-10-01'
data_ = data[data["id_camara"].isin([ 2469, 2525, 2532, 10534, 2422, 6, 6756, 2407, 12578, 6691, ])]

#Split data into train and test, train will be until 2022 and test will be rest of the data
train = data_[data_['date'] < date_split]
test = data_[data_['date'] >= date_split]

#Split train and test into X and y
X_train = train.drop(['id_camara', 'n_incidentes', 'date'], axis=1).copy()
y_train = train['n_incidentes'].copy()
X_test = test.drop(['id_camara', 'n_incidentes', 'date'], axis=1).copy()
y_test = test['n_incidentes'].copy()


print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)
# Imprimir el porcentaje de train y test respecto al total
print(f"Porcentaje de train: {round(len(train)/len(data)*100, 2)}%")
print(f"Porcentaje de test: {round(len(test)/len(data)*100, 2)}%")

# del data
# del train
# del test
# del data_interest_points
# del camaras_fechas_frecuencias
# del fechas
# del camaras
# del df_total
import gc
gc.collect()

(175200, 17) (175200,) (21370, 17) (21370,)
Porcentaje de train: 2.0%
Porcentaje de test: 0.24%


12804

### Oversampling

In [44]:
counter = Counter(y_train)
print("Antes de oversampling: ", counter)

# duplicar la clase 5 para que sea
# X_train = pd.concat([X_train, X_train[y_train == 3] ], axis=0)
# y_train = pd.concat([y_train, y_train[y_train == 3] ], axis=0)
# X_train = pd.concat([X_train, X_train[y_train == 5], X_train[y_train == 5], X_train[y_train == 5], X_train[y_train == 5], X_train[y_train == 5]], axis=0)
# y_train = pd.concat([y_train, y_train[y_train == 5], y_train[y_train == 5], y_train[y_train == 5], y_train[y_train == 5], y_train[y_train == 5]], axis=0)
X_train = pd.concat([X_train, X_train[y_train == 5], X_train[y_train == 5], X_train[y_train == 5], X_train[y_train == 5], X_train[y_train == 5]], axis=0)
y_train = pd.concat([y_train, y_train[y_train == 5], y_train[y_train == 5], y_train[y_train == 5], y_train[y_train == 5], y_train[y_train == 5]], axis=0)

# Oversampling para las clases minoritarias
oversample = SMOTE()
X_train, y_train = oversample.fit_resample(X_train, y_train)

# Resumen de la distribución de clases
counter = Counter(y_train)
print("Después de oversampling: ", counter)


Antes de oversampling:  Counter({0.0: 171963, 1.0: 2972, 2.0: 214, 3.0: 37, 4.0: 10, 5.0: 4})
Después de oversampling:  Counter({0.0: 171963, 1.0: 171963, 3.0: 171963, 2.0: 171963, 4.0: 171963, 5.0: 171963})


### Modelo de Red Neuronal - Regresion Logistica

##### Deteccion del ultimo modelo ya existente

In [45]:
MODELS_PATH = '../models/'

# Find last model id from filename structure: model-{id}-{epoch:03d}-{accuracy:.3f}.h5
def find_last_model_id():
    last_model_id = 0
    for file in os.listdir(MODELS_PATH):
        if file.startswith('model-'):
            model_id = int(file.split('-')[1])
            if model_id > last_model_id:
                last_model_id = model_id
    return last_model_id
print(find_last_model_id())


4


##### Creacion del modelo

In [46]:
# Creacion del modelo con grafica de perdida y accuracy
# Pasos:
# 1. Definir el modelo
# 2. Entrenar el modelo
# 3. Evaluar el modelo
# 4. Guardar el modelo
# 5. Cargar el modelo
# 6. Hacer predicciones con el modelo

# Solo toma como el total, los que son 1 y de esos verifica cuantos y_pred son 1
def c5_score(y_true, y_pred):
	import tensorflow as tf
	y_true = tf.cast(y_true, tf.float32) # cast means convert
	y_pred = tf.cast(tf.round(y_pred), tf.float32) # round means round to nearest integer
	# probando el valor que debera regresar
	y_pred_ = tf.where(y_pred != 0, 1, 0)
	y_true_ = tf.where(y_true != 0, 1, 0)


	return tf.keras.metrics.binary_accuracy(y_true_, y_pred_)

from keras.optimizers import SGD, Adam, Adagrad, Adadelta, RMSprop
# import loss
from keras.losses import mean_absolute_percentage_error, mean_squared_error, mean_absolute_error, binary_crossentropy


# 1 Definir el modelo
def create_model(input_dim, output_dim, hidden_layers, neurons):
    model = Sequential()
    model.add(SimpleRNN(10, input_shape=(input_dim[1], input_dim[2]), activation='relu'))
    # model.add(Dense(neurons, input_dim=input_dim, activation='relu'))
    # for i in range(hidden_layers):
    #     model.add(Dense(neurons, activation='relu'))
    # model.add(Dense(output_dim, activation='linear'))
    model.compile(loss=mean_squared_error, optimizer=RMSprop(learning_rate=0.0005), metrics=["accuracy", c5_score, "mse", "mae"])
    return model

# 2 Entrenar el modelo
def train_model(model, X_train, y_train, epochs, batch_size, validation_split, model_id):
    # Crear el callback para guardar el modelo
    # checkpoint = ModelCheckpoint(MODELS_PATH+'model-'+model_id+'-{accuracy:.4f}-{epoch:03d}.h5', verbose=0, monitor='accuracy', save_best_only=True, mode='auto')
    checkpoint = ModelCheckpoint(MODELS_PATH+'model-'+model_id+'-{accuracy:.4f}-{epoch:03d}.h5', verbose=0, monitor='accuracy', save_best_only=True, mode='auto')

    history = model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, validation_split=validation_split, callbacks=[checkpoint])
    return history

# 3 Evaluar el modelo
def evaluate_model(model, X_test, y_test):
    result = model.evaluate(X_test, y_test) # loss, accuracy
    return result
# 4 Guardar el mejor modelo (el que tenga el mejor accuracy)
def save_model(model, model_id, accuracy, epoch):
    model.save(f'{MODELS_PATH}model-{model_id}-{epoch:03d}-{accuracy:.3f}.h5')
    
# 5 Cargar el modelo
def load_model(model_path):
    model = load_model(model_path, custom_objects={'c5_score': c5_score})
    # model = load_model(model_path)
    return model

# 6 Hacer predicciones con el modelo
def predict(model, X):
    predictions = model.predict(X)
    return predictions

# 7 Graficar la perdida y el accuracy
def plot_history(history):
	# Plot training & validation accuracy values
	plt.plot(history.history['accuracy'])
	plt.plot(history.history['val_accuracy'])
	plt.title('Precisión del modelo')
	plt.ylabel('Precisión')
	plt.xlabel('Época')
	plt.legend(['Entrenamiento', 'Validación'], loc='upper left')
	plt.show()
	# Plot training & validation loss values
	plt.plot(history.history['loss'])
	plt.plot(history.history['val_loss'])
	plt.title('Pérdida del modelo')
	plt.ylim(0, 1)
	plt.ylabel('Pérdida')
	plt.xlabel('Época')
	plt.legend(['Entrenamiento', 'Validación'], loc='upper left')
	plt.show()
	# Plot training & validation c5_score values
	plt.plot(history.history['c5_score'])
	plt.plot(history.history['val_c5_score'])
	plt.title('C5 Score del modelo')
	plt.ylim(0, 1)
	plt.ylabel('C5 Score')
	plt.xlabel('Época')
	plt.legend(['Entrenamiento', 'Validación'], loc='upper left')
	plt.show()
		
# 8 Graficar la matriz de confusión
def plot_confusion_matrix(y_test, y_pred):
    from sklearn.metrics import confusion_matrix
    import seaborn as sns

    cm = confusion_matrix(y_test, y_pred)
    print(cm)
    plt.figure(figsize=(5,5))
    sns.heatmap(cm, annot=True, fmt=".0f", linewidths=.5, square = True, cmap = 'Blues_r')
    plt.ylabel('Actual label')
    plt.xlabel('Predicted label')
    plt.title('Matriz de confusión')
    plt.show()
        
# 9 Graficar la curva ROC (Receiver Operating Characteristic), es decir, la curva de la sensibilidad (recall) vs la especificidad
def plot_roc_curve(y_test, y_pred):
    from sklearn.metrics import roc_curve

    fpr, tpr, thresholds = roc_curve(y_test, y_pred)
    plt.plot([0, 1], [0, 1], linestyle='--')
    plt.plot(fpr, tpr, marker='.')
    plt.title('Curva ROC')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.show()

columns_labels = X_train.columns
X_train = np.reshape(X_train.to_numpy(), (X_train.shape[0], X_train.shape[1], 1))
X_test = np.reshape(X_test.to_numpy(), (X_test.shape[0], X_test.shape[1], 1))

print(X_train.shape)


# 1 Definir el modelo
model = create_model(input_dim=X_train.shape, output_dim=1, hidden_layers=2, neurons=32)
model.summary()
print("")
model_id = find_last_model_id() + 1

# 2 Entrenar el modelo
history = train_model(model, X_train, y_train, epochs=200, batch_size=32, validation_split=0.2, model_id=str(model_id))

# 3 Evaluar el modelo
metrics_result = evaluate_model(model, X_test, y_test)
print(f'Loss: {metrics_result[0]}')
print(f'Accuracy: {metrics_result[1]}')
print(f'C5 Score: {metrics_result[2]}')
print(f'MSE: {metrics_result[3]}')
print(f'MAE: {metrics_result[5]}')

# 4 Guardar el mejor modelo (el que tenga el mejor accuracy)
save_model(model, model_id, metrics_result[1], len(history.history['accuracy']))

# 5 Cargar el modelo
# model = load_model(f'{MODELS_PATH}model-{model_id}-{len(history.history["accuracy"]):03d}-{accuracy:.3f}.h5')

# 6 Hacer predicciones con el modelo
y_pred = predict(model, X_test)
y_pred = np.round(y_pred).astype(int).reshape(1, -1)[0]
print(y_pred)

# 7 Graficar la perdida y el accuracy
plot_history(history)

# 8 Graficar la matriz de confusión
plot_confusion_matrix(y_test, y_pred)

# 9 Graficar la curva ROC (Receiver Operating Characteristic), es decir, la curva de la sensibilidad (recall) vs la especificidad
# plot_roc_curve(y_test, y_pred)

(1031778, 17, 1)
Model: "sequential_7"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 simple_rnn_7 (SimpleRNN)    (None, 10)                120       
                                                                 
Total params: 120
Trainable params: 120
Non-trainable params: 0
_________________________________________________________________

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epo