## Práctica 3 

### Predicción de sufrir un infarto con Keras

In [1]:
# Importamos las librerias

import pandas as pd
import numpy as np
import keras_metrics

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras import optimizers
from tensorflow.keras import utils as k
from tensorflow.keras.callbacks import EarlyStopping

file= 'infarto.csv'
dt=pd.read_csv(file, sep=";")


# Aunque no seamos expertos en el tema entendemos que las columnas casado y trabajo no aportan nada para el modelo
# La residencia si porque la calidad del aire es diferente

# Vemos si hay valores nulos

dt=dt.drop(['id','casado', 'trabajo'], axis=1)

print("Numero de filas con NaN values:\n",dt.isnull().sum())
print("\n\nComo no hay muchos NaN las filas que cuenten con valores nulos las descartamos")

# Quitamos las filas con valores nulos

dt=dt.dropna()
dt.head(5)

Numero de filas con NaN values:
 genero                    0
edad                      0
hipertensión              0
enfermedad_coronaria      0
residencia                0
nivel_glucosa             0
bmi                     201
uso_tabaco                0
infarto                   0
dtype: int64


Como no hay muchos NaN las filas que cuenten con valores nulos las descartamos


Unnamed: 0,genero,edad,hipertensión,enfermedad_coronaria,residencia,nivel_glucosa,bmi,uso_tabaco,infarto
0,Masculino,67.0,0,1,Urbana,228.69,36.6,Ex fumador,1
2,Masculino,80.0,0,1,Rural,105.92,32.5,Nunca,1
3,Femenino,49.0,0,0,Urbana,171.23,34.4,Fumador,1
4,Femenino,79.0,1,0,Rural,174.12,24.0,Nunca,1
5,Masculino,81.0,0,0,Urbana,186.21,29.0,Ex fumador,1


#### Categorizacion de columnas

In [2]:
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

#Transformamos las columnas necesarias con to_categorical
#antes 

columna1=np.array(dt["genero"])
columna2=np.array(dt["residencia"])
columna3=np.array(dt["uso_tabaco"])
label_encoder = LabelEncoder()

genero = label_encoder.fit_transform(columna1)
residencia= label_encoder.fit_transform(columna2)
tabaco= label_encoder.fit_transform(columna3)


dt["genero"]= k.to_categorical(genero)
dt["residencia"]= k.to_categorical(residencia)
dt["uso_tabaco"]= k.to_categorical(tabaco)

#### Normalización de los datos y obtención del conjunto de prueba

In [3]:
#NORMALIZAMOS LOS DATOS POR COLUMNAS

def minmax_norm(datos_input): 
    return (datos_input - dt.min()) / ( dt.max() - dt.min())
dt=minmax_norm(dt)
dt.head(5)

#OBTENEMOS EL CONJUNTO DE PRUEBA

esperada= dt.iloc[:, 8]
esperada

datos= dt.iloc[:,0:8]
datos

Unnamed: 0,genero,edad,hipertensión,enfermedad_coronaria,residencia,nivel_glucosa,bmi,uso_tabaco
0,0.0,0.816895,0.0,1.0,0.0,0.801265,0.301260,1.0
2,0.0,0.975586,0.0,1.0,1.0,0.234512,0.254296,0.0
3,1.0,0.597168,0.0,0.0,0.0,0.536008,0.276060,0.0
4,1.0,0.963379,1.0,0.0,1.0,0.549349,0.156930,0.0
5,0.0,0.987793,0.0,0.0,0.0,0.605161,0.214204,1.0
...,...,...,...,...,...,...,...,...
5104,1.0,0.157715,0.0,0.0,1.0,0.221402,0.095074,0.0
5106,1.0,0.987793,0.0,0.0,0.0,0.323516,0.340206,0.0
5107,1.0,0.426270,0.0,0.0,1.0,0.128658,0.232532,0.0
5108,0.0,0.621582,0.0,0.0,1.0,0.513203,0.175258,1.0


In [4]:
#ENTRENAMIENTO, VALIDACION

#NO INFARTOS
x_train, x_resto, y_train, y_resto= train_test_split(datos[209::3], esperada[209::3], test_size=0.3)
x_test, x_pred, y_test, y_pred = train_test_split(x_resto, y_resto, test_size=0.2)

#INFARTOS
x_in_train, x_in_resto,y_in_train,y_in_resto = train_test_split(datos[:209], esperada[:209], test_size=0.2)
x_in_test, x_in_pred,y_in_test,y_in_pred = train_test_split(x_in_resto, y_in_resto, test_size=0.2)


#Sumamos los casos para que la red cuente con ambos casos en cada etapa
x_train=pd.concat([x_in_train, x_train], axis=0)
x_test=pd.concat([x_in_test, x_test], axis=0)
x_pred=pd.concat([x_in_pred, x_pred], axis=0)

y_train=pd.concat([y_in_train, y_train], axis=0)
y_test=pd.concat([y_in_test, y_test], axis=0)
y_pred=pd.concat([y_in_pred, y_pred], axis=0)

print(len(x_train))
print(len(x_in_train))



1263
167


#### Creamos la arquitectura de la red neuronal

In [5]:
#PROCEDEMOS A CREAR LA RED
# Configuración del modelo
# Lo primero es crear un modelo Secuencial vacio
# Ahora, añadimos 3 capas.
# La capa de entrada
# La capa oculta
# La última capa es la de salida con una neurona

model=Sequential()

#APLICANDO LA FORMULA SABEMOS QUE EN LA CAPA OCULTA DEBEMOS TENER ENTRE 245 Y 981 NEURONAS
model.add(Dense(500, 'sigmoid', 8))
model.add(Dense(400, 'relu'))
model.add(Dense(1, 'sigmoid'))# UNA SALIDA (0/1)

# Entrenamiento de la red 
# Con model.fit indicamos las entradas y salidas y la cantidad de iteraciones de aprendizaje (epochs)

model.compile(loss="mean_squared_error", optimizer='adam',metrics="accuracy")
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=100)
model.fit(x=x_train,y=y_train,validation_split=0.2, epochs=1000, shuffle=True, use_multiprocessing=True)


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100


Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


<tensorflow.python.keras.callbacks.History at 0x7f9bd15342e0>

In [6]:
from sklearn.metrics import precision_score, recall_score, confusion_matrix

y_pred1 = model.predict(x_pred)
y_pred1 = y_pred1.round(0)

# Evaluamos el modelo 
scores = model.evaluate(x_test, y_test)
print("\n%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))



accuracy: 89.98%


In [7]:
# Mostramos los resultados de precision y recall

print(precision_score(y_pred, y_pred1 , average="macro"))
print(recall_score(y_pred, y_pred1 , average="macro"))

0.5885416666666667
0.57953216374269


In [8]:
# Mostramos los resultados de la matriz de confusión

cm = confusion_matrix(y_pred, y_pred1)
print(cm)

[[89  6]
 [ 7  2]]
