In [18]:
from tensorflow import keras
import pandas as pd
import numpy as np
import sklearn
import matplotlib.pyplot as plt



In [2]:

keras.utils.set_random_seed(1234)

In [19]:
dataset = pd.read_csv("infarto.csv", delimiter=";")
dataset.drop(columns=["id", "casado"], inplace=True)
dataset.dropna(inplace=True)
dataset

Unnamed: 0,genero,edad,hipertensión,enfermedad_coronaria,trabajo,residencia,nivel_glucosa,bmi,uso_tabaco,infarto
0,Masculino,67.0,0,1,Privado,Urbana,228.69,36.6,Ex fumador,1
2,Masculino,80.0,0,1,Privado,Rural,105.92,32.5,Nunca,1
3,Femenino,49.0,0,0,Privado,Urbana,171.23,34.4,Fumador,1
4,Femenino,79.0,1,0,Autonomo,Rural,174.12,24.0,Nunca,1
5,Masculino,81.0,0,0,Privado,Urbana,186.21,29.0,Ex fumador,1
...,...,...,...,...,...,...,...,...,...,...
5104,Femenino,13.0,0,0,Menor de edad,Rural,103.08,18.6,NSNC,0
5106,Femenino,81.0,0,0,Autonomo,Urbana,125.20,40.0,Nunca,0
5107,Femenino,35.0,0,0,Autonomo,Rural,82.99,30.6,Nunca,0
5108,Masculino,51.0,0,0,Privado,Rural,166.29,25.6,Ex fumador,0


In [4]:
import sklearn.preprocessing


datasetNp = dataset.to_numpy()
encoder = sklearn.preprocessing.OneHotEncoder(handle_unknown='ignore')
categorical_cols = ["genero", "edad", "hipertensión", "enfermedad_coronaria",
                   "trabajador", "residencia", "uso_tabaco"]

encoder.fit(dataset)

In [5]:
import sklearn.model_selection


encodedData = encoder.transform(dataset)

# normalize the encoded data
scaler = sklearn.preprocessing.StandardScaler(with_mean=False)
scaler.fit(encodedData)
normalizedData = scaler.transform(encodedData)

# split the data into training and testing
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(normalizedData, dataset["infarto"], test_size=0.1)

X_train2, X_val, y_train2, y_val = sklearn.model_selection.train_test_split(X_train, y_train, test_size=0.2)

X_train.shape, X_val.shape, X_test.shape



((4418, 4394), (884, 4394), (491, 4394))

In [24]:
squaredError = keras.losses.MeanSquaredError()

capasOcultasDic = [3, 6, 9]
numNeuronasDic = [12, 48, 128]

for capasOcultas in capasOcultasDic:
    for numNeuronas in numNeuronasDic:
        model = keras.Sequential()
        model.add(keras.layers.Dense(9, activation="relu", input_shape=(normalizedData.shape[1],)))
        for i in range(capasOcultas):
            model.add(keras.layers.Dense(numNeuronas, activation="relu"))
        model.add(keras.layers.Dense(2, activation="sigmoid"))

        optimizer = keras.optimizers.Adam(learning_rate=0.001)
        model.compile(optimizer=optimizer, loss=squaredError, metrics=["accuracy"])

        model.fit(X_train, y_train, epochs=100, batch_size=32, verbose=0, validation_data=(X_val, y_val))
        trainLoss, trainAccuracy = model.evaluate(X_train, y_train)
        valLoss, valAccuracy = model.evaluate(X_val, y_val)
        testLoss, testAccuracy = model.evaluate(X_test, y_test)
        print(f"Capas ocultas: {capasOcultas}, Neuronas: {numNeuronas}")
        print(f"Train Loss: {trainLoss}, Train Accuracy: {trainAccuracy}")
        print(f"Val Loss: {valLoss}, Val Accuracy: {valAccuracy}")
        print(f"Test Loss: {testLoss}, Test Accuracy: {testAccuracy}")
        print("-"*50)
        print("\n\n")
        #save the data to a csv file
        with open("gidSearchP3.csv", "a") as f:
            f.write(f"{capasOcultas},{numNeuronas},{trainLoss},{trainAccuracy},{valLoss},{valAccuracy},{testLoss},{testAccuracy}\n")





  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m139/139[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 539us/step - accuracy: 0.5726 - loss: 4.3495e-09
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 535us/step - accuracy: 0.5923 - loss: 1.8523e-09
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 647us/step - accuracy: 0.4498 - loss: 0.0332
Capas ocultas: 3, Neuronas: 12
Train Loss: 1.7839807853192724e-08, Train Accuracy: 0.5692620873451233
Val Loss: 1.6959248450021391e-09, Val Accuracy: 0.5723981857299805
Test Loss: 0.03018464893102646, Test Accuracy: 0.4867617189884186
--------------------------------------------------



[1m139/139[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 551us/step - accuracy: 0.0255 - loss: 8.6870e-10 
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 602us/step - accuracy: 0.0168 - loss: 6.7497e-10   
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 734us/step - accuracy: 0.0936 - loss: 0.0416
Capas ocultas: 3, Neuronas: 48
Tr

In [23]:
y_predicted = model.predict(X_test)

y_real = y_test.to_numpy().reshape(-1, 1)

sklearn.metrics.confusion_matrix(y_real, y_predicted > 0.5)


[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 


array([[469,   0],
       [ 22,   0]])

(4418, 491, 884)