## **IMPORTACIONES**

In [75]:
import pandas as pd
import numpy as np

from matplotlib import pyplot as plt

from sklearn.model_selection import train_test_split

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

import joblib 

### **Crear DataFrame**
- Creamos el DataFrame e imprimimos las 5 primeras filas para ver superficialmente el contenido.

In [76]:
df = pd.read_csv('FraminghamScale.csv')
df.head()

Unnamed: 0,male,age,education,currentSmoker,cigsPerDay,BPMeds,prevalentStroke,prevalentHyp,diabetes,totChol,sysBP,diaBP,BMI,heartRate,glucose,TenYearCHD
0,1,39,4.0,0,0.0,0.0,0,0,0,195.0,106.0,70.0,26.97,80.0,77.0,0
1,0,46,2.0,0,0.0,0.0,0,0,0,250.0,121.0,81.0,28.73,95.0,76.0,0
2,1,48,1.0,1,20.0,0.0,0,0,0,245.0,127.5,80.0,25.34,75.0,70.0,0
3,0,61,3.0,1,30.0,0.0,0,1,0,225.0,150.0,95.0,28.58,65.0,103.0,1
4,0,46,3.0,1,23.0,0.0,0,0,0,285.0,130.0,84.0,23.1,85.0,85.0,0


### **Analizar las columnas**
- Verificamos la existencia de valores nulos y tipo de dato de cada columna.
- Podemos ver que hay ciertas columnas con valores nulos.

In [77]:
df.info()
df.isnull().sum()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4238 entries, 0 to 4237
Data columns (total 16 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   male             4238 non-null   int64  
 1   age              4238 non-null   int64  
 2   education        4133 non-null   float64
 3   currentSmoker    4238 non-null   int64  
 4   cigsPerDay       4209 non-null   float64
 5   BPMeds           4185 non-null   float64
 6   prevalentStroke  4238 non-null   int64  
 7   prevalentHyp     4238 non-null   int64  
 8   diabetes         4238 non-null   int64  
 9   totChol          4188 non-null   float64
 10  sysBP            4238 non-null   float64
 11  diaBP            4238 non-null   float64
 12  BMI              4219 non-null   float64
 13  heartRate        4237 non-null   float64
 14  glucose          3850 non-null   float64
 15  TenYearCHD       4238 non-null   int64  
dtypes: float64(9), int64(7)
memory usage: 529.9 KB


male                 0
age                  0
education          105
currentSmoker        0
cigsPerDay          29
BPMeds              53
prevalentStroke      0
prevalentHyp         0
diabetes             0
totChol             50
sysBP                0
diaBP                0
BMI                 19
heartRate            1
glucose            388
TenYearCHD           0
dtype: int64

### **Normalización**
- Seleccionamos cada columna que contenga mínimamente un valor nulo.
- Rellenamos los campos vacíos con la mediana de su respectiva columna.

In [78]:
for col in df.columns:
    if df[col].isnull().sum() > 0:
        median_value = df[col].median()
        df[col].fillna(median_value, inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[col].fillna(median_value, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[col].fillna(median_value, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always 

- Volvemos a verficar si existen valores nulos y podemos ver que ningún campo está vacío. 

In [79]:
df.isnull().sum()

male               0
age                0
education          0
currentSmoker      0
cigsPerDay         0
BPMeds             0
prevalentStroke    0
prevalentHyp       0
diabetes           0
totChol            0
sysBP              0
diaBP              0
BMI                0
heartRate          0
glucose            0
TenYearCHD         0
dtype: int64

### **Creación de variables**
- El modelo se va a entrenar utilizando todas las columnas excepto "TenYearCHD" (X).
- Nos va a devolver una prediccion basandose en la columna "TenYearCHD" (Y).

In [80]:
X = df.drop('TenYearCHD', axis=1)  
y = df['TenYearCHD']               

### **Entrenar modelo**

- Comenzamos dividiendo el entrenamiento en dos partes.
- El modelo utilizará un 80% de los datos del DF para su entrenamiento y un 20% de datos para probar.

In [81]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

- Definimos y configuramos el modelo.

In [82]:
model = Sequential()
model.add(Dense(1, input_shape=(X_train.shape[1],), activation='sigmoid'))

model.compile(
    optimizer='adam', 
    loss='binary_crossentropy', 
    metrics=['accuracy']
    )

history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2)

Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.8447 - loss: 22.0435 - val_accuracy: 0.8407 - val_loss: 15.7226
Epoch 2/50
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8536 - loss: 12.3589 - val_accuracy: 0.8407 - val_loss: 6.4116
Epoch 3/50
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8272 - loss: 4.6137 - val_accuracy: 0.7788 - val_loss: 2.5457
Epoch 4/50
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7565 - loss: 2.7160 - val_accuracy: 0.7788 - val_loss: 2.3531
Epoch 5/50
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7606 - loss: 2.4370 - val_accuracy: 0.7714 - val_loss: 2.1740
Epoch 6/50
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7664 - loss: 2.2093 - val_accuracy: 0.7935 - val_loss: 2.0405
Epoch 7/50
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━

- Evaluamos el modelo.

In [83]:
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Pérdida: {loss:.4f}")
print(f"Precisión: {accuracy * 100:.2f}%")

[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8672 - loss: 0.3949 
Pérdida: 0.4201
Precisión: 85.50%


### **Predicción**

In [84]:
y_pred = (model.predict(X_test) > 0.3).astype(int)

print("VALORES REALES VS PREDICCIONES\n")
for i in range(10):
    real = y_test.iloc[i]
    pred = y_pred[i]
    paciente_id = y.index[i]
    
    real_text = "CON riesgo" if real == 1 else "SIN riesgo"
    pred_text = "CON riesgo" if pred == 1 else "SIN riesgo"
    
    correcto = "✅" if real == pred else "❌"
    
    print(f"Paciente {paciente_id + 1}: Real: {real_text} - Predicción: {pred_text} {correcto}")

[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
VALORES REALES VS PREDICCIONES

Paciente 1: Real: SIN riesgo - Predicción: SIN riesgo ✅
Paciente 2: Real: SIN riesgo - Predicción: SIN riesgo ✅
Paciente 3: Real: SIN riesgo - Predicción: SIN riesgo ✅
Paciente 4: Real: SIN riesgo - Predicción: SIN riesgo ✅
Paciente 5: Real: SIN riesgo - Predicción: SIN riesgo ✅
Paciente 6: Real: SIN riesgo - Predicción: SIN riesgo ✅
Paciente 7: Real: CON riesgo - Predicción: SIN riesgo ❌
Paciente 8: Real: SIN riesgo - Predicción: SIN riesgo ✅
Paciente 9: Real: SIN riesgo - Predicción: SIN riesgo ✅
Paciente 10: Real: CON riesgo - Predicción: SIN riesgo ❌


### **Guardar modelo**

In [85]:
joblib.dump(model, 'modelo_framingham.pkl')

['modelo_framingham.pkl']

### **Crear un DF de prueba**
- Creamos un DF con dos pacientes, donde el primero tiene valores de riesgos para una enfermedad coronaria y el segundo tiene valores normales.    

In [86]:
pacientes = [{
    'male': 1,                
    'age': 65,                
    'education': 1.0,           
    'currentSmoker': 1,        
    'cigsPerDay': 40.0,      
    'BPMeds': 1.0,            
    'prevalentStroke': 1,      
    'prevalentHyp': 1,          
    'diabetes': 1,           
    'totChol': 300.0,          
    'sysBP': 180.0,           
    'diaBP': 110.0,        
    'BMI': 35.0,               
    'heartRate': 95.0,          
    'glucose': 200.0  
},{
    'male': 0,                 
    'age': 30,                 
    'education': 4.0,           
    'currentSmoker': 0,         
    'cigsPerDay': 0.0,          
    'BPMeds': 0.0,              
    'prevalentStroke': 0,     
    'prevalentHyp': 0,         
    'diabetes': 0,             
    'totChol': 170.0,          
    'sysBP': 110.0,          
    'diaBP': 70.0,             
    'BMI': 22.0,               
    'heartRate': 70.0,          
    'glucose': 85.0 
}]

df_pacientes = pd.DataFrame(pacientes)

### **Cargar y probar modelo**


In [88]:
reg = joblib.load('modelo_framingham.pkl')

pred = (reg.predict(df_pacientes) > 0.3).astype(int)

for i in range(len(pred)):
    paciente_id = i + 1
    riesgo = "CON riesgo" if pred[i] == 1 else "SIN riesgo"
    print(f"Paciente {paciente_id}: {riesgo}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
Paciente 1: CON riesgo
Paciente 2: SIN riesgo
