### Imports y Definición de Modelos

In [1]:
!pip install scikeras mlxtend tensorflow

Collecting scikeras
  Downloading scikeras-0.13.0-py3-none-any.whl.metadata (3.1 kB)
Collecting scikit-learn>=1.4.2 (from scikeras)
  Downloading scikit_learn-1.7.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (17 kB)
Downloading scikeras-0.13.0-py3-none-any.whl (26 kB)
Downloading scikit_learn-1.7.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (12.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.9/12.9 MB[0m [31m103.0 MB/s[0m eta [36m0:00:00[0m00:01[0m0:01[0m
[?25hInstalling collected packages: scikit-learn, scikeras
  Attempting uninstall: scikit-learn
    Found existing installation: scikit-learn 1.2.2
    Uninstalling scikit-learn-1.2.2:
      Successfully uninstalled scikit-learn-1.2.2
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
category-encoders 2.7.0 requires scikit-learn<1.6.0,>

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from mlxtend.feature_selection import SequentialFeatureSelector as SFS

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from scikeras.wrappers import KerasClassifier
from sklearn.neural_network import MLPClassifier  # SOLO para selección de atributos

# Función generadora del modelo MLP para scikeras
def crear_modelo_mlp(input_dim):
    model = Sequential()
    model.add(Dense(100, activation='relu', input_shape=(input_dim,)))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

2025-06-13 19:08:51.332071: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1749841731.516600      35 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1749841731.569315      35 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [3]:
import tensorflow as tf

# Lista los dispositivos físicos tipo GPU disponibles
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    print("GPU detectada y lista para usar:")
    for gpu in gpus:
        print("   -", gpu)
else:
    print("No se detectó GPU. Se está usando CPU.")

GPU detectada y lista para usar:
   - PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')


###  Función de evaluación

In [4]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, roc_auc_score, classification_report

# Lista acumulativa de resultados
resultados_mlp = []

def evaluar_modelo_mlp(modelo, X_train, X_test, y_train, y_test, escenario=""):
    inicio = time.time()

    modelo.fit(X_train, y_train)

    if hasattr(modelo, "predict_proba"):
        y_proba = modelo.predict_proba(X_test)[:, 1]
        y_pred = (y_proba >= 0.5).astype(int)
    else:
        y_pred = modelo.predict(X_test)
        y_proba = y_pred

    fin = time.time()
    duracion = fin - inicio

    acc = accuracy_score(y_test, y_pred)
    rec = recall_score(y_test, y_pred)
    prec = precision_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()
    spec = tn / (tn + fp)
    auc = roc_auc_score(y_test, y_proba)

    print("Classification Report:")
    print(classification_report(y_test, y_pred))
    print(f"Accuracy     : {acc:.3f}")
    print(f"Recall       : {rec:.3f}")
    print(f"Precision    : {prec:.3f}")
    print(f"F1-score     : {f1:.3f}")
    print(f"Specificity  : {spec:.3f}")
    print(f"AUC-ROC      : {auc:.3f}")
    print(f"Tiempo de ejecución: {duracion:.2f} segundos")

    # Guardar en lista global
    resultados_mlp.append({
        "Escenario": escenario,
        "Modelo": "MLP",
        "Accuracy": acc,
        "Recall": rec,
        "Precision": prec,
        "F1-score": f1,
        "Specificity": spec,
        "AUC-ROC": auc,
        "Tiempo (s)": duracion
    })


def guardar_resultados_csv_mlp(ruta="resultados_mlp_todos.csv"):
    df_resultados = pd.DataFrame(resultados_mlp)

    # Si el archivo ya existe, solo agrega sin sobrescribir el encabezado
    try:
        with open(ruta, 'x') as f:  # Intenta crear el archivo
            df_resultados.to_csv(f, index=False)
            print(f"Archivo nuevo creado y resultados guardados en {ruta}")
    except FileExistsError:
        df_resultados.to_csv(ruta, mode='a', header=False, index=False)
        print(f"Resultados añadidos a {ruta}")

### Escenario 1: Dataset completo

In [5]:
import time
from tensorflow.keras.callbacks import EarlyStopping

df1 = pd.read_csv("/kaggle/input/data-csv/Hipertension_Arterial_Mexico_limpio.csv")
y1 = df1["riesgo_hipertension"]
print("Escenario 1 – MLP (Dataset completo)")

X1 = df1.drop(columns=["riesgo_hipertension", "FOLIO_I"]).copy()
if "sueno_horas" in X1.columns:
    X1 = pd.get_dummies(X1, columns=["sueno_horas"], drop_first=True)

scaler = StandardScaler()
X1 = scaler.fit_transform(X1)

input_dim = X1.shape[1]
early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

mlp = KerasClassifier(
    model=crear_modelo_mlp,
    model__input_dim=input_dim,
    epochs=100,
    batch_size=32,
    verbose=1,
    callbacks=[early_stop],
    validation_split=0.2
)

X1_train, X1_test, y1_train, y1_test = train_test_split(
    X1, y1, test_size=0.2, stratify=y1, random_state=42
)

# Usar función extendida con soporte para guardar en CSV
evaluar_modelo_mlp(mlp, X1_train, X1_test, y1_train, y1_test, escenario="Escenario 1 – MLP")

# Guardar resultados en CSV
guardar_resultados_csv_mlp("resultados_mlp_todos.csv")



Escenario 1 – MLP (Dataset completo)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
I0000 00:00:1749841743.928608      35 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 15513 MB memory:  -> device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0, compute capability: 6.0


Epoch 1/100


I0000 00:00:1749841746.665884      84 service.cc:148] XLA service 0x7a0e980041e0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1749841746.666504      84 service.cc:156]   StreamExecutor device (0): Tesla P100-PCIE-16GB, Compute Capability 6.0
I0000 00:00:1749841746.881133      84 cuda_dnn.cc:529] Loaded cuDNN version 90300


[1m75/88[0m [32m━━━━━━━━━━━━━━━━━[0m[37m━━━[0m [1m0s[0m 2ms/step - accuracy: 0.6767 - loss: 0.6171

I0000 00:00:1749841747.309054      84 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 13ms/step - accuracy: 0.6851 - loss: 0.6091 - val_accuracy: 0.8149 - val_loss: 0.4780
Epoch 2/100
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8128 - loss: 0.4692 - val_accuracy: 0.8321 - val_loss: 0.4224
Epoch 3/100
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8343 - loss: 0.4303 - val_accuracy: 0.8522 - val_loss: 0.3911
Epoch 4/100
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8314 - loss: 0.4158 - val_accuracy: 0.8795 - val_loss: 0.3701
Epoch 5/100
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8554 - loss: 0.3776 - val_accuracy: 0.8752 - val_loss: 0.3567
Epoch 6/100
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8519 - loss: 0.3695 - val_accuracy: 0.8824 - val_loss: 0.3498
Epoch 7/100
[1m88/88[0m [32m━━━━━━━━━━━━━━

### Escenario 2: Dataset con FSS

In [6]:
import time
from tensorflow.keras.callbacks import EarlyStopping

df2 = pd.read_csv("/kaggle/input/data-csv/variables_seleccionadas.csv")
y2 = df2["riesgo_hipertension"]
print("Escenario 2 – MLP (Dataset con FSS)")

X2 = df2.drop(columns=["riesgo_hipertension", "FOLIO_I"]).copy()
if "sueno_horas" in X2.columns:
    X2 = pd.get_dummies(X2, columns=["sueno_horas"], drop_first=True)

scaler = StandardScaler()
X2 = scaler.fit_transform(X2)

input_dim = X2.shape[1]
early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

mlp = KerasClassifier(
    model=crear_modelo_mlp,
    model__input_dim=input_dim,
    epochs=100,
    batch_size=32,
    verbose=1,
    callbacks=[early_stop],
    validation_split=0.2
)

X2_train, X2_test, y2_train, y2_test = train_test_split(
    X2, y2, test_size=0.2, stratify=y2, random_state=42
)

# Evaluar y guardar resultados con etiqueta de escenario
evaluar_modelo_mlp(mlp, X2_train, X2_test, y2_train, y2_test, escenario="Escenario 2 – MLP")

# Guardar resultados acumulados (sin sobrescribir anteriores)
guardar_resultados_csv_mlp("resultados_mlp_todos.csv")


Escenario 2 – MLP (Dataset con FSS)
Epoch 1/100


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 12ms/step - accuracy: 0.6980 - loss: 0.5882 - val_accuracy: 0.8207 - val_loss: 0.4607
Epoch 2/100
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7973 - loss: 0.4716 - val_accuracy: 0.8479 - val_loss: 0.4091
Epoch 3/100
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8140 - loss: 0.4371 - val_accuracy: 0.8637 - val_loss: 0.3767
Epoch 4/100
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8320 - loss: 0.4184 - val_accuracy: 0.8709 - val_loss: 0.3560
Epoch 5/100
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8411 - loss: 0.3912 - val_accuracy: 0.8838 - val_loss: 0.3430
Epoch 6/100
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8545 - loss: 0.3655 - val_accuracy: 0.8867 - val_loss: 0.3245
Epoch 7/100
[1m88/88[0m [32m━━━━━━━━━━━━━━

### Escenario 3: Wrapper SFS (k=15)

In [7]:
import time
from tensorflow.keras.callbacks import EarlyStopping

print("Escenario 3 – MLP (Selección por Wrapper SFS, k=8, muestra 1500)")
df3 = pd.read_csv("/kaggle/input/data-csv/Hipertension_Arterial_Mexico_limpio.csv")
y3 = df3["riesgo_hipertension"]

X3_base = df3.drop(columns=["riesgo_hipertension", "FOLIO_I"]).copy()
if "sueno_horas" in X3_base.columns:
    X3 = pd.get_dummies(X3_base, columns=["sueno_horas"], drop_first=True)
    feature_names = X3.columns
else:
    X3 = X3_base.values
    feature_names = X3_base.columns

scaler = StandardScaler()
X3 = scaler.fit_transform(X3)

# Selección sobre muestra
sample_size = 1500
X3_sample = X3[:sample_size]
y3_sample = y3[:sample_size]
modelo_sfs = MLPClassifier(max_iter=500, random_state=42)

print("Iniciando selección de variables (wrapper SFS) sobre muestra de 1500 registros...")
inicio = time.time()
sfs = SFS(modelo_sfs,
          k_features=8,
          forward=True,
          floating=False,
          scoring='roc_auc',
          cv=3,
          n_jobs=1)
sfs = sfs.fit(X3_sample, y3_sample)
fin = time.time()
print("="*60)
print(f"Selección de variables completada en {fin-inicio:.2f} segundos")
print("Variables seleccionadas:", list(feature_names[list(sfs.k_feature_idx_)]))
print("="*60)

# Aplicar selección a todo el dataset
selected_idx = list(sfs.k_feature_idx_)
X3_selected = X3[:, selected_idx]

# Entrenamiento con todo el dataset
early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
input_dim = X3_selected.shape[1]
mlp = KerasClassifier(
    model=crear_modelo_mlp,
    model__input_dim=input_dim,
    epochs=100,
    batch_size=32,
    verbose=1,
    callbacks=[early_stop],
    validation_split=0.2
)

X3_train, X3_test, y3_train, y3_test = train_test_split(
    X3_selected, y3, test_size=0.2, stratify=y3, random_state=42
)

# Evaluar y guardar
evaluar_modelo_mlp(mlp, X3_train, X3_test, y3_train, y3_test, escenario="Escenario 3 – MLP")

guardar_resultados_csv_mlp("resultados_mlp_todos.csv")


Escenario 3 – MLP (Selección por Wrapper SFS, k=8, muestra 1500)
Iniciando selección de variables (wrapper SFS) sobre muestra de 1500 registros...




Selección de variables completada en 1117.71 segundos
Variables seleccionadas: ['edad', 'distancia_rodilla_talon', 'circunferencia_de_la_pantorrilla', 'tension_arterial', 'actividad_total', 'peso_corregido', 'estatura_corregida', 'imc']
Epoch 1/100


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 12ms/step - accuracy: 0.7416 - loss: 0.6025 - val_accuracy: 0.8121 - val_loss: 0.4648
Epoch 2/100
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8234 - loss: 0.4720 - val_accuracy: 0.8623 - val_loss: 0.3937
Epoch 3/100
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8244 - loss: 0.4266 - val_accuracy: 0.8737 - val_loss: 0.3577
Epoch 4/100
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8352 - loss: 0.4055 - val_accuracy: 0.8737 - val_loss: 0.3334
Epoch 5/100
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8607 - loss: 0.3616 - val_accuracy: 0.8881 - val_loss: 0.3152
Epoch 6/100
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8570 - loss: 0.3584 - val_accuracy: 0.8852 - val_loss: 0.3007
Epoch 7/100
[1m88/88[0m [32m━━━━━━━━━━━━━━

### Escenario 4: Wrapper SFS (best)

In [8]:
import time
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from mlxtend.feature_selection import SequentialFeatureSelector as SFS

print("Escenario 4 – MLP (Selección automática Wrapper SFS, best, muestra 1500)")
df4 = pd.read_csv("/kaggle/input/data-csv/Hipertension_Arterial_Mexico_limpio.csv")
y4 = df4["riesgo_hipertension"]

X4_base = df4.drop(columns=["riesgo_hipertension", "FOLIO_I"]).copy()
if "sueno_horas" in X4_base.columns:
    X4 = pd.get_dummies(X4_base, columns=["sueno_horas"], drop_first=True)
    feature_names = X4.columns
else:
    X4 = X4_base.values
    feature_names = X4_base.columns

scaler = StandardScaler()
X4 = scaler.fit_transform(X4)

# Selección sobre muestra
sample_size = 1500
X4_sample = X4[:sample_size]
y4_sample = y4[:sample_size]

modelo_sfs = MLPClassifier(max_iter=500, random_state=42)

print("Iniciando selección automática de variables (wrapper SFS, best) sobre muestra de 1500 registros...")
inicio = time.time()
sfs = SFS(modelo_sfs,
          k_features='best',
          forward=True,
          floating=False,
          scoring='roc_auc',
          cv=3,
          n_jobs=1)
sfs = sfs.fit(X4_sample, y4_sample)
fin = time.time()

selected_idx = list(sfs.k_feature_idx_)
print("="*60)
print(f"Selección de variables completada en {fin-inicio:.2f} segundos")
print(f"Número óptimo de variables: {len(selected_idx)}")
print("Variables seleccionadas:", list(feature_names[selected_idx]))
print("="*60)

# Aplicar selección al dataset completo
X4_selected = X4[:, selected_idx]

# Entrenamiento final con KerasClassifier
early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
input_dim = X4_selected.shape[1]
mlp = KerasClassifier(
    model=crear_modelo_mlp,
    model__input_dim=input_dim,
    epochs=100,
    batch_size=32,
    verbose=1,
    callbacks=[early_stop],
    validation_split=0.2
)

X4_train, X4_test, y4_train, y4_test = train_test_split(
    X4_selected, y4, test_size=0.2, stratify=y4, random_state=42
)

# Evaluación y guardado
evaluar_modelo_mlp(mlp, X4_train, X4_test, y4_train, y4_test, escenario="Escenario 4 – MLP")
guardar_resultados_csv_mlp("resultados_mlp_todos.csv")


Escenario 4 – MLP (Selección automática Wrapper SFS, best, muestra 1500)
Iniciando selección automática de variables (wrapper SFS, best) sobre muestra de 1500 registros...




Selección de variables completada en 3251.67 segundos
Número óptimo de variables: 9
Variables seleccionadas: ['edad', 'medida_cintura', 'distancia_rodilla_talon', 'circunferencia_de_la_pantorrilla', 'tension_arterial', 'actividad_total', 'peso_corregido', 'estatura_corregida', 'imc']
Epoch 1/100


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 15ms/step - accuracy: 0.6613 - loss: 0.6259 - val_accuracy: 0.8164 - val_loss: 0.4611
Epoch 2/100
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8075 - loss: 0.4868 - val_accuracy: 0.8522 - val_loss: 0.3960
Epoch 3/100
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8398 - loss: 0.4241 - val_accuracy: 0.8723 - val_loss: 0.3583
Epoch 4/100
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8362 - loss: 0.4076 - val_accuracy: 0.8766 - val_loss: 0.3374
Epoch 5/100
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8500 - loss: 0.3763 - val_accuracy: 0.8824 - val_loss: 0.3182
Epoch 6/100
[1m88/88[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8531 - loss: 0.3619 - val_accuracy: 0.8881 - val_loss: 0.3060
Epoch 7/100
[1m88/88[0m [32m━━━━━━━━━━━━━━