In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Input
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping

# Cargar el dataset
df = pd.read_excel("dataset_resultado_categorizado.xlsx")

# Crear variable binaria
df['target'] = df['Evolucion Final'].apply(lambda x: 1 if x != 'Nada' else 0)

# Imputar valores faltantes con la mediana
df_imputado = df.copy()
for col in df_imputado.select_dtypes(include=[np.number]).columns:
    df_imputado[col] = df_imputado[col].fillna(df_imputado[col].median())

# Codificar variables categóricas
df_imputado = pd.get_dummies(df_imputado, columns=['Tipo_vasculitis', 'Tipo'], drop_first=True)
df_imputado.drop(columns=['Evolucion Final'], inplace=True)

# Separar X e y
X = df_imputado.drop(columns='target')
y = df_imputado['target']

# Validación cruzada
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
resultados = []

for train_idx, val_idx in skf.split(X, y):
    X_train, X_val = X.iloc[train_idx], X.iloc[val_idx]
    y_train, y_val = y.iloc[train_idx], y.iloc[val_idx]

    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_val_scaled = scaler.transform(X_val)

    model = Sequential([
        Input(shape=(X.shape[1],)),
        Dense(128, activation='relu'),
        Dropout(0.3),
        Dense(64, activation='relu'),
        Dropout(0.2),
        Dense(1, activation='sigmoid')
    ])

    model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

    es = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

    model.fit(X_train_scaled, y_train,
              validation_data=(X_val_scaled, y_val),
              epochs=100, batch_size=8, callbacks=[es], verbose=0)

    y_pred_probs = model.predict(X_val_scaled).ravel()
    y_pred = (y_pred_probs > 0.5).astype(int)

    resultados.append({
        'accuracy': accuracy_score(y_val, y_pred),
        'precision': precision_score(y_val, y_pred, zero_division=0),
        'recall': recall_score(y_val, y_pred, zero_division=0),
        'f1': f1_score(y_val, y_pred, zero_division=0),
        'auc': roc_auc_score(y_val, y_pred_probs)
    })

df_resultados = pd.DataFrame(resultados)
print(df_resultados)
print("\nResumen estadístico:")
print(df_resultados.describe().round(4))


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
   accuracy  precision    recall        f1       auc
0  0.800000   0.800000  0.888889  0.842105  0.722222
1  0.866667   0.818182  1.000000  0.900000  0.944444
2  0.866667   0.888889  0.888889  0.888889  0.962963
3  0.866667   0.888889  0.888889  0.888889  0.925926
4  0.800000   0.857143  0.750000  0.800000  0.928571

Resumen estadístico:
       accuracy  precision  recall      f1     auc
count    5.0000     5.0000  5.0000  5.0000  5.0000
mean     0.8400     0.8506  0.8833  0.8640  0.8968
std      0.0365     0.0406  0.0887  0.0422  0.0987
min      0.8000     0.8000  0.7500  0.8000  0.7222
25%      0.8000     0.8182  0.8889  0.8421  0

: 

In [2]:
import os
import random
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Input
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping

# ========================
# Configuración Reproducible
# ========================
SEED = 42
os.environ['PYTHONHASHSEED'] = str(SEED)
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

# ========================
# Cargar y preparar el dataset
# ========================
df = pd.read_excel("dataset_resultado_categorizado.xlsx")

# Crear variable binaria para evolución
df['target'] = df['Evolucion Final'].apply(lambda x: 1 if x != 'Nada' else 0)

# Imputar valores faltantes con la mediana
df_imputado = df.copy()
for col in df_imputado.select_dtypes(include=[np.number]).columns:
    df_imputado[col] = df_imputado[col].fillna(df_imputado[col].median())

# Codificar variables categóricas
df_imputado = pd.get_dummies(df_imputado, columns=['Tipo_vasculitis', 'Tipo'], drop_first=True)

# Eliminar columna de texto no necesaria
df_imputado.drop(columns=['Evolucion Final'], inplace=True)

# Separar X e y
X = df_imputado.drop(columns='target')
y = df_imputado['target']

# ========================
# Validación cruzada estratificada
# ========================
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=SEED)
resultados = []

for train_idx, val_idx in skf.split(X, y):
    X_train, X_val = X.iloc[train_idx], X.iloc[val_idx]
    y_train, y_val = y.iloc[train_idx], y.iloc[val_idx]

    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_val_scaled = scaler.transform(X_val)

    model = Sequential([
        Input(shape=(X.shape[1],)),
        Dense(128, activation='relu'),
        Dropout(0.3),
        Dense(64, activation='relu'),
        Dropout(0.2),
        Dense(1, activation='sigmoid')
    ])

    model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

    es = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

    model.fit(X_train_scaled, y_train,
              validation_data=(X_val_scaled, y_val),
              epochs=100, batch_size=8, callbacks=[es], verbose=0)

    y_pred_probs = model.predict(X_val_scaled).ravel()
    y_pred = (y_pred_probs > 0.5).astype(int)

    resultados.append({
        'accuracy': accuracy_score(y_val, y_pred),
        'precision': precision_score(y_val, y_pred, zero_division=0),
        'recall': recall_score(y_val, y_pred, zero_division=0),
        'f1': f1_score(y_val, y_pred, zero_division=0),
        'auc': roc_auc_score(y_val, y_pred_probs)
    })

# ========================
# Mostrar resultados
# ========================
df_resultados = pd.DataFrame(resultados)
print(df_resultados)
print("\nResumen estadístico:")
print(df_resultados.describe().round(4))


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 65ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 64ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 67ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step
   accuracy  precision    recall        f1       auc
0  0.733333   0.777778  0.777778  0.777778  0.722222
1  0.866667   0.818182  1.000000  0.900000  1.000000
2  0.800000   1.000000  0.666667  0.800000  0.851852
3  0.866667   0.888889  0.888889  0.888889  0.944444
4  0.800000   0.857143  0.750000  0.800000  0.928571

Resumen estadístico:
       accuracy  precision  recall      f1     auc
count    5.0000     5.0000  5.0000  5.0000  5.0000
mean     0.8133     0.8684  0.8167  0.8333  0.8894
std      0.0558     0.0846  0.1297  0.0567  0.1074
min      0.7333     0.7778  0.6667  0.7778  0.7222
25%      0.8000     0.8182  0.7500  0.8000  0