In [18]:
import pandas as pd

# Cargar el dataset
df = pd.read_excel("dataset_resultado_categorizado.xlsx")

# Calcular resumen de nulos
missing_summary = df.isnull().sum()
missing_percentage = (missing_summary / len(df)) * 100

# Crear dataframe con el resumen
missing_df = pd.DataFrame({
    "Variable": missing_summary.index,
    "Nulos": missing_summary.values,
    "Porcentaje (%)": missing_percentage.values
}).sort_values(by="Nulos", ascending=False)

# Mostrar resultado
print(missing_df)


                               Variable  Nulos  Porcentaje (%)
43                     Prot24 h (g/24H)     21       28.000000
84  Tiempo en alcanzar remisión (meses)     17       22.666667
93                             VDI 24 m     16       21.333333
77                        Respuesta 24m     16       21.333333
49                           MPO título     15       20.000000
..                                  ...    ...             ...
91                               Muerte      0        0.000000
90                             Diálisis      0        0.000000
95                                 Edad      0        0.000000
96                  Resultado renal cat      0        0.000000
97                      Evolucion Final      0        0.000000

[98 rows x 3 columns]


In [19]:

import pandas as pd

# Carga del dataset (ajusta el nombre si es necesario)
df = pd.read_excel("dataset_resultado_categorizado.xlsx")

# Mostramos tipos de datos
df.dtypes.value_counts()


int64      62
float64    33
object      3
Name: count, dtype: int64

In [20]:
# Ver columnas que no son numéricas
df.select_dtypes(include=['object']).columns

for col in ['Tipo_vasculitis', 'Tipo', 'Evolucion Final']:
    print(f"\n{col}:")
    print(df[col].value_counts())



Tipo_vasculitis:
Tipo_vasculitis
MPO    47
PR3    23
NEG     5
Name: count, dtype: int64

Tipo:
Tipo
PAM     35
GPA     28
EGPA    12
Name: count, dtype: int64

Evolucion Final:
Evolucion Final
Nada                   31
Muerte                 17
IRC                    16
IRC, ERCA               7
IRC, ERCA, Diálisis     4
Name: count, dtype: int64


In [21]:
# Copia del DataFrame por seguridad
df_modelo = df.copy()

# One-hot encoding de variables categóricas
df_modelo = pd.get_dummies(df_modelo, columns=['Tipo_vasculitis', 'Tipo'], drop_first=True)

# Codificación binaria del target
df_modelo['target'] = df_modelo['Evolucion Final'].apply(lambda x: 0 if x == 'Nada' else 1)

# Eliminamos la columna original del target
df_modelo.drop(columns=['Evolucion Final'], inplace=True)


In [22]:
# Separar variables predictoras (X) y variable objetivo binaria (y)
X = df_modelo.drop(columns='target')
y = df_modelo['target']


In [23]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

print(f"X shape: {X_scaled.shape}")
print(f"y shape: {y.shape}")
print(f"y valores únicos: {y.unique()}")


X shape: (75, 99)
y shape: (75,)
y valores únicos: [1 0]


In [24]:
# Importación de librerías
import pandas as pd
import numpy as np
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam

# 1. Cargar el dataset real con imputación parcial
df = pd.read_excel("dataset_resultado_categorizado.xlsx")

# 2. Crear variable binaria como objetivo (0 = Nada, 1 = cualquier evolución negativa)
df['target'] = df['Evolucion Final'].apply(lambda x: 1 if x != 'Nada' else 0)

# 3. Codificar variables categóricas con one-hot encoding
df_modelo = pd.get_dummies(df, columns=['Tipo_vasculitis', 'Tipo'], drop_first=True)

# 4. Eliminar columna objetivo textual
df_modelo.drop(columns=['Evolucion Final'], inplace=True)

# 5. Separar variables independientes y dependiente
X = df_modelo.drop(columns='target')
y = df_modelo['target']

# 6. Escalar variables numéricas
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# 7. Validación cruzada
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# 8. Resultados a almacenar
results = {
    'accuracy': [],
    'precision': [],
    'recall': [],
    'f1': [],
    'auc': []
}

# 9. Entrenamiento del modelo en cada fold
for train_idx, val_idx in skf.split(X_scaled, y):
    X_train, X_val = X_scaled[train_idx], X_scaled[val_idx]
    y_train, y_val = y.iloc[train_idx], y.iloc[val_idx]

    model = Sequential([
        Dense(64, activation='relu', input_shape=(X.shape[1],)),
        Dropout(0.3),
        Dense(32, activation='relu'),
        Dense(1, activation='sigmoid')
    ])

    model.compile(optimizer=Adam(learning_rate=0.001),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])

    model.fit(X_train, y_train, epochs=50, batch_size=16, verbose=0)

    y_pred = model.predict(X_val).ravel()
    y_pred_label = (y_pred > 0.5).astype(int)

    # 10. Evaluación
    results['accuracy'].append(accuracy_score(y_val, y_pred_label))
    results['precision'].append(precision_score(y_val, y_pred_label, zero_division=0))
    results['recall'].append(recall_score(y_val, y_pred_label))
    results['f1'].append(f1_score(y_val, y_pred_label))
    results['auc'].append(roc_auc_score(y_val, y_pred))

# 11. Visualizar resultados en forma de tabla
results_df = pd.DataFrame(results)
print(results_df)
print("\nResumen estadístico:")
print(results_df.describe().round(4))


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
   accuracy  precision  recall        f1  auc
0  0.600000   0.600000     1.0  0.750000  0.5
1  0.600000   0.600000     1.0  0.750000  0.5
2  0.600000   0.600000     1.0  0.750000  0.5
3  0.600000   0.600000     1.0  0.750000  0.5
4  0.533333   0.533333     1.0  0.695652  0.5

Resumen estadístico:
       accuracy  precision  recall      f1  auc
count    5.0000     5.0000     5.0  5.0000  5.0
mean     0.5867     0.5867     1.0  0.7391  0.5
std      0.0298     0.0298     0.0  0.0243  0.0
min      0.5333     0.5333     1.0  0.6957  0.5
25%      0.6000     0.6000     1.0  0.7500  0.5
50%      0.6000     0.6000     1.0  0.7500  0.5
75%      0.6000     0.6000     1.0  0.7500  0.5
max      0.6000     0.6000     1.0  0.7500  0.5


In [26]:
import pandas as pd
import numpy as np
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Input
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping

# Supone que ya tienes definidos X y y con tus datos reales (escenario 1)

resultados = []

kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

for train_index, val_index in kfold.split(X, y):
    X_train, X_val = X.iloc[train_index], X.iloc[val_index]
    y_train, y_val = y.iloc[train_index], y.iloc[val_index]

    # Normalización
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_val_scaled = scaler.transform(X_val)

    # Modelo
    model = Sequential([
        Input(shape=(X.shape[1],)),
        Dense(128, activation='relu'),
        Dropout(0.3),
        Dense(64, activation='relu'),
        Dropout(0.2),
        Dense(1, activation='sigmoid')
    ])

    model.compile(optimizer=Adam(learning_rate=0.001),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])

    # Early stopping para evitar sobreajuste
    es = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)


    # Entrenamiento
    model.fit(X_train_scaled, y_train,
              validation_data=(X_val_scaled, y_val),
              epochs=100,
              batch_size=8,
              callbacks=[es],
              verbose=0,
              )

    # Predicciones
    y_pred_probs = model.predict(X_val_scaled).ravel()
    y_pred = (y_pred_probs > 0.5).astype(int)

    resultados.append({
        'accuracy': accuracy_score(y_val, y_pred),
        'precision': precision_score(y_val, y_pred, zero_division=0),
        'recall': recall_score(y_val, y_pred, zero_division=0),
        'f1': f1_score(y_val, y_pred, zero_division=0),
        'auc': roc_auc_score(y_val, y_pred_probs)
    })

# Mostrar resultados
df_resultados = pd.DataFrame(resultados)
print(df_resultados)

print("\nResumen estadístico:")
print(df_resultados.describe().round(4))


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
   accuracy  precision  recall        f1  auc
0  0.600000   0.600000     1.0  0.750000  0.5
1  0.600000   0.600000     1.0  0.750000  0.5
2  0.600000   0.600000     1.0  0.750000  0.5
3  0.600000   0.600000     1.0  0.750000  0.5
4  0.533333   0.533333     1.0  0.695652  0.5

Resumen estadístico:
       accuracy  precision  recall      f1  auc
count    5.0000     5.0000     5.0  5.0000  5.0
mean     0.5867     0.5867     1.0  0.7391  0.5
std      0.0298     0.0298     0.0  0.0243  0.0
min      0.5333     0.5333     1.0  0.6957  0.5
25%      0.6000     0.6000     1.0  0.7500  0.5
50%      0.6000     0.6000     1.0  0.7500  0.5
75%   

In [27]:
import numpy as np
import pandas as pd
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from keras.models import Sequential
from keras.layers import Dense, Dropout, Input
from keras.callbacks import EarlyStopping

# Inicializar listas para almacenar métricas
resultados = []

# Configurar validación cruzada
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

for train_idx, val_idx in kfold.split(X, y):
    # División de datos
    X_train, X_val = X.iloc[train_idx], X.iloc[val_idx]
    y_train, y_val = y.iloc[train_idx], y.iloc[val_idx]

    # Escalado de características
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_val_scaled = scaler.transform(X_val)

    # Cálculo de pesos de clase
    y_train_array = np.array(y_train)
    pesos = compute_class_weight(class_weight='balanced', classes=np.unique(y_train_array), y=y_train_array)
    class_weights = dict(zip(np.unique(y_train_array), pesos))

    # Construcción del modelo
    model = Sequential([
        Input(shape=(X.shape[1],)),
        Dense(64, activation='relu'),
        Dropout(0.3),
        Dense(32, activation='relu'),
        Dropout(0.2),
        Dense(1, activation='sigmoid')
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    # EarlyStopping
    es = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

    # Entrenamiento
    model.fit(X_train_scaled, y_train,
              validation_data=(X_val_scaled, y_val),
              epochs=100,
              batch_size=8,
              callbacks=[es],
              verbose=0,
              class_weight=class_weights)

    # Evaluación
    y_pred_probs = model.predict(X_val_scaled).ravel()
    y_pred = (y_pred_probs > 0.5).astype(int)

    resultados.append({
        'accuracy': accuracy_score(y_val, y_pred),
        'precision': precision_score(y_val, y_pred, zero_division=0),
        'recall': recall_score(y_val, y_pred, zero_division=0),
        'f1': f1_score(y_val, y_pred, zero_division=0),
        'auc': roc_auc_score(y_val, y_pred_probs)
    })

# Mostrar resultados
df_resultados = pd.DataFrame(resultados)
print(df_resultados)
print("\nResumen estadístico:")
print(df_resultados.describe())


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
   accuracy  precision  recall        f1  auc
0  0.600000   0.600000     1.0  0.750000  0.5
1  0.600000   0.600000     1.0  0.750000  0.5
2  0.400000   0.000000     0.0  0.000000  0.5
3  0.400000   0.000000     0.0  0.000000  0.5
4  0.533333   0.533333     1.0  0.695652  0.5

Resumen estadístico:
       accuracy  precision    recall        f1  auc
count  5.000000   5.000000  5.000000  5.000000  5.0
mean   0.506667   0.346667  0.600000  0.439130  0.5
std    0.101105   0.317630  0.547723  0.401483  0.0
min    0.400000   0.000000  0.000000  0.000000  0.5
25%    0.400000   0.000000  0.000000  0.000000  0.5
50%    0.533333   0.533333  1.

In [None]:
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from keras.models import Sequential
from keras.layers import Dense, Input
from keras.callbacks import EarlyStopping
import numpy as np
import pandas as pd

resultados = []

kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

for train_idx, val_idx in kfold.split(X, y):
    X_train, X_val = X.iloc[train_idx], X.iloc[val_idx]
    y_train, y_val = y.iloc[train_idx], y.iloc[val_idx]

    # Normalizar
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_val_scaled = scaler.transform(X_val)

    # Pesos de clase
    pesos = compute_class_weight('balanced', classes=np.unique(y_train), y=y_train)
    class_weights = dict(zip(np.unique(y_train), pesos))

    # Arquitectura muy simple
    model = Sequential([
        Input(shape=(X.shape[1],)),
        Dense(16, activation='relu'),
        Dense(1, activation='sigmoid')
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    # Early stopping
    es = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

    # Entrenamiento
    model.fit(X_train_scaled, y_train,
              validation_data=(X_val_scaled, y_val),
              epochs=100,
              batch_size=4,
              verbose=0,
              class_weight=class_weights,
              callbacks=[es])

    # Evaluación
    y_pred_probs = model.predict(X_val_scaled).ravel()
    y_pred = (y_pred_probs > 0.5).astype(int)

    resultados.append({
        'accuracy': accuracy_score(y_val, y_pred),
        'precision': precision_score(y_val, y_pred, zero_division=0),
        'recall': recall_score(y_val, y_pred, zero_division=0),
        'f1': f1_score(y_val, y_pred, zero_division=0),
        'auc': roc_auc_score(y_val, y_pred_probs)
    })

# Mostrar métricas
df_resultados = pd.DataFrame(resultados)
print(df_resultados)
print("\nResumen estadístico:")
print(df_resultados.describe())


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
   accuracy  precision  recall    f1  auc
0  0.600000        0.6     1.0  0.75  0.5
1  0.400000        0.0     0.0  0.00  0.5
2  0.400000        0.0     0.0  0.00  0.5
3  0.400000        0.0     0.0  0.00  0.5
4  0.466667        0.0     0.0  0.00  0.5

Resumen estadístico:
       accuracy  precision    recall       f1  auc
count  5.000000   5.000000  5.000000  5.00000  5.0
mean   0.453333   0.120000  0.200000  0.15000  0.5
std    0.086923   0.268328  0.447214  0.33541  0.0
min    0.400000   0.000000  0.000000  0.00000  0.5
25%    0.400000   0.000000  0.000000  0.00000  0.5
50%    0.400000   0.000000  0.000000  0.00000  0.5
75%    0.

: 