In [122]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [123]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [124]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

In [125]:
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score

In [126]:
# Accumulates one metrics dict per model name; filled in by evaluar_modelo.
resultados = {}

In [127]:
def evaluar_modelo(nombre, y_true, y_pred, resultados_dict=None):
    """Compute accuracy plus per-class precision/recall/F1 and store them under ``nombre``.

    Parameters
    ----------
    nombre : str
        Key identifying the model in the results mapping.
    y_true : array-like
        Ground-truth labels. Metrics are reported for classes 0 and 1.
    y_pred : array-like
        Labels predicted by the model for the same samples.
    resultados_dict : dict, optional
        Mapping that receives the metrics. When omitted, the notebook-level
        ``resultados`` dict is used, so three-argument calls behave as before.

    Returns
    -------
    None
        The mapping is updated in place. Nothing is returned so that a call on
        the last line of a cell does not echo the metrics.
    """
    if resultados_dict is None:
        # Fall back to the shared notebook-level accumulator.
        resultados_dict = resultados

    metricas = {'Accuracy': accuracy_score(y_true, y_pred)}
    # Insertion order determines the column order of the comparison table.
    for etiqueta, calcular in (('Precision', precision_score),
                               ('Recall', recall_score),
                               ('F1', f1_score)):
        for clase in (0, 1):
            metricas[f'{etiqueta} ({clase})'] = calcular(y_true, y_pred, pos_label=clase)

    resultados_dict[nombre] = metricas

1. RandomForestClassifier 

In [128]:
from sklearn.ensemble import RandomForestClassifier

In [129]:
# Load the dataset used by the RandomForest and XGBoost sections and preview it.
df_encoded = pd.read_csv('./data/dataset_encoded.csv')
df_encoded.head()

Unnamed: 0,Age,NRS_pain,SBP,HR,RR,BT,Group_1,Group_2,Sex_1,Sex_2,...,Arrival mode_5,Arrival mode_6,Arrival mode_7,Injury_1,Injury_2,Mental_1,Mental_2,Mental_3,Mental_4,target_emergencia
0,71,2.0,160.0,84.0,18.0,36.6,False,True,False,True,...,False,False,False,False,True,True,False,False,False,0
1,56,2.0,137.0,60.0,20.0,36.5,True,False,True,False,...,False,False,False,False,True,True,False,False,False,0
2,68,2.0,130.0,102.0,20.0,36.6,False,True,True,False,...,False,False,False,False,True,True,False,False,False,0
3,71,3.0,139.0,88.0,20.0,36.5,True,False,False,True,...,False,False,False,True,False,True,False,False,False,0
4,58,3.0,91.0,93.0,18.0,36.5,True,False,False,True,...,False,False,False,True,False,True,False,False,False,0


In [130]:
# Features are every column except the label; the label is 'target_emergencia'.
X = df_encoded.drop(columns='target_emergencia')
y = df_encoded['target_emergencia']

In [131]:
# Stratified hold-out split with a fixed seed; test_size is not given, so the library default applies.
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=42)

In [132]:
# Baseline random forest: only the seed is set. fit() returns the estimator,
# so construction and training are chained.
rf = RandomForestClassifier(random_state=42).fit(X_train, y_train)
y_pred_rf = rf.predict(X_test)

In [133]:
# Print the per-class report, then record the same predictions in `resultados`.
print("RandomForest:", classification_report(y_test, y_pred_rf), sep="\n")
evaluar_modelo('RandomForest', y_test, y_pred_rf)

RandomForest:
              precision    recall  f1-score   support

           0       0.71      0.63      0.66       134
           1       0.75      0.81      0.78       183

    accuracy                           0.73       317
   macro avg       0.73      0.72      0.72       317
weighted avg       0.73      0.73      0.73       317



2. XGBoostClassifier

In [134]:
from xgboost import XGBClassifier

In [135]:
# XGBoost baseline trained on the same split as the random forest.
xgb = XGBClassifier(eval_metric='logloss', random_state=42)
y_pred_xgb = xgb.fit(X_train, y_train).predict(X_test)

In [136]:
# Print the per-class report, then record the same predictions in `resultados`.
print("XGBoost:", classification_report(y_test, y_pred_xgb), sep="\n")
evaluar_modelo('XGBoost', y_test, y_pred_xgb)

XGBoost:
              precision    recall  f1-score   support

           0       0.63      0.61      0.62       134
           1       0.72      0.74      0.73       183

    accuracy                           0.68       317
   macro avg       0.68      0.67      0.68       317
weighted avg       0.68      0.68      0.68       317



3. CatBoostClassifier

In [137]:
from catboost import CatBoostClassifier

In [138]:
# Load the dataset used for CatBoost and list the columns passed to it as categorical features.
df_selected = pd.read_csv('./data/dataset_selected.csv')
cat_features = ['Group', 'Sex', 'Arrival mode', 'Injury', 'Mental']

In [139]:
# Rebinds X / y to the CatBoost dataset (the earlier df_encoded features are replaced).
X = df_selected.drop(columns='target_emergencia')
y = df_selected['target_emergencia']

In [140]:
# Same stratified, seeded split call as before, now applied to the CatBoost dataset.
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=42)

In [141]:
# Silent, seeded CatBoost baseline; the categorical columns are declared at fit time.
catboost = CatBoostClassifier(verbose=0, random_state=42).fit(
    X_train, y_train, cat_features=cat_features
)
y_pred_catboost = catboost.predict(X_test)

In [142]:
# Print the per-class report, then record the same predictions in `resultados`.
print("CatBoost:", classification_report(y_test, y_pred_catboost), sep="\n")
evaluar_modelo('CatBoost', y_test, y_pred_catboost)

CatBoost:
              precision    recall  f1-score   support

           0       0.70      0.60      0.65       134
           1       0.74      0.81      0.77       183

    accuracy                           0.73       317
   macro avg       0.72      0.71      0.71       317
weighted avg       0.72      0.73      0.72       317



4. SVM (RBF)

In [143]:
from sklearn.svm import SVC

In [144]:
# Load the dataset used by the SVM, MLP and KNN sections and rebind X / y to it.
df_preprocess = pd.read_csv('./data/dataset_preprocess.csv')
X = df_preprocess.drop(columns='target_emergencia')
y = df_preprocess['target_emergencia']

In [145]:
# Stratified, seeded split of the preprocessed dataset; SVM, MLP and KNN below all train on it.
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=42)

In [146]:
# RBF-kernel SVM baseline.
# NOTE(review): probability=True is kept as-is, but no visible cell calls
# predict_proba; confirm whether it is needed, since it makes fit slower.
svm = SVC(kernel='rbf', probability=True, random_state=42).fit(X_train, y_train)
y_pred_svm = svm.predict(X_test)

In [147]:
# Print the per-class report, then record the same predictions in `resultados`.
print("SVM:", classification_report(y_test, y_pred_svm), sep="\n")
evaluar_modelo('SVM', y_test, y_pred_svm)

SVM:
              precision    recall  f1-score   support

           0       0.70      0.50      0.58       134
           1       0.70      0.84      0.76       183

    accuracy                           0.70       317
   macro avg       0.70      0.67      0.67       317
weighted avg       0.70      0.70      0.69       317



5. MLPClassifier (Red neuronal)

In [148]:
from sklearn.neural_network import MLPClassifier

In [149]:
# Multi-layer perceptron configuration (training happens in the next cell).
mlp = MLPClassifier(
    hidden_layer_sizes=(100,),  # one hidden layer with 100 units
    max_iter=1000,
    early_stopping=True,  # NOTE(review): presumably holds out part of the training data for validation; confirm against the scikit-learn docs
    random_state=42
)

In [150]:
# Train on the split created for the SVM section, then predict the held-out set.
y_pred_mlp = mlp.fit(X_train, y_train).predict(X_test)

In [151]:
# Print the per-class report, then record the same predictions in `resultados`.
print("MLPClassifier:", classification_report(y_test, y_pred_mlp), sep="\n")
evaluar_modelo('MLPClassifier', y_test, y_pred_mlp)

MLPClassifier:
              precision    recall  f1-score   support

           0       0.60      0.44      0.51       134
           1       0.66      0.78      0.71       183

    accuracy                           0.64       317
   macro avg       0.63      0.61      0.61       317
weighted avg       0.63      0.64      0.63       317



6. KNNClassifier

In [152]:
from sklearn.neighbors import KNeighborsClassifier

In [153]:
# 5-nearest-neighbours baseline on the same split as the SVM and MLP.
knn = KNeighborsClassifier(n_neighbors=5)
y_pred_knn = knn.fit(X_train, y_train).predict(X_test)

In [154]:
# Print the per-class report, then record the same predictions in `resultados`.
print("KNN:", classification_report(y_test, y_pred_knn), sep="\n")
evaluar_modelo('KNN', y_test, y_pred_knn)

KNN:
              precision    recall  f1-score   support

           0       0.61      0.72      0.66       134
           1       0.76      0.66      0.71       183

    accuracy                           0.68       317
   macro avg       0.68      0.69      0.68       317
weighted avg       0.70      0.68      0.69       317



In [155]:
# One row per model, one column per metric, rounded to 3 decimals for display.
df_resultados = pd.DataFrame(resultados).T.round(3)
display(df_resultados)

Unnamed: 0,Accuracy,Precision (0),Precision (1),Recall (0),Recall (1),F1 (0),F1 (1)
RandomForest,0.732,0.706,0.747,0.627,0.809,0.664,0.777
XGBoost,0.685,0.631,0.722,0.612,0.738,0.621,0.73
CatBoost,0.726,0.704,0.738,0.604,0.814,0.651,0.774
SVM,0.697,0.698,0.697,0.5,0.842,0.583,0.762
MLPClassifier,0.637,0.596,0.656,0.44,0.781,0.506,0.713
KNN,0.685,0.608,0.761,0.716,0.661,0.658,0.708


## Métricas clave según el objetivo

### Detectar emergencias sin errores graves
- **Métrica clave:** `Recall (1)`
- Mide qué tan bien el modelo identifica correctamente a los pacientes que realmente están en emergencia.
- Es fundamental en contextos clínicos donde omitir una emergencia (falso negativo) puede tener consecuencias graves.
- **Complemento útil:** `F1 (1)` para evaluar el balance general entre precisión y recall en la clase 1.

### Evitar saturar el hospital con falsos positivos
- **Métrica clave:** `Precision (1)`
- Evalúa qué tan confiables son las predicciones positivas del modelo.
- Es decir, de todos los pacientes que el modelo clasifica como "emergencia", ¿cuántos realmente lo son?
- **Complemento útil:** `Recall (0)` para medir qué tan bien identifica a los pacientes que no están en emergencia.

### Buscar equilibrio general del sistema
- **Métricas clave:** promedio de `F1 (0)` y `F1 (1)` (F1 macro) y `Accuracy`
- Buscamos un rendimiento balanceado entre emergencias y no emergencias.
- Ideal para sistemas en etapas de prototipo o modelos de apoyo clínico donde ambos tipos de error deben mantenerse bajo control.


---

## Escenarios de aplicación y modelos recomendados

### Escenario 1: Hospital pequeño con recursos limitados
- **Prioridad:** minimizar los falsos positivos, es decir, los pacientes clasificados como emergencia que en realidad no lo son.
- **Objetivo:** alta `Precision (1)` y buen `Recall (0)` para asegurar una asignación eficiente de recursos limitados.
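- **Modelos recomendados:** `KNN` (mejor `Precision (1)` = `0.761` y mejor `Recall (0)` = `0.716` en la tabla) o `RandomForest` (`0.747` / `0.627`). `XGBoost` queda por debajo en ambas métricas (`0.722` / `0.612`).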

### Escenario 2: Sistema nacional de atención de emergencias (tipo SAMU)
- **Prioridad:** no dejar pasar ninguna emergencia real.
- **Objetivo:** alto `Recall (1)` aunque aumenten los falsos positivos.
- **Modelo recomendado:** `CatBoost`, con un `Recall (1)` de `0.814`. Nota: el `Recall (1)` más alto de la evaluación lo obtuvo `SVM` (`0.842`), aunque con un `Recall (0)` de solo `0.5`.

### Escenario 3: Prototipo para derivación automatizada o triage digital
- **Prioridad:** construir un sistema base balanceado para revisión y ajuste posterior.
- **Objetivo:** buenas métricas generales (`Accuracy`, `F1 (1)`, `F1 (0)`).
- **Modelos recomendados:** `RandomForest` o `CatBoost`.


---



### Cambiando hiperparámetros

---

In [163]:
from sklearn.model_selection import GridSearchCV

In [167]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

def evaluar_modelo(nombre, y_true, y_pred, resultados_dict=None):
    """Compute accuracy plus per-class precision/recall/F1 and store them under ``nombre``.

    This redefinition lets the caller choose the destination mapping (for
    example ``resultados_h``). The destination is optional so that the earlier
    three-argument calls in this notebook keep working if their cells are
    re-run after this one.

    Parameters
    ----------
    nombre : str
        Key identifying the model in the results mapping.
    y_true : array-like
        Ground-truth labels. Metrics are reported for classes 0 and 1.
    y_pred : array-like
        Labels predicted by the model for the same samples.
    resultados_dict : dict, optional
        Mapping that receives the metrics. Defaults to the notebook-level
        ``resultados`` dict.

    Returns
    -------
    None
        The mapping is updated in place. Nothing is returned so that a call on
        the last line of a cell does not echo the metrics.
    """
    if resultados_dict is None:
        # Fall back to the baseline accumulator defined at the top of the notebook.
        resultados_dict = resultados

    metricas = {'Accuracy': accuracy_score(y_true, y_pred)}
    # Insertion order determines the column order of the comparison table.
    for etiqueta, calcular in (('Precision', precision_score),
                               ('Recall', recall_score),
                               ('F1', f1_score)):
        for clase in (0, 1):
            metricas[f'{etiqueta} ({clase})'] = calcular(y_true, y_pred, pos_label=clase)

    resultados_dict[nombre] = metricas

In [168]:
# Separate accumulator for the tuned models, kept apart from the baseline `resultados`.
resultados_h = {}

RandomForest

In [169]:
# Reload the encoded dataset for the hyperparameter-tuning section and preview it.
df_encoded = pd.read_csv('./data/dataset_encoded.csv')
df_encoded.head()

Unnamed: 0,Age,NRS_pain,SBP,HR,RR,BT,Group_1,Group_2,Sex_1,Sex_2,...,Arrival mode_5,Arrival mode_6,Arrival mode_7,Injury_1,Injury_2,Mental_1,Mental_2,Mental_3,Mental_4,target_emergencia
0,71,2.0,160.0,84.0,18.0,36.6,False,True,False,True,...,False,False,False,False,True,True,False,False,False,0
1,56,2.0,137.0,60.0,20.0,36.5,True,False,True,False,...,False,False,False,False,True,True,False,False,False,0
2,68,2.0,130.0,102.0,20.0,36.6,False,True,True,False,...,False,False,False,False,True,True,False,False,False,0
3,71,3.0,139.0,88.0,20.0,36.5,True,False,False,True,...,False,False,False,True,False,True,False,False,False,0
4,58,3.0,91.0,93.0,18.0,36.5,True,False,False,True,...,False,False,False,True,False,True,False,False,False,0


In [180]:
# Rebind X / y to the encoded dataset (they last pointed at the preprocessed one).
X = df_encoded.drop(columns='target_emergencia')
y = df_encoded['target_emergencia']

In [181]:
# Same stratified, seeded split call as in the baseline RandomForest section.
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=42)

In [182]:
# Unfitted base estimator handed to the grid search; this rebinds `rf`, replacing the fitted baseline model.
rf = RandomForestClassifier(random_state=42)

In [183]:
# Search space for the random forest: 3 x 3 x 2 x 2 = 36 candidate combinations.
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [None, 10, 15],
    'min_samples_split': [2, 5],
    'class_weight': [None, 'balanced']
}

In [184]:
# Exhaustive search over param_grid using the random forest created above.
grid_rf = GridSearchCV(
    rf,
    param_grid,
    cv=5,                      # 5-fold cross-validation
    scoring='f1',              # Optimize for F1-score
    verbose=1,                 # Show progress
    n_jobs=-1                  # Use all available cores
)

In [185]:
# Run the search on the training split only; X_test / y_test are not passed in.
grid_rf.fit(X_train, y_train)

Fitting 5 folds for each of 36 candidates, totalling 180 fits


0,1,2
,estimator,RandomForestC...ndom_state=42)
,param_grid,"{'class_weight': [None, 'balanced'], 'max_depth': [None, 10, ...], 'min_samples_split': [2, 5], 'n_estimators': [100, 200, ...]}"
,scoring,'f1'
,n_jobs,-1
,refit,True
,cv,5
,verbose,1
,pre_dispatch,'2*n_jobs'
,error_score,
,return_train_score,False

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,10
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,'sqrt'
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [186]:
# Report the winning combination and keep a handle on the estimator the search selected.
print("Mejores hiperparámetros encontrados:", grid_rf.best_params_)
best_rf = grid_rf.best_estimator_

Mejores hiperparámetros encontrados: {'class_weight': None, 'max_depth': 10, 'min_samples_split': 2, 'n_estimators': 100}


In [187]:
# Predict the held-out set with the tuned forest; this overwrites the baseline y_pred_rf.
y_pred_rf = best_rf.predict(X_test)

In [188]:
# Store the tuned forest's metrics in resultados_h, separate from the baseline results.
evaluar_modelo("RandomForest_Tuned", y_test, y_pred_rf, resultados_h)

CatBoost

In [190]:
from catboost import CatBoostClassifier

In [191]:
# Reload the CatBoost dataset and the list of columns passed to it as categorical features.
df_selected = pd.read_csv('./data/dataset_selected.csv')
cat_features = ['Group', 'Sex', 'Arrival mode', 'Injury', 'Mental']

In [192]:
# Rebind X / y to the CatBoost dataset for the tuning run.
X = df_selected.drop(columns='target_emergencia')
y = df_selected['target_emergencia']

In [193]:
# Same stratified, seeded split call as in the baseline CatBoost section.
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=42)

In [194]:
# Unfitted, silent, seeded CatBoost estimator handed to the grid search.
cat = CatBoostClassifier(verbose=0, random_state=42)

In [195]:
# Search space for CatBoost: 2 x 2 x 2 x 2 = 16 candidate combinations.
# This rebinds `param_grid`, replacing the random-forest grid.
param_grid = {
    'depth': [6, 7],
    'learning_rate': [0.05, 0.1],
    'iterations': [300, 500],
    'l2_leaf_reg': [3, 5]
}

In [196]:
# Grid search for CatBoost with the same cv / scoring / verbosity / parallelism settings as grid_rf.
grid_cb = GridSearchCV(
    estimator=cat,
    param_grid=param_grid,
    cv=5,
    scoring='f1',
    verbose=1,
    n_jobs=-1
)

In [197]:
# Run the search on the training split; cat_features is an extra fit parameter that
# GridSearchCV forwards to the CatBoost estimator's fit.
grid_cb.fit(X_train, y_train, cat_features=cat_features)

Fitting 5 folds for each of 16 candidates, totalling 80 fits


0,1,2
,estimator,<catboost.cor...0024007A1BC50>
,param_grid,"{'depth': [6, 7], 'iterations': [300, 500], 'l2_leaf_reg': [3, 5], 'learning_rate': [0.05, 0.1]}"
,scoring,'f1'
,n_jobs,-1
,refit,True
,cv,5
,verbose,1
,pre_dispatch,'2*n_jobs'
,error_score,
,return_train_score,False


In [198]:
# Predict the held-out set with the estimator selected by the search.
y_pred_cb = grid_cb.best_estimator_.predict(X_test)

In [199]:
# Store the tuned CatBoost metrics in resultados_h alongside the tuned forest.
evaluar_modelo("CatBoost_Tuned", y_test, y_pred_cb, resultados_h)

---

Comparativa

In [202]:
# Stack the tuned-model metrics underneath the baseline table.
# Note: df_tuned is not rounded here, unlike df_resultados, so the combined
# table shows mixed precision until it is rounded downstream.
df_tuned = pd.DataFrame(resultados_h).transpose()
df_completo = pd.concat([df_resultados, df_tuned])
df_completo

Unnamed: 0,Accuracy,Precision (0),Precision (1),Recall (0),Recall (1),F1 (0),F1 (1)
RandomForest,0.732,0.706,0.747,0.627,0.809,0.664,0.777
XGBoost,0.685,0.631,0.722,0.612,0.738,0.621,0.73
CatBoost,0.726,0.704,0.738,0.604,0.814,0.651,0.774
SVM,0.697,0.698,0.697,0.5,0.842,0.583,0.762
MLPClassifier,0.637,0.596,0.656,0.44,0.781,0.506,0.713
KNN,0.685,0.608,0.761,0.716,0.661,0.658,0.708
RandomForest_Tuned,0.735016,0.745098,0.730233,0.567164,0.857923,0.644068,0.788945
CatBoost_Tuned,0.716088,0.686441,0.733668,0.604478,0.797814,0.642857,0.764398


In [203]:
# Keep only the baseline and tuned versions of the two tuned model families,
# rounded to 3 decimals so every row uses the same precision.
modelos_deseados = ['RandomForest', 'CatBoost', 'RandomForest_Tuned', 'CatBoost_Tuned']
df_final = df_completo.loc[modelos_deseados].round(3)

display(df_final)

Unnamed: 0,Accuracy,Precision (0),Precision (1),Recall (0),Recall (1),F1 (0),F1 (1)
RandomForest,0.732,0.706,0.747,0.627,0.809,0.664,0.777
CatBoost,0.726,0.704,0.738,0.604,0.814,0.651,0.774
RandomForest_Tuned,0.735,0.745,0.73,0.567,0.858,0.644,0.789
CatBoost_Tuned,0.716,0.686,0.734,0.604,0.798,0.643,0.764


- Para eficiencia real hospitalaria:

RandomForest (baseline) es el mejor modelo: más preciso al decir "esto es emergencia" y mejor en reconocer a los no urgentes.

- Para balance mixto y prototipo realista:

RandomForest_Tuned gana por F1 (1) y Accuracy, pero pierde un poco en Recall (0).
