In [20]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.model_selection import RandomizedSearchCV
from xgboost import XGBClassifier

In [21]:
df = pd.read_csv("synthetic_icu_cardiac_arrest.csv")
df.head()

Unnamed: 0,RR,SpO2,FiO2,SpO2_FiO2_ratio,HR,MAP,Lactate,Lactate_change,GCS,Age,RR_slope,Cardiac_arrest
0,15.486216,88.262426,0.654739,134.805508,73.916591,103.683748,4.844001,0.497362,13.184228,64.519564,-0.122652,0
1,27.984073,91.749254,0.699792,131.109371,79.339773,70.975929,3.192455,1.109519,7.999581,76.307903,0.401615,1
2,23.697871,90.53233,0.514377,176.003988,131.558017,63.309763,5.891011,0.107589,9.885781,63.816983,0.255408,0
3,12.962232,99.311823,0.637635,155.750194,129.936144,91.462948,3.376802,0.192674,15.0,32.678599,0.00077,0
4,18.528398,98.080002,0.21,467.047631,121.56517,90.588261,0.214577,1.27165,9.980036,80.448045,-0.949477,0


In [22]:
x = df.drop("Cardiac_arrest" , axis=1)
y = df["Cardiac_arrest"]

xTrain , xTest , yTrain , yTest = train_test_split(x,y,test_size=0.3 , random_state=25 , stratify=y)

In [27]:
rf_param_grid = {

    # Tree size
    "n_estimators": [300, 600, 900, 1200],
    "max_depth": [None, 8, 12, 16, 20, 25],
    "min_samples_split": [2, 5, 10, 20, 30],
    "min_samples_leaf": [1, 2, 4, 8, 12],
    "min_weight_fraction_leaf": [0.0, 0.01, 0.02],
    "max_features": ["sqrt", "log2", 0.4, 0.6, 0.8],
    "bootstrap": [True],
    "max_samples": [None, 0.6, 0.8, 0.9],
    "class_weight": [
        None,
        "balanced",
        "balanced_subsample",
        {0:1, 1:1.5},
        {0:1, 1:2},
        {0:1, 1:3}
    ],
    "ccp_alpha": [0.0, 0.0001, 0.001, 0.01]
}

xgb_param_grid = {
    "n_estimators": [300, 600, 900, 1200],
    "learning_rate": [0.005, 0.01, 0.03, 0.05, 0.1],
    "max_depth": [3, 4, 5, 6, 8, 10],
    "min_child_weight": [1, 3, 5, 7, 10],
    "subsample": [0.6, 0.7, 0.8, 0.9, 1.0],
    "colsample_bytree": [0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
    "colsample_bylevel": [0.6, 0.8, 1.0],
    "colsample_bynode": [0.6, 0.8, 1.0],
    "gamma": [0, 0.1, 0.2, 0.3, 0.5],
    "reg_alpha": [0, 0.01, 0.1, 1, 5],
    "reg_lambda": [0.5, 1, 1.5, 2, 3, 5],
    "scale_pos_weight": [1, 1.5, 2, 2.3, 3, 4],
    "max_delta_step": [0, 1, 5],
    "grow_policy": ["depthwise", "lossguide"]
}

In [28]:
models = (["RandomForest" , RandomForestClassifier() , rf_param_grid],
          ["XGboost" , XGBClassifier() , xgb_param_grid])

In [29]:
from sklearn.metrics import make_scorer, fbeta_score

f2_scorer = make_scorer(fbeta_score, beta=2)

In [30]:
bestParam = {}
bestModels = {}
bestScores = {}

for name, model, param in models:
    
    randomsearch = RandomizedSearchCV(
        estimator=model,
        param_distributions=param,
        n_iter=10,
        cv=5,
        n_jobs=-1,
        random_state=30,
        scoring=f2_scorer
    )
    
    randomsearch.fit(xTrain, yTrain)
    
    bestParam[name] = randomsearch.best_params_
    
    bestModels[name] = randomsearch.best_estimator_
    
    bestScores[name] = randomsearch.best_score_

    print(f"{name} best F2 score: {randomsearch.best_score_:.4f}")

RandomForest best F2 score: 0.7113
XGboost best F2 score: 0.7031
