In [75]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import RandomizedSearchCV

In [76]:
# Read the dataset from the CSV file in the working directory and preview
# the first five rows (five is the default row count of DataFrame.head).
df = pd.read_csv(filepath_or_buffer='synthetic_ulmis_data.csv')
df.head(n=5)

Unnamed: 0,RR,SpO2,FiO2,SpO2_FiO2_ratio,HR,MAP,Lactate,GCS,RR_slope,SpO2_slope,FiO2_slope,Lactate_change,Oxygen_Device,Intubation_6h
0,26.980285,95.14074,40.515181,2.348274,150.222651,96.187112,3.871147,11.844071,5.828496,0.143348,0.232416,0.524107,1,1
1,23.170414,84.889276,40.783306,2.081471,99.601369,83.166606,4.936731,15.170105,-0.273125,0.713908,7.934041,1.080823,2,0
2,27.886131,94.858983,89.708549,1.057413,93.748659,83.091591,3.018276,12.383748,2.087717,0.397483,5.886795,-0.753667,1,0
3,33.138179,91.065104,61.186218,1.488327,89.552801,64.23088,2.117319,13.060433,2.853488,-0.586467,3.33455,1.517724,2,1
4,22.59508,94.829831,94.613018,1.002292,107.527341,96.228745,0.899913,12.803947,1.202366,-0.470816,0.485648,0.531061,0,0


In [77]:
# Instantiate a LabelEncoder.
# NOTE(review): no cell visible in this notebook calls `encoder`; the previewed
# columns (including Oxygen_Device) already appear numeric — confirm whether
# this object is still needed.
encoder = LabelEncoder()

In [78]:
# Preview the first five rows of the frame (five is the default for head()).
df.head(n=5)

Unnamed: 0,RR,SpO2,FiO2,SpO2_FiO2_ratio,HR,MAP,Lactate,GCS,RR_slope,SpO2_slope,FiO2_slope,Lactate_change,Oxygen_Device,Intubation_6h
0,26.980285,95.14074,40.515181,2.348274,150.222651,96.187112,3.871147,11.844071,5.828496,0.143348,0.232416,0.524107,1,1
1,23.170414,84.889276,40.783306,2.081471,99.601369,83.166606,4.936731,15.170105,-0.273125,0.713908,7.934041,1.080823,2,0
2,27.886131,94.858983,89.708549,1.057413,93.748659,83.091591,3.018276,12.383748,2.087717,0.397483,5.886795,-0.753667,1,0
3,33.138179,91.065104,61.186218,1.488327,89.552801,64.23088,2.117319,13.060433,2.853488,-0.586467,3.33455,1.517724,2,1
4,22.59508,94.829831,94.613018,1.002292,107.527341,96.228745,0.899913,12.803947,1.202366,-0.470816,0.485648,0.531061,0,0


In [79]:
# Build the feature matrix and the binary target.
#
# The preview of `df` shows an 'Unnamed: 0' column holding 0, 1, 2, ... — this
# looks like a row index that was written into the CSV. A row counter carries
# no clinical signal and must not be fed to the models as a predictor, so it is
# dropped together with the target. errors='ignore' makes the drop a no-op if
# that column is not actually present in the frame; a missing target still
# fails loudly on the `df['Intubation_6h']` lookup below.
x = df.drop(columns=['Intubation_6h', 'Unnamed: 0'], errors='ignore')
y = df['Intubation_6h']

# 70/30 hold-out split; stratify=y keeps the class ratio of the target the same
# in both partitions, and random_state makes the split repeatable.
xTrain, xTest, yTrain, yTest = train_test_split(
    x,
    y,
    test_size=0.30,
    random_state=30,
    stratify=y,
)

In [None]:
# Candidate hyper-parameter values sampled by the randomized search for the
# random-forest model. Each key is a RandomForestClassifier constructor
# argument; each list holds the values the search may draw from.
rf_param_grid = dict(
    n_estimators=[200, 400, 600, 800],
    max_depth=[None, 5, 10, 15, 20],
    min_samples_split=[2, 5, 10, 15],
    min_samples_leaf=[1, 2, 4, 6],
    max_features=["sqrt", "log2", 0.6, 0.8],
    # None = unweighted; the dict entries up-weight the positive class (label 1).
    class_weight=[None, "balanced", {0: 1, 1: 2}, {0: 1, 1: 2.3}],
)

# Candidate hyper-parameter values for the XGBoost model, keyed by
# XGBClassifier constructor argument.
xgb_param_grid = dict(
    n_estimators=[200, 400, 600, 800],
    learning_rate=[0.01, 0.03, 0.05, 0.1],
    max_depth=[3, 4, 5, 6, 8],
    min_child_weight=[1, 3, 5, 7],
    subsample=[0.6, 0.7, 0.8, 0.9, 1.0],
    colsample_bytree=[0.6, 0.7, 0.8, 0.9, 1.0],
    gamma=[0, 0.1, 0.2, 0.3],
    reg_alpha=[0, 0.01, 0.1, 1],
    reg_lambda=[1, 1.5, 2, 3],
    scale_pos_weight=[1, 2, 2.3, 3],
)

In [81]:
# Registry of candidate models: [display name, estimator, search space].
# The display names are also used as dictionary keys by later cells, so they
# must stay exactly as written.
#
# Both estimators are stochastic (bootstrap / feature and row subsampling), so
# each one is given a fixed random_state. Without it the cross-validated scores
# and the selected hyper-parameters change between runs even though the
# train/test split and the search sampler are seeded. 30 matches the seed used
# for the split and the search.
models = (
    ["RandomForest", RandomForestClassifier(random_state=30), rf_param_grid],
    ["XGboost", XGBClassifier(random_state=30), xgb_param_grid],
)

In [82]:
from sklearn.metrics import fbeta_score, make_scorer

# Scorer wrapping fbeta_score with beta=2 (the F2 score), for use as the
# `scoring` argument of the hyper-parameter search.
f2_scorer = make_scorer(
    fbeta_score,
    beta=2,
)

In [83]:
# Per-model results of the hyper-parameter search, keyed by model name:
# winning parameter set, refit estimator, and mean cross-validated F2 score.
bestParam, bestModels, bestScores = {}, {}, {}

for name, model, param in models:
    # 10 random draws from the model's search space, each scored with
    # 5-fold cross-validation on the training partition using the F2 scorer.
    randomsearch = RandomizedSearchCV(estimator=model, param_distributions=param, n_iter=10, cv=5, n_jobs=-1, random_state=30, scoring=f2_scorer)
    randomsearch.fit(xTrain, yTrain)

    # Record the outcome of this model's search.
    bestParam[name] = randomsearch.best_params_
    bestModels[name] = randomsearch.best_estimator_
    bestScores[name] = randomsearch.best_score_

    print(f"{name} best F2 score: {bestScores[name]:.4f}")

RandomForest best F2 score: 0.7850
XGboost best F2 score: 0.8500


In [85]:
from sklearn.metrics import classification_report, confusion_matrix

# Choose the winner from the cross-validated F2 scores collected by the search
# loop instead of hard-coding a model name: a hard-coded name silently goes
# stale if a re-run ranks the models differently.
bestName = max(bestScores, key=bestScores.get)

# The search already refit its best estimator on the whole training partition
# (RandomizedSearchCV's default refit behaviour, which is what makes
# best_estimator_ available), so reuse that fitted model rather than building
# and fitting a second, unseeded copy from the parameter dict.
bestModel = bestModels[bestName]

# Evaluate once on the held-out test partition.
yPred = bestModel.predict(xTest)

print(confusion_matrix(yTest, yPred))
print(classification_report(yTest, yPred))

[[1171  256]
 [  77  596]]
              precision    recall  f1-score   support

           0       0.94      0.82      0.88      1427
           1       0.70      0.89      0.78       673

    accuracy                           0.84      2100
   macro avg       0.82      0.85      0.83      2100
weighted avg       0.86      0.84      0.85      2100



In [86]:
# joblib is not imported by the notebook's import cell, so it is brought into
# scope here; without this the cell raises NameError on a fresh kernel.
import joblib

# Persist the selected, fitted model to disk in the working directory.
with open("bestModel.joblib", "wb") as f:
    joblib.dump(bestModel, f)