In [1]:
import numpy as np 
import pandas as pd 

In [2]:
df_train = pd.read_csv('Paitients_Files_Train.csv')
df_train.head()

Unnamed: 0,ID,PRG,PL,PR,SK,TS,M11,BD2,Age,Insurance,Sepssis
0,ICU200010,6,148,72,35,0,33.6,0.627,50,0,Positive
1,ICU200011,1,85,66,29,0,26.6,0.351,31,0,Negative
2,ICU200012,8,183,64,0,0,23.3,0.672,32,1,Positive
3,ICU200013,1,89,66,23,94,28.1,0.167,21,1,Negative
4,ICU200014,0,137,40,35,168,43.1,2.288,33,1,Positive


In [3]:
df_train['sepsis_encoded'] = df_train['Sepssis'].map({'Positive':1, 'Negative': 0})
df_train = df_train.drop(['ID', 'Sepssis'], axis='columns')
df_train.head()

Unnamed: 0,PRG,PL,PR,SK,TS,M11,BD2,Age,Insurance,sepsis_encoded
0,6,148,72,35,0,33.6,0.627,50,0,1
1,1,85,66,29,0,26.6,0.351,31,0,0
2,8,183,64,0,0,23.3,0.672,32,1,1
3,1,89,66,23,94,28.1,0.167,21,1,0
4,0,137,40,35,168,43.1,2.288,33,1,1


In [4]:
df_train['sepsis_encoded'].value_counts()

sepsis_encoded
0    391
1    208
Name: count, dtype: int64

In [5]:
X = df_train.iloc[:,:-1]
y = df_train.iloc[:,-1]

In [6]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=2023)
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((479, 9), (120, 9), (479,), (120,))

# Optuna

In [7]:
import optuna
from xgboost import XGBClassifier
from sklearn.model_selection import cross_val_score

In [8]:
def objective(trial):
    learning_rate = trial.suggest_uniform('learning_rate', 0.01, 1)
    max_depth = trial.suggest_int('max_depth', 2, 11)
    n_estimators = trial.suggest_int('n_estimators', 50, 200)
    min_child_weight = trial.suggest_int('min_child_weight', 1, 10)
    gamma = trial.suggest_uniform('gamma', 0.01, 5)
    subsample = trial.suggest_uniform('subsample', 0.01, 1)
    clf1 = XGBClassifier(learning_rate=learning_rate,
                         max_depth=max_depth,
                         n_estimators=n_estimators,
                         min_child_weight=min_child_weight,
                         gamma=gamma)
    score = cross_val_score(clf1, X_train, y_train, cv=5)
    return np.mean(score)

In [9]:
study = optuna.create_study(direction='maximize',sampler=optuna.samplers.TPESampler(seed=42))
study.optimize(objective, n_trials=30)
study.best_params

[I 2025-04-04 18:46:56,554] A new study created in memory with name: no-name-bf2ac0cc-75a1-45fc-8bc2-1ae7a5e9940a
  learning_rate = trial.suggest_uniform('learning_rate', 0.01, 1)
  gamma = trial.suggest_uniform('gamma', 0.01, 5)
  subsample = trial.suggest_uniform('subsample', 0.01, 1)
[I 2025-04-04 18:46:58,621] Trial 0 finished with value: 0.7328070175438597 and parameters: {'learning_rate': 0.3807947176588889, 'max_depth': 11, 'n_estimators': 160, 'min_child_weight': 6, 'gamma': 0.7885330158077583, 'subsample': 0.16443457513284063}. Best is trial 0 with value: 0.7328070175438597.
  learning_rate = trial.suggest_uniform('learning_rate', 0.01, 1)
  gamma = trial.suggest_uniform('gamma', 0.01, 5)
  subsample = trial.suggest_uniform('subsample', 0.01, 1)
[I 2025-04-04 18:47:02,298] Trial 1 finished with value: 0.7494956140350877 and parameters: {'learning_rate': 0.06750277604651747, 'max_depth': 10, 'n_estimators': 140, 'min_child_weight': 8, 'gamma': 0.11271662653605422, 'subsample': 

{'learning_rate': 0.7742047722562706,
 'max_depth': 6,
 'n_estimators': 173,
 'min_child_weight': 8,
 'gamma': 2.63094119499456,
 'subsample': 0.01292237593906475}

In [10]:
xgbc = XGBClassifier(learning_rate= list(study.best_params.values())[0],
 max_depth= list(study.best_params.values())[1],
 n_estimators= list(study.best_params.values())[2],
 min_child_weight= list(study.best_params.values())[3],
 gamma= list(study.best_params.values())[4],
 subsample= list(study.best_params.values())[5]
                     )

In [11]:
from sklearn.metrics import classification_report

In [12]:
xgbc.fit(X_train, y_train)

In [13]:
y_predicted = xgbc.predict(X_test)

In [14]:
print(classification_report(y_test, y_predicted))

              precision    recall  f1-score   support

           0       0.65      1.00      0.79        78
           1       0.00      0.00      0.00        42

    accuracy                           0.65       120
   macro avg       0.33      0.50      0.39       120
weighted avg       0.42      0.65      0.51       120



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


**I also tried using standardization and SMOTE, but the ACCURACY was low. I wonder why?**