# Importing Libraries

In [1]:
import numpy as np 
import pandas as pd 
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.utils import resample

import optuna
from xgboost import XGBClassifier

from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix


# Loading the Dataset

In [2]:
df_train = pd.read_csv('Dataset.csv')
df_train = df_train.drop(columns=['Unnamed: 0'])
print(df_train.head())

   Hour    HR  O2Sat  Temp    SBP   MAP   DBP  Resp  EtCO2  BaseExcess  ...  \
0     0   NaN    NaN   NaN    NaN   NaN   NaN   NaN    NaN         NaN  ...   
1     1  65.0  100.0   NaN    NaN  72.0   NaN  16.5    NaN         NaN  ...   
2     2  78.0  100.0   NaN    NaN  42.5   NaN   NaN    NaN         NaN  ...   
3     3  73.0  100.0   NaN    NaN   NaN   NaN  17.0    NaN         NaN  ...   
4     4  70.0  100.0   NaN  129.0  74.0  69.0  14.0    NaN         NaN  ...   

   Fibrinogen  Platelets    Age  Gender  Unit1  Unit2  HospAdmTime  ICULOS  \
0         NaN        NaN  68.54       0    NaN    NaN        -0.02       1   
1         NaN        NaN  68.54       0    NaN    NaN        -0.02       2   
2         NaN        NaN  68.54       0    NaN    NaN        -0.02       3   
3         NaN        NaN  68.54       0    NaN    NaN        -0.02       4   
4         NaN      330.0  68.54       0    NaN    NaN        -0.02       5   

   SepsisLabel  Patient_ID  
0            0       17072 

In [4]:
# Inspecting the Dataset
print('taille des données : \n', df_train.shape)
print('\nColonnes  du dataset : \n', df_train.columns)
print('\nNombre de patients unique : \n', df_train.Patient_ID.nunique())

taille des données : 
 (1552210, 43)

Colonnes  du dataset : 
 Index(['Hour', 'HR', 'O2Sat', 'Temp', 'SBP', 'MAP', 'DBP', 'Resp', 'EtCO2',
       'BaseExcess', 'HCO3', 'FiO2', 'pH', 'PaCO2', 'SaO2', 'AST', 'BUN',
       'Alkalinephos', 'Calcium', 'Chloride', 'Creatinine', 'Bilirubin_direct',
       'Glucose', 'Lactate', 'Magnesium', 'Phosphate', 'Potassium',
       'Bilirubin_total', 'TroponinI', 'Hct', 'Hgb', 'PTT', 'WBC',
       'Fibrinogen', 'Platelets', 'Age', 'Gender', 'Unit1', 'Unit2',
       'HospAdmTime', 'ICULOS', 'SepsisLabel', 'Patient_ID'],
      dtype='object')

Nombre de patients unique : 
 40336


# Data Preprocessing

In [5]:
# Obtenir les cinq premiers Patient_ID
cleaned_df = df_train
first_five_patient_ids = cleaned_df['Patient_ID'].unique()[:10]

# Afficher les valeurs de la colonne Hour pour chaque Patient_ID
for patient_id in first_five_patient_ids:
    patient_hours = cleaned_df[cleaned_df['Patient_ID'] == patient_id]['Hour']
    print(f'Les valeurs de la colonne Hour pour le Patient_ID {patient_id} sont:')
    print(patient_hours.values)
    print()


Les valeurs de la colonne Hour pour le Patient_ID 17072 sont:
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42]

Les valeurs de la colonne Hour pour le Patient_ID 16153 sont:
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15]

Les valeurs de la colonne Hour pour le Patient_ID 465 sont:
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28 29 30 31 32 33 34 35]

Les valeurs de la colonne Hour pour le Patient_ID 9891 sont:
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28 29 30 31 32 33 34 35]

Les valeurs de la colonne Hour pour le Patient_ID 8065 sont:
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45]

Les valeurs de la colonne Hour pour le Patient_ID 3516 sont:
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 

In [6]:
# Afficher les valeurs de la colonne Hour pour chaque Patient_ID
for patient_id in first_five_patient_ids:
    patient_hours = cleaned_df[cleaned_df['Patient_ID'] == patient_id]['Hour']
    patient_iculos = cleaned_df[cleaned_df['Patient_ID'] == patient_id]['ICULOS']
    patient_labels = cleaned_df[cleaned_df['Patient_ID'] == patient_id]['SepsisLabel']
    patient_adm_time = cleaned_df[cleaned_df['Patient_ID'] == patient_id]['HospAdmTime']

    print(f'Patient ID : {patient_id}')
    print('patients hours : ', patient_hours.values)
    print('patients iculos : ', patient_iculos.values)
    print('patients adm time :', patient_adm_time.values)
    print('patients sepsis label : ', patient_labels.values)
    print()

Patient ID : 17072
patients hours :  [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42]
patients iculos :  [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43]
patients adm time : [-0.02 -0.02 -0.02 -0.02 -0.02 -0.02 -0.02 -0.02 -0.02 -0.02 -0.02 -0.02
 -0.02 -0.02 -0.02 -0.02 -0.02 -0.02 -0.02 -0.02 -0.02 -0.02 -0.02 -0.02
 -0.02 -0.02 -0.02 -0.02 -0.02 -0.02 -0.02 -0.02 -0.02 -0.02 -0.02 -0.02
 -0.02 -0.02 -0.02 -0.02 -0.02 -0.02 -0.02]
patients sepsis label :  [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0]

Patient ID : 16153
patients hours :  [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15]
patients iculos :  [ 4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19]
patients adm time : [-0.04 -0.04 -0.04 -0.04 -0.04 -0.04 -0.04 -0.04 -0.04 -0.04 -0.04 -0.04
 -0.04 -0.04 -0.04 -0.04]
patient

In [8]:
# Identifier l'heure d'apparition du sepsis pour chaque patient
sepsis_onset = df_train[df_train['SepsisLabel'] == 1].groupby('Patient_ID')['Hour'].min().reset_index()
sepsis_onset.columns = ['Patient_ID', 'SepsisOnsetHour']

# Regrouper les données par Patient_ID
grouped_df = df_train.groupby('Patient_ID').agg(
    HospAdmTime=('HospAdmTime', 'first'),
    SepsisLabel=('SepsisLabel', 'max'),
    Hours=('Hour', list)
).reset_index()

# Joindre les données d'apparition du sepsis
grouped_df = grouped_df.merge(sepsis_onset, on='Patient_ID', how='left')

# Calculer le temps écoulé entre l'admission et l'apparition du sepsis
grouped_df['TimeToSepsis'] = grouped_df['SepsisOnsetHour'] - grouped_df['HospAdmTime']

# Afficher les résultats
display(grouped_df.head())

Unnamed: 0,Patient_ID,HospAdmTime,SepsisLabel,Hours,SepsisOnsetHour,TimeToSepsis
0,1,-0.03,0,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...",,
1,2,-98.6,0,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...",,
2,3,-1195.71,0,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...",,
3,4,-8.77,0,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...",,
4,5,-0.05,0,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...",,


In [9]:
# Identifier l'heure d'apparition du sepsis pour chaque patient
sepsis_onset = df_train[df_train['SepsisLabel'] == 1].groupby('Patient_ID')['Hour'].min().reset_index()
sepsis_onset.columns = ['Patient_ID', 'SepsisOnsetHour']

# Joindre les données d'apparition du sepsis
df_train = df_train.merge(sepsis_onset, on='Patient_ID', how='left')

# Marquer les heures de prévision
df_train['PredictionWindow'] = df_train.apply(
    lambda row: 1 if row['SepsisOnsetHour'] - 4 <= row['Hour'] < row['SepsisOnsetHour'] else 0, axis=1
)

# Remplacer les valeurs NaN dans 'SepsisOnsetHour' par un nombre d'heures plus élevé que le maximum observé
max_hour = df_train['Hour'].max()
df_train['SepsisOnsetHour'].fillna(max_hour + 1, inplace=True)

# Marquer les heures de prévision
df_train['PredictionWindow'] = df_train.apply(
    lambda row: 1 if row['SepsisOnsetHour'] - 4 <= row['Hour'] < row['SepsisOnsetHour'] else 0, axis=1
)

# Inclure les heures proches de la fin de l'enregistrement pour les patients sans sepsis
patients_no_sepsis = df_train[df_train['SepsisLabel'] == 0]['Patient_ID'].unique()
for patient_id in patients_no_sepsis:
    patient_data = df_train[df_train['Patient_ID'] == patient_id]
    end_hour = patient_data['Hour'].max()
    prediction_hours = patient_data[(end_hour - 4 <= patient_data['Hour']) & (patient_data['Hour'] <= end_hour)]
    df_train.loc[prediction_hours.index, 'PredictionWindow'] = 1

# Filtrer les données pour la fenêtre de prévision
df_window = df_train[df_train['PredictionWindow'] == 1]



In [10]:
display(df_window)

Unnamed: 0,Hour,HR,O2Sat,Temp,SBP,MAP,DBP,Resp,EtCO2,BaseExcess,...,Age,Gender,Unit1,Unit2,HospAdmTime,ICULOS,SepsisLabel,Patient_ID,SepsisOnsetHour,PredictionWindow
38,38,64.0,100.0,,,66.0,,14.0,,,...,68.54,0,,,-0.02,39,0,17072,336.0,1
39,39,72.0,100.0,37.44,,71.0,,14.0,,,...,68.54,0,,,-0.02,40,0,17072,336.0,1
40,40,71.0,100.0,,,71.0,,14.0,,5.0,...,68.54,0,,,-0.02,41,0,17072,336.0,1
41,41,70.0,99.0,,,70.0,,14.0,,,...,68.54,0,,,-0.02,42,0,17072,336.0,1
42,42,71.0,99.0,,,48.0,,14.0,,,...,68.54,0,,,-0.02,43,0,17072,336.0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1552205,21,83.0,99.0,,121.0,77.0,54.0,22.0,,,...,88.00,0,1.0,0.0,-2.93,22,0,113911,336.0,1
1552206,22,80.0,92.0,,102.0,73.0,51.0,24.0,,,...,88.00,0,1.0,0.0,-2.93,23,0,113911,336.0,1
1552207,23,95.0,97.0,36.70,128.5,83.0,58.5,25.0,,,...,88.00,0,1.0,0.0,-2.93,24,0,113911,336.0,1
1552208,24,104.0,99.0,,127.0,85.0,59.0,24.0,,,...,88.00,0,1.0,0.0,-2.93,25,0,113911,336.0,1


In [12]:
# Filtrer les données pour la fenêtre de prévision
df_window = df_train[df_train['PredictionWindow'] == 2]
display(df_window)

Unnamed: 0,Hour,HR,O2Sat,Temp,SBP,MAP,DBP,Resp,EtCO2,BaseExcess,...,Age,Gender,Unit1,Unit2,HospAdmTime,ICULOS,SepsisLabel,Patient_ID,SepsisOnsetHour,PredictionWindow


In [45]:
# Créer les caractéristiques et les étiquettes
features = df_window.drop(columns=['Patient_ID', 'SepsisLabel', 'SepsisOnsetHour', 'PredictionWindow'])
labels = df_window['SepsisLabel']

# Séparer les données en ensembles d'entraînement et de test
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)

# Créer et entraîner le modèle XGBoost
model = XGBClassifier(use_label_encoder=False, eval_metric='logloss')
model.fit(X_train, y_train)

# Prédictions
y_pred = model.predict(X_test)

# Évaluation du modèle
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, zero_division=1)
recall = recall_score(y_test, y_pred, zero_division=1)

# Calculer la matrice de confusion
cm = confusion_matrix(y_test, y_pred)

# Calculer la spécificité
if cm.shape == (2, 2):
    specificity = cm[0, 0] / (cm[0, 0] + cm[0, 1])
else:
    specificity = np.nan  # Assign NaN if specificity cannot be calculated

print(f'Accuracy: {accuracy:.4f}')
print(f'Precision: {precision:.4f}')
print(f'Recall (Sensitivity): {recall:.4f}')
print(f'Specificity: {specificity:.4f}')

Accuracy: 0.9499
Precision: 0.6387
Recall (Sensitivity): 0.3534
Specificity: 0.9874


In [8]:
# Data Cleaning
interest_columns = ['Patient_ID', 'Hour', 'HR', 'Temp', 'WBC', 'SBP', 'DBP', 'MAP', 'Age', 'Creatinine', 'Gender', 'BUN', 'ICULOS', 'Platelets', 'SepsisLabel']
seuil = 0.3 * len(interest_columns)
cleaned_df = df_train.dropna(subset=interest_columns, thresh=len(interest_columns) - seuil)
print('taille du dataset : ', cleaned_df.shape)
print('\npremières lignes du dataset : \n', cleaned_df.head())
print('\nnombre de label de chaque classe : \n', cleaned_df['SepsisLabel'].value_counts())

taille du dataset :  (502739, 43)

premières lignes du dataset : 
     Hour    HR  O2Sat   Temp    SBP   MAP   DBP  Resp  EtCO2  BaseExcess  ...  \
4      4  70.0  100.0    NaN  129.0  74.0  69.0  14.0    NaN         NaN  ...   
7      7  68.0  100.0  35.78  142.0  93.5  78.0  16.0    NaN         NaN  ...   
11    11  84.0  100.0  36.39  128.0  80.0  60.0  14.0    NaN         NaN  ...   
13    13  85.0  100.0    NaN  141.0  95.0  69.0  14.0    NaN         NaN  ...   
16    16  89.0  100.0  37.50  112.0  82.5  63.0  14.0    NaN         NaN  ...   

    Fibrinogen  Platelets    Age  Gender  Unit1  Unit2  HospAdmTime  ICULOS  \
4          NaN      330.0  68.54       0    NaN    NaN        -0.02       5   
7          NaN        NaN  68.54       0    NaN    NaN        -0.02       8   
11         NaN        NaN  68.54       0    NaN    NaN        -0.02      12   
13         NaN      303.0  68.54       0    NaN    NaN        -0.02      14   
16         NaN        NaN  68.54       0    NaN    

In [11]:
# Balancing Classes
major_classe = cleaned_df[cleaned_df.SepsisLabel == 0]
minor_classe = cleaned_df[cleaned_df.SepsisLabel == 1]
major_classe_resample = resample(major_classe, replace=False, n_samples=len(minor_classe), random_state=123)
df_final = pd.concat([major_classe_resample, minor_classe])
print('nombre de label de chaque classe après équilibrage de classes : \n', df_final.SepsisLabel.value_counts())
print('\nNombre de patient unique : \n', df_final['Patient_ID'].nunique())

nombre de label de chaque classe après équilibrage de classes : 
 SepsisLabel
0    9432
1    9432
Name: count, dtype: int64

Nombre de patient unique : 
 9730


In [None]:
# Feature Selection
sub_columns = ['Hour', 'HR', 'O2Sat', 'Temp', 'MAP', 'Resp', 'BUN', 'Chloride', 'Creatinine', 'Glucose', 'Hct', 'Hgb', 'WBC', 'Platelets', 'Age', 'HospAdmTime', 'ICULOS', 'SepsisLabel']
df_final = df_final[sub_columns]

# Creating X and y Datasets
X = df_final.drop(columns=['SepsisLabel'])
y = df_final['SepsisLabel']

# Splitting the Dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=2023)
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

In [None]:
# Optuna Hyperparameter Tuning
def objective(trial):
    learning_rate = trial.suggest_uniform('learning_rate', 0.01, 1)
    max_depth = trial.suggest_int('max_depth', 2, 15)
    n_estimators = trial.suggest_int('n_estimators', 50, 250)
    min_child_weight = trial.suggest_int('min_child_weight', 1, 10)
    gamma = trial.suggest_uniform('gamma', 0.01, 5)
    subsample = trial.suggest_uniform('subsample', 0.01, 1)
    
    clf1 = XGBClassifier(learning_rate=learning_rate,
                         max_depth=max_depth,
                         n_estimators=n_estimators,
                         min_child_weight=min_child_weight,
                         gamma=gamma,
                         subsample=subsample,
                         use_label_encoder=False,
                         eval_metric='logloss')
    
    score = cross_val_score(clf1, X_train, y_train, cv=7)
    return np.mean(score)

# Creating Optuna Study
study = optuna.create_study(direction='maximize', sampler=optuna.samplers.TPESampler(seed=42))
study.optimize(objective, n_trials=30)

# Displaying Best Parameters
print('best parameters : \n', study.best_params)

In [4]:

# Creating the XGBoost Model with Best Parameters
xgbc = XGBClassifier(
    learning_rate=study.best_params['learning_rate'],
    max_depth=study.best_params['max_depth'],
    n_estimators=study.best_params['n_estimators'],
    min_child_weight=study.best_params['min_child_weight'],
    gamma=study.best_params['gamma'],
    subsample=study.best_params['subsample'],
    use_label_encoder=False,
    eval_metric='logloss'
)

# Training the Model
xgbc.fit(X_train, y_train)

# Prediction and Evaluation
y_predicted = xgbc.predict(X_test)
print(classification_report(y_test, y_predicted))


(502739, 43)
    Hour    HR  O2Sat   Temp    SBP   MAP   DBP  Resp  EtCO2  BaseExcess  ...  \
4      4  70.0  100.0    NaN  129.0  74.0  69.0  14.0    NaN         NaN  ...   
7      7  68.0  100.0  35.78  142.0  93.5  78.0  16.0    NaN         NaN  ...   
11    11  84.0  100.0  36.39  128.0  80.0  60.0  14.0    NaN         NaN  ...   
13    13  85.0  100.0    NaN  141.0  95.0  69.0  14.0    NaN         NaN  ...   
16    16  89.0  100.0  37.50  112.0  82.5  63.0  14.0    NaN         NaN  ...   

    Fibrinogen  Platelets    Age  Gender  Unit1  Unit2  HospAdmTime  ICULOS  \
4          NaN      330.0  68.54       0    NaN    NaN        -0.02       5   
7          NaN        NaN  68.54       0    NaN    NaN        -0.02       8   
11         NaN        NaN  68.54       0    NaN    NaN        -0.02      12   
13         NaN      303.0  68.54       0    NaN    NaN        -0.02      14   
16         NaN        NaN  68.54       0    NaN    NaN        -0.02      17   

    SepsisLabel  Patient_

[I 2024-06-17 12:14:03,416] A new study created in memory with name: no-name-f91d03f5-85bb-4b65-9f00-c2b26fae3a1f


(15091, 17) (3773, 17) (15091,) (3773,)


  learning_rate = trial.suggest_uniform('learning_rate', 0.01, 1)
  gamma = trial.suggest_uniform('gamma', 0.01, 5)
  subsample = trial.suggest_uniform('subsample', 0.01, 1)
[I 2024-06-17 12:14:09,963] Trial 0 finished with value: 0.6910086504243426 and parameters: {'learning_rate': 0.3807947176588889, 'max_depth': 15, 'n_estimators': 197, 'min_child_weight': 6, 'gamma': 0.7885330158077583, 'subsample': 0.16443457513284063}. Best is trial 0 with value: 0.6910086504243426.
  learning_rate = trial.suggest_uniform('learning_rate', 0.01, 1)
  gamma = trial.suggest_uniform('gamma', 0.01, 5)
  subsample = trial.suggest_uniform('subsample', 0.01, 1)
[I 2024-06-17 12:14:21,675] Trial 1 finished with value: 0.7679417745945517 and parameters: {'learning_rate': 0.06750277604651747, 'max_depth': 14, 'n_estimators': 170, 'min_child_weight': 8, 'gamma': 0.11271662653605422, 'subsample': 0.9702107536403743}. Best is trial 1 with value: 0.7679417745945517.
  learning_rate = trial.suggest_uniform('lear

[I 2024-06-17 12:14:34,930] Trial 6 finished with value: 0.7407726962180298 and parameters: {'learning_rate': 0.31156763148163696, 'max_depth': 3, 'n_estimators': 187, 'min_child_weight': 5, 'gamma': 0.6189707918754463, 'subsample': 0.5002251410101575}. Best is trial 1 with value: 0.7679417745945517.
  learning_rate = trial.suggest_uniform('learning_rate', 0.01, 1)
  gamma = trial.suggest_uniform('gamma', 0.01, 5)
  subsample = trial.suggest_uniform('subsample', 0.01, 1)
[I 2024-06-17 12:14:41,640] Trial 7 finished with value: 0.7560805097643964 and parameters: {'learning_rate': 0.044044635904066216, 'max_depth': 14, 'n_estimators': 102, 'min_child_weight': 7, 'gamma': 1.5654382696861608, 'subsample': 0.5248673409660327}. Best is trial 1 with value: 0.7679417745945517.
  learning_rate = trial.suggest_uniform('learning_rate', 0.01, 1)
  gamma = trial.suggest_uniform('gamma', 0.01, 5)
  subsample = trial.suggest_uniform('subsample', 0.01, 1)
[I 2024-06-17 12:14:44,285] Trial 8 finished w

[I 2024-06-17 12:15:15,164] Trial 13 finished with value: 0.7654903905697031 and parameters: {'learning_rate': 0.2236776803057568, 'max_depth': 8, 'n_estimators': 159, 'min_child_weight': 4, 'gamma': 0.10247270807374692, 'subsample': 0.9580994031539216}. Best is trial 1 with value: 0.7679417745945517.
  learning_rate = trial.suggest_uniform('learning_rate', 0.01, 1)
  gamma = trial.suggest_uniform('gamma', 0.01, 5)
  subsample = trial.suggest_uniform('subsample', 0.01, 1)
[I 2024-06-17 12:15:18,453] Trial 14 finished with value: 0.752502239935357 and parameters: {'learning_rate': 0.25494993582657577, 'max_depth': 7, 'n_estimators': 158, 'min_child_weight': 4, 'gamma': 0.8643074534032664, 'subsample': 0.9954276348729897}. Best is trial 1 with value: 0.7679417745945517.
  learning_rate = trial.suggest_uniform('learning_rate', 0.01, 1)
  gamma = trial.suggest_uniform('gamma', 0.01, 5)
  subsample = trial.suggest_uniform('subsample', 0.01, 1)
[I 2024-06-17 12:15:22,239] Trial 15 finished w

[I 2024-06-17 12:15:48,555] Trial 20 finished with value: 0.7257312458837151 and parameters: {'learning_rate': 0.9646105346403706, 'max_depth': 7, 'n_estimators': 122, 'min_child_weight': 7, 'gamma': 1.1889954415682085, 'subsample': 0.781648809357816}. Best is trial 1 with value: 0.7679417745945517.
  learning_rate = trial.suggest_uniform('learning_rate', 0.01, 1)
  gamma = trial.suggest_uniform('gamma', 0.01, 5)
  subsample = trial.suggest_uniform('subsample', 0.01, 1)
[I 2024-06-17 12:15:53,608] Trial 21 finished with value: 0.7623093133960126 and parameters: {'learning_rate': 0.17051543896485719, 'max_depth': 9, 'n_estimators': 115, 'min_child_weight': 8, 'gamma': 0.19116101802827012, 'subsample': 0.9987729260949273}. Best is trial 1 with value: 0.7679417745945517.
  learning_rate = trial.suggest_uniform('learning_rate', 0.01, 1)
  gamma = trial.suggest_uniform('gamma', 0.01, 5)
  subsample = trial.suggest_uniform('subsample', 0.01, 1)
[I 2024-06-17 12:16:00,354] Trial 22 finished w

[I 2024-06-17 12:16:32,887] Trial 27 finished with value: 0.7588634718659815 and parameters: {'learning_rate': 0.11556875111598885, 'max_depth': 8, 'n_estimators': 169, 'min_child_weight': 3, 'gamma': 1.231960789556175, 'subsample': 0.5877969342907241}. Best is trial 1 with value: 0.7679417745945517.
  learning_rate = trial.suggest_uniform('learning_rate', 0.01, 1)
  gamma = trial.suggest_uniform('gamma', 0.01, 5)
  subsample = trial.suggest_uniform('subsample', 0.01, 1)
[I 2024-06-17 12:16:41,219] Trial 28 finished with value: 0.7655561588844414 and parameters: {'learning_rate': 0.0804164499191974, 'max_depth': 10, 'n_estimators': 149, 'min_child_weight': 5, 'gamma': 0.43166872346973245, 'subsample': 0.8525030848685566}. Best is trial 1 with value: 0.7679417745945517.
  learning_rate = trial.suggest_uniform('learning_rate', 0.01, 1)
  gamma = trial.suggest_uniform('gamma', 0.01, 5)
  subsample = trial.suggest_uniform('subsample', 0.01, 1)
[I 2024-06-17 12:16:49,518] Trial 29 finished 

{'learning_rate': 0.06750277604651747, 'max_depth': 14, 'n_estimators': 170, 'min_child_weight': 8, 'gamma': 0.11271662653605422, 'subsample': 0.9702107536403743}
              precision    recall  f1-score   support

           0       0.78      0.78      0.78      1886
           1       0.78      0.78      0.78      1887

    accuracy                           0.78      3773
   macro avg       0.78      0.78      0.78      3773
weighted avg       0.78      0.78      0.78      3773

