<a href="https://colab.research.google.com/github/Dazhou2000/Msc-Project/blob/master/XGBoost.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive at /content/drive; every data and result path used
# below lives under '/content/drive/My Drive/Msc Project'.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
! pip install optuna

Collecting optuna
  Downloading optuna-3.6.1-py3-none-any.whl (380 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m380.1/380.1 kB[0m [31m7.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.13.2-py3-none-any.whl (232 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m233.0/233.0 kB[0m [31m11.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting colorlog (from optuna)
  Downloading colorlog-6.8.2-py3-none-any.whl (11 kB)
Collecting Mako (from alembic>=1.5.0->optuna)
  Downloading Mako-1.3.5-py3-none-any.whl (78 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.6/78.6 kB[0m [31m7.9 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: Mako, colorlog, alembic, optuna
Successfully installed Mako-1.3.5 alembic-1.13.2 colorlog-6.8.2 optuna-3.6.1


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np
from imblearn.over_sampling import SMOTE
from xgboost import XGBClassifier
from sklearn.metrics import roc_auc_score, classification_report, accuracy_score
import optuna
import json
import os

def load_data(patient_id, window_size):
    """Read one patient's feature table and its importance-ranked feature names.

    Parameters
    ----------
    patient_id : str
        Patient identifier exactly as it appears in the file names (e.g. ``'31'``).
    window_size : int
        Window size that appears in the file names.

    Returns
    -------
    tuple
        ``(data, top_features_list)``: the feature/label table with every row
        that contains a missing value removed, and the values of the importance
        file's ``'Unnamed: 0'`` column ordered by descending ``'Importance'``.
    """
    feature_table = pd.read_csv(
        f'/content/drive/My Drive/Msc Project/features_and_labels_detection/P{patient_id} features and seizure labels with window_{window_size}.csv'
    ).dropna()

    importance_table = pd.read_csv(
        f'/content/drive/My Drive/Msc Project/importance_feature/P{patient_id}MDI_importance with window_{window_size}.csv'
    )
    # Most important feature first.
    ranked = importance_table.sort_values(by='Importance', ascending=False)
    return feature_table, ranked['Unnamed: 0'].tolist()

def save_best_params(patient_id, window_size, best_params):
    """Write the tuned hyper-parameters for one patient/window to a JSON file.

    Parameters
    ----------
    patient_id : str
        Patient identifier used in the output file name.
    window_size : int
        Window size used in the output file name.
    best_params : dict
        JSON-serialisable parameter mapping (e.g. ``study.best_params``).
    """
    # exist_ok=True creates the folder on first use and is a no-op afterwards,
    # replacing the separate exists()-then-create check.
    os.makedirs('/content/drive/My Drive/Msc Project/best_params_xgb', exist_ok=True)

    params_file = f'/content/drive/My Drive/Msc Project/best_params_xgb/params_patient_{patient_id}_window_{window_size}.json'
    with open(params_file, 'w') as file:
        json.dump(best_params, file)

def load_best_params(patient_id, window_size):
    """Return the saved hyper-parameters for one patient/window, if any.

    Parameters
    ----------
    patient_id : str
        Patient identifier used in the parameter file name.
    window_size : int
        Window size used in the parameter file name.

    Returns
    -------
    dict or None
        The parsed JSON content of the parameter file, or ``None`` when the
        file does not exist.
    """
    params_file = f'/content/drive/My Drive/Msc Project/best_params_xgb/params_patient_{patient_id}_window_{window_size}.json'
    # Nothing saved yet for this patient/window combination.
    if not os.path.exists(params_file):
        return None
    with open(params_file, 'r') as handle:
        return json.load(handle)

def save_best_scores(patient_id, window_size, best_scores):
    """Insert or update one row of test metrics in the shared scores CSV.

    The row is labelled ``'{patient_id}_window_{window_size}'``. If the CSV
    already contains that label the row is overwritten in place; otherwise the
    row is appended. The CSV is created when it does not exist yet.

    Parameters
    ----------
    patient_id : str
        Patient identifier used in the row label.
    window_size : int
        Window size used in the row label.
    best_scores : dict
        Mapping of metric name to scalar value; keys become CSV columns.
    """
    # exist_ok=True replaces the separate exists()-then-create check.
    os.makedirs('/content/drive/My Drive/Msc Project/best_scores_xgb', exist_ok=True)
    scores_file = '/content/drive/My Drive/Msc Project/best_scores_xgb/best_scores_more.csv'
    row_name = f'{patient_id}_window_{window_size}'
    scores_df = pd.DataFrame(best_scores, index=[row_name])

    if os.path.exists(scores_file):
        existing_df = pd.read_csv(scores_file, index_col=0)
        if row_name in existing_df.index:
            # Overwrite the existing row; values are aligned on column names.
            existing_df.loc[row_name] = scores_df.loc[row_name]
            scores_df = existing_df
        else:
            scores_df = pd.concat([existing_df, scores_df])

    # Single write path for the create, update and append cases.
    scores_df.to_csv(scores_file)

# Example usage. Patient ids: '1_', '3_', '4_', '10', '13', '19', '23', '27', '29', '30', '31'.
# Window sizes: 24, 72, 144, 288.
patient_id = '31'
window_size = 288

data, top_features_list = load_data(patient_id, window_size)

# Use the 'Num Features' value from the feature-selection row with the highest 'AUC'.
selected_features = pd.read_csv(f'/content/drive/My Drive/Msc Project/select_feature/P{patient_id}_selected_features_with_window_{window_size}.csv')
number_features = selected_features.loc[selected_features['AUC'].idxmax()]['Num Features']
print(number_features)

# Remove the label/metadata columns first, then keep the top-ranked features.
# Selecting from X (not from data) keeps the exclusion in force: a dropped
# column appearing in the ranking now raises KeyError instead of silently
# entering the feature matrix.
X = data.drop(columns=['label', 'timestamp', 'seizure_event', 'Unnamed: 0'])
X = X[top_features_list[:int(number_features)]]
y = data['label']

# Unshuffled split: first 80% of rows for training, the remaining 20% halved
# into validation and test.
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.2, shuffle=False, random_state=42)
# SMOTE is applied to the training portion only; validation and test rows are left as loaded.
smote = SMOTE(random_state=42, n_jobs=-1)
X_train, y_train = smote.fit_resample(X_train, y_train)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, shuffle=False, random_state=42)

def objective(trial):
    """Optuna objective: validation ROC AUC of an XGBoost model for one trial.

    Samples a hyper-parameter set from ``trial``, fits an ``XGBClassifier`` on
    the module-level ``X_train``/``y_train`` and scores it on
    ``X_val``/``y_val``.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial object used to sample the hyper-parameters.

    Returns
    -------
    float
        ROC AUC of the positive-class probabilities on the validation split.
    """
    params = {
        'verbosity': 1,
        'lambda': trial.suggest_float('lambda', 1.0, 2.0,log=True),
        'alpha': trial.suggest_float('alpha', 1e-5, 100.0,log=True),
        'max_depth':trial.suggest_int('max_depth', 3, 10),
        'eta': trial.suggest_float('eta', 1e-5, 1.0,log=True),
        'gamma': trial.suggest_float('gamma', 0.1, 0.2,log=True),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
        'grow_policy':trial.suggest_categorical('grow_policy', ['depthwise', 'lossguide']),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),
    }

    xgb_model = XGBClassifier(**params, use_label_encoder=False, eval_metric='auc', tree_method='hist', random_state=42,device='gpu',booster='gbtree',objective='binary:logistic')
    xgb_model.fit(X_train, y_train)
    # Only the positive-class probabilities are needed for the AUC; the hard
    # predictions were computed but never used, so that call is gone.
    y_pred_val_scores = xgb_model.predict_proba(X_val)[:,1]
    return roc_auc_score(y_val, y_pred_val_scores)

study = optuna.create_study(direction='maximize')

# Queue the parameters saved by an earlier run (when a file exists) so they
# are evaluated as a trial of this study.
best_params = load_best_params(patient_id, window_size)
if best_params:
    study.enqueue_trial(best_params)

study.optimize(objective, n_trials=1)

best_params = study.best_params
print("Best parameters found by Optuna:", best_params)

save_best_params(patient_id, window_size, best_params)

# Refit on the resampled training data with the best parameters found.
xgb_model = XGBClassifier(**best_params, use_label_encoder=False, eval_metric='auc', tree_method='hist', random_state=42,device='gpu',booster='gbtree',objective='binary:logistic')
xgb_model.fit(X_train, y_train)

# Hard class predictions feed the classification reports and accuracies.
y_pred_train = xgb_model.predict(X_train)
y_pred_val = xgb_model.predict(X_val)
y_pred_test = xgb_model.predict(X_test)

# Calculate roc_auc_score from the positive-class probabilities for all three
# splits. The train AUC previously used the thresholded predictions, which
# made it incomparable with the validation and test AUCs.
y_pred_train_scores = xgb_model.predict_proba(X_train)[:,1]
auc_train = roc_auc_score(y_train, y_pred_train_scores)
y_pred_val_scores = xgb_model.predict_proba(X_val)[:,1]
auc_val = roc_auc_score(y_val, y_pred_val_scores)
y_pred_test_scores = xgb_model.predict_proba(X_test)[:,1]
auc_test = roc_auc_score(y_test, y_pred_test_scores)

print("Train classification report:")
print(classification_report(y_train, y_pred_train))

print("Validation classification report:")
print(classification_report(y_val, y_pred_val))

print("Test classification report:")
print(classification_report(y_test, y_pred_test))

print("Train accuracy:", accuracy_score(y_train, y_pred_train))
print("Validation accuracy:", accuracy_score(y_val, y_pred_val))
print("Test accuracy:", accuracy_score(y_test, y_pred_test))

print("Val auc:", auc_val)
print("Test auc:", auc_test)
print("Train auc:", auc_train)

# Build the test report once; recall of class '1' is stored as sensitivity
# and recall of class '0' as specificity.
test_report = classification_report(y_test, y_pred_test, output_dict=True)
best_scores = {
    "sensitivity": test_report['1']['recall'],
    "auc": auc_test,
    "specificity": test_report['0']['recall'],
    "accuracy": accuracy_score(y_test, y_pred_test)
}

save_best_scores(patient_id, window_size, best_scores)


19.0


[I 2024-07-17 16:05:11,249] A new study created in memory with name: no-name-495b5ffc-83be-44c1-bc1a-66da5e28e32c
[I 2024-07-17 16:05:13,661] Trial 0 finished with value: 0.7643178015441149 and parameters: {'lambda': 1.5858877414042443, 'alpha': 0.007376108351406913, 'max_depth': 9, 'eta': 0.014376023577920038, 'gamma': 0.16250083904882537, 'subsample': 0.9423017241583421, 'colsample_bytree': 0.7715534237052748, 'grow_policy': 'lossguide', 'min_child_weight': 9}. Best is trial 0 with value: 0.7643178015441149.


Best parameters found by Optuna: {'lambda': 1.5858877414042443, 'alpha': 0.007376108351406913, 'max_depth': 9, 'eta': 0.014376023577920038, 'gamma': 0.16250083904882537, 'subsample': 0.9423017241583421, 'colsample_bytree': 0.7715534237052748, 'grow_policy': 'lossguide', 'min_child_weight': 9}
Train classification report:
              precision    recall  f1-score   support

           0       0.96      0.97      0.96     81314
           1       0.97      0.96      0.96     81314

    accuracy                           0.96    162628
   macro avg       0.96      0.96      0.96    162628
weighted avg       0.96      0.96      0.96    162628

Validation classification report:
              precision    recall  f1-score   support

           0       0.76      0.89      0.82      9623
           1       0.64      0.40      0.49      4602

    accuracy                           0.73     14225
   macro avg       0.70      0.64      0.65     14225
weighted avg       0.72      0.73      0.71 

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np
from imblearn.over_sampling import SMOTE
from sklearn.metrics import roc_auc_score, classification_report, accuracy_score
import optuna
import json
import os
import shutil
import random
from xgboost import XGBClassifier

# Seed Python's and NumPy's global random generators; SEED is also handed to
# the Optuna TPE sampler inside process_patient_window.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

def load_data(patient_id, window_size, forecasting_length):
    """Read a patient's feature table and the forecasting feature ranking.

    Parameters
    ----------
    patient_id : str
        Patient identifier exactly as it appears in the file names (e.g. ``'3_'``).
    window_size : int
        Window size that appears in the file names.
    forecasting_length : int
        Forecasting length that appears in the importance file name.

    Returns
    -------
    tuple
        ``(data, top_features_list)``: the feature/label table with every row
        that contains a missing value removed, and the values of the importance
        file's ``'Unnamed: 0'`` column ordered by descending ``'Importance'``.
    """
    feature_table = pd.read_csv(
        f'/content/drive/My Drive/Msc Project/features_and_labels_detection/P{patient_id} features and seizure labels with window_{window_size}.csv'
    ).dropna()

    importance_table = pd.read_csv(
        f'/content/drive/My Drive/Msc Project/Forecasting/importance_feature/P{patient_id}MDI_importance with window_{window_size} with forecasting_{forecasting_length}.csv'
    )
    # Most important feature first.
    ranked = importance_table.sort_values(by='Importance', ascending=False)
    return feature_table, ranked['Unnamed: 0'].tolist()

def save_best_params(patient_id, window_size, forecasting_length, best_params):
    """Write the tuned hyper-parameters for one forecasting run to a JSON file.

    Parameters
    ----------
    patient_id : str
        Patient identifier used in the output file name.
    window_size : int
        Window size used in the output file name.
    forecasting_length : int
        Forecasting length used in the output file name.
    best_params : dict
        JSON-serialisable parameter mapping (e.g. ``study.best_params``).
    """
    # exist_ok=True creates the folder on first use and is a no-op afterwards,
    # replacing the separate exists()-then-create check.
    os.makedirs('/content/drive/My Drive/Msc Project/Forecasting/best_params_xgb', exist_ok=True)

    params_file = f'/content/drive/My Drive/Msc Project/Forecasting/best_params_xgb/params_patient_{patient_id}_window_{window_size}_forecasting_{forecasting_length}.json'
    with open(params_file, 'w') as file:
        json.dump(best_params, file)

def load_best_params(patient_id, window_size, forecasting_length):
    """Return the saved hyper-parameters for one forecasting run, if any.

    Parameters
    ----------
    patient_id : str
        Patient identifier used in the parameter file name.
    window_size : int
        Window size used in the parameter file name.
    forecasting_length : int
        Forecasting length used in the parameter file name.

    Returns
    -------
    dict or None
        The parsed JSON content of the parameter file, or ``None`` when the
        file does not exist.
    """
    params_file = f'/content/drive/My Drive/Msc Project/Forecasting/best_params_xgb/params_patient_{patient_id}_window_{window_size}_forecasting_{forecasting_length}.json'
    # Nothing saved yet for this combination.
    if not os.path.exists(params_file):
        return None
    with open(params_file, 'r') as handle:
        return json.load(handle)

def save_best_scores(patient_id, window_size, forecasting_length, best_scores):
    """Insert or update one row of test metrics in the forecasting scores CSV.

    The row is labelled
    ``'{patient_id}_window_{window_size}_forecasting_{forecasting_length}'``.
    If the CSV already contains that label the row is overwritten in place;
    otherwise the row is appended. The CSV is created when it does not exist.

    Parameters
    ----------
    patient_id : str
        Patient identifier used in the row label.
    window_size : int
        Window size used in the row label.
    forecasting_length : int
        Forecasting length used in the row label.
    best_scores : dict
        Mapping of metric name to scalar value; keys become CSV columns.
    """
    # exist_ok=True replaces the separate exists()-then-create check.
    os.makedirs('/content/drive/My Drive/Msc Project/Forecasting/best_scores_xgb', exist_ok=True)
    scores_file = '/content/drive/My Drive/Msc Project/Forecasting/best_scores_xgb/best_scores.csv'
    row_name = f'{patient_id}_window_{window_size}_forecasting_{forecasting_length}'
    scores_df = pd.DataFrame(best_scores, index=[row_name])

    if os.path.exists(scores_file):
        existing_df = pd.read_csv(scores_file, index_col=0)
        if row_name in existing_df.index:
            # Overwrite the existing row; values are aligned on column names.
            existing_df.loc[row_name] = scores_df.loc[row_name]
            scores_df = existing_df
        else:
            scores_df = pd.concat([existing_df, scores_df])

    # Single write path for the create, update and append cases.
    scores_df.to_csv(scores_file)

def process_patient_window(patient_id, window_size, forecasting_length):
    """Tune, train and evaluate an XGBoost forecaster for one configuration.

    Loads the patient's features, keeps the top-ranked ones, shifts the label
    ``forecasting_length`` rows into the future, makes an unshuffled
    80/10/10 train/validation/test split, oversamples the training part with
    SMOTE, searches hyper-parameters with Optuna (maximising validation ROC
    AUC), refits with the best parameters, prints reports for all splits and
    stores the best parameters and the test metrics.

    Parameters
    ----------
    patient_id : str
        Patient identifier as used in the data file names.
    window_size : int
        Window size as used in the data file names.
    forecasting_length : int
        Number of rows by which the label is shifted towards the future.

    Returns
    -------
    None
        Results are printed and written through ``save_best_params`` and
        ``save_best_scores``.
    """
    data, top_features_list = load_data(patient_id, window_size, forecasting_length)

    # Use the 'Num Features' value from the feature-selection row with the highest 'AUC'.
    selected_features = pd.read_csv(f'/content/drive/My Drive/Msc Project/Forecasting/select_feature/P{patient_id}_selected_features_with_window_{window_size}_with_forecasting_{forecasting_length}.csv')
    number_features = selected_features.loc[selected_features['AUC'].idxmax()]['Num Features']
    print(f'Number of features for patient {patient_id} with window {window_size}: {number_features}')

    X = data.drop(columns=['label', 'timestamp', 'seizure_event', 'Unnamed: 0'])
    X = X[top_features_list[:int(number_features)]]
    # Forecasting target: the label found forecasting_length rows later. The
    # last forecasting_length rows have no future label and become NaN.
    y = data['label'].shift(-forecasting_length)

    X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.2, shuffle=False, random_state=42)
    smote = SMOTE(random_state=42, n_jobs=-1)
    X_train, y_train = smote.fit_resample(X_train, y_train)
    X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, shuffle=False, random_state=42)
    print(len(X_test))
    print(len(y_test))
    # Remove the trailing test rows whose shifted label is NaN. A mask is used
    # instead of iloc[:-forecasting_length] because that slice becomes
    # iloc[:0] (an empty test set) when forecasting_length is 0.
    has_label = y_test.notna()
    X_test = X_test.loc[has_label]
    y_test = y_test.loc[has_label]
    print(len(X_test))
    print(len(y_test))

    def objective(trial):
        """Return the validation ROC AUC of a model built from ``trial``'s sampled parameters."""
        params = {
            'verbosity': 1,
            'lambda': trial.suggest_float('lambda', 1.0, 2.0, log=True),
            'alpha': trial.suggest_float('alpha', 1e-5, 100.0, log=True),
            'max_depth': trial.suggest_int('max_depth', 3, 10),
            'eta': trial.suggest_float('eta', 1e-5, 1.0, log=True),
            'gamma': trial.suggest_float('gamma', 0.1, 0.2, log=True),
            'subsample': trial.suggest_float('subsample', 0.5, 1.0),
            'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
            'grow_policy': trial.suggest_categorical('grow_policy', ['depthwise', 'lossguide']),
            'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),
        }

        xgb_model = XGBClassifier(**params, use_label_encoder=False, eval_metric='auc', tree_method='hist', random_state=42, device='gpu', booster='gbtree', objective='binary:logistic')
        xgb_model.fit(X_train, y_train)
        # Only probabilities are needed for the AUC; the unused hard-prediction
        # call was removed.
        y_pred_val_scores = xgb_model.predict_proba(X_val)[:, 1]
        return roc_auc_score(y_val, y_pred_val_scores)

    class EarlyStoppingCallback:
        """Optuna callback that stops the study after ``patience`` consecutive trials without a new best value."""

        def __init__(self, patience: int):
            self.patience = patience
            self.best_value = None
            self.no_improvement_count = 0

        def __call__(self, study, trial):
            # Reset the counter whenever the study's best value improved.
            if self.best_value is None or study.best_value > self.best_value:
                self.best_value = study.best_value
                self.no_improvement_count = 0
            else:
                self.no_improvement_count += 1
            if self.no_improvement_count >= self.patience:
                study.stop()

    study = optuna.create_study(direction='maximize', sampler=optuna.samplers.TPESampler(seed=SEED))

    # Queue the parameters saved by an earlier run (when a file exists) so
    # they are evaluated as a trial of this study.
    best_params = load_best_params(patient_id, window_size, forecasting_length)
    if best_params:
        study.enqueue_trial(best_params)

    early_stopping = EarlyStoppingCallback(patience=50)
    study.optimize(objective, n_trials=200, callbacks=[early_stopping])

    best_params = study.best_params
    print("Best parameters found by Optuna:", best_params)

    save_best_params(patient_id, window_size, forecasting_length, best_params)

    # Refit on the resampled training data with the best parameters found.
    xgb_model = XGBClassifier(**best_params, use_label_encoder=False, eval_metric='auc', tree_method='hist', random_state=42, device='gpu', booster='gbtree', objective='binary:logistic')
    xgb_model.fit(X_train, y_train)

    y_pred_train = xgb_model.predict(X_train)
    y_pred_val = xgb_model.predict(X_val)
    y_pred_test = xgb_model.predict(X_test)

    # ROC AUC from the positive-class probabilities for every split.
    y_pred_train_scores = xgb_model.predict_proba(X_train)[:, 1]
    auc_train = roc_auc_score(y_train, y_pred_train_scores)
    y_pred_val_scores = xgb_model.predict_proba(X_val)[:, 1]
    auc_val = roc_auc_score(y_val, y_pred_val_scores)
    y_pred_test_scores = xgb_model.predict_proba(X_test)[:, 1]
    auc_test = roc_auc_score(y_test, y_pred_test_scores)

    print(f"Train classification report for patient {patient_id} with window {window_size}:")
    print(classification_report(y_train, y_pred_train))

    print(f"Validation classification report for patient {patient_id} with window {window_size}:")
    print(classification_report(y_val, y_pred_val))

    print(f"Test classification report for patient {patient_id} with window {window_size}:")
    print(classification_report(y_test, y_pred_test))

    print(f"Train accuracy for patient {patient_id} with window {window_size}: {accuracy_score(y_train, y_pred_train)}")
    print(f"Validation accuracy for patient {patient_id} with window {window_size}: {accuracy_score(y_val, y_pred_val)}")
    print(f"Test accuracy for patient {patient_id} with window {window_size}: {accuracy_score(y_test, y_pred_test)}")

    print(f"Train AUC for patient {patient_id} with window {window_size}: {auc_train}")
    print(f"Validation AUC for patient {patient_id} with window {window_size}: {auc_val}")
    print(f"Test AUC for patient {patient_id} with window {window_size}: {auc_test}")

    # Build the test report once. The shifted labels are floats, hence the
    # '1.0' / '0.0' keys; their recalls are stored as sensitivity / specificity.
    test_report = classification_report(y_test, y_pred_test, output_dict=True)
    best_scores = {
        "auc": auc_test,
        "sensitivity": test_report['1.0']['recall'],
        "specificity": test_report['0.0']['recall'],
        "accuracy": accuracy_score(y_test, y_pred_test)
    }

    save_best_scores(patient_id, window_size, forecasting_length, best_scores)

# Patients, window sizes and forecasting lengths to evaluate.
patient_ids = ['3_','4_','13','23','29','31']
window_sizes = [288]
forecasting_lengths = [24, 120]

# Every (patient, window, forecasting length) combination, enumerated in the
# same order as three nested loops: patient outermost, forecasting length innermost.
run_grid = [
    (pid, window, horizon)
    for pid in patient_ids
    for window in window_sizes
    for horizon in forecasting_lengths
]

for patient_id, window_size, forecasting_length in run_grid:
    print(f"Processing patient {patient_id} with window size {window_size} with forecasting length {forecasting_length}")
    process_patient_window(patient_id, window_size, forecasting_length)


Processing patient 3_ with window size 288 with forecasting length 24
Number of features for patient 3_ with window 288: 7.0
12462
12462


[I 2024-07-20 20:38:43,926] A new study created in memory with name: no-name-df496bb2-6009-44c3-b4af-d03bc4c75673


12438
12438


[I 2024-07-20 20:38:45,266] Trial 0 finished with value: 0.5028203318803841 and parameters: {'lambda': 1.296426233049547, 'alpha': 45.18560951024108, 'max_depth': 8, 'eta': 0.009846738873614562, 'gamma': 0.11142080469295271, 'subsample': 0.5779972601681014, 'colsample_bytree': 0.5290418060840998, 'grow_policy': 'depthwise', 'min_child_weight': 8}. Best is trial 0 with value: 0.5028203318803841.
[I 2024-07-20 20:38:46,336] Trial 1 finished with value: 0.5227575818437438 and parameters: {'lambda': 1.0143703591411894, 'alpha': 61.56997328235202, 'max_depth': 9, 'eta': 0.00011526449540315612, 'gamma': 0.11343178571896925, 'subsample': 0.5917022549267169, 'colsample_bytree': 0.6521211214797689, 'grow_policy': 'depthwise', 'min_child_weight': 3}. Best is trial 1 with value: 0.5227575818437438.
[I 2024-07-20 20:38:46,859] Trial 2 finished with value: 0.5024461657882023 and parameters: {'lambda': 1.5282206868683035, 'alpha': 9.472334467618546e-05, 'max_depth': 5, 'eta': 0.0006789053271698484, 

Best parameters found by Optuna: {'lambda': 1.8264210482776209, 'alpha': 0.025154562154085562, 'max_depth': 8, 'eta': 0.048024786180828824, 'gamma': 0.14183878446298256, 'subsample': 0.9960016289401145, 'colsample_bytree': 0.5017161308413209, 'grow_policy': 'depthwise', 'min_child_weight': 9}
Train classification report for patient 3_ with window 288:
              precision    recall  f1-score   support

         0.0       0.99      0.98      0.98     83211
         1.0       0.98      0.99      0.98     83211

    accuracy                           0.98    166422
   macro avg       0.98      0.98      0.98    166422
weighted avg       0.98      0.98      0.98    166422

Validation classification report for patient 3_ with window 288:
              precision    recall  f1-score   support

         0.0       0.78      0.97      0.86      9575
         1.0       0.42      0.07      0.12      2886

    accuracy                           0.76     12461
   macro avg       0.60      0.52   

[I 2024-07-20 20:39:38,664] A new study created in memory with name: no-name-3ce5a3ff-3743-4f18-ae28-fc23015f7ff0


Number of features for patient 3_ with window 288: 4.0
12462
12462
12342
12342


[I 2024-07-20 20:39:39,291] Trial 0 finished with value: 0.5146959580897755 and parameters: {'lambda': 1.296426233049547, 'alpha': 45.18560951024108, 'max_depth': 8, 'eta': 0.009846738873614562, 'gamma': 0.11142080469295271, 'subsample': 0.5779972601681014, 'colsample_bytree': 0.5290418060840998, 'grow_policy': 'depthwise', 'min_child_weight': 8}. Best is trial 0 with value: 0.5146959580897755.
[I 2024-07-20 20:39:39,799] Trial 1 finished with value: 0.5300064273121782 and parameters: {'lambda': 1.0143703591411894, 'alpha': 61.56997328235202, 'max_depth': 9, 'eta': 0.00011526449540315612, 'gamma': 0.11343178571896925, 'subsample': 0.5917022549267169, 'colsample_bytree': 0.6521211214797689, 'grow_policy': 'depthwise', 'min_child_weight': 3}. Best is trial 1 with value: 0.5300064273121782.
[I 2024-07-20 20:39:40,224] Trial 2 finished with value: 0.6182306578856539 and parameters: {'lambda': 1.5282206868683035, 'alpha': 9.472334467618546e-05, 'max_depth': 5, 'eta': 0.0006789053271698484, 

Best parameters found by Optuna: {'lambda': 1.0127289399088535, 'alpha': 0.08984119199128002, 'max_depth': 3, 'eta': 4.85923198150895e-05, 'gamma': 0.17376540092664186, 'subsample': 0.9879506083453056, 'colsample_bytree': 0.6431046471475638, 'grow_policy': 'depthwise', 'min_child_weight': 7}
Train classification report for patient 3_ with window 288:
              precision    recall  f1-score   support

         0.0       0.65      0.76      0.70     83283
         1.0       0.71      0.58      0.64     83283

    accuracy                           0.67    166566
   macro avg       0.68      0.67      0.67    166566
weighted avg       0.68      0.67      0.67    166566

Validation classification report for patient 3_ with window 288:
              precision    recall  f1-score   support

         0.0       0.80      0.86      0.83      9526
         1.0       0.40      0.31      0.35      2935

    accuracy                           0.73     12461
   macro avg       0.60      0.58    

[I 2024-07-20 20:41:06,292] A new study created in memory with name: no-name-8f6107fd-026e-437c-8ec0-f49b8353157a


4618
4618
4594
4594


[I 2024-07-20 20:41:07,156] Trial 0 finished with value: 0.7891678168852082 and parameters: {'lambda': 1.296426233049547, 'alpha': 45.18560951024108, 'max_depth': 8, 'eta': 0.009846738873614562, 'gamma': 0.11142080469295271, 'subsample': 0.5779972601681014, 'colsample_bytree': 0.5290418060840998, 'grow_policy': 'depthwise', 'min_child_weight': 8}. Best is trial 0 with value: 0.7891678168852082.
[I 2024-07-20 20:41:07,802] Trial 1 finished with value: 0.7541763389589478 and parameters: {'lambda': 1.0143703591411894, 'alpha': 61.56997328235202, 'max_depth': 9, 'eta': 0.00011526449540315612, 'gamma': 0.11343178571896925, 'subsample': 0.5917022549267169, 'colsample_bytree': 0.6521211214797689, 'grow_policy': 'depthwise', 'min_child_weight': 3}. Best is trial 0 with value: 0.7891678168852082.
[I 2024-07-20 20:41:08,321] Trial 2 finished with value: 0.7719137545224501 and parameters: {'lambda': 1.5282206868683035, 'alpha': 9.472334467618546e-05, 'max_depth': 5, 'eta': 0.0006789053271698484, 

Best parameters found by Optuna: {'lambda': 1.1756082621801565, 'alpha': 0.011909644539346816, 'max_depth': 7, 'eta': 0.9551793982137865, 'gamma': 0.14278309007486523, 'subsample': 0.9870204482232658, 'colsample_bytree': 0.7674016019536977, 'grow_policy': 'depthwise', 'min_child_weight': 4}
Train classification report for patient 4_ with window 288:
              precision    recall  f1-score   support

         0.0       1.00      1.00      1.00     27395
         1.0       1.00      1.00      1.00     27395

    accuracy                           1.00     54790
   macro avg       1.00      1.00      1.00     54790
weighted avg       1.00      1.00      1.00     54790

Validation classification report for patient 4_ with window 288:
              precision    recall  f1-score   support

         0.0       0.97      0.89      0.93      4158
         1.0       0.43      0.73      0.54       460

    accuracy                           0.88      4618
   macro avg       0.70      0.81     

[I 2024-07-20 20:41:43,293] A new study created in memory with name: no-name-f9a49e7b-9294-47eb-a28a-5914c1c74eca


4618
4618
4498
4498


[I 2024-07-20 20:41:44,033] Trial 0 finished with value: 0.904346780433737 and parameters: {'lambda': 1.296426233049547, 'alpha': 45.18560951024108, 'max_depth': 8, 'eta': 0.009846738873614562, 'gamma': 0.11142080469295271, 'subsample': 0.5779972601681014, 'colsample_bytree': 0.5290418060840998, 'grow_policy': 'depthwise', 'min_child_weight': 8}. Best is trial 0 with value: 0.904346780433737.
[I 2024-07-20 20:41:44,720] Trial 1 finished with value: 0.9127606290649768 and parameters: {'lambda': 1.0143703591411894, 'alpha': 61.56997328235202, 'max_depth': 9, 'eta': 0.00011526449540315612, 'gamma': 0.11343178571896925, 'subsample': 0.5917022549267169, 'colsample_bytree': 0.6521211214797689, 'grow_policy': 'depthwise', 'min_child_weight': 3}. Best is trial 1 with value: 0.9127606290649768.
[I 2024-07-20 20:41:45,420] Trial 2 finished with value: 0.8867873873308656 and parameters: {'lambda': 1.5282206868683035, 'alpha': 9.472334467618546e-05, 'max_depth': 5, 'eta': 0.0006789053271698484, 'g

Best parameters found by Optuna: {'lambda': 1.0899506528037644, 'alpha': 0.01706283025135903, 'max_depth': 4, 'eta': 0.0023711862710306844, 'gamma': 0.1853730910291392, 'subsample': 0.5354996636427398, 'colsample_bytree': 0.7554913455600114, 'grow_policy': 'depthwise', 'min_child_weight': 3}
Train classification report for patient 4_ with window 288:
              precision    recall  f1-score   support

         0.0       0.80      0.77      0.78     27299
         1.0       0.78      0.81      0.79     27299

    accuracy                           0.79     54598
   macro avg       0.79      0.79      0.79     54598
weighted avg       0.79      0.79      0.79     54598

Validation classification report for patient 4_ with window 288:
              precision    recall  f1-score   support

         0.0       0.96      0.96      0.96      4158
         1.0       0.68      0.68      0.68       460

    accuracy                           0.94      4618
   macro avg       0.82      0.82    

[I 2024-07-20 20:42:58,147] A new study created in memory with name: no-name-cfda4f7b-5fd1-4323-9cd2-cd7b3a89f6ae


Number of features for patient 13 with window 288: 3.0
5949
5949
5925
5925


[I 2024-07-20 20:42:58,502] Trial 0 finished with value: 0.6816413179911709 and parameters: {'lambda': 1.296426233049547, 'alpha': 45.18560951024108, 'max_depth': 8, 'eta': 0.009846738873614562, 'gamma': 0.11142080469295271, 'subsample': 0.5779972601681014, 'colsample_bytree': 0.5290418060840998, 'grow_policy': 'depthwise', 'min_child_weight': 8}. Best is trial 0 with value: 0.6816413179911709.
[I 2024-07-20 20:42:58,780] Trial 1 finished with value: 0.6904260770109375 and parameters: {'lambda': 1.0143703591411894, 'alpha': 61.56997328235202, 'max_depth': 9, 'eta': 0.00011526449540315612, 'gamma': 0.11343178571896925, 'subsample': 0.5917022549267169, 'colsample_bytree': 0.6521211214797689, 'grow_policy': 'depthwise', 'min_child_weight': 3}. Best is trial 1 with value: 0.6904260770109375.
[I 2024-07-20 20:42:59,097] Trial 2 finished with value: 0.6766295086521906 and parameters: {'lambda': 1.5282206868683035, 'alpha': 9.472334467618546e-05, 'max_depth': 5, 'eta': 0.0006789053271698484, 

Best parameters found by Optuna: {'lambda': 1.0999413009916181, 'alpha': 90.38407870629867, 'max_depth': 6, 'eta': 4.034875024587857e-05, 'gamma': 0.11923556281429289, 'subsample': 0.5619227404191081, 'colsample_bytree': 0.840398832456672, 'grow_policy': 'depthwise', 'min_child_weight': 8}
Train classification report for patient 13 with window 288:
              precision    recall  f1-score   support

         0.0       0.70      0.53      0.60     29826
         1.0       0.62      0.77      0.69     29826

    accuracy                           0.65     59652
   macro avg       0.66      0.65      0.65     59652
weighted avg       0.66      0.65      0.65     59652

Validation classification report for patient 13 with window 288:
              precision    recall  f1-score   support

         0.0       0.92      0.82      0.86      5207
         1.0       0.27      0.48      0.35       741

    accuracy                           0.77      5948
   macro avg       0.59      0.65      

[I 2024-07-20 20:43:25,110] A new study created in memory with name: no-name-3da46f24-deaf-4b46-8ad7-fea0a427e7c5


Number of features for patient 13 with window 288: 5.0
5949
5949
5829
5829


[I 2024-07-20 20:43:25,595] Trial 0 finished with value: 0.642125841705355 and parameters: {'lambda': 1.296426233049547, 'alpha': 45.18560951024108, 'max_depth': 8, 'eta': 0.009846738873614562, 'gamma': 0.11142080469295271, 'subsample': 0.5779972601681014, 'colsample_bytree': 0.5290418060840998, 'grow_policy': 'depthwise', 'min_child_weight': 8}. Best is trial 0 with value: 0.642125841705355.
[I 2024-07-20 20:43:25,985] Trial 1 finished with value: 0.6512873125479637 and parameters: {'lambda': 1.0143703591411894, 'alpha': 61.56997328235202, 'max_depth': 9, 'eta': 0.00011526449540315612, 'gamma': 0.11343178571896925, 'subsample': 0.5917022549267169, 'colsample_bytree': 0.6521211214797689, 'grow_policy': 'depthwise', 'min_child_weight': 3}. Best is trial 1 with value: 0.6512873125479637.
[I 2024-07-20 20:43:26,306] Trial 2 finished with value: 0.6357020433668266 and parameters: {'lambda': 1.5282206868683035, 'alpha': 9.472334467618546e-05, 'max_depth': 5, 'eta': 0.0006789053271698484, 'g

Best parameters found by Optuna: {'lambda': 1.1097383737532247, 'alpha': 3.872186503010382, 'max_depth': 10, 'eta': 0.0012401669535568106, 'gamma': 0.14326012418519515, 'subsample': 0.7558755304632564, 'colsample_bytree': 0.9539865624032351, 'grow_policy': 'depthwise', 'min_child_weight': 10}
Train classification report for patient 13 with window 288:
              precision    recall  f1-score   support

         0.0       0.94      0.81      0.87     29826
         1.0       0.83      0.95      0.88     29826

    accuracy                           0.88     59652
   macro avg       0.88      0.88      0.87     59652
weighted avg       0.88      0.88      0.87     59652

Validation classification report for patient 13 with window 288:
              precision    recall  f1-score   support

         0.0       0.93      0.83      0.87      5207
         1.0       0.32      0.57      0.40       741

    accuracy                           0.79      5948
   macro avg       0.62      0.70   

[I 2024-07-20 20:44:35,983] A new study created in memory with name: no-name-854b86c5-55da-4078-b571-cf1d837f2709


19224
19224
19200
19200


[I 2024-07-20 20:44:37,718] Trial 0 finished with value: 0.47234525144206396 and parameters: {'lambda': 1.296426233049547, 'alpha': 45.18560951024108, 'max_depth': 8, 'eta': 0.009846738873614562, 'gamma': 0.11142080469295271, 'subsample': 0.5779972601681014, 'colsample_bytree': 0.5290418060840998, 'grow_policy': 'depthwise', 'min_child_weight': 8}. Best is trial 0 with value: 0.47234525144206396.
[I 2024-07-20 20:44:39,399] Trial 1 finished with value: 0.4749824733054457 and parameters: {'lambda': 1.0143703591411894, 'alpha': 61.56997328235202, 'max_depth': 9, 'eta': 0.00011526449540315612, 'gamma': 0.11343178571896925, 'subsample': 0.5917022549267169, 'colsample_bytree': 0.6521211214797689, 'grow_policy': 'depthwise', 'min_child_weight': 3}. Best is trial 1 with value: 0.4749824733054457.
[I 2024-07-20 20:44:40,754] Trial 2 finished with value: 0.4722577976799937 and parameters: {'lambda': 1.5282206868683035, 'alpha': 9.472334467618546e-05, 'max_depth': 5, 'eta': 0.0006789053271698484

Best parameters found by Optuna: {'lambda': 1.7497589169816758, 'alpha': 0.0007593803540181733, 'max_depth': 6, 'eta': 0.9670286937488761, 'gamma': 0.16397502103368344, 'subsample': 0.5246757777004822, 'colsample_bytree': 0.663358046783951, 'grow_policy': 'lossguide', 'min_child_weight': 6}
Train classification report for patient 23 with window 288:
              precision    recall  f1-score   support

         0.0       1.00      1.00      1.00    119533
         1.0       1.00      1.00      1.00    119533

    accuracy                           1.00    239066
   macro avg       1.00      1.00      1.00    239066
weighted avg       1.00      1.00      1.00    239066

Validation classification report for patient 23 with window 288:
              precision    recall  f1-score   support

         0.0       0.80      0.81      0.81     15196
         1.0       0.24      0.22      0.23      4028

    accuracy                           0.69     19224
   macro avg       0.52      0.52     

[I 2024-07-20 20:47:08,180] A new study created in memory with name: no-name-83a63b8d-724c-4fe3-8d3f-392da913eb86


19224
19224
19104
19104


[I 2024-07-20 20:47:09,833] Trial 0 finished with value: 0.4509508313482381 and parameters: {'lambda': 1.296426233049547, 'alpha': 45.18560951024108, 'max_depth': 8, 'eta': 0.009846738873614562, 'gamma': 0.11142080469295271, 'subsample': 0.5779972601681014, 'colsample_bytree': 0.5290418060840998, 'grow_policy': 'depthwise', 'min_child_weight': 8}. Best is trial 0 with value: 0.4509508313482381.
[I 2024-07-20 20:47:11,516] Trial 1 finished with value: 0.45421164934429775 and parameters: {'lambda': 1.0143703591411894, 'alpha': 61.56997328235202, 'max_depth': 9, 'eta': 0.00011526449540315612, 'gamma': 0.11343178571896925, 'subsample': 0.5917022549267169, 'colsample_bytree': 0.6521211214797689, 'grow_policy': 'depthwise', 'min_child_weight': 3}. Best is trial 1 with value: 0.45421164934429775.
[I 2024-07-20 20:47:12,784] Trial 2 finished with value: 0.45539574681624523 and parameters: {'lambda': 1.5282206868683035, 'alpha': 9.472334467618546e-05, 'max_depth': 5, 'eta': 0.000678905327169848

Best parameters found by Optuna: {'lambda': 1.6560400385096687, 'alpha': 3.6827298997167396e-05, 'max_depth': 3, 'eta': 0.00013294259124600874, 'gamma': 0.16323771913629265, 'subsample': 0.9303214710247406, 'colsample_bytree': 0.9954236097546785, 'grow_policy': 'lossguide', 'min_child_weight': 1}
Train classification report for patient 23 with window 288:
              precision    recall  f1-score   support

         0.0       0.71      0.51      0.59    119533
         1.0       0.62      0.79      0.69    119533

    accuracy                           0.65    239066
   macro avg       0.66      0.65      0.64    239066
weighted avg       0.66      0.65      0.64    239066

Validation classification report for patient 23 with window 288:
              precision    recall  f1-score   support

         0.0       0.81      0.41      0.55     15196
         1.0       0.22      0.62      0.33      4028

    accuracy                           0.46     19224
   macro avg       0.51      0.5

[I 2024-07-20 20:48:59,426] A new study created in memory with name: no-name-b8276339-9708-47f6-96cf-453e20eeb736


7784
7784
7760
7760


[I 2024-07-20 20:49:00,058] Trial 0 finished with value: 0.6180833481560536 and parameters: {'lambda': 1.296426233049547, 'alpha': 45.18560951024108, 'max_depth': 8, 'eta': 0.009846738873614562, 'gamma': 0.11142080469295271, 'subsample': 0.5779972601681014, 'colsample_bytree': 0.5290418060840998, 'grow_policy': 'depthwise', 'min_child_weight': 8}. Best is trial 0 with value: 0.6180833481560536.
[I 2024-07-20 20:49:00,623] Trial 1 finished with value: 0.6323309931963714 and parameters: {'lambda': 1.0143703591411894, 'alpha': 61.56997328235202, 'max_depth': 9, 'eta': 0.00011526449540315612, 'gamma': 0.11343178571896925, 'subsample': 0.5917022549267169, 'colsample_bytree': 0.6521211214797689, 'grow_policy': 'depthwise', 'min_child_weight': 3}. Best is trial 1 with value: 0.6323309931963714.
[I 2024-07-20 20:49:01,077] Trial 2 finished with value: 0.7257674000207519 and parameters: {'lambda': 1.5282206868683035, 'alpha': 9.472334467618546e-05, 'max_depth': 5, 'eta': 0.0006789053271698484, 

Best parameters found by Optuna: {'lambda': 1.4433622561329105, 'alpha': 2.318872628380384, 'max_depth': 4, 'eta': 0.0005242848517009747, 'gamma': 0.19407660237335467, 'subsample': 0.9981768397932795, 'colsample_bytree': 0.9941970121079954, 'grow_policy': 'lossguide', 'min_child_weight': 7}
Train classification report for patient 29 with window 288:
              precision    recall  f1-score   support

         0.0       0.75      0.56      0.64     32383
         1.0       0.65      0.82      0.72     32383

    accuracy                           0.69     64766
   macro avg       0.70      0.69      0.68     64766
weighted avg       0.70      0.69      0.68     64766

Validation classification report for patient 29 with window 288:
              precision    recall  f1-score   support

         0.0       1.00      0.67      0.80      7496
         1.0       0.10      0.99      0.18       288

    accuracy                           0.68      7784
   macro avg       0.55      0.83     

[I 2024-07-20 20:49:58,119] A new study created in memory with name: no-name-f5f742ea-27c6-4b96-8e1e-4a28d165aa8d


7784
7784
7664
7664


[I 2024-07-20 20:49:58,906] Trial 0 finished with value: 0.7075873336149651 and parameters: {'lambda': 1.296426233049547, 'alpha': 45.18560951024108, 'max_depth': 8, 'eta': 0.009846738873614562, 'gamma': 0.11142080469295271, 'subsample': 0.5779972601681014, 'colsample_bytree': 0.5290418060840998, 'grow_policy': 'depthwise', 'min_child_weight': 8}. Best is trial 0 with value: 0.7075873336149651.
[I 2024-07-20 20:49:59,620] Trial 1 finished with value: 0.7345484721481086 and parameters: {'lambda': 1.0143703591411894, 'alpha': 61.56997328235202, 'max_depth': 9, 'eta': 0.00011526449540315612, 'gamma': 0.11343178571896925, 'subsample': 0.5917022549267169, 'colsample_bytree': 0.6521211214797689, 'grow_policy': 'depthwise', 'min_child_weight': 3}. Best is trial 1 with value: 0.7345484721481086.
[I 2024-07-20 20:50:00,127] Trial 2 finished with value: 0.7436053858354086 and parameters: {'lambda': 1.5282206868683035, 'alpha': 9.472334467618546e-05, 'max_depth': 5, 'eta': 0.0006789053271698484, 

Best parameters found by Optuna: {'lambda': 1.4769484578128758, 'alpha': 0.12701823981551658, 'max_depth': 3, 'eta': 0.056456643931413655, 'gamma': 0.1111414514762098, 'subsample': 0.5897951085592509, 'colsample_bytree': 0.7845320886918048, 'grow_policy': 'lossguide', 'min_child_weight': 2}
Train classification report for patient 29 with window 288:
              precision    recall  f1-score   support

         0.0       0.83      0.79      0.81     32479
         1.0       0.80      0.83      0.82     32479

    accuracy                           0.81     64958
   macro avg       0.81      0.81      0.81     64958
weighted avg       0.81      0.81      0.81     64958

Validation classification report for patient 29 with window 288:
              precision    recall  f1-score   support

         0.0       0.98      0.67      0.79      7496
         1.0       0.07      0.69      0.13       288

    accuracy                           0.67      7784
   macro avg       0.53      0.68     

[I 2024-07-20 20:50:51,103] A new study created in memory with name: no-name-74b5c42f-afba-4b76-ab98-b71e62aae396


14225
14225
14201
14201


[I 2024-07-20 20:50:52,257] Trial 0 finished with value: 0.7306094702938775 and parameters: {'lambda': 1.296426233049547, 'alpha': 45.18560951024108, 'max_depth': 8, 'eta': 0.009846738873614562, 'gamma': 0.11142080469295271, 'subsample': 0.5779972601681014, 'colsample_bytree': 0.5290418060840998, 'grow_policy': 'depthwise', 'min_child_weight': 8}. Best is trial 0 with value: 0.7306094702938775.
[I 2024-07-20 20:50:53,460] Trial 1 finished with value: 0.7191348293959094 and parameters: {'lambda': 1.0143703591411894, 'alpha': 61.56997328235202, 'max_depth': 9, 'eta': 0.00011526449540315612, 'gamma': 0.11343178571896925, 'subsample': 0.5917022549267169, 'colsample_bytree': 0.6521211214797689, 'grow_policy': 'depthwise', 'min_child_weight': 3}. Best is trial 0 with value: 0.7306094702938775.
[I 2024-07-20 20:50:54,316] Trial 2 finished with value: 0.7151189591177121 and parameters: {'lambda': 1.5282206868683035, 'alpha': 9.472334467618546e-05, 'max_depth': 5, 'eta': 0.0006789053271698484, 

Best parameters found by Optuna: {'lambda': 1.000664798783858, 'alpha': 0.0010040919006327135, 'max_depth': 10, 'eta': 0.0034806816803432997, 'gamma': 0.17014573977328223, 'subsample': 0.8043372503333879, 'colsample_bytree': 0.563310358073504, 'grow_policy': 'depthwise', 'min_child_weight': 3}
Train classification report for patient 31 with window 288:
              precision    recall  f1-score   support

         0.0       0.95      0.97      0.96     81314
         1.0       0.97      0.95      0.96     81314

    accuracy                           0.96    162628
   macro avg       0.96      0.96      0.96    162628
weighted avg       0.96      0.96      0.96    162628

Validation classification report for patient 31 with window 288:
              precision    recall  f1-score   support

         0.0       0.76      0.93      0.84      9623
         1.0       0.74      0.39      0.51      4602

    accuracy                           0.76     14225
   macro avg       0.75      0.66  

[I 2024-07-20 20:55:42,458] A new study created in memory with name: no-name-f15f48dc-c2aa-467c-9bc7-98d89ea3dc05


14225
14225
14105
14105


[I 2024-07-20 20:55:43,451] Trial 0 finished with value: 0.6528843285858921 and parameters: {'lambda': 1.296426233049547, 'alpha': 45.18560951024108, 'max_depth': 8, 'eta': 0.009846738873614562, 'gamma': 0.11142080469295271, 'subsample': 0.5779972601681014, 'colsample_bytree': 0.5290418060840998, 'grow_policy': 'depthwise', 'min_child_weight': 8}. Best is trial 0 with value: 0.6528843285858921.
[I 2024-07-20 20:55:44,484] Trial 1 finished with value: 0.6663586040537423 and parameters: {'lambda': 1.0143703591411894, 'alpha': 61.56997328235202, 'max_depth': 9, 'eta': 0.00011526449540315612, 'gamma': 0.11343178571896925, 'subsample': 0.5917022549267169, 'colsample_bytree': 0.6521211214797689, 'grow_policy': 'depthwise', 'min_child_weight': 3}. Best is trial 1 with value: 0.6663586040537423.
[I 2024-07-20 20:55:45,475] Trial 2 finished with value: 0.6474350630443086 and parameters: {'lambda': 1.5282206868683035, 'alpha': 9.472334467618546e-05, 'max_depth': 5, 'eta': 0.0006789053271698484, 

Best parameters found by Optuna: {'lambda': 1.4832960581947787, 'alpha': 10.046912074111086, 'max_depth': 4, 'eta': 0.03170965272282716, 'gamma': 0.10460278325594857, 'subsample': 0.544436214506524, 'colsample_bytree': 0.5049939450509008, 'grow_policy': 'depthwise', 'min_child_weight': 9}
Train classification report for patient 31 with window 288:
              precision    recall  f1-score   support

         0.0       0.73      0.84      0.78     81314
         1.0       0.81      0.69      0.75     81314

    accuracy                           0.77    162628
   macro avg       0.77      0.77      0.76    162628
weighted avg       0.77      0.77      0.76    162628

Validation classification report for patient 31 with window 288:
              precision    recall  f1-score   support

         0.0       0.75      0.80      0.77      9571
         1.0       0.52      0.45      0.48      4654

    accuracy                           0.68     14225
   macro avg       0.63      0.62      0