In [1]:
import numpy as np
import pandas as pd

In [2]:
df=pd.read_csv('./data/preprocessed_Bank_Customer_Churn_Prediction.csv')

In [3]:
df


Unnamed: 0,credit_score,country,gender,age,tenure,balance,products_number,credit_card,active_member,estimated_salary,churn
0,619,0,1,42,2,0.00,1,1,1,101348.88,1
1,608,2,1,41,1,83807.86,1,0,1,112542.58,0
2,502,0,1,42,8,159660.80,3,1,0,113931.57,1
3,699,0,1,39,1,0.00,2,0,0,93826.63,0
4,850,2,1,43,2,125510.82,1,1,1,79084.10,0
...,...,...,...,...,...,...,...,...,...,...,...
9995,771,0,0,39,5,0.00,2,1,0,96270.64,0
9996,516,0,0,35,10,57369.61,1,1,1,101699.77,0
9997,709,0,1,36,7,0.00,1,0,1,42085.58,1
9998,772,1,0,42,3,75075.31,2,1,0,92888.52,1


### XGBOOST 사용

In [4]:
import pandas as pd
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report


# X, y 분리
y = df['churn']  # 목표 변수
X = df.drop(columns=['churn'])  # 특징 변수

# 학습 및 테스트 데이터 분할
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# XGBoost 모델 생성 및 학습
model = xgb.XGBClassifier(
    objective='binary:logistic', 
    eval_metric='logloss', 
    use_label_encoder=False, 
    random_state=42
)
model.fit(X_train, y_train)

# 예측
y_pred = model.predict(X_test)

# 평가
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred, digits=4)

print(f'Accuracy: {accuracy:.4f}')
print('Classification Report:\n', report)


Accuracy: 0.8470
Classification Report:
               precision    recall  f1-score   support

           0     0.8730    0.9454    0.9078      1593
           1     0.6836    0.4619    0.5513       407

    accuracy                         0.8470      2000
   macro avg     0.7783    0.7037    0.7295      2000
weighted avg     0.8345    0.8470    0.8352      2000



Parameters: { "use_label_encoder" } are not used.



In [10]:
import pandas as pd
import xgboost as xgb
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.metrics import accuracy_score, classification_report

# 데이터 준비
y = df['churn']
X = df.drop(columns=['churn'])

# 학습 및 테스트 데이터 분할
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# 하이퍼파라미터 설정
params = {
    'objective': 'binary:logistic',
    'eval_metric': 'logloss',
    'random_state': 42,
    'learning_rate': 0.1,
    'max_depth': 5,
    'subsample': 0.8,
    'colsample_bytree': 0.8,
    'n_estimators': 100
}

# ✅ XGBoost의 cv 기능 사용
cv_results = xgb.cv(
    params=params,
    dtrain=xgb.DMatrix(X_train, label=y_train),
    num_boost_round=100,
    nfold=5,
    stratified=True,
    metrics='logloss',
    early_stopping_rounds=10
)

# 최적 n_estimators 설정
best_n_estimators = len(cv_results)
params['n_estimators'] = best_n_estimators

# 최적 모델 학습
model = xgb.XGBClassifier(**params)
model.fit(X_train, y_train)

# 예측
y_pred = model.predict(X_test)

# 평가
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred, digits=4)

print(f'Best n_estimators: {best_n_estimators}')
print(f'Accuracy: {accuracy:.4f}')
print('Classification Report:\n', report)


Parameters: { "n_estimators" } are not used.



Best n_estimators: 79
Accuracy: 0.8740
Classification Report:
               precision    recall  f1-score   support

           0     0.8834    0.9699    0.9246      1593
           1     0.8088    0.4988    0.6170       407

    accuracy                         0.8740      2000
   macro avg     0.8461    0.7343    0.7708      2000
weighted avg     0.8682    0.8740    0.8620      2000



### LGB사용

In [6]:
import pandas as pd
import lightgbm as lgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report


# X, y 분리
y = df['churn']  # 목표 변수
X = df.drop(columns=['churn'])  # 특징 변수

# 학습 및 테스트 데이터 분할
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# LightGBM 데이터셋 생성
train_data = lgb.Dataset(X_train, label=y_train)
valid_data = lgb.Dataset(X_test, label=y_test, reference=train_data)  # 검증 데이터 설정

# LightGBM 모델 학습
params = {
    'objective': 'binary',
    'metric': 'binary_logloss',
    'verbosity': -1
}

# callbacks을 사용하여 early stopping 적용
model = lgb.train(params, train_data, num_boost_round=100,
                  valid_sets=[valid_data], valid_names=['valid'],
                  callbacks=[lgb.early_stopping(10), lgb.log_evaluation(10)])

# 예측
y_pred = model.predict(X_test)
y_pred = [1 if x > 0.5 else 0 for x in y_pred]

# 평가
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred, digits=4)


print(f'Accuracy: {accuracy:.4f}')
print('Classification Report:\n', report)


Training until validation scores don't improve for 10 rounds
[10]	valid's binary_logloss: 0.370556
[20]	valid's binary_logloss: 0.344806
[30]	valid's binary_logloss: 0.338654
[40]	valid's binary_logloss: 0.335742
[50]	valid's binary_logloss: 0.335947
Early stopping, best iteration is:
[45]	valid's binary_logloss: 0.335173
Accuracy: 0.8645
Classification Report:
               precision    recall  f1-score   support

           0     0.8781    0.9636    0.9189      1593
           1     0.7698    0.4767    0.5888       407

    accuracy                         0.8645      2000
   macro avg     0.8240    0.7201    0.7538      2000
weighted avg     0.8561    0.8645    0.8517      2000



### LGB gridsearch사용

In [7]:
import pandas as pd
import lightgbm as lgb
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, classification_report


# X, y 분리
y = df['churn']  # 목표 변수
X = df.drop(columns=['churn'])  # 특징 변수

# 학습 및 테스트 데이터 분할
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# LightGBM 모델 객체 생성
model = lgb.LGBMClassifier(objective='binary', metric='binary_logloss', verbosity=-1)

# 하이퍼파라미터 검색 공간 정의
param_grid = {
    'num_leaves': [20, 31, 50],
    'max_depth': [3, 5, 7],
    'learning_rate': [0.01, 0.05, 0.1],
    'min_child_samples': [10, 20, 50],
    'subsample': [0.7, 0.8, 0.9],
    'colsample_bytree': [0.7, 0.8, 0.9]
}

# Grid Search 수행
grid_search = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    scoring='accuracy',
    cv=3,  # 3-폴드 교차검증
    verbose=1,
    n_jobs=-1
)

grid_search.fit(X_train, y_train)

# 최적 하이퍼파라미터 출력
print("Best parameters found:", grid_search.best_params_)

# 최적 하이퍼파라미터 적용하여 모델 재학습
best_model = grid_search.best_estimator_

# 예측
y_pred = best_model.predict(X_test)

# 평가
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred,digits=4)

print(f'Accuracy: {accuracy:.4f}')
print('Classification Report:\n', report)


Fitting 3 folds for each of 729 candidates, totalling 2187 fits
Best parameters found: {'colsample_bytree': 0.8, 'learning_rate': 0.05, 'max_depth': 7, 'min_child_samples': 50, 'num_leaves': 50, 'subsample': 0.7}
Accuracy: 0.8640
Classification Report:
               precision    recall  f1-score   support

           0     0.8746    0.9680    0.9190      1593
           1     0.7848    0.4570    0.5776       407

    accuracy                         0.8640      2000
   macro avg     0.8297    0.7125    0.7483      2000
weighted avg     0.8564    0.8640    0.8495      2000



---

# optuna 사용


In [8]:
import pandas as pd
import lightgbm as lgb
import optuna
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report



# X, y 분리
y = df['churn']  # 목표 변수
X = df.drop(columns=['churn'])  # 특징 변수

# 학습 및 테스트 데이터 분할
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Optuna를 이용한 하이퍼파라미터 튜닝
def objective(trial):
    params = {
        'objective': 'binary',
        'metric': 'binary_logloss',
        'verbosity': -1,
        'num_leaves': trial.suggest_int('num_leaves', 20, 150),
        'max_depth': trial.suggest_int('max_depth', 3, 15),
        'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
        'min_child_samples': trial.suggest_int('min_child_samples', 5, 100),
        'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
        'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)
    }
    
    train_data = lgb.Dataset(X_train, label=y_train)
    valid_data = lgb.Dataset(X_test, label=y_test, reference=train_data)
    
    model = lgb.train(params, train_data, num_boost_round=100,
                      valid_sets=[valid_data], 
                      callbacks=[lgb.early_stopping(10), lgb.log_evaluation(10)])
    
    y_pred = model.predict(X_test)
    y_pred = [1 if x > 0.5 else 0 for x in y_pred]
    accuracy = accuracy_score(y_test, y_pred)
    return accuracy

# Optuna 실행
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=30)

# 최적 하이퍼파라미터 출력
print("Best parameters found:", study.best_params)

# 최적 하이퍼파라미터로 모델 학습
best_params = study.best_params
best_params.update({'objective': 'binary', 'metric': 'binary_logloss', 'verbosity': -1})

train_data = lgb.Dataset(X_train, label=y_train)
valid_data = lgb.Dataset(X_test, label=y_test, reference=train_data)

model = lgb.train(best_params, train_data, num_boost_round=100,
                  valid_sets=[valid_data], 
                  callbacks=[lgb.early_stopping(10), lgb.log_evaluation(10)])

# 최적 모델 예측
y_pred = model.predict(X_test)
y_pred = [1 if x > 0.5 else 0 for x in y_pred]

# 평가
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print(f'Accuracy: {accuracy:.4f}')
print('Classification Report:\n', report)


[I 2025-02-08 18:50:19,607] A new study created in memory with name: no-name-6bd9a99b-e3d9-4c90-b21b-5ef0ac5ab335
  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)
[I 2025-02-08 18:50:19,741] Trial 0 finished with value: 0.868 and parameters: {'num_leaves': 74, 'max_depth': 5, 'learning_rate': 0.07206128341273402, 'min_child_samples': 100, 'subsample': 0.5283064317051264, 'colsample_bytree': 0.754675838391475}. Best is trial 0 with value: 0.868.
  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)


Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.404552
[20]	valid_0's binary_logloss: 0.366253
[30]	valid_0's binary_logloss: 0.350865
[40]	valid_0's binary_logloss: 0.342715
[50]	valid_0's binary_logloss: 0.340199
[60]	valid_0's binary_logloss: 0.338635
[70]	valid_0's binary_logloss: 0.337512
[80]	valid_0's binary_logloss: 0.33622
[90]	valid_0's binary_logloss: 0.336577
Early stopping, best iteration is:
[83]	valid_0's binary_logloss: 0.33605
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.402387
[20]	valid_0's binary_logloss: 0.363613
[30]	valid_0's binary_logloss: 0.34958
[40]	valid_0's binary_logloss: 0.342296


[I 2025-02-08 18:50:19,927] Trial 1 finished with value: 0.869 and parameters: {'num_leaves': 89, 'max_depth': 7, 'learning_rate': 0.08473123855018388, 'min_child_samples': 64, 'subsample': 0.84723845068526, 'colsample_bytree': 0.5892572070783184}. Best is trial 1 with value: 0.869.


[50]	valid_0's binary_logloss: 0.339905
[60]	valid_0's binary_logloss: 0.339954
[70]	valid_0's binary_logloss: 0.339122
[80]	valid_0's binary_logloss: 0.33879
Early stopping, best iteration is:
[78]	valid_0's binary_logloss: 0.338658
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.457561
[20]	valid_0's binary_logloss: 0.424294
[30]	valid_0's binary_logloss: 0.405224
[40]	valid_0's binary_logloss: 0.388149
[50]	valid_0's binary_logloss: 0.377791
[60]	valid_0's binary_logloss: 0.367672
[70]	valid_0's binary_logloss: 0.362334
[80]	valid_0's binary_logloss: 0.356074


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)
[I 2025-02-08 18:50:20,095] Trial 2 finished with value: 0.8655 and parameters: {'num_leaves': 49, 'max_depth': 5, 'learning_rate': 0.030890226758426263, 'min_child_samples': 47, 'subsample': 0.8136566500386291, 'colsample_bytree': 0.5023314727055617}. Best is trial 1 with value: 0.869.
  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)
[I 2025-02-08 18:50:20,209] Trial 3 finished with value: 0.866 and parameters: {'num_leaves': 104, 'max_depth': 8, 'learning_rate': 0.13520813470300508, 'min_child_samples': 91, 'subsample': 0.5791415406680646, 'colsample_bytree': 0.5283983834418519}. Best is trial 1 with value: 0.869.


[90]	valid_0's binary_logloss: 0.351739
[100]	valid_0's binary_logloss: 0.34866
Did not meet early stopping. Best iteration is:
[100]	valid_0's binary_logloss: 0.34866
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.382611
[20]	valid_0's binary_logloss: 0.349751
[30]	valid_0's binary_logloss: 0.342894
[40]	valid_0's binary_logloss: 0.340541
Early stopping, best iteration is:
[37]	valid_0's binary_logloss: 0.339962
Training until validation scores don't improve for 10 rounds


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)


[10]	valid_0's binary_logloss: 0.463817
[20]	valid_0's binary_logloss: 0.43609
[30]	valid_0's binary_logloss: 0.416088
[40]	valid_0's binary_logloss: 0.400521
[50]	valid_0's binary_logloss: 0.388666
[60]	valid_0's binary_logloss: 0.379466
[70]	valid_0's binary_logloss: 0.372117
[80]	valid_0's binary_logloss: 0.366667
[90]	valid_0's binary_logloss: 0.362114
[100]	valid_0's binary_logloss: 0.358709
Did not meet early stopping. Best iteration is:
[100]	valid_0's binary_logloss: 0.358709


[I 2025-02-08 18:50:20,771] Trial 4 finished with value: 0.8555 and parameters: {'num_leaves': 131, 'max_depth': 13, 'learning_rate': 0.013912886106560952, 'min_child_samples': 37, 'subsample': 0.7469180851646247, 'colsample_bytree': 0.9675843472605881}. Best is trial 1 with value: 0.869.
  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)
[I 2025-02-08 18:50:20,832] Trial 5 finished with value: 0.866 and parameters: {'num_leaves': 113, 'max_depth': 5, 'learning_rate': 0.27243074459665184, 'min_child_samples': 66, 'subsample': 0.6469611727458529, 'colsample_bytree': 0.881435304314591}. Best is trial 1 with value: 0.869.
  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)


Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.343414
[20]	valid_0's binary_logloss: 0.336475
[30]	valid_0's binary_logloss: 0.336603
Early stopping, best iteration is:
[26]	valid_0's binary_logloss: 0.335666
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.375892
[20]	valid_0's binary_logloss: 0.349182
[30]	valid_0's binary_logloss: 0.346249


[I 2025-02-08 18:50:21,070] Trial 6 finished with value: 0.862 and parameters: {'num_leaves': 74, 'max_depth': 13, 'learning_rate': 0.096691477596462, 'min_child_samples': 65, 'subsample': 0.8850655571173567, 'colsample_bytree': 0.9314151291085653}. Best is trial 1 with value: 0.869.


[40]	valid_0's binary_logloss: 0.343883
[50]	valid_0's binary_logloss: 0.346151
Early stopping, best iteration is:
[43]	valid_0's binary_logloss: 0.343403
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.4573
[20]	valid_0's binary_logloss: 0.427085
[30]	valid_0's binary_logloss: 0.405553
[40]	valid_0's binary_logloss: 0.389478
[50]	valid_0's binary_logloss: 0.377623
[60]	valid_0's binary_logloss: 0.368771
[70]	valid_0's binary_logloss: 0.362001


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)
[I 2025-02-08 18:50:21,277] Trial 7 finished with value: 0.859 and parameters: {'num_leaves': 32, 'max_depth': 9, 'learning_rate': 0.017529313053912924, 'min_child_samples': 32, 'subsample': 0.9486294351889135, 'colsample_bytree': 0.9500165437074235}. Best is trial 1 with value: 0.869.
  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)
[I 2025-02-08 18:50:21,370] Trial 8 finished with value: 0.858 and parameters: {'num_leaves': 77, 'max_depth': 10, 'learning_rate': 0.18039094168926864, 'min_child_samples': 17, 'subsample': 0.5958664686621729, 'colsample_bytree': 0.5971397518142743}. Best is trial 1 with value: 0.869.


[80]	valid_0's binary_logloss: 0.356539
[90]	valid_0's binary_logloss: 0.351914
[100]	valid_0's binary_logloss: 0.348281
Did not meet early stopping. Best iteration is:
[100]	valid_0's binary_logloss: 0.348281
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.366825
[20]	valid_0's binary_logloss: 0.34441
[30]	valid_0's binary_logloss: 0.34795
Early stopping, best iteration is:
[21]	valid_0's binary_logloss: 0.343705
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.391166


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)
[I 2025-02-08 18:50:21,503] Trial 9 finished with value: 0.8645 and parameters: {'num_leaves': 82, 'max_depth': 10, 'learning_rate': 0.09547305107030692, 'min_child_samples': 88, 'subsample': 0.9512724878024948, 'colsample_bytree': 0.6998774559186168}. Best is trial 1 with value: 0.869.
  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)
[I 2025-02-08 18:50:21,603] Trial 10 finished with value: 0.8635 and parameters: {'num_leaves': 149, 'max_depth': 3, 'learning_rate': 0.03597391489797162, 'min_child_samples': 5, 'subsample': 0.73368854197138, 'colsample_bytree': 0.6679410873036021}. Best is trial 1 with value: 0.869.


[20]	valid_0's binary_logloss: 0.353525
[30]	valid_0's binary_logloss: 0.343602
[40]	valid_0's binary_logloss: 0.342559
Early stopping, best iteration is:
[36]	valid_0's binary_logloss: 0.34141
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.450121
[20]	valid_0's binary_logloss: 0.417858
[30]	valid_0's binary_logloss: 0.396634
[40]	valid_0's binary_logloss: 0.382657
[50]	valid_0's binary_logloss: 0.372758
[60]	valid_0's binary_logloss: 0.365286
[70]	valid_0's binary_logloss: 0.35909
[80]	valid_0's binary_logloss: 0.354616
[90]	valid_0's binary_logloss: 0.35121
[100]	valid_0's binary_logloss: 0.348192
Did not meet early stopping. Best iteration is:
[100]	valid_0's binary_logloss: 0.348192


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)
[I 2025-02-08 18:50:21,798] Trial 11 finished with value: 0.863 and parameters: {'num_leaves': 56, 'max_depth': 6, 'learning_rate': 0.06203208666046088, 'min_child_samples': 99, 'subsample': 0.8101746681711226, 'colsample_bytree': 0.8125477453178402}. Best is trial 1 with value: 0.869.


Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.409486
[20]	valid_0's binary_logloss: 0.369716
[30]	valid_0's binary_logloss: 0.3534
[40]	valid_0's binary_logloss: 0.345538
[50]	valid_0's binary_logloss: 0.341059
[60]	valid_0's binary_logloss: 0.338853
[70]	valid_0's binary_logloss: 0.33782
[80]	valid_0's binary_logloss: 0.337051
[90]	valid_0's binary_logloss: 0.337236
Early stopping, best iteration is:
[84]	valid_0's binary_logloss: 0.336439


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)


Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.414605
[20]	valid_0's binary_logloss: 0.374272
[30]	valid_0's binary_logloss: 0.356314
[40]	valid_0's binary_logloss: 0.347324
[50]	valid_0's binary_logloss: 0.342017
[60]	valid_0's binary_logloss: 0.339082
[70]	valid_0's binary_logloss: 0.337389
[80]	valid_0's binary_logloss: 0.336574
[90]	valid_0's binary_logloss: 0.336972
Early stopping, best iteration is:
[80]	valid_0's binary_logloss: 0.336574


[I 2025-02-08 18:50:22,035] Trial 12 finished with value: 0.8605 and parameters: {'num_leaves': 100, 'max_depth': 7, 'learning_rate': 0.05500645979835031, 'min_child_samples': 74, 'subsample': 0.5319397577549959, 'colsample_bytree': 0.7843920603209218}. Best is trial 1 with value: 0.869.
  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)
[I 2025-02-08 18:50:22,136] Trial 13 finished with value: 0.863 and parameters: {'num_leaves': 59, 'max_depth': 3, 'learning_rate': 0.030716617291922807, 'min_child_samples': 79, 'subsample': 0.6744136562236809, 'colsample_bytree': 0.6358340173075195}. Best is trial 1 with value: 0.869.
  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)
[I 2025

Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.459337
[20]	valid_0's binary_logloss: 0.431745
[30]	valid_0's binary_logloss: 0.412622
[40]	valid_0's binary_logloss: 0.395464
[50]	valid_0's binary_logloss: 0.384562
[60]	valid_0's binary_logloss: 0.375764
[70]	valid_0's binary_logloss: 0.368748
[80]	valid_0's binary_logloss: 0.363066
[90]	valid_0's binary_logloss: 0.358858
[100]	valid_0's binary_logloss: 0.355274
Did not meet early stopping. Best iteration is:
[100]	valid_0's binary_logloss: 0.355274
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.399493
[20]	valid_0's binary_logloss: 0.3618
[30]	valid_0's binary_logloss: 0.347147
[40]	valid_0's binary_logloss: 0.34143
[50]	valid_0's binary_logloss: 0.33834
[60]	valid_0's binary_logloss: 0.337098
[70]	valid_0's binary_logloss: 0.336238
[80]	valid_0's binary_logloss: 0.334886
[90]	valid_0's binary_logloss: 0.335402
Early stopping, best iteration i

  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)
[I 2025-02-08 18:50:22,400] Trial 15 finished with value: 0.8615 and parameters: {'num_leaves': 93, 'max_depth': 7, 'learning_rate': 0.15301411519008562, 'min_child_samples': 49, 'subsample': 0.8290649763576309, 'colsample_bytree': 0.581743817464555}. Best is trial 1 with value: 0.869.
  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)


Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.371523
[20]	valid_0's binary_logloss: 0.34648
[30]	valid_0's binary_logloss: 0.339869
[40]	valid_0's binary_logloss: 0.341821
Early stopping, best iteration is:
[33]	valid_0's binary_logloss: 0.339672
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.425276
[20]	valid_0's binary_logloss: 0.386841


[I 2025-02-08 18:50:22,689] Trial 16 finished with value: 0.8625 and parameters: {'num_leaves': 119, 'max_depth': 11, 'learning_rate': 0.04219529179506529, 'min_child_samples': 99, 'subsample': 0.8813131183675735, 'colsample_bytree': 0.8591390075276115}. Best is trial 1 with value: 0.869.


[30]	valid_0's binary_logloss: 0.366129
[40]	valid_0's binary_logloss: 0.354019
[50]	valid_0's binary_logloss: 0.348012
[60]	valid_0's binary_logloss: 0.344564
[70]	valid_0's binary_logloss: 0.343226
[80]	valid_0's binary_logloss: 0.341855
[90]	valid_0's binary_logloss: 0.34144
Early stopping, best iteration is:
[88]	valid_0's binary_logloss: 0.341266


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)


Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.464274
[20]	valid_0's binary_logloss: 0.435549
[30]	valid_0's binary_logloss: 0.412493
[40]	valid_0's binary_logloss: 0.396317
[50]	valid_0's binary_logloss: 0.384429
[60]	valid_0's binary_logloss: 0.374446
[70]	valid_0's binary_logloss: 0.36617
[80]	valid_0's binary_logloss: 0.358985
[90]	valid_0's binary_logloss: 0.354029


[I 2025-02-08 18:50:22,971] Trial 17 finished with value: 0.865 and parameters: {'num_leaves': 67, 'max_depth': 15, 'learning_rate': 0.021004553971927006, 'min_child_samples': 76, 'subsample': 0.6860540972425077, 'colsample_bytree': 0.7413587566128186}. Best is trial 1 with value: 0.869.
  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)
[I 2025-02-08 18:50:23,097] Trial 18 finished with value: 0.864 and parameters: {'num_leaves': 39, 'max_depth': 4, 'learning_rate': 0.06447523821043005, 'min_child_samples': 84, 'subsample': 0.9998055443452211, 'colsample_bytree': 0.565737347441347}. Best is trial 1 with value: 0.869.


[100]	valid_0's binary_logloss: 0.350324
Did not meet early stopping. Best iteration is:
[100]	valid_0's binary_logloss: 0.350324
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.422459
[20]	valid_0's binary_logloss: 0.384523
[30]	valid_0's binary_logloss: 0.36583
[40]	valid_0's binary_logloss: 0.353242
[50]	valid_0's binary_logloss: 0.347351
[60]	valid_0's binary_logloss: 0.342938
[70]	valid_0's binary_logloss: 0.34003
[80]	valid_0's binary_logloss: 0.337816
[90]	valid_0's binary_logloss: 0.336825
[100]	valid_0's binary_logloss: 0.336293
Did not meet early stopping. Best iteration is:
[100]	valid_0's binary_logloss: 0.336293
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.353066


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)
[I 2025-02-08 18:50:23,192] Trial 19 finished with value: 0.8625 and parameters: {'num_leaves': 89, 'max_depth': 7, 'learning_rate': 0.2629546475315007, 'min_child_samples': 37, 'subsample': 0.7814507646704135, 'colsample_bytree': 0.6813262251724573}. Best is trial 1 with value: 0.869.
  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)
[I 2025-02-08 18:50:23,346] Trial 20 finished with value: 0.8565 and parameters: {'num_leaves': 124, 'max_depth': 8, 'learning_rate': 0.12284147360531311, 'min_child_samples': 24, 'subsample': 0.8855466295063605, 'colsample_bytree': 0.8051850068740402}. Best is trial 1 with value: 0.869.


[20]	valid_0's binary_logloss: 0.343717
Early stopping, best iteration is:
[18]	valid_0's binary_logloss: 0.341629
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.365886
[20]	valid_0's binary_logloss: 0.346251
[30]	valid_0's binary_logloss: 0.345724
Early stopping, best iteration is:
[28]	valid_0's binary_logloss: 0.345499


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)
[I 2025-02-08 18:50:23,490] Trial 21 finished with value: 0.865 and parameters: {'num_leaves': 25, 'max_depth': 5, 'learning_rate': 0.0850862699736666, 'min_child_samples': 60, 'subsample': 0.5024111889620556, 'colsample_bytree': 0.7418217183761967}. Best is trial 1 with value: 0.869.
  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)


Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.398632
[20]	valid_0's binary_logloss: 0.360868
[30]	valid_0's binary_logloss: 0.345894
[40]	valid_0's binary_logloss: 0.340678
[50]	valid_0's binary_logloss: 0.338247
[60]	valid_0's binary_logloss: 0.336664
[70]	valid_0's binary_logloss: 0.335741
[80]	valid_0's binary_logloss: 0.334643
[90]	valid_0's binary_logloss: 0.334389
Early stopping, best iteration is:
[81]	valid_0's binary_logloss: 0.334304
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.409801
[20]	valid_0's binary_logloss: 0.369726
[30]	valid_0's binary_logloss: 0.353441
[40]	valid_0's binary_logloss: 0.342735
[50]	valid_0's binary_logloss: 0.339224
[60]	valid_0's binary_logloss: 0.336563
[70]	valid_0's binary_logloss: 0.33491
[80]	valid_0's binary_logloss: 0.333937
[90]	valid_0's binary_logloss: 0.333958
Early stopping, best iteration is:
[85]	valid_0's binary_logloss: 0.333799


[I 2025-02-08 18:50:23,618] Trial 22 finished with value: 0.8675 and parameters: {'num_leaves': 20, 'max_depth': 6, 'learning_rate': 0.07490988784405991, 'min_child_samples': 58, 'subsample': 0.5679433322457893, 'colsample_bytree': 0.6442909757094852}. Best is trial 1 with value: 0.869.
  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)
[I 2025-02-08 18:50:23,802] Trial 23 finished with value: 0.867 and parameters: {'num_leaves': 45, 'max_depth': 6, 'learning_rate': 0.04748319216478778, 'min_child_samples': 55, 'subsample': 0.6033595125451763, 'colsample_bytree': 0.6198593681277859}. Best is trial 1 with value: 0.869.
  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),


Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.433674
[20]	valid_0's binary_logloss: 0.395504
[30]	valid_0's binary_logloss: 0.375711
[40]	valid_0's binary_logloss: 0.360137
[50]	valid_0's binary_logloss: 0.352706
[60]	valid_0's binary_logloss: 0.347471
[70]	valid_0's binary_logloss: 0.343426
[80]	valid_0's binary_logloss: 0.340169
[90]	valid_0's binary_logloss: 0.338641
[100]	valid_0's binary_logloss: 0.337962
Did not meet early stopping. Best iteration is:
[99]	valid_0's binary_logloss: 0.337883
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.412047
[20]	valid_0's binary_logloss: 0.374894
[30]	valid_0's binary_logloss: 0.357304
[40]	valid_0's binary_logloss: 0.34818


  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)
[I 2025-02-08 18:50:23,931] Trial 24 finished with value: 0.866 and parameters: {'num_leaves': 63, 'max_depth': 4, 'learning_rate': 0.07203188399943887, 'min_child_samples': 68, 'subsample': 0.543328213351868, 'colsample_bytree': 0.6556296957355019}. Best is trial 1 with value: 0.869.
  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)
[I 2025-02-08 18:50:24,049] Trial 25 finished with value: 0.867 and parameters: {'num_leaves': 106, 'max_depth': 6, 'learning_rate': 0.11896860374949947, 'min_child_samples': 49, 'subsample': 0.6337465739285928, 'colsample_bytree': 0.6992357627340358}. Best is trial 1 with value: 0.869.


[50]	valid_0's binary_logloss: 0.34254
[60]	valid_0's binary_logloss: 0.338935
[70]	valid_0's binary_logloss: 0.337274
[80]	valid_0's binary_logloss: 0.336028
[90]	valid_0's binary_logloss: 0.33465
[100]	valid_0's binary_logloss: 0.333946
Did not meet early stopping. Best iteration is:
[100]	valid_0's binary_logloss: 0.333946
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.379101
[20]	valid_0's binary_logloss: 0.346726
[30]	valid_0's binary_logloss: 0.339143
[40]	valid_0's binary_logloss: 0.337702
[50]	valid_0's binary_logloss: 0.337818
Early stopping, best iteration is:
[42]	valid_0's binary_logloss: 0.337371


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)
[I 2025-02-08 18:50:24,191] Trial 26 finished with value: 0.8635 and parameters: {'num_leaves': 136, 'max_depth': 8, 'learning_rate': 0.1873213039122994, 'min_child_samples': 43, 'subsample': 0.7067558882364209, 'colsample_bytree': 0.5474675370516986}. Best is trial 1 with value: 0.869.
  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)


Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.368994
[20]	valid_0's binary_logloss: 0.343718
[30]	valid_0's binary_logloss: 0.341263
[40]	valid_0's binary_logloss: 0.34258
Early stopping, best iteration is:
[33]	valid_0's binary_logloss: 0.340092
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.437442
[20]	valid_0's binary_logloss: 0.401263
[30]	valid_0's binary_logloss: 0.381622
[40]	valid_0's binary_logloss: 0.365963
[50]	valid_0's binary_logloss: 0.357502
[60]	valid_0's binary_logloss: 0.351158
[70]	valid_0's binary_logloss: 0.346712
[80]	valid_0's binary_logloss: 0.343185


[I 2025-02-08 18:50:24,337] Trial 27 finished with value: 0.865 and parameters: {'num_leaves': 94, 'max_depth': 4, 'learning_rate': 0.048108854877583596, 'min_child_samples': 73, 'subsample': 0.5591750727139418, 'colsample_bytree': 0.5978180679522447}. Best is trial 1 with value: 0.869.
  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)
[I 2025-02-08 18:50:24,495] Trial 28 finished with value: 0.8655 and parameters: {'num_leaves': 69, 'max_depth': 9, 'learning_rate': 0.07356154229419133, 'min_child_samples': 94, 'subsample': 0.6140282002198343, 'colsample_bytree': 0.7770103443156802}. Best is trial 1 with value: 0.869.


[90]	valid_0's binary_logloss: 0.341405
[100]	valid_0's binary_logloss: 0.340279
Did not meet early stopping. Best iteration is:
[100]	valid_0's binary_logloss: 0.340279
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.396022
[20]	valid_0's binary_logloss: 0.358751
[30]	valid_0's binary_logloss: 0.345571
[40]	valid_0's binary_logloss: 0.339923
[50]	valid_0's binary_logloss: 0.338891
[60]	valid_0's binary_logloss: 0.33844
Early stopping, best iteration is:
[53]	valid_0's binary_logloss: 0.338076


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)
[I 2025-02-08 18:50:24,671] Trial 29 finished with value: 0.866 and parameters: {'num_leaves': 44, 'max_depth': 6, 'learning_rate': 0.028923383099727695, 'min_child_samples': 81, 'subsample': 0.7859789260241935, 'colsample_bytree': 0.5103473549198734}. Best is trial 1 with value: 0.869.


Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.460388
[20]	valid_0's binary_logloss: 0.428654
[30]	valid_0's binary_logloss: 0.409458
[40]	valid_0's binary_logloss: 0.392537
[50]	valid_0's binary_logloss: 0.381591
[60]	valid_0's binary_logloss: 0.371906
[70]	valid_0's binary_logloss: 0.366533
[80]	valid_0's binary_logloss: 0.35975
[90]	valid_0's binary_logloss: 0.354762
[100]	valid_0's binary_logloss: 0.351591
Did not meet early stopping. Best iteration is:
[100]	valid_0's binary_logloss: 0.351591
Best parameters found: {'num_leaves': 89, 'max_depth': 7, 'learning_rate': 0.08473123855018388, 'min_child_samples': 64, 'subsample': 0.84723845068526, 'colsample_bytree': 0.5892572070783184}
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.402387
[20]	valid_0's binary_logloss: 0.363613
[30]	valid_0's binary_logloss: 0.34958
[40]	valid_0's binary_logloss: 0.342296
[50]	valid_0's binary_logloss: 0.33990

In [9]:
import pandas as pd
import lightgbm as lgb
import optuna
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, precision_recall_curve


# X, y 분리
y = df['churn']  # 목표 변수
X = df.drop(columns=['churn'])  # 특징 변수

# 학습 및 테스트 데이터 분할
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Optuna를 이용한 하이퍼파라미터 튜닝
def objective(trial):
    params = {
        'objective': 'binary',
        'metric': 'binary_logloss',
        'verbosity': -1,
        'num_leaves': trial.suggest_int('num_leaves', 20, 150),
        'max_depth': trial.suggest_int('max_depth', 3, 15),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3, log=True),
        'min_child_samples': trial.suggest_int('min_child_samples', 5, 100),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0)
    }
    
    train_data = lgb.Dataset(X_train, label=y_train)
    valid_data = lgb.Dataset(X_test, label=y_test, reference=train_data)
    
    model = lgb.train(params, train_data, num_boost_round=100,
                      valid_sets=[valid_data], 
                      callbacks=[lgb.early_stopping(10), lgb.log_evaluation(10)])
    
    y_pred_proba = model.predict(X_test)
    precision, recall, thresholds = precision_recall_curve(y_test, y_pred_proba)
    best_threshold = thresholds[(precision + recall).argmax()]  # 최적의 threshold 선택
    
    y_pred = [1 if x > best_threshold else 0 for x in y_pred_proba]
    accuracy = accuracy_score(y_test, y_pred)
    return accuracy

# Optuna 실행
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=30)

# 최적 하이퍼파라미터 출력
print("Best parameters found:", study.best_params)

# 최적 하이퍼파라미터로 모델 학습
best_params = study.best_params
best_params.update({'objective': 'binary', 'metric': 'binary_logloss', 'verbosity': -1})

train_data = lgb.Dataset(X_train, label=y_train)
valid_data = lgb.Dataset(X_test, label=y_test, reference=train_data)

model = lgb.train(best_params, train_data, num_boost_round=100,
                  valid_sets=[valid_data], 
                  callbacks=[lgb.early_stopping(10), lgb.log_evaluation(10)])

# 최적 모델 예측
y_pred_proba = model.predict(X_test)
precision, recall, thresholds = precision_recall_curve(y_test, y_pred_proba)
best_threshold = thresholds[(precision + recall).argmax()]  # 최적 threshold 선택

print(f'Best Threshold: {best_threshold:.4f}')

y_pred = [1 if x > best_threshold else 0 for x in y_pred_proba]

# 평가
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print(f'Accuracy: {accuracy:.4f}')
print('Classification Report:\n', report)


[I 2025-02-08 18:50:24,860] A new study created in memory with name: no-name-781d9814-8eaa-4052-89ba-a843e3f69abd


Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.374636
[20]	valid_0's binary_logloss: 0.345134
[30]	valid_0's binary_logloss: 0.341209


[I 2025-02-08 18:50:24,987] Trial 0 finished with value: 0.865 and parameters: {'num_leaves': 112, 'max_depth': 14, 'learning_rate': 0.14285428959672658, 'min_child_samples': 98, 'subsample': 0.7676272782375093, 'colsample_bytree': 0.6217725186575114}. Best is trial 0 with value: 0.865.


[40]	valid_0's binary_logloss: 0.342151
Early stopping, best iteration is:
[33]	valid_0's binary_logloss: 0.340064
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.368927
[20]	valid_0's binary_logloss: 0.342516
[30]	valid_0's binary_logloss: 0.336167
[40]	valid_0's binary_logloss: 0.332948
[50]	valid_0's binary_logloss: 0.332025
[60]	valid_0's binary_logloss: 0.332129
Early stopping, best iteration is:
[55]	valid_0's binary_logloss: 0.33168


[I 2025-02-08 18:50:25,070] Trial 1 finished with value: 0.864 and parameters: {'num_leaves': 141, 'max_depth': 5, 'learning_rate': 0.12503668910271215, 'min_child_samples': 6, 'subsample': 0.8596630273119743, 'colsample_bytree': 0.8467538379348268}. Best is trial 0 with value: 0.865.
[I 2025-02-08 18:50:25,131] Trial 2 finished with value: 0.8665 and parameters: {'num_leaves': 53, 'max_depth': 10, 'learning_rate': 0.1478767621133668, 'min_child_samples': 56, 'subsample': 0.6721911360409354, 'colsample_bytree': 0.8610303946510053}. Best is trial 2 with value: 0.8665.


Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.353784
[20]	valid_0's binary_logloss: 0.34179
[30]	valid_0's binary_logloss: 0.342996
Early stopping, best iteration is:
[21]	valid_0's binary_logloss: 0.341277


[I 2025-02-08 18:50:25,216] Trial 3 finished with value: 0.8625 and parameters: {'num_leaves': 142, 'max_depth': 11, 'learning_rate': 0.21354393095526844, 'min_child_samples': 77, 'subsample': 0.6874470479748926, 'colsample_bytree': 0.8625505105922242}. Best is trial 2 with value: 0.8665.


Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.349854
[20]	valid_0's binary_logloss: 0.346632
Early stopping, best iteration is:
[15]	valid_0's binary_logloss: 0.344993
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.37881
[20]	valid_0's binary_logloss: 0.350767


[I 2025-02-08 18:50:25,325] Trial 4 finished with value: 0.836 and parameters: {'num_leaves': 64, 'max_depth': 9, 'learning_rate': 0.0942530423948135, 'min_child_samples': 84, 'subsample': 0.8259518295098098, 'colsample_bytree': 0.8661128434825578}. Best is trial 2 with value: 0.8665.


[30]	valid_0's binary_logloss: 0.342485
[40]	valid_0's binary_logloss: 0.340449
[50]	valid_0's binary_logloss: 0.341916
Early stopping, best iteration is:
[43]	valid_0's binary_logloss: 0.340325
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.46622
[20]	valid_0's binary_logloss: 0.440543


[I 2025-02-08 18:50:25,398] Trial 5 finished with value: 0.8205 and parameters: {'num_leaves': 32, 'max_depth': 3, 'learning_rate': 0.02317613927767659, 'min_child_samples': 47, 'subsample': 0.8262039401523734, 'colsample_bytree': 0.7046850039532115}. Best is trial 2 with value: 0.8665.
[I 2025-02-08 18:50:25,446] Trial 6 finished with value: 0.8705 and parameters: {'num_leaves': 137, 'max_depth': 4, 'learning_rate': 0.23980918805110849, 'min_child_samples': 89, 'subsample': 0.5540157845788954, 'colsample_bytree': 0.5181621140857728}. Best is trial 6 with value: 0.8705.


[30]	valid_0's binary_logloss: 0.420007
[40]	valid_0's binary_logloss: 0.405063
[50]	valid_0's binary_logloss: 0.393895
[60]	valid_0's binary_logloss: 0.384594
[70]	valid_0's binary_logloss: 0.377122
[80]	valid_0's binary_logloss: 0.371228
[90]	valid_0's binary_logloss: 0.366718
[100]	valid_0's binary_logloss: 0.363061
Did not meet early stopping. Best iteration is:
[100]	valid_0's binary_logloss: 0.363061
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.358105
[20]	valid_0's binary_logloss: 0.338639
[30]	valid_0's binary_logloss: 0.334886
[40]	valid_0's binary_logloss: 0.333847
Early stopping, best iteration is:
[36]	valid_0's binary_logloss: 0.332717
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.469602
[20]	valid_0's binary_logloss: 0.441373
[30]	valid_0's binary_logloss: 0.423579
[40]	valid_0's binary_logloss: 0.40697
[50]	valid_0's binary_logloss: 0.396103
[60]	valid_0's binary_logloss: 0.3

[I 2025-02-08 18:50:25,753] Trial 7 finished with value: 0.8685 and parameters: {'num_leaves': 107, 'max_depth': 14, 'learning_rate': 0.021440318137483046, 'min_child_samples': 60, 'subsample': 0.7175561964540054, 'colsample_bytree': 0.5488246225511244}. Best is trial 6 with value: 0.8705.


Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.443928
[20]	valid_0's binary_logloss: 0.408911
[30]	valid_0's binary_logloss: 0.386475
[40]	valid_0's binary_logloss: 0.371333
[50]	valid_0's binary_logloss: 0.362174
[60]	valid_0's binary_logloss: 0.35555
[70]	valid_0's binary_logloss: 0.350817
[80]	valid_0's binary_logloss: 0.347122
[90]	valid_0's binary_logloss: 0.345124
[100]	valid_0's binary_logloss: 0.343333
Did not meet early stopping. Best iteration is:
[100]	valid_0's binary_logloss: 0.343333


[I 2025-02-08 18:50:25,964] Trial 8 finished with value: 0.8155 and parameters: {'num_leaves': 57, 'max_depth': 9, 'learning_rate': 0.026184958970334387, 'min_child_samples': 43, 'subsample': 0.8244403585564803, 'colsample_bytree': 0.8786089860634532}. Best is trial 6 with value: 0.8705.
[I 2025-02-08 18:50:26,027] Trial 9 finished with value: 0.841 and parameters: {'num_leaves': 51, 'max_depth': 14, 'learning_rate': 0.2144619875254618, 'min_child_samples': 5, 'subsample': 0.9448417256819198, 'colsample_bytree': 0.7406005090785324}. Best is trial 6 with value: 0.8705.


Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.356072
[20]	valid_0's binary_logloss: 0.342091
Early stopping, best iteration is:
[19]	valid_0's binary_logloss: 0.341238
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.414943
[20]	valid_0's binary_logloss: 0.377828
[30]	valid_0's binary_logloss: 0.358776
[40]	valid_0's binary_logloss: 0.349331
[50]	valid_0's binary_logloss: 0.343944
[60]	valid_0's binary_logloss: 0.34132
[70]	valid_0's binary_logloss: 0.339745
[80]	valid_0's binary_logloss: 0.339174


[I 2025-02-08 18:50:26,181] Trial 10 finished with value: 0.7905 and parameters: {'num_leaves': 101, 'max_depth': 6, 'learning_rate': 0.05030403534385045, 'min_child_samples': 99, 'subsample': 0.5088467671340533, 'colsample_bytree': 0.9830005286969703}. Best is trial 6 with value: 0.8705.


Early stopping, best iteration is:
[79]	valid_0's binary_logloss: 0.338986
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.485068
[20]	valid_0's binary_logloss: 0.467232
[30]	valid_0's binary_logloss: 0.45401
[40]	valid_0's binary_logloss: 0.440722
[50]	valid_0's binary_logloss: 0.431045
[60]	valid_0's binary_logloss: 0.420498
[70]	valid_0's binary_logloss: 0.413724
[80]	valid_0's binary_logloss: 0.40501
[90]	valid_0's binary_logloss: 0.398326
[100]	valid_0's binary_logloss: 0.393011
Did not meet early stopping. Best iteration is:
[100]	valid_0's binary_logloss: 0.393011


[I 2025-02-08 18:50:26,506] Trial 11 finished with value: 0.8675 and parameters: {'num_leaves': 120, 'max_depth': 15, 'learning_rate': 0.011307668962471143, 'min_child_samples': 66, 'subsample': 0.52650862908288, 'colsample_bytree': 0.5278304457784355}. Best is trial 6 with value: 0.8705.


Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.424941


[I 2025-02-08 18:50:26,723] Trial 12 finished with value: 0.869 and parameters: {'num_leaves': 92, 'max_depth': 7, 'learning_rate': 0.06152587511329845, 'min_child_samples': 25, 'subsample': 0.6113925201062463, 'colsample_bytree': 0.5279895422075104}. Best is trial 6 with value: 0.8705.


[20]	valid_0's binary_logloss: 0.384053
[30]	valid_0's binary_logloss: 0.367056
[40]	valid_0's binary_logloss: 0.354877
[50]	valid_0's binary_logloss: 0.348903
[60]	valid_0's binary_logloss: 0.345147
[70]	valid_0's binary_logloss: 0.344424
[80]	valid_0's binary_logloss: 0.342672
[90]	valid_0's binary_logloss: 0.341156
[100]	valid_0's binary_logloss: 0.341411
Did not meet early stopping. Best iteration is:
[91]	valid_0's binary_logloss: 0.341114
Training until validation scores don't improve for 10 rounds

[I 2025-02-08 18:50:26,908] Trial 13 finished with value: 0.8405 and parameters: {'num_leaves': 83, 'max_depth': 6, 'learning_rate': 0.06094829043221203, 'min_child_samples': 28, 'subsample': 0.6007462441869997, 'colsample_bytree': 0.6141475558233177}. Best is trial 6 with value: 0.8705.



[10]	valid_0's binary_logloss: 0.421243
[20]	valid_0's binary_logloss: 0.380698
[30]	valid_0's binary_logloss: 0.361797
[40]	valid_0's binary_logloss: 0.348923
[50]	valid_0's binary_logloss: 0.344343
[60]	valid_0's binary_logloss: 0.341312
[70]	valid_0's binary_logloss: 0.338943
[80]	valid_0's binary_logloss: 0.337213
[90]	valid_0's binary_logloss: 0.336425
Early stopping, best iteration is:
[85]	valid_0's binary_logloss: 0.336095
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.355258
[20]	valid_0's binary_logloss: 0.338979
[30]	valid_0's binary_logloss: 0.33514
[40]	valid_0's binary_logloss: 0.334719
Early stopping, best iteration is:
[37]	valid_0's binary_logloss: 0.333486


[I 2025-02-08 18:50:26,980] Trial 14 finished with value: 0.8705 and parameters: {'num_leaves': 87, 'max_depth': 3, 'learning_rate': 0.2899353587197576, 'min_child_samples': 27, 'subsample': 0.5988508353889053, 'colsample_bytree': 0.5039582480530922}. Best is trial 6 with value: 0.8705.
[I 2025-02-08 18:50:27,059] Trial 15 finished with value: 0.8405 and parameters: {'num_leaves': 80, 'max_depth': 3, 'learning_rate': 0.29570892755569306, 'min_child_samples': 32, 'subsample': 0.5951218187707634, 'colsample_bytree': 0.6258791652747746}. Best is trial 6 with value: 0.8705.
[I 2025-02-08 18:50:27,153] Trial 16 finished with value: 0.8345 and parameters: {'num_leaves': 128, 'max_depth': 4, 'learning_rate': 0.25507674285754534, 'min_child_samples': 18, 'subsample': 0.5712809556871967, 'colsample_bytree': 0.5045467443459846}. Best is trial 6 with value: 0.8705.


Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.349158
[20]	valid_0's binary_logloss: 0.334113
[30]	valid_0's binary_logloss: 0.331168
[40]	valid_0's binary_logloss: 0.331424
Early stopping, best iteration is:
[35]	valid_0's binary_logloss: 0.330203
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.354262
[20]	valid_0's binary_logloss: 0.336293
[30]	valid_0's binary_logloss: 0.333972
[40]	valid_0's binary_logloss: 0.335351
Early stopping, best iteration is:
[35]	valid_0's binary_logloss: 0.332871
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.386522
[20]	valid_0's binary_logloss: 0.349904


[I 2025-02-08 18:50:27,263] Trial 17 finished with value: 0.775 and parameters: {'num_leaves': 37, 'max_depth': 7, 'learning_rate': 0.1048445380658842, 'min_child_samples': 73, 'subsample': 0.6507844971160924, 'colsample_bytree': 0.6760408483885827}. Best is trial 6 with value: 0.8705.
[I 2025-02-08 18:50:27,357] Trial 18 finished with value: 0.8635 and parameters: {'num_leaves': 130, 'max_depth': 4, 'learning_rate': 0.19427575655253068, 'min_child_samples': 40, 'subsample': 0.5846874895304734, 'colsample_bytree': 0.5914634780267092}. Best is trial 6 with value: 0.8705.


[30]	valid_0's binary_logloss: 0.340628
[40]	valid_0's binary_logloss: 0.33759
[50]	valid_0's binary_logloss: 0.338284
Early stopping, best iteration is:
[48]	valid_0's binary_logloss: 0.337439
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.362119
[20]	valid_0's binary_logloss: 0.337751
[30]	valid_0's binary_logloss: 0.333385
[40]	valid_0's binary_logloss: 0.332006
[50]	valid_0's binary_logloss: 0.333674
Early stopping, best iteration is:
[43]	valid_0's binary_logloss: 0.331715
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.404239


[I 2025-02-08 18:50:27,513] Trial 19 finished with value: 0.867 and parameters: {'num_leaves': 73, 'max_depth': 12, 'learning_rate': 0.08159679221787072, 'min_child_samples': 87, 'subsample': 0.5444058941553562, 'colsample_bytree': 0.5667627772136935}. Best is trial 6 with value: 0.8705.


[20]	valid_0's binary_logloss: 0.365595
[30]	valid_0's binary_logloss: 0.352069
[40]	valid_0's binary_logloss: 0.344076
[50]	valid_0's binary_logloss: 0.342931
[60]	valid_0's binary_logloss: 0.343209
Early stopping, best iteration is:
[52]	valid_0's binary_logloss: 0.342466
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.362773
[20]	valid_0's binary_logloss: 0.339844
[30]	valid_0's binary_logloss: 0.337447


[I 2025-02-08 18:50:27,639] Trial 20 finished with value: 0.818 and parameters: {'num_leaves': 148, 'max_depth': 7, 'learning_rate': 0.16658481567942618, 'min_child_samples': 17, 'subsample': 0.6394328792101068, 'colsample_bytree': 0.6699774605755346}. Best is trial 6 with value: 0.8705.
[I 2025-02-08 18:50:27,776] Trial 21 finished with value: 0.831 and parameters: {'num_leaves': 94, 'max_depth': 5, 'learning_rate': 0.04936202237747705, 'min_child_samples': 30, 'subsample': 0.6387226782937591, 'colsample_bytree': 0.5061792745091687}. Best is trial 6 with value: 0.8705.


[40]	valid_0's binary_logloss: 0.338854
Early stopping, best iteration is:
[33]	valid_0's binary_logloss: 0.33657
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.436899
[20]	valid_0's binary_logloss: 0.397529
[30]	valid_0's binary_logloss: 0.378317
[40]	valid_0's binary_logloss: 0.363884
[50]	valid_0's binary_logloss: 0.356037
[60]	valid_0's binary_logloss: 0.349295
[70]	valid_0's binary_logloss: 0.345861
[80]	valid_0's binary_logloss: 0.342532
[90]	valid_0's binary_logloss: 0.340325
[100]	valid_0's binary_logloss: 0.339172
Did not meet early stopping. Best iteration is:
[99]	valid_0's binary_logloss: 0.339125
Training until validation scores don't improve for 10 rounds


[I 2025-02-08 18:50:27,867] Trial 22 finished with value: 0.841 and parameters: {'num_leaves': 97, 'max_depth': 7, 'learning_rate': 0.2894497135450009, 'min_child_samples': 19, 'subsample': 0.5042345407718771, 'colsample_bytree': 0.5563446509041458}. Best is trial 6 with value: 0.8705.
[I 2025-02-08 18:50:27,976] Trial 23 finished with value: 0.863 and parameters: {'num_leaves': 90, 'max_depth': 3, 'learning_rate': 0.033561997455435554, 'min_child_samples': 37, 'subsample': 0.7391923898828822, 'colsample_bytree': 0.5815949651834332}. Best is trial 6 with value: 0.8705.


[10]	valid_0's binary_logloss: 0.351357
[20]	valid_0's binary_logloss: 0.344683
Early stopping, best iteration is:
[17]	valid_0's binary_logloss: 0.344401
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.455529
[20]	valid_0's binary_logloss: 0.425787
[30]	valid_0's binary_logloss: 0.40622
[40]	valid_0's binary_logloss: 0.389301
[50]	valid_0's binary_logloss: 0.378792
[60]	valid_0's binary_logloss: 0.37059
[70]	valid_0's binary_logloss: 0.363969
[80]	valid_0's binary_logloss: 0.358704
[90]	valid_0's binary_logloss: 0.355032
[100]	valid_0's binary_logloss: 0.351684
Did not meet early stopping. Best iteration is:
[100]	valid_0's binary_logloss: 0.351684
Training until validation scores don't improve for 10 rounds


[I 2025-02-08 18:50:28,129] Trial 24 finished with value: 0.859 and parameters: {'num_leaves': 20, 'max_depth': 8, 'learning_rate': 0.010496595356084035, 'min_child_samples': 50, 'subsample': 0.5511893929150216, 'colsample_bytree': 0.7992312635033978}. Best is trial 6 with value: 0.8705.


[10]	valid_0's binary_logloss: 0.47975
[20]	valid_0's binary_logloss: 0.458722
[30]	valid_0's binary_logloss: 0.44181
[40]	valid_0's binary_logloss: 0.428377
[50]	valid_0's binary_logloss: 0.417144
[60]	valid_0's binary_logloss: 0.406584
[70]	valid_0's binary_logloss: 0.397288
[80]	valid_0's binary_logloss: 0.38928
[90]	valid_0's binary_logloss: 0.382904
[100]	valid_0's binary_logloss: 0.377818
Did not meet early stopping. Best iteration is:
[100]	valid_0's binary_logloss: 0.377818
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.421775
[20]	valid_0's binary_logloss: 0.380351
[30]	valid_0's binary_logloss: 0.362759
[40]	valid_0's binary_logloss: 0.350907
[50]	valid_0's binary_logloss: 0.345187


[I 2025-02-08 18:50:28,281] Trial 25 finished with value: 0.87 and parameters: {'num_leaves': 70, 'max_depth': 5, 'learning_rate': 0.06690857658666739, 'min_child_samples': 26, 'subsample': 0.6126424697763153, 'colsample_bytree': 0.5015633463704584}. Best is trial 6 with value: 0.8705.
[I 2025-02-08 18:50:28,407] Trial 26 finished with value: 0.869 and parameters: {'num_leaves': 70, 'max_depth': 4, 'learning_rate': 0.035853204614443475, 'min_child_samples': 11, 'subsample': 0.7020200877103697, 'colsample_bytree': 0.5026733426566372}. Best is trial 6 with value: 0.8705.


[60]	valid_0's binary_logloss: 0.340722
[70]	valid_0's binary_logloss: 0.339278
[80]	valid_0's binary_logloss: 0.337719
[90]	valid_0's binary_logloss: 0.336296
[100]	valid_0's binary_logloss: 0.336159
Did not meet early stopping. Best iteration is:
[99]	valid_0's binary_logloss: 0.336052
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.453253
[20]	valid_0's binary_logloss: 0.419425
[30]	valid_0's binary_logloss: 0.399355
[40]	valid_0's binary_logloss: 0.382902
[50]	valid_0's binary_logloss: 0.372464
[60]	valid_0's binary_logloss: 0.363057
[70]	valid_0's binary_logloss: 0.35777
[80]	valid_0's binary_logloss: 0.351942
[90]	valid_0's binary_logloss: 0.347559
[100]	valid_0's binary_logloss: 0.345099
Did not meet early stopping. Best iteration is:
[100]	valid_0's binary_logloss: 0.345099


[I 2025-02-08 18:50:28,543] Trial 27 finished with value: 0.8195 and parameters: {'num_leaves': 42, 'max_depth': 5, 'learning_rate': 0.0733212669581086, 'min_child_samples': 35, 'subsample': 0.9981095854220816, 'colsample_bytree': 0.6374861212499175}. Best is trial 6 with value: 0.8705.


Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.411642
[20]	valid_0's binary_logloss: 0.373065
[30]	valid_0's binary_logloss: 0.356103
[40]	valid_0's binary_logloss: 0.345166
[50]	valid_0's binary_logloss: 0.340816
[60]	valid_0's binary_logloss: 0.337591
[70]	valid_0's binary_logloss: 0.336328
[80]	valid_0's binary_logloss: 0.335181
[90]	valid_0's binary_logloss: 0.334202
Early stopping, best iteration is:
[89]	valid_0's binary_logloss: 0.334098
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.470183
[20]	valid_0's binary_logloss: 0.446247
[30]	valid_0's binary_logloss: 0.428242
[40]	valid_0's binary_logloss: 0.413347
[50]	valid_0's binary_logloss: 0.401769
[60]	valid_0's binary_logloss: 0.392368
[70]	valid_0's binary_logloss: 0.384858
[80]	valid_0's binary_logloss: 0.378726
[90]	valid_0's binary_logloss: 0.373686
[100]	valid_0's binary_logloss: 0.369296
Did not meet early stopping. Best iteratio

[I 2025-02-08 18:50:28,664] Trial 28 finished with value: 0.8615 and parameters: {'num_leaves': 115, 'max_depth': 4, 'learning_rate': 0.014335735025662648, 'min_child_samples': 22, 'subsample': 0.5584230818236376, 'colsample_bytree': 0.9744881319439486}. Best is trial 6 with value: 0.8705.
[I 2025-02-08 18:50:28,762] Trial 29 finished with value: 0.813 and parameters: {'num_leaves': 76, 'max_depth': 6, 'learning_rate': 0.13536566303400632, 'min_child_samples': 90, 'subsample': 0.7802246671795031, 'colsample_bytree': 0.5954340176720967}. Best is trial 6 with value: 0.8705.


Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.377209
[20]	valid_0's binary_logloss: 0.347396
[30]	valid_0's binary_logloss: 0.340755
[40]	valid_0's binary_logloss: 0.338715
Early stopping, best iteration is:
[35]	valid_0's binary_logloss: 0.338588
Best parameters found: {'num_leaves': 137, 'max_depth': 4, 'learning_rate': 0.23980918805110849, 'min_child_samples': 89, 'subsample': 0.5540157845788954, 'colsample_bytree': 0.5181621140857728}
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.358105
[20]	valid_0's binary_logloss: 0.338639
[30]	valid_0's binary_logloss: 0.334886
[40]	valid_0's binary_logloss: 0.333847
Early stopping, best iteration is:
[36]	valid_0's binary_logloss: 0.332717
Best Threshold: 0.5387
Accuracy: 0.8705
Classification Report:
               precision    recall  f1-score   support

           0       0.88      0.98      0.92      1593
           1       0.83      0.45      0

Precision과 Recall 균형 조정

Precision 증가(0.80 → 0.84)
Recall 감소(0.48 → 0.44)
이탈 고객을 보다 정확하게 예측하지만, 일부를 놓칠 가능성이 있음
비즈니스 목표에 따라 Threshold를 조정할 필요 있음!
→ 이탈 고객 Recall을 높이고 싶다면 더 낮은 Threshold를 시도하는 것도 가능