In [1]:
import numpy as np
import pandas as pd

In [2]:
df=pd.read_csv('./data/preprocessed_Bank_Customer_Churn_Prediction.csv')

In [3]:
df


Unnamed: 0,credit_score,country,gender,age,tenure,balance,products_number,credit_card,active_member,estimated_salary,churn
0,619,0,1,42,2,0.00,1,1,1,101348.88,1
1,608,2,1,41,1,83807.86,1,0,1,112542.58,0
2,502,0,1,42,8,159660.80,3,1,0,113931.57,1
3,699,0,1,39,1,0.00,2,0,0,93826.63,0
4,850,2,1,43,2,125510.82,1,1,1,79084.10,0
...,...,...,...,...,...,...,...,...,...,...,...
9995,771,0,0,39,5,0.00,2,1,0,96270.64,0
9996,516,0,0,35,10,57369.61,1,1,1,101699.77,0
9997,709,0,1,36,7,0.00,1,0,1,42085.58,1
9998,772,1,0,42,3,75075.31,2,1,0,92888.52,1


### XGBOOST 사용

In [4]:
import pandas as pd
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report


# X, y 분리
y = df['churn']  # 목표 변수
X = df.drop(columns=['churn'])  # 특징 변수

# 학습 및 테스트 데이터 분할
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.35, random_state=42, stratify=y)

# XGBoost 모델 생성 및 학습
model = xgb.XGBClassifier(
    objective='binary:logistic', 
    eval_metric='logloss', 
    use_label_encoder=False, 
    random_state=42
)
model.fit(X_train, y_train)

# 예측
y_pred = model.predict(X_test)

# 평가
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred, digits=4)

print(f'Accuracy: {accuracy:.4f}')
print('Classification Report:\n', report)


Accuracy: 0.8520
Classification Report:
               precision    recall  f1-score   support

           0     0.8813    0.9408    0.9101      2787
           1     0.6857    0.5049    0.5816       713

    accuracy                         0.8520      3500
   macro avg     0.7835    0.7229    0.7458      3500
weighted avg     0.8415    0.8520    0.8432      3500



Parameters: { "use_label_encoder" } are not used.



In [5]:
import pandas as pd
import xgboost as xgb
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.metrics import accuracy_score, classification_report

# 데이터 준비
y = df['churn']
X = df.drop(columns=['churn'])

# 학습 및 테스트 데이터 분할
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.35, random_state=42, stratify=y)

# 하이퍼파라미터 설정
params = {
    'objective': 'binary:logistic',
    'eval_metric': 'logloss',
    'random_state': 42,
    'learning_rate': 0.1,
    'max_depth': 5,
    'subsample': 0.8,
    'colsample_bytree': 0.8,
    'n_estimators': 100
}

# ✅ XGBoost의 cv 기능 사용
cv_results = xgb.cv(
    params=params,
    dtrain=xgb.DMatrix(X_train, label=y_train),
    num_boost_round=100,
    nfold=5,
    stratified=True,
    metrics='logloss',
    early_stopping_rounds=10
)

# 최적 n_estimators 설정
best_n_estimators = len(cv_results)
params['n_estimators'] = best_n_estimators

# 최적 모델 학습
model = xgb.XGBClassifier(**params)
model.fit(X_train, y_train)

# 예측
y_pred = model.predict(X_test)

# 평가
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred, digits=4)

print(f'Best n_estimators: {best_n_estimators}')
print(f'Accuracy: {accuracy:.4f}')
print('Classification Report:\n', report)


Parameters: { "n_estimators" } are not used.



Best n_estimators: 73
Accuracy: 0.8643
Classification Report:
               precision    recall  f1-score   support

           0     0.8790    0.9620    0.9186      2787
           1     0.7644    0.4825    0.5916       713

    accuracy                         0.8643      3500
   macro avg     0.8217    0.7222    0.7551      3500
weighted avg     0.8557    0.8643    0.8520      3500



### LGB사용

In [6]:
import pandas as pd
import lightgbm as lgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report


# X, y 분리
y = df['churn']  # 목표 변수
X = df.drop(columns=['churn'])  # 특징 변수

# 학습 및 테스트 데이터 분할
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.35, random_state=42, stratify=y)

# LightGBM 데이터셋 생성
train_data = lgb.Dataset(X_train, label=y_train)
valid_data = lgb.Dataset(X_test, label=y_test, reference=train_data)  # 검증 데이터 설정

# LightGBM 모델 학습
params = {
    'objective': 'binary',
    'metric': 'binary_logloss',
    'verbosity': -1
}

# callbacks을 사용하여 early stopping 적용
model = lgb.train(params, train_data, num_boost_round=100,
                  valid_sets=[valid_data], valid_names=['valid'],
                  callbacks=[lgb.early_stopping(10), lgb.log_evaluation(10)])

# 예측
y_pred = model.predict(X_test)
y_pred = [1 if x > 0.5 else 0 for x in y_pred]

# 평가
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred, digits=4)


print(f'Accuracy: {accuracy:.4f}')
print('Classification Report:\n', report)


Training until validation scores don't improve for 10 rounds
[10]	valid's binary_logloss: 0.369569
[20]	valid's binary_logloss: 0.34421
[30]	valid's binary_logloss: 0.336481
[40]	valid's binary_logloss: 0.333423
[50]	valid's binary_logloss: 0.3331
[60]	valid's binary_logloss: 0.333744
Early stopping, best iteration is:
[55]	valid's binary_logloss: 0.332668
Accuracy: 0.8634
Classification Report:
               precision    recall  f1-score   support

           0     0.8824    0.9559    0.9177      2787
           1     0.7443    0.5021    0.5997       713

    accuracy                         0.8634      3500
   macro avg     0.8133    0.7290    0.7587      3500
weighted avg     0.8543    0.8634    0.8529      3500



### LGB gridsearch사용

In [7]:
import pandas as pd
import lightgbm as lgb
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, classification_report


# X, y 분리
y = df['churn']  # 목표 변수
X = df.drop(columns=['churn'])  # 특징 변수

# 학습 및 테스트 데이터 분할
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.35, random_state=42, stratify=y)

# LightGBM 모델 객체 생성
model = lgb.LGBMClassifier(objective='binary', metric='binary_logloss', verbosity=-1)

# 하이퍼파라미터 검색 공간 정의
param_grid = {
    'num_leaves': [20, 31, 50],
    'max_depth': [3, 5, 7],
    'learning_rate': [0.01, 0.05, 0.1],
    'min_child_samples': [10, 20, 50],
    'subsample': [0.7, 0.8, 0.9],
    'colsample_bytree': [0.7, 0.8, 0.9]
}

# Grid Search 수행
grid_search = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    scoring='accuracy',
    cv=3,  # 3-폴드 교차검증
    verbose=1,
    n_jobs=-1
)

grid_search.fit(X_train, y_train)

# 최적 하이퍼파라미터 출력
print("Best parameters found:", grid_search.best_params_)

# 최적 하이퍼파라미터 적용하여 모델 재학습
best_model = grid_search.best_estimator_

# 예측
y_pred = best_model.predict(X_test)

# 평가
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred,digits=4)

print(f'Accuracy: {accuracy:.4f}')
print('Classification Report:\n', report)


Fitting 3 folds for each of 729 candidates, totalling 2187 fits
Best parameters found: {'colsample_bytree': 0.9, 'learning_rate': 0.1, 'max_depth': 3, 'min_child_samples': 20, 'num_leaves': 20, 'subsample': 0.7}
Accuracy: 0.8651
Classification Report:
               precision    recall  f1-score   support

           0     0.8779    0.9648    0.9193      2787
           1     0.7757    0.4755    0.5896       713

    accuracy                         0.8651      3500
   macro avg     0.8268    0.7201    0.7544      3500
weighted avg     0.8571    0.8651    0.8521      3500



---

# optuna 사용


In [8]:
import pandas as pd
import lightgbm as lgb
import optuna
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report



# X, y 분리
y = df['churn']  # 목표 변수
X = df.drop(columns=['churn'])  # 특징 변수

# 학습 및 테스트 데이터 분할
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.35, random_state=42, stratify=y)

# Optuna를 이용한 하이퍼파라미터 튜닝
def objective(trial):
    params = {
        'objective': 'binary',
        'metric': 'binary_logloss',
        'verbosity': -1,
        'num_leaves': trial.suggest_int('num_leaves', 20, 150),
        'max_depth': trial.suggest_int('max_depth', 3, 15),
        'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
        'min_child_samples': trial.suggest_int('min_child_samples', 5, 100),
        'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
        'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)
    }
    
    train_data = lgb.Dataset(X_train, label=y_train)
    valid_data = lgb.Dataset(X_test, label=y_test, reference=train_data)
    
    model = lgb.train(params, train_data, num_boost_round=100,
                      valid_sets=[valid_data], 
                      callbacks=[lgb.early_stopping(10), lgb.log_evaluation(10)])
    
    y_pred = model.predict(X_test)
    y_pred = [1 if x > 0.5 else 0 for x in y_pred]
    accuracy = accuracy_score(y_test, y_pred)
    return accuracy

# Optuna 실행
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=30)

# 최적 하이퍼파라미터 출력
print("Best parameters found:", study.best_params)

# 최적 하이퍼파라미터로 모델 학습
best_params = study.best_params
best_params.update({'objective': 'binary', 'metric': 'binary_logloss', 'verbosity': -1})

train_data = lgb.Dataset(X_train, label=y_train)
valid_data = lgb.Dataset(X_test, label=y_test, reference=train_data)

model = lgb.train(best_params, train_data, num_boost_round=100,
                  valid_sets=[valid_data], 
                  callbacks=[lgb.early_stopping(10), lgb.log_evaluation(10)])

# 최적 모델 예측
y_pred = model.predict(X_test)
y_pred = [1 if x > 0.5 else 0 for x in y_pred]

# 평가
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print(f'Accuracy: {accuracy:.4f}')
print('Classification Report:\n', report)


[I 2025-02-08 20:38:53,170] A new study created in memory with name: no-name-4d58d944-59d1-46f4-aaf5-25fbd3d30ea4


Training until validation scores don't improve for 10 rounds

  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)
[I 2025-02-08 20:38:53,316] Trial 0 finished with value: 0.8622857142857143 and parameters: {'num_leaves': 129, 'max_depth': 6, 'learning_rate': 0.059360214512262595, 'min_child_samples': 65, 'subsample': 0.8075073384567252, 'colsample_bytree': 0.9444809381507159}. Best is trial 0 with value: 0.8622857142857143.
  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)
[I 2025-02-08 20:38:53,355] Trial 1 finished with value: 0.8625714285714285 and parameters: {'num_leaves': 100, 'max_depth': 3, 'learning_rate': 0.26014962300486466, 'min_child_samples': 83, 'subsample': 0.7429829581375647, 'colsample_bytree': 0.9150504995372748}. Best 


[10]	valid_0's binary_logloss: 0.405879
[20]	valid_0's binary_logloss: 0.36831
[30]	valid_0's binary_logloss: 0.351333
[40]	valid_0's binary_logloss: 0.341761
[50]	valid_0's binary_logloss: 0.337051
[60]	valid_0's binary_logloss: 0.334523
[70]	valid_0's binary_logloss: 0.332716
[80]	valid_0's binary_logloss: 0.332041
[90]	valid_0's binary_logloss: 0.332037
Early stopping, best iteration is:
[89]	valid_0's binary_logloss: 0.331811
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.355473
[20]	valid_0's binary_logloss: 0.337543
[30]	valid_0's binary_logloss: 0.335504
[40]	valid_0's binary_logloss: 0.331929
[50]	valid_0's binary_logloss: 0.332144
Early stopping, best iteration is:
[43]	valid_0's binary_logloss: 0.331433
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.345473
[20]	valid_0's binary_logloss: 0.333187
[30]	valid_0's binary_logloss: 0.335882
Early stopping, best iteration is:
[20]	valid_0'

  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)


Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.457521
[20]	valid_0's binary_logloss: 0.424746
[30]	valid_0's binary_logloss: 0.40203
[40]	valid_0's binary_logloss: 0.386714
[50]	valid_0's binary_logloss: 0.375589
[60]	valid_0's binary_logloss: 0.366433
[70]	valid_0's binary_logloss: 0.35943
[80]	valid_0's binary_logloss: 0.353715
[90]	valid_0's binary_logloss: 0.350618
[100]	valid_0's binary_logloss: 0.348526
Did not meet early stopping. Best iteration is:
[100]	valid_0's binary_logloss: 0.348526


[I 2025-02-08 20:38:53,820] Trial 3 finished with value: 0.8588571428571429 and parameters: {'num_leaves': 133, 'max_depth': 15, 'learning_rate': 0.020340784122069053, 'min_child_samples': 20, 'subsample': 0.6456516470166345, 'colsample_bytree': 0.8455501471055434}. Best is trial 2 with value: 0.8637142857142858.
  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)
[I 2025-02-08 20:38:53,903] Trial 4 finished with value: 0.8597142857142858 and parameters: {'num_leaves': 67, 'max_depth': 10, 'learning_rate': 0.15751492383492335, 'min_child_samples': 16, 'subsample': 0.6310183947212908, 'colsample_bytree': 0.9169945315775081}. Best is trial 2 with value: 0.8637142857142858.
  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.su

Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.35356
[20]	valid_0's binary_logloss: 0.342717
Early stopping, best iteration is:
[18]	valid_0's binary_logloss: 0.342535
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.361969
[20]	valid_0's binary_logloss: 0.339291
[30]	valid_0's binary_logloss: 0.336713
[40]	valid_0's binary_logloss: 0.335932
Early stopping, best iteration is:
[36]	valid_0's binary_logloss: 0.335635
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.357064
[20]	valid_0's binary_logloss: 0.334572
[30]	valid_0's binary_logloss: 0.334304
Early stopping, best iteration is:
[21]	valid_0's binary_logloss: 0.332487


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)
[I 2025-02-08 20:38:54,147] Trial 7 finished with value: 0.8622857142857143 and parameters: {'num_leaves': 33, 'max_depth': 6, 'learning_rate': 0.03961836515757595, 'min_child_samples': 49, 'subsample': 0.6373904085890272, 'colsample_bytree': 0.785994416252042}. Best is trial 2 with value: 0.8637142857142858.
  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)


Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.430747
[20]	valid_0's binary_logloss: 0.391656
[30]	valid_0's binary_logloss: 0.369844
[40]	valid_0's binary_logloss: 0.356717
[50]	valid_0's binary_logloss: 0.348113
[60]	valid_0's binary_logloss: 0.343123
[70]	valid_0's binary_logloss: 0.339203
[80]	valid_0's binary_logloss: 0.33627
[90]	valid_0's binary_logloss: 0.335046
[100]	valid_0's binary_logloss: 0.333984
Did not meet early stopping. Best iteration is:
[100]	valid_0's binary_logloss: 0.333984
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.394678
[20]	valid_0's binary_logloss: 0.359255
[30]	valid_0's binary_logloss: 0.34565
[40]	valid_0's binary_logloss: 0.337356
[50]	valid_0's binary_logloss: 0.335412
[60]	valid_0's binary_logloss: 0.334053
[70]	valid_0's binary_logloss: 0.332985
[80]	valid_0's binary_logloss: 0.332455
[90]	valid_0's binary_logloss: 0.332656
Early stopping, best iteration

[I 2025-02-08 20:38:54,238] Trial 8 finished with value: 0.8674285714285714 and parameters: {'num_leaves': 64, 'max_depth': 5, 'learning_rate': 0.0966491662744071, 'min_child_samples': 67, 'subsample': 0.9293142345856503, 'colsample_bytree': 0.580409628431882}. Best is trial 8 with value: 0.8674285714285714.
  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)


Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.424047
[20]	valid_0's binary_logloss: 0.384146
[30]	valid_0's binary_logloss: 0.365821
[40]	valid_0's binary_logloss: 0.35139
[50]	valid_0's binary_logloss: 0.345046
[60]	valid_0's binary_logloss: 0.341442
[70]	valid_0's binary_logloss: 0.339449
[80]	valid_0's binary_logloss: 0.337548
[90]	valid_0's binary_logloss: 0.337054
[100]	valid_0's binary_logloss: 0.337342
Did not meet early stopping. Best iteration is:
[91]	valid_0's binary_logloss: 0.336934


[I 2025-02-08 20:38:54,455] Trial 9 finished with value: 0.8645714285714285 and parameters: {'num_leaves': 96, 'max_depth': 9, 'learning_rate': 0.05468386457268789, 'min_child_samples': 39, 'subsample': 0.9346969599539163, 'colsample_bytree': 0.6058029097481594}. Best is trial 8 with value: 0.8674285714285714.
  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)
[I 2025-02-08 20:38:54,622] Trial 10 finished with value: 0.8622857142857143 and parameters: {'num_leaves': 64, 'max_depth': 12, 'learning_rate': 0.10110082324870522, 'min_child_samples': 36, 'subsample': 0.5234313951351363, 'colsample_bytree': 0.6628470380057411}. Best is trial 8 with value: 0.8674285714285714.
  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.sugg

Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.384937
[20]	valid_0's binary_logloss: 0.348664
[30]	valid_0's binary_logloss: 0.340015
[40]	valid_0's binary_logloss: 0.338545
Early stopping, best iteration is:
[35]	valid_0's binary_logloss: 0.338113
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.415811
[20]	valid_0's binary_logloss: 0.375913


[I 2025-02-08 20:38:54,854] Trial 11 finished with value: 0.866 and parameters: {'num_leaves': 65, 'max_depth': 10, 'learning_rate': 0.06415700292716744, 'min_child_samples': 35, 'subsample': 0.8853119548255823, 'colsample_bytree': 0.6359104788774864}. Best is trial 8 with value: 0.8674285714285714.


[30]	valid_0's binary_logloss: 0.358529
[40]	valid_0's binary_logloss: 0.345795
[50]	valid_0's binary_logloss: 0.341742
[60]	valid_0's binary_logloss: 0.339482
[70]	valid_0's binary_logloss: 0.338135
[80]	valid_0's binary_logloss: 0.337574
[90]	valid_0's binary_logloss: 0.337604
Early stopping, best iteration is:
[85]	valid_0's binary_logloss: 0.337239
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.397056
[20]	valid_0's binary_logloss: 0.360698


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)
[I 2025-02-08 20:38:55,028] Trial 12 finished with value: 0.8628571428571429 and parameters: {'num_leaves': 58, 'max_depth': 12, 'learning_rate': 0.09181076003274437, 'min_child_samples': 36, 'subsample': 0.8799066005010527, 'colsample_bytree': 0.6454262902710266}. Best is trial 8 with value: 0.8674285714285714.
  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)
[I 2025-02-08 20:38:55,141] Trial 13 finished with value: 0.8628571428571429 and parameters: {'num_leaves': 75, 'max_depth': 3, 'learning_rate': 0.02604350031895589, 'min_child_samples': 6, 'subsample': 0.8716568242826014, 'colsample_bytree': 0.7101973666420853}. Best i

[30]	valid_0's binary_logloss: 0.348443
[40]	valid_0's binary_logloss: 0.341357
[50]	valid_0's binary_logloss: 0.341175
[60]	valid_0's binary_logloss: 0.341223
Early stopping, best iteration is:
[52]	valid_0's binary_logloss: 0.340522
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.461848
[20]	valid_0's binary_logloss: 0.434544
[30]	valid_0's binary_logloss: 0.413384
[40]	valid_0's binary_logloss: 0.398208
[50]	valid_0's binary_logloss: 0.387235
[60]	valid_0's binary_logloss: 0.378191
[70]	valid_0's binary_logloss: 0.371097
[80]	valid_0's binary_logloss: 0.365397
[90]	valid_0's binary_logloss: 0.360884
[100]	valid_0's binary_logloss: 0.356732
Did not meet early stopping. Best iteration is:
[100]	valid_0's binary_logloss: 0.356732


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)
[I 2025-02-08 20:38:55,369] Trial 14 finished with value: 0.8354285714285714 and parameters: {'num_leaves': 51, 'max_depth': 11, 'learning_rate': 0.012028642769410673, 'min_child_samples': 69, 'subsample': 0.8652760215472044, 'colsample_bytree': 0.5723881620473353}. Best is trial 8 with value: 0.8674285714285714.


Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.481462
[20]	valid_0's binary_logloss: 0.46254
[30]	valid_0's binary_logloss: 0.447675
[40]	valid_0's binary_logloss: 0.432402
[50]	valid_0's binary_logloss: 0.421494
[60]	valid_0's binary_logloss: 0.411571
[70]	valid_0's binary_logloss: 0.403417
[80]	valid_0's binary_logloss: 0.394444
[90]	valid_0's binary_logloss: 0.387711
[100]	valid_0's binary_logloss: 0.382031
Did not meet early stopping. Best iteration is:
[100]	valid_0's binary_logloss: 0.382031


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)
[I 2025-02-08 20:38:55,485] Trial 15 finished with value: 0.8651428571428571 and parameters: {'num_leaves': 113, 'max_depth': 8, 'learning_rate': 0.10823679739322449, 'min_child_samples': 48, 'subsample': 0.7662458490129367, 'colsample_bytree': 0.7400011368360279}. Best is trial 8 with value: 0.8674285714285714.
  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)


Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.380282
[20]	valid_0's binary_logloss: 0.346348
[30]	valid_0's binary_logloss: 0.337124
[40]	valid_0's binary_logloss: 0.335155
Early stopping, best iteration is:
[35]	valid_0's binary_logloss: 0.33489
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.413433
[20]	valid_0's binary_logloss: 0.373065
[30]	valid_0's binary_logloss: 0.356857


[I 2025-02-08 20:38:55,685] Trial 16 finished with value: 0.8637142857142858 and parameters: {'num_leaves': 48, 'max_depth': 15, 'learning_rate': 0.06880812555988482, 'min_child_samples': 30, 'subsample': 0.9227792883397609, 'colsample_bytree': 0.5836208293290792}. Best is trial 8 with value: 0.8674285714285714.
  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),


[40]	valid_0's binary_logloss: 0.345283
[50]	valid_0's binary_logloss: 0.341568
[60]	valid_0's binary_logloss: 0.339459
[70]	valid_0's binary_logloss: 0.338301
[80]	valid_0's binary_logloss: 0.338048
Early stopping, best iteration is:
[77]	valid_0's binary_logloss: 0.337983
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.440125
[20]	valid_0's binary_logloss: 0.40106
[30]	valid_0's binary_logloss: 0.376724


  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)
[I 2025-02-08 20:38:55,994] Trial 17 finished with value: 0.8671428571428571 and parameters: {'num_leaves': 79, 'max_depth': 13, 'learning_rate': 0.03793179301030435, 'min_child_samples': 98, 'subsample': 0.8122080101723397, 'colsample_bytree': 0.6752935623027962}. Best is trial 8 with value: 0.8674285714285714.


[40]	valid_0's binary_logloss: 0.361306
[50]	valid_0's binary_logloss: 0.351622
[60]	valid_0's binary_logloss: 0.344879
[70]	valid_0's binary_logloss: 0.340167
[80]	valid_0's binary_logloss: 0.335799
[90]	valid_0's binary_logloss: 0.33364
[100]	valid_0's binary_logloss: 0.333109
Did not meet early stopping. Best iteration is:
[99]	valid_0's binary_logloss: 0.332986


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)
[I 2025-02-08 20:38:56,219] Trial 18 finished with value: 0.8642857142857143 and parameters: {'num_leaves': 80, 'max_depth': 13, 'learning_rate': 0.039253136390432, 'min_child_samples': 99, 'subsample': 0.811758842746008, 'colsample_bytree': 0.80207061810154}. Best is trial 8 with value: 0.8674285714285714.


Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.431401
[20]	valid_0's binary_logloss: 0.391375
[30]	valid_0's binary_logloss: 0.369327
[40]	valid_0's binary_logloss: 0.355458
[50]	valid_0's binary_logloss: 0.34703
[60]	valid_0's binary_logloss: 0.342001
[70]	valid_0's binary_logloss: 0.338415
[80]	valid_0's binary_logloss: 0.335797
[90]	valid_0's binary_logloss: 0.334824
[100]	valid_0's binary_logloss: 0.334217
Did not meet early stopping. Best iteration is:
[100]	valid_0's binary_logloss: 0.334217


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)
[I 2025-02-08 20:38:56,421] Trial 19 finished with value: 0.8665714285714285 and parameters: {'num_leaves': 110, 'max_depth': 14, 'learning_rate': 0.024790720299843964, 'min_child_samples': 89, 'subsample': 0.736943054875829, 'colsample_bytree': 0.7139485393634813}. Best is trial 8 with value: 0.8674285714285714.


Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.45798
[20]	valid_0's binary_logloss: 0.426217
[30]	valid_0's binary_logloss: 0.402008
[40]	valid_0's binary_logloss: 0.385225
[50]	valid_0's binary_logloss: 0.373109
[60]	valid_0's binary_logloss: 0.363038
[70]	valid_0's binary_logloss: 0.35569
[80]	valid_0's binary_logloss: 0.349171
[90]	valid_0's binary_logloss: 0.345059
[100]	valid_0's binary_logloss: 0.342171
Did not meet early stopping. Best iteration is:
[100]	valid_0's binary_logloss: 0.342171


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)
[I 2025-02-08 20:38:56,545] Trial 20 finished with value: 0.8294285714285714 and parameters: {'num_leaves': 83, 'max_depth': 4, 'learning_rate': 0.010180648416111665, 'min_child_samples': 77, 'subsample': 0.8258154849391415, 'colsample_bytree': 0.6832432237813676}. Best is trial 8 with value: 0.8674285714285714.
  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)


Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.484775
[20]	valid_0's binary_logloss: 0.46856
[30]	valid_0's binary_logloss: 0.453025
[40]	valid_0's binary_logloss: 0.440404
[50]	valid_0's binary_logloss: 0.430198
[60]	valid_0's binary_logloss: 0.420351
[70]	valid_0's binary_logloss: 0.411834
[80]	valid_0's binary_logloss: 0.40416
[90]	valid_0's binary_logloss: 0.397742
[100]	valid_0's binary_logloss: 0.391948
Did not meet early stopping. Best iteration is:
[100]	valid_0's binary_logloss: 0.391948
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.468355
[20]	valid_0's binary_logloss: 0.441462
[30]	valid_0's binary_logloss: 0.419013
[40]	valid_0's binary_logloss: 0.402825
[50]	valid_0's binary_logloss: 0.390872


[I 2025-02-08 20:38:56,764] Trial 21 finished with value: 0.866 and parameters: {'num_leaves': 111, 'max_depth': 14, 'learning_rate': 0.01827490646408815, 'min_child_samples': 88, 'subsample': 0.7100063666780903, 'colsample_bytree': 0.7206903473675005}. Best is trial 8 with value: 0.8674285714285714.


[60]	valid_0's binary_logloss: 0.379704
[70]	valid_0's binary_logloss: 0.370806
[80]	valid_0's binary_logloss: 0.362692
[90]	valid_0's binary_logloss: 0.357203
[100]	valid_0's binary_logloss: 0.352903
Did not meet early stopping. Best iteration is:
[100]	valid_0's binary_logloss: 0.352903
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.438766
[20]	valid_0's binary_logloss: 0.399953
[30]	valid_0's binary_logloss: 0.377832


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)
[I 2025-02-08 20:38:57,061] Trial 22 finished with value: 0.8665714285714285 and parameters: {'num_leaves': 148, 'max_depth': 13, 'learning_rate': 0.03395142742873335, 'min_child_samples': 91, 'subsample': 0.7114017827211616, 'colsample_bytree': 0.772672414716173}. Best is trial 8 with value: 0.8674285714285714.


[40]	valid_0's binary_logloss: 0.362636
[50]	valid_0's binary_logloss: 0.352825
[60]	valid_0's binary_logloss: 0.346143
[70]	valid_0's binary_logloss: 0.340959
[80]	valid_0's binary_logloss: 0.336924
[90]	valid_0's binary_logloss: 0.334835
[100]	valid_0's binary_logloss: 0.334103
Did not meet early stopping. Best iteration is:
[98]	valid_0's binary_logloss: 0.334095
Training until validation scores don't improve for 10 rounds


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)


[10]	valid_0's binary_logloss: 0.458726
[20]	valid_0's binary_logloss: 0.427809
[30]	valid_0's binary_logloss: 0.40709
[40]	valid_0's binary_logloss: 0.387874
[50]	valid_0's binary_logloss: 0.376577
[60]	valid_0's binary_logloss: 0.367122
[70]	valid_0's binary_logloss: 0.360046
[80]	valid_0's binary_logloss: 0.352819


[I 2025-02-08 20:38:57,367] Trial 23 finished with value: 0.8674285714285714 and parameters: {'num_leaves': 107, 'max_depth': 14, 'learning_rate': 0.026731400312485665, 'min_child_samples': 100, 'subsample': 0.5572743417154318, 'colsample_bytree': 0.5586339243699602}. Best is trial 8 with value: 0.8674285714285714.


[90]	valid_0's binary_logloss: 0.34807
[100]	valid_0's binary_logloss: 0.344933
Did not meet early stopping. Best iteration is:
[100]	valid_0's binary_logloss: 0.344933
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.436547
[20]	valid_0's binary_logloss: 0.396426
[30]	valid_0's binary_logloss: 0.376166
[40]	valid_0's binary_logloss: 0.360789
[50]	valid_0's binary_logloss: 0.35294
[60]	valid_0's binary_logloss: 0.346394
[70]	valid_0's binary_logloss: 0.343251
[80]	valid_0's binary_logloss: 0.339425


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)
[I 2025-02-08 20:38:57,584] Trial 24 finished with value: 0.8668571428571429 and parameters: {'num_leaves': 123, 'max_depth': 12, 'learning_rate': 0.049222017349490346, 'min_child_samples': 99, 'subsample': 0.5340859881086392, 'colsample_bytree': 0.5499175347146317}. Best is trial 8 with value: 0.8674285714285714.
  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)
[I 2025-02-08 20:38:57,699] Trial 25 finished with value: 0.8651428571428571 and parameters: {'num_leaves': 91, 'max_depth': 8, 'learning_rate': 0.1481910148727758, 'min_child_samples': 71, 'subsample': 0.5767186743599759, 'colsample_bytree': 0.595918678098913}. Best 

[90]	valid_0's binary_logloss: 0.336984
[100]	valid_0's binary_logloss: 0.336219
Did not meet early stopping. Best iteration is:
[99]	valid_0's binary_logloss: 0.336196
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.368473
[20]	valid_0's binary_logloss: 0.340261
[30]	valid_0's binary_logloss: 0.335266
[40]	valid_0's binary_logloss: 0.334747
Early stopping, best iteration is:
[36]	valid_0's binary_logloss: 0.333819
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.458108


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)


[20]	valid_0's binary_logloss: 0.425469
[30]	valid_0's binary_logloss: 0.405115
[40]	valid_0's binary_logloss: 0.387676
[50]	valid_0's binary_logloss: 0.376902
[60]	valid_0's binary_logloss: 0.366948
[70]	valid_0's binary_logloss: 0.361474
[80]	valid_0's binary_logloss: 0.354436
[90]	valid_0's binary_logloss: 0.349416
[100]	valid_0's binary_logloss: 0.346691
Did not meet early stopping. Best iteration is:
[100]	valid_0's binary_logloss: 0.346691


[I 2025-02-08 20:38:57,984] Trial 26 finished with value: 0.8677142857142857 and parameters: {'num_leaves': 78, 'max_depth': 14, 'learning_rate': 0.029762822926400263, 'min_child_samples': 82, 'subsample': 0.5786780038956784, 'colsample_bytree': 0.5449888601139455}. Best is trial 26 with value: 0.8677142857142857.
  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)


Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.477887
[20]	valid_0's binary_logloss: 0.455352
[30]	valid_0's binary_logloss: 0.439191
[40]	valid_0's binary_logloss: 0.423244
[50]	valid_0's binary_logloss: 0.412745
[60]	valid_0's binary_logloss: 0.401547
[70]	valid_0's binary_logloss: 0.394709
[80]	valid_0's binary_logloss: 0.385743
[90]	valid_0's binary_logloss: 0.379271


[I 2025-02-08 20:38:58,284] Trial 27 finished with value: 0.846 and parameters: {'num_leaves': 102, 'max_depth': 14, 'learning_rate': 0.015540582678389623, 'min_child_samples': 57, 'subsample': 0.5842287112018226, 'colsample_bytree': 0.5423908938251035}. Best is trial 26 with value: 0.8677142857142857.
  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)
[I 2025-02-08 20:38:58,452] Trial 28 finished with value: 0.8682857142857143 and parameters: {'num_leaves': 44, 'max_depth': 7, 'learning_rate': 0.02621764072268792, 'min_child_samples': 81, 'subsample': 0.5586478288101614, 'colsample_bytree': 0.621088134499706}. Best is trial 28 with value: 0.8682857142857143.


[100]	valid_0's binary_logloss: 0.374102
Did not meet early stopping. Best iteration is:
[100]	valid_0's binary_logloss: 0.374102
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.459251
[20]	valid_0's binary_logloss: 0.428903
[30]	valid_0's binary_logloss: 0.408198
[40]	valid_0's binary_logloss: 0.389153
[50]	valid_0's binary_logloss: 0.377679
[60]	valid_0's binary_logloss: 0.368255
[70]	valid_0's binary_logloss: 0.361221
[80]	valid_0's binary_logloss: 0.354228
[90]	valid_0's binary_logloss: 0.349357
[100]	valid_0's binary_logloss: 0.346292
Did not meet early stopping. Best iteration is:
[100]	valid_0's binary_logloss: 0.346292


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)
[I 2025-02-08 20:38:58,608] Trial 29 finished with value: 0.866 and parameters: {'num_leaves': 47, 'max_depth': 7, 'learning_rate': 0.08619762676411409, 'min_child_samples': 63, 'subsample': 0.6026980817544916, 'colsample_bytree': 0.5016108947408859}. Best is trial 28 with value: 0.8682857142857143.


Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.406236
[20]	valid_0's binary_logloss: 0.365687
[30]	valid_0's binary_logloss: 0.351779
[40]	valid_0's binary_logloss: 0.343164
[50]	valid_0's binary_logloss: 0.33982
[60]	valid_0's binary_logloss: 0.337939
[70]	valid_0's binary_logloss: 0.337754
Early stopping, best iteration is:
[68]	valid_0's binary_logloss: 0.337173
Best parameters found: {'num_leaves': 44, 'max_depth': 7, 'learning_rate': 0.02621764072268792, 'min_child_samples': 81, 'subsample': 0.5586478288101614, 'colsample_bytree': 0.621088134499706}
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.459251
[20]	valid_0's binary_logloss: 0.428903
[30]	valid_0's binary_logloss: 0.408198
[40]	valid_0's binary_logloss: 0.389153
[50]	valid_0's binary_logloss: 0.377679
[60]	valid_0's binary_logloss: 0.368255
[70]	valid_0's binary_logloss: 0.361221
[80]	valid_0's binary_logloss: 0.354228
[90]	valid_

In [9]:
import pandas as pd
import lightgbm as lgb
import optuna
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, precision_recall_curve


# X, y 분리
y = df['churn']  # 목표 변수
X = df.drop(columns=['churn'])  # 특징 변수

# 학습 및 테스트 데이터 분할
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.35, random_state=42, stratify=y)

# Optuna를 이용한 하이퍼파라미터 튜닝
def objective(trial):
    params = {
        'objective': 'binary',
        'metric': 'binary_logloss',
        'verbosity': -1,
        'num_leaves': trial.suggest_int('num_leaves', 20, 150),
        'max_depth': trial.suggest_int('max_depth', 3, 15),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3, log=True),
        'min_child_samples': trial.suggest_int('min_child_samples', 5, 100),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0)
    }
    
    train_data = lgb.Dataset(X_train, label=y_train)
    valid_data = lgb.Dataset(X_test, label=y_test, reference=train_data)
    
    model = lgb.train(params, train_data, num_boost_round=100,
                      valid_sets=[valid_data], 
                      callbacks=[lgb.early_stopping(10), lgb.log_evaluation(10)])
    
    y_pred_proba = model.predict(X_test)
    precision, recall, thresholds = precision_recall_curve(y_test, y_pred_proba)
    best_threshold = thresholds[(precision + recall).argmax()]  # 최적의 threshold 선택
    
    y_pred = [1 if x > best_threshold else 0 for x in y_pred_proba]
    accuracy = accuracy_score(y_test, y_pred)
    return accuracy

# Optuna 실행
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=30)

# 최적 하이퍼파라미터 출력
print("Best parameters found:", study.best_params)

# 최적 하이퍼파라미터로 모델 학습
best_params = study.best_params
best_params.update({'objective': 'binary', 'metric': 'binary_logloss', 'verbosity': -1})

train_data = lgb.Dataset(X_train, label=y_train)
valid_data = lgb.Dataset(X_test, label=y_test, reference=train_data)

model = lgb.train(best_params, train_data, num_boost_round=100,
                  valid_sets=[valid_data], 
                  callbacks=[lgb.early_stopping(10), lgb.log_evaluation(10)])

# 최적 모델 예측
y_pred_proba = model.predict(X_test)
precision, recall, thresholds = precision_recall_curve(y_test, y_pred_proba)
best_threshold = thresholds[(precision + recall).argmax()]  # 최적 threshold 선택

print(f'Best Threshold: {best_threshold:.4f}')

y_pred = [1 if x > best_threshold else 0 for x in y_pred_proba]

# 평가
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print(f'Accuracy: {accuracy:.4f}')
print('Classification Report:\n', report)


[I 2025-02-08 20:38:58,818] A new study created in memory with name: no-name-76c03788-e975-4a8d-be97-d16bc9e0489b


Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.371672
[20]	valid_0's binary_logloss: 0.34107
[30]	valid_0's binary_logloss: 0.335414


[I 2025-02-08 20:38:58,937] Trial 0 finished with value: 0.79 and parameters: {'num_leaves': 52, 'max_depth': 14, 'learning_rate': 0.13156384122674317, 'min_child_samples': 78, 'subsample': 0.8742318605350777, 'colsample_bytree': 0.6702172595944933}. Best is trial 0 with value: 0.79.


[40]	valid_0's binary_logloss: 0.337333
Early stopping, best iteration is:
[34]	valid_0's binary_logloss: 0.33507
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.476114
[20]	valid_0's binary_logloss: 0.452793
[30]	valid_0's binary_logloss: 0.43474
[40]	valid_0's binary_logloss: 0.419687
[50]	valid_0's binary_logloss: 0.407498
[60]	valid_0's binary_logloss: 0.39763
[70]	valid_0's binary_logloss: 0.38922


[I 2025-02-08 20:38:59,219] Trial 1 finished with value: 0.758 and parameters: {'num_leaves': 149, 'max_depth': 10, 'learning_rate': 0.010970977337149948, 'min_child_samples': 88, 'subsample': 0.5216977061809234, 'colsample_bytree': 0.9531785211453723}. Best is trial 0 with value: 0.79.


[80]	valid_0's binary_logloss: 0.38157
[90]	valid_0's binary_logloss: 0.375
[100]	valid_0's binary_logloss: 0.369268
Did not meet early stopping. Best iteration is:
[100]	valid_0's binary_logloss: 0.369268
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.412193
[20]	valid_0's binary_logloss: 0.3723
[30]	valid_0's binary_logloss: 0.35522
[40]	valid_0's binary_logloss: 0.343398
[50]	valid_0's binary_logloss: 0.339459
[60]	valid_0's binary_logloss: 0.336688


[I 2025-02-08 20:38:59,384] Trial 2 finished with value: 0.8682857142857143 and parameters: {'num_leaves': 66, 'max_depth': 9, 'learning_rate': 0.0688218824734827, 'min_child_samples': 92, 'subsample': 0.5470653330329711, 'colsample_bytree': 0.625762875563758}. Best is trial 2 with value: 0.8682857142857143.
[I 2025-02-08 20:38:59,450] Trial 3 finished with value: 0.7731428571428571 and parameters: {'num_leaves': 111, 'max_depth': 3, 'learning_rate': 0.07019707358938054, 'min_child_samples': 87, 'subsample': 0.996195285148721, 'colsample_bytree': 0.5269637659516007}. Best is trial 2 with value: 0.8682857142857143.


[70]	valid_0's binary_logloss: 0.334427
[80]	valid_0's binary_logloss: 0.333606
[90]	valid_0's binary_logloss: 0.333546
Early stopping, best iteration is:
[84]	valid_0's binary_logloss: 0.333403
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.424435
[20]	valid_0's binary_logloss: 0.387303
[30]	valid_0's binary_logloss: 0.370256
[40]	valid_0's binary_logloss: 0.357475
[50]	valid_0's binary_logloss: 0.350607
[60]	valid_0's binary_logloss: 0.345336
[70]	valid_0's binary_logloss: 0.341801
[80]	valid_0's binary_logloss: 0.33876
[90]	valid_0's binary_logloss: 0.336792
[100]	valid_0's binary_logloss: 0.335671
Did not meet early stopping. Best iteration is:
[100]	valid_0's binary_logloss: 0.335671
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.402832
[20]	valid_0's binary_logloss: 0.373099
[30]	valid_0's binary_logloss: 0.357018
[40]	valid_0's binary_logloss: 0.348064
[50]	valid_0's binary_logloss: 0.3

[I 2025-02-08 20:38:59,516] Trial 4 finished with value: 0.8048571428571428 and parameters: {'num_leaves': 123, 'max_depth': 3, 'learning_rate': 0.08332361147662261, 'min_child_samples': 84, 'subsample': 0.7284779605526698, 'colsample_bytree': 0.909110688316643}. Best is trial 2 with value: 0.8682857142857143.


[60]	valid_0's binary_logloss: 0.339232
[70]	valid_0's binary_logloss: 0.336805
[80]	valid_0's binary_logloss: 0.335237
[90]	valid_0's binary_logloss: 0.33429
[100]	valid_0's binary_logloss: 0.333791
Did not meet early stopping. Best iteration is:
[98]	valid_0's binary_logloss: 0.33371
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.470694
[20]	valid_0's binary_logloss: 0.443714
[30]	valid_0's binary_logloss: 0.423312
[40]	valid_0's binary_logloss: 0.40797
[50]	valid_0's binary_logloss: 0.395838
[60]	valid_0's binary_logloss: 0.385103
[70]	valid_0's binary_logloss: 0.376093
[80]	valid_0's binary_logloss: 0.368439
[90]	valid_0's binary_logloss: 0.362777
[100]	valid_0's binary_logloss: 0.358537
Did not meet early stopping. Best iteration is:
[100]	valid_0's binary_logloss: 0.358537


[I 2025-02-08 20:38:59,949] Trial 5 finished with value: 0.766 and parameters: {'num_leaves': 135, 'max_depth': 15, 'learning_rate': 0.014294000859999344, 'min_child_samples': 47, 'subsample': 0.6401168678248483, 'colsample_bytree': 0.7906027758110866}. Best is trial 2 with value: 0.8682857142857143.


Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.4773
[20]	valid_0's binary_logloss: 0.455405
[30]	valid_0's binary_logloss: 0.435953
[40]	valid_0's binary_logloss: 0.420813
[50]	valid_0's binary_logloss: 0.409211
[60]	valid_0's binary_logloss: 0.398179
[70]	valid_0's binary_logloss: 0.38887
[80]	valid_0's binary_logloss: 0.379931
[90]	valid_0's binary_logloss: 0.37351
[100]	valid_0's binary_logloss: 0.36804
Did not meet early stopping. Best iteration is:
[100]	valid_0's binary_logloss: 0.36804


[I 2025-02-08 20:39:00,195] Trial 6 finished with value: 0.8002857142857143 and parameters: {'num_leaves': 92, 'max_depth': 9, 'learning_rate': 0.013063976158509535, 'min_child_samples': 61, 'subsample': 0.6276980760780295, 'colsample_bytree': 0.6984707564354204}. Best is trial 2 with value: 0.8682857142857143.


Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.478364
[20]	valid_0's binary_logloss: 0.456881
[30]	valid_0's binary_logloss: 0.438265
[40]	valid_0's binary_logloss: 0.423205
[50]	valid_0's binary_logloss: 0.411654
[60]	valid_0's binary_logloss: 0.400412
[70]	valid_0's binary_logloss: 0.391409
[80]	valid_0's binary_logloss: 0.382413
[90]	valid_0's binary_logloss: 0.375931
[100]	valid_0's binary_logloss: 0.37047
Did not meet early stopping. Best iteration is:
[100]	valid_0's binary_logloss: 0.37047


[I 2025-02-08 20:39:00,337] Trial 7 finished with value: 0.762 and parameters: {'num_leaves': 46, 'max_depth': 14, 'learning_rate': 0.012053346264994845, 'min_child_samples': 23, 'subsample': 0.519161958708187, 'colsample_bytree': 0.7105155721098452}. Best is trial 2 with value: 0.8682857142857143.


Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.484943
[20]	valid_0's binary_logloss: 0.468188
[30]	valid_0's binary_logloss: 0.454521


[I 2025-02-08 20:39:00,457] Trial 8 finished with value: 0.7668571428571429 and parameters: {'num_leaves': 32, 'max_depth': 12, 'learning_rate': 0.010284574718150846, 'min_child_samples': 93, 'subsample': 0.9942625856514163, 'colsample_bytree': 0.5614169931371484}. Best is trial 2 with value: 0.8682857142857143.
[I 2025-02-08 20:39:00,515] Trial 9 finished with value: 0.8065714285714286 and parameters: {'num_leaves': 57, 'max_depth': 13, 'learning_rate': 0.1683478334364704, 'min_child_samples': 73, 'subsample': 0.6384117643505182, 'colsample_bytree': 0.6333196988649914}. Best is trial 2 with value: 0.8682857142857143.


[40]	valid_0's binary_logloss: 0.44031
[50]	valid_0's binary_logloss: 0.429971
[60]	valid_0's binary_logloss: 0.420374
[70]	valid_0's binary_logloss: 0.412462
[80]	valid_0's binary_logloss: 0.403725
[90]	valid_0's binary_logloss: 0.396786
[100]	valid_0's binary_logloss: 0.390869
Did not meet early stopping. Best iteration is:
[100]	valid_0's binary_logloss: 0.390869
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.362392
[20]	valid_0's binary_logloss: 0.337476
[30]	valid_0's binary_logloss: 0.337333
Early stopping, best iteration is:
[21]	valid_0's binary_logloss: 0.336178
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.447249
[20]	valid_0's binary_logloss: 0.411725
[30]	valid_0's binary_logloss: 0.388978


[I 2025-02-08 20:39:00,686] Trial 10 finished with value: 0.7708571428571429 and parameters: {'num_leaves': 76, 'max_depth': 6, 'learning_rate': 0.027476510538002243, 'min_child_samples': 5, 'subsample': 0.8005032162789669, 'colsample_bytree': 0.832368273314293}. Best is trial 2 with value: 0.8682857142857143.


[40]	valid_0's binary_logloss: 0.374371
[50]	valid_0's binary_logloss: 0.363387
[60]	valid_0's binary_logloss: 0.356086
[70]	valid_0's binary_logloss: 0.350302
[80]	valid_0's binary_logloss: 0.345662
[90]	valid_0's binary_logloss: 0.342414
[100]	valid_0's binary_logloss: 0.340679
Did not meet early stopping. Best iteration is:
[100]	valid_0's binary_logloss: 0.340679
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.356804
[20]	valid_0's binary_logloss: 0.336875
[30]	valid_0's binary_logloss: 0.337119
Early stopping, best iteration is:
[21]	valid_0's binary_logloss: 0.336003


[I 2025-02-08 20:39:00,773] Trial 11 finished with value: 0.8665714285714285 and parameters: {'num_leaves': 74, 'max_depth': 9, 'learning_rate': 0.19788368106879758, 'min_child_samples': 67, 'subsample': 0.6222411164103248, 'colsample_bytree': 0.6155274310382248}. Best is trial 2 with value: 0.8682857142857143.
[I 2025-02-08 20:39:00,881] Trial 12 finished with value: 0.8682857142857143 and parameters: {'num_leaves': 81, 'max_depth': 8, 'learning_rate': 0.2908608696119383, 'min_child_samples': 57, 'subsample': 0.5833000906690028, 'colsample_bytree': 0.6020708668792245}. Best is trial 2 with value: 0.8682857142857143.
[I 2025-02-08 20:39:00,969] Trial 13 finished with value: 0.7597142857142857 and parameters: {'num_leaves': 96, 'max_depth': 7, 'learning_rate': 0.2870130098468431, 'min_child_samples': 45, 'subsample': 0.5039225573396329, 'colsample_bytree': 0.5017780775962044}. Best is trial 2 with value: 0.8682857142857143.


Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.344056
[20]	valid_0's binary_logloss: 0.335836
[30]	valid_0's binary_logloss: 0.340957
Early stopping, best iteration is:
[21]	valid_0's binary_logloss: 0.334864
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.357024
[20]	valid_0's binary_logloss: 0.341294
Early stopping, best iteration is:
[19]	valid_0's binary_logloss: 0.341212
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.453219
[20]	valid_0's binary_logloss: 0.420816


[I 2025-02-08 20:39:01,112] Trial 14 finished with value: 0.7617142857142857 and parameters: {'num_leaves': 66, 'max_depth': 6, 'learning_rate': 0.030848903622257996, 'min_child_samples': 99, 'subsample': 0.5772343457082345, 'colsample_bytree': 0.5892213165998581}. Best is trial 2 with value: 0.8682857142857143.


[30]	valid_0's binary_logloss: 0.399998
[40]	valid_0's binary_logloss: 0.381003
[50]	valid_0's binary_logloss: 0.369796
[60]	valid_0's binary_logloss: 0.361093
[70]	valid_0's binary_logloss: 0.354557
[80]	valid_0's binary_logloss: 0.348444
[90]	valid_0's binary_logloss: 0.344381
[100]	valid_0's binary_logloss: 0.341782
Did not meet early stopping. Best iteration is:
[100]	valid_0's binary_logloss: 0.341782
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.427038
[20]	valid_0's binary_logloss: 0.386644
[30]	valid_0's binary_logloss: 0.363905
[40]	valid_0's binary_logloss: 0.35108
[50]	valid_0's binary_logloss: 0.343626
[60]	valid_0's binary_logloss: 0.338419
[70]	valid_0's binary_logloss: 0.335681
[80]	valid_0's binary_logloss: 0.333408
[90]	valid_0's binary_logloss: 0.331765


[I 2025-02-08 20:39:01,271] Trial 15 finished with value: 0.8091428571428572 and parameters: {'num_leaves': 31, 'max_depth': 11, 'learning_rate': 0.04748483918284853, 'min_child_samples': 32, 'subsample': 0.719376673284163, 'colsample_bytree': 0.7468765164748263}. Best is trial 2 with value: 0.8682857142857143.
[I 2025-02-08 20:39:01,434] Trial 16 finished with value: 0.8171428571428572 and parameters: {'num_leaves': 104, 'max_depth': 8, 'learning_rate': 0.10188726296143012, 'min_child_samples': 60, 'subsample': 0.5747747159598251, 'colsample_bytree': 0.6541016772650498}. Best is trial 2 with value: 0.8682857142857143.


[100]	valid_0's binary_logloss: 0.331648
Did not meet early stopping. Best iteration is:
[98]	valid_0's binary_logloss: 0.331481
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.384116
[20]	valid_0's binary_logloss: 0.348567
[30]	valid_0's binary_logloss: 0.338425
[40]	valid_0's binary_logloss: 0.336026
[50]	valid_0's binary_logloss: 0.335497
[60]	valid_0's binary_logloss: 0.334816
[70]	valid_0's binary_logloss: 0.336052
Early stopping, best iteration is:
[60]	valid_0's binary_logloss: 0.334816


[I 2025-02-08 20:39:01,528] Trial 17 finished with value: 0.7328571428571429 and parameters: {'num_leaves': 81, 'max_depth': 6, 'learning_rate': 0.28640111044646277, 'min_child_samples': 35, 'subsample': 0.6769273886015423, 'colsample_bytree': 0.5716803800953635}. Best is trial 2 with value: 0.8682857142857143.


Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.34555
[20]	valid_0's binary_logloss: 0.335598
[30]	valid_0's binary_logloss: 0.341941
Early stopping, best iteration is:
[20]	valid_0's binary_logloss: 0.335598
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.427079
[20]	valid_0's binary_logloss: 0.386915
[30]	valid_0's binary_logloss: 0.365944
[40]	valid_0's binary_logloss: 0.353707


[I 2025-02-08 20:39:01,828] Trial 18 finished with value: 0.7714285714285715 and parameters: {'num_leaves': 63, 'max_depth': 11, 'learning_rate': 0.04033992623368144, 'min_child_samples': 14, 'subsample': 0.8112415284257772, 'colsample_bytree': 0.8199477480766523}. Best is trial 2 with value: 0.8682857142857143.


[50]	valid_0's binary_logloss: 0.347499
[60]	valid_0's binary_logloss: 0.3437
[70]	valid_0's binary_logloss: 0.340738
[80]	valid_0's binary_logloss: 0.338396
[90]	valid_0's binary_logloss: 0.338277
[100]	valid_0's binary_logloss: 0.338883
Did not meet early stopping. Best iteration is:
[91]	valid_0's binary_logloss: 0.338173
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.412641
[20]	valid_0's binary_logloss: 0.37367
[30]	valid_0's binary_logloss: 0.354414
[40]	valid_0's binary_logloss: 0.344239
[50]	valid_0's binary_logloss: 0.338753
[60]	valid_0's binary_logloss: 0.335478


[I 2025-02-08 20:39:01,964] Trial 19 finished with value: 0.7757142857142857 and parameters: {'num_leaves': 43, 'max_depth': 5, 'learning_rate': 0.06410754851041421, 'min_child_samples': 51, 'subsample': 0.5759304749507445, 'colsample_bytree': 0.7223153179507895}. Best is trial 2 with value: 0.8682857142857143.


[70]	valid_0's binary_logloss: 0.333897
[80]	valid_0's binary_logloss: 0.33198
[90]	valid_0's binary_logloss: 0.331281
[100]	valid_0's binary_logloss: 0.331178
Did not meet early stopping. Best iteration is:
[100]	valid_0's binary_logloss: 0.331178
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.471173
[20]	valid_0's binary_logloss: 0.444564
[30]	valid_0's binary_logloss: 0.426505
[40]	valid_0's binary_logloss: 0.40969
[50]	valid_0's binary_logloss: 0.398563
[60]	valid_0's binary_logloss: 0.387513
[70]	valid_0's binary_logloss: 0.381028
[80]	valid_0's binary_logloss: 0.372571
[90]	valid_0's binary_logloss: 0.366219
[100]	valid_0's binary_logloss: 0.361697
Did not meet early stopping. Best iteration is:
[100]	valid_0's binary_logloss: 0.361697


[I 2025-02-08 20:39:02,159] Trial 20 finished with value: 0.8691428571428571 and parameters: {'num_leaves': 88, 'max_depth': 8, 'learning_rate': 0.02015953109992848, 'min_child_samples': 75, 'subsample': 0.6842691627142414, 'colsample_bytree': 0.5383316557129085}. Best is trial 20 with value: 0.8691428571428571.


Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.472714
[20]	valid_0's binary_logloss: 0.446922
[30]	valid_0's binary_logloss: 0.429151
[40]	valid_0's binary_logloss: 0.412569
[50]	valid_0's binary_logloss: 0.40146
[60]	valid_0's binary_logloss: 0.390539
[70]	valid_0's binary_logloss: 0.383784
[80]	valid_0's binary_logloss: 0.375242
[90]	valid_0's binary_logloss: 0.368716
[100]	valid_0's binary_logloss: 0.364184
Did not meet early stopping. Best iteration is:
[100]	valid_0's binary_logloss: 0.364184


[I 2025-02-08 20:39:02,410] Trial 21 finished with value: 0.8691428571428571 and parameters: {'num_leaves': 88, 'max_depth': 8, 'learning_rate': 0.01916824387149847, 'min_child_samples': 77, 'subsample': 0.6874608637036669, 'colsample_bytree': 0.5465819345858118}. Best is trial 20 with value: 0.8691428571428571.


Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.464067
[20]	valid_0's binary_logloss: 0.434118
[30]	valid_0's binary_logloss: 0.414659
[40]	valid_0's binary_logloss: 0.397392
[50]	valid_0's binary_logloss: 0.386175
[60]	valid_0's binary_logloss: 0.375445
[70]	valid_0's binary_logloss: 0.369305
[80]	valid_0's binary_logloss: 0.361699
[90]	valid_0's binary_logloss: 0.35624
[100]	valid_0's binary_logloss: 0.352469
Did not meet early stopping. Best iteration is:
[100]	valid_0's binary_logloss: 0.352469


[I 2025-02-08 20:39:02,647] Trial 22 finished with value: 0.8688571428571429 and parameters: {'num_leaves': 92, 'max_depth': 9, 'learning_rate': 0.02509462615448957, 'min_child_samples': 76, 'subsample': 0.7896736741734319, 'colsample_bytree': 0.5389840641720571}. Best is trial 20 with value: 0.8691428571428571.
[I 2025-02-08 20:39:02,838] Trial 23 finished with value: 0.8691428571428571 and parameters: {'num_leaves': 112, 'max_depth': 8, 'learning_rate': 0.018918923197947378, 'min_child_samples': 74, 'subsample': 0.7810916761991575, 'colsample_bytree': 0.5387903461372777}. Best is trial 20 with value: 0.8691428571428571.


Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.472959
[20]	valid_0's binary_logloss: 0.447364
[30]	valid_0's binary_logloss: 0.429671
[40]	valid_0's binary_logloss: 0.413115
[50]	valid_0's binary_logloss: 0.401999
[60]	valid_0's binary_logloss: 0.390978
[70]	valid_0's binary_logloss: 0.384332
[80]	valid_0's binary_logloss: 0.375801
[90]	valid_0's binary_logloss: 0.3693
[100]	valid_0's binary_logloss: 0.364668
Did not meet early stopping. Best iteration is:
[100]	valid_0's binary_logloss: 0.364668
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.474415


[I 2025-02-08 20:39:03,081] Trial 24 finished with value: 0.8688571428571429 and parameters: {'num_leaves': 116, 'max_depth': 7, 'learning_rate': 0.018018222987562678, 'min_child_samples': 71, 'subsample': 0.6883428351440108, 'colsample_bytree': 0.5007638008860537}. Best is trial 20 with value: 0.8691428571428571.


[20]	valid_0's binary_logloss: 0.449803
[30]	valid_0's binary_logloss: 0.432652
[40]	valid_0's binary_logloss: 0.416357
[50]	valid_0's binary_logloss: 0.405304
[60]	valid_0's binary_logloss: 0.394319
[70]	valid_0's binary_logloss: 0.387479
[80]	valid_0's binary_logloss: 0.378803
[90]	valid_0's binary_logloss: 0.372113
[100]	valid_0's binary_logloss: 0.367353
Did not meet early stopping. Best iteration is:
[100]	valid_0's binary_logloss: 0.367353


[I 2025-02-08 20:39:03,223] Trial 25 finished with value: 0.8048571428571428 and parameters: {'num_leaves': 129, 'max_depth': 5, 'learning_rate': 0.020279720258954553, 'min_child_samples': 81, 'subsample': 0.8648750977560031, 'colsample_bytree': 0.5471457538101787}. Best is trial 20 with value: 0.8691428571428571.


Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.471653
[20]	valid_0's binary_logloss: 0.445787
[30]	valid_0's binary_logloss: 0.427983
[40]	valid_0's binary_logloss: 0.411528
[50]	valid_0's binary_logloss: 0.400228
[60]	valid_0's binary_logloss: 0.389042
[70]	valid_0's binary_logloss: 0.382274
[80]	valid_0's binary_logloss: 0.373812
[90]	valid_0's binary_logloss: 0.367389
[100]	valid_0's binary_logloss: 0.362645
Did not meet early stopping. Best iteration is:
[100]	valid_0's binary_logloss: 0.362645
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.471147
[20]	valid_0's binary_logloss: 0.446362
[30]	valid_0's binary_logloss: 0.428171
[40]	valid_0's binary_logloss: 0.410738


[I 2025-02-08 20:39:03,476] Trial 26 finished with value: 0.8102857142857143 and parameters: {'num_leaves': 102, 'max_depth': 8, 'learning_rate': 0.01806943145856022, 'min_child_samples': 64, 'subsample': 0.764082774303608, 'colsample_bytree': 0.5641246443658536}. Best is trial 20 with value: 0.8691428571428571.


[50]	valid_0's binary_logloss: 0.398851
[60]	valid_0's binary_logloss: 0.38864
[70]	valid_0's binary_logloss: 0.380649
[80]	valid_0's binary_logloss: 0.372244
[90]	valid_0's binary_logloss: 0.366226
[100]	valid_0's binary_logloss: 0.361558
Did not meet early stopping. Best iteration is:
[100]	valid_0's binary_logloss: 0.361558
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.440651
[20]	valid_0's binary_logloss: 0.40167


[I 2025-02-08 20:39:03,661] Trial 27 finished with value: 0.7977142857142857 and parameters: {'num_leaves': 106, 'max_depth': 10, 'learning_rate': 0.037458130264859356, 'min_child_samples': 99, 'subsample': 0.841823844831269, 'colsample_bytree': 0.6739520189422327}. Best is trial 20 with value: 0.8691428571428571.


[30]	valid_0's binary_logloss: 0.377187
[40]	valid_0's binary_logloss: 0.361843
[50]	valid_0's binary_logloss: 0.35219
[60]	valid_0's binary_logloss: 0.345092
[70]	valid_0's binary_logloss: 0.340381
[80]	valid_0's binary_logloss: 0.335867
[90]	valid_0's binary_logloss: 0.334054
[100]	valid_0's binary_logloss: 0.333232
Did not meet early stopping. Best iteration is:
[100]	valid_0's binary_logloss: 0.333232
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.466615
[20]	valid_0's binary_logloss: 0.438285
[30]	valid_0's binary_logloss: 0.419496
[40]	valid_0's binary_logloss: 0.402527
[50]	valid_0's binary_logloss: 0.391082


[I 2025-02-08 20:39:03,820] Trial 28 finished with value: 0.868 and parameters: {'num_leaves': 87, 'max_depth': 5, 'learning_rate': 0.023834120605813875, 'min_child_samples': 73, 'subsample': 0.9233391711145824, 'colsample_bytree': 0.5034615458480549}. Best is trial 20 with value: 0.8691428571428571.


[60]	valid_0's binary_logloss: 0.380002
[70]	valid_0's binary_logloss: 0.37345
[80]	valid_0's binary_logloss: 0.365541
[90]	valid_0's binary_logloss: 0.359539
[100]	valid_0's binary_logloss: 0.355382
Did not meet early stopping. Best iteration is:
[100]	valid_0's binary_logloss: 0.355382
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.473134
[20]	valid_0's binary_logloss: 0.448846
[30]	valid_0's binary_logloss: 0.427898
[40]	valid_0's binary_logloss: 0.412332
[50]	valid_0's binary_logloss: 0.400491
[60]	valid_0's binary_logloss: 0.389769
[70]	valid_0's binary_logloss: 0.38053


[I 2025-02-08 20:39:04,013] Trial 29 finished with value: 0.7951428571428572 and parameters: {'num_leaves': 116, 'max_depth': 7, 'learning_rate': 0.015596101682981074, 'min_child_samples': 78, 'subsample': 0.6922678696789265, 'colsample_bytree': 0.6667067229068658}. Best is trial 20 with value: 0.8691428571428571.


[80]	valid_0's binary_logloss: 0.371975
[90]	valid_0's binary_logloss: 0.365795
[100]	valid_0's binary_logloss: 0.360828
Did not meet early stopping. Best iteration is:
[100]	valid_0's binary_logloss: 0.360828
Best parameters found: {'num_leaves': 88, 'max_depth': 8, 'learning_rate': 0.02015953109992848, 'min_child_samples': 75, 'subsample': 0.6842691627142414, 'colsample_bytree': 0.5383316557129085}
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.471173
[20]	valid_0's binary_logloss: 0.444564
[30]	valid_0's binary_logloss: 0.426505
[40]	valid_0's binary_logloss: 0.40969
[50]	valid_0's binary_logloss: 0.398563
[60]	valid_0's binary_logloss: 0.387513
[70]	valid_0's binary_logloss: 0.381028
[80]	valid_0's binary_logloss: 0.372571
[90]	valid_0's binary_logloss: 0.366219
[100]	valid_0's binary_logloss: 0.361697
Did not meet early stopping. Best iteration is:
[100]	valid_0's binary_logloss: 0.361697
Best Threshold: 0.4494
Accuracy: 0.8691
Classi

Precision과 Recall 균형 조정

Precision 증가(0.80 → 0.84)
Recall 감소(0.48 → 0.44)
이탈 고객을 보다 정확하게 예측하지만, 일부를 놓칠 가능성이 있음
비즈니스 목표에 따라 Threshold를 조정할 필요 있음!
→ 이탈 고객 Recall을 높이고 싶다면 더 낮은 Threshold를 시도하는 것도 가능