In [1]:
import numpy as np
import pandas as pd

In [2]:
df=pd.read_csv('./data/preprocessed_Bank_Customer_Churn_Prediction.csv')

In [3]:
df


Unnamed: 0,credit_score,country,gender,age,tenure,balance,products_number,credit_card,active_member,estimated_salary,churn
0,619,0,1,42,2,0.00,1,1,1,101348.88,1
1,608,2,1,41,1,83807.86,1,0,1,112542.58,0
2,502,0,1,42,8,159660.80,3,1,0,113931.57,1
3,699,0,1,39,1,0.00,2,0,0,93826.63,0
4,850,2,1,43,2,125510.82,1,1,1,79084.10,0
...,...,...,...,...,...,...,...,...,...,...,...
9995,771,0,0,39,5,0.00,2,1,0,96270.64,0
9996,516,0,0,35,10,57369.61,1,1,1,101699.77,0
9997,709,0,1,36,7,0.00,1,0,1,42085.58,1
9998,772,1,0,42,3,75075.31,2,1,0,92888.52,1


### XGBOOST 사용

In [4]:
import pandas as pd
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report


# X, y 분리
y = df['churn']  # 목표 변수
X = df.drop(columns=['churn'])  # 특징 변수

# 학습 및 테스트 데이터 분할
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)

# XGBoost 모델 생성 및 학습
model = xgb.XGBClassifier(
    objective='binary:logistic', 
    eval_metric='logloss', 
    use_label_encoder=False, 
    random_state=42
)
model.fit(X_train, y_train)

# 예측
y_pred = model.predict(X_test)

# 평가
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred, digits=4)

print(f'Accuracy: {accuracy:.4f}')
print('Classification Report:\n', report)


Accuracy: 0.8530
Classification Report:
               precision    recall  f1-score   support

           0     0.8802    0.9439    0.9109      2389
           1     0.6941    0.4975    0.5796       611

    accuracy                         0.8530      3000
   macro avg     0.7871    0.7207    0.7453      3000
weighted avg     0.8423    0.8530    0.8434      3000



Parameters: { "use_label_encoder" } are not used.



In [5]:
import pandas as pd
import xgboost as xgb
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.metrics import accuracy_score, classification_report

# 데이터 준비
y = df['churn']
X = df.drop(columns=['churn'])

# 학습 및 테스트 데이터 분할
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)

# 하이퍼파라미터 설정
params = {
    'objective': 'binary:logistic',
    'eval_metric': 'logloss',
    'random_state': 42,
    'learning_rate': 0.1,
    'max_depth': 5,
    'subsample': 0.8,
    'colsample_bytree': 0.8,
    'n_estimators': 100
}

# ✅ XGBoost의 cv 기능 사용
cv_results = xgb.cv(
    params=params,
    dtrain=xgb.DMatrix(X_train, label=y_train),
    num_boost_round=100,
    nfold=5,
    stratified=True,
    metrics='logloss',
    early_stopping_rounds=10
)

# 최적 n_estimators 설정
best_n_estimators = len(cv_results)
params['n_estimators'] = best_n_estimators

# 최적 모델 학습
model = xgb.XGBClassifier(**params)
model.fit(X_train, y_train)

# 예측
y_pred = model.predict(X_test)

# 평가
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred, digits=4)

print(f'Best n_estimators: {best_n_estimators}')
print(f'Accuracy: {accuracy:.4f}')
print('Classification Report:\n', report)


Parameters: { "n_estimators" } are not used.



Best n_estimators: 59
Accuracy: 0.8673
Classification Report:
               precision    recall  f1-score   support

           0     0.8775    0.9686    0.9208      2389
           1     0.7934    0.4714    0.5914       611

    accuracy                         0.8673      3000
   macro avg     0.8355    0.7200    0.7561      3000
weighted avg     0.8604    0.8673    0.8537      3000



### LGB사용

In [6]:
import pandas as pd
import lightgbm as lgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report


# X, y 분리
y = df['churn']  # 목표 변수
X = df.drop(columns=['churn'])  # 특징 변수

# 학습 및 테스트 데이터 분할
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)

# LightGBM 데이터셋 생성
train_data = lgb.Dataset(X_train, label=y_train)
valid_data = lgb.Dataset(X_test, label=y_test, reference=train_data)  # 검증 데이터 설정

# LightGBM 모델 학습
params = {
    'objective': 'binary',
    'metric': 'binary_logloss',
    'verbosity': -1
}

# callbacks을 사용하여 early stopping 적용
model = lgb.train(params, train_data, num_boost_round=100,
                  valid_sets=[valid_data], valid_names=['valid'],
                  callbacks=[lgb.early_stopping(10), lgb.log_evaluation(10)])

# 예측
y_pred = model.predict(X_test)
y_pred = [1 if x > 0.5 else 0 for x in y_pred]

# 평가
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred, digits=4)


print(f'Accuracy: {accuracy:.4f}')
print('Classification Report:\n', report)


Training until validation scores don't improve for 10 rounds
[10]	valid's binary_logloss: 0.363612
[20]	valid's binary_logloss: 0.337697
[30]	valid's binary_logloss: 0.32777
[40]	valid's binary_logloss: 0.324757
[50]	valid's binary_logloss: 0.323227
[60]	valid's binary_logloss: 0.322389
[70]	valid's binary_logloss: 0.322251
Early stopping, best iteration is:
[67]	valid's binary_logloss: 0.321905
Accuracy: 0.8670
Classification Report:
               precision    recall  f1-score   support

           0     0.8806    0.9636    0.9202      2389
           1     0.7746    0.4894    0.5998       611

    accuracy                         0.8670      3000
   macro avg     0.8276    0.7265    0.7600      3000
weighted avg     0.8590    0.8670    0.8550      3000



### LGB gridsearch사용

In [7]:
import pandas as pd
import lightgbm as lgb
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, classification_report


# X, y 분리
y = df['churn']  # 목표 변수
X = df.drop(columns=['churn'])  # 특징 변수

# 학습 및 테스트 데이터 분할
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)

# LightGBM 모델 객체 생성
model = lgb.LGBMClassifier(objective='binary', metric='binary_logloss', verbosity=-1)

# 하이퍼파라미터 검색 공간 정의
param_grid = {
    'num_leaves': [20, 31, 50],
    'max_depth': [3, 5, 7],
    'learning_rate': [0.01, 0.05, 0.1],
    'min_child_samples': [10, 20, 50],
    'subsample': [0.7, 0.8, 0.9],
    'colsample_bytree': [0.7, 0.8, 0.9]
}

# Grid Search 수행
grid_search = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    scoring='accuracy',
    cv=3,  # 3-폴드 교차검증
    verbose=1,
    n_jobs=-1
)

grid_search.fit(X_train, y_train)

# 최적 하이퍼파라미터 출력
print("Best parameters found:", grid_search.best_params_)

# 최적 하이퍼파라미터 적용하여 모델 재학습
best_model = grid_search.best_estimator_

# 예측
y_pred = best_model.predict(X_test)

# 평가
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred,digits=4)

print(f'Accuracy: {accuracy:.4f}')
print('Classification Report:\n', report)


Fitting 3 folds for each of 729 candidates, totalling 2187 fits
Best parameters found: {'colsample_bytree': 0.7, 'learning_rate': 0.05, 'max_depth': 7, 'min_child_samples': 20, 'num_leaves': 20, 'subsample': 0.7}
Accuracy: 0.8693
Classification Report:
               precision    recall  f1-score   support

           0     0.8775    0.9715    0.9221      2389
           1     0.8085    0.4697    0.5942       611

    accuracy                         0.8693      3000
   macro avg     0.8430    0.7206    0.7582      3000
weighted avg     0.8634    0.8693    0.8553      3000



---

# optuna 사용


In [8]:
import pandas as pd
import lightgbm as lgb
import optuna
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report



# X, y 분리
y = df['churn']  # 목표 변수
X = df.drop(columns=['churn'])  # 특징 변수

# 학습 및 테스트 데이터 분할
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)

# Optuna를 이용한 하이퍼파라미터 튜닝
def objective(trial):
    params = {
        'objective': 'binary',
        'metric': 'binary_logloss',
        'verbosity': -1,
        'num_leaves': trial.suggest_int('num_leaves', 20, 150),
        'max_depth': trial.suggest_int('max_depth', 3, 15),
        'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
        'min_child_samples': trial.suggest_int('min_child_samples', 5, 100),
        'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
        'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)
    }
    
    train_data = lgb.Dataset(X_train, label=y_train)
    valid_data = lgb.Dataset(X_test, label=y_test, reference=train_data)
    
    model = lgb.train(params, train_data, num_boost_round=100,
                      valid_sets=[valid_data], 
                      callbacks=[lgb.early_stopping(10), lgb.log_evaluation(10)])
    
    y_pred = model.predict(X_test)
    y_pred = [1 if x > 0.5 else 0 for x in y_pred]
    accuracy = accuracy_score(y_test, y_pred)
    return accuracy

# Optuna 실행
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=30)

# 최적 하이퍼파라미터 출력
print("Best parameters found:", study.best_params)

# 최적 하이퍼파라미터로 모델 학습
best_params = study.best_params
best_params.update({'objective': 'binary', 'metric': 'binary_logloss', 'verbosity': -1})

train_data = lgb.Dataset(X_train, label=y_train)
valid_data = lgb.Dataset(X_test, label=y_test, reference=train_data)

model = lgb.train(best_params, train_data, num_boost_round=100,
                  valid_sets=[valid_data], 
                  callbacks=[lgb.early_stopping(10), lgb.log_evaluation(10)])

# 최적 모델 예측
y_pred = model.predict(X_test)
y_pred = [1 if x > 0.5 else 0 for x in y_pred]

# 평가
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print(f'Accuracy: {accuracy:.4f}')
print('Classification Report:\n', report)


[I 2025-02-08 20:28:34,158] A new study created in memory with name: no-name-aecefbd0-5165-4efd-a226-68d48e291fcd
  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)


Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.369068
[20]	valid_0's binary_logloss: 0.341788
[30]	valid_0's binary_logloss: 0.332273


[I 2025-02-08 20:28:34,394] Trial 0 finished with value: 0.868 and parameters: {'num_leaves': 102, 'max_depth': 15, 'learning_rate': 0.09392912877877257, 'min_child_samples': 65, 'subsample': 0.9572992481157327, 'colsample_bytree': 0.9674451813971876}. Best is trial 0 with value: 0.868.


[40]	valid_0's binary_logloss: 0.332453
Early stopping, best iteration is:
[30]	valid_0's binary_logloss: 0.332273
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.451064
[20]	valid_0's binary_logloss: 0.416115
[30]	valid_0's binary_logloss: 0.39456


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)


[40]	valid_0's binary_logloss: 0.37525
[50]	valid_0's binary_logloss: 0.364003
[60]	valid_0's binary_logloss: 0.354626


[I 2025-02-08 20:28:35,024] Trial 1 finished with value: 0.8703333333333333 and parameters: {'num_leaves': 141, 'max_depth': 11, 'learning_rate': 0.03059362807672799, 'min_child_samples': 44, 'subsample': 0.9951404646963116, 'colsample_bytree': 0.5962777131433259}. Best is trial 1 with value: 0.8703333333333333.


[70]	valid_0's binary_logloss: 0.348139
[80]	valid_0's binary_logloss: 0.341836
[90]	valid_0's binary_logloss: 0.337978
[100]	valid_0's binary_logloss: 0.335031
Did not meet early stopping. Best iteration is:
[100]	valid_0's binary_logloss: 0.335031


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)


Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.461247
[20]	valid_0's binary_logloss: 0.431522
[30]	valid_0's binary_logloss: 0.409214
[40]	valid_0's binary_logloss: 0.392885
[50]	valid_0's binary_logloss: 0.380659
[60]	valid_0's binary_logloss: 0.369902
[70]	valid_0's binary_logloss: 0.36154
[80]	valid_0's binary_logloss: 0.355085


[I 2025-02-08 20:28:35,287] Trial 2 finished with value: 0.8626666666666667 and parameters: {'num_leaves': 34, 'max_depth': 6, 'learning_rate': 0.016993299311213607, 'min_child_samples': 7, 'subsample': 0.9636725268244113, 'colsample_bytree': 0.9182427175837214}. Best is trial 1 with value: 0.8703333333333333.


[90]	valid_0's binary_logloss: 0.350078
[100]	valid_0's binary_logloss: 0.345693
Did not meet early stopping. Best iteration is:
[100]	valid_0's binary_logloss: 0.345693
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.373431
[20]	valid_0's binary_logloss: 0.34477


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)
[I 2025-02-08 20:28:35,511] Trial 3 finished with value: 0.8666666666666667 and parameters: {'num_leaves': 121, 'max_depth': 11, 'learning_rate': 0.16675878914710532, 'min_child_samples': 10, 'subsample': 0.7619517881826199, 'colsample_bytree': 0.5185100769875186}. Best is trial 1 with value: 0.8703333333333333.
  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)
[I 2025-02-08 20:28:35,625] Trial 4 finished with value: 0.8666666666666667 and parameters: {'num_leaves': 26, 'max_depth': 15, 'learning_rate': 0.21199773406385425, 'min_child_samples': 56, 'subsample': 0.7659472695681386, 'colsample_bytree': 0.5010582219722817}. Best 

[30]	valid_0's binary_logloss: 0.344065
Early stopping, best iteration is:
[21]	valid_0's binary_logloss: 0.342327
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.355842
[20]	valid_0's binary_logloss: 0.330446
[30]	valid_0's binary_logloss: 0.328438
[40]	valid_0's binary_logloss: 0.327049
Early stopping, best iteration is:
[37]	valid_0's binary_logloss: 0.326373
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.476439
[20]	valid_0's binary_logloss: 0.455292
[30]	valid_0's binary_logloss: 0.439079
[40]	valid_0's binary_logloss: 0.42291
[50]	valid_0's binary_logloss: 0.41153
[60]	valid_0's binary_logloss: 0.401188
[70]	valid_0's binary_logloss: 0.392872


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)
[I 2025-02-08 20:28:35,723] Trial 5 finished with value: 0.8453333333333334 and parameters: {'num_leaves': 115, 'max_depth': 3, 'learning_rate': 0.016908312840709303, 'min_child_samples': 50, 'subsample': 0.9851075756068717, 'colsample_bytree': 0.6298063099988375}. Best is trial 1 with value: 0.8703333333333333.
  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)
[I 2025-02-08 20:28:35,829] Trial 6 finished with value: 0.8683333333333333 and parameters: {'num_leaves': 64, 'max_depth': 10, 'learning_rate': 0.25945132349153144, 'min_child_samples': 92, 'subsample': 0.9182744345992034, 'colsample_bytree': 0.7373562887548155}. Best 

[80]	valid_0's binary_logloss: 0.38474
[90]	valid_0's binary_logloss: 0.378147
[100]	valid_0's binary_logloss: 0.372314
Did not meet early stopping. Best iteration is:
[100]	valid_0's binary_logloss: 0.372314
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.340184
[20]	valid_0's binary_logloss: 0.325637
[30]	valid_0's binary_logloss: 0.32846
Early stopping, best iteration is:
[22]	valid_0's binary_logloss: 0.324919
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.467024
[20]	valid_0's binary_logloss: 0.441006
[30]	valid_0's binary_logloss: 0.422259
[40]	valid_0's binary_logloss: 0.404791
[50]	valid_0's binary_logloss: 0.392858
[60]	valid_0's binary_logloss: 0.382506
[70]	valid_0's binary_logloss: 0.374469
[80]	valid_0's binary_logloss: 0.366862
[90]	valid_0's binary_logloss: 0.361111
[100]	valid_0's binary_logloss: 0.356295
Did not meet early stopping. Best iteration is:
[100]	valid_0's binary_log

  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)
[I 2025-02-08 20:28:35,918] Trial 7 finished with value: 0.861 and parameters: {'num_leaves': 42, 'max_depth': 3, 'learning_rate': 0.023728316499921114, 'min_child_samples': 56, 'subsample': 0.6280302884451805, 'colsample_bytree': 0.6434185928083846}. Best is trial 1 with value: 0.8703333333333333.
  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)
[I 2025-02-08 20:28:36,055] Trial 8 finished with value: 0.862 and parameters: {'num_leaves': 131, 'max_depth': 9, 'learning_rate': 0.26764898870145715, 'min_child_samples': 38, 'subsample': 0.6068141553924304, 'colsample_bytree': 0.7912528326671284}. Best is trial 1 with value: 0.87

Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.341342
[20]	valid_0's binary_logloss: 0.339009
Early stopping, best iteration is:
[14]	valid_0's binary_logloss: 0.338508
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.449982
[20]	valid_0's binary_logloss: 0.414998
[30]	valid_0's binary_logloss: 0.391552
[40]	valid_0's binary_logloss: 0.375264


[I 2025-02-08 20:28:36,232] Trial 9 finished with value: 0.8663333333333333 and parameters: {'num_leaves': 101, 'max_depth': 5, 'learning_rate': 0.02592790303007854, 'min_child_samples': 78, 'subsample': 0.8928960037321187, 'colsample_bytree': 0.8992038743357513}. Best is trial 1 with value: 0.8703333333333333.
  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),


[50]	valid_0's binary_logloss: 0.363861
[60]	valid_0's binary_logloss: 0.355143
[70]	valid_0's binary_logloss: 0.348394
[80]	valid_0's binary_logloss: 0.343508
[90]	valid_0's binary_logloss: 0.339363
[100]	valid_0's binary_logloss: 0.335951
Did not meet early stopping. Best iteration is:
[100]	valid_0's binary_logloss: 0.335951
Training until validation scores don't improve for 10 rounds


  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)


[10]	valid_0's binary_logloss: 0.428711
[20]	valid_0's binary_logloss: 0.387881
[30]	valid_0's binary_logloss: 0.367624
[40]	valid_0's binary_logloss: 0.350638
[50]	valid_0's binary_logloss: 0.34344
[60]	valid_0's binary_logloss: 0.338017
[70]	valid_0's binary_logloss: 0.334622
[80]	valid_0's binary_logloss: 0.3319
[90]	valid_0's binary_logloss: 0.331881
Early stopping, best iteration is:
[86]	valid_0's binary_logloss: 0.331346


[I 2025-02-08 20:28:37,129] Trial 10 finished with value: 0.8673333333333333 and parameters: {'num_leaves': 149, 'max_depth': 12, 'learning_rate': 0.04892595813692864, 'min_child_samples': 29, 'subsample': 0.6900970033503924, 'colsample_bytree': 0.6129196179278745}. Best is trial 1 with value: 0.8703333333333333.
  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)


Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.421909
[20]	valid_0's binary_logloss: 0.379734
[30]	valid_0's binary_logloss: 0.356095
[40]	valid_0's binary_logloss: 0.343337
[50]	valid_0's binary_logloss: 0.336348
[60]	valid_0's binary_logloss: 0.332174
[70]	valid_0's binary_logloss: 0.328513


[I 2025-02-08 20:28:37,459] Trial 11 finished with value: 0.8673333333333333 and parameters: {'num_leaves': 70, 'max_depth': 9, 'learning_rate': 0.05139652555597693, 'min_child_samples': 100, 'subsample': 0.8610925947506923, 'colsample_bytree': 0.7489771683458024}. Best is trial 1 with value: 0.8703333333333333.


[80]	valid_0's binary_logloss: 0.32633
[90]	valid_0's binary_logloss: 0.325822
[100]	valid_0's binary_logloss: 0.325509
Did not meet early stopping. Best iteration is:
[100]	valid_0's binary_logloss: 0.325509
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.38743
[20]	valid_0's binary_logloss: 0.347212


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)
[I 2025-02-08 20:28:37,739] Trial 12 finished with value: 0.8683333333333333 and parameters: {'num_leaves': 65, 'max_depth': 12, 'learning_rate': 0.08941224884484537, 'min_child_samples': 91, 'subsample': 0.8742945589196004, 'colsample_bytree': 0.7468781487632669}. Best is trial 1 with value: 0.8703333333333333.


[30]	valid_0's binary_logloss: 0.333147
[40]	valid_0's binary_logloss: 0.327321
[50]	valid_0's binary_logloss: 0.325472
[60]	valid_0's binary_logloss: 0.32497
[70]	valid_0's binary_logloss: 0.325747
Early stopping, best iteration is:
[62]	valid_0's binary_logloss: 0.324639
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.482848


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)


[20]	valid_0's binary_logloss: 0.464526
[30]	valid_0's binary_logloss: 0.446928
[40]	valid_0's binary_logloss: 0.432904
[50]	valid_0's binary_logloss: 0.421783
[60]	valid_0's binary_logloss: 0.411131
[70]	valid_0's binary_logloss: 0.401697
[80]	valid_0's binary_logloss: 0.39284


[I 2025-02-08 20:28:38,124] Trial 13 finished with value: 0.831 and parameters: {'num_leaves': 58, 'max_depth': 10, 'learning_rate': 0.010151479885093052, 'min_child_samples': 76, 'subsample': 0.50685043002736, 'colsample_bytree': 0.6875545287836768}. Best is trial 1 with value: 0.8703333333333333.


[90]	valid_0's binary_logloss: 0.385605
[100]	valid_0's binary_logloss: 0.379111
Did not meet early stopping. Best iteration is:
[100]	valid_0's binary_logloss: 0.379111
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.372556
[20]	valid_0's binary_logloss: 0.338584
[30]	valid_0's binary_logloss: 0.32934


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)
[I 2025-02-08 20:28:38,361] Trial 14 finished with value: 0.8663333333333333 and parameters: {'num_leaves': 85, 'max_depth': 7, 'learning_rate': 0.09835003545612901, 'min_child_samples': 30, 'subsample': 0.8518274622926714, 'colsample_bytree': 0.8295191437595081}. Best is trial 1 with value: 0.8703333333333333.


[40]	valid_0's binary_logloss: 0.327438
[50]	valid_0's binary_logloss: 0.326597
Early stopping, best iteration is:
[49]	valid_0's binary_logloss: 0.326306
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.450488


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)


[20]	valid_0's binary_logloss: 0.415271
[30]	valid_0's binary_logloss: 0.393722
[40]	valid_0's binary_logloss: 0.374297
[50]	valid_0's binary_logloss: 0.363054
[60]	valid_0's binary_logloss: 0.353751


[I 2025-02-08 20:28:38,900] Trial 15 finished with value: 0.8703333333333333 and parameters: {'num_leaves': 86, 'max_depth': 13, 'learning_rate': 0.03105649071677099, 'min_child_samples': 44, 'subsample': 0.8120781368599115, 'colsample_bytree': 0.5698270248217359}. Best is trial 1 with value: 0.8703333333333333.


[70]	valid_0's binary_logloss: 0.347472
[80]	valid_0's binary_logloss: 0.341259
[90]	valid_0's binary_logloss: 0.337353
[100]	valid_0's binary_logloss: 0.334879
Did not meet early stopping. Best iteration is:
[100]	valid_0's binary_logloss: 0.334879


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)


Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.442873
[20]	valid_0's binary_logloss: 0.404373
[30]	valid_0's binary_logloss: 0.382743
[40]	valid_0's binary_logloss: 0.363901
[50]	valid_0's binary_logloss: 0.354225
[60]	valid_0's binary_logloss: 0.346063
[70]	valid_0's binary_logloss: 0.340691
[80]	valid_0's binary_logloss: 0.335396


[I 2025-02-08 20:28:39,701] Trial 16 finished with value: 0.8693333333333333 and parameters: {'num_leaves': 150, 'max_depth': 13, 'learning_rate': 0.036601087101028586, 'min_child_samples': 37, 'subsample': 0.8131097595053938, 'colsample_bytree': 0.5642034987246571}. Best is trial 1 with value: 0.8703333333333333.


[90]	valid_0's binary_logloss: 0.332384
[100]	valid_0's binary_logloss: 0.330786
Did not meet early stopping. Best iteration is:
[99]	valid_0's binary_logloss: 0.330754
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.450283


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)


[20]	valid_0's binary_logloss: 0.414302
[30]	valid_0's binary_logloss: 0.393214
[40]	valid_0's binary_logloss: 0.37409
[50]	valid_0's binary_logloss: 0.362974
[60]	valid_0's binary_logloss: 0.353791
[70]	valid_0's binary_logloss: 0.347981
[80]	valid_0's binary_logloss: 0.341956
[90]	valid_0's binary_logloss: 0.338651


[I 2025-02-08 20:28:40,309] Trial 17 finished with value: 0.8666666666666667 and parameters: {'num_leaves': 87, 'max_depth': 13, 'learning_rate': 0.03131052563620894, 'min_child_samples': 20, 'subsample': 0.7150536696007608, 'colsample_bytree': 0.570046407759173}. Best is trial 1 with value: 0.8703333333333333.


[100]	valid_0's binary_logloss: 0.33571
Did not meet early stopping. Best iteration is:
[100]	valid_0's binary_logloss: 0.33571
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.477947


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)


[20]	valid_0's binary_logloss: 0.456091
[30]	valid_0's binary_logloss: 0.436574
[40]	valid_0's binary_logloss: 0.421075
[50]	valid_0's binary_logloss: 0.408812
[60]	valid_0's binary_logloss: 0.39711
[70]	valid_0's binary_logloss: 0.387667
[80]	valid_0's binary_logloss: 0.378472


[I 2025-02-08 20:28:41,018] Trial 18 finished with value: 0.8533333333333334 and parameters: {'num_leaves': 129, 'max_depth': 14, 'learning_rate': 0.011990552904336246, 'min_child_samples': 43, 'subsample': 0.8033689261838585, 'colsample_bytree': 0.6825663404462388}. Best is trial 1 with value: 0.8703333333333333.


[90]	valid_0's binary_logloss: 0.371745
[100]	valid_0's binary_logloss: 0.365693
Did not meet early stopping. Best iteration is:
[100]	valid_0's binary_logloss: 0.365693
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.439467
[20]	valid_0's binary_logloss: 0.400756


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)


[30]	valid_0's binary_logloss: 0.379223
[40]	valid_0's binary_logloss: 0.360478
[50]	valid_0's binary_logloss: 0.350589
[60]	valid_0's binary_logloss: 0.343101
[70]	valid_0's binary_logloss: 0.337674
[80]	valid_0's binary_logloss: 0.333352
[90]	valid_0's binary_logloss: 0.330736


[I 2025-02-08 20:28:41,395] Trial 19 finished with value: 0.8733333333333333 and parameters: {'num_leaves': 102, 'max_depth': 8, 'learning_rate': 0.04008017886984275, 'min_child_samples': 64, 'subsample': 0.6547337087660698, 'colsample_bytree': 0.5742628765552298}. Best is trial 19 with value: 0.8733333333333333.


[100]	valid_0's binary_logloss: 0.329545
Did not meet early stopping. Best iteration is:
[100]	valid_0's binary_logloss: 0.329545
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.402658
[20]	valid_0's binary_logloss: 0.359705
[30]	valid_0's binary_logloss: 0.341115


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)
[I 2025-02-08 20:28:41,697] Trial 20 finished with value: 0.8703333333333333 and parameters: {'num_leaves': 138, 'max_depth': 7, 'learning_rate': 0.07149406755562614, 'min_child_samples': 67, 'subsample': 0.5480044243037713, 'colsample_bytree': 0.6897764973766888}. Best is trial 19 with value: 0.8733333333333333.


[40]	valid_0's binary_logloss: 0.332329
[50]	valid_0's binary_logloss: 0.328816
[60]	valid_0's binary_logloss: 0.326586
[70]	valid_0's binary_logloss: 0.325495
[80]	valid_0's binary_logloss: 0.324564
[90]	valid_0's binary_logloss: 0.324379
Early stopping, best iteration is:
[82]	valid_0's binary_logloss: 0.324319
Training until validation scores don't improve for 10 rounds


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)


[10]	valid_0's binary_logloss: 0.440088
[20]	valid_0's binary_logloss: 0.401191
[30]	valid_0's binary_logloss: 0.379737
[40]	valid_0's binary_logloss: 0.36119
[50]	valid_0's binary_logloss: 0.351507
[60]	valid_0's binary_logloss: 0.344147


[I 2025-02-08 20:28:42,124] Trial 21 finished with value: 0.869 and parameters: {'num_leaves': 104, 'max_depth': 8, 'learning_rate': 0.038859972991164636, 'min_child_samples': 45, 'subsample': 0.6851074473437014, 'colsample_bytree': 0.569209433317744}. Best is trial 19 with value: 0.8733333333333333.


[70]	valid_0's binary_logloss: 0.339068
[80]	valid_0's binary_logloss: 0.334265
[90]	valid_0's binary_logloss: 0.331178
[100]	valid_0's binary_logloss: 0.330138
Did not meet early stopping. Best iteration is:
[100]	valid_0's binary_logloss: 0.330138
Training until validation scores don't improve for 10 rounds


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)


[10]	valid_0's binary_logloss: 0.472019
[20]	valid_0's binary_logloss: 0.445738
[30]	valid_0's binary_logloss: 0.427672
[40]	valid_0's binary_logloss: 0.410498
[50]	valid_0's binary_logloss: 0.399216


[I 2025-02-08 20:28:42,624] Trial 22 finished with value: 0.8566666666666667 and parameters: {'num_leaves': 93, 'max_depth': 11, 'learning_rate': 0.01904974166955663, 'min_child_samples': 63, 'subsample': 0.6188255469544323, 'colsample_bytree': 0.5471038191219741}. Best is trial 19 with value: 0.8733333333333333.


[60]	valid_0's binary_logloss: 0.387186
[70]	valid_0's binary_logloss: 0.379679
[80]	valid_0's binary_logloss: 0.370416
[90]	valid_0's binary_logloss: 0.363989
[100]	valid_0's binary_logloss: 0.359069
Did not meet early stopping. Best iteration is:
[100]	valid_0's binary_logloss: 0.359069


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)


Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.415914
[20]	valid_0's binary_logloss: 0.373042
[30]	valid_0's binary_logloss: 0.353731
[40]	valid_0's binary_logloss: 0.339866
[50]	valid_0's binary_logloss: 0.334804
[60]	valid_0's binary_logloss: 0.331976
[70]	valid_0's binary_logloss: 0.330365
[80]	valid_0's binary_logloss: 0.328657


[I 2025-02-08 20:28:43,141] Trial 23 finished with value: 0.873 and parameters: {'num_leaves': 115, 'max_depth': 13, 'learning_rate': 0.061725458632948545, 'min_child_samples': 50, 'subsample': 0.6645209894348445, 'colsample_bytree': 0.5991945857250507}. Best is trial 19 with value: 0.8733333333333333.


[90]	valid_0's binary_logloss: 0.3287
Early stopping, best iteration is:
[86]	valid_0's binary_logloss: 0.327977
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.413713
[20]	valid_0's binary_logloss: 0.371434
[30]	valid_0's binary_logloss: 0.352244


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)


[40]	valid_0's binary_logloss: 0.338751
[50]	valid_0's binary_logloss: 0.333477
[60]	valid_0's binary_logloss: 0.330091
[70]	valid_0's binary_logloss: 0.328484
[80]	valid_0's binary_logloss: 0.326624
[90]	valid_0's binary_logloss: 0.326023
Early stopping, best iteration is:
[86]	valid_0's binary_logloss: 0.325591


[I 2025-02-08 20:28:43,548] Trial 24 finished with value: 0.8713333333333333 and parameters: {'num_leaves': 117, 'max_depth': 11, 'learning_rate': 0.06359303371511489, 'min_child_samples': 70, 'subsample': 0.660962245991575, 'colsample_bytree': 0.6160139381248273}. Best is trial 19 with value: 0.8733333333333333.
  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)


Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.405805
[20]	valid_0's binary_logloss: 0.362882
[30]	valid_0's binary_logloss: 0.343212
[40]	valid_0's binary_logloss: 0.333661
[50]	valid_0's binary_logloss: 0.329846


[I 2025-02-08 20:28:43,873] Trial 25 finished with value: 0.866 and parameters: {'num_leaves': 114, 'max_depth': 9, 'learning_rate': 0.06758751364691684, 'min_child_samples': 77, 'subsample': 0.6545054006461273, 'colsample_bytree': 0.656294668627648}. Best is trial 19 with value: 0.8733333333333333.


[60]	valid_0's binary_logloss: 0.327228
[70]	valid_0's binary_logloss: 0.326143
[80]	valid_0's binary_logloss: 0.325143
[90]	valid_0's binary_logloss: 0.325211
Early stopping, best iteration is:
[87]	valid_0's binary_logloss: 0.324904
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.375007
[20]	valid_0's binary_logloss: 0.340177
[30]	valid_0's binary_logloss: 0.330293
[40]	valid_0's binary_logloss: 0.325615
[50]	valid_0's binary_logloss: 0.325206
[60]	valid_0's binary_logloss: 0.324563
[70]	valid_0's binary_logloss: 0.325169
Early stopping, best iteration is:
[65]	valid_0's binary_logloss: 0.324377


  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)
[I 2025-02-08 20:28:44,006] Trial 26 finished with value: 0.874 and parameters: {'num_leaves': 111, 'max_depth': 5, 'learning_rate': 0.14212578337462897, 'min_child_samples': 70, 'subsample': 0.572966772315434, 'colsample_bytree': 0.5359158611519707}. Best is trial 26 with value: 0.874.
  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)
[I 2025-02-08 20:28:44,152] Trial 27 finished with value: 0.8693333333333333 and parameters: {'num_leaves': 77, 'max_depth': 5, 'learning_rate': 0.12297169646174554, 'min_child_samples': 85, 'subsample': 0.5806868973020826, 'colsample_bytree': 0.5298646545466461}. Best is trial 26 with value: 0.

Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.38317
[20]	valid_0's binary_logloss: 0.345634
[30]	valid_0's binary_logloss: 0.333763
[40]	valid_0's binary_logloss: 0.327725
[50]	valid_0's binary_logloss: 0.325608
[60]	valid_0's binary_logloss: 0.323326
[70]	valid_0's binary_logloss: 0.322349
[80]	valid_0's binary_logloss: 0.322504
Early stopping, best iteration is:
[78]	valid_0's binary_logloss: 0.322014
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.366736
[20]	valid_0's binary_logloss: 0.334499
[30]	valid_0's binary_logloss: 0.327533
[40]	valid_0's binary_logloss: 0.32351
[50]	valid_0's binary_logloss: 0.324398
Early stopping, best iteration is:
[40]	valid_0's binary_logloss: 0.32351


[I 2025-02-08 20:28:44,266] Trial 28 finished with value: 0.8706666666666667 and parameters: {'num_leaves': 110, 'max_depth': 5, 'learning_rate': 0.16441973874173985, 'min_child_samples': 57, 'subsample': 0.5628140741281913, 'colsample_bytree': 0.5298717668378827}. Best is trial 26 with value: 0.874.
  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0)
[I 2025-02-08 20:28:44,483] Trial 29 finished with value: 0.8716666666666667 and parameters: {'num_leaves': 98, 'max_depth': 4, 'learning_rate': 0.09904080102575362, 'min_child_samples': 63, 'subsample': 0.5007715617415099, 'colsample_bytree': 0.602179101730343}. Best is trial 26 with value: 0.874.


Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.392928
[20]	valid_0's binary_logloss: 0.354851
[30]	valid_0's binary_logloss: 0.339598
[40]	valid_0's binary_logloss: 0.330307
[50]	valid_0's binary_logloss: 0.326968
[60]	valid_0's binary_logloss: 0.324682
[70]	valid_0's binary_logloss: 0.323002
[80]	valid_0's binary_logloss: 0.321737
[90]	valid_0's binary_logloss: 0.321293
[100]	valid_0's binary_logloss: 0.320692
Did not meet early stopping. Best iteration is:
[99]	valid_0's binary_logloss: 0.320609
Best parameters found: {'num_leaves': 111, 'max_depth': 5, 'learning_rate': 0.14212578337462897, 'min_child_samples': 70, 'subsample': 0.572966772315434, 'colsample_bytree': 0.5359158611519707}
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.375007
[20]	valid_0's binary_logloss: 0.340177
[30]	valid_0's binary_logloss: 0.330293
[40]	valid_0's binary_logloss: 0.325615
[50]	valid_0's binary_logloss: 0.32

In [9]:
import pandas as pd
import lightgbm as lgb
import optuna
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, precision_recall_curve


# X, y 분리
y = df['churn']  # 목표 변수
X = df.drop(columns=['churn'])  # 특징 변수

# 학습 및 테스트 데이터 분할
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)

# Optuna를 이용한 하이퍼파라미터 튜닝
def objective(trial):
    params = {
        'objective': 'binary',
        'metric': 'binary_logloss',
        'verbosity': -1,
        'num_leaves': trial.suggest_int('num_leaves', 20, 150),
        'max_depth': trial.suggest_int('max_depth', 3, 15),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3, log=True),
        'min_child_samples': trial.suggest_int('min_child_samples', 5, 100),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0)
    }
    
    train_data = lgb.Dataset(X_train, label=y_train)
    valid_data = lgb.Dataset(X_test, label=y_test, reference=train_data)
    
    model = lgb.train(params, train_data, num_boost_round=100,
                      valid_sets=[valid_data], 
                      callbacks=[lgb.early_stopping(10), lgb.log_evaluation(10)])
    
    y_pred_proba = model.predict(X_test)
    precision, recall, thresholds = precision_recall_curve(y_test, y_pred_proba)
    best_threshold = thresholds[(precision + recall).argmax()]  # 최적의 threshold 선택
    
    y_pred = [1 if x > best_threshold else 0 for x in y_pred_proba]
    accuracy = accuracy_score(y_test, y_pred)
    return accuracy

# Optuna 실행
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=30)

# 최적 하이퍼파라미터 출력
print("Best parameters found:", study.best_params)

# 최적 하이퍼파라미터로 모델 학습
best_params = study.best_params
best_params.update({'objective': 'binary', 'metric': 'binary_logloss', 'verbosity': -1})

train_data = lgb.Dataset(X_train, label=y_train)
valid_data = lgb.Dataset(X_test, label=y_test, reference=train_data)

model = lgb.train(best_params, train_data, num_boost_round=100,
                  valid_sets=[valid_data], 
                  callbacks=[lgb.early_stopping(10), lgb.log_evaluation(10)])

# 최적 모델 예측
y_pred_proba = model.predict(X_test)
precision, recall, thresholds = precision_recall_curve(y_test, y_pred_proba)
best_threshold = thresholds[(precision + recall).argmax()]  # 최적 threshold 선택

print(f'Best Threshold: {best_threshold:.4f}')

y_pred = [1 if x > best_threshold else 0 for x in y_pred_proba]

# 평가
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print(f'Accuracy: {accuracy:.4f}')
print('Classification Report:\n', report)


[I 2025-02-08 20:28:44,613] A new study created in memory with name: no-name-93807782-b866-4f11-af54-11c58d06e3e5
[I 2025-02-08 20:28:44,690] Trial 0 finished with value: 0.803 and parameters: {'num_leaves': 50, 'max_depth': 3, 'learning_rate': 0.12467137122952576, 'min_child_samples': 14, 'subsample': 0.6925485564728842, 'colsample_bytree': 0.8635862555703362}. Best is trial 0 with value: 0.803.


Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.377416
[20]	valid_0's binary_logloss: 0.348973
[30]	valid_0's binary_logloss: 0.335884
[40]	valid_0's binary_logloss: 0.329403
[50]	valid_0's binary_logloss: 0.325272
[60]	valid_0's binary_logloss: 0.323029
[70]	valid_0's binary_logloss: 0.321191
[80]	valid_0's binary_logloss: 0.320538
[90]	valid_0's binary_logloss: 0.320628
Early stopping, best iteration is:
[80]	valid_0's binary_logloss: 0.320538
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.38868
[20]	valid_0's binary_logloss: 0.3493
[30]	valid_0's binary_logloss: 0.33609
[40]	valid_0's binary_logloss: 0.329117
[50]	valid_0's binary_logloss: 0.326996
[60]	valid_0's binary_logloss: 0.324579
[70]	valid_0's binary_logloss: 0.323445
[80]	valid_0's binary_logloss: 0.323076
[90]	valid_0's binary_logloss: 0.322573


[I 2025-02-08 20:28:44,823] Trial 1 finished with value: 0.792 and parameters: {'num_leaves': 34, 'max_depth': 5, 'learning_rate': 0.11272648215039936, 'min_child_samples': 81, 'subsample': 0.7991249686556032, 'colsample_bytree': 0.5163253187975734}. Best is trial 0 with value: 0.803.


[100]	valid_0's binary_logloss: 0.323415
Did not meet early stopping. Best iteration is:
[91]	valid_0's binary_logloss: 0.322444
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.424888
[20]	valid_0's binary_logloss: 0.385597
[30]	valid_0's binary_logloss: 0.362932
[40]	valid_0's binary_logloss: 0.349903
[50]	valid_0's binary_logloss: 0.34233
[60]	valid_0's binary_logloss: 0.337259
[70]	valid_0's binary_logloss: 0.334117
[80]	valid_0's binary_logloss: 0.331963


[I 2025-02-08 20:28:45,374] Trial 2 finished with value: 0.824 and parameters: {'num_leaves': 74, 'max_depth': 10, 'learning_rate': 0.03589874859797542, 'min_child_samples': 32, 'subsample': 0.8029875250555134, 'colsample_bytree': 0.8740876259703843}. Best is trial 2 with value: 0.824.


[90]	valid_0's binary_logloss: 0.331164
[100]	valid_0's binary_logloss: 0.330493
Did not meet early stopping. Best iteration is:
[100]	valid_0's binary_logloss: 0.330493
Training until validation scores don't improve for 10 rounds


[I 2025-02-08 20:28:45,473] Trial 3 finished with value: 0.845 and parameters: {'num_leaves': 105, 'max_depth': 8, 'learning_rate': 0.24945403867645558, 'min_child_samples': 31, 'subsample': 0.6699446621270434, 'colsample_bytree': 0.8523528191841132}. Best is trial 3 with value: 0.845.


[10]	valid_0's binary_logloss: 0.339917
[20]	valid_0's binary_logloss: 0.340139
Early stopping, best iteration is:
[12]	valid_0's binary_logloss: 0.337827


[I 2025-02-08 20:28:45,606] Trial 4 finished with value: 0.8733333333333333 and parameters: {'num_leaves': 119, 'max_depth': 7, 'learning_rate': 0.11666414520116945, 'min_child_samples': 68, 'subsample': 0.6414845616181973, 'colsample_bytree': 0.6967974202980832}. Best is trial 4 with value: 0.8733333333333333.


Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.372706
[20]	valid_0's binary_logloss: 0.337199
[30]	valid_0's binary_logloss: 0.328143
[40]	valid_0's binary_logloss: 0.326986
Early stopping, best iteration is:
[35]	valid_0's binary_logloss: 0.326082
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.352654
[20]	valid_0's binary_logloss: 0.328142


[I 2025-02-08 20:28:45,724] Trial 5 finished with value: 0.7826666666666666 and parameters: {'num_leaves': 116, 'max_depth': 7, 'learning_rate': 0.17435117427751703, 'min_child_samples': 67, 'subsample': 0.9096285493776015, 'colsample_bytree': 0.6871941380547644}. Best is trial 4 with value: 0.8733333333333333.
[I 2025-02-08 20:28:45,774] Trial 6 finished with value: 0.8513333333333334 and parameters: {'num_leaves': 55, 'max_depth': 3, 'learning_rate': 0.2456049230856809, 'min_child_samples': 72, 'subsample': 0.5468107207286408, 'colsample_bytree': 0.5270955754211568}. Best is trial 4 with value: 0.8733333333333333.


[30]	valid_0's binary_logloss: 0.323717
[40]	valid_0's binary_logloss: 0.323838
Early stopping, best iteration is:
[35]	valid_0's binary_logloss: 0.322959
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.35412
[20]	valid_0's binary_logloss: 0.332456
[30]	valid_0's binary_logloss: 0.327166
[40]	valid_0's binary_logloss: 0.325046
Early stopping, best iteration is:
[37]	valid_0's binary_logloss: 0.324771
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.473659
[20]	valid_0's binary_logloss: 0.44808
[30]	valid_0's binary_logloss: 0.428105
[40]	valid_0's binary_logloss: 0.412905
[50]	valid_0's binary_logloss: 0.400369
[60]	valid_0's binary_logloss: 0.388854
[70]	valid_0's binary_logloss: 0.379111
[80]	valid_0's binary_logloss: 0.370763
[90]	valid_0's binary_logloss: 0.364682
[100]	valid_0's binary_logloss: 0.359747
Did not meet early stopping. Best iteration is:
[100]	valid_0's binary_logloss: 0.359747


[I 2025-02-08 20:28:46,159] Trial 7 finished with value: 0.802 and parameters: {'num_leaves': 61, 'max_depth': 12, 'learning_rate': 0.012549104025910673, 'min_child_samples': 48, 'subsample': 0.7876526494984664, 'colsample_bytree': 0.7813383201953483}. Best is trial 4 with value: 0.8733333333333333.


Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.426117
[20]	valid_0's binary_logloss: 0.383806
[30]	valid_0's binary_logloss: 0.359904
[40]	valid_0's binary_logloss: 0.347117
[50]	valid_0's binary_logloss: 0.339699


[I 2025-02-08 20:28:46,476] Trial 8 finished with value: 0.8203333333333334 and parameters: {'num_leaves': 105, 'max_depth': 12, 'learning_rate': 0.046841560008807785, 'min_child_samples': 53, 'subsample': 0.663691145473424, 'colsample_bytree': 0.7009736935929904}. Best is trial 4 with value: 0.8733333333333333.


[60]	valid_0's binary_logloss: 0.334671
[70]	valid_0's binary_logloss: 0.331604
[80]	valid_0's binary_logloss: 0.328916
[90]	valid_0's binary_logloss: 0.328251
[100]	valid_0's binary_logloss: 0.328232
Did not meet early stopping. Best iteration is:
[95]	valid_0's binary_logloss: 0.328082
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.433496
[20]	valid_0's binary_logloss: 0.395427
[30]	valid_0's binary_logloss: 0.3753
[40]	valid_0's binary_logloss: 0.360058
[50]	valid_0's binary_logloss: 0.351312
[60]	valid_0's binary_logloss: 0.343824
[70]	valid_0's binary_logloss: 0.339501
[80]	valid_0's binary_logloss: 0.335066
[90]	valid_0's binary_logloss: 0.332197
[100]	valid_0's binary_logloss: 0.330027
Did not meet early stopping. Best iteration is:
[100]	valid_0's binary_logloss: 0.330027


[I 2025-02-08 20:28:46,555] Trial 9 finished with value: 0.849 and parameters: {'num_leaves': 56, 'max_depth': 3, 'learning_rate': 0.05688234287971654, 'min_child_samples': 41, 'subsample': 0.6733120157234902, 'colsample_bytree': 0.5130782765335978}. Best is trial 4 with value: 0.8733333333333333.


Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.459779
[20]	valid_0's binary_logloss: 0.42872
[30]	valid_0's binary_logloss: 0.406458
[40]	valid_0's binary_logloss: 0.390338
[50]	valid_0's binary_logloss: 0.377743
[60]	valid_0's binary_logloss: 0.367375
[70]	valid_0's binary_logloss: 0.358581
[80]	valid_0's binary_logloss: 0.352168
[90]	valid_0's binary_logloss: 0.347422
[100]	valid_0's binary_logloss: 0.343552
Did not meet early stopping. Best iteration is:
[100]	valid_0's binary_logloss: 0.343552


[I 2025-02-08 20:28:46,951] Trial 10 finished with value: 0.8563333333333333 and parameters: {'num_leaves': 149, 'max_depth': 15, 'learning_rate': 0.01786826273426072, 'min_child_samples': 96, 'subsample': 0.5024240995061939, 'colsample_bytree': 0.9899316418403894}. Best is trial 4 with value: 0.8733333333333333.


Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.473156
[20]	valid_0's binary_logloss: 0.447669
[30]	valid_0's binary_logloss: 0.427898
[40]	valid_0's binary_logloss: 0.412318
[50]	valid_0's binary_logloss: 0.399802
[60]	valid_0's binary_logloss: 0.38962

[I 2025-02-08 20:28:47,381] Trial 11 finished with value: 0.8563333333333333 and parameters: {'num_leaves': 150, 'max_depth': 15, 'learning_rate': 0.012105307727516284, 'min_child_samples': 96, 'subsample': 0.5004180084514833, 'colsample_bytree': 0.9882353914751002}. Best is trial 4 with value: 0.8733333333333333.



[70]	valid_0's binary_logloss: 0.380867
[80]	valid_0's binary_logloss: 0.373316
[90]	valid_0's binary_logloss: 0.366666
[100]	valid_0's binary_logloss: 0.360464
Did not meet early stopping. Best iteration is:
[100]	valid_0's binary_logloss: 0.360464
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.466239
[20]	valid_0's binary_logloss: 0.438412
[30]	valid_0's binary_logloss: 0.418286
[40]	valid_0's binary_logloss: 0.398957
[50]	valid_0's binary_logloss: 0.38678
[60]	valid_0's binary_logloss: 0.376337
[70]	valid_0's binary_logloss: 0.368155
[80]	valid_0's binary_logloss: 0.359187


[I 2025-02-08 20:28:47,721] Trial 12 finished with value: 0.873 and parameters: {'num_leaves': 150, 'max_depth': 14, 'learning_rate': 0.020955613554619074, 'min_child_samples': 98, 'subsample': 0.5713388760566425, 'colsample_bytree': 0.6280249020706474}. Best is trial 4 with value: 0.8733333333333333.


[90]	valid_0's binary_logloss: 0.353192
[100]	valid_0's binary_logloss: 0.34848
Did not meet early stopping. Best iteration is:
[100]	valid_0's binary_logloss: 0.34848
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.462248
[20]	valid_0's binary_logloss: 0.432534
[30]	valid_0's binary_logloss: 0.411844
[40]	valid_0's binary_logloss: 0.392011
[50]	valid_0's binary_logloss: 0.379712
[60]	valid_0's binary_logloss: 0.369371
[70]	valid_0's binary_logloss: 0.361551
[80]	valid_0's binary_logloss: 0.353058


[I 2025-02-08 20:28:48,154] Trial 13 finished with value: 0.872 and parameters: {'num_leaves': 131, 'max_depth': 10, 'learning_rate': 0.02339659043351734, 'min_child_samples': 82, 'subsample': 0.5931198199833378, 'colsample_bytree': 0.6394127723458541}. Best is trial 4 with value: 0.8733333333333333.


[90]	valid_0's binary_logloss: 0.347416
[100]	valid_0's binary_logloss: 0.343177
Did not meet early stopping. Best iteration is:
[100]	valid_0's binary_logloss: 0.343177
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.395604
[20]	valid_0's binary_logloss: 0.355859
[30]	valid_0's binary_logloss: 0.340156
[40]	valid_0's binary_logloss: 0.330136
[50]	valid_0's binary_logloss: 0.327192
[60]	valid_0's binary_logloss: 0.325695
[70]	valid_0's binary_logloss: 0.324525
[80]	valid_0's binary_logloss: 0.323437
[90]	valid_0's binary_logloss: 0.323728
Early stopping, best iteration is:
[80]	valid_0's binary_logloss: 0.323437


[I 2025-02-08 20:28:48,497] Trial 14 finished with value: 0.8716666666666667 and parameters: {'num_leaves': 129, 'max_depth': 6, 'learning_rate': 0.08682477795771257, 'min_child_samples': 62, 'subsample': 0.5971907705893623, 'colsample_bytree': 0.6087538358786091}. Best is trial 4 with value: 0.8733333333333333.


Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.455546
[20]	valid_0's binary_logloss: 0.423025
[30]	valid_0's binary_logloss: 0.401443
[40]	valid_0's binary_logloss: 0.381565
[50]	valid_0's binary_logloss: 0.369833


[I 2025-02-08 20:28:48,927] Trial 15 finished with value: 0.8736666666666667 and parameters: {'num_leaves': 90, 'max_depth': 13, 'learning_rate': 0.02796556173073916, 'min_child_samples': 84, 'subsample': 0.5914556547363532, 'colsample_bytree': 0.5910196930700887}. Best is trial 15 with value: 0.8736666666666667.


[60]	valid_0's binary_logloss: 0.359765
[70]	valid_0's binary_logloss: 0.352428
[80]	valid_0's binary_logloss: 0.344803
[90]	valid_0's binary_logloss: 0.340143
[100]	valid_0's binary_logloss: 0.337054
Did not meet early stopping. Best iteration is:
[100]	valid_0's binary_logloss: 0.337054
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.395531
[20]	valid_0's binary_logloss: 0.354227
[30]	valid_0's binary_logloss: 0.338198
[40]	valid_0's binary_logloss: 0.330827
[50]	valid_0's binary_logloss: 0.327562
[60]	valid_0's binary_logloss: 0.327693
Early stopping, best iteration is:
[52]	valid_0's binary_logloss: 0.327274


[I 2025-02-08 20:28:49,222] Trial 16 finished with value: 0.872 and parameters: {'num_leaves': 88, 'max_depth': 12, 'learning_rate': 0.069725342732223, 'min_child_samples': 82, 'subsample': 0.7361724742455349, 'colsample_bytree': 0.7780565988280261}. Best is trial 15 with value: 0.8736666666666667.


Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.432651
[20]	valid_0's binary_logloss: 0.390982
[30]	valid_0's binary_logloss: 0.370101
[40]	valid_0's binary_logloss: 0.352766
[50]	valid_0's binary_logloss: 0.344166
[60]	valid_0's binary_logloss: 0.337689
[70]	valid_0's binary_logloss: 0.333212
[80]	valid_0's binary_logloss: 0.329512
[90]	valid_0's binary_logloss: 0.327614
[100]	valid_0's binary_logloss: 0.326682
Did not meet early stopping. Best iteration is:
[100]	valid_0's binary_logloss: 0.326682


[I 2025-02-08 20:28:49,661] Trial 17 finished with value: 0.8736666666666667 and parameters: {'num_leaves': 87, 'max_depth': 9, 'learning_rate': 0.0458164131936239, 'min_child_samples': 60, 'subsample': 0.6164282034551061, 'colsample_bytree': 0.5824175644394936}. Best is trial 15 with value: 0.8736666666666667.


Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.445687
[20]	valid_0's binary_logloss: 0.409214
[30]	valid_0's binary_logloss: 0.38732
[40]	valid_0's binary_logloss: 0.367911
[50]	valid_0's binary_logloss: 0.356885
[60]	valid_0's binary_logloss: 0.348878
[70]	valid_0's binary_logloss: 0.342444
[80]	valid_0's binary_logloss: 0.336473
[90]	valid_0's binary_logloss: 0.332981


[I 2025-02-08 20:28:50,147] Trial 18 finished with value: 0.8733333333333333 and parameters: {'num_leaves': 85, 'max_depth': 10, 'learning_rate': 0.03448075877975506, 'min_child_samples': 56, 'subsample': 0.9567990312391379, 'colsample_bytree': 0.5699525313458925}. Best is trial 15 with value: 0.8736666666666667.


[100]	valid_0's binary_logloss: 0.330881
Did not meet early stopping. Best iteration is:
[100]	valid_0's binary_logloss: 0.330881
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.449922
[20]	valid_0's binary_logloss: 0.413764
[30]	valid_0's binary_logloss: 0.392849
[40]	valid_0's binary_logloss: 0.373919
[50]	valid_0's binary_logloss: 0.362656
[60]	valid_0's binary_logloss: 0.353835


[I 2025-02-08 20:28:50,734] Trial 19 finished with value: 0.8306666666666667 and parameters: {'num_leaves': 88, 'max_depth': 13, 'learning_rate': 0.03202930483495765, 'min_child_samples': 11, 'subsample': 0.7404019082327459, 'colsample_bytree': 0.5737611245485663}. Best is trial 15 with value: 0.8736666666666667.


[70]	valid_0's binary_logloss: 0.347528
[80]	valid_0's binary_logloss: 0.341345
[90]	valid_0's binary_logloss: 0.337678
[100]	valid_0's binary_logloss: 0.334968
Did not meet early stopping. Best iteration is:
[100]	valid_0's binary_logloss: 0.334968
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.456499
[20]	valid_0's binary_logloss: 0.424361
[30]	valid_0's binary_logloss: 0.402799
[40]	valid_0's binary_logloss: 0.382973
[50]	valid_0's binary_logloss: 0.37104


[I 2025-02-08 20:28:51,164] Trial 20 finished with value: 0.8743333333333333 and parameters: {'num_leaves': 73, 'max_depth': 9, 'learning_rate': 0.02725631672558437, 'min_child_samples': 76, 'subsample': 0.623527934356535, 'colsample_bytree': 0.58342496856832}. Best is trial 20 with value: 0.8743333333333333.


[60]	valid_0's binary_logloss: 0.360861
[70]	valid_0's binary_logloss: 0.353347
[80]	valid_0's binary_logloss: 0.345693
[90]	valid_0's binary_logloss: 0.340816
[100]	valid_0's binary_logloss: 0.337688
Did not meet early stopping. Best iteration is:
[100]	valid_0's binary_logloss: 0.337688
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.431201
[20]	valid_0's binary_logloss: 0.389801
[30]	valid_0's binary_logloss: 0.368494
[40]	valid_0's binary_logloss: 0.351174
[50]	valid_0's binary_logloss: 0.342535
[60]	valid_0's binary_logloss: 0.336761


[I 2025-02-08 20:28:51,524] Trial 21 finished with value: 0.8753333333333333 and parameters: {'num_leaves': 75, 'max_depth': 9, 'learning_rate': 0.04728923452292558, 'min_child_samples': 75, 'subsample': 0.6279748133999552, 'colsample_bytree': 0.5748630153653248}. Best is trial 21 with value: 0.8753333333333333.


[70]	valid_0's binary_logloss: 0.33219
[80]	valid_0's binary_logloss: 0.328773
[90]	valid_0's binary_logloss: 0.326949
[100]	valid_0's binary_logloss: 0.326341
Did not meet early stopping. Best iteration is:
[100]	valid_0's binary_logloss: 0.326341
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.458077
[20]	valid_0's binary_logloss: 0.42615
[30]	valid_0's binary_logloss: 0.401052
[40]	valid_0's binary_logloss: 0.383723
[50]	valid_0's binary_logloss: 0.371217
[60]	valid_0's binary_logloss: 0.360081
[70]	valid_0's binary_logloss: 0.352004
[80]	valid_0's binary_logloss: 0.344885
[90]	valid_0's binary_logloss: 0.340177


[I 2025-02-08 20:28:51,886] Trial 22 finished with value: 0.7766666666666666 and parameters: {'num_leaves': 71, 'max_depth': 9, 'learning_rate': 0.02409755769403644, 'min_child_samples': 86, 'subsample': 0.5459390284640174, 'colsample_bytree': 0.6609077327483339}. Best is trial 21 with value: 0.8753333333333333.


[100]	valid_0's binary_logloss: 0.336616
Did not meet early stopping. Best iteration is:
[100]	valid_0's binary_logloss: 0.336616
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.471779
[20]	valid_0's binary_logloss: 0.447112
[30]	valid_0's binary_logloss: 0.428668
[40]	valid_0's binary_logloss: 0.410458
[50]	valid_0's binary_logloss: 0.398142
[60]	valid_0's binary_logloss: 0.387319
[70]	valid_0's binary_logloss: 0.378551
[80]	valid_0's binary_logloss: 0.369599
[90]	valid_0's binary_logloss: 0.362912


[I 2025-02-08 20:28:52,086] Trial 23 finished with value: 0.872 and parameters: {'num_leaves': 20, 'max_depth': 11, 'learning_rate': 0.017782986876661674, 'min_child_samples': 74, 'subsample': 0.7081182637866659, 'colsample_bytree': 0.556422259620346}. Best is trial 21 with value: 0.8753333333333333.


[100]	valid_0's binary_logloss: 0.357382
Did not meet early stopping. Best iteration is:
[100]	valid_0's binary_logloss: 0.357382
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.44694
[20]	valid_0's binary_logloss: 0.410823
[30]	valid_0's binary_logloss: 0.388626
[40]	valid_0's binary_logloss: 0.368854
[50]	valid_0's binary_logloss: 0.357871


[I 2025-02-08 20:28:52,412] Trial 24 finished with value: 0.8746666666666667 and parameters: {'num_leaves': 74, 'max_depth': 8, 'learning_rate': 0.03425655481677627, 'min_child_samples': 89, 'subsample': 0.6312541740095989, 'colsample_bytree': 0.6051599452279621}. Best is trial 21 with value: 0.8753333333333333.


[60]	valid_0's binary_logloss: 0.349108
[70]	valid_0's binary_logloss: 0.342591
[80]	valid_0's binary_logloss: 0.336728
[90]	valid_0's binary_logloss: 0.332992
[100]	valid_0's binary_logloss: 0.33062
Did not meet early stopping. Best iteration is:
[100]	valid_0's binary_logloss: 0.33062
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.431257
[20]	valid_0's binary_logloss: 0.390003
[30]	valid_0's binary_logloss: 0.365239
[40]	valid_0's binary_logloss: 0.350713
[50]	valid_0's binary_logloss: 0.341495
[60]	valid_0's binary_logloss: 0.335469
[70]	valid_0's binary_logloss: 0.33151
[80]	valid_0's binary_logloss: 0.327737
[90]	valid_0's binary_logloss: 0.32635
[100]	valid_0's binary_logloss: 0.325342
Did not meet early stopping. Best iteration is:
[100]	valid_0's binary_logloss: 0.325342


[I 2025-02-08 20:28:52,701] Trial 25 finished with value: 0.828 and parameters: {'num_leaves': 43, 'max_depth': 8, 'learning_rate': 0.0437367581089793, 'min_child_samples': 76, 'subsample': 0.6395037282401348, 'colsample_bytree': 0.7461996566929374}. Best is trial 21 with value: 0.8753333333333333.
[I 2025-02-08 20:28:52,881] Trial 26 finished with value: 0.8733333333333333 and parameters: {'num_leaves': 65, 'max_depth': 5, 'learning_rate': 0.06494064552286169, 'min_child_samples': 89, 'subsample': 0.8822185607822493, 'colsample_bytree': 0.6365799725923887}. Best is trial 21 with value: 0.8753333333333333.


Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.414729
[20]	valid_0's binary_logloss: 0.375286
[30]	valid_0's binary_logloss: 0.356019
[40]	valid_0's binary_logloss: 0.341576
[50]	valid_0's binary_logloss: 0.335195
[60]	valid_0's binary_logloss: 0.330968
[70]	valid_0's binary_logloss: 0.328105
[80]	valid_0's binary_logloss: 0.325868
[90]	valid_0's binary_logloss: 0.324779
[100]	valid_0's binary_logloss: 0.324173
Did not meet early stopping. Best iteration is:
[99]	valid_0's binary_logloss: 0.324098
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.40817
[20]	valid_0's binary_logloss: 0.364769
[30]	valid_0's binary_logloss: 0.34792
[40]	valid_0's binary_logloss: 0.336694
[50]	valid_0's binary_logloss: 0.332745
[60]	valid_0's binary_logloss: 0.33001
[70]	valid_0's binary_logloss: 0.328398
[80]	valid_0's binary_logloss: 0.326194
[90]	valid_0's binary_logloss: 0.325212
[100]	valid_0's binary_logloss: 

[I 2025-02-08 20:28:53,158] Trial 27 finished with value: 0.8723333333333333 and parameters: {'num_leaves': 74, 'max_depth': 8, 'learning_rate': 0.07903407247305554, 'min_child_samples': 93, 'subsample': 0.6293796095930653, 'colsample_bytree': 0.5427500735730961}. Best is trial 21 with value: 0.8753333333333333.


Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.473482
[20]	valid_0's binary_logloss: 0.449423
[30]	valid_0's binary_logloss: 0.428336
[40]	valid_0's binary_logloss: 0.412334
[50]	valid_0's binary_logloss: 0.400005
[60]	valid_0's binary_logloss: 0.388571
[70]	valid_0's binary_logloss: 0.378814


[I 2025-02-08 20:28:53,497] Trial 28 finished with value: 0.798 and parameters: {'num_leaves': 97, 'max_depth': 7, 'learning_rate': 0.015036772552550206, 'min_child_samples': 76, 'subsample': 0.7154311547847175, 'colsample_bytree': 0.7330263520316256}. Best is trial 21 with value: 0.8753333333333333.


[80]	valid_0's binary_logloss: 0.369447
[90]	valid_0's binary_logloss: 0.362788
[100]	valid_0's binary_logloss: 0.357349
Did not meet early stopping. Best iteration is:
[100]	valid_0's binary_logloss: 0.357349
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.435563
[20]	valid_0's binary_logloss: 0.395001
[30]	valid_0's binary_logloss: 0.369955
[40]	valid_0's binary_logloss: 0.354206


[I 2025-02-08 20:28:53,795] Trial 29 finished with value: 0.8246666666666667 and parameters: {'num_leaves': 43, 'max_depth': 11, 'learning_rate': 0.04037293805980678, 'min_child_samples': 90, 'subsample': 0.5524603776891457, 'colsample_bytree': 0.6640869501191893}. Best is trial 21 with value: 0.8753333333333333.


[50]	valid_0's binary_logloss: 0.344381
[60]	valid_0's binary_logloss: 0.337458
[70]	valid_0's binary_logloss: 0.332858
[80]	valid_0's binary_logloss: 0.328773
[90]	valid_0's binary_logloss: 0.326881
[100]	valid_0's binary_logloss: 0.325621
Did not meet early stopping. Best iteration is:
[100]	valid_0's binary_logloss: 0.325621
Best parameters found: {'num_leaves': 75, 'max_depth': 9, 'learning_rate': 0.04728923452292558, 'min_child_samples': 75, 'subsample': 0.6279748133999552, 'colsample_bytree': 0.5748630153653248}
Training until validation scores don't improve for 10 rounds
[10]	valid_0's binary_logloss: 0.431201
[20]	valid_0's binary_logloss: 0.389801
[30]	valid_0's binary_logloss: 0.368494
[40]	valid_0's binary_logloss: 0.351174
[50]	valid_0's binary_logloss: 0.342535
[60]	valid_0's binary_logloss: 0.336761
[70]	valid_0's binary_logloss: 0.33219
[80]	valid_0's binary_logloss: 0.328773
[90]	valid_0's binary_logloss: 0.326949
[100]	valid_0's binary_logloss: 0.326341
Did not meet ea

Precision과 Recall 균형 조정

Precision 증가(0.80 → 0.84)
Recall 감소(0.48 → 0.44)
이탈 고객을 보다 정확하게 예측하지만, 일부를 놓칠 가능성이 있음
비즈니스 목표에 따라 Threshold를 조정할 필요 있음!
→ 이탈 고객 Recall을 높이고 싶다면 더 낮은 Threshold를 시도하는 것도 가능