<a href="https://colab.research.google.com/github/O-Kpy/Dacon/blob/main/Dacon_%EC%8B%A0%EC%9A%A9%EC%98%88%EC%B8%A1_%ED%95%84%EC%82%AC_catboost_%EB%81%9D%EA%B9%8C%EC%A7%80_%EB%8F%8C%EB%A0%A4%EB%B3%B4%EA%B8%B0.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
import pandas as pd
import numpy as np

from catboost import CatBoostClassifier
from sklearn.model_selection import StratifiedKFold, KFold
from sklearn.metrics import log_loss
from tensorflow.keras.utils import to_categorical

import matplotlib.pyplot as plt
import seaborn as sns

import warnings
warnings.filterwarnings('ignore')
from IPython.core.display import display, HTML
display(HTML("<style>.container { width: 100% !important; }</style>"))

In [5]:
train = pd.read_csv('/content/drive/MyDrive/dataset/Dacon/대회/신용카드 사용자 연체 예측 AI 경진대회/train.csv')
test = pd.read_csv('/content/drive/MyDrive/dataset/Dacon/대회/신용카드 사용자 연체 예측 AI 경진대회/test.csv')
submit = pd.read_csv('/content/drive/MyDrive/dataset/Dacon/대회/신용카드 사용자 연체 예측 AI 경진대회/sample_submission.csv')
test_index = test['index']
train = train.drop(['index'], axis=1)
test = test.drop(['index'], axis=1)
print(f'train shape:{train.shape}')
print(f'test shape:{test.shape}')


train shape:(26457, 19)
test shape:(10000, 18)


In [3]:
!pip install catboost



In [6]:
def days_to_year(x):
  return (x * -1) / 365

def minus(x):
  return x * -1

# 이상치 제거

In [7]:
def remove_outlier(train, column):
    
    df = train[column]
    
    # 1분위수
    quan_25 = np.percentile(df.values, 25)
    
    # 3분위수
    quan_75 = np.percentile(df.values, 75)
    
    iqr = quan_75 - quan_25
    
    lowest = quan_25 - iqr * 5
    highest = quan_75 + iqr * 5
    outlier_index = df[(df < lowest) | (df > highest)].index
    print('outlier의 수 : ' , len(outlier_index))
    print(df.iloc[outlier_index])
    train.drop(outlier_index, axis = 0, inplace = True)
    
    return train

candidate = ['child_num']
for cand in candidate:  
    train = remove_outlier(train,cand)

train.reset_index(drop = True,inplace = True)
len(train)

outlier의 수 :  6
8462     14
9021     14
10731    19
25313     7
25390    14
25638     7
Name: child_num, dtype: int64


26451

# 중복방지 이건 나중에 따로

In [8]:
def add_var(data):
    
    # 개개인을 구분할 수 있는 변수들을 묶어서 생성
    data['personal_id'] = data['gender'] + "_" + data['DAYS_BIRTH'].astype(str) + "_" + data['income_total'].astype(str) + "_" + data['income_type'].astype(str)
    
    # 카드를 생성한 기간도 같은 경우가 있어서 begin을 추가하여 하나의 변수를 더 생성
    data['personal_begin_id'] = data['gender'] + "_" + data['DAYS_BIRTH'].astype(str) + "_" + data['income_total'].astype(str) + "_" + data['income_type'].astype(str) + "_" + data['begin_month'].astype(str)
    
    # 그외의 변수들을 조합하여 하나의 변수로 추가 생성
    data['g_r_c'] = data['gender'] + "_" + data['reality'] + "_" + data['car'] 
    data['p_w_e'] = data['phone'].astype(str) + "_" + data['work_phone'].astype(str) + "_" + data['email'].astype(str) 
    
    return data

train = add_var(train)
test = add_var(test)

# 숫자형 변수 전처리 및 변수 생성

In [9]:
def numeric_process(data):
    
    # income_total 변수 전처리
    # 만단위로 생성
    data['income_total'] = data['income_total']/10000
    # 편차 제곱 변수 생성
    data['income_total_dev'] = (data['income_total'] - data['income_total'].mean())**2
    # 로그 변환
    data['income_total_log'] = data['income_total'].apply(np.log1p)

    # DAYS_EMPLOYED 변수 전처리
    # 0 이상인 경우 0으로 모두 변환
    data.loc[data['DAYS_EMPLOYED'] >= 0,'DAYS_EMPLOYED']=0
    # day를 year로 변환
    data['DAYS_EMPLOYED'] = data['DAYS_EMPLOYED'].apply(days_to_year) 
    # 로그 변환
    data['DAYS_EMPLOYED_log'] = data['DAYS_EMPLOYED'].apply(np.log1p)

    # begin_month 변수 전처리
    # 마이너스 변환
    data['begin_month'] = data['begin_month'].apply(minus)
    
    # DAYS_BIRTH 변수 전처리
    # day를 year로 변환
    data['DAYS_BIRTH'] = data['DAYS_BIRTH'].apply(days_to_year)

    #  DAYS_BIRTH, DAYS_EMPLOYED, income_total변수를 조합하여 RATIO 변수 생성
    data['EMPLOYED_BIRTH_RATIO'] = data['DAYS_EMPLOYED']/data['DAYS_BIRTH']
    data['INCOME_EMPLOYED_RATIO'] = data['income_total']/data['DAYS_EMPLOYED']
    data['INCOME_BIRTH_RATIO'] = data['income_total']/data['DAYS_BIRTH']
    
    # 가족수 - 자식수
    data['diff_fam_child'] = data['family_size'] - data['child_num']
    # chid_num과 family_size는 다음과 같이 최대 2와 5가 되도록 전처리
    data.loc[data['child_num'] >= 2,'child_num'] = 2
    data.loc[data['family_size'] >= 5,'child_num'] = 5
    # 가족수와 자녀수 sum 변수 추가
    data['FAM_CHILD_SUM'] = data[['child_num', 'family_size']].sum(axis=1)
    
    # income을 가족 수 및 자식 수로 나눈 비율
    data['INCOME_FAM_RATIO'] = data['income_total']/data['family_size']
    data['INCOME_child_num_RATIO'] = data['income_total']/data['child_num']
    
    # 일을하게 된 시점 변수 추가
    data['BIRTH_MINUS_EMPLOYED'] = data['DAYS_BIRTH'] - data['DAYS_EMPLOYED']
    # income total 변수에 before_EMPLOYED로 나눈 변수 추가
    data['INCOME_BIRTH_MINUS_EMPLOYED_RATIO'] = data['income_total']/data['BIRTH_MINUS_EMPLOYED']
    
    return data 


In [10]:
train = numeric_process(train)
test = numeric_process(test)

In [11]:
# occyp_type 변수 전처리

def occype_process(data):

  data['occyp_type'] = data['occyp_type'].fillna('NAN')
  # 경력이 없고 직업군이 none인 사람은 no_work로 대체
  data.loc[(data['DAYS_EMPLOYED'] == 0) & (data['occyp_type'] == 'NAN'), 'occyp_type'] = 'no_work'
  print(data['occyp_type'].value_counts(), '\n\n')

  return data

train = occype_process(train)
test = occype_process(test)

Laborers                 4512
no_work                  4438
NAN                      3733
Core staff               2646
Sales staff              2539
Managers                 2167
Drivers                  1572
High skill tech staff    1040
Accountants               902
Medicine staff            864
Cooking staff             457
Security staff            424
Cleaning staff            401
Private service staff     243
Low-skill Laborers        127
Waiters/barmen staff      123
Secretaries                97
Realty agents              63
HR staff                   62
IT staff                   41
Name: occyp_type, dtype: int64 


Laborers                 1699
no_work                  1697
NAN                      1455
Sales staff               946
Core staff                945
Managers                  845
Drivers                   563
High skill tech staff     343
Medicine staff            343
Accountants               339
Cooking staff             198
Security staff            168
Cleani

In [12]:
#구간화 함수
def make_bin(df, variable, n):
    
    data = df
    count, bin_dividers = np.histogram(data[variable], bins=n)
    bin_names=[str(i) for i in range(n)]
    data['%s_bin' % variable] = pd.cut(x=data[variable], bins=bin_dividers, labels=bin_names, include_lowest=True)
    data['%s_bin' % variable] = pd.factorize(data['%s_bin' % variable])[0]
    print(data['%s_bin' % variable], '\n\n')
    
    return data

# days_birth만 구간화 했을 떄 가장 성능이 좋았음
train = make_bin(train, 'DAYS_BIRTH', n=10)
test = make_bin(test, 'DAYS_BIRTH', n=10)

0        0
1        1
2        2
3        3
4        3
        ..
26446    1
26447    3
26448    5
26449    5
26450    2
Name: DAYS_BIRTH_bin, Length: 26451, dtype: int64 


0       0
1       1
2       2
3       1
4       3
       ..
9995    1
9996    8
9997    4
9998    3
9999    9
Name: DAYS_BIRTH_bin, Length: 10000, dtype: int64 




In [13]:
# feature selection
train = train.drop(['income_total', 'DAYS_EMPLOYED', 'FLAG_MOBIL'], axis=1)
test = test.drop(['income_total', 'DAYS_EMPLOYED', 'FLAG_MOBIL'], axis=1)

In [14]:
print(train.shape, test.shape)

(26451, 33) (10000, 32)


# 데이터 분할

> 컬럼 순서대로 정렬 후 object형 변수의 인덱스 정보를 cat_feature에 활용

In [15]:
x_train = train.drop(['credit'], axis=1)
y_train = train['credit']
x_test = test

In [16]:
cat_features = [f for f in x_train.columns if x_train[f].dtype=='object']

def columns_index(df, cat_features):
  cols = df.columns.values
  sidx = np.argsort(cols)
  return sidx[np.searchsorted(cols, cat_features, sorter=sidx)]

cat_features_idx = columns_index(x_train, cat_features)
print('Cat features are:', [f for f in cat_features])
print(cat_features_idx)

Cat features are: ['gender', 'car', 'reality', 'income_type', 'edu_type', 'family_type', 'house_type', 'occyp_type', 'personal_id', 'personal_begin_id', 'g_r_c', 'p_w_e']
[ 0  1  2  4  5  6  7 12 15 16 17 18]


In [28]:
cat_models = {}

skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=40)
outcomes =[]
sub=np.zeros((x_test.shape[0], 3))
cat_params = {'random_seed': 589, 'learning_rate': 0.01554563366576581, 'reg_lambda': 0.08726261680296399, 'max_depth': 8, 'colsample_bylevel': 0.9636993649385135, 'subsample': 0.9263791452993542, 'min_child_samples': 62, 'max_bin': 477}

for n_fold, (train_index, test_index) in enumerate(skf.split(x_train, y_train)):
  X_train, X_valid = x_train.iloc[train_index], x_train.iloc[test_index]
  Y_train, Y_valid = y_train.iloc[train_index], y_train.iloc[test_index]

  cat = CatBoostClassifier(n_estimators=3000, bootstrap_type='Bernoulli', **cat_params)
  cat.fit(X_train, Y_train, eval_set=[(X_train, Y_train), (X_valid, Y_valid)], early_stopping_rounds = 50, cat_features=cat_features, verbose=100)

  cat_models[n_fold] = cat

  # val 데이터 예측
  predict = cat.predict_proba(X_valid)
  # test 데이터 예측
  test_predict = cat.predict_proba(x_test)

  # val 데이터 예측 logloss 값 저장
  logloss=log_loss(to_categorical(Y_valid), predict)
  outcomes.append(logloss)
  print(f'FOLD{n_fold+1}:logloss:{logloss}')

  # test 데이터 예측 결과 종합
  # 최종적으로는 kfolds 횟수 만큼 나눠서 평균 값을 활용
  sub+=test_predict

  print(f'-----------------------------------------------------------------------------------------------------------------\n\n')

mean_outcome = np.mean(outcomes)
print('Mean:{}'.format(mean_outcome))
sub=sub/(skf.n_splits)




0:	learn: 1.0896300	test: 1.0896316	test1: 1.0895172	best: 1.0895172 (0)	total: 110ms	remaining: 5m 28s
100:	learn: 0.7596877	test: 0.6353998	test1: 0.7236813	best: 0.7236813 (100)	total: 28s	remaining: 13m 22s
200:	learn: 0.7215457	test: 0.5330940	test1: 0.6742266	best: 0.6742266 (200)	total: 57.9s	remaining: 13m 26s
300:	learn: 0.7077173	test: 0.5047494	test1: 0.6642723	best: 0.6642723 (300)	total: 1m 34s	remaining: 14m 5s
400:	learn: 0.6980324	test: 0.4935647	test1: 0.6611416	best: 0.6611364 (399)	total: 2m 11s	remaining: 14m 14s
500:	learn: 0.6901301	test: 0.4844878	test1: 0.6598981	best: 0.6598857 (495)	total: 2m 50s	remaining: 14m 9s
600:	learn: 0.6810508	test: 0.4770314	test1: 0.6590001	best: 0.6590001 (600)	total: 3m 30s	remaining: 14m
700:	learn: 0.6715180	test: 0.4723391	test1: 0.6579305	best: 0.6579305 (700)	total: 4m 11s	remaining: 13m 43s
800:	learn: 0.6614476	test: 0.4684535	test1: 0.6573069	best: 0.6572945 (796)	total: 4m 51s	remaining: 13m 21s
900:	learn: 0.6506937	test

KeyboardInterrupt: ignored

In [18]:
pip install optuna

Collecting optuna
[?25l  Downloading https://files.pythonhosted.org/packages/2b/21/d13081805e1e1afc71f5bb743ece324c8bd576237c51b899ecb38a717502/optuna-2.7.0-py3-none-any.whl (293kB)
[K     |████████████████████████████████| 296kB 7.6MB/s 
Collecting alembic
[?25l  Downloading https://files.pythonhosted.org/packages/eb/bd/c3486fd57a3eec5162a2e32e8f05880c990f0d92b03d268342d2e8fe7032/alembic-1.6.4-py2.py3-none-any.whl (164kB)
[K     |████████████████████████████████| 174kB 13.0MB/s 
Collecting cmaes>=0.8.2
  Downloading https://files.pythonhosted.org/packages/01/1f/43b01223a0366171f474320c6e966c39a11587287f098a5f09809b45e05f/cmaes-0.8.2-py3-none-any.whl
Collecting colorlog
  Downloading https://files.pythonhosted.org/packages/32/e6/e9ddc6fa1104fda718338b341e4b3dc31cd8039ab29e52fc73b508515361/colorlog-5.0.1-py2.py3-none-any.whl
Collecting cliff
[?25l  Downloading https://files.pythonhosted.org/packages/a2/d6/7d9acb68a77acd140be7fececb7f2701b2a29d2da9c54184cb8f93509590/cliff-3.7.0-py3-

In [23]:
import optuna
from lightgbm import LGBMClassifier
from optuna import Trial
from optuna.samplers import TPESampler

def objective(trial: Trial) -> float:
    params_cat = {
        "random_seed": trial.suggest_int('random_seed', 42, 1043),
        "learning_rate": trial.suggest_float('learning_rate', 0.01, 0.04),
        "n_estimators": 3000,
        "reg_lambda": trial.suggest_float("reg_lambda", 1e-8, 9e-2),
        "max_depth": trial.suggest_int("max_depth", 1, 10),
        "colsample_bylevel": trial.suggest_float("colsample_bylevel", 0.4, 1.0),
        "subsample": trial.suggest_float("subsample", 0.3, 1.0),
        "min_child_samples": trial.suggest_int("min_child_samples", 5, 100),
        "max_bin": trial.suggest_int("max_bin", 200, 500),
        'bootstrap_type':'Bernoulli'
        
    }
    skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=40)
    for i, (train_indice, valid_indice) in enumerate(skf.split(X=x_train, y=y_train)):
      X_train, X_valid = x_train.iloc[train_indice], x_train.iloc[valid_indice]
      Y_train, Y_valid = y_train.iloc[train_indice], y_train.iloc[valid_indice]

      model = CatBoostClassifier(**params_cat)
      model.fit(
          X_train,
          Y_train,
          eval_set=[(X_train, Y_train), (X_valid, Y_valid)],
          early_stopping_rounds=50,
          verbose=100,
          cat_features=cat_features
          )

    lgb_pred = model.predict_proba(X_valid)
    log_score = log_loss(Y_valid, lgb_pred)
    
    return log_score

In [24]:
sampler = TPESampler(seed=42)
study = optuna.create_study(
    study_name="cat_parameter_opt",
    direction="minimize",
    sampler=sampler,
)
study.optimize(objective, n_trials=10)
print("Best Score:", study.best_value)
print("Best trial:", study.best_trial.params)

[32m[I 2021-05-26 07:01:22,484][0m A new study created in memory with name: cat_parameter_opt[0m


0:	learn: 1.0765439	test: 1.0765577	test1: 1.0765245	best: 1.0765245 (0)	total: 140ms	remaining: 7m 1s
100:	learn: 0.7336900	test: 0.5422321	test1: 0.6881428	best: 0.6881428 (100)	total: 8.39s	remaining: 4m
200:	learn: 0.7192940	test: 0.5113798	test1: 0.6775315	best: 0.6775315 (200)	total: 18.2s	remaining: 4m 13s
300:	learn: 0.7065300	test: 0.4970842	test1: 0.6725212	best: 0.6725212 (300)	total: 28.5s	remaining: 4m 15s
400:	learn: 0.6930761	test: 0.4835493	test1: 0.6704759	best: 0.6704759 (400)	total: 39.3s	remaining: 4m 14s
500:	learn: 0.6805728	test: 0.4755786	test1: 0.6693839	best: 0.6693794 (498)	total: 50s	remaining: 4m 9s
600:	learn: 0.6691628	test: 0.4691486	test1: 0.6691077	best: 0.6689608 (585)	total: 1m	remaining: 4m 3s
Stopped by overfitting detector  (50 iterations wait)

bestTest = 0.6689608406
bestIteration = 585

Shrink model to first 586 iterations.
0:	learn: 1.0763257	test: 1.0763257	test1: 1.0764069	best: 1.0764069 (0)	total: 48.7ms	remaining: 2m 26s
100:	learn: 0.729

[32m[I 2021-05-26 07:06:17,378][0m Trial 0 finished with value: 0.6809494491568633 and parameters: {'random_seed': 417, 'learning_rate': 0.038521429192297486, 'reg_lambda': 0.06587945744308704, 'max_depth': 6, 'colsample_bylevel': 0.4936111842654619, 'subsample': 0.40919616423534183, 'min_child_samples': 10, 'max_bin': 460}. Best is trial 0 with value: 0.6809494491568633.[0m


0:	learn: 1.0806763	test: 1.0806763	test1: 1.0806457	best: 1.0806457 (0)	total: 47.6ms	remaining: 2m 22s
100:	learn: 0.7146131	test: 0.5413418	test1: 0.6879848	best: 0.6879848 (100)	total: 33.9s	remaining: 16m 14s
200:	learn: 0.6567140	test: 0.4846957	test1: 0.6741266	best: 0.6741266 (200)	total: 1m 19s	remaining: 18m 20s
300:	learn: 0.6026636	test: 0.4650253	test1: 0.6703698	best: 0.6703152 (299)	total: 2m 5s	remaining: 18m 46s
Stopped by overfitting detector  (50 iterations wait)

bestTest = 0.6698394973
bestIteration = 335

Shrink model to first 336 iterations.
0:	learn: 1.0804073	test: 1.0804073	test1: 1.0804298	best: 1.0804298 (0)	total: 48.9ms	remaining: 2m 26s
100:	learn: 0.7123041	test: 0.5349257	test1: 0.6965886	best: 0.6965886 (100)	total: 33s	remaining: 15m 45s
200:	learn: 0.6554618	test: 0.4911511	test1: 0.6850471	best: 0.6850471 (200)	total: 1m 16s	remaining: 17m 49s
300:	learn: 0.6063546	test: 0.4593806	test1: 0.6838877	best: 0.6830314 (250)	total: 2m 2s	remaining: 18m 18

[32m[I 2021-05-26 07:20:38,843][0m Trial 1 finished with value: 0.6831947765051771 and parameters: {'random_seed': 644, 'learning_rate': 0.031242177333881363, 'reg_lambda': 0.0018526142807772773, 'max_depth': 10, 'colsample_bylevel': 0.899465584480253, 'subsample': 0.44863737747479326, 'min_child_samples': 22, 'max_bin': 255}. Best is trial 0 with value: 0.6809494491568633.[0m


0:	learn: 1.0835685	test: 1.0835857	test1: 1.0835927	best: 1.0835927 (0)	total: 73.7ms	remaining: 3m 40s
100:	learn: 0.7517693	test: 0.5945561	test1: 0.7096729	best: 0.7096729 (100)	total: 3.94s	remaining: 1m 53s
200:	learn: 0.7364039	test: 0.5309749	test1: 0.6830811	best: 0.6830811 (200)	total: 8.25s	remaining: 1m 54s
300:	learn: 0.7325917	test: 0.5121029	test1: 0.6774191	best: 0.6774191 (300)	total: 13s	remaining: 1m 56s
400:	learn: 0.7296719	test: 0.5012114	test1: 0.6744801	best: 0.6744801 (400)	total: 18s	remaining: 1m 56s
500:	learn: 0.7267175	test: 0.4936468	test1: 0.6719899	best: 0.6719889 (498)	total: 23.1s	remaining: 1m 55s
600:	learn: 0.7247660	test: 0.4892736	test1: 0.6711439	best: 0.6711439 (600)	total: 28.2s	remaining: 1m 52s
700:	learn: 0.7229066	test: 0.4853761	test1: 0.6704717	best: 0.6704708 (697)	total: 33.4s	remaining: 1m 49s
800:	learn: 0.7213774	test: 0.4825357	test1: 0.6698449	best: 0.6698426 (799)	total: 38.5s	remaining: 1m 45s
900:	learn: 0.7197584	test: 0.48107

[32m[I 2021-05-26 07:25:29,046][0m Trial 2 finished with value: 0.6811655050620322 and parameters: {'random_seed': 346, 'learning_rate': 0.02574269294896713, 'reg_lambda': 0.03887505735834024, 'max_depth': 3, 'colsample_bylevel': 0.7671117368334277, 'subsample': 0.3976457024564293, 'min_child_samples': 33, 'max_bin': 310}. Best is trial 0 with value: 0.6809494491568633.[0m


0:	learn: 1.0838761	test: 1.0838761	test1: 1.0838863	best: 1.0838863 (0)	total: 23.9ms	remaining: 1m 11s
100:	learn: 0.7370935	test: 0.5510159	test1: 0.6915218	best: 0.6915218 (100)	total: 8.64s	remaining: 4m 7s
200:	learn: 0.7207495	test: 0.5056347	test1: 0.6761304	best: 0.6761304 (200)	total: 19.4s	remaining: 4m 30s
300:	learn: 0.7102532	test: 0.4879702	test1: 0.6729760	best: 0.6729760 (300)	total: 31.3s	remaining: 4m 41s
400:	learn: 0.6979756	test: 0.4778550	test1: 0.6697554	best: 0.6697554 (400)	total: 44.1s	remaining: 4m 46s
500:	learn: 0.6865691	test: 0.4728143	test1: 0.6689881	best: 0.6688656 (470)	total: 57s	remaining: 4m 44s
Stopped by overfitting detector  (50 iterations wait)

bestTest = 0.6688656308
bestIteration = 470

Shrink model to first 471 iterations.
0:	learn: 1.0791830	test: 1.0791654	test1: 1.0793850	best: 1.0793850 (0)	total: 138ms	remaining: 6m 52s
100:	learn: 0.7323965	test: 0.5447419	test1: 0.7009084	best: 0.7009084 (100)	total: 8.65s	remaining: 4m 8s
200:	lear

[32m[I 2021-05-26 07:32:07,208][0m Trial 3 finished with value: 0.6812966424501745 and parameters: {'random_seed': 498, 'learning_rate': 0.03355527884179041, 'reg_lambda': 0.017970648397514558, 'max_depth': 6, 'colsample_bylevel': 0.7554487413172255, 'subsample': 0.3325152889039984, 'min_child_samples': 63, 'max_bin': 251}. Best is trial 0 with value: 0.6809494491568633.[0m


0:	learn: 1.0762874	test: 1.0762874	test1: 1.0762996	best: 1.0762996 (0)	total: 29ms	remaining: 1m 27s
100:	learn: 0.7112710	test: 0.5296744	test1: 0.6843128	best: 0.6843128 (100)	total: 19.3s	remaining: 9m 13s
200:	learn: 0.6703181	test: 0.4836527	test1: 0.6742760	best: 0.6742321 (194)	total: 42.7s	remaining: 9m 54s
300:	learn: 0.6224439	test: 0.4632092	test1: 0.6712861	best: 0.6710583 (292)	total: 1m 8s	remaining: 10m 12s
400:	learn: 0.5700922	test: 0.4493667	test1: 0.6694641	best: 0.6694433 (398)	total: 1m 33s	remaining: 10m 7s
Stopped by overfitting detector  (50 iterations wait)

bestTest = 0.6694432502
bestIteration = 398

Shrink model to first 399 iterations.
0:	learn: 1.0762815	test: 1.0762815	test1: 1.0763092	best: 1.0763092 (0)	total: 30.7ms	remaining: 1m 32s
100:	learn: 0.7114868	test: 0.5259378	test1: 0.6960649	best: 0.6960649 (100)	total: 19.1s	remaining: 9m 6s
200:	learn: 0.6704761	test: 0.4806286	test1: 0.6889633	best: 0.6889015 (197)	total: 42.7s	remaining: 9m 55s
300:	

[32m[I 2021-05-26 07:40:05,890][0m Trial 4 finished with value: 0.6830464524545614 and parameters: {'random_seed': 107, 'learning_rate': 0.0384665661176, 'reg_lambda': 0.08690688332039001, 'max_depth': 9, 'colsample_bylevel': 0.5827682615040224, 'subsample': 0.3683704798044687, 'min_child_samples': 70, 'max_bin': 332}. Best is trial 0 with value: 0.6809494491568633.[0m


0:	learn: 1.0876481	test: 1.0876481	test1: 1.0876557	best: 1.0876557 (0)	total: 20.3ms	remaining: 1m
100:	learn: 0.7306875	test: 0.5711249	test1: 0.6995875	best: 0.6995875 (100)	total: 25.4s	remaining: 12m 7s
200:	learn: 0.6826294	test: 0.5042803	test1: 0.6752504	best: 0.6752504 (200)	total: 1m 3s	remaining: 14m 46s
300:	learn: 0.6430341	test: 0.4809835	test1: 0.6704929	best: 0.6704746 (296)	total: 1m 44s	remaining: 15m 34s
400:	learn: 0.6026632	test: 0.4645826	test1: 0.6682287	best: 0.6681951 (394)	total: 2m 26s	remaining: 15m 46s
500:	learn: 0.5554163	test: 0.4509870	test1: 0.6675109	best: 0.6672411 (462)	total: 3m 9s	remaining: 15m 46s
Stopped by overfitting detector  (50 iterations wait)

bestTest = 0.6672411048
bestIteration = 462

Shrink model to first 463 iterations.
0:	learn: 1.0840692	test: 1.0840456	test1: 1.0840612	best: 1.0840612 (0)	total: 150ms	remaining: 7m 29s
100:	learn: 0.7258758	test: 0.5694519	test1: 0.7093940	best: 0.7093940 (100)	total: 26.8s	remaining: 12m 50s
20

[32m[I 2021-05-26 07:54:09,231][0m Trial 5 finished with value: 0.6840224349090118 and parameters: {'random_seed': 164, 'learning_rate': 0.024855307303338105, 'reg_lambda': 0.0030949765564844446, 'max_depth': 10, 'colsample_bylevel': 0.5552679889600102, 'subsample': 0.7637655990477874, 'min_child_samples': 34, 'max_bin': 356}. Best is trial 0 with value: 0.6809494491568633.[0m


0:	learn: 1.0895705	test: 1.0895772	test1: 1.0895783	best: 1.0895783 (0)	total: 433ms	remaining: 21m 37s
100:	learn: 0.7694752	test: 0.6507511	test1: 0.7391328	best: 0.7391328 (100)	total: 21.9s	remaining: 10m 29s
200:	learn: 0.7289367	test: 0.5411420	test1: 0.6864850	best: 0.6864850 (200)	total: 49.6s	remaining: 11m 30s
300:	learn: 0.7138487	test: 0.5082210	test1: 0.6754653	best: 0.6754653 (300)	total: 1m 22s	remaining: 12m 22s
400:	learn: 0.7025011	test: 0.4960948	test1: 0.6718707	best: 0.6718707 (400)	total: 1m 58s	remaining: 12m 44s
500:	learn: 0.6935805	test: 0.4905678	test1: 0.6704238	best: 0.6704238 (500)	total: 2m 32s	remaining: 12m 38s
600:	learn: 0.6838644	test: 0.4813763	test1: 0.6692305	best: 0.6692305 (600)	total: 3m 7s	remaining: 12m 27s
700:	learn: 0.6729628	test: 0.4746786	test1: 0.6682053	best: 0.6682053 (700)	total: 3m 45s	remaining: 12m 17s
800:	learn: 0.6609749	test: 0.4696692	test1: 0.6673746	best: 0.6673606 (798)	total: 4m 22s	remaining: 12m 1s
900:	learn: 0.64956

[32m[I 2021-05-26 08:20:40,654][0m Trial 6 finished with value: 0.6798194748399248 and parameters: {'random_seed': 589, 'learning_rate': 0.01554563366576581, 'reg_lambda': 0.08726261680296399, 'max_depth': 8, 'colsample_bylevel': 0.9636993649385135, 'subsample': 0.9263791452993542, 'min_child_samples': 62, 'max_bin': 477}. Best is trial 6 with value: 0.6798194748399248.[0m


0:	learn: 1.0892800	test: 1.0892752	test1: 1.0892803	best: 1.0892803 (0)	total: 96.8ms	remaining: 4m 50s
100:	learn: 0.7800745	test: 0.6764138	test1: 0.7524414	best: 0.7524414 (100)	total: 4.96s	remaining: 2m 22s
200:	learn: 0.7424183	test: 0.5603409	test1: 0.6946677	best: 0.6946677 (200)	total: 10s	remaining: 2m 19s
300:	learn: 0.7345329	test: 0.5283553	test1: 0.6822762	best: 0.6822762 (300)	total: 16.3s	remaining: 2m 25s
400:	learn: 0.7310046	test: 0.5141065	test1: 0.6776802	best: 0.6776760 (398)	total: 22.9s	remaining: 2m 28s
500:	learn: 0.7284551	test: 0.5012451	test1: 0.6752642	best: 0.6752642 (500)	total: 29.5s	remaining: 2m 27s
600:	learn: 0.7261479	test: 0.4909377	test1: 0.6733557	best: 0.6733549 (599)	total: 36.6s	remaining: 2m 26s
700:	learn: 0.7236508	test: 0.4849037	test1: 0.6717131	best: 0.6717108 (698)	total: 43.6s	remaining: 2m 22s
800:	learn: 0.7211196	test: 0.4804130	test1: 0.6703205	best: 0.6703205 (800)	total: 50.6s	remaining: 2m 18s
900:	learn: 0.7188612	test: 0.477

[32m[I 2021-05-26 08:30:13,101][0m Trial 7 finished with value: 0.6818202043768704 and parameters: {'random_seed': 130, 'learning_rate': 0.015879485872574355, 'reg_lambda': 0.0040704655496755365, 'max_depth': 4, 'colsample_bylevel': 0.6332063738136893, 'subsample': 0.4899443222417271, 'min_child_samples': 84, 'max_bin': 307}. Best is trial 6 with value: 0.6798194748399248.[0m


0:	learn: 1.0870277	test: 1.0870277	test1: 1.0870357	best: 1.0870357 (0)	total: 27.3ms	remaining: 1m 22s
100:	learn: 0.7354463	test: 0.5681109	test1: 0.6976211	best: 0.6976211 (100)	total: 18s	remaining: 8m 37s
200:	learn: 0.6992787	test: 0.5027614	test1: 0.6745694	best: 0.6745694 (200)	total: 43s	remaining: 9m 58s
300:	learn: 0.6730525	test: 0.4857676	test1: 0.6707059	best: 0.6707059 (300)	total: 1m 11s	remaining: 10m 37s
400:	learn: 0.6459132	test: 0.4707779	test1: 0.6688988	best: 0.6688630 (398)	total: 1m 39s	remaining: 10m 47s
500:	learn: 0.6158988	test: 0.4620217	test1: 0.6676682	best: 0.6676017 (499)	total: 2m 8s	remaining: 10m 41s
Stopped by overfitting detector  (50 iterations wait)

bestTest = 0.6676016881
bestIteration = 499

Shrink model to first 500 iterations.
0:	learn: 1.0832349	test: 1.0832356	test1: 1.0832503	best: 1.0832503 (0)	total: 154ms	remaining: 7m 42s
100:	learn: 0.7285579	test: 0.5538515	test1: 0.7034465	best: 0.7034465 (100)	total: 20.3s	remaining: 9m 43s
200:

[32m[I 2021-05-26 08:40:51,267][0m Trial 8 finished with value: 0.6813010527939698 and parameters: {'random_seed': 323, 'learning_rate': 0.026280882494747457, 'reg_lambda': 0.01268318883848639, 'max_depth': 9, 'colsample_bylevel': 0.44473038620786254, 'subsample': 0.9908208556203622, 'min_child_samples': 79, 'max_bin': 259}. Best is trial 6 with value: 0.6798194748399248.[0m


0:	learn: 1.0788599	test: 1.0788599	test1: 1.0788260	best: 1.0788260 (0)	total: 45.8ms	remaining: 2m 17s
100:	learn: 0.7258491	test: 0.5410474	test1: 0.6875431	best: 0.6875431 (100)	total: 16.4s	remaining: 7m 51s
200:	learn: 0.6976152	test: 0.4961862	test1: 0.6760803	best: 0.6760803 (200)	total: 37.1s	remaining: 8m 36s
300:	learn: 0.6738039	test: 0.4771272	test1: 0.6725100	best: 0.6725100 (300)	total: 58.5s	remaining: 8m 44s
400:	learn: 0.6394030	test: 0.4665821	test1: 0.6708602	best: 0.6707591 (396)	total: 1m 22s	remaining: 8m 51s
500:	learn: 0.6072793	test: 0.4572349	test1: 0.6699913	best: 0.6699341 (464)	total: 1m 44s	remaining: 8m 43s
Stopped by overfitting detector  (50 iterations wait)

bestTest = 0.6699340642
bestIteration = 464

Shrink model to first 465 iterations.
0:	learn: 1.0787931	test: 1.0787931	test1: 1.0789255	best: 1.0789255 (0)	total: 63.5ms	remaining: 3m 10s
100:	learn: 0.7208468	test: 0.5360614	test1: 0.6977421	best: 0.6977421 (100)	total: 16.4s	remaining: 7m 51s
20

[32m[I 2021-05-26 08:49:01,434][0m Trial 9 finished with value: 0.6819626953389014 and parameters: {'random_seed': 47, 'learning_rate': 0.034463842853645024, 'reg_lambda': 0.0636171638777121, 'max_depth': 8, 'colsample_bylevel': 0.8627622080115674, 'subsample': 0.35183125621386324, 'min_child_samples': 39, 'max_bin': 234}. Best is trial 6 with value: 0.6798194748399248.[0m


Best Score: 0.6798194748399248
Best trial: {'random_seed': 589, 'learning_rate': 0.01554563366576581, 'reg_lambda': 0.08726261680296399, 'max_depth': 8, 'colsample_bylevel': 0.9636993649385135, 'subsample': 0.9263791452993542, 'min_child_samples': 62, 'max_bin': 477}
