In [3]:
import numpy as np
import pandas as pd

결정트리 하이퍼파라미터

In [4]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import warnings
warnings.filterwarnings('ignore')

# 예제 반복 시마다 동일한 예측 결과 도출을 위해 random_state 설정
dt_clf = DecisionTreeClassifier(random_state=156)

# 붓꽃데이터 로딩, 학습과 테스트 데이터 세트로 분리
iris_data = load_iris()
X_train, X_test, y_train, y_test = train_test_split(iris_data.data, iris_data.target,
                                                   test_size=0.2, random_state=11)

# DecisionTreeClassifier 학습
dt_clf.fit(X_train,y_train)
pred = dt_clf.predict(X_test)
accuracy = accuracy_score(y_test, pred)
print('결정트리 예측 정확도: {0:.4f}'.format(accuracy),'\n')

# DecisionTreeClassifier의 하이퍼 파라미터 추출
print('DecisionTreeClassifier 기본 하이퍼 파라미터:\n',dt_clf.get_params(),'\n')

from sklearn.model_selection import GridSearchCV

params = {'max_depth':[6,8,10,12,16,20,24]}
grid_cv = GridSearchCV(dt_clf,param_grid=params,scoring='accuracy',cv=5,verbose=1)
grid_cv.fit(X_train,y_train)
print('GridSearchCV 최고 평균 정확도 수치:{0:.4f}'.format(grid_cv.best_score_))
print('GridSearchCV 최적 하이퍼파라미터:',grid_cv.best_params_)

# GridSearchCV 객체의 cv_results_ 속성을 DataFrame으로 생성
cv_results_df = pd.DataFrame(grid_cv.cv_results_)

# max_depth 파라미터 값과 그때의 테스트 세트, 학습 데이터 세트의 정확도 수치 추출
cv_results_df[['param_max_depth','mean_test_score']]

결정트리 예측 정확도: 0.9333 

DecisionTreeClassifier 기본 하이퍼 파라미터:
 {'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': None, 'max_features': None, 'max_leaf_nodes': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'random_state': 156, 'splitter': 'best'} 

Fitting 5 folds for each of 7 candidates, totalling 35 fits
GridSearchCV 최고 평균 정확도 수치:0.9500
GridSearchCV 최적 하이퍼파라미터: {'max_depth': 6}


Unnamed: 0,param_max_depth,mean_test_score
0,6,0.95
1,8,0.95
2,10,0.95
3,12,0.95
4,16,0.95
5,20,0.95
6,24,0.95


랜덤포레스트 하이퍼파라미터

In [5]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

# 학습/테스트용 DataFrame 변환
X_train, X_test, y_train, y_test = train_test_split(iris_data.data, iris_data.target,
                                                   test_size=0.2, random_state=11)

#랜덤포레스트 학습 및 별도의 테스트 세트로 예측 성능 평가
rf_clf = RandomForestClassifier(random_state=0)
rf_clf.fit(X_train,y_train)
pred = rf_clf.predict(X_test)
accuracy = accuracy_score(y_test,pred)
print('랜덤포레스트 정확도: {0:.4f}'.format(accuracy),'\n')

params = {
    'n_estimators':[100],
    'max_depth' : [6,8,10,12],
    'min_samples_leaf' : [8,12,18],
    'min_samples_split' : [8,16,20]
}
# RandomForestClassifier 객체 생성 후 GridSearchCV 수행
rf_clf = RandomForestClassifier(random_state=0, n_jobs=-1)
grid_cv = GridSearchCV(rf_clf, param_grid=params, cv=2, n_jobs=-1)
grid_cv.fit(X_train,y_train)

print('최적 하이퍼 파라미터:\n', grid_cv.best_params_)
print('최고 예측 정확도: {0:.4f}'.format(grid_cv.best_score_))

랜덤포레스트 정확도: 0.9333 

최적 하이퍼 파라미터:
 {'max_depth': 6, 'min_samples_leaf': 8, 'min_samples_split': 8, 'n_estimators': 100}
최고 예측 정확도: 0.9500


GBM 하이퍼파라미터

In [6]:
from sklearn.ensemble import GradientBoostingClassifier

X_train, X_test, y_train, y_test = train_test_split(iris_data.data, iris_data.target,
                                                   test_size=0.2, random_state=11)
gb_clf = GradientBoostingClassifier(random_state=0)
gb_clf.fit(X_train,y_train)
gb_pred = gb_clf.predict(X_test)
gb_accuracy = accuracy_score(y_test, gb_pred)

params = {
    'n_estimators':[100,500],
    'learning_rate':[0.05,0.1]
}
grid_cv = GridSearchCV(gb_clf, param_grid=params,cv=2,verbose=1)
grid_cv.fit(X_train,y_train)

print('최적 하이퍼 파라미터:\n', grid_cv.best_params_)
print('최고 예측 정확도: {0:.4f}'.format(grid_cv.best_score_))

# GridSearchCV를 이용해 최적으로 학습된 estimator로 예측 수행
gb_pred = grid_cv.best_estimator_.predict(X_test)
gb_accuracy = accuracy_score(y_test,gb_pred)
print('GBM 정확도: {0:.4f}'.format(gb_accuracy))

Fitting 2 folds for each of 4 candidates, totalling 8 fits
최적 하이퍼 파라미터:
 {'learning_rate': 0.05, 'n_estimators': 100}
최고 예측 정확도: 0.9667
GBM 정확도: 0.9333


XGBoost 하이퍼파라미터

In [21]:
import xgboost as xgb # 파이썬 래퍼
from xgboost import XGBClassifier # 사이킷런 API
from xgboost import XGBClassifier
from xgboost import plot_importance
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
# sklearn 관련
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import GridSearchCV
# xgboost 관련
from xgboost import XGBClassifier
from xgboost import plot_importance

cust_df = pd.read_csv('./data/train.csv',encoding='latin-1')
print(cust_df.shape)
cust_df.head(3)

cust_df['TARGET'].value_counts() # 0: 만족, 1: 불만족

cust_df['var3'].replace(-999999, 2, inplace=True)
cust_df.drop('ID',axis=1, inplace=True)

# X랑 y 나누기
X = cust_df.iloc[:,:-1]
y = cust_df.iloc[:,-1]

X_train, X_eval, y_train, y_eval = train_test_split(X, y, test_size = 0.2, random_state = 0)

print(y_train.value_counts()/y_train.count())
print(y_eval.value_counts()/y_eval.count())


#-------------------
# Grid Search
xgb = XGBClassifier(n_estimators = 100, random_state = 156)

# 하이퍼파라미터 후보
params = {'max_depth':[5,7]
    , 'min_child_weight':[1,3]
    , 'colsample_bytree':[0.5,0.75]}
# folds = 3
gridcv = GridSearchCV(xgb, param_grid = params, cv = 3)
gridcv.fit(X_train, y_train, early_stopping_rounds = 30, eval_metric = "auc",
          eval_set = [(X_train, y_train), (X_eval, y_eval)])
xgb_roc_score = roc_auc_score(y_eval, gridcv.predict_proba(X_eval)[:,1],
    average='macro')
#-------------------


print('Optimized hyperparameters', gridcv.best_params_) 
#{'colsample_bytree': 0.75, 'max_depth': 5, 'min_child_weight': 3}
print('ROC AUC: {0:.4f}'.format(xgb_roc_score)) #0.8433

(76020, 371)
0    0.960964
1    0.039036
Name: TARGET, dtype: float64
0    0.9583
1    0.0417
Name: TARGET, dtype: float64
[0]	validation_0-auc:0.70629	validation_1-auc:0.71552
[1]	validation_0-auc:0.79529	validation_1-auc:0.80747
[2]	validation_0-auc:0.79939	validation_1-auc:0.80662
[3]	validation_0-auc:0.80449	validation_1-auc:0.80695
[4]	validation_0-auc:0.80267	validation_1-auc:0.80336
[5]	validation_0-auc:0.82025	validation_1-auc:0.81848
[6]	validation_0-auc:0.82861	validation_1-auc:0.82517
[7]	validation_0-auc:0.84012	validation_1-auc:0.83344
[8]	validation_0-auc:0.84475	validation_1-auc:0.83705
[9]	validation_0-auc:0.84382	validation_1-auc:0.83378
[10]	validation_0-auc:0.84814	validation_1-auc:0.83654
[11]	validation_0-auc:0.84906	validation_1-auc:0.83434
[12]	validation_0-auc:0.85281	validation_1-auc:0.83742
[13]	validation_0-auc:0.85297	validation_1-auc:0.83479
[14]	validation_0-auc:0.85564	validation_1-auc:0.83805
[15]	validation_0-auc:0.85614	validation_1-auc:0.83747
[16]	va

[43]	validation_0-auc:0.87838	validation_1-auc:0.83612
[44]	validation_0-auc:0.87839	validation_1-auc:0.83592
[45]	validation_0-auc:0.87873	validation_1-auc:0.83558
[46]	validation_0-auc:0.87884	validation_1-auc:0.83557
[47]	validation_0-auc:0.87900	validation_1-auc:0.83575
[0]	validation_0-auc:0.70730	validation_1-auc:0.71971
[1]	validation_0-auc:0.81282	validation_1-auc:0.82050
[2]	validation_0-auc:0.80737	validation_1-auc:0.81381
[3]	validation_0-auc:0.80999	validation_1-auc:0.81404
[4]	validation_0-auc:0.80732	validation_1-auc:0.80976
[5]	validation_0-auc:0.82347	validation_1-auc:0.82367
[6]	validation_0-auc:0.83316	validation_1-auc:0.82986
[7]	validation_0-auc:0.83968	validation_1-auc:0.83484
[8]	validation_0-auc:0.84392	validation_1-auc:0.83817
[9]	validation_0-auc:0.84441	validation_1-auc:0.83352
[10]	validation_0-auc:0.84829	validation_1-auc:0.83487
[11]	validation_0-auc:0.84967	validation_1-auc:0.83502
[12]	validation_0-auc:0.85287	validation_1-auc:0.83856
[13]	validation_0-au

[35]	validation_0-auc:0.87266	validation_1-auc:0.83998
[36]	validation_0-auc:0.87325	validation_1-auc:0.84038
[37]	validation_0-auc:0.87348	validation_1-auc:0.84026
[38]	validation_0-auc:0.87453	validation_1-auc:0.84029
[39]	validation_0-auc:0.87465	validation_1-auc:0.84013
[40]	validation_0-auc:0.87473	validation_1-auc:0.84082
[41]	validation_0-auc:0.87517	validation_1-auc:0.84098
[42]	validation_0-auc:0.87569	validation_1-auc:0.84071
[43]	validation_0-auc:0.87594	validation_1-auc:0.84064
[44]	validation_0-auc:0.87628	validation_1-auc:0.84067
[45]	validation_0-auc:0.87649	validation_1-auc:0.84055
[46]	validation_0-auc:0.87653	validation_1-auc:0.84039
[47]	validation_0-auc:0.87659	validation_1-auc:0.84022
[48]	validation_0-auc:0.87668	validation_1-auc:0.84002
[49]	validation_0-auc:0.87674	validation_1-auc:0.83997
[50]	validation_0-auc:0.87701	validation_1-auc:0.84009
[51]	validation_0-auc:0.87755	validation_1-auc:0.84009
[52]	validation_0-auc:0.87781	validation_1-auc:0.83991
[53]	valid

[45]	validation_0-auc:0.89667	validation_1-auc:0.83275
[46]	validation_0-auc:0.89755	validation_1-auc:0.83196
[47]	validation_0-auc:0.89785	validation_1-auc:0.83189
[48]	validation_0-auc:0.89810	validation_1-auc:0.83103
[0]	validation_0-auc:0.71065	validation_1-auc:0.72162
[1]	validation_0-auc:0.81004	validation_1-auc:0.81744
[2]	validation_0-auc:0.81166	validation_1-auc:0.81353
[3]	validation_0-auc:0.81782	validation_1-auc:0.81079
[4]	validation_0-auc:0.81689	validation_1-auc:0.80561
[5]	validation_0-auc:0.83478	validation_1-auc:0.82049
[6]	validation_0-auc:0.84518	validation_1-auc:0.82760
[7]	validation_0-auc:0.85094	validation_1-auc:0.83378
[8]	validation_0-auc:0.85686	validation_1-auc:0.83678
[9]	validation_0-auc:0.85628	validation_1-auc:0.83338
[10]	validation_0-auc:0.86106	validation_1-auc:0.83592
[11]	validation_0-auc:0.86092	validation_1-auc:0.83293
[12]	validation_0-auc:0.86555	validation_1-auc:0.83586
[13]	validation_0-auc:0.86595	validation_1-auc:0.83273
[14]	validation_0-au

[13]	validation_0-auc:0.85683	validation_1-auc:0.83441
[14]	validation_0-auc:0.85931	validation_1-auc:0.83523
[15]	validation_0-auc:0.86068	validation_1-auc:0.83563
[16]	validation_0-auc:0.86192	validation_1-auc:0.83572
[17]	validation_0-auc:0.86341	validation_1-auc:0.83627
[18]	validation_0-auc:0.86477	validation_1-auc:0.83580
[19]	validation_0-auc:0.86535	validation_1-auc:0.83617
[20]	validation_0-auc:0.86561	validation_1-auc:0.83573
[21]	validation_0-auc:0.86657	validation_1-auc:0.83626
[22]	validation_0-auc:0.86671	validation_1-auc:0.83561
[23]	validation_0-auc:0.86736	validation_1-auc:0.83473
[24]	validation_0-auc:0.86766	validation_1-auc:0.83520
[25]	validation_0-auc:0.86785	validation_1-auc:0.83524
[26]	validation_0-auc:0.86817	validation_1-auc:0.83448
[27]	validation_0-auc:0.86851	validation_1-auc:0.83461
[28]	validation_0-auc:0.86939	validation_1-auc:0.83401
[29]	validation_0-auc:0.87039	validation_1-auc:0.83435
[30]	validation_0-auc:0.87048	validation_1-auc:0.83435
[31]	valid

[34]	validation_0-auc:0.86913	validation_1-auc:0.83579
[35]	validation_0-auc:0.86958	validation_1-auc:0.83611
[36]	validation_0-auc:0.87006	validation_1-auc:0.83579
[37]	validation_0-auc:0.87044	validation_1-auc:0.83523
[38]	validation_0-auc:0.87056	validation_1-auc:0.83527
[39]	validation_0-auc:0.87066	validation_1-auc:0.83516
[40]	validation_0-auc:0.87130	validation_1-auc:0.83490
[41]	validation_0-auc:0.87151	validation_1-auc:0.83502
[42]	validation_0-auc:0.87212	validation_1-auc:0.83513
[43]	validation_0-auc:0.87249	validation_1-auc:0.83481
[44]	validation_0-auc:0.87311	validation_1-auc:0.83452
[45]	validation_0-auc:0.87432	validation_1-auc:0.83401
[46]	validation_0-auc:0.87453	validation_1-auc:0.83353
[0]	validation_0-auc:0.71530	validation_1-auc:0.72310
[1]	validation_0-auc:0.81691	validation_1-auc:0.82146
[2]	validation_0-auc:0.81177	validation_1-auc:0.81507
[3]	validation_0-auc:0.81260	validation_1-auc:0.81317
[4]	validation_0-auc:0.82937	validation_1-auc:0.82683
[5]	validation_

[33]	validation_0-auc:0.89159	validation_1-auc:0.82809
[34]	validation_0-auc:0.89234	validation_1-auc:0.82809
[35]	validation_0-auc:0.89244	validation_1-auc:0.82763
[36]	validation_0-auc:0.89245	validation_1-auc:0.82777
[37]	validation_0-auc:0.89246	validation_1-auc:0.82761
[38]	validation_0-auc:0.89308	validation_1-auc:0.82705
[0]	validation_0-auc:0.71847	validation_1-auc:0.72285
[1]	validation_0-auc:0.82164	validation_1-auc:0.82158
[2]	validation_0-auc:0.81798	validation_1-auc:0.81498
[3]	validation_0-auc:0.82280	validation_1-auc:0.81345
[4]	validation_0-auc:0.84258	validation_1-auc:0.82822
[5]	validation_0-auc:0.85057	validation_1-auc:0.83418
[6]	validation_0-auc:0.85679	validation_1-auc:0.83776
[7]	validation_0-auc:0.86112	validation_1-auc:0.84137
[8]	validation_0-auc:0.86407	validation_1-auc:0.84227
[9]	validation_0-auc:0.86564	validation_1-auc:0.84123
[10]	validation_0-auc:0.86886	validation_1-auc:0.84127
[11]	validation_0-auc:0.87028	validation_1-auc:0.83960
[12]	validation_0-au

[17]	validation_0-auc:0.87776	validation_1-auc:0.83674
[18]	validation_0-auc:0.87885	validation_1-auc:0.83618
[19]	validation_0-auc:0.87985	validation_1-auc:0.83591
[20]	validation_0-auc:0.88205	validation_1-auc:0.83590
[21]	validation_0-auc:0.88240	validation_1-auc:0.83610
[22]	validation_0-auc:0.88342	validation_1-auc:0.83624
[23]	validation_0-auc:0.88418	validation_1-auc:0.83695
[24]	validation_0-auc:0.88452	validation_1-auc:0.83667
[25]	validation_0-auc:0.88538	validation_1-auc:0.83594
[26]	validation_0-auc:0.88660	validation_1-auc:0.83578
[27]	validation_0-auc:0.88707	validation_1-auc:0.83564
[28]	validation_0-auc:0.88740	validation_1-auc:0.83516
[29]	validation_0-auc:0.88752	validation_1-auc:0.83543
[30]	validation_0-auc:0.88802	validation_1-auc:0.83556
[31]	validation_0-auc:0.88828	validation_1-auc:0.83535
[32]	validation_0-auc:0.88853	validation_1-auc:0.83511
[33]	validation_0-auc:0.88903	validation_1-auc:0.83468
[34]	validation_0-auc:0.88927	validation_1-auc:0.83495
[35]	valid

In [None]:
!pip install optuna
!pip install psutil
import numpy as np
import optuna
from sklearn import datasets
import sklearn.datasets
import sklearn.metrics
from sklearn.model_selection import train_test_split
import xgboost as xgb
import psutil
import time

def memory_usage(message:str = 'debug'):
    p = psutil.Process()
    rss = p.memory_info().rss/2**20
    print(f'[{message}] memory usage:{rss:10.5f}MB')

iris = datasets.load_iris()
data = iris.data
target = iris.target
# train 데이터세트와 test 데이터세트로 분리
X_train,X_test,y_train,y_test = train_test_split(data,target,test_size=0.33, random_state=1234)

# train 데이터세트와 validation 데이터세트로 분리
X_train, X_validation, y_train, y_validation = train_test_split(X_train,y_train,test_size=0.25,
                                                               random_state=1234)

print('X_train.shape, X_test.shape,X_validation.shape',X_train.shape,X_test.shape,X_validation.shape)

def objective(trial):
    params = {
        'objective':'multi:softprob',
        'eval_metric':'mlogloss',
        'booster':'gbtree',
        'tree_method':'exact','gpu_id':-1,
        'verbosity':0,
        'num_class':3,
        'max_depth':trial.suggest_int('max_depth',4,10),
        'learning_rate':trial.suggest_uniform('learning_rate',0.0001,0.99),
        'n_estimators':trial.suggest_int('n_estimators',1000,10000,step=100),
        'colsample_bytree':trial.suggest_float('colsample_bytree',0.5,1.0),
        'colsample_bylevel':trial.suggest_float('colsample_bylevel',0.5,1.0),
        'colsample_bynode':trial.suggest_float('colsample_bynode',0.5,1.0),
        'reg_lambda':trial.suggest_loguniform('reg_lambda',1e-2,1),
        'reg_alpha':trial.suggest_loguniform('reg_alpha',1e-2,1),
        'subsample':trial.suggest_discrete_uniform('subsample',0.6,1.0,0.05),
        'min_child_weight':trial.suggest_int('min_child_weight',2,15),
        'gamma':trial.suggest_float('gamma',0.1,1.0,log=True)
    }
    
    model = xgb.XGBClassifier(**params,random_state=1234,use_label_encoder=False)
    bst = model.fit(X_train,y_train,eval_set=[(X_validation,y_validation)],early_stopping_rounds=50,
                   verbose=False)
    preds = bst.predict(X_validation)
    pred_labels = np.rint(preds)
    accuracy = sklearn.metrics.accuracy_score(y_validation,pred_labels)
    return accuracy

if __name__ == "__main__":
    train_start = time.time()
    study = optuna.create_study(direction='maximize')
    study.optimize(objective, n_trials=50, show_progress_bar=True)
    
    print('Number fo finished trials:',len(study.trials))
    print('Best trial:')
    
    trial = study.best_trial
    
    print('정확성:{}',format(trial.value))
    print('최적 하이퍼파라미터:')
    for key,value in trial.params.items():
        print('{}:{}'.format(key,value))
        
clf = xgb.XGBClassifier(**study.best_params,random_state=1234,use_label_encoder=False)
clf.fit(X_train,y_train)

preds = clf.predict(X_test)
accuracy = sklearn.metrics.accuracy_score(y_test,preds)
print('정확성:{}'.format(accuracy))

LightGBM 하이퍼파라미터

In [20]:
from lightgbm import LGBMClassifier
from lightgbm import LGBMClassifier
from lightgbm import plot_importance
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
import time
# sklearn 관련
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import GridSearchCV
# lightgbm 관련
from lightgbm import LGBMClassifier
from lightgbm import plot_importance

cust_df = pd.read_csv('./data/train.csv',encoding='latin-1')
print(cust_df.shape)
cust_df.head(3)

cust_df['TARGET'].value_counts() # 0: 만족, 1: 불만족

cust_df['var3'].replace(-999999, 2, inplace=True)
cust_df.drop('ID',axis=1, inplace=True)

# X랑 y 나누기
X = cust_df.iloc[:,:-1]
y = cust_df.iloc[:,-1]

X_train, X_eval, y_train, y_eval = train_test_split(X, y, test_size = 0.2, random_state = 0)

print(y_train.value_counts()/y_train.count())
print(y_eval.value_counts()/y_eval.count())


params = {
    'max_depth':[128,160],
    'min_child_samples':[60,100],
    'subsample':[0.8,1]
}
lgbm = LGBMClassifier(n_estimator = 100)

gridcv = GridSearchCV(lgbm, param_grid = params, cv = 3)
gridcv.fit(X_train, y_train, early_stopping_rounds = 30, eval_metric = 'auc',
          eval_set = [(X_train, y_train), (X_eval, y_eval)])

lgbm_roc_score = roc_auc_score(y_eval, gridcv.predict_proba(X_eval)[:,1],average='macro')


print('Optimized hyperparameters', gridcv.best_params_) # {'max_depth': 128, 'min_child_samples': 60, 'subsample': 0.8}
print('ROC AUC: {0:.4f}'.format(lgbm_roc_score)) # 0.8406

(76020, 371)
0    0.960964
1    0.039036
Name: TARGET, dtype: float64
0    0.9583
1    0.0417
Name: TARGET, dtype: float64
[1]	valid_0's auc: 0.820107	valid_0's binary_logloss: 0.156116	valid_1's auc: 0.815632	valid_1's binary_logloss: 0.165023
[2]	valid_0's auc: 0.825543	valid_0's binary_logloss: 0.151001	valid_1's auc: 0.821373	valid_1's binary_logloss: 0.159945
[3]	valid_0's auc: 0.832117	valid_0's binary_logloss: 0.147249	valid_1's auc: 0.826189	valid_1's binary_logloss: 0.156372
[4]	valid_0's auc: 0.837249	valid_0's binary_logloss: 0.144258	valid_1's auc: 0.83307	valid_1's binary_logloss: 0.153399
[5]	valid_0's auc: 0.841358	valid_0's binary_logloss: 0.141884	valid_1's auc: 0.836863	valid_1's binary_logloss: 0.151079
[6]	valid_0's auc: 0.843532	valid_0's binary_logloss: 0.139902	valid_1's auc: 0.83696	valid_1's binary_logloss: 0.149223
[7]	valid_0's auc: 0.845572	valid_0's binary_logloss: 0.138268	valid_1's auc: 0.839347	valid_1's binary_logloss: 0.147475
[8]	valid_0's auc: 0.8479

[34]	valid_0's auc: 0.876486	valid_0's binary_logloss: 0.123236	valid_1's auc: 0.838115	valid_1's binary_logloss: 0.139649
[35]	valid_0's auc: 0.877367	valid_0's binary_logloss: 0.122967	valid_1's auc: 0.838011	valid_1's binary_logloss: 0.13965
[36]	valid_0's auc: 0.878121	valid_0's binary_logloss: 0.122753	valid_1's auc: 0.838001	valid_1's binary_logloss: 0.139639
[37]	valid_0's auc: 0.878717	valid_0's binary_logloss: 0.122569	valid_1's auc: 0.837691	valid_1's binary_logloss: 0.139684
[38]	valid_0's auc: 0.879237	valid_0's binary_logloss: 0.122329	valid_1's auc: 0.837523	valid_1's binary_logloss: 0.139747
[39]	valid_0's auc: 0.879748	valid_0's binary_logloss: 0.122076	valid_1's auc: 0.837413	valid_1's binary_logloss: 0.139779
[40]	valid_0's auc: 0.880242	valid_0's binary_logloss: 0.12184	valid_1's auc: 0.836893	valid_1's binary_logloss: 0.139865
[41]	valid_0's auc: 0.880853	valid_0's binary_logloss: 0.121621	valid_1's auc: 0.836671	valid_1's binary_logloss: 0.139894
[42]	valid_0's auc

[40]	valid_0's auc: 0.880176	valid_0's binary_logloss: 0.121944	valid_1's auc: 0.840473	valid_1's binary_logloss: 0.138951
[41]	valid_0's auc: 0.880826	valid_0's binary_logloss: 0.121711	valid_1's auc: 0.840646	valid_1's binary_logloss: 0.138913
[42]	valid_0's auc: 0.881237	valid_0's binary_logloss: 0.12153	valid_1's auc: 0.840775	valid_1's binary_logloss: 0.138898
[43]	valid_0's auc: 0.881488	valid_0's binary_logloss: 0.121353	valid_1's auc: 0.840542	valid_1's binary_logloss: 0.138973
[44]	valid_0's auc: 0.88217	valid_0's binary_logloss: 0.121153	valid_1's auc: 0.84025	valid_1's binary_logloss: 0.139024
[45]	valid_0's auc: 0.882502	valid_0's binary_logloss: 0.120969	valid_1's auc: 0.840617	valid_1's binary_logloss: 0.138952
[46]	valid_0's auc: 0.882901	valid_0's binary_logloss: 0.120776	valid_1's auc: 0.840635	valid_1's binary_logloss: 0.138929
[47]	valid_0's auc: 0.8833	valid_0's binary_logloss: 0.120596	valid_1's auc: 0.840437	valid_1's binary_logloss: 0.138974
[48]	valid_0's auc: 0

[1]	valid_0's auc: 0.814409	valid_0's binary_logloss: 0.156453	valid_1's auc: 0.813191	valid_1's binary_logloss: 0.1654
[2]	valid_0's auc: 0.827239	valid_0's binary_logloss: 0.151091	valid_1's auc: 0.819589	valid_1's binary_logloss: 0.160119
[3]	valid_0's auc: 0.836958	valid_0's binary_logloss: 0.147254	valid_1's auc: 0.828052	valid_1's binary_logloss: 0.156528
[4]	valid_0's auc: 0.840075	valid_0's binary_logloss: 0.144254	valid_1's auc: 0.830576	valid_1's binary_logloss: 0.15359
[5]	valid_0's auc: 0.842405	valid_0's binary_logloss: 0.141776	valid_1's auc: 0.832631	valid_1's binary_logloss: 0.151302
[6]	valid_0's auc: 0.84513	valid_0's binary_logloss: 0.139783	valid_1's auc: 0.835654	valid_1's binary_logloss: 0.149357
[7]	valid_0's auc: 0.847603	valid_0's binary_logloss: 0.138115	valid_1's auc: 0.835158	valid_1's binary_logloss: 0.147934
[8]	valid_0's auc: 0.84964	valid_0's binary_logloss: 0.136673	valid_1's auc: 0.835684	valid_1's binary_logloss: 0.146781
[9]	valid_0's auc: 0.851495	v

[11]	valid_0's auc: 0.851781	valid_0's binary_logloss: 0.133486	valid_1's auc: 0.837719	valid_1's binary_logloss: 0.143895
[12]	valid_0's auc: 0.854437	valid_0's binary_logloss: 0.132625	valid_1's auc: 0.838697	valid_1's binary_logloss: 0.143197
[13]	valid_0's auc: 0.857231	valid_0's binary_logloss: 0.131813	valid_1's auc: 0.837906	valid_1's binary_logloss: 0.142689
[14]	valid_0's auc: 0.858841	valid_0's binary_logloss: 0.131104	valid_1's auc: 0.83829	valid_1's binary_logloss: 0.142225
[15]	valid_0's auc: 0.859561	valid_0's binary_logloss: 0.130502	valid_1's auc: 0.837733	valid_1's binary_logloss: 0.141845
[16]	valid_0's auc: 0.860759	valid_0's binary_logloss: 0.129897	valid_1's auc: 0.837702	valid_1's binary_logloss: 0.141516
[17]	valid_0's auc: 0.861715	valid_0's binary_logloss: 0.129359	valid_1's auc: 0.837893	valid_1's binary_logloss: 0.141257
[18]	valid_0's auc: 0.863307	valid_0's binary_logloss: 0.12885	valid_1's auc: 0.838535	valid_1's binary_logloss: 0.14096
[19]	valid_0's auc:

[10]	valid_0's auc: 0.851013	valid_0's binary_logloss: 0.134987	valid_1's auc: 0.836673	valid_1's binary_logloss: 0.14473
[11]	valid_0's auc: 0.852203	valid_0's binary_logloss: 0.134031	valid_1's auc: 0.836732	valid_1's binary_logloss: 0.144003
[12]	valid_0's auc: 0.853051	valid_0's binary_logloss: 0.133237	valid_1's auc: 0.836665	valid_1's binary_logloss: 0.143377
[13]	valid_0's auc: 0.854449	valid_0's binary_logloss: 0.132467	valid_1's auc: 0.836347	valid_1's binary_logloss: 0.14286
[14]	valid_0's auc: 0.855696	valid_0's binary_logloss: 0.131797	valid_1's auc: 0.835966	valid_1's binary_logloss: 0.1425
[15]	valid_0's auc: 0.856848	valid_0's binary_logloss: 0.131219	valid_1's auc: 0.835052	valid_1's binary_logloss: 0.142203
[16]	valid_0's auc: 0.857724	valid_0's binary_logloss: 0.130664	valid_1's auc: 0.835557	valid_1's binary_logloss: 0.141875
[17]	valid_0's auc: 0.858917	valid_0's binary_logloss: 0.130139	valid_1's auc: 0.835029	valid_1's binary_logloss: 0.141659
[18]	valid_0's auc: 

[37]	valid_0's auc: 0.87534	valid_0's binary_logloss: 0.123748	valid_1's auc: 0.836635	valid_1's binary_logloss: 0.139512
[1]	valid_0's auc: 0.821372	valid_0's binary_logloss: 0.156607	valid_1's auc: 0.817319	valid_1's binary_logloss: 0.165251
[2]	valid_0's auc: 0.827767	valid_0's binary_logloss: 0.151379	valid_1's auc: 0.820742	valid_1's binary_logloss: 0.160263
[3]	valid_0's auc: 0.837338	valid_0's binary_logloss: 0.14754	valid_1's auc: 0.828985	valid_1's binary_logloss: 0.156549
[4]	valid_0's auc: 0.840199	valid_0's binary_logloss: 0.144519	valid_1's auc: 0.831475	valid_1's binary_logloss: 0.153611
[5]	valid_0's auc: 0.842643	valid_0's binary_logloss: 0.142185	valid_1's auc: 0.83374	valid_1's binary_logloss: 0.151385
[6]	valid_0's auc: 0.843533	valid_0's binary_logloss: 0.140149	valid_1's auc: 0.834243	valid_1's binary_logloss: 0.149492
[7]	valid_0's auc: 0.844716	valid_0's binary_logloss: 0.13857	valid_1's auc: 0.834698	valid_1's binary_logloss: 0.147987
[8]	valid_0's auc: 0.846564

[1]	valid_0's auc: 0.824771	valid_0's binary_logloss: 0.156249	valid_1's auc: 0.817793	valid_1's binary_logloss: 0.165102
[2]	valid_0's auc: 0.828535	valid_0's binary_logloss: 0.151281	valid_1's auc: 0.823189	valid_1's binary_logloss: 0.160178
[3]	valid_0's auc: 0.835411	valid_0's binary_logloss: 0.147483	valid_1's auc: 0.828532	valid_1's binary_logloss: 0.15649
[4]	valid_0's auc: 0.839239	valid_0's binary_logloss: 0.144503	valid_1's auc: 0.832454	valid_1's binary_logloss: 0.153535
[5]	valid_0's auc: 0.842214	valid_0's binary_logloss: 0.142121	valid_1's auc: 0.834304	valid_1's binary_logloss: 0.151216
[6]	valid_0's auc: 0.845112	valid_0's binary_logloss: 0.14023	valid_1's auc: 0.833606	valid_1's binary_logloss: 0.149478
[7]	valid_0's auc: 0.846839	valid_0's binary_logloss: 0.138573	valid_1's auc: 0.834745	valid_1's binary_logloss: 0.147891
[8]	valid_0's auc: 0.847792	valid_0's binary_logloss: 0.137228	valid_1's auc: 0.835658	valid_1's binary_logloss: 0.146627
[9]	valid_0's auc: 0.84936

[33]	valid_0's auc: 0.87259	valid_0's binary_logloss: 0.124662	valid_1's auc: 0.836362	valid_1's binary_logloss: 0.139505
[34]	valid_0's auc: 0.873319	valid_0's binary_logloss: 0.12441	valid_1's auc: 0.836517	valid_1's binary_logloss: 0.139489
[35]	valid_0's auc: 0.874206	valid_0's binary_logloss: 0.124168	valid_1's auc: 0.836438	valid_1's binary_logloss: 0.139515
[36]	valid_0's auc: 0.874905	valid_0's binary_logloss: 0.123946	valid_1's auc: 0.836563	valid_1's binary_logloss: 0.139513
[37]	valid_0's auc: 0.87534	valid_0's binary_logloss: 0.123748	valid_1's auc: 0.836635	valid_1's binary_logloss: 0.139512
[1]	valid_0's auc: 0.821372	valid_0's binary_logloss: 0.156607	valid_1's auc: 0.817319	valid_1's binary_logloss: 0.165251
[2]	valid_0's auc: 0.827767	valid_0's binary_logloss: 0.151379	valid_1's auc: 0.820742	valid_1's binary_logloss: 0.160263
[3]	valid_0's auc: 0.837338	valid_0's binary_logloss: 0.14754	valid_1's auc: 0.828985	valid_1's binary_logloss: 0.156549
[4]	valid_0's auc: 0.84

[1]	valid_0's auc: 0.820107	valid_0's binary_logloss: 0.156116	valid_1's auc: 0.815632	valid_1's binary_logloss: 0.165023
[2]	valid_0's auc: 0.825543	valid_0's binary_logloss: 0.151001	valid_1's auc: 0.821373	valid_1's binary_logloss: 0.159945
[3]	valid_0's auc: 0.832117	valid_0's binary_logloss: 0.147249	valid_1's auc: 0.826189	valid_1's binary_logloss: 0.156372
[4]	valid_0's auc: 0.837249	valid_0's binary_logloss: 0.144258	valid_1's auc: 0.83307	valid_1's binary_logloss: 0.153399
[5]	valid_0's auc: 0.841358	valid_0's binary_logloss: 0.141884	valid_1's auc: 0.836863	valid_1's binary_logloss: 0.151079
[6]	valid_0's auc: 0.843532	valid_0's binary_logloss: 0.139902	valid_1's auc: 0.83696	valid_1's binary_logloss: 0.149223
[7]	valid_0's auc: 0.845572	valid_0's binary_logloss: 0.138268	valid_1's auc: 0.839347	valid_1's binary_logloss: 0.147475
[8]	valid_0's auc: 0.847989	valid_0's binary_logloss: 0.136862	valid_1's auc: 0.840152	valid_1's binary_logloss: 0.146246
[9]	valid_0's auc: 0.84896

[30]	valid_0's auc: 0.873218	valid_0's binary_logloss: 0.124234	valid_1's auc: 0.838048	valid_1's binary_logloss: 0.139769
[31]	valid_0's auc: 0.874055	valid_0's binary_logloss: 0.123993	valid_1's auc: 0.838765	valid_1's binary_logloss: 0.139667
[32]	valid_0's auc: 0.875053	valid_0's binary_logloss: 0.123708	valid_1's auc: 0.838331	valid_1's binary_logloss: 0.139647
[33]	valid_0's auc: 0.875971	valid_0's binary_logloss: 0.123442	valid_1's auc: 0.838327	valid_1's binary_logloss: 0.139633
[34]	valid_0's auc: 0.876486	valid_0's binary_logloss: 0.123236	valid_1's auc: 0.838115	valid_1's binary_logloss: 0.139649
[35]	valid_0's auc: 0.877367	valid_0's binary_logloss: 0.122967	valid_1's auc: 0.838011	valid_1's binary_logloss: 0.13965
[36]	valid_0's auc: 0.878121	valid_0's binary_logloss: 0.122753	valid_1's auc: 0.838001	valid_1's binary_logloss: 0.139639
[37]	valid_0's auc: 0.878717	valid_0's binary_logloss: 0.122569	valid_1's auc: 0.837691	valid_1's binary_logloss: 0.139684
[38]	valid_0's au

[37]	valid_0's auc: 0.878676	valid_0's binary_logloss: 0.122683	valid_1's auc: 0.84014	valid_1's binary_logloss: 0.139044
[38]	valid_0's auc: 0.879078	valid_0's binary_logloss: 0.122461	valid_1's auc: 0.840579	valid_1's binary_logloss: 0.138958
[39]	valid_0's auc: 0.879758	valid_0's binary_logloss: 0.122171	valid_1's auc: 0.840431	valid_1's binary_logloss: 0.138979
[40]	valid_0's auc: 0.880176	valid_0's binary_logloss: 0.121944	valid_1's auc: 0.840473	valid_1's binary_logloss: 0.138951
[41]	valid_0's auc: 0.880826	valid_0's binary_logloss: 0.121711	valid_1's auc: 0.840646	valid_1's binary_logloss: 0.138913
[42]	valid_0's auc: 0.881237	valid_0's binary_logloss: 0.12153	valid_1's auc: 0.840775	valid_1's binary_logloss: 0.138898
[43]	valid_0's auc: 0.881488	valid_0's binary_logloss: 0.121353	valid_1's auc: 0.840542	valid_1's binary_logloss: 0.138973
[44]	valid_0's auc: 0.88217	valid_0's binary_logloss: 0.121153	valid_1's auc: 0.84025	valid_1's binary_logloss: 0.139024
[45]	valid_0's auc: 

[31]	valid_0's auc: 0.871973	valid_0's binary_logloss: 0.124687	valid_1's auc: 0.832443	valid_1's binary_logloss: 0.14056
[32]	valid_0's auc: 0.873281	valid_0's binary_logloss: 0.124367	valid_1's auc: 0.833085	valid_1's binary_logloss: 0.140444
[33]	valid_0's auc: 0.873981	valid_0's binary_logloss: 0.124124	valid_1's auc: 0.832656	valid_1's binary_logloss: 0.140483
[34]	valid_0's auc: 0.874432	valid_0's binary_logloss: 0.123884	valid_1's auc: 0.832717	valid_1's binary_logloss: 0.140445
[35]	valid_0's auc: 0.874851	valid_0's binary_logloss: 0.123652	valid_1's auc: 0.832795	valid_1's binary_logloss: 0.140444
[36]	valid_0's auc: 0.875353	valid_0's binary_logloss: 0.123466	valid_1's auc: 0.833217	valid_1's binary_logloss: 0.140387
[37]	valid_0's auc: 0.876163	valid_0's binary_logloss: 0.123231	valid_1's auc: 0.832811	valid_1's binary_logloss: 0.140451
[38]	valid_0's auc: 0.876936	valid_0's binary_logloss: 0.122965	valid_1's auc: 0.832693	valid_1's binary_logloss: 0.140452
[39]	valid_0's au

[1]	valid_0's auc: 0.82166	valid_0's binary_logloss: 0.156555	valid_1's auc: 0.818884	valid_1's binary_logloss: 0.165074
[2]	valid_0's auc: 0.8275	valid_0's binary_logloss: 0.151228	valid_1's auc: 0.822224	valid_1's binary_logloss: 0.160016
[3]	valid_0's auc: 0.837962	valid_0's binary_logloss: 0.147321	valid_1's auc: 0.82992	valid_1's binary_logloss: 0.156365
[4]	valid_0's auc: 0.840164	valid_0's binary_logloss: 0.144323	valid_1's auc: 0.832446	valid_1's binary_logloss: 0.153425
[5]	valid_0's auc: 0.84235	valid_0's binary_logloss: 0.141872	valid_1's auc: 0.834635	valid_1's binary_logloss: 0.151099
[6]	valid_0's auc: 0.843803	valid_0's binary_logloss: 0.139812	valid_1's auc: 0.834578	valid_1's binary_logloss: 0.149292
[7]	valid_0's auc: 0.845663	valid_0's binary_logloss: 0.138171	valid_1's auc: 0.835391	valid_1's binary_logloss: 0.147789
[8]	valid_0's auc: 0.847395	valid_0's binary_logloss: 0.136752	valid_1's auc: 0.836785	valid_1's binary_logloss: 0.14655
[9]	valid_0's auc: 0.848443	va

[68]	valid_0's auc: 0.890598	valid_0's binary_logloss: 0.117486	valid_1's auc: 0.839626	valid_1's binary_logloss: 0.139386
[69]	valid_0's auc: 0.891077	valid_0's binary_logloss: 0.117312	valid_1's auc: 0.839419	valid_1's binary_logloss: 0.13945
[70]	valid_0's auc: 0.891599	valid_0's binary_logloss: 0.117064	valid_1's auc: 0.839644	valid_1's binary_logloss: 0.139445
[71]	valid_0's auc: 0.891729	valid_0's binary_logloss: 0.116954	valid_1's auc: 0.83928	valid_1's binary_logloss: 0.139577
[72]	valid_0's auc: 0.892048	valid_0's binary_logloss: 0.116779	valid_1's auc: 0.839128	valid_1's binary_logloss: 0.139633
[1]	valid_0's auc: 0.824771	valid_0's binary_logloss: 0.156249	valid_1's auc: 0.817793	valid_1's binary_logloss: 0.165102
[2]	valid_0's auc: 0.828535	valid_0's binary_logloss: 0.151281	valid_1's auc: 0.823189	valid_1's binary_logloss: 0.160178
[3]	valid_0's auc: 0.835411	valid_0's binary_logloss: 0.147483	valid_1's auc: 0.828532	valid_1's binary_logloss: 0.15649
[4]	valid_0's auc: 0.8

[26]	valid_0's auc: 0.867823	valid_0's binary_logloss: 0.126413	valid_1's auc: 0.836534	valid_1's binary_logloss: 0.139699
[27]	valid_0's auc: 0.868409	valid_0's binary_logloss: 0.126125	valid_1's auc: 0.836508	valid_1's binary_logloss: 0.139603
[28]	valid_0's auc: 0.869242	valid_0's binary_logloss: 0.125842	valid_1's auc: 0.836643	valid_1's binary_logloss: 0.139574
[29]	valid_0's auc: 0.869833	valid_0's binary_logloss: 0.125591	valid_1's auc: 0.836613	valid_1's binary_logloss: 0.139505
[30]	valid_0's auc: 0.870941	valid_0's binary_logloss: 0.125323	valid_1's auc: 0.836407	valid_1's binary_logloss: 0.139532
[31]	valid_0's auc: 0.87123	valid_0's binary_logloss: 0.125132	valid_1's auc: 0.836019	valid_1's binary_logloss: 0.139575
[32]	valid_0's auc: 0.872038	valid_0's binary_logloss: 0.124898	valid_1's auc: 0.836243	valid_1's binary_logloss: 0.139503
[33]	valid_0's auc: 0.87259	valid_0's binary_logloss: 0.124662	valid_1's auc: 0.836362	valid_1's binary_logloss: 0.139505
[34]	valid_0's auc

[57]	valid_0's auc: 0.885946	valid_0's binary_logloss: 0.119861	valid_1's auc: 0.837352	valid_1's binary_logloss: 0.139737
[58]	valid_0's auc: 0.886397	valid_0's binary_logloss: 0.119671	valid_1's auc: 0.837847	valid_1's binary_logloss: 0.139697
[59]	valid_0's auc: 0.886538	valid_0's binary_logloss: 0.119546	valid_1's auc: 0.837886	valid_1's binary_logloss: 0.139712
[60]	valid_0's auc: 0.886988	valid_0's binary_logloss: 0.119353	valid_1's auc: 0.837877	valid_1's binary_logloss: 0.139723
[61]	valid_0's auc: 0.887124	valid_0's binary_logloss: 0.119212	valid_1's auc: 0.837656	valid_1's binary_logloss: 0.139832
[62]	valid_0's auc: 0.887579	valid_0's binary_logloss: 0.119015	valid_1's auc: 0.83767	valid_1's binary_logloss: 0.13982
[63]	valid_0's auc: 0.887823	valid_0's binary_logloss: 0.118889	valid_1's auc: 0.837798	valid_1's binary_logloss: 0.139825
[64]	valid_0's auc: 0.888244	valid_0's binary_logloss: 0.118706	valid_1's auc: 0.837513	valid_1's binary_logloss: 0.139876
[65]	valid_0's auc

[26]	valid_0's auc: 0.867823	valid_0's binary_logloss: 0.126413	valid_1's auc: 0.836534	valid_1's binary_logloss: 0.139699
[27]	valid_0's auc: 0.868409	valid_0's binary_logloss: 0.126125	valid_1's auc: 0.836508	valid_1's binary_logloss: 0.139603
[28]	valid_0's auc: 0.869242	valid_0's binary_logloss: 0.125842	valid_1's auc: 0.836643	valid_1's binary_logloss: 0.139574
[29]	valid_0's auc: 0.869833	valid_0's binary_logloss: 0.125591	valid_1's auc: 0.836613	valid_1's binary_logloss: 0.139505
[30]	valid_0's auc: 0.870941	valid_0's binary_logloss: 0.125323	valid_1's auc: 0.836407	valid_1's binary_logloss: 0.139532
[31]	valid_0's auc: 0.87123	valid_0's binary_logloss: 0.125132	valid_1's auc: 0.836019	valid_1's binary_logloss: 0.139575
[32]	valid_0's auc: 0.872038	valid_0's binary_logloss: 0.124898	valid_1's auc: 0.836243	valid_1's binary_logloss: 0.139503
[33]	valid_0's auc: 0.87259	valid_0's binary_logloss: 0.124662	valid_1's auc: 0.836362	valid_1's binary_logloss: 0.139505
[34]	valid_0's auc

[62]	valid_0's auc: 0.887579	valid_0's binary_logloss: 0.119015	valid_1's auc: 0.83767	valid_1's binary_logloss: 0.13982
[63]	valid_0's auc: 0.887823	valid_0's binary_logloss: 0.118889	valid_1's auc: 0.837798	valid_1's binary_logloss: 0.139825
[64]	valid_0's auc: 0.888244	valid_0's binary_logloss: 0.118706	valid_1's auc: 0.837513	valid_1's binary_logloss: 0.139876
[65]	valid_0's auc: 0.888416	valid_0's binary_logloss: 0.118602	valid_1's auc: 0.837413	valid_1's binary_logloss: 0.139911
[66]	valid_0's auc: 0.888956	valid_0's binary_logloss: 0.118442	valid_1's auc: 0.837124	valid_1's binary_logloss: 0.139978
[1]	training's auc: 0.823544	training's binary_logloss: 0.156167	valid_1's auc: 0.81662	valid_1's binary_logloss: 0.165164
[2]	training's auc: 0.827956	training's binary_logloss: 0.150935	valid_1's auc: 0.819528	valid_1's binary_logloss: 0.160021
[3]	training's auc: 0.837575	training's binary_logloss: 0.147071	valid_1's auc: 0.827643	valid_1's binary_logloss: 0.156404
[4]	training's a

In [None]:
from lightgbm import LGBMClassifier
import pandas as pd
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.metrics import precision_score, recall_score
from sklearn.metrics import f1_score, roc_auc_score

dataset = load_breast_cancer()
ftr = dataset.data
target = dataset.target

# 전체 데이터 중 80%는 학습용 데이터, 20%는 테스트용 데이터 추출
X_train, X_test, y_train, y_test=train_test_split(ftr, target, test_size=0.2, random_state=156 )

# 앞서 XGBoost와 동일하게 n_estimators는 400 설정. 
lgbm_wrapper = LGBMClassifier(n_estimators=400)
print('LGBMClassifier 기본 하이퍼 파라미터:\n',lgbm_wrapper.get_params(),'\n')
# LightGBM도 XGBoost와 동일하게 조기 중단 수행 가능. 
evals = [(X_test, y_test)]
lgbm_wrapper.fit(X_train, y_train, early_stopping_rounds=100, eval_metric="logloss", 
                 eval_set=evals, verbose=True)
preds = lgbm_wrapper.predict(X_test)
pred_proba = lgbm_wrapper.predict_proba(X_test)[:, 1]

confusion = confusion_matrix(y_test, preds)
accuracy = accuracy_score(y_test, preds)
precision = precision_score(y_test, preds)
recall = recall_score(y_test, preds)
f1 = f1_score(y_test, preds)
roc_auc = roc_auc_score(y_test, pred_proba)

print(confusion)
print('정확도: {0:.4f}, 정밀도: {1:.4f}, 재현율: {2:.4f}, F1: {3:.4f}, AUC:{4:.4f}'.format(accuracy, precision, recall, f1, roc_auc))

# grid_cv = GridSearchCV(dt_clf,param_grid=params,scoring='accuracy',cv=5,verbose=1)
# print('LGBMClassifier 최고 평균 정확도 수치:{0:.4f}'.format(grid_cv.best_score_))
# print('LGBMClassifier 최적 하이퍼파라미터:',grid_cv.best_params_)

로지스틱 회귀

In [22]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, roc_auc_score
import warnings
warnings.filterwarnings('ignore')

cancer = load_breast_cancer()

scaler = StandardScaler()
data_scaled= scaler.fit_transform(cancer.data)

X_train, X_test, y_train, y_test = train_test_split(data_scaled,cancer.target,test_size=0.3,random_state=0)

# 로지스틱 회귀를 이용해 학습 및 예측 수행
lr_clf = LogisticRegression()
lr_clf.fit(X_train,y_train)
lr_preds = lr_clf.predict(X_test)

# 정확도와 roc_auc 측정
accuracy = accuracy_score(y_test,lr_preds)
roc_auc = roc_auc_score(y_test,lr_preds)
print(f'accuracy: {accuracy:.4f}')
print(f'roc_auc: {roc_auc:.4f}')

from sklearn.model_selection import GridSearchCV

params = {'penalty':['l2','l1'],
         'C':[0.01,0.1,1,5,10]}

grid_clf = GridSearchCV(lr_clf,param_grid=params, scoring='accuracy', cv=3)
grid_clf.fit(data_scaled,cancer.target)
print(f'최적 하이퍼 파라미터: {grid_clf.best_params_}')
print(f'최적 평균 정확도: {grid_clf.best_score_}')

pred = grid_clf.best_estimator_.predict(X_test)
accuracy_test = accuracy_score(y_test,pred)
print(f'테스트 데이터 정확도: {accuracy_test:.4f}')

accuracy: 0.9766
roc_auc: 0.9716
최적 하이퍼 파라미터: {'C': 1, 'penalty': 'l2'}
최적 평균 정확도: 0.975392184164114
테스트 데이터 정확도: 0.9883
