### Gradient Boosting

- 참고
https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.GradientBoostingClassifier.html

In [7]:
from sklearn.ensemble import GradientBoostingClassifier 
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, mean_squared_error

In [8]:
import warnings

# Silence every warning for the rest of the session. This also hides
# deprecation notices emitted by the libraries used in this notebook.
warnings.filterwarnings(action='ignore')

In [9]:
# Load the breast cancer dataset bundled with scikit-learn; the features are
# read from `.data` and the class labels from `.target` in the split step.
cancer = load_breast_cancer()

In [10]:
# Hold out 20% of the samples for testing. `random_state` fixes the shuffle so
# the same split is produced on every run.
x_train, x_test, y_train, y_test = train_test_split(
    cancer.data,
    cancer.target,
    train_size=0.8,
    test_size=0.2,
    random_state=156,
)
# Print the shape of each piece, space-separated on one line.
print(*(part.shape for part in (x_train, x_test, y_train, y_test)))

(455, 30) (114, 30) (455,) (114,)


In [11]:
# Gradient boosting with 10 boosting stages, scored on the held-out split.
# `random_state` is fixed so the fit is repeatable.
gb_cif = GradientBoostingClassifier(n_estimators=10, random_state=0)
pred = gb_cif.fit(x_train, y_train).predict(x_test)  # fit() returns the estimator itself
print(accuracy_score(y_true=y_test, y_pred=pred))

0.956140350877193


In [12]:
# Same experiment with 50 boosting stages. Note that `gb_cif` and `pred` are
# rebound here, replacing the 10-stage model and its predictions.
gb_cif = GradientBoostingClassifier(n_estimators=50, random_state=0)
pred = gb_cif.fit(x_train, y_train).predict(x_test)  # fit() returns the estimator itself
print(accuracy_score(y_true=y_test, y_pred=pred))

0.9473684210526315


### XGBoost

- 참고 https://xgboost.readthedocs.io/en/latest/python/python_intro.html

In [None]:
!pip install xgboost

In [19]:
import xgboost as xgb

In [22]:
# Wrap each split in an xgboost DMatrix with its labels attached; these objects
# are what the training and prediction calls below consume.
dtrain = xgb.DMatrix(x_train, label=y_train)
dtest = xgb.DMatrix(x_test, label=y_test)

In [23]:
# Booster settings passed to xgb.train().
xgb_param = {
    'max_depth': 3,                  # maximum depth of each tree
    'objective': 'binary:logistic',  # two-class task
    'eval_metric': 'logloss',        # metric reported for the watched datasets
}

In [26]:
# Datasets to evaluate during training. The second item of each pair is the
# name that prefixes the metric in the training log.
wlist = [
    (dtrain, 'train'),
    (dtest, 'eval'),
]
# No num_boost_round is given, so the library default number of rounds is used.
xgb_model = xgb.train(xgb_param, dtrain, evals=wlist)

[0]	train-logloss:0.46775	eval-logloss:0.47984
[1]	train-logloss:0.33578	eval-logloss:0.36218
[2]	train-logloss:0.25254	eval-logloss:0.29004
[3]	train-logloss:0.19584	eval-logloss:0.23382
[4]	train-logloss:0.15664	eval-logloss:0.19901
[5]	train-logloss:0.12663	eval-logloss:0.17469
[6]	train-logloss:0.10353	eval-logloss:0.15549
[7]	train-logloss:0.08545	eval-logloss:0.13367
[8]	train-logloss:0.07277	eval-logloss:0.12411
[9]	train-logloss:0.06333	eval-logloss:0.11611


In [28]:
# Score the test split. The booster returns one float per sample, which the
# next step thresholds at 0.5 to obtain class labels.
xgb_model_predict = xgb_model.predict(data=dtest)
xgb_model_predict

array([0.839086  , 0.02631566, 0.8552334 , 0.15512495, 0.9551853 ,
       0.97217983, 0.97217983, 0.9572686 , 0.947391  , 0.02631566,
       0.02631566, 0.02631566, 0.97217983, 0.97217983, 0.9572686 ,
       0.9356049 , 0.9331713 , 0.97217983, 0.96562517, 0.97217983,
       0.02631566, 0.22185656, 0.02631566, 0.97217983, 0.02631566,
       0.77975345, 0.04442447, 0.02631566, 0.96562517, 0.15844832,
       0.97217983, 0.02631566, 0.97217983, 0.5644888 , 0.04298504,
       0.02631566, 0.9572686 , 0.947573  , 0.56105816, 0.97217983,
       0.25004005, 0.938648  , 0.97217983, 0.97217983, 0.97217983,
       0.88794047, 0.15844832, 0.97217983, 0.97217983, 0.97217983,
       0.96562517, 0.02631566, 0.9572686 , 0.97217983, 0.9455654 ,
       0.97080123, 0.97217983, 0.96010363, 0.97217983, 0.788752  ,
       0.96562517, 0.85749364, 0.02631566, 0.02631566, 0.97217983,
       0.9455654 , 0.02631566, 0.9331713 , 0.97217983, 0.97217983,
       0.02631566, 0.96562517, 0.02631566, 0.95047367, 0.97217

In [29]:
# Turn each predicted score into a hard label: strictly above 0.5 -> 1, else 0.
preds = [int(score > 0.5) for score in xgb_model_predict]
# Peek at the first five labels.
preds[:5]

[1, 0, 1, 0, 1]

In [30]:
# Accuracy of the thresholded XGBoost labels on the test split.
print(accuracy_score(y_true=y_test, y_pred=preds))

0.9649122807017544


### LightGBM

In [32]:
from lightgbm import LGBMClassifier

In [33]:
# LightGBM classifier (scikit-learn style wrapper) with all-default settings.
lgbm_wrapper = LGBMClassifier()

In [35]:
# The `early_stopping_rounds` and `verbose` keyword arguments of fit() were
# deprecated in LightGBM 3.3 and removed in 4.0, where passing them raises a
# TypeError. The same behaviour is requested through callbacks instead
# (requires LightGBM >= 3.3). Imported locally so this cell is self-contained.
from lightgbm import early_stopping, log_evaluation

# NOTE(review): the test split is also used as the early-stopping validation
# set, so accuracy measured on x_test afterwards is optimistically biased.
# Consider carving a separate validation split out of x_train.
evals = [(x_test, y_test)]
lgbm_wrapper.fit(
    x_train,
    y_train,
    eval_set=evals,
    eval_metric='logloss',
    callbacks=[
        early_stopping(stopping_rounds=100),  # stop after 100 rounds without improvement
        log_evaluation(period=1),             # log the metric every round (was verbose=True)
    ],
)
# Class labels for the test split from the fitted model.
preds = lgbm_wrapper.predict(x_test)

[1]	valid_0's binary_logloss: 0.565079
Training until validation scores don't improve for 100 rounds
[2]	valid_0's binary_logloss: 0.507451
[3]	valid_0's binary_logloss: 0.458489
[4]	valid_0's binary_logloss: 0.417481
[5]	valid_0's binary_logloss: 0.385507
[6]	valid_0's binary_logloss: 0.355773
[7]	valid_0's binary_logloss: 0.329587
[8]	valid_0's binary_logloss: 0.308478
[9]	valid_0's binary_logloss: 0.285395
[10]	valid_0's binary_logloss: 0.267055
[11]	valid_0's binary_logloss: 0.252013
[12]	valid_0's binary_logloss: 0.237018
[13]	valid_0's binary_logloss: 0.224756
[14]	valid_0's binary_logloss: 0.213383
[15]	valid_0's binary_logloss: 0.203058
[16]	valid_0's binary_logloss: 0.194015
[17]	valid_0's binary_logloss: 0.186412
[18]	valid_0's binary_logloss: 0.179108
[19]	valid_0's binary_logloss: 0.174004
[20]	valid_0's binary_logloss: 0.167155
[21]	valid_0's binary_logloss: 0.162494
[22]	valid_0's binary_logloss: 0.156886
[23]	valid_0's binary_logloss: 0.152855
[24]	valid_0's binary_loglo

In [37]:
# Display the current value of `preds`. The name is assigned both by the
# XGBoost thresholding cell and by the LightGBM fit cell, so what is shown
# depends on which of those ran last.
preds

array([1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1,
       0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1,
       1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1,
       0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1,
       1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1])

In [36]:
# Accuracy of `preds` on the test split. `preds` is shared with the XGBoost
# section, so run the LightGBM fit cell first to score the LightGBM model.
print(accuracy_score(y_true=y_test, y_pred=preds))

0.956140350877193
