# GradientBoostingClassifier
 * GradientBoostingClassifier(*, loss='log_loss', learning_rate=0.1, n_estimators=100, subsample=1.0, criterion='friedman_mse', min_samples_split=2, min_samples_leaf=1, min_weight_fraction_leaf=0.0, max_depth=3, min_impurity_decrease=0.0, init=None, random_state=None, max_features=None, verbose=0, max_leaf_nodes=None, warm_start=False, validation_fraction=0.1, n_iter_no_change=None, tol=0.0001, ccp_alpha=0.0)

In [43]:
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score

iris = load_iris()
X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, 
                                                    test_size = 0.2, random_state = 1)

gb_clf = GradientBoostingClassifier(min_samples_split=2, 
                                    min_samples_leaf=1, 
                                    max_depth=3,
                                    random_state= 10)# Hyperparameter 조절
gb_clf.fit(X_train, y_train)
gb_pred = gb_clf.predict(X_test)
gb_accuracy = accuracy_score(y_test, gb_pred)

print(f'GBM 정확도: {gb_accuracy}')

GBM 정확도: 0.9666666666666667


## XGB

In [59]:
# Extreme Gradient Boosting (xgboost) 모델 사용
import xgboost as xgb
from xgboost import XGBClassifier

# 1. 모델 선언
xgb  = XGBClassifier()

# 2. 모델 훈련 fit()함수
xgb.fit(X_train, y_train)

# 3. 모델 예측 predict()함수
y_pred = xgb.predict(X_test)

# 4. score()
# xgb.score(X_train, y_train)
accuracy_score(y_pred, y_test) # GB 보다 빠르다.(298ms)

0.9666666666666667

In [44]:
from sklearn.model_selection import GridSearchCV

params = {
    'n_estimators' : [100, 500], 
    'learning_rate' : [0.05, 0.1]
}
grid_cv = GridSearchCV(gb_clf, param_grid=params, cv=2, verbose=1)
grid_cv.fit(X_train, y_train)
print('최적 하이퍼 파라미터:\n {grid_cv.best_params_}')
print(f'최고 예측 정확도: {grid_cv.best_score_}') # 시간이 오래 걸린다.

Fitting 2 folds for each of 4 candidates, totalling 8 fits
최적 하이퍼 파라미터:
 {grid_cv.best_params_}
최고 예측 정확도: 0.925


In [46]:
gb_pred = grid_cv.best_estimator_.predict(X_test)
gb_accuracy = accuracy_score(y_test, gb_pred)
print(f'GBM 정확도: {gb_accuracy}')
# 예측시간은 Model File로 저장한 모델을 사용해서 오래걸리지 않는다.

GBM 정확도: 0.9666666666666667
