 # GradientBoostingClassifier
 
 https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.GradientBoostingClassifier.html?highlight=gradientboostingclassifier#sklearn.ensemble.GradientBoostingClassifier
 
 - loss : 사용할 loss함수 지정 (default='deviance') {'deviance’, ‘exponential’}
 - learning_rate (default=0.1)
 - n_estimators : weak learner 개수 (default=100)
 - subsample : 데이터를 얼마나 사용할지 비율 설정 (default=1)
 - min_samples_split : 분할하는 데 필요한 최소 샘플 수 (default=2) {int, float}
 - min_samples_leaf : 리프 노드에 있어야 하는 최소 샘플 수 (default=1) {int, float}
 - max_depth : 트리의 최대 깊이 (default=3)
 - max_features : (default=None) {‘auto’, ‘sqrt’, ‘log2’}
 - verbose : (default=0) {int}
 - 너무 많아서 나머진 생략

In [1]:
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
SEED=111

# 데이터 load 및 split
cancer = load_breast_cancer()
X_train, X_test, y_train, y_test = train_test_split(cancer.data, cancer.target, test_size=0.2, random_state=SEED)

# 모델 학습
model = GradientBoostingClassifier(random_state=SEED)
model.fit(X_train, y_train)

# 예측 및 정확도 확인
pred = model.predict(X_test)
accuracy = accuracy_score(y_test, pred)
print('GBM 정확도: {0:.4f}'.format(accuracy))

GBM 정확도: 0.9825


## 파라미터 서치

In [15]:
from sklearn.model_selection import RandomizedSearchCV

params = {
    'n_estimators' : range(100,200),
    'learning_rate' : [0.05, 0.07, 0.1],
    'loss' : ['deviance', 'exponential'],
    'max_features' : ['None','auto','sqrt','log2'],
    #'max_depth' : range(10,20)
}

rand_cv = RandomizedSearchCV(estimator=model,
                             param_distributions=params,
                            n_iter=500,
                            cv=3,
                             n_jobs=8,
                            random_state=SEED)
rand_cv.fit(X_train, y_train)

RandomizedSearchCV(cv=3, estimator=GradientBoostingClassifier(random_state=111),
                   n_iter=500, n_jobs=8,
                   param_distributions={'learning_rate': [0.05, 0.07, 0.1],
                                        'loss': ['deviance', 'exponential'],
                                        'max_features': ['None', 'auto', 'sqrt',
                                                         'log2'],
                                        'n_estimators': range(100, 200)},
                   random_state=111)

In [16]:
rand_cv.best_params_

{'n_estimators': 192,
 'max_features': 'auto',
 'loss': 'deviance',
 'learning_rate': 0.1}

In [17]:
rand_cv.best_score_

0.9758191007319623