## [作業重點]
了解如何使用 Sklearn 中的 hyper-parameter search 找出最佳的超參數

### 作業
請使用不同的資料集，並使用 hyper-parameter search 的方式，看能不能找出最佳的超參數組合

In [1]:
from sklearn import datasets, metrics
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import GradientBoostingClassifier
import matplotlib.pyplot as plt 

  return f(*args, **kwds)


In [7]:
digits = datasets.load_digits()
x_train, x_test, y_train, y_test = train_test_split(digits.data, digits.target, test_size=0.25, random_state=32)

In [11]:
#default parameters
gbc = GradientBoostingClassifier()
gbc.fit(x_train, y_train)
y_pred = gbc.predict(x_test)
cfs_matrix = metrics.confusion_matrix(y_test, y_pred)
precision = metrics.precision_score(y_test, y_pred, average='macro')
recall = metrics.recall_score(y_test, y_pred, average='macro')
F1_score = metrics.f1_score(y_test, y_pred, average='macro')
print('confusion matrix:\n', cfs_matrix)
print('precision:\n', precision)
print('recall:\n', recall)
print('F1_score:\n', F1_score)

confusion matrix:
 [[41  0  0  0  0  1  0  0  0  1]
 [ 0 43  1  0  0  0  0  0  0  0]
 [ 0  0 38  0  0  0  0  0  1  0]
 [ 0  0  0 66  0  0  0  0  0  0]
 [ 0  0  0  0 43  0  0  0  0  0]
 [ 0  0  0  0  0 42  0  0  1  1]
 [ 0  0  0  0  0  0 45  0  2  0]
 [ 1  0  0  0  1  0  0 45  0  0]
 [ 0  0  0  0  0  0  0  0 33  0]
 [ 0  0  0  0  0  0  0  0  0 44]]
precision:
 0.9752979994891018
recall:
 0.9774559145291455
F1_score:
 0.9758750252109456


In [15]:
n_estimators = [10, 200, 500]
max_depth = [1, 3, 5]
para_grid = {
    'n_estimators': n_estimators,
    'max_depth': max_depth
}
grid_search = GridSearchCV(gbc, para_grid, n_jobs=-1, scoring='f1_micro')
grid_result = grid_search.fit(x_train, y_train)



In [16]:
print("Best Accuracy: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

Best Accuracy: 0.942094 using {'max_depth': 3, 'n_estimators': 500}


In [18]:
gbc_best = GradientBoostingClassifier(max_depth=grid_result.best_params_['max_depth'], n_estimators=grid_result.best_params_['n_estimators'])
gbc_best.fit(x_train, y_train)

GradientBoostingClassifier(criterion='friedman_mse', init=None,
                           learning_rate=0.1, loss='deviance', max_depth=3,
                           max_features=None, max_leaf_nodes=None,
                           min_impurity_decrease=0.0, min_impurity_split=None,
                           min_samples_leaf=1, min_samples_split=2,
                           min_weight_fraction_leaf=0.0, n_estimators=500,
                           n_iter_no_change=None, presort='auto',
                           random_state=None, subsample=1.0, tol=0.0001,
                           validation_fraction=0.1, verbose=0,
                           warm_start=False)

In [21]:
y_pred_best = gbc_best.predict(x_test)
cfs_matrix = metrics.confusion_matrix(y_test, y_pred_best)
precision = metrics.precision_score(y_test, y_pred_best, average='macro')
recall = metrics.recall_score(y_test, y_pred_best, average='macro')
F1_score = metrics.f1_score(y_test, y_pred_best, average='macro')

In [22]:
print('confusion matrix:\n', cfs_matrix)
print('precision:\n', precision)
print('recall:\n', recall)
print('F1_score:\n', F1_score)

confusion matrix:
 [[41  0  0  0  0  1  0  0  0  1]
 [ 0 43  1  0  0  0  0  0  0  0]
 [ 0  0 38  0  0  0  0  0  1  0]
 [ 0  0  0 66  0  0  0  0  0  0]
 [ 0  0  0  0 42  0  0  1  0  0]
 [ 0  0  0  0  0 43  0  0  0  1]
 [ 0  0  0  0  0  0 46  0  1  0]
 [ 1  0  0  0  1  0  0 45  0  0]
 [ 0  0  0  0  0  0  0  0 33  0]
 [ 0  0  0  0  0  0  0  0  0 44]]
precision:
 0.9782206115421485
recall:
 0.9795307199809923
F1_score:
 0.9786942777541314
