## [作業重點]
了解如何使用 Sklearn 中的 hyper-parameter search 找出最佳的超參數

### 作業
請使用不同的資料集，並使用 hyper-parameter search 的方式，看能不能找出最佳的超參數組合

In [4]:
from sklearn import datasets, linear_model, metrics
from sklearn.metrics import mean_squared_error, r2_score, accuracy_score
from sklearn.model_selection import train_test_split, KFold, GridSearchCV
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor, export_graphviz
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor, GradientBoostingClassifier, GradientBoostingRegressor
from IPython.display import Image
import pydotplus 
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [5]:
def data(dataset, is_regression, title=None):
    if title is not None:
        print(title.upper())
        print()
    print('data shape:', dataset.data.shape)
    print('target shape:', dataset.target.shape)
    
    alpha=0.3
    
    X = dataset.data
    x_train, x_test, y_train, y_test = train_test_split(X, dataset.target, test_size=0.1, random_state=4)
    print('x_train', x_train[0])
    print('y_train', y_train[0])
    print('x_test', x_test[0])
    print('y_test', y_test[0])
    print()
    
    n_estimators = [100, 200, 300]
    max_depth = [1, 3, 5]
    param_grid = dict(n_estimators=n_estimators, max_depth=max_depth)
    
    if is_regression:
        model = GradientBoostingRegressor()
        model.fit(x_train, y_train)
        y_pred = model.predict(x_test)
        print('GradientBoostingRegressor:')
        print('tree score:', model.score(x_test,y_test))
        print("Mean squared error: %.2f"% mean_squared_error(y_test, y_pred))
#         df = pd.DataFrame(model.feature_importances_, index=dataset.feature_names, columns=['importance'])
#         print("Feature importance: \n", df.sort_values('importance', ascending=False))
        print("Feature importance: \n", model.feature_importances_)
        print()
    else:
        model = GradientBoostingClassifier()
        model.fit(x_train, y_train)
        y_pred = model.predict(x_test)
        print('GradientBoostingClassifier:')
        print('tree score:', model.score(x_test,y_test))
        print("r2_score: %.2f"% r2_score(y_test, y_pred))
        print('accuracy_score: %.2f'% accuracy_score(y_test, y_pred))
#         df = pd.DataFrame(model.feature_importances_, index=dataset.feature_names, columns=['importance'])
#         print("Feature importance: \n", df.sort_values('importance', ascending=False))
        print("Feature importance: \n", model.feature_importances_)
        print()
        
    grid_search = GridSearchCV(model, param_grid, scoring="neg_mean_squared_error", n_jobs=-1, verbose=1)
    grid_result = grid_search.fit(x_train, y_train)
    print("Best Accuracy: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
    
    if is_regression:
        model = GradientBoostingRegressor(max_depth=grid_result.best_params_['max_depth'],
                                                   n_estimators=grid_result.best_params_['n_estimators'])
        model.fit(x_train, y_train)
        y_pred = model.predict(x_test)
        print('Best GradientBoostingRegressor:')
        print('tree score:', model.score(x_test,y_test))
        print("Mean squared error: %.2f"% mean_squared_error(y_test, y_pred))
        print("Feature importance: \n", model.feature_importances_)
        print()
    else:
        model = GradientBoostingClassifier(max_depth=grid_result.best_params_['max_depth'],
                                                   n_estimators=grid_result.best_params_['n_estimators'])
        model.fit(x_train, y_train)
        y_pred = model.predict(x_test)
        print('Best GradientBoostingClassifier:')
        print('tree score:', model.score(x_test,y_test))
        print("r2_score: %.2f"% r2_score(y_test, y_pred))
        print('accuracy_score: %.2f'% accuracy_score(y_test, y_pred))
#         df = pd.DataFrame(model.feature_importances_, index=dataset.feature_names, columns=['importance'])
#         print("Feature importance: \n", df.sort_values('importance', ascending=False))
        print("Feature importance: \n", model.feature_importances_)
        print()
    print('\n-----------------------------\n')

In [6]:
diabetes = datasets.load_diabetes()
data(diabetes, True, 'diabetes')

breast_cancer = datasets.load_breast_cancer()
data(breast_cancer, True, 'breast_cancer')

boston = datasets.load_boston()
data(boston, True, 'boston')

iris = datasets.load_iris()
data(iris, False, 'iris')

wine = datasets.load_wine()
data(wine, False, 'wine')

digits = datasets.load_digits()
data(digits, False, 'digits')

DIABETES

data shape: (442, 10)
target shape: (442,)
x_train [-0.04547248 -0.04464164 -0.04824063 -0.01944209 -0.00019301 -0.01603186
  0.06704829 -0.03949338 -0.02479119  0.01963284]
y_train 111.0
x_test [-0.04183994 -0.04464164 -0.04931844 -0.03665645 -0.00707277 -0.02260797
  0.08545648 -0.03949338 -0.06648815  0.00720652]
y_test 128.0

GradientBoostingRegressor:
tree score: 0.4325014103102579
Mean squared error: 3034.84
Feature importance: 
 [0.0446979  0.01708734 0.2362979  0.11256456 0.0305353  0.04245075
 0.04286432 0.0296913  0.3807566  0.06305403]

Fitting 3 folds for each of 9 candidates, totalling 27 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  27 out of  27 | elapsed:    1.3s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Best Accuracy: -3053.300736 using {'max_depth': 1, 'n_estimators': 200}
Best GradientBoostingRegressor:
tree score: 0.4183577988784327
Mean squared error: 3110.47
Feature importance: 
 [0.01900377 0.01885497 0.33536195 0.12393189 0.00474867 0.00863322
 0.04789281 0.01110491 0.37543859 0.05502922]


-----------------------------

BREAST_CANCER

data shape: (569, 30)
target shape: (569,)
x_train [1.026e+01 1.471e+01 6.620e+01 3.216e+02 9.882e-02 9.159e-02 3.581e-02
 2.037e-02 1.633e-01 7.005e-02 3.380e-01 2.509e+00 2.394e+00 1.933e+01
 1.736e-02 4.671e-02 2.611e-02 1.296e-02 3.675e-02 6.758e-03 1.088e+01
 1.948e+01 7.089e+01 3.571e+02 1.360e-01 1.636e-01 7.162e-02 4.074e-02
 2.434e-01 8.488e-02]
y_train 1
x_test [1.442e+01 1.654e+01 9.415e+01 6.412e+02 9.751e-02 1.139e-01 8.007e-02
 4.223e-02 1.912e-01 6.412e-02 3.491e-01 7.706e-01 2.677e+00 3.214e+01
 4.577e-03 3.053e-02 3.840e-02 1.243e-02 1.873e-02 3.373e-03 1.667e+01
 2.151e+01 1.114e+02 8.621e+02 1.294e-01 3.371e-01 3.755e-01 1.414e

[Parallel(n_jobs=-1)]: Done  12 out of  27 | elapsed:    0.2s remaining:    0.3s
[Parallel(n_jobs=-1)]: Done  27 out of  27 | elapsed:    1.5s finished


Best Accuracy: -0.042492 using {'max_depth': 3, 'n_estimators': 100}
Best GradientBoostingRegressor:
tree score: 0.5925114974886043
Mean squared error: 0.08
Feature importance: 
 [8.74184870e-04 1.39089393e-02 1.06936088e-03 2.08667954e-03
 4.41060516e-04 1.62843367e-03 6.22836892e-03 2.71658343e-02
 1.55651517e-04 6.30574659e-04 1.83596568e-03 1.62922299e-02
 6.76314505e-03 1.89962419e-02 1.06166077e-03 1.60552531e-03
 7.03603192e-04 4.72614600e-04 1.04922120e-04 2.66337284e-03
 4.88397173e-01 2.79620468e-02 2.01026563e-01 6.00575899e-02
 3.70184814e-03 1.14688008e-03 6.25635303e-03 1.05270640e-01
 1.23115695e-03 2.61378891e-04]


-----------------------------

BOSTON

data shape: (506, 13)
target shape: (506,)
x_train [  2.44953   0.       19.58      0.        0.605     6.402    95.2
   2.2625    5.      403.       14.7     330.04     11.32   ]
y_train 22.3
x_test [2.1124e-01 1.2500e+01 7.8700e+00 0.0000e+00 5.2400e-01 5.6310e+00
 1.0000e+02 6.0821e+00 5.0000e+00 3.1100e+02 1.5200e+0

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  12 out of  27 | elapsed:    0.1s remaining:    0.2s
[Parallel(n_jobs=-1)]: Done  27 out of  27 | elapsed:    1.2s finished


Best Accuracy: -10.443759 using {'max_depth': 3, 'n_estimators': 200}
Best GradientBoostingRegressor:
tree score: 0.8811899527048722
Mean squared error: 9.76
Feature importance: 
 [0.0316723  0.00058416 0.00452014 0.00052379 0.02797176 0.39002464
 0.01249535 0.0915027  0.00185529 0.01374706 0.03103159 0.01156559
 0.38250564]


-----------------------------

IRIS

data shape: (150, 4)
target shape: (150,)
x_train [4.9 3.1 1.5 0.2]
y_train 0
x_test [6.4 2.8 5.6 2.1]
y_test 2

GradientBoostingClassifier:
tree score: 0.9333333333333333
r2_score: 0.92
accuracy_score: 0.93
Feature importance: 
 [0.00702623 0.01053484 0.29571752 0.68672141]

Fitting 3 folds for each of 9 candidates, totalling 27 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  12 out of  27 | elapsed:    0.5s remaining:    0.6s
[Parallel(n_jobs=-1)]: Done  27 out of  27 | elapsed:    1.1s finished


Best Accuracy: -0.029630 using {'max_depth': 3, 'n_estimators': 100}
Best GradientBoostingClassifier:
tree score: 0.9333333333333333
r2_score: 0.92
accuracy_score: 0.93
Feature importance: 
 [0.00545719 0.01189185 0.27707333 0.70557762]


-----------------------------

WINE

data shape: (178, 13)
target shape: (178,)
x_train [1.229e+01 2.830e+00 2.220e+00 1.800e+01 8.800e+01 2.450e+00 2.250e+00
 2.500e-01 1.990e+00 2.150e+00 1.150e+00 3.300e+00 2.900e+02]
y_train 1
x_test [1.296e+01 3.450e+00 2.350e+00 1.850e+01 1.060e+02 1.390e+00 7.000e-01
 4.000e-01 9.400e-01 5.280e+00 6.800e-01 1.750e+00 6.750e+02]
y_test 2

GradientBoostingClassifier:
tree score: 1.0
r2_score: 1.00
accuracy_score: 1.00
Feature importance: 
 [1.13267892e-02 3.93170934e-02 5.52097099e-03 6.04774092e-03
 7.33388124e-04 4.72492346e-03 2.00146005e-01 4.83248637e-03
 1.97155160e-05 3.25567256e-01 9.78598136e-03 8.90181220e-02
 3.02959527e-01]

Fitting 3 folds for each of 9 candidates, totalling 27 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  12 out of  27 | elapsed:    0.5s remaining:    0.7s
[Parallel(n_jobs=-1)]: Done  27 out of  27 | elapsed:    1.1s finished


Best Accuracy: -0.062500 using {'max_depth': 1, 'n_estimators': 100}
Best GradientBoostingClassifier:
tree score: 0.9444444444444444
r2_score: 0.92
accuracy_score: 0.94
Feature importance: 
 [8.08213083e-02 5.67652352e-03 5.61963941e-05 5.46589162e-03
 1.72473616e-03 0.00000000e+00 2.14348165e-01 0.00000000e+00
 2.62028084e-04 2.56295302e-01 7.52035487e-02 5.45975833e-02
 3.05548717e-01]


-----------------------------

DIGITS

data shape: (1797, 64)
target shape: (1797,)
x_train [ 0.  1.  7. 14. 16. 12.  1.  0.  0.  7. 16.  9.  6. 11.  1.  0.  0. 11.
 12.  4.  1.  0.  0.  0.  0. 12. 16. 16. 15.  6.  0.  0.  0.  3.  9.  4.
 11. 12.  0.  0.  0.  0.  0.  0.  8. 16.  0.  0.  0.  0.  0.  0. 14. 13.
  0.  0.  0.  0.  6. 16. 15.  3.  0.  0.]
y_train 5
x_test [ 0.  0.  0. 11. 16. 12.  1.  0.  0.  0.  5. 16. 10. 16.  4.  0.  0.  2.
 15. 10.  0.  8.  1.  0.  0.  5. 16.  9.  1.  0.  0.  0.  0.  8. 16. 16.
  9.  0.  0.  0.  0.  2. 16. 10. 16.  6.  0.  0.  0.  0. 11. 16. 16.  7.
  0.  0.  0.  0.  

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  27 out of  27 | elapsed:   15.1s finished


Best Accuracy: -0.869511 using {'max_depth': 3, 'n_estimators': 200}
Best GradientBoostingClassifier:
tree score: 0.9611111111111111
r2_score: 0.98
accuracy_score: 0.96
Feature importance: 
 [0.00000000e+00 4.94647223e-04 1.11953390e-02 5.85156508e-03
 2.24393104e-03 5.85505969e-02 3.43881510e-03 3.01529320e-03
 3.03531277e-04 1.08557997e-03 1.60585674e-02 3.94549860e-04
 7.42377604e-03 1.25624042e-02 3.33194934e-03 5.49758565e-04
 2.12171970e-04 2.08086512e-03 1.11426083e-02 3.06214545e-02
 2.68303724e-02 8.96644629e-02 4.52785497e-03 3.82429068e-08
 1.41969383e-04 1.71035360e-03 4.92520924e-02 1.84325815e-02
 3.43841682e-02 2.35340736e-02 8.86516000e-03 5.80072431e-04
 0.00000000e+00 6.86424576e-02 2.11240916e-03 6.03964470e-03
 7.23719809e-02 1.11222238e-02 1.91079693e-02 0.00000000e+00
 0.00000000e+00 8.44922681e-03 8.30146548e-02 7.00860627e-02
 8.69269300e-03 1.90814163e-02 2.09712219e-02 2.58630719e-04
 0.00000000e+00 7.16921251e-04 4.08207107e-03 1.90156645e-02
 1.05078296e-02 