# 파라미터 튜닝 실습

In [24]:
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestRegressor
from sklearn import datasets
from sklearn.model_selection import train_test_split
import numpy as np

In [19]:
# Load the Boston housing dataset shipped with scikit-learn and separate the
# feature matrix from the regression target.
# NOTE(review): load_boston was removed in scikit-learn 1.2 — confirm the
# installed version still provides it before running this cell.
data = datasets.load_boston()
x, y = data['data'], data['target']

# Hold out 30% of the samples for testing; the fixed random_state keeps the
# split identical between runs.
X_train, X_test, Y_train, Y_test = train_test_split(
    x,
    y,
    test_size=0.3,
    random_state=0,
)

파라미터 튜닝은 grid search를 이용하여 진행할 수 있다. grid search를 활용하려면 parameter grid를 dictionary 형태로 구성한 뒤, 이를 이용해 탐색을 수행하면 된다.

- param_grid : 파라미터 셋을 정의하고 해당 알고리즘의 파라미터 이름을 key로, search하고자 하는 수치의 list를 value로 하는 dictionary를 만든다.
- GridSearchCV : 모델과 앞서 정의한 param_grid를 입력하고, cross-validation(cv)의 fold 수를 입력한다. 또한, 해당 모델의 평가 metric을 정의하면 해당 평가 metric 기반으로 cross-validation을 통해 최적 파라미터를 찾는다.

In [21]:
# Candidate values to try for each RandomForestRegressor hyperparameter.
n_estimators = [10, 20, 30, 40, 50]
max_features = [2, 3, 4]
bootstrap = [True, False]
# Alias for the original (misspelled) variable name, kept so that any other
# cell still referring to it keeps working.
max_featrues = max_features

# Parameter grid: a list containing one dict that maps each hyperparameter
# name to the list of values to search over.
# (The original literal had a doubled comma between the 'max_features' and
# 'bootstrap' entries, which is a SyntaxError.)
param_grid = [{
    'n_estimators': n_estimators,
    'max_features': max_features,
    'bootstrap': bootstrap,
}]

# Base estimator; the hyperparameters listed in param_grid are overridden by
# the search for each candidate combination.
rf = RandomForestRegressor()

# 4-fold cross-validated search over param_grid, scored by negated mean
# squared error.
grid_search = GridSearchCV(
    estimator=rf,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',
    cv=4,
)

# Run the search on the training split. Left as the last expression so the
# notebook displays the fitted search object.
grid_search.fit(X_train, Y_train)

GridSearchCV(cv=4, error_score='raise',
       estimator=RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
           max_features='auto', max_leaf_nodes=None,
           min_impurity_split=1e-07, min_samples_leaf=1,
           min_samples_split=2, min_weight_fraction_leaf=0.0,
           n_estimators=10, n_jobs=1, oob_score=False, random_state=None,
           verbose=0, warm_start=False),
       fit_params={}, iid=True, n_jobs=1,
       param_grid=[{'n_estimators': [10, 20, 30, 40, 50], 'max_features': [2, 3, 4], 'bootstrap': [True, False]}],
       pre_dispatch='2*n_jobs', refit=True, return_train_score=True,
       scoring='neg_mean_squared_error', verbose=0)

In [22]:
# Display the hyperparameter combination that the fitted search selected as best.
grid_search.best_params_

{'bootstrap': False, 'max_features': 3, 'n_estimators': 40}

In [25]:
# Per-candidate cross-validation results from the fitted search.
cvres = grid_search.cv_results_

# The search was scored with 'neg_mean_squared_error', so flip the sign and
# take the square root to report RMSE for every candidate.
rmse_per_candidate = np.sqrt(-cvres["mean_test_score"])
for rmse, candidate in zip(rmse_per_candidate, cvres["params"]):
    print(rmse, candidate)

3.9040826450569703 {'bootstrap': True, 'max_features': 2, 'n_estimators': 10}
3.8952254654324787 {'bootstrap': True, 'max_features': 2, 'n_estimators': 20}
3.6334224751068285 {'bootstrap': True, 'max_features': 2, 'n_estimators': 30}
3.7021247811871802 {'bootstrap': True, 'max_features': 2, 'n_estimators': 40}
3.707195894625741 {'bootstrap': True, 'max_features': 2, 'n_estimators': 50}
3.848557951374092 {'bootstrap': True, 'max_features': 3, 'n_estimators': 10}
3.6648309959414993 {'bootstrap': True, 'max_features': 3, 'n_estimators': 20}
3.5511546931581974 {'bootstrap': True, 'max_features': 3, 'n_estimators': 30}
3.432856326738457 {'bootstrap': True, 'max_features': 3, 'n_estimators': 40}
3.4496590742874234 {'bootstrap': True, 'max_features': 3, 'n_estimators': 50}
3.7794186546652435 {'bootstrap': True, 'max_features': 4, 'n_estimators': 10}
3.457311083686067 {'bootstrap': True, 'max_features': 4, 'n_estimators': 20}
3.3092233477203234 {'bootstrap': True, 'max_features': 4, 'n_estimat