### 模型选择与调优
* 交叉验证
* 网格搜索

![image.png](attachment:image.png)

***

**以 KNN 分类鸢尾花为例，演示网格搜索（GridSearchCV）**

In [3]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier

**步骤:**   
1.获取数据  
2.分割数据集  
3.特征工程  
   * 特征预处理 
   
4.模型选择  
5.模型调优  
6.模型评估  

In [4]:
# 1. Load the data: the iris dataset bundled with scikit-learn.
iris = load_iris()

In [5]:
# 2. Split the data: hold out 20% of the samples as a test set.
#    random_state is fixed so the split is reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(
    iris.data,
    iris.target,
    test_size=0.2,
    random_state=22,
)

In [6]:
# 3. Feature standardisation.
#    The scaler is fitted on the training split only; the statistics learned
#    there are then applied to both splits, so the test set never influences
#    the scaling parameters.
transfer = StandardScaler()
transfer.fit(x_train)
x_train = transfer.transform(x_train)
x_test = transfer.transform(x_test)

In [7]:
# 4. Model selection: a k-nearest-neighbours classifier with default settings.
#    n_neighbors is not chosen here; it is tuned by the grid search in step 5.
estimator = KNeighborsClassifier()

In [8]:
# 5. Model tuning
# Candidate values for n_neighbors: the odd numbers 1, 3, 5, 7, 9, 11.
param_dict = {'n_neighbors': list(range(1, 12, 2))}

# Exhaustively evaluate every candidate with 10-fold cross-validation
# on the training split.
estimator_grid = GridSearchCV(
    estimator,
    param_grid=param_dict,
    cv=10,
)

# Kept as the final bare expression so the notebook echoes the fitted object.
estimator_grid.fit(x_train, y_train)

GridSearchCV(cv=10, error_score='raise',
       estimator=KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=5, p=2,
           weights='uniform'),
       fit_params=None, iid=True, n_jobs=1,
       param_grid={'n_neighbors': [1, 3, 5, 7, 9, 11]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)

In [9]:
# 6. Model evaluation: score the fitted grid-search object on the held-out
#    test split.
score = estimator_grid.score(x_test,y_test)

In [10]:
# Display the test-set score computed above.
score

0.9333333333333333

In [11]:
# Best parameters: the candidate from param_dict selected by the grid search.
estimator_grid.best_params_

{'n_neighbors': 7}

In [12]:
# Best score: the cross-validation score of the best candidate. It is measured
# on validation folds split off from the training set, not on the test set,
# which is why it differs from the test-set score above.
estimator_grid.best_score_

0.9666666666666667

In [14]:
# Best estimator: the classifier configured with the best parameters found.
estimator_grid.best_estimator_

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=7, p=2,
           weights='uniform')

In [16]:
# Full cross-validation results for every candidate (not only the best one):
# fit/score timings, the parameters tried, and the test score of each split.
estimator_grid.cv_results_



{'mean_fit_time': array([1.31577492e-02, 2.99191475e-04, 2.99215317e-04, 9.97304916e-05,
        1.99699402e-04, 1.99508667e-04]),
 'std_fit_time': array([0.03815495, 0.00045702, 0.00045706, 0.00029919, 0.0003994 ,
        0.00039902]),
 'mean_score_time': array([0.00557015, 0.00039892, 0.00029924, 0.00059836, 0.00049891,
        0.00060356]),
 'std_score_time': array([0.01539882, 0.00048858, 0.0004571 , 0.00048876, 0.00049908,
        0.0004929 ]),
 'param_n_neighbors': masked_array(data=[1, 3, 5, 7, 9, 11],
              mask=[False, False, False, False, False, False],
        fill_value='?',
             dtype=object),
 'params': [{'n_neighbors': 1},
  {'n_neighbors': 3},
  {'n_neighbors': 5},
  {'n_neighbors': 7},
  {'n_neighbors': 9},
  {'n_neighbors': 11}],
 'split0_test_score': array([1., 1., 1., 1., 1., 1.]),
 'split1_test_score': array([0.92307692, 0.92307692, 1.        , 1.        , 0.92307692,
        0.92307692]),
 'split2_test_score': array([1.        , 0.92307692, 1.     