# 加载数据

In [1]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris

In [2]:
iris=load_iris()

In [3]:
x=iris.data
y=iris.target

In [4]:
x.shape,y.shape

((150, 4), (150,))

In [5]:
x_train,x_test,y_train,y_test=train_test_split(x,y,train_size=0.7,random_state=233,stratify=y)

In [6]:
x_train.shape,x_test.shape,y_train.shape,y_test.shape

((105, 4), (45, 4), (105,), (45,))

# 超参数

In [7]:
from sklearn.neighbors import KNeighborsClassifier

In [8]:
neigh=KNeighborsClassifier(
    n_neighbors=3,#k取值
    weights='distance',#权重，uniform均相等，distance与距离相关且成反比
    p=2#明氏距离中的p，1为曼哈顿距离，2为欧氏距离
)

In [9]:
neigh.fit(x_train,y_train)

In [10]:
neigh.score(x_test,y_test)

0.9777777777777777

## 网格搜索方法寻找最优超参数
#### 本质是看所有超参数可能性选最优的那一组

In [21]:
best_score=-1
best_n=-1
best_weight=''
best_p=-1
for n in range(1,20):
    for weight in['uniform','distance']:
        for p in range(1,7):
            neigh=KNeighborsClassifier(
            n_neighbors=n,#k取值
            weights=weight,#权重，uniform均相等，distance与距离相关且成反比
            p=p#明氏距离中的p，1为曼哈顿距离，2为欧氏距离
            )
            neigh.fit(x,y)
            score=neigh.score(x_test,y_test)  
            if score>best_score:
                best_score=score
                best_n=n
                best_weight=weight
                best_p=p
print("n_neighbors:",best_n)
print("weights:",best_weight)
print("p:",best_p)
print("score:",best_score)

n_neighbors: 1
weights: uniform
p: 1
score: 1.0


## sklearn实现超参数搜索

## [GridSearchCV](https://blog.csdn.net/MR_Trustin/article/details/96614446?ops_request_misc=%257B%2522request%255Fid%2522%253A%2522170784266116800185870911%2522%252C%2522scm%2522%253A%252220140713.130102334..%2522%257D&request_id=170784266116800185870911&biz_id=0&utm_medium=distribute.pc_search_result.none-task-blog-2~all~sobaiduend~default-1-96614446-null-null.142^v99^pc_search_result_base2&utm_term=gridsearchcv%E5%8F%82%E6%95%B0%E8%AF%B4%E6%98%8E&spm=1018.2226.3001.4187)

In [12]:
from sklearn.model_selection import GridSearchCV

In [13]:
params ={
    'n_neighbors':[n for n in range(1,20)],
    'weights':['uniform','distance'],
    'p':[p for p in range(1,7)]
}#参数字典，k是参数名，value是参数值

In [14]:
grid=GridSearchCV(
    estimator=KNeighborsClassifier(),#表示要使用的分类器
    param_grid=params,#参数字典
    n_jobs=-1#自动设定任务数量
)

In [22]:
grid.fit(x_train,y_train)

In [23]:
grid.best_params_

{'n_neighbors': 9, 'p': 2, 'weights': 'uniform'}

In [24]:
grid.best_score_

0.961904761904762

In [25]:
grid.best_estimator_

In [18]:
grid.best_estimator_.predict(x_test)

array([2, 2, 0, 1, 1, 1, 2, 0, 2, 0, 0, 1, 0, 2, 1, 1, 0, 2, 2, 1, 0, 1,
       1, 2, 2, 0, 0, 1, 1, 0, 2, 2, 0, 1, 1, 2, 1, 1, 0, 0, 0, 2, 0, 1,
       1])

In [26]:
grid.best_estimator_.score(x_test,y_test)

0.9555555555555556