In [25]:
import numpy as np
from sklearn import datasets

In [26]:
# Load scikit-learn's bundled digits dataset and pull out the
# feature matrix (x) and the label vector (y) used by every later cell.
digits = datasets.load_digits()
x, y = digits.data, digits.target


In [27]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples as the evaluation split; the fixed
# random_state keeps the split identical across kernel restarts.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=666,
)

In [28]:
from sklearn.neighbors import KNeighborsClassifier

# Baseline model: k-NN with k=3. fit() returns the estimator itself,
# so construction and training can be written as one expression.
KNN_classifier = KNeighborsClassifier(n_neighbors=3).fit(x_train, y_train)

# Score on the held-out split; as the last expression it becomes the cell output.
KNN_classifier.score(x_test, y_test)

0.9888888888888889

## 寻找最好的超参数K
### 调参的过程

In [29]:
# Step 1: fit one classifier per candidate k (1..10) and record its
# score on the held-out split.
scores_by_k = {}
for k in range(1, 11):
    knn_clf = KNeighborsClassifier(n_neighbors=k).fit(x_train, y_train)
    scores_by_k[k] = knn_clf.score(x_test, y_test)

# Step 2: pick the winner. The sentinels (-1 / 0.0) mean "nothing selected yet";
# the strict '>' keeps the smallest k when several candidates tie.
# NOTE(review): candidates are compared on x_test/y_test, the same split used
# for the baseline score above, so best_score is not an unbiased estimate.
best_k, best_score = -1, 0.0
for k, score in scores_by_k.items():
    if score > best_score:
        best_k, best_score = k, score

print("best_k = ",best_k)
print("best_score",best_score)

best_k =  4
best_score 0.9916666666666667


## 另一种超参数--考虑距离权重还是不考虑距离权重

In [30]:
# Exhaustive search over two hyperparameters at once:
#   weights     -> "uniform" (plain vote) or "distance" (closer neighbours count more)
#   n_neighbors -> 1..10
# The sentinels ("" / -1 / 0.0) mean "nothing selected yet".
best_method, best_k, best_score = "", -1, 0.0

for method in ("uniform", "distance"):
    for k in range(1, 11):
        knn_clf = KNeighborsClassifier(n_neighbors=k, weights=method).fit(x_train, y_train)
        score = knn_clf.score(x_test, y_test)
        # Strict '>' means the first combination reaching a score wins ties.
        if score > best_score:
            best_method, best_k, best_score = method, k, score

print("best_method = ",best_method)
print("best_k = ",best_k)
print("best_score",best_score)

best_method =  uniform
best_k =  4
best_score 0.9916666666666667


## 第三种超参数--搜索闵可夫斯基距离相对应的p值


In [31]:
%%time

# Search k (1..10) together with the Minkowski exponent p (1..4).
# weights is pinned to "distance" for every candidate in this search.
# The sentinels (-1 / -1 / 0.0) mean "nothing selected yet".
best_p, best_k, best_score = -1, -1, 0.0

for k in range(1, 11):
    for p in range(1, 5):
        knn_clf = KNeighborsClassifier(
            n_neighbors=k,
            weights="distance",
            p=p,
        ).fit(x_train, y_train)
        score = knn_clf.score(x_test, y_test)
        # Strict '>' means the first (k, p) pair reaching a score wins ties.
        if score > best_score:
            best_p, best_k, best_score = p, k, score

print("best_p = ",best_p)
print("best_k = ",best_k)
print("best_score",best_score)

best_p =  2
best_k =  3
best_score 0.9888888888888889
Wall time: 19.5 s


## 网格搜索

In [48]:
# Search space for GridSearchCV, given as a list of independent sub-grids:
#   1. uniform weights  -> vary only n_neighbors (1..10)
#   2. distance weights -> vary n_neighbors (1..10) and the Minkowski exponent p (1..5)
param_grid = [
    dict(
        weights=['uniform'],
        n_neighbors=list(range(1, 11)),
    ),
    dict(
        weights=['distance'],
        n_neighbors=list(range(1, 11)),
        p=list(range(1, 6)),
    ),
]

In [49]:
# Unfitted estimator with default settings; it is handed to GridSearchCV below,
# which supplies the hyperparameter values from param_grid.
knn_clf = KNeighborsClassifier()

In [50]:
from sklearn.model_selection import GridSearchCV

In [51]:
# Wrap the base k-NN estimator in a grid search over param_grid
# (all other GridSearchCV options are left at their defaults).
grid_search = GridSearchCV(estimator=knn_clf, param_grid=param_grid)

In [None]:
# Run the search using only the training split; x_test / y_test are not passed in.
grid_search.fit(x_train,y_train)




In [None]:
# Show the estimator the grid search selected (requires the fit cell above to have run).
grid_search.best_estimator_