### 超参数

In [1]:
import numpy as np
from sklearn import datasets

In [2]:
# Load scikit-learn's bundled digits dataset, then unpack the returned
# container into the feature matrix and the matching label vector.
digits_data = datasets.load_digits()
Feature, target = digits_data.data, digits_data.target

In [3]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples as a test set. The fixed random_state keeps the
# shuffle, and therefore the split, reproducible between runs.
Feature_train, Feature_test, tag_train, tag_test = train_test_split(
    Feature,
    target,
    test_size=0.2,
    random_state=666,
)

In [6]:
from sklearn.neighbors import KNeighborsClassifier

# Baseline model: a 3-nearest-neighbours classifier trained on the training
# split. fit() returns the estimator itself, so construction and training
# can be chained into one statement.
digit_knn_clf = KNeighborsClassifier(n_neighbors=3).fit(Feature_train, tag_train)

# Mean accuracy on the held-out test split (displayed as the cell output).
digit_knn_clf.score(Feature_test, tag_test)

0.9888888888888889

#### 寻找最好的K

In [9]:
# Try every k in 1..10 and remember the one with the highest test accuracy.
# The comparison is strict, so on a tie the first (smallest) k is kept.
best_k, best_score = -1, 0
for k in range(1, 11):
    digit_knn_clf = KNeighborsClassifier(n_neighbors=k)
    digit_knn_clf.fit(Feature_train, tag_train)
    score = digit_knn_clf.score(Feature_test, tag_test)
    if not score > best_score:
        continue  # no improvement over the best result so far
    best_k, best_score = k, score
print('best_k: ', best_k)
print('best_score: ', best_score)

best_k:  4
best_score:  0.9916666666666667


#### 权重weight，是否考虑距离

简单的多数投票（每个邻居的权重相同）可能会造成一些问题，例如出现平票，或较远的邻居与最近的邻居拥有同样的影响；如果将距离的倒数作为权重，理论上效果更好

设 distances 为某一类别的各个近邻到待预测样本的距离向量（np.array），则该类别的加权票数为：np.sum(1 / distances)

In [16]:
# Search jointly over the voting scheme and k, keeping the first combination
# that reaches the highest test accuracy.
#   "uniform"  - every neighbour's vote counts equally
#   "distance" - each vote is weighted by the inverse of the neighbour's distance
best_method, best_k, best_score = "", -1, 0.0
for method in ("uniform", "distance"):
    for k in range(1, 11):
        digit_knn_clf = KNeighborsClassifier(n_neighbors=k, weights=method)
        digit_knn_clf.fit(Feature_train, tag_train)
        score = digit_knn_clf.score(Feature_test, tag_test)
        if not score > best_score:
            continue  # strict comparison: ties keep the earlier combination
        best_method, best_k, best_score = method, k, score

print("best_method =", best_method)
print("best_k =", best_k)
print("best_score =", best_score)

best_method = uniform
best_k = 4
best_score = 0.9916666666666667


#### 搜索明可夫斯基距离相应的p

In [21]:
# Search jointly over k and the Minkowski exponent p with uniform voting
# (p=1 is Manhattan distance, p=2 is Euclidean distance). k is the outer
# loop, so on a tie the smallest k, then the smallest p, is kept.
best_k, best_p, best_score = -1, -1, 0.0

for k in range(1, 11):
    for p in range(1, 6):
        digit_knn_clf = KNeighborsClassifier(n_neighbors=k, weights="uniform", p=p)
        digit_knn_clf.fit(Feature_train, tag_train)
        score = digit_knn_clf.score(Feature_test, tag_test)
        if not score > best_score:
            continue  # no strict improvement; keep the earlier (k, p)
        best_k, best_p, best_score = k, p, score

print("best_k =", best_k)
print("best_p =", best_p)
print("best_score =", best_score)

best_k = 4
best_p = 2
best_score = 0.9916666666666667


#### 网格搜索和更多kNN中的超参数 --- Grid Search

In [19]:
# Search space for the grid search: one grid whose candidates are the full
# cross product of the values below (2 * 10 * 5 = 100 combinations).
param_grid = [
    dict(
        weights=['uniform', 'distance'],
        n_neighbors=list(range(1, 11)),
        p=list(range(1, 6)),
    )
]

In [23]:
from sklearn.model_selection import GridSearchCV

# Fresh classifier with default settings; it serves as the template
# estimator that is handed to the grid search.
digit_knn_clf = KNeighborsClassifier()

In [26]:
%%time
# Cross-validated exhaustive search over param_grid on the training split.
# n_jobs=-1 runs the fits in parallel on all available processors, and
# verbose=3 prints progress messages while fitting.
digit_grid_search = GridSearchCV(
    estimator=digit_knn_clf,
    param_grid=param_grid,
    n_jobs=-1,
    verbose=3,
)
digit_grid_search.fit(Feature_train, tag_train)

Fitting 3 folds for each of 100 candidates, totalling 300 fits


[Parallel(n_jobs=-1)]: Done  16 tasks      | elapsed:    2.9s
[Parallel(n_jobs=-1)]: Done 112 tasks      | elapsed:   25.0s
[Parallel(n_jobs=-1)]: Done 272 tasks      | elapsed:  1.2min


Wall time: 1min 21s


[Parallel(n_jobs=-1)]: Done 300 out of 300 | elapsed:  1.3min finished


In [27]:
## n_jobs sets how many jobs run in parallel (-1 uses all processors); verbose controls how detailed the progress output is

In [29]:
# Estimator configured with the best-scoring parameter combination found by the search.
digit_grid_search.best_estimator_

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=3, p=3,
           weights='uniform')

In [31]:
# Mean cross-validated score achieved by the best parameter combination.
digit_grid_search.best_score_

0.9853862212943633

In [33]:
# Predict labels for the held-out test features. GridSearchCV delegates to the
# best estimator it found (this relies on the default refit=True).
digit_grid_search.predict(Feature_test)

array([8, 1, 3, 4, 4, 0, 7, 0, 8, 0, 4, 6, 1, 1, 2, 0, 1, 6, 7, 3, 3, 6,
       5, 2, 9, 4, 0, 2, 0, 3, 0, 8, 7, 2, 3, 5, 1, 3, 1, 5, 8, 6, 2, 6,
       3, 1, 3, 0, 0, 4, 9, 9, 2, 8, 7, 0, 5, 4, 0, 9, 5, 5, 8, 7, 4, 2,
       8, 8, 7, 5, 4, 3, 0, 2, 7, 2, 1, 2, 4, 0, 9, 0, 6, 6, 2, 0, 0, 5,
       4, 4, 3, 1, 3, 8, 6, 4, 4, 7, 5, 6, 8, 4, 8, 4, 6, 9, 7, 7, 0, 8,
       8, 3, 9, 7, 1, 8, 4, 2, 7, 0, 0, 4, 9, 6, 7, 3, 4, 6, 4, 8, 4, 7,
       2, 6, 9, 5, 8, 7, 2, 5, 5, 9, 7, 9, 3, 1, 9, 4, 4, 1, 5, 1, 6, 4,
       4, 8, 1, 6, 2, 5, 2, 1, 4, 4, 3, 9, 4, 0, 6, 0, 8, 3, 8, 7, 3, 0,
       3, 0, 5, 9, 2, 7, 1, 8, 1, 4, 3, 3, 7, 8, 2, 7, 2, 2, 8, 0, 5, 7,
       6, 7, 3, 4, 7, 1, 7, 0, 9, 2, 8, 9, 3, 8, 9, 1, 1, 1, 9, 8, 8, 0,
       3, 7, 3, 3, 4, 8, 2, 1, 8, 6, 0, 1, 7, 7, 5, 8, 3, 8, 7, 6, 8, 4,
       2, 6, 2, 3, 7, 4, 9, 3, 5, 0, 6, 3, 8, 3, 3, 1, 4, 5, 3, 2, 5, 6,
       9, 6, 9, 5, 5, 3, 6, 5, 9, 3, 7, 7, 0, 2, 4, 9, 9, 9, 2, 5, 6, 1,
       9, 6, 9, 7, 7, 4, 5, 0, 0, 5, 3, 8, 4, 4, 3,