### 加载数据

In [1]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris

In [2]:
# Load the bundled Iris dataset object; its .data and .target are read below.
iris = load_iris()

In [3]:
# Feature matrix and class-label vector taken from the dataset object.
x, y = iris.data, iris.target

In [4]:
# Sanity-check the dimensions of the feature matrix and the label vector.
x.shape, y.shape

((150, 4), (150,))

In [5]:
# Hold out 30% of the samples for testing. Stratifying on y keeps the class
# proportions the same in both splits; the fixed seed makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    train_size=0.7,
    random_state=233,
    stratify=y,
)

In [6]:
# Confirm the split sizes: train/test features followed by train/test labels.
x_train.shape, x_test.shape, y_train.shape, y_test.shape

((105, 4), (45, 4), (105,), (45,))

### 超参数

In [7]:
from sklearn.neighbors import KNeighborsClassifier

In [8]:
# Baseline k-NN: 3 neighbours, votes weighted by inverse distance
# (the alternative is 'uniform'), and p=2, i.e. Euclidean distance
# under the default Minkowski metric.
neigh = KNeighborsClassifier(n_neighbors=3, weights='distance', p=2)

In [9]:
# Fit the classifier on the training split only.
neigh.fit(x_train, y_train)

In [10]:
# Mean accuracy of the fitted classifier on the held-out test split.
neigh.score(x_test, y_test)

0.9777777777777777

In [11]:
# Brute-force search over n_neighbors (1..19), the two weighting schemes and
# the Minkowski power p (1..6), keeping the best-scoring combination.
#
# NOTE(review): every candidate is ranked by its accuracy on (x_test, y_test),
# so the "best" score printed below is measured on the same data that was used
# to pick the hyperparameters and is optimistically biased. Selecting on a
# validation split or via cross-validation on the training data would avoid this.
# The loop also rebinds `neigh`, replacing the model fitted earlier.
best_score, best_n, best_weight, best_p = -1, -1, '', -1

for n in range(1, 20):
    for weight in ('uniform', 'distance'):
        for p in range(1, 7):
            neigh = KNeighborsClassifier(n_neighbors=n, weights=weight, p=p)
            score = neigh.fit(x_train, y_train).score(x_test, y_test)

            # Strict comparison: on ties the earliest combination is kept.
            if score > best_score:
                best_score, best_n, best_weight, best_p = score, n, weight, p

print("n_neighbors:", best_n)
print("weights:", best_weight)
print("p:", best_p)
print("score:", best_score)

n_neighbors: 5
weights: uniform
p: 2
score: 1.0


### sklearn 超参数搜索

In [12]:
from sklearn.model_selection import GridSearchCV

In [13]:
# Search space: 19 neighbour counts x 2 weighting schemes x 6 values of the
# Minkowski power p.
params = {
    'n_neighbors': list(range(1, 20)),
    'weights': ['uniform', 'distance'],
    'p': list(range(1, 7)),
}

In [14]:
# Exhaustive search over every combination in `params`, scored by cross-validation.
# `cv` and `scoring` are not passed, so scikit-learn's defaults apply.
grid = GridSearchCV(
    estimator=KNeighborsClassifier(),
    param_grid=params,
    n_jobs=-1  # -1 = run the fits in parallel on all available CPU cores
)

In [15]:
# Run the cross-validated search using the training split only.
grid.fit(x_train, y_train)

In [16]:
# Parameter combination with the best mean cross-validation score.
grid.best_params_

{'n_neighbors': 9, 'p': 2, 'weights': 'uniform'}

In [17]:
# Mean cross-validated score of the winning combination; x_test is not involved.
grid.best_score_

0.961904761904762

In [18]:
# Estimator refit with the best parameters (available because `refit` is left
# at its default in the GridSearchCV call).
grid.best_estimator_

In [19]:
# Predicted class labels for the test split from the refit best estimator.
grid.best_estimator_.predict(x_test)

array([2, 2, 0, 1, 1, 1, 2, 0, 2, 0, 0, 1, 0, 2, 1, 1, 0, 2, 2, 1, 0, 1,
       1, 2, 2, 0, 0, 1, 1, 0, 2, 2, 0, 1, 1, 2, 1, 1, 0, 0, 0, 2, 0, 1,
       1])

In [20]:
# Test-set accuracy of the best estimator; the grid search itself was fitted
# on x_train / y_train only.
grid.best_estimator_.score(x_test, y_test)

0.9555555555555556