# 数据集

In [1]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris

In [2]:
# Load the iris dataset and pull out the feature matrix and the label vector.
iris = load_iris()
x, y = iris.data, iris.target

In [3]:
# 70/30 train/test split, stratified on the labels; the fixed seed keeps it reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    train_size=0.7,
    stratify=y,
    random_state=233,
)
# Display the shapes of the four resulting arrays.
x_train.shape, x_test.shape, y_train.shape, y_test.shape

((105, 4), (45, 4), (105,), (45,))

# 超参数搜索

## KNN超参数搜索

In [4]:
from sklearn.neighbors import KNeighborsClassifier

In [5]:
# Brute-force sweep over every (n_neighbors, weights, p) combination.
# NOTE: each candidate is scored on the held-out test split (x_test / y_test), so the
# winning score is selected on that same split and is not an unbiased estimate.
best_score, best_n, best_weight, best_p = -1, -1, "", -1

for n in range(1, 20):
    for weight in ('uniform', 'distance'):
        for p in range(1, 7):
            # fit() returns the estimator itself, so construction and training can be chained.
            neigh = KNeighborsClassifier(n_neighbors=n, weights=weight, p=p).fit(x_train, y_train)
            score = neigh.score(x_test, y_test)
            # Strict '>' means ties keep the earliest combination encountered.
            if score > best_score:
                best_score, best_n, best_weight, best_p = score, n, weight, p

# Report the winning combination and its test-split accuracy.
for label, value in (
    ("n_neighbors:", best_n),
    ("weights:", best_weight),
    ("p:", best_p),
    ("score:", best_score),
):
    print(label, value)

n_neighbors: 5
weights: uniform
p: 2
score: 1.0


## sklearn超参数搜索

In [6]:
from sklearn.model_selection import GridSearchCV

In [7]:
# Search space for GridSearchCV: 19 neighbour counts x 2 weighting schemes x 6 Minkowski powers.
params = dict(
    n_neighbors=list(range(1, 20)),
    weights=['uniform', 'distance'],
    p=list(range(1, 7)),
)

In [8]:
# Exhaustive search over `params` for a default-constructed KNN classifier.
# No `cv` is passed, so the library default splitting strategy applies;
# n_jobs=-1 asks for all available processors.
grid = GridSearchCV(KNeighborsClassifier(), params, n_jobs=-1)

In [9]:
# Run the search: every combination in `params` is cross-validated on the training split only.
grid.fit(x_train,y_train)

In [10]:
# Hyperparameter combination that achieved the best mean cross-validated score.
grid.best_params_

{'n_neighbors': 9, 'p': 2, 'weights': 'uniform'}

In [11]:
# Mean cross-validated score of the best combination (not a test-split score).
grid.best_score_

0.961904761904762

# 交叉验证（cross_val_score方法）

cross_val_score 将交叉验证和 score 合并到一个方法里：把传入的样本分为5份（对分类器默认使用不打乱顺序的分层K折，而不是随机划分），依次把每一份当作验证集，其余作训练集，每一轮训练结束后输出该折的 score。

## [cross_val_score](https://blog.csdn.net/worther/article/details/126909270)

In [12]:
from sklearn.model_selection import cross_val_score

# A KNN classifier with default hyperparameters, evaluated by 5-fold
# cross-validation on the training split; prints one score per fold.
neigh = KNeighborsClassifier()
cv_scores = cross_val_score(estimator=neigh, X=x_train, y=y_train, cv=5)
print(cv_scores)

[0.95238095 1.         0.95238095 0.85714286 1.        ]


In [13]:
# Same brute-force sweep as the manual search, but each candidate is ranked by its
# mean 5-fold cross-validation score on the training split; the test split is not used.
best_score, best_n, best_weight, best_p = -1, -1, "", -1
best_cv_scores = None

for n in range(1, 20):
    for weight in ('uniform', 'distance'):
        for p in range(1, 7):
            neigh = KNeighborsClassifier(n_neighbors=n, weights=weight, p=p)
            cv_scores = cross_val_score(neigh, x_train, y_train, cv=5)
            score = cv_scores.mean()
            # Strict '>' means ties keep the earliest combination encountered.
            if score > best_score:
                best_score, best_n, best_weight, best_p = score, n, weight, p
                best_cv_scores = cv_scores

# The "score:" line shows the per-fold scores of the winner; their mean is best_score.
for label, value in (
    ("n_neighbors:", best_n),
    ("weights:", best_weight),
    ("p:", best_p),
    ("score:", best_cv_scores),
):
    print(label, value)

n_neighbors: 9
weights: uniform
p: 2
score: [1.         1.         0.95238095 0.85714286 1.        ]


In [14]:
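# The selected hyperparameters (n_neighbors=9, weights='uniform', p=2) are identical to
# GridSearchCV's best_params_, and the mean of the fold scores above (~0.9619) equals grid.best_score_.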