In [61]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV

In [62]:
iris = load_iris()
iris.data.shape

(150, 4)

In [63]:
X_train,X_test,y_train,y_test = train_test_split(iris.data,iris.target,random_state=666)

## 使用pipeline求解一元或多元knn问题

In [64]:
from sklearn.preprocessing import PolynomialFeatures
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

def PolynomialClassifier(degree,n_neighbors):
    return Pipeline([
        ("poly",PolynomialFeatures(degree=degree)),
        ("std_scaler",StandardScaler()),
        ("knn",KNeighborsClassifier(n_neighbors=n_neighbors))        
    ])

In [65]:
knn = PolynomialClassifier(degree=1,n_neighbors=3)

In [66]:
knn.fit(X_train,y_train)

Pipeline(memory=None,
     steps=[('poly', PolynomialFeatures(degree=1, include_bias=True, interaction_only=False)), ('std_scaler', StandardScaler(copy=True, with_mean=True, with_std=True)), ('knn', KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=None, n_neighbors=3, p=2,
           weights='uniform'))])

In [67]:
knn.score(X_test,y_test)

0.9736842105263158

## 使用gridSearchCV网格搜索参数调优

In [68]:
param_dict = {'n_neighbors':[1,3,5,7,9,11]}

In [69]:
#数据标准化处理
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
knn = KNeighborsClassifier(n_neighbors=3)
knn = GridSearchCV(knn,param_grid=param_dict,cv=10)

In [70]:
knn.fit(X_train,y_train)



GridSearchCV(cv=10, error_score='raise-deprecating',
       estimator=KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=None, n_neighbors=3, p=2,
           weights='uniform'),
       fit_params=None, iid='warn', n_jobs=None,
       param_grid={'n_neighbors': [1, 3, 5, 7, 9, 11]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)

In [71]:
y_predict = knn.predict(X_test)
print("y_predict:\n",y_predict)

score = knn.score(X_test,y_test)
print("准确率：\n",score)

print("最佳参数:\n",knn.best_params_)
print("最佳结果:\n",knn.best_score_)
print("最佳估计器:\n",knn.best_estimator_)

y_predict:
 [1 2 1 2 0 1 1 2 1 1 1 0 0 0 2 1 0 2 2 2 1 0 2 0 1 1 0 1 2 2 0 0 1 2 1 1 2
 2]
准确率：
 1.0
最佳参数:
 {'n_neighbors': 9}
最佳结果:
 0.9464285714285714
最佳估计器:
 KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=None, n_neighbors=9, p=2,
           weights='uniform')
