In [2]:
from sklearn.datasets import load_iris
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import cross_val_score
import matplotlib.pyplot as plt
%matplotlib inline

In [4]:
# read in the iris data
iris = load_iris()

# create X (features) and y (response)
X = iris.data
y = iris.target

In [6]:
from sklearn.model_selection import GridSearchCV

In [7]:
# define the parameter values that should be searched
k_range = range(1, 31)
weight_options = ["uniform", "distance"]
# create parameter grid
param_grid = dict(n_neighbors=k_range, weights=weight_options)

In [8]:
# instantiate and fit the grid
knn = KNeighborsClassifier()
grid = GridSearchCV(knn, param_grid, cv=10, scoring="accuracy")
grid.fit(X, y)

GridSearchCV(cv=10, error_score='raise',
       estimator=KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=5, p=2,
           weights='uniform'),
       fit_params={}, iid=True, n_jobs=1,
       param_grid={'n_neighbors': range(1, 31), 'weights': ['uniform', 'distance']},
       pre_dispatch='2*n_jobs', refit=True, return_train_score=True,
       scoring='accuracy', verbose=0)

In [9]:
# examine the best model
print(grid.best_score_)
print(grid.best_params_)

0.98
{'n_neighbors': 13, 'weights': 'uniform'}


In [13]:
import numpy as np

knn = KNeighborsClassifier(n_neighbors=13, weights="uniform")
knn.fit(X, y)
to_predict = np.array([3, 5, 4, 2]).reshape(1, -1)
knn.predict(to_predict)

array([1])

In [16]:
# grid can be used to predict with best model
grid.predict(to_predict)

array([1])

In [19]:
from sklearn.model_selection import RandomizedSearchCV

In [20]:
# specify parameter distributions
param_dist = dict(n_neighbors=k_range, weights=weight_options)

In [21]:
# n_iter controls the number of searches
rand = RandomizedSearchCV(knn, param_dist, cv=10, scoring="accuracy", n_iter=10)
rand.fit(X, y)
rand.grid_scores_



[mean: 0.96667, std: 0.04472, params: {'weights': 'distance', 'n_neighbors': 5},
 mean: 0.97333, std: 0.03266, params: {'weights': 'distance', 'n_neighbors': 25},
 mean: 0.96667, std: 0.04472, params: {'weights': 'distance', 'n_neighbors': 21},
 mean: 0.96667, std: 0.04472, params: {'weights': 'uniform', 'n_neighbors': 8},
 mean: 0.96667, std: 0.04472, params: {'weights': 'uniform', 'n_neighbors': 4},
 mean: 0.97333, std: 0.03266, params: {'weights': 'distance', 'n_neighbors': 10},
 mean: 0.96667, std: 0.03333, params: {'weights': 'uniform', 'n_neighbors': 25},
 mean: 0.97333, std: 0.04422, params: {'weights': 'uniform', 'n_neighbors': 14},
 mean: 0.98000, std: 0.03055, params: {'weights': 'distance', 'n_neighbors': 27},
 mean: 0.97333, std: 0.03266, params: {'weights': 'distance', 'n_neighbors': 29}]

In [22]:
# examine best model
print(rand.best_score_)
print(rand.best_params_)

0.98
{'weights': 'distance', 'n_neighbors': 27}


In [23]:
# RandomizedSearch finds the same best score in a fraction of the time