## How to find the best model parameters in scikit-learn?

In [None]:
from sklearn.datasets import load_iris
from sklearn.neighbors import KNeighborsClassifier
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# load the iris dataset object
iris = load_iris()

# pull out the feature matrix (X) and the response vector (y) in one step
X, y = iris.data, iris.target

In [None]:
# instantiate a K-nearest-neighbors classifier with K=5; this object is the
# estimator handed to the parameter searches in the cells below
knn = KNeighborsClassifier(n_neighbors=5)

In [None]:
from sklearn.model_selection import GridSearchCV

In [None]:
# define the parameter values that should be searched:
# K = 1 through 30 (the end of a range is exclusive)
k_range = range(1, 31)

In [None]:
# build the parameter grid: a mapping from the estimator's parameter name
# to the list of candidate values to try
param_grid = {'n_neighbors': list(k_range)}
print(param_grid)

In [None]:
# set up the grid search: every candidate in param_grid is scored by
# accuracy using 10-fold cross-validation
grid = GridSearchCV(
    estimator=knn,
    param_grid=param_grid,
    cv=10,
    scoring='accuracy',
)

In [None]:
# fit the grid with data (runs the cross-validated search over param_grid)
grid.fit(X, y)

In [None]:
# show the search results as a table, keeping only the mean score,
# its standard deviation, and the parameter setting for each candidate
import pandas as pd

pd.DataFrame(grid.cv_results_).loc[:, ['mean_test_score', 'std_test_score', 'params']]

In [None]:
# print the array of mean scores only (one entry per candidate in the grid);
# grid_mean_scores is kept because the plotting cell below uses it
grid_mean_scores = grid.cv_results_['mean_test_score']
print(grid_mean_scores)

In [None]:
# plot the results: mean cross-validated accuracy (y-axis) against K (x-axis)
# NOTE: k_range and grid_mean_scores must have the same length, i.e. both must
# come from the single-parameter search over n_neighbors above
plt.plot(k_range, grid_mean_scores)
plt.xlabel('Value of K for KNN')
plt.ylabel('Cross-Validated Accuracy')

In [None]:
# report the winning candidate: its score, its parameters, and the
# corresponding estimator (one per line)
print(grid.best_score_, grid.best_params_, grid.best_estimator_, sep='\n')

### Searching multiple parameters simultaneously

In [None]:
# candidate values for each parameter in the joint search
weight_options = ['uniform', 'distance']  # neighbor weighting options
k_range = [*range(1, 31)]  # K = 1 through 30, materialized as a list

In [None]:
# parameter grid covering both parameters; the search tries every
# combination of n_neighbors and weights
param_grid = {'n_neighbors': k_range, 'weights': weight_options}
print(param_grid)

In [None]:
# set up the two-parameter grid search (10-fold CV, scored by accuracy,
# without recording training scores) and run it on the data
grid = GridSearchCV(
    estimator=knn,
    param_grid=param_grid,
    cv=10,
    scoring='accuracy',
    return_train_score=False,
)
grid.fit(X, y)

In [None]:
# tabulate the mean score, its standard deviation, and the parameter
# combination for every candidate
pd.DataFrame(grid.cv_results_).loc[:, ['mean_test_score', 'std_test_score', 'params']]

In [None]:
# report the best score and the parameter combination that achieved it
print(grid.best_score_, grid.best_params_, sep='\n')

## Using best parameters to make predictions

In [None]:
# train a model on all of the data using the best parameters found by the
# grid search; reading them from grid.best_params_ (instead of hard-coding
# the values) keeps this cell in sync with whatever the search selected
knn = KNeighborsClassifier(**grid.best_params_)
knn.fit(X, y)

# make a prediction on out-of-sample data (a single observation with
# four feature values)
knn.predict([[3, 5, 4, 2]])

In [None]:
# GridSearchCV automatically refits the best model using all of the data
# (its default refit behaviour), so the fitted search object can be used
# to predict directly
grid.predict([[3, 5, 4, 2]])

### Reducing computational expense using RandomizedSearchCV

In [None]:
from sklearn.model_selection import RandomizedSearchCV

In [None]:
# for a randomized search we describe "parameter distributions" to sample
# from, rather than a "parameter grid" to enumerate exhaustively
param_dist = {'n_neighbors': k_range, 'weights': weight_options}

In [None]:
# n_iter sets how many parameter combinations are sampled and evaluated;
# random_state fixes the sampling so the run is repeatable
rand = RandomizedSearchCV(
    estimator=knn,
    param_distributions=param_dist,
    cv=10,
    scoring='accuracy',
    n_iter=10,
    random_state=5,
    return_train_score=False,
)
rand.fit(X, y)
pd.DataFrame(rand.cv_results_).loc[:, ['mean_test_score', 'std_test_score', 'params']]

In [None]:
# report the best score found by the randomized search and its parameters
print(rand.best_score_, rand.best_params_, sep='\n')