In [2]:
import numpy as np
import pandas as pd

In [3]:
df = pd.read_csv('https://raw.githubusercontent.com/selva86/datasets/master/BostonHousing.csv')

In [4]:
df.head()

Unnamed: 0,crim,zn,indus,chas,nox,rm,age,dis,rad,tax,ptratio,b,lstat,medv
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222,18.7,396.9,5.33,36.2


In [5]:
X = df.iloc[:,:-1]
y = df.iloc[:,-1]

In [6]:
from sklearn.model_selection import cross_val_score,KFold
from sklearn.neighbors import KNeighborsRegressor

In [7]:
knn = KNeighborsRegressor()

In [8]:
kfold = KFold(n_splits=5, shuffle=True, random_state=1)
scores = cross_val_score(knn, X, y, cv=kfold, scoring='r2')

In [9]:
scores.mean()

0.4761976351913221

# GridSearchCV

In [10]:
from sklearn.model_selection import GridSearchCV


In [11]:
knn = KNeighborsRegressor()


In [12]:
param_grid = {
    'n_neighbors':[1,3,5,7,10,12,15,17,20],
    'weights':['uniform','distance'],
    'algorithm':['ball_tree', 'kd_tree', 'brute'],
    'p':[1,2]
}

In [13]:
gcv = GridSearchCV(knn, param_grid, scoring='r2', refit=True, cv=kfold, verbose=2)


In [14]:
gcv.fit(X,y)


Fitting 5 folds for each of 108 candidates, totalling 540 fits
[CV] END algorithm=ball_tree, n_neighbors=1, p=1, weights=uniform; total time=   0.0s
[CV] END algorithm=ball_tree, n_neighbors=1, p=1, weights=uniform; total time=   0.0s
[CV] END algorithm=ball_tree, n_neighbors=1, p=1, weights=uniform; total time=   0.0s
[CV] END algorithm=ball_tree, n_neighbors=1, p=1, weights=uniform; total time=   0.0s
[CV] END algorithm=ball_tree, n_neighbors=1, p=1, weights=uniform; total time=   0.0s
[CV] END algorithm=ball_tree, n_neighbors=1, p=1, weights=distance; total time=   0.0s
[CV] END algorithm=ball_tree, n_neighbors=1, p=1, weights=distance; total time=   0.0s
[CV] END algorithm=ball_tree, n_neighbors=1, p=1, weights=distance; total time=   0.0s
[CV] END algorithm=ball_tree, n_neighbors=1, p=1, weights=distance; total time=   0.0s
[CV] END algorithm=ball_tree, n_neighbors=1, p=1, weights=distance; total time=   0.0s
[CV] END algorithm=ball_tree, n_neighbors=1, p=2, weights=uniform; total

In [15]:
gcv.best_params_


{'algorithm': 'ball_tree', 'n_neighbors': 5, 'p': 1, 'weights': 'distance'}

In [16]:
gcv.best_score_


0.6117139367845081

In [23]:
gcv.cv_results_


{'mean_fit_time': array([1.14934921e-02, 0.00000000e+00, 0.00000000e+00, 8.05997849e-04,
        9.00316238e-04, 0.00000000e+00, 3.11851501e-04, 3.00765038e-03,
        4.01496887e-04, 1.19786263e-03, 9.70840454e-05, 5.51223755e-04,
        9.95635986e-04, 0.00000000e+00, 1.22866631e-03, 2.21505165e-03,
        1.41744614e-03, 2.72593498e-03, 3.18555832e-03, 9.96637344e-04,
        1.99365616e-04, 0.00000000e+00, 1.72185898e-04, 0.00000000e+00,
        2.96664238e-03, 3.13425064e-03, 3.12204361e-03, 0.00000000e+00,
        0.00000000e+00, 0.00000000e+00, 4.25119400e-03, 5.97953796e-04,
        2.00223923e-04, 0.00000000e+00, 0.00000000e+00, 3.12333107e-03,
        3.43532562e-03, 0.00000000e+00, 2.84781456e-03, 2.70204544e-03,
        2.22206116e-04, 2.93264389e-03, 0.00000000e+00, 2.88052559e-03,
        2.97398567e-03, 3.77988815e-03, 5.98239899e-04, 1.64160728e-03,
        0.00000000e+00, 4.16946411e-04, 6.39438629e-05, 4.35447693e-04,
        3.00550461e-04, 3.01551819e-04, 0.00000

In [24]:
pd.DataFrame(gcv.cv_results_)[['param_algorithm',	'param_n_neighbors',	'param_p', 'param_weights', 'mean_test_score']].sort_values('mean_test_score',ascending=False)


Unnamed: 0,param_algorithm,param_n_neighbors,param_p,param_weights,mean_test_score
81,brute,5,1,distance,0.611714
45,kd_tree,5,1,distance,0.611714
9,ball_tree,5,1,distance,0.611714
49,kd_tree,7,1,distance,0.605716
85,brute,7,1,distance,0.605716
...,...,...,...,...,...
38,kd_tree,1,2,uniform,0.331522
2,ball_tree,1,2,uniform,0.331522
75,brute,1,2,distance,0.331522
39,kd_tree,1,2,distance,0.331522


# RandomizedSearchCV

In [25]:
from sklearn.model_selection import RandomizedSearchCV


In [26]:
rcv = RandomizedSearchCV(knn, param_grid, scoring='r2', refit=True, cv=kfold, verbose=2)


In [27]:
rcv.fit(X,y)


Fitting 5 folds for each of 10 candidates, totalling 50 fits
[CV] END algorithm=ball_tree, n_neighbors=20, p=1, weights=distance; total time=   0.0s
[CV] END algorithm=ball_tree, n_neighbors=20, p=1, weights=distance; total time=   0.0s
[CV] END algorithm=ball_tree, n_neighbors=20, p=1, weights=distance; total time=   0.0s
[CV] END algorithm=ball_tree, n_neighbors=20, p=1, weights=distance; total time=   0.0s
[CV] END algorithm=ball_tree, n_neighbors=20, p=1, weights=distance; total time=   0.0s
[CV] END algorithm=brute, n_neighbors=17, p=1, weights=uniform; total time=   0.0s
[CV] END algorithm=brute, n_neighbors=17, p=1, weights=uniform; total time=   0.0s
[CV] END algorithm=brute, n_neighbors=17, p=1, weights=uniform; total time=   0.0s
[CV] END algorithm=brute, n_neighbors=17, p=1, weights=uniform; total time=   0.0s
[CV] END algorithm=brute, n_neighbors=17, p=1, weights=uniform; total time=   0.0s
[CV] END algorithm=ball_tree, n_neighbors=12, p=1, weights=uniform; total time=   0.

In [28]:
rcv.best_score_


0.589758956010885

In [29]:
rcv.best_params_


{'weights': 'uniform', 'p': 1, 'n_neighbors': 5, 'algorithm': 'brute'}