In [107]:
import numpy as np
import pandas as pd

In [108]:
# Fetch the Boston housing CSV from the selva86/datasets GitHub repository
# (needs network access).
boston_csv_url = 'https://raw.githubusercontent.com/selva86/datasets/master/BostonHousing.csv'
df = pd.read_csv(boston_csv_url)

# Preview the first rows to sanity-check the parsed columns.
df.head()

Unnamed: 0,crim,zn,indus,chas,nox,rm,age,dis,rad,tax,ptratio,b,lstat,medv
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222,18.7,396.9,5.33,36.2


In [109]:
# The last column (`medv` in the preview above) is the regression target;
# every column before it is used as a feature.
target_position = df.shape[1] - 1
X = df.iloc[:, :target_position]
y = df.iloc[:, target_position]

In [110]:
from sklearn.model_selection import cross_val_score, KFold
from sklearn.neighbors import KNeighborsRegressor

# Baseline k-NN regressor built with scikit-learn's default hyperparameters.
# NOTE(review): X is passed to this estimator without any scaling step; k-NN is
# distance-based, so features on very different scales (e.g. `tax` vs `nox`)
# can dominate the metric -- consider a StandardScaler pipeline.
knn = KNeighborsRegressor()

In [111]:
# 5-fold splitter with shuffling; the fixed seed keeps fold assignment
# repeatable. The same splitter object is reused by the searches further down.
kfold = KFold(shuffle=True, n_splits=5, random_state=1)

# One R^2 value per fold for the default k-NN regressor.
scores = cross_val_score(estimator=knn, X=X, y=y, scoring='r2', cv=kfold)

In [112]:
# Average R^2 across the folds -- the baseline the tuned models are compared to.
np.mean(scores)

0.4761976351913221

### GridSearchCV

In [113]:
from sklearn.model_selection import GridSearchCV

In [114]:
# Fresh, unfitted k-NN regressor (default hyperparameters) used as the base
# estimator for the hyperparameter searches below.
knn = KNeighborsRegressor()

In [115]:
# Hyperparameter search space for the k-NN regressor:
# 9 * 2 * 3 * 2 = 108 combinations in total.
param_grid = dict(
    n_neighbors=[1, 3, 5, 7, 10, 12, 15, 17, 20],  # number of neighbours (k)
    weights=['uniform', 'distance'],               # neighbour weighting scheme
    algorithm=['ball_tree', 'kd_tree', 'brute'],   # neighbour-search backend
    p=[1, 2],                                      # Minkowski power (1 = Manhattan, 2 = Euclidean)
)

In [116]:
# Exhaustive search over every combination in `param_grid`, scored by R^2 on
# the shared `kfold` splitter. refit=True retrains the best configuration on
# the full data; n_jobs=-1 uses all available cores.
gcv = GridSearchCV(
    estimator=knn,
    param_grid=param_grid,
    scoring='r2',
    cv=kfold,
    refit=True,
    n_jobs=-1,
    verbose=2,
)

In [117]:
# Run the grid search: 108 candidates x 5 folds = 540 fits (see the log below).
gcv.fit(X,y)

Fitting 5 folds for each of 108 candidates, totalling 540 fits


In [118]:
# Hyperparameters of the top-ranked candidate. The results table further down
# shows identical scores for all three `algorithm` values at the best setting,
# so the reported algorithm is just one of several tied candidates.
gcv.best_params_

{'algorithm': 'ball_tree', 'n_neighbors': 5, 'p': 1, 'weights': 'distance'}

In [119]:
# Mean cross-validated R^2 of the best candidate (compare with the untuned
# baseline mean computed earlier).
gcv.best_score_

0.6117139367845081

In [120]:
# Raw per-candidate results as a dict of arrays.
# NOTE(review): displaying the whole dict produces a very long output; the
# DataFrame view in the next cell is the readable form.
gcv.cv_results_

{'mean_fit_time': array([0.00235963, 0.00288568, 0.00188451, 0.00205197, 0.00276589,
        0.00285935, 0.0031961 , 0.00217443, 0.00281019, 0.00300646,
        0.00283914, 0.00241985, 0.00251288, 0.00289412, 0.00259137,
        0.00244303, 0.00298748, 0.00241256, 0.00208974, 0.00372081,
        0.0037343 , 0.00336385, 0.00281577, 0.00238147, 0.0028192 ,
        0.00297647, 0.00252113, 0.00197349, 0.00245614, 0.00251617,
        0.00340815, 0.00268626, 0.00193815, 0.00219827, 0.01704221,
        0.00241876, 0.00421991, 0.00275826, 0.00250106, 0.00200858,
        0.00285735, 0.00241008, 0.00313902, 0.00259032, 0.00271039,
        0.00316386, 0.01828742, 0.00236936, 0.00224366, 0.00235591,
        0.00179973, 0.00205903, 0.01351786, 0.01095557, 0.00323844,
        0.00242186, 0.00241175, 0.0071455 , 0.00694304, 0.02333679,
        0.05590591, 0.00321479, 0.00337882, 0.00201092, 0.00233388,
        0.00253553, 0.00197449, 0.00312986, 0.02263761, 0.00271382,
        0.01964874, 0.00239205,

In [121]:
# Tabular view of the search: one row per candidate, best mean R^2 first.
(
    pd.DataFrame(gcv.cv_results_)
    .loc[:, ['param_algorithm', 'param_n_neighbors', 'param_p', 'param_weights', 'mean_test_score']]
    .sort_values(by='mean_test_score', ascending=False)
)

Unnamed: 0,param_algorithm,param_n_neighbors,param_p,param_weights,mean_test_score
81,brute,5,1,distance,0.611714
45,kd_tree,5,1,distance,0.611714
9,ball_tree,5,1,distance,0.611714
49,kd_tree,7,1,distance,0.605716
85,brute,7,1,distance,0.605716
...,...,...,...,...,...
38,kd_tree,1,2,uniform,0.331522
2,ball_tree,1,2,uniform,0.331522
75,brute,1,2,distance,0.331522
39,kd_tree,1,2,distance,0.331522


In [122]:
# NOTE(review): this evaluates the bound method without calling it, so the cell
# displays the method's repr rather than any predictions. Call it with a
# feature matrix (e.g. gcv.predict(X)) to predict with the refitted best model.
gcv.predict

<bound method BaseSearchCV.predict of GridSearchCV(cv=KFold(n_splits=5, random_state=1, shuffle=True),
             estimator=KNeighborsRegressor(), n_jobs=-1,
             param_grid={'algorithm': ['ball_tree', 'kd_tree', 'brute'],
                         'n_neighbors': [1, 3, 5, 7, 10, 12, 15, 17, 20],
                         'p': [1, 2], 'weights': ['uniform', 'distance']},
             scoring='r2', verbose=2)>

### RandomizedSearchCV

In [123]:
from sklearn.model_selection import RandomizedSearchCV

In [124]:
# Randomized search over the same grid and CV folds as the exhaustive search.
# n_iter=10 is scikit-learn's default, made explicit here: only 10 of the 108
# grid combinations are sampled. random_state pins which combinations are
# drawn so best_score_ / best_params_ are reproducible; without a seed each
# run may sample a different subset. Outputs recorded before the seed was
# added may differ after a re-run.
rcv = RandomizedSearchCV(
    knn,
    param_grid,
    n_iter=10,
    scoring='r2',
    refit=True,
    cv=kfold,
    verbose=2,
    n_jobs=-1,
    random_state=1,
)

In [125]:
# Run the randomized search: 10 sampled candidates x 5 folds = 50 fits
# (see the log below).
rcv.fit(X,y)

Fitting 5 folds for each of 10 candidates, totalling 50 fits


In [126]:
# Mean cross-validated R^2 of the best candidate among those sampled.
rcv.best_score_

0.6057158068725681

In [127]:
# Hyperparameters of the best sampled candidate. In the recorded run this
# matches the second-ranked setting of the grid-search table (n_neighbors=7,
# p=1, distance weights), which suggests the grid's top setting was not sampled.
rcv.best_params_

{'weights': 'distance', 'p': 1, 'n_neighbors': 7, 'algorithm': 'kd_tree'}