In [1]:
import os

import pandas as pd
import numpy as np
from sklearn.model_selection import GridSearchCV, train_test_split, KFold
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import LabelEncoder,StandardScaler, MinMaxScaler, normalize
from sklearn.metrics import r2_score
from sklearn.pipeline import Pipeline

In [2]:
# Load the Boston housing CSV.
# The file location can be overridden by setting the BOSTON_CSV environment
# variable; when it is unset (or empty) the original machine-specific path is
# used, so existing behaviour is unchanged on the authoring machine.
boston_csv = os.environ.get("BOSTON_CSV") or (
    r"C:\Users\Administrator.DAI-PC2\Downloads\Shubham\Practical Machine Learning\ClassWork\Datasets\boston.csv"
)
boston = pd.read_csv(boston_csv)

# `medv` is the regression target; every remaining column is used as a feature.
y = boston["medv"]
X = boston.drop(columns=["medv"])

In [3]:
# Hold out 30% of the rows as a test set; the fixed random_state keeps the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=24,
)

In [4]:
# Baseline: a 3-nearest-neighbours regressor trained on the unscaled training
# features. `fit` returns the estimator itself, so `knn` is the fitted model.
knn = KNeighborsRegressor(n_neighbors=3).fit(X_train, y_train)

# Predictions for the held-out rows, scored in the next cell.
y_pred = knn.predict(X_test)

In [5]:
# R^2 of the baseline model on the held-out test set.
print(r2_score(y_true=y_test, y_pred=y_pred))

0.481132571469132


In [6]:
### KNN using GridSearchCV and KFold cross-validation

In [7]:
# 5-fold shuffled cross-validation with a fixed seed so folds are reproducible.
kfold = KFold(n_splits=5, shuffle=True, random_state=24)

# Candidate neighbourhood sizes: k = 1 .. 10.
params = {"n_neighbors": np.arange(1, 11)}

# Exhaustive search over k, scored by R^2, on the full (unscaled) data set.
knn = KNeighborsRegressor()
gcv = GridSearchCV(estimator=knn, param_grid=params, scoring="r2", cv=kfold)
gcv.fit(X, y)

# Per-candidate fold scores and timings as a table (displayed as cell output).
pd.DataFrame(gcv.cv_results_)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_n_neighbors,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.002847,0.002673,0.00396,0.005554,1,{'n_neighbors': 1},0.404823,0.491209,0.339619,0.182374,0.499303,0.383466,0.116485,10
1,0.002191,0.000398,0.00239,0.000488,2,{'n_neighbors': 2},0.497806,0.546118,0.577362,0.389283,0.631588,0.528431,0.082035,2
2,0.001794,0.000978,0.002325,0.00117,3,{'n_neighbors': 3},0.517078,0.543032,0.582253,0.424479,0.637089,0.540786,0.070835,1
3,0.002449,0.004899,0.0,0.0,4,{'n_neighbors': 4},0.476511,0.509636,0.589293,0.395349,0.631282,0.520414,0.083332,3
4,0.003126,0.006252,0.0,0.0,5,{'n_neighbors': 5},0.485285,0.481125,0.614178,0.373811,0.618954,0.514671,0.092311,4
5,0.0,0.0,0.003125,0.006251,6,{'n_neighbors': 6},0.453636,0.427695,0.640865,0.371636,0.605303,0.499827,0.104677,5
6,0.002226,0.001187,0.004719,0.005452,7,{'n_neighbors': 7},0.462762,0.380781,0.626035,0.379204,0.603439,0.490444,0.106132,6
7,0.002592,0.001618,0.005732,0.005847,8,{'n_neighbors': 8},0.446499,0.347482,0.625397,0.370233,0.59032,0.475986,0.113104,7
8,0.003125,0.00625,0.0,0.0,9,{'n_neighbors': 9},0.444168,0.358745,0.602968,0.366073,0.590738,0.472539,0.105898,8
9,0.003125,0.006249,0.003126,0.006251,10,{'n_neighbors': 10},0.443047,0.348591,0.59891,0.359912,0.580867,0.466266,0.106231,9


In [8]:
# Best k found by the search and its mean cross-validated R^2, one per line.
print(gcv.best_params_, gcv.best_score_, sep="\n")

{'n_neighbors': 3}
0.5407863726305988


In [9]:
### KNN using a Pipeline with different scaling options and GridSearchCV

In [11]:
# Candidate scalers that the grid search will swap into the "Scl" step.
scl_std = StandardScaler()
scl_mm = MinMaxScaler()

# Two-step pipeline: an (initially empty) scaling step followed by KNN.
# The "Scl" placeholder is None here and is filled in by the parameter grid.
knn = KNeighborsRegressor()
pipe = Pipeline(steps=[("Scl", None), ("Knn", knn)])

# Show the tunable parameter names (e.g. "Knn__n_neighbors", "Scl").
print(pipe.get_params())

{'memory': None, 'steps': [('Scl', None), ('Knn', KNeighborsRegressor())], 'verbose': False, 'Scl': None, 'Knn': KNeighborsRegressor(), 'Knn__algorithm': 'auto', 'Knn__leaf_size': 30, 'Knn__metric': 'minkowski', 'Knn__metric_params': None, 'Knn__n_jobs': None, 'Knn__n_neighbors': 5, 'Knn__p': 2, 'Knn__weights': 'uniform'}


In [13]:
# Same 5-fold shuffled split (fixed seed) as the earlier search.
kfold = KFold(n_splits=5, shuffle=True, random_state=24)

# Search jointly over k = 1 .. 10 and the scaling step:
# min-max scaling, standardisation, or no scaling (None).
params = {
    "Knn__n_neighbors": np.arange(1, 11),
    "Scl": [scl_mm, scl_std, None],
}

gcv = GridSearchCV(estimator=pipe, param_grid=params, scoring="r2", cv=kfold)
gcv.fit(X, y)

In [14]:
# Best (k, scaler) combination and its mean cross-validated R^2, one per line.
print(gcv.best_params_, gcv.best_score_, sep="\n")

{'Knn__n_neighbors': 3, 'Scl': StandardScaler()}
0.7666263304880155
