# __`K Nearest Neighbor Regressor`__

In [78]:
import pandas as pd
import numpy as np
import matplotlib.pyplot  as plt 
import seaborn as sns

In [79]:
from sklearn.datasets import make_regression

# Synthetic regression problem: 1000 samples with 2 features each and
# Gaussian noise (standard deviation 10) added to the target.
# The fixed random_state makes the generated data reproducible.
X, y = make_regression(
    n_samples=1000,
    n_features=2,
    noise=10,
    random_state=42,
)

In [80]:
from sklearn.model_selection import train_test_split

# Hold out 33% of the samples as a test set; the seed fixes the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

# Trailing expression: display the shapes of both feature matrices.
X_train.shape, X_test.shape

((670, 2), (330, 2))

In [81]:
from sklearn.neighbors import KNeighborsRegressor
# Baseline model: k-nearest-neighbours regressor with the library's default
# hyper-parameters, fitted on the training split only.
regressor = KNeighborsRegressor()
regressor.fit(X_train, y_train)

In [82]:
# Predict the held-out targets with the baseline (untuned) model.
y_pred = regressor.predict(X_test)
# Bare expression so the notebook displays the prediction array.
y_pred

array([ 41.76673258,  -7.59437128, -16.65330434, -12.40227133,
        36.50390441,  20.2429261 ,  61.33943992, -29.17202648,
       -71.76709886, -11.9092177 ,  16.33441338,  -3.05077276,
       -67.94559513,  59.90553659,  15.71045932,   0.1568295 ,
       -60.10947793,  25.4844424 , -28.8095753 ,  27.02911181,
        20.8555342 ,  15.65608547, -23.61510645, -41.03200396,
       -43.92434524,  25.77610628, -58.95652775,  61.61204386,
       -22.26969946,  73.57828341, -77.28105454,  33.68023612,
       -59.88441434,  -7.59437128, -52.41802667,  -6.1272399 ,
        20.57984555,  32.86381788,  14.41379048, -46.12209025,
        84.06465502,  28.9556839 ,  40.33491469, -70.67778804,
        21.6520122 ,  23.45246046, -30.06738109,  -7.56893351,
        60.3765076 ,  66.69890142,  61.10758863,  77.06956965,
       -57.91445319, -32.73737476, -46.12209025, -24.65667254,
       -33.56726916,  34.05975968,  89.90091082,  29.61443663,
        37.95277128,   5.04859495, -23.51901164,  -4.11

In [83]:
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

# Baseline test-set scores, one value per line, in this order:
# R^2, mean absolute error, mean squared error.
print(
    *(
        score(y_test, y_pred)
        for score in (r2_score, mean_absolute_error, mean_squared_error)
    ),
    sep="\n",
)

0.9155927746462499
9.269174435861803
132.70133826064995


## __`HyperParameter Tuning`__

In [84]:
from sklearn.model_selection import GridSearchCV, KFold

# Candidate values for each KNeighborsRegressor hyper-parameter.
neighbors = [7, 8, 9, 10, 11, 12, 13]      # n_neighbors
w = ['uniform', 'distance']                # weights: equal vs. inverse-distance
algo = ['kd_tree', 'ball_tree', 'brute']   # neighbour-search algorithm
p = [1, 2, 3, 4, 5]                        # power of the Minkowski metric
l_s = [1, 2, 5]   # leaf_size: the data set is small, so small leaves are tried, aiming at fast prediction

# The target is continuous, so a plain shuffled K-fold splitter is used.
# StratifiedKFold only supports binary/multiclass targets and raises a
# ValueError when asked to split a continuous `y` such as this one.
cv = KFold(n_splits = 10, shuffle = True, random_state = 42)

# Full Cartesian grid handed to GridSearchCV as `param_grid`.
params = dict(n_neighbors = neighbors, algorithm = algo, p = p, leaf_size = l_s, weights = w)
params

{'n_neighbors': [7, 8, 9, 10, 11, 12, 13],
 'algorithm': ['kd_tree', 'ball_tree', 'brute'],
 'p': [1, 2, 3, 4, 5],
 'leaf_size': [1, 2, 5],
 'weights': ['uniform', 'distance']}

In [85]:
# Exhaustive search over every combination in `params`, scored by R^2 and
# run in parallel on all available cores.
# An integer `cv` with a regressor means plain (unshuffled) K-fold, here with
# 15 folds; the separately defined `cv` splitter object is not used by this search.
grid = GridSearchCV(
    estimator=KNeighborsRegressor(),
    param_grid=params,
    scoring='r2',
    cv=15,
    n_jobs=-1,
    verbose=3,
)
grid.fit(X_train, y_train)

Fitting 15 folds for each of 630 candidates, totalling 9450 fits


In [86]:
# Hyper-parameter combination with the best mean cross-validated R^2.
grid.best_params_

{'algorithm': 'kd_tree',
 'leaf_size': 1,
 'n_neighbors': 10,
 'p': 1,
 'weights': 'distance'}

In [87]:
# Predict the held-out targets with the tuned model. GridSearchCV.predict
# delegates to the best estimator, refit on the whole training split
# (refit=True is the default).
# NOTE: this re-binds `y_pred`; the baseline predictions are no longer available.
y_pred = grid.predict(X_test)
# Bare expression so the notebook displays the prediction array.
y_pred

array([ 38.05562859,  -3.2130404 , -16.23171922, -11.53418981,
        37.63249024,  18.65092356,  61.08082983, -32.17403182,
       -73.99794017, -13.11721621,  23.91599377, -14.40944525,
       -69.12769103,  56.20846239,  15.70917683,   1.72548302,
       -54.77900882,  24.47523064, -25.34665616,  22.95975194,
        24.65389454,  16.49988779, -21.51371669, -39.07317052,
       -45.79148958,  27.98664106, -56.39633447,  58.24926217,
       -20.34458175,  73.70558987, -71.60170961,  27.98079483,
       -63.66882222,  -6.66990945, -49.83557522,  -5.18499476,
        21.98079988,  35.26251743,  12.54270821, -46.01529816,
        83.93026501,  28.23607222,  41.60563897, -78.93766626,
        19.91281181,  27.9965233 , -36.8379866 ,  -9.50060421,
        60.83609202,  70.18293842,  59.80517045,  74.31079812,
       -64.64709854, -32.90394553, -45.95336978, -23.03568347,
       -32.51088446,  38.01487718,  88.65047045,  32.24965809,
        35.10384441,   5.28310377, -24.52116973,  -2.75

In [88]:
# Test-set scores of the tuned model, one value per line, in this order:
# R^2, mean squared error, mean absolute error.
# NOTE: MSE and MAE are printed in the opposite order to the baseline metrics cell.
print(
    *(
        score(y_test, y_pred)
        for score in (r2_score, mean_squared_error, mean_absolute_error)
    ),
    sep="\n",
)


0.9212319135982416
123.83573128884318
8.855239214233727
