In [1]:
from sklearn.datasets import make_regression
x, y = make_regression(n_samples=1000,n_features=2,noise=15,random_state=0)

In [2]:
x

array([[ 0.49949823,  1.26741165],
       [-0.477974  , -1.84306955],
       [-0.13482245, -1.62632194],
       ...,
       [ 0.34965446,  0.5785215 ],
       [-0.52118931, -0.6522936 ],
       [ 2.38074535, -0.03576807]], shape=(1000, 2))

In [3]:
y

array([ 8.42315886e+01, -1.18728502e+02, -4.62794896e+01, -4.76107797e+01,
        6.34505375e+01, -9.55093187e+01,  1.27480604e+01,  3.09939294e+01,
       -1.66932840e+01,  5.84452900e+01,  7.80074895e+01,  1.36813048e+01,
        6.58620630e+01, -2.27594533e+01,  9.04697511e+01,  2.60857875e+01,
       -3.07304718e+00, -9.26866311e+01, -9.08377829e+01,  1.25203282e+01,
       -4.17698353e+01, -1.04868795e+02, -1.25918105e+02, -3.78599198e+01,
       -2.13479739e+01,  1.10320685e+01,  1.15988772e+02, -9.73084764e+01,
       -8.53923079e+01,  5.04978921e+01,  3.46146541e+01, -2.08054365e+01,
        4.28932274e+01, -3.79324257e+01, -2.16137320e+01, -9.10733459e+01,
       -1.84793093e+01,  6.04942351e+01,  1.93597195e+01, -4.54835253e+01,
       -4.57509344e+00,  4.47835113e+01, -4.79071875e+01,  3.44873873e+01,
        1.17635096e+00,  8.45009779e+01,  7.41923895e+01, -6.85786151e+01,
        2.98034948e+01, -3.10335233e+01,  2.97754305e+01, -6.21774965e+01,
        1.52430290e+01,  

In [4]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=.25, random_state=0)

In [5]:
from sklearn.neighbors import KNeighborsRegressor

knn = KNeighborsRegressor(n_neighbors=5, algorithm='auto')

knn.fit(x_train, y_train)
y_pred = knn.predict(x_test)

In [8]:
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

print("R2 Score:",r2_score(y_pred, y_test))
print("MSE",mean_squared_error(y_pred, y_test))
print("MAE", mean_absolute_error(y_pred, y_test))

R2 Score: 0.9090917223996352
MSE 267.6419001487831
MAE 13.195289375218909


In [9]:
from sklearn.model_selection import GridSearchCV

param_grid = {
    'n_neighbors': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
    'weights': ['uniform', 'distance'],
    'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'],
    'metric': ['euclidean', 'manhattan', 'minkowski']
}

grid = GridSearchCV(KNeighborsRegressor(),param_grid=param_grid,refit=True,cv=5,verbose=3)

In [11]:
grid.fit(x_train, y_train)
y_pred = grid.predict(x_test)

Fitting 5 folds for each of 240 candidates, totalling 1200 fits
[CV 1/5] END algorithm=auto, metric=euclidean, n_neighbors=1, weights=uniform;, score=0.873 total time=   0.0s
[CV 2/5] END algorithm=auto, metric=euclidean, n_neighbors=1, weights=uniform;, score=0.893 total time=   0.0s
[CV 3/5] END algorithm=auto, metric=euclidean, n_neighbors=1, weights=uniform;, score=0.862 total time=   0.0s
[CV 4/5] END algorithm=auto, metric=euclidean, n_neighbors=1, weights=uniform;, score=0.882 total time=   0.0s
[CV 5/5] END algorithm=auto, metric=euclidean, n_neighbors=1, weights=uniform;, score=0.894 total time=   0.0s
[CV 1/5] END algorithm=auto, metric=euclidean, n_neighbors=1, weights=distance;, score=0.873 total time=   0.0s
[CV 2/5] END algorithm=auto, metric=euclidean, n_neighbors=1, weights=distance;, score=0.893 total time=   0.0s
[CV 3/5] END algorithm=auto, metric=euclidean, n_neighbors=1, weights=distance;, score=0.862 total time=   0.0s
[CV 4/5] END algorithm=auto, metric=euclidean

In [12]:
grid.best_params_

{'algorithm': 'brute',
 'metric': 'euclidean',
 'n_neighbors': 10,
 'weights': 'distance'}

In [13]:
print("R2 Score:",r2_score(y_pred, y_test))
print("MSE",mean_squared_error(y_pred, y_test))
print("MAE", mean_absolute_error(y_pred, y_test))

R2 Score: 0.911584449140861
MSE 250.413173982665
MAE 12.96820791354103
