In [70]:
import sklearn
from sklearn.preprocessing import scale
from sklearn.neighbors import KNeighborsRegressor
from sklearn.datasets import load_boston
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold

import numpy as np
import pandas as pd

In [71]:
# Report the versions of the core libraries this notebook was executed with.
for label, module in (('sklearn: ', sklearn), ('pandas:  ', pd), ('numpy:   ', np)):
    print(label, module.__version__)

sklearn:  0.21.3
pandas:   0.25.1
numpy:    1.18.1


In [72]:
# Load the Boston housing dataset once. The returned Bunch carries the feature
# matrix ('data'), the target vector ('target') and metadata such as
# 'feature_names', 'DESCR' and 'filename'.
boston = load_boston(return_X_y=False)
X, y = boston['data'], boston['target']

# Scale the features with sklearn.preprocessing.scale.
# NOTE(review): the scaling is applied to the full dataset here, i.e. before
# any cross-validation split is made further down in the notebook.
X_scaled = scale(X)

# Tabular view of the scaled features, one column per feature name.
# The index holds the target values y rather than row numbers.
pd_boston = pd.DataFrame(data=X_scaled, index=y, columns=boston['feature_names'])

pd_boston

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT
24.0,-0.419782,0.284830,-1.287909,-0.272599,-0.144217,0.413672,-0.120013,0.140214,-0.982843,-0.666608,-1.459000,0.441052,-1.075562
21.6,-0.417339,-0.487722,-0.593381,-0.272599,-0.740262,0.194274,0.367166,0.557160,-0.867883,-0.987329,-0.303094,0.441052,-0.492439
34.7,-0.417342,-0.487722,-0.593381,-0.272599,-0.740262,1.282714,-0.265812,0.557160,-0.867883,-0.987329,-0.303094,0.396427,-1.208727
33.4,-0.416750,-0.487722,-1.306878,-0.272599,-0.835284,1.016303,-0.809889,1.077737,-0.752922,-1.106115,0.113032,0.416163,-1.361517
36.2,-0.412482,-0.487722,-1.306878,-0.272599,-0.835284,1.228577,-0.511180,1.077737,-0.752922,-1.106115,0.113032,0.441052,-1.026501
...,...,...,...,...,...,...,...,...,...,...,...,...,...
22.4,-0.413229,-0.487722,0.115738,-0.272599,0.158124,0.439316,0.018673,-0.625796,-0.982843,-0.803212,1.176466,0.387217,-0.418147
20.6,-0.415249,-0.487722,0.115738,-0.272599,0.158124,-0.234548,0.288933,-0.716639,-0.982843,-0.803212,1.176466,0.441052,-0.500850
23.9,-0.413447,-0.487722,0.115738,-0.272599,0.158124,0.984960,0.797449,-0.773684,-0.982843,-0.803212,1.176466,0.441052,-0.983048
22.0,-0.407764,-0.487722,0.115738,-0.272599,0.158124,0.725672,0.736996,-0.668437,-0.982843,-0.803212,1.176466,0.403225,-0.865302


In [73]:
# Shuffled 5-fold splitter with a fixed seed, passed as `cv` to the
# cross-validation runs below.
kf = KFold(shuffle=True, n_splits=5, random_state=42)

# Accumulator for the mean cross-validation score of each candidate `p`.
p_list = []

In [74]:
neighbors_number = 5

# Candidate powers for the Minkowski metric: 200 evenly spaced values in [1, 10].
linspace_product = np.linspace(1, 10, 200)

# Start from an empty list every time this cell runs. Without the reset,
# re-running the cell keeps appending to the list created in the previous cell,
# so p_list no longer lines up one-to-one with linspace_product.
p_list = []
for lin_i in linspace_product:
    regressor_clf = KNeighborsRegressor(
        n_neighbors=neighbors_number,
        weights='distance',
        p=lin_i,
        metric='minkowski',
    )

    # No explicit fit on the full data: cross_val_score fits its own clones of
    # the estimator on each training fold.
    cross_validation = cross_val_score(
        estimator=regressor_clf,
        X=X_scaled,
        y=y,
        cv=kf,
        scoring='neg_mean_squared_error',
    )
    # One entry per candidate p: the mean negated MSE over the folds.
    p_list.append(cross_validation.mean())


In [81]:
# Scores are negated MSE values, so the largest one is the best.
maximum_validation_result = max(p_list)

# Positions of every grid point that reaches the best score; the first is reported.
indices = [i for i, score in enumerate(p_list)
           if score == maximum_validation_result]

# Report the actual Minkowski power at that position. The previous
# `indices[0] + 1` printed the 1-based grid position, which only equals p
# when the best grid point is the very first one (p = 1.0).
best_p = linspace_product[indices[0]]
print(np.round(best_p, decimals=2))
print(np.round(maximum_validation_result, decimals=2))

1
-16.03
