### Predict weight (kg) from height (cm) and sex

In [59]:
import numpy as np
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.preprocessing import StandardScaler

In [2]:
# Training features, one row per person: [height, sex flag].
# The notebook title gives height in cm; the meaning of the 0/1 sex flag is
# not stated anywhere in the notebook.
X_train = np.array(
    [
        [158, 1],
        [170, 1],
        [183, 1],
        [191, 1],
        [155, 0],
        [163, 0],
        [180, 0],
        [158, 0],
        [170, 0],
    ]
)

# Training targets (weight, kg per the title), aligned row-for-row with X_train.
y_train = [64, 86, 84, 80, 49, 59, 67, 54, 67]

In [6]:
# Held-out features in the same [height, sex flag] layout as the training rows.
X_test = np.array(
    [
        [168, 1],
        [180, 1],
        [160, 0],
        [169, 0],
    ]
)

# Observed weights for the held-out rows; used only for scoring predictions.
y_test = [65, 96, 52, 67]

In [9]:
# Number of nearest training rows consulted for each prediction.
k = 3

# Fit a k-nearest-neighbours regressor on the raw (unscaled) features.
# `fit` returns the estimator itself, so construction and fitting can be chained.
clf = KNeighborsRegressor(n_neighbors=k).fit(X_train, y_train)

# Predicted weights for the held-out rows.
predictions = clf.predict(X_test)

### Predicted weights of 4 samples

In [45]:
# Display the weights predicted from the unscaled features, one per X_test row.
predictions

array([70.66666667, 79.        , 59.        , 70.66666667])

### Coefficient of determination, Mean absolute error, Mean squared error

In [15]:
# Score the unscaled model's predictions against the held-out weights and
# print the three metrics, one per line.
print(
    "\n".join(
        [
            "Coefficient of determination: {}".format(r2_score(y_test, predictions)),
            "Mean absolute error: {}".format(mean_absolute_error(y_test, predictions)),
            "Mean squared error: {}".format(mean_squared_error(y_test, predictions)),
        ]
    )
)

Coefficient of determination: 0.6290565226735438
Mean absolute error: 8.333333333333336
Mean squared error: 95.8888888888889


## Standardize to compare performance

In [30]:
# Scaler used to standardize the feature columns; it is fitted in a later cell.
ss = StandardScaler()

In [33]:
# Learn the scaling statistics from the training rows only, then apply that
# same transformation to both splits so the test rows never influence the fit.
ss.fit(X_train)
X_train_scaled = ss.transform(X_train)
X_test_scaled = ss.transform(X_test)




In [37]:
# Inspect the standardized training features produced by the scaler.
X_train_scaled

array([[-0.9908706 ,  1.11803399],
       [ 0.01869567,  1.11803399],
       [ 1.11239246,  1.11803399],
       [ 1.78543664,  1.11803399],
       [-1.24326216, -0.89442719],
       [-0.57021798, -0.89442719],
       [ 0.86000089, -0.89442719],
       [-0.9908706 , -0.89442719],
       [ 0.01869567, -0.89442719]])

In [51]:
# Fit a separate regressor on the standardized features instead of re-fitting
# `clf` in place. That keeps `clf` as the unscaled model that produced
# `predictions`, so results do not depend on which cells were re-run or in
# what order. The same neighbour count `k` is reused so the two models differ
# only in feature scaling.
clf_scaled = KNeighborsRegressor(n_neighbors=k)
clf_scaled.fit(X_train_scaled, y_train)

# Predicted weights for the held-out rows, from the standardized features.
predictions_scaled = clf_scaled.predict(X_test_scaled)

In [64]:
# Show the first feature column of the test rows (the raw, unscaled values).
X_test[:, 0]

array([168, 180, 160, 169])

### Predicted weights after standardization

In [52]:
# Display the weights predicted from the standardized features, one per test row.
predictions_scaled

array([78.        , 83.33333333, 54.        , 64.33333333])

### Coefficient of determination, Mean absolute error, Mean squared error

In [54]:
# Score the predictions made from standardized features against the held-out
# weights and print the three metrics, one per line.
print(
    "\n".join(
        [
            "Coefficient of determination: {}".format(r2_score(y_test, predictions_scaled)),
            "Mean absolute error: {}".format(mean_absolute_error(y_test, predictions_scaled)),
            "Mean squared error: {}".format(mean_squared_error(y_test, predictions_scaled)),
        ]
    )
)

Coefficient of determination: 0.6706425961745109
Mean absolute error: 7.583333333333336
Mean squared error: 85.13888888888893


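### Observation:<br/>On this 4-sample test set, standardizing the features increases the coefficient of determination (0.629 → 0.671) and lowers both the mean absolute error (8.33 → 7.58) and the mean squared error (95.89 → 85.14).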