
## Nearest-Neighbour Regression: Car MPG
Source: https://stat.ethz.ch/R-manual/R-devel/library/datasets/html/mtcars.html

In [60]:
import numpy as np
import pandas as pd
from sklearn import linear_model
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import RobustScaler

### Load the car dataset

In [61]:
# Read the mtcars table; the path is relative to the notebook's working directory.
car = pd.read_csv(filepath_or_buffer='mtcars.csv')

### Feature selection

In [62]:
# Predictor columns fed to the regressor
# (presumably horsepower, weight and displacement -- confirm against the dataset docs).
rel_features = ['hp', 'wt', 'disp']

# Keep only the selected predictors, all rows.
car_X = car.loc[:, rel_features]

# Preview the first five rows.
car_X.head()

Unnamed: 0,hp,wt,disp
0,110,2.62,160.0
1,110,2.875,160.0
2,93,2.32,108.0
3,110,3.215,258.0
4,175,3.44,360.0


### Split the data into training and test sets

In [63]:
# Ordered hold-out split (no shuffling): the last 10 rows of the file form the
# test set and every row before them forms the training set.
car_X_train, car_X_test = car_X.iloc[:-10], car_X.iloc[-10:]

# The 'mpg' target column is cut at the same row boundary as the features.
car_y_train, car_y_test = car['mpg'].iloc[:-10], car['mpg'].iloc[-10:]

### Scale the data

In [64]:
# Learn the scaling statistics from the training features only, so nothing
# about the test rows influences the fitted scaler.
scaler = RobustScaler().fit(car_X_train)

# Apply the same fitted transform to both splits.
# NOTE: this rebinds car_X_train / car_X_test to their scaled versions.
car_X_train, car_X_test = scaler.transform(car_X_train), scaler.transform(car_X_test)

### Fit the nearest-neighbour model and predict

In [67]:
# Build a k-nearest-neighbours regressor with k=1 and fit it on the scaled
# training features and their mpg targets (fit() returns the estimator itself).
regr = KNeighborsRegressor(n_neighbors=1).fit(car_X_train, car_y_train)

# Predict mpg for the held-out test rows.
car_y_pred = regr.predict(car_X_test)

### Algorithm vs. Humans

In [68]:
# Hand-entered mpg guesses (shown in the 'Humans' column), paired positionally
# with the test rows.
human_pred = [15.8, 14.3, 17, 16, 34, 26, 16.3, 14, 15, 20]

# zip() stops at the shortest input, so a mismatch between the hand-typed list
# and the test set would silently drop rows; fail loudly instead.
assert len(car_y_pred) == len(car_y_test) == len(human_pred), (
    "car_y_pred, car_y_test and human_pred must all have the same length"
)

# One row per test car: model prediction, true mpg, human guess.
df = pd.DataFrame(
    list(zip(car_y_pred, car_y_test, human_pred)),
    columns=['Algo', 'Actual', 'Humans'],
)

# Point is 1 when the model's absolute error is no larger than the human's
# (ties go to the model) and 0 when the human guess is strictly closer.
algo_err = (df['Actual'] - df['Algo']).abs()
human_err = (df['Actual'] - df['Humans']).abs()
df['Point'] = np.where(algo_err > human_err, 0, 1)
df

Unnamed: 0,Algo,Actual,Humans,Point
0,15.5,15.2,15.8,1
1,14.3,13.3,14.3,1
2,18.7,19.2,17.0,1
3,33.9,27.3,16.0,1
4,22.8,26.0,34.0,1
5,33.9,30.4,26.0,1
6,14.3,15.8,16.3,0
7,21.0,19.7,14.0,1
8,14.3,15.0,15.0,0
9,21.0,21.4,20.0,1
