# Importing Dataset

In [1]:
import pandas as pd

In [2]:
# Read the height/weight CSV into a DataFrame.
df = pd.read_csv(filepath_or_buffer='weight-height.csv')
# Preview the first five rows.
df.head(n=5)

Unnamed: 0,Gender,Height,Weight
0,Male,73.847017,241.893563
1,Male,68.781904,162.310473
2,Male,74.110105,212.740856
3,Male,71.730978,220.04247
4,Male,69.881796,206.349801


# Separating X (Gender, Height) and Y (y=Weight).

In [3]:
# One-hot encode 'Gender'. drop_first=True keeps a single indicator column
# instead of one column per category.
# The guard keeps this cell re-runnable: after the first run 'Gender' no longer
# exists in df, and calling get_dummies on it again would raise a KeyError.
if 'Gender' in df.columns:
    df = pd.get_dummies(df, columns=['Gender'], drop_first=True)
df.head()

Unnamed: 0,Height,Weight,Gender_Male
0,73.847017,241.893563,True
1,68.781904,162.310473,True
2,74.110105,212.740856,True
3,71.730978,220.04247,True
4,69.881796,206.349801,True


In [4]:
# Feature matrix: every column of df except the target 'Weight'.
x = df.drop(columns=['Weight'])
x.head(n=5)

Unnamed: 0,Height,Gender_Male
0,73.847017,True
1,68.781904,True
2,74.110105,True
3,71.730978,True
4,69.881796,True


In [5]:
# Target: selected with a list so it stays a one-column DataFrame, not a Series.
y = df.loc[:, ['Weight']]
y.head(n=5)

Unnamed: 0,Weight
0,241.893563
1,162.310473
2,212.740856
3,220.04247
4,206.349801


# Train = 70%, Test = 30%

In [6]:
from sklearn.model_selection import train_test_split

# Fix the shuffle seed so the split, and therefore every metric computed from
# it, is identical after Restart Kernel -> Run All. Any fixed integer works.
RANDOM_STATE = 42

# 70% of the rows go to training, 30% are held out for testing.
xtrain, xtest, ytrain, ytest = train_test_split(
    x, y, test_size=0.30, random_state=RANDOM_STATE
)

In [7]:
# (rows, columns) of the training feature matrix.
xtrain.shape

(5988, 2)

In [8]:
# (rows, columns) of the full DataFrame df.
df.shape

(8555, 3)

In [9]:
# Display the held-out target values.
ytest

Unnamed: 0,Weight
2944,178.327236
7719,130.442536
3892,198.956077
2559,173.830870
637,205.157578
...,...
1015,188.311394
2574,223.397572
7188,117.819984
8134,151.333899


# Applying Linear Regression

In [10]:
from sklearn.linear_model import LinearRegression

# Linear regression model, fitted on the training split.
reg = LinearRegression()
reg.fit(X=xtrain, y=ytrain)

In [11]:
# Predicted weights for the test features (displayed only, not stored).
reg.predict(xtest)

array([[180.17390586],
       [124.21669395],
       [188.53641758],
       ...,
       [127.66631477],
       [155.3054734 ],
       [193.55567843]])

In [12]:
# Attach the linear-regression predictions to ytest for a side-by-side view.
# NOTE: this mutates ytest. The following cell drops the column again so that
# ytest holds only 'Weight' when it is used as the target for scoring.
ytest['Predicted Weight'] = reg.predict(xtest)
ytest

Unnamed: 0,Weight,Predicted Weight
2944,178.327236,180.173906
7719,130.442536,124.216694
3892,198.956077,188.536418
2559,173.830870,178.376990
637,205.157578,211.242396
...,...,...
1015,188.311394,178.866916
2574,223.397572,211.747677
7188,117.819984,127.666315
8134,151.333899,155.305473


In [13]:
# Remove the temporary prediction column so ytest holds only the true weights.
# Rebinding instead of inplace=True avoids mutating the frame in place, and
# errors='ignore' keeps the cell re-runnable when the column is already gone.
ytest = ytest.drop(columns='Predicted Weight', errors='ignore')

# Evaluating the Model (Testing and training Accuracy, MSE for testing)

Training Accuracy (R² score)

In [14]:
# For a regressor, score() returns the coefficient of determination (R^2),
# here evaluated on the training split.
reg.score(xtrain, ytrain)

0.9005938990292603

Testing Accuracy (R² score)

In [15]:
# R^2 of the linear model on the held-out split.
# 'Weight' is selected explicitly so the result does not depend on whether a
# temporary 'Predicted Weight' column is still attached to ytest.
reg.score(xtest, ytest[['Weight']])

0.8984269771318834

MSE

In [16]:
from sklearn.metrics import mean_squared_error

# Mean squared error of the linear model on the held-out split.
# 'Weight' is selected explicitly so the metric does not depend on whether a
# temporary 'Predicted Weight' column is still attached to ytest.
MSE_LR = mean_squared_error(ytest[['Weight']], reg.predict(xtest))
MSE_LR

102.44062666820503

# Applying KNN Regressor:

In [17]:
from sklearn.neighbors import KNeighborsRegressor

# k-nearest-neighbours regressor using the 3 closest training samples,
# fitted on the same training split as the linear model.
knn = KNeighborsRegressor(n_neighbors=3)
knn.fit(X=xtrain, y=ytrain)

In [18]:
# Attach the KNN predictions to ytest for a side-by-side view.
# NOTE: this mutates ytest. A later cell drops the column again so that ytest
# holds only 'Weight' when it is used as the target for scoring.
ytest['Predicted Weight'] = knn.predict(xtest)

In [19]:
# Display the true weights next to the KNN predictions attached above.
ytest

Unnamed: 0,Weight,Predicted Weight
2944,178.327236,178.541925
7719,130.442536,117.879687
3892,198.956077,180.982340
2559,173.830870,174.250610
637,205.157578,217.449098
...,...,...
1015,188.311394,186.613898
2574,223.397572,207.968644
7188,117.819984,132.583893
8134,151.333899,160.066141


In [20]:
# Remove the temporary prediction column so ytest holds only the true weights.
# Rebinding instead of inplace=True avoids mutating the frame in place, and
# errors='ignore' keeps the cell re-runnable when the column is already gone.
ytest = ytest.drop(columns='Predicted Weight', errors='ignore')

# Evaluating the Model (Testing and training Accuracy, MSE for testing)

Training Accuracy (R² score)

In [21]:
# For a regressor, score() returns the coefficient of determination (R^2),
# here evaluated on the training split.
knn.score(xtrain, ytrain)

0.9335457813374535

Testing Accuracy (R² score)

In [22]:
# R^2 of the KNN model on the held-out split.
# 'Weight' is selected explicitly so the result does not depend on whether a
# temporary 'Predicted Weight' column is still attached to ytest.
knn.score(xtest, ytest[['Weight']])

0.8624768045808872

MSE

In [23]:
# Re-importing is harmless and lets this cell run on its own.
from sklearn.metrics import mean_squared_error

# Mean squared error of the KNN model on the held-out split.
# 'Weight' is selected explicitly so the metric does not depend on whether a
# temporary 'Predicted Weight' column is still attached to ytest.
MSE_KNN = mean_squared_error(ytest[['Weight']], knn.predict(xtest))
MSE_KNN

138.69787392701588

# Comparing the predictions and MSE of the KNN and Linear Regression models

In [24]:
# Work on an independent copy so that adding columns leaves ytest untouched.
comparison_df = ytest.copy(deep=True)

In [25]:
# Add both models' test-set predictions as new columns, KNN first and then
# linear regression.
comparison_df = comparison_df.assign(
    **{
        'KNN weight': knn.predict(xtest),
        'LR weight': reg.predict(xtest),
    }
)

In [26]:
# Preview the first rows of the comparison table.
comparison_df.head()

Unnamed: 0,Weight,KNN weight,LR weight
2944,178.327236,178.541925,180.173906
7719,130.442536,117.879687,124.216694
3892,198.956077,180.98234,188.536418
2559,173.83087,174.25061,178.37699
637,205.157578,217.449098,211.242396


In [27]:
# A positive value means KNN has the larger test-set MSE.
print("Difference between the MSE of KNN and Linear Regression is:")
MSE_KNN - MSE_LR

Difference between the MSE of KNN and Linear Regression is:


36.25724725881085