In [1]:
import pandas as pd

In [2]:
df=pd.read_csv("weight-height.csv")

# 1. Import data set

In [3]:
df.head()

Unnamed: 0,Gender,Height,Weight
0,Male,73.847017,241.893563
1,Male,68.781904,162.310473
2,Male,74.110105,212.740856
3,Male,71.730978,220.04247
4,Male,69.881796,206.349801


# 2. Separate X (Gender, Height) and Y (y=Weight).

In [4]:
df = pd.get_dummies(df, columns=['Gender'], drop_first=True)
df.head()

Unnamed: 0,Height,Weight,Gender_Male
0,73.847017,241.893563,True
1,68.781904,162.310473,True
2,74.110105,212.740856,True
3,71.730978,220.04247,True
4,69.881796,206.349801,True


In [5]:
x = df.drop('Weight', axis = 1)

In [6]:
x.head()

Unnamed: 0,Height,Gender_Male
0,73.847017,True
1,68.781904,True
2,74.110105,True
3,71.730978,True
4,69.881796,True


In [9]:
y=df[['Weight']]

In [10]:
y.head()

Unnamed: 0,Weight
0,241.893563
1,162.310473
2,212.740856
3,220.04247
4,206.349801


# 3. Train = 70%, Test = 30%

In [11]:
from sklearn.model_selection import train_test_split
xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size=0.30)

In [12]:
ytest

Unnamed: 0,Weight
7471,146.454148
5292,115.228166
1080,153.606833
7138,113.926881
5414,126.787299
...,...
182,165.568499
2197,205.872457
7412,136.393241
7590,117.181701


# 4. Apply Linear Regression

In [13]:
from sklearn.linear_model import LinearRegression
reg = LinearRegression()
reg.fit(xtrain, ytrain)

In [14]:
reg.predict(xtest)

array([[130.40890195],
       [119.30737178],
       [152.20371993],
       ...,
       [133.38406344],
       [130.29640988],
       [171.30163679]])

In [15]:
ytest['Weight Predicted'] = reg.predict(xtest)
ytest

Unnamed: 0,Weight,Weight Predicted
7471,146.454148,130.408902
5292,115.228166,119.307372
1080,153.606833,152.203720
7138,113.926881,127.625226
5414,126.787299,123.698137
...,...,...
182,165.568499,171.443915
2197,205.872457,201.148454
7412,136.393241,133.384063
7590,117.181701,130.296410


In [16]:
ytest.drop('Weight Predicted', axis=1, inplace=True)

# 5. Evaluate the Model (Testing and training Accuracy, MSE for testing)

In [17]:
reg.score(xtrain, ytrain)

0.9017772488166121

In [18]:
reg.score(xtest, ytest)

0.8954549559041649

In [21]:
from sklearn.metrics import mean_squared_error
MSE_Lin_Reg = mean_squared_error(ytest, reg.predict(xtest))

In [22]:
MSE_Lin_Reg

103.46203563046592

# 6. Apply KNN Regressor: Scikit-Learn Link

In [23]:
from sklearn.neighbors import KNeighborsRegressor
knn = KNeighborsRegressor(n_neighbors=3)
knn.fit(xtrain, ytrain)

In [24]:
ytest['Weight Predicted'] = knn.predict(xtest)

In [25]:
ytest

Unnamed: 0,Weight,Weight Predicted
7471,146.454148,128.162424
5292,115.228166,122.575410
1080,153.606833,144.431358
7138,113.926881,143.640063
5414,126.787299,121.480924
...,...,...
182,165.568499,176.801008
2197,205.872457,205.841047
7412,136.393241,144.486916
7590,117.181701,122.807962


In [26]:
ytest.drop('Weight Predicted', axis=1, inplace=True)

# 7. Evaluate the Model (Testing and training Accuracy, MSE for testing)

In [27]:
knn.score(xtrain, ytrain)

0.9333415278438862

In [28]:
knn.score(xtest, ytest)

0.8586399164876413

In [29]:
from sklearn.metrics import mean_squared_error
MSE_KNN = mean_squared_error(ytest, knn.predict(xtest))
MSE_KNN

139.8956987733859

# 8. Compare KNN & Linear Regression with the KNN Model and Linear regression as well.

In [30]:
comparison_df = ytest.copy()
comparison_df['KNN weight'] = knn.predict(xtest)
comparison_df['LR weight'] = reg.predict(xtest)

In [31]:
comparison_df.head()

Unnamed: 0,Weight,KNN weight,LR weight
7471,146.454148,128.162424,130.408902
5292,115.228166,122.57541,119.307372
1080,153.606833,144.431358,152.20372
7138,113.926881,143.640063,127.625226
5414,126.787299,121.480924,123.698137


In [32]:
MSE_KNN-MSE_LR

36.43366314291998