In [1]:
import pandas as pd

In [2]:
df = pd.read_csv('weight-height.csv')
df.head()

Unnamed: 0,Gender,Height,Weight
0,Male,73.847017,241.893563
1,Male,68.781904,162.310473
2,Male,74.110105,212.740856
3,Male,71.730978,220.04247
4,Male,69.881796,206.349801


# Separating X (Gender, Height) and Y (y=Weight).

In [3]:
df = pd.get_dummies(df, columns=['Gender'], drop_first=True)
df.head()

Unnamed: 0,Height,Weight,Gender_Male
0,73.847017,241.893563,True
1,68.781904,162.310473,True
2,74.110105,212.740856,True
3,71.730978,220.04247,True
4,69.881796,206.349801,True


In [4]:
x = df.drop('Weight', axis=1)
x.head()

Unnamed: 0,Height,Gender_Male
0,73.847017,True
1,68.781904,True
2,74.110105,True
3,71.730978,True
4,69.881796,True


In [5]:
y = df[['Weight']]
y.head()

Unnamed: 0,Weight
0,241.893563
1,162.310473
2,212.740856
3,220.04247
4,206.349801


# Train = 70%, Test = 30%

In [6]:
from sklearn.model_selection import train_test_split
xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size=0.30)

In [7]:
xtrain.shape

(5988, 2)

In [8]:
df.shape

(8555, 3)

In [9]:
ytest

Unnamed: 0,Weight
3585,213.732348
7522,98.536881
711,218.604697
341,161.378773
4610,194.699194
...,...
4386,150.721988
2956,204.343187
6953,113.230496
2248,236.360678


# Applying Linear Regression

In [10]:
from sklearn.linear_model import LinearRegression
reg = LinearRegression()
reg.fit(xtrain, ytrain)

In [11]:
reg.predict(xtest)

array([[231.11099102],
       [105.53044565],
       [224.80117364],
       ...,
       [118.05762906],
       [221.76640911],
       [167.65096342]])

In [12]:
ytest['Predicted Weight'] = reg.predict(xtest)
ytest

Unnamed: 0,Weight,Predicted Weight
3585,213.732348,231.110991
7522,98.536881,105.530446
711,218.604697,224.801174
341,161.378773,159.878499
4610,194.699194,193.949482
...,...,...
4386,150.721988,147.338202
2956,204.343187,208.152386
6953,113.230496,118.057629
2248,236.360678,221.766409


In [13]:
ytest.drop('Predicted Weight', axis=1, inplace=True)

# Evaluating the Model (Testing and training Accuracy, MSE for testing)

In [14]:
reg.score(xtrain, ytrain)

0.8987565510432344

In [16]:
reg.score(xtest, ytest)

0.9026772732271837

MSE

In [18]:
from sklearn.metrics import mean_squared_error
MSE_LR = mean_squared_error(ytest, reg.predict(xtest))
MSE_LR

100.92223554386571

# Applying KNN Regressor:

In [19]:
from sklearn.neighbors import KNeighborsRegressor

knn = KNeighborsRegressor(n_neighbors=3)

knn.fit(xtrain, ytrain)

In [20]:
ytest['Predicted Weight'] = knn.predict(xtest)

In [21]:
ytest

Unnamed: 0,Weight,Predicted Weight
3585,213.732348,238.228900
7522,98.536881,107.400750
711,218.604697,224.642701
341,161.378773,158.375871
4610,194.699194,195.134357
...,...,...
4386,150.721988,139.750834
2956,204.343187,221.167519
6953,113.230496,124.777979
2248,236.360678,221.949710


In [23]:
ytest.drop('Predicted Weight', axis=1, inplace=True)

# Evaluating the Model (Testing and training Accuracy, MSE for testing)

In [24]:
knn.score(xtrain,ytrain)

0.9326153665879204

In [25]:
knn.score(xtest,ytest)

0.8659757790719324

MSE

In [26]:
from sklearn.metrics import mean_squared_error
MSE_KNN = mean_squared_error(ytest, knn.predict(xtest))
MSE_KNN

138.98114491448433

# Comparing KNN & Linear Regression as well as the KNN Model and Linear regression model

In [27]:
comparison_df = ytest.copy()

In [28]:
comparison_df['KNN weight'] = knn.predict(xtest)
comparison_df['LR weight'] = reg.predict(xtest)

In [29]:
comparison_df.head()

Unnamed: 0,Weight,KNN weight,LR weight
3585,213.732348,238.2289,231.110991
7522,98.536881,107.40075,105.530446
711,218.604697,224.642701,224.801174
341,161.378773,158.375871,159.878499
4610,194.699194,195.134357,193.949482


In [30]:
print('Difference between the MSE of KNN and Linear Regression is:')
MSE_KNN-MSE_LR

Difference between the MSE of KNN and Linear Regression is:


38.05890937061862