# <font color="magenta"><h3 align="center">KNN Algorithm for Regression</h3></font>

<h2 style = "color:brown" >Import Libraries</h2>

In [1]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import math
from termcolor import colored

import warnings as wr
wr.filterwarnings('ignore')

<h2 style = "color:brown" >Data Reading</h2>

In [2]:
# Read the height/weight records from the CSV file into a DataFrame.
csv_path = 'weight-height.csv'
df = pd.read_csv(csv_path)
# Bare last expression: the notebook renders the frame below the cell.
df

Unnamed: 0,Gender,Height,Weight
0,Male,73.847017,241.893563
1,Male,68.781904,162.310473
2,Male,74.110105,212.740856
3,Male,71.730978,220.042470
4,Male,69.881796,206.349801
...,...,...,...
8550,Female,60.483946,110.565497
8551,Female,63.423372,129.921671
8552,Female,65.584057,155.942671
8553,Female,67.429971,151.678405


In [45]:
# Summarise the frame: index range, column names, non-null counts, dtypes and memory use.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8555 entries, 0 to 8554
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Gender  8555 non-null   object 
 1   Height  8555 non-null   float64
 2   Weight  8555 non-null   float64
dtypes: float64(2), object(1)
memory usage: 200.6+ KB


In [5]:
# Dataset dimensions as a (rows, columns) tuple.
df.shape

(8555, 3)

In [6]:
# Count missing values per column (isna is the canonical spelling of isnull).
df.isna().sum()

Gender    0
Height    0
Weight    0
dtype: int64

<h3 style = "color:brown" >Separate Features and Target</h3>

In [3]:
# Feature matrix: every column except the regression target.
x = df.drop(columns='Weight')
x.head(n=3)

Unnamed: 0,Gender,Height
0,Male,73.847017
1,Male,68.781904
2,Male,74.110105


In [4]:
# Regression target: the Weight column as a Series.
y = df.loc[:, 'Weight']
y.head(n=3)

0    241.893563
1    162.310473
2    212.740856
Name: Weight, dtype: float64

<h2 style = "color:brown" >Encoding Gender</h2>

Gender is encoded numerically: Male = 1, Female = 0.

In [7]:
# Encode the categorical Gender column as integers (Male -> 1, Female -> 0).
# Only the Gender column is touched, so a matching string in any other column
# can never be rewritten by accident. The explicit cast guarantees an integer
# dtype instead of relying on replace() to downcast the object column, and it
# fails loudly if an unexpected label is left over.
# Re-running the cell is safe: already-encoded values pass through unchanged.
gender_codes = {'Male': 1, 'Female': 0}
x = x.assign(Gender=x['Gender'].replace(gender_codes).astype('int64'))

In [8]:
# Spot-check the first two feature rows.
x.iloc[:2]

Unnamed: 0,Gender,Height
0,1,73.847017
1,1,68.781904


In [9]:
# Spot-check the last two feature rows.
x.iloc[-2:]

Unnamed: 0,Gender,Height
8553,0,67.429971
8554,0,60.921791


<h1 style = "color:green">Split dataset for training 70% and testing 30%</h1>

In [10]:
from sklearn.model_selection import train_test_split

In [22]:
# Hold out 30% of the rows for testing; the fixed seed keeps the split reproducible.
xtrain, xtest, ytrain, ytest = train_test_split(
    x,
    y,
    test_size=0.3,
    random_state=42,
)

In [23]:
# First five training feature rows (5 is head()'s default).
xtrain.head(n=5)

Unnamed: 0,Gender,Height
553,1,67.594031
1397,1,71.601697
7934,0,62.625985
8367,0,66.136131
3320,1,71.843308


In [24]:
# First five training targets (5 is head()'s default).
ytrain.head(n=5)

553     186.751417
1397    211.031652
7934    143.768451
8367    151.814648
3320    196.505814
Name: Weight, dtype: float64

<h1 style = "color:green">Implementing KNN Regressor Model</h1>

In [25]:
from sklearn.neighbors import KNeighborsRegressor

# k-nearest-neighbours regressor; 5 neighbours is the library default, spelled out here.
knn = KNeighborsRegressor(n_neighbors=5)

In [26]:
# Fit the regressor on the training split.
knn.fit(X=xtrain, y=ytrain)

In [33]:
# Predict a weight for every held-out row and display the resulting array.
predicted_ytest = knn.predict(X=xtest)
predicted_ytest

array([142.1445657 , 181.38276924, 187.07075266, ..., 101.79654426,
       192.43702356, 145.3632227 ])

<h4 style = "color:purple">Predicted weight for Male with height 70</h4>

In [27]:
# Query the model for a male (Gender=1) with height 70.
# The model was fitted on a DataFrame, so the query is passed as a DataFrame with
# the same column names. This keeps the feature order explicit and avoids the
# "X does not have valid feature names" warning scikit-learn issues for bare lists.
male_query = pd.DataFrame([[1, 70]], columns=['Gender', 'Height'])
a = knn.predict(male_query)
print('Predicted weight: ',a)

Predicted weight:  [199.1199376]


<h4 style = "color:purple">Predicted weight for Female with height 70</h4>

In [28]:
# Query the model for a female (Gender=0) with height 70.
# The model was fitted on a DataFrame, so the query is passed as a DataFrame with
# the same column names. This keeps the feature order explicit and avoids the
# "X does not have valid feature names" warning scikit-learn issues for bare lists.
female_query = pd.DataFrame([[0, 70]], columns=['Gender', 'Height'])
print('Predicted weight: ',knn.predict(female_query))

Predicted weight:  [178.93703646]


<h1 style = "color:green">Model Evaluation</h1>

In [30]:
# R^2 (coefficient of determination) on the training split. For a regressor,
# score() reports R^2 rather than a classification accuracy.
knn.score(X=xtrain, y=ytrain)

0.9172142723737918

In [29]:
# R^2 (coefficient of determination) on the held-out test split. For a regressor,
# score() reports R^2 rather than a classification accuracy.
knn.score(X=xtest, y=ytest)

0.8821086423439015

In [36]:
from sklearn.metrics import r2_score

# R^2 of the test-set predictions, computed explicitly from fresh predictions.
r2_score(y_true=ytest, y_pred=knn.predict(xtest))

0.8821086423439015

In [37]:
from sklearn.metrics import mean_squared_error

# Mean squared error between the actual and predicted test weights.
MSE = mean_squared_error(y_true=ytest, y_pred=predicted_ytest)
MSE

121.33528273624482

In [40]:
# Root mean squared error: the square root of MSE, expressed in the same units as the target.
import math
RMSE = math.sqrt(MSE)
RMSE

11.015229581640359

In [38]:
from sklearn.metrics import mean_absolute_error

# Mean absolute error between the actual and predicted test weights.
MAE = mean_absolute_error(y_true=ytest, y_pred=predicted_ytest)
MAE

8.809865583894819