# <font color="magenta"><h3 align="center">KNN Algorithm for Classification</h3></font>

<h2 style = "color:brown" >Import Libraries</h2>

In [1]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler

from termcolor import colored

import warnings as wr
wr.filterwarnings('ignore')

<h2 style = "color:brown" >Data Reading</h2>

In [2]:
# Read the height/weight dataset (path is relative to the working directory)
# and show it as the cell output.
df = pd.read_csv(filepath_or_buffer='weight-height.csv')
df

Unnamed: 0,Gender,Height,Weight
0,Male,73.847017,241.893563
1,Male,68.781904,162.310473
2,Male,74.110105,212.740856
3,Male,71.730978,220.042470
4,Male,69.881796,206.349801
...,...,...,...
8550,Female,60.483946,110.565497
8551,Female,63.423372,129.921671
8552,Female,65.584057,155.942671
8553,Female,67.429971,151.678405


<h3 style = "color:brown" >Separate Features and Target</h3>

In [3]:
# Feature matrix: keep every row, restricted to the two measurement columns.
x = df.loc[:, ['Height', 'Weight']]
x.head()

Unnamed: 0,Height,Weight
0,73.847017,241.893563
1,68.781904,162.310473
2,74.110105,212.740856
3,71.730978,220.04247
4,69.881796,206.349801


In [4]:
# Target vector: the class label the classifier has to predict.
y = df.loc[:, 'Gender']
y.head()

0    Male
1    Male
2    Male
3    Male
4    Male
Name: Gender, dtype: object

In [5]:
# Peek at the last two labels to confirm both classes are present in the data.
y.iloc[-2:]

8553    Female
8554    Female
Name: Gender, dtype: object

<h1 style = "color:green" >Split the dataset: 70% for training and 30% for testing</h1>

In [6]:
from sklearn.model_selection import train_test_split

In [7]:
# Hold out 30% of the rows for evaluation; the fixed seed keeps the split reproducible.
xtrain, xtest, ytrain, ytest = train_test_split(
    x,
    y,
    test_size=0.30,
    random_state=42,
)

In [8]:
# First five training rows; the shuffled index shows the split was randomized.
xtrain.iloc[:5]

Unnamed: 0,Height,Weight
553,67.594031,186.751417
1397,71.601697,211.031652
7934,62.625985,143.768451
8367,66.136131,151.814648
3320,71.843308,196.505814


In [9]:
# (rows, columns) of the training and test feature sets
(xtrain.shape, xtest.shape)

((5988, 2), (2567, 2))

In [10]:
# Label vectors must have the same number of rows as their feature sets
(ytrain.shape, ytest.shape)

((5988,), (2567,))

<h1 style = "color:green">Implementing KNN Classifier Model</h1>

In [11]:
# Baseline model on the raw (unscaled) features.
# KNeighborsClassifier is already imported in the first cell, so it is not re-imported here.
knn = KNeighborsClassifier(n_neighbors=5)  # 5 is also the library default

In [12]:
# Fit the baseline model on the unscaled training split
knn.fit(X=xtrain, y=ytrain)

In [13]:
# Predicted labels for every row of the held-out split
knn.predict(X=xtest)

array(['Female', 'Male', 'Male', ..., 'Female', 'Male', 'Female'],
      dtype=object)

<h4 style = "color:indigo">Predict the gender with height 70 and weight 165</h4>

In [33]:
# knn was fitted on a DataFrame with named columns, so query it with a one-row
# DataFrame carrying the same column names; scikit-learn can then validate the
# input against the features seen during fit instead of warning about a bare list.
a = knn.predict(pd.DataFrame({'Height': [70], 'Weight': [165]}))
print(colored(f'Prediction of Gender: {a}',color='light_magenta'))

[95mPrediction of Gender: ['Female'][0m


<h4 style = "color:indigo">Predict the gender with height 65 and weight 165</h4>

In [32]:
# knn was fitted on a DataFrame with named columns, so query it with a one-row
# DataFrame carrying the same column names rather than a bare nested list.
sample = pd.DataFrame({'Height': [65], 'Weight': [165]})
print(colored(f'Prediction of Gender: {knn.predict(sample)}',color='light_magenta'))

[95mPrediction of Gender: ['Male'][0m


<h2 style = "color:green">Model Accuracy</h2>

In [16]:
# Training accuracy: fraction of training rows the baseline model labels correctly
knn.score(X=xtrain, y=ytrain)

0.9291917167668671

In [17]:
# Testing accuracy: fraction of held-out rows the baseline model labels correctly
knn.score(X=xtest, y=ytest)

0.9131281651733542

<h1 style = "color:purple">Feature Scaling - Standardization</h1>

In [18]:
# StandardScaler is already imported in the first cell, so it is not re-imported here.
scaler = StandardScaler()

# Learn the scaling statistics from the training split only, then reuse them for
# the test split so nothing from the held-out rows leaks into preprocessing.
std_xtrain = scaler.fit_transform(xtrain)
std_xtest = scaler.transform(xtest)

In [19]:
# Inspect the standardized training features produced by scaler.fit_transform
std_xtrain

array([[ 0.20805032,  0.66508154],
       [ 1.24737259,  1.42328182],
       [-1.08033091, -0.67714999],
       ...,
       [-0.63961404, -0.27019733],
       [ 0.59198357,  0.43510675],
       [ 0.20670404, -0.4706721 ]])

In [20]:
# Inspect the test features after applying the scaler fitted on the training split
std_xtest

array([[-0.50443941, -0.49291292],
       [ 0.54319636,  1.00517001],
       [ 1.08183278,  0.40901263],
       ...,
       [-2.22460696, -2.07921978],
       [ 0.75155239,  0.79567258],
       [-0.46528896, -1.29856909]])

<h1 style = "color:purple">Train Model</h1>

In [21]:
# Second model, to be trained on the standardized features.
# KNeighborsClassifier is already imported in the first cell, so it is not re-imported here.
knn2 = KNeighborsClassifier(n_neighbors=5)  # 5 is also the library default

In [22]:
# Fit the second model on the standardized training split
knn2.fit(X=std_xtrain, y=ytrain)

In [23]:
# Predicted labels for the standardized held-out split
knn2.predict(X=std_xtest)

array(['Female', 'Male', 'Male', ..., 'Female', 'Male', 'Female'],
      dtype=object)

<h4 style = "color:indigo">Predict the gender with height 70 and weight 165</h4>

In [34]:
# knn2 was fitted on standardized features, so the raw query has to pass through
# the same fitted scaler first. Feeding [[70, 165]] directly would place the point
# far outside the standardized training data and make the prediction meaningless.
query_std = scaler.transform(pd.DataFrame({'Height': [70], 'Weight': [165]}))
print(colored(f'Prediction of Gender: {knn2.predict(query_std)}',color='light_magenta'))

[95mPrediction of Gender: ['Male'][0m


<h4 style = "color:indigo">Predict the gender with height 65 and weight 165</h4>

In [35]:
# knn2 was fitted on standardized features, so the raw query has to pass through
# the same fitted scaler first. Feeding [[65, 165]] directly would place the point
# far outside the standardized training data and make the prediction meaningless.
query_std = scaler.transform(pd.DataFrame({'Height': [65], 'Weight': [165]}))
print(colored(f'Prediction of Gender: {knn2.predict(query_std)}',color='light_magenta'))

[95mPrediction of Gender: ['Male'][0m


 <h2 style = "color:purple">Model Accuracy using Standardization</h2>

In [26]:
# Training accuracy of the model fitted on standardized features
knn2.score(X=std_xtrain, y=ytrain)

0.9268537074148296

In [27]:
# Testing accuracy of the model fitted on standardized features
knn2.score(X=std_xtest, y=ytest)

0.917413322945072