# KNN Regressor
1. Import Database.
2. Separate x, y.
3. Train and test.
4. Apply Linear Regression.
5. Evaluate the Linear Regression model.
6. Apply KNN Regressor.
7. Evaluate the KNN Regressor model.

In [266]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns
import warnings
# NOTE(review): this silences every warning for the rest of the session,
# including deprecation and scikit-learn input-validation warnings; prefer a
# narrower filter (specific category/module) so real problems stay visible.
warnings.filterwarnings('ignore')

In [267]:
# Load the dataset from an Excel workbook; the path is relative, so the file
# must sit in the notebook's working directory.
df = pd.read_excel('Height-Weight.xlsx')

In [268]:
# Preview the first rows to confirm the expected columns were loaded.
df.head()

Unnamed: 0,Gender,Height,Weight
0,Male,73.847017,241.893563
1,Male,68.781904,162.310473
2,Male,74.110105,212.740856
3,Male,71.730978,220.04247
4,Male,69.881796,206.349801


In [269]:
# Keep two independent copies of the frame as loaded, before df is modified.
# NOTE(review): neither df1 nor df2 is referenced again in the visible
# notebook — remove them if they are truly unused.
df1 = df.copy()
df2 = df.copy()

In [270]:
# Summary statistics (count, mean, std, quartiles) of the numeric columns.
df.describe()

Unnamed: 0,Height,Weight
count,8555.0,8555.0
mean,66.809925,165.632735
std,3.851454,32.043922
min,54.616858,65.78
25%,63.957684,139.876803
50%,66.985923,168.521567
75%,69.604427,190.666305
max,80.45,269.989698


In [271]:
# Column dtypes and non-null counts.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8555 entries, 0 to 8554
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Gender  8555 non-null   object 
 1   Height  8555 non-null   float64
 2   Weight  8555 non-null   float64
dtypes: float64(2), object(1)
memory usage: 200.6+ KB


In [272]:
# (rows, columns)
df.shape

(8555, 3)

In [273]:
# Count missing values per column.
df.isna().sum()

Gender    0
Height    0
Weight    0
dtype: int64

In [274]:
# Work on a separate copy so the column drop done for the correlation check
# does not touch df.
df0 = df.copy()

In [275]:
# Gender is still a string (object) column here; keep only the numeric
# columns so the correlation matrix can be computed.
df0 = df0.drop(columns='Gender')

In [276]:
# Confirm that only Height and Weight remain.
df0.head()

Unnamed: 0,Height,Weight
0,73.847017,241.893563
1,68.781904,162.310473
2,74.110105,212.740856
3,71.730978,220.04247
4,69.881796,206.349801


In [277]:
# Pairwise correlation between Height and Weight (pandas' default method, Pearson).
df0.corr()

Unnamed: 0,Height,Weight
Height,1.0,0.922975
Weight,0.922975,1.0


# Label Encoder

In [278]:
# List the columns that the encoding loop will iterate over.
df.columns

Index(['Gender', 'Height', 'Weight'], dtype='object')

In [279]:
# Use the public pandas dtype-check API; pandas.core.* is an internal module
# path and is not guaranteed to stay stable between pandas releases.
from pandas.api.types import is_numeric_dtype
from sklearn.preprocessing import LabelEncoder
# Encoder instance used to turn string categories into integer codes.
le = LabelEncoder()

In [280]:
# Replace every non-numeric column with integer codes.
# Each column gets its own fitted encoder, stored in `encoders`, so the
# code -> label mapping of every encoded column stays recoverable through
# encoders[col].classes_ / inverse_transform. A single shared encoder would
# only remember the classes of the column it was fitted on last.
encoders = {}
for col in df.columns:
    if is_numeric_dtype(df[col]):
        continue  # numeric columns are left untouched
    le = LabelEncoder()  # `le` keeps pointing at the most recently fitted encoder
    df[col] = le.fit_transform(df[col])
    encoders[col] = le

In [281]:
# Confirm that Gender now holds integer codes instead of strings.
df.head()

Unnamed: 0,Gender,Height,Weight
0,1,73.847017,241.893563
1,1,68.781904,162.310473
2,1,74.110105,212.740856
3,1,71.730978,220.04247
4,1,69.881796,206.349801


# Scaling

In [282]:
from sklearn.preprocessing import MinMaxScaler
# Default MinMaxScaler: rescales each feature linearly to the [0, 1] range.
mm = MinMaxScaler()

In [283]:
# Rescale Height to [0, 1], overwriting the raw values in df.
# NOTE(review): the scaler is fitted on the full dataset, before the
# train/test split, so test rows influence the min/max used for scaling;
# consider fitting on the training rows only.
df['Height'] = mm.fit_transform(df[['Height']])

In [284]:
# Confirm that Height is now scaled while Weight is unchanged.
df.head()

Unnamed: 0,Gender,Height,Weight
0,1,0.744399,241.893563
1,1,0.548328,162.310473
2,1,0.754583,212.740856
3,1,0.662487,220.04247
4,1,0.590905,206.349801


# Separate x, y

In [285]:
# Feature matrix: every column except the target (Weight).
x = df.drop(columns='Weight')

In [286]:
# Check that the features are Gender and Height only.
x.head()

Unnamed: 0,Gender,Height
0,1,0.744399
1,1,0.548328
2,1,0.754583
3,1,0.662487
4,1,0.590905


In [287]:
# Regression target: the Weight column.
y = df['Weight']

In [288]:
# Preview the target values.
y.head()

0    241.893563
1    162.310473
2    212.740856
3    220.042470
4    206.349801
Name: Weight, dtype: float64

# Split Train & Test

In [289]:
from sklearn.model_selection import train_test_split as tts
from sklearn.linear_model import LinearRegression

In [290]:
# Hold out 30% of the rows for testing; the fixed seed makes the split
# reproducible across runs.
TEST_SIZE = 0.3
SPLIT_SEED = 78
xtrain, xtest, ytrain, ytest = tts(
    x,
    y,
    test_size=TEST_SIZE,
    random_state=SPLIT_SEED,
)

In [291]:
# Training features; the shuffled index shows the rows were sampled at random.
xtrain.head()

Unnamed: 0,Gender,Height
2984,1,0.516626
6487,0,0.296121
7243,0,0.284884
6898,0,0.218997
6136,0,0.416769


In [292]:
# Training targets; the index should line up with xtrain.
ytrain.head()

2984    190.964765
6487    138.381679
7243    141.855825
6898    106.853924
6136    129.375502
Name: Weight, dtype: float64

In [293]:
# Size of the training split: (rows, features).
xtrain.shape

(5988, 2)

# Applying Linear Regression

In [294]:
# Ordinary least-squares linear regression with default settings.
ln = LinearRegression()

In [295]:
# Fit the linear model on the training split only.
ln.fit(xtrain, ytrain)

In [296]:
# Linear-regression predictions for the held-out test rows.
pred = ln.predict(xtest)
pred

array([187.53844321, 140.10544754, 162.886255  , ..., 174.14561879,
       126.51552617, 227.16421198])

In [297]:
# Predict the weight for one hand-written sample.
# The model was fitted on a DataFrame, so the sample is wrapped in a frame
# with the same column names and order as the training features; a bare nested
# list carries no feature names and makes scikit-learn warn about the mismatch.
# Values: Gender code 0 (the class other than 'Male', which was encoded as 1)
# and a Height that is already min-max scaled (0.356), not a raw height.
ln.predict(pd.DataFrame([[0, 0.356]], columns=xtrain.columns))

array([136.45635696])

# R² Score, MAE, MSE

In [298]:
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

In [299]:
# r2_score returns the coefficient of determination (R²) on the test split;
# despite the variable name, this is a regression goodness-of-fit measure,
# not a classification accuracy.
accuracy = r2_score(ytest, pred)
accuracy

0.8999186031764108

In [300]:
# Mean absolute error of the linear model on the test split (in Weight units).
mae = mean_absolute_error(ytest, pred)
mae

8.096114041702727

In [301]:
# Mean squared error of the linear model on the test split (squared Weight units).
mse = mean_squared_error(ytest, pred)
mse

105.07227399923437

# Visualization

In [302]:
#xtrain = xtrain.drop('Weight', axis = 1)

In [303]:
#xtrain.head()

In [304]:
#plt.scatter(xtrain, ytrain)

# Applying KNN Regression

In [305]:
from sklearn.neighbors import KNeighborsRegressor
# k-nearest-neighbours regressor that averages the targets of the 5 closest
# training rows (5 is also scikit-learn's default for n_neighbors).
ne = KNeighborsRegressor(n_neighbors = 5)

In [306]:
# Fit KNN on the same training split used for the linear model.
ne.fit(xtrain, ytrain)

In [307]:
# KNN predictions for the held-out test rows.
kntest = ne.predict(xtest)
kntest

array([189.52696512, 148.70140594, 155.20317956, ..., 171.85830574,
       124.34042158, 234.2667561 ])

In [308]:
# Predict the weight for one hand-written sample with the KNN model.
# The model was fitted on a DataFrame, so the sample is wrapped in a frame
# with the same column names and order as the training features; a bare nested
# list carries no feature names and makes scikit-learn warn about the mismatch.
# Values: Gender code 0 (the class other than 'Male', which was encoded as 1)
# and a Height that is already min-max scaled (0.356), not a raw height.
ne.predict(pd.DataFrame([[0, 0.356]], columns=xtrain.columns))

array([136.48957828])

# Evaluate Model

In [309]:
# R² of the KNN predictions on the same test split, for comparison with the
# linear model. The value is displayed only, not stored.
r2_score(ytest, kntest)

0.881850322291756

In [310]:
# Mean squared error of the KNN predictions on the test split.
# NOTE(review): this rebinds `mse`, which held the linear-regression MSE
# computed earlier; use a model-specific name if both values are needed.
mse = mean_squared_error(ytest, kntest)
mse

124.0415871789253

In [311]:
# Mean absolute error of the KNN predictions on the test split.
# NOTE(review): this rebinds `mae`, which held the linear-regression MAE
# computed earlier; use a model-specific name if both values are needed.
mae = mean_absolute_error(ytest, kntest)
mae

8.833802628117647