In [1]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings("ignore")

In [2]:
# Read the height/weight table from an Excel workbook. The path is relative,
# so the file has to sit in the notebook's working directory.
df = pd.read_excel("Height-Weight.xlsx")

In [3]:
df

Unnamed: 0,Gender,Height,Weight
0,Male,73.847017,241.893563
1,Male,68.781904,162.310473
2,Male,74.110105,212.740856
3,Male,71.730978,220.042470
4,Male,69.881796,206.349801
...,...,...,...
8550,Female,60.483946,110.565497
8551,Female,63.423372,129.921671
8552,Female,65.584057,155.942671
8553,Female,67.429971,151.678405


In [4]:
# Four independent working copies of the loaded frame; `df` itself is left
# untouched. Only df1 is modified in the cells shown below.
# NOTE(review): df2-df4 are presumably reserved for other experiments - confirm.
df1, df2, df3, df4 = (df.copy() for _ in range(4))

In [5]:
df.describe()

Unnamed: 0,Height,Weight
count,8555.0,8555.0
mean,66.809925,165.632735
std,3.851454,32.043922
min,54.616858,65.78
25%,63.957684,139.876803
50%,66.985923,168.521567
75%,69.604427,190.666305
max,80.45,269.989698


In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8555 entries, 0 to 8554
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Gender  8555 non-null   object 
 1   Height  8555 non-null   float64
 2   Weight  8555 non-null   float64
dtypes: float64(2), object(1)
memory usage: 200.6+ KB


In [7]:
# Missing-value check: isnull() flags each cell, sum() totals the flags per column.
df.isnull().sum()

Gender    0
Height    0
Weight    0
dtype: int64

In [9]:
# Pairwise correlation between the numeric columns. numeric_only=True leaves
# out the text-valued Gender column, which has no numeric correlation.
df.corr(numeric_only=True)

Unnamed: 0,Height,Weight
Height,1.0,0.922975
Weight,0.922975,1.0


# Label Encoder

In [11]:
df1.columns

Index(['Gender', 'Height', 'Weight'], dtype='object')

In [12]:
# is_numeric_dtype is imported from pandas' public API namespace rather than
# the private pandas.core module path, which is not a supported import location.
from pandas.api.types import is_numeric_dtype
from sklearn.preprocessing import LabelEncoder

# `le` always ends up bound to the encoder fitted most recently (or to an
# unfitted one when df1 has no non-numeric column).
le = LabelEncoder()

# One fitted encoder per encoded column, so integer codes can be mapped back
# to the original labels with label_encoders[col].inverse_transform(...).
# Refitting a single shared encoder would only remember the last column.
label_encoders = {}

for col in df1.columns:
    if not is_numeric_dtype(df1[col]):
        le = LabelEncoder()
        # LabelEncoder numbers the classes in sorted order, so in this data
        # Gender becomes Female -> 0, Male -> 1.
        df1[col] = le.fit_transform(df1[col])
        label_encoders[col] = le

In [13]:
df1

Unnamed: 0,Gender,Height,Weight
0,1,73.847017,241.893563
1,1,68.781904,162.310473
2,1,74.110105,212.740856
3,1,71.730978,220.042470
4,1,69.881796,206.349801
...,...,...,...
8550,0,60.483946,110.565497
8551,0,63.423372,129.921671
8552,0,65.584057,155.942671
8553,0,67.429971,151.678405


# Scaling

In [14]:
from sklearn.preprocessing import MinMaxScaler
# Scaler instance; it is fitted on the Height column in the next cell.
mn = MinMaxScaler()

In [15]:
# Replace Height with its min-max-scaled version. The double brackets pass a
# one-column DataFrame because the scaler expects 2-D input.
# NOTE(review): the scaler is fitted on every row, before the train/test split
# further down, so the test rows' min/max influence the feature scaling.
df1['Height'] = mn.fit_transform(df1[["Height"]])

In [16]:
df1

Unnamed: 0,Gender,Height,Weight
0,1,0.744399,241.893563
1,1,0.548328,162.310473
2,1,0.754583,212.740856
3,1,0.662487,220.042470
4,1,0.590905,206.349801
...,...,...,...
8550,0,0.227115,110.565497
8551,0,0.340900,129.921671
8552,0,0.424540,155.942671
8553,0,0.495995,151.678405


# Separate x, y

In [18]:
# Feature matrix: every column of df1 except the Weight target.
x = df1.drop(columns=['Weight'])

In [19]:
x

Unnamed: 0,Gender,Height
0,1,0.744399
1,1,0.548328
2,1,0.754583
3,1,0.662487
4,1,0.590905
...,...,...
8550,0,0.227115
8551,0,0.340900
8552,0,0.424540
8553,0,0.495995


In [23]:
# Target kept as a one-column DataFrame (list selector), not a Series, so the
# fitted models return predictions as a 2-D array with a single column.
y = df1.loc[:, ['Weight']]

In [24]:
y

Unnamed: 0,Weight
0,241.893563
1,162.310473
2,212.740856
3,220.042470
4,206.349801
...,...
8550,110.565497
8551,129.921671
8552,155.942671
8553,151.678405


# Split Train & Test

In [25]:
from sklearn.model_selection import train_test_split as tts
from sklearn.linear_model import LinearRegression

In [26]:
# Hold out 30% of the rows for evaluation; the fixed random_state makes the
# shuffle, and therefore the split, repeatable between runs.
split_parts = tts(x, y, test_size=0.3, random_state=78)
xtrain, xtest, ytrain, ytest = split_parts

In [27]:
xtrain

Unnamed: 0,Gender,Height
2984,1,0.516626
6487,0,0.296121
7243,0,0.284884
6898,0,0.218997
6136,0,0.416769
...,...,...
6249,0,0.404851
4136,1,0.500368
6039,0,0.331512
470,1,0.470134


In [28]:
ytrain

Unnamed: 0,Weight
2984,190.964765
6487,138.381679
7243,141.855825
6898,106.853924
6136,129.375502
...,...
6249,140.052979
4136,166.334797
6039,117.010257
470,178.797509


# Apply Linear Regression

In [29]:
# Linear regression estimator, created with scikit-learn's default settings.
ln = LinearRegression()

In [30]:
# Fit on the training split: features are Gender (encoded) and scaled Height,
# the target is Weight.
ln.fit(xtrain, ytrain)

In [31]:
# Predicted Weight for the held-out rows. Because the target was a one-column
# DataFrame, the result is a 2-D array with one column.
pred = ln.predict(xtest)

In [32]:
pred

array([[187.53844321],
       [140.10544754],
       [162.886255  ],
       ...,
       [174.14561879],
       [126.51552617],
       [227.16421198]])

In [33]:
# Single hand-written sample: Gender code 0 (Female) and a scaled Height of 0.356.
# The model was fitted on a DataFrame, so the sample is wrapped in a DataFrame
# that reuses the training columns. A bare nested list triggers scikit-learn's
# "X does not have valid feature names" warning, which the blanket warnings
# filter at the top of the notebook would silently hide.
ln.predict(pd.DataFrame([[0, 0.356]], columns=xtrain.columns))

array([[136.45635696]])

# R² Score (Accuracy), MAE, MSE

In [34]:
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

In [35]:
# r2_score returns the coefficient of determination (R²), a goodness-of-fit
# measure for regression. Despite the variable name, it is not a
# classification accuracy.
accuracy = r2_score(ytest, pred)

In [36]:
accuracy

0.8999186031764109

In [37]:
# Mean absolute error of the linear model on the test split, expressed in the
# same units as Weight.
mae = mean_absolute_error(ytest, pred)

In [38]:
mae

8.096114041702721

In [39]:
# Mean squared error of the linear model on the test split (squared Weight units).
mse = mean_squared_error(ytest, pred)

In [40]:
mse

105.07227399923427

# Applying KNN Regression

In [41]:
from sklearn.neighbors import KNeighborsRegressor

In [42]:
# k-nearest-neighbours regressor that bases each prediction on the 5 closest
# training samples.
ne = KNeighborsRegressor(n_neighbors=5)

In [43]:
# Fit on the same training split that the linear model used.
ne.fit(xtrain, ytrain)

In [44]:
# KNN predictions for the held-out rows (2-D array with one column, as with
# the linear model).
kntest = ne.predict(xtest)

In [45]:
kntest

array([[189.52696512],
       [148.70140594],
       [155.20317956],
       ...,
       [171.85830574],
       [124.34042158],
       [234.2667561 ]])

In [46]:
# R² of the KNN model on the test split.
# NOTE(review): this rebinds `accuracy`, so the linear model's R² is no longer
# available under that name after this cell runs.
accuracy = r2_score(ytest, kntest)

In [47]:
accuracy

0.881850322291756

In [48]:
# Same hand-written sample as for the linear model: Gender code 0 (Female),
# scaled Height 0.356. It is passed as a DataFrame that reuses the training
# columns so the feature names match what the regressor was fitted on; a bare
# nested list triggers scikit-learn's "X does not have valid feature names"
# warning, which the blanket warnings filter would silently hide.
ne.predict(pd.DataFrame([[0, 0.356]], columns=xtrain.columns))

array([[136.48957828]])

In [49]:
# Mean squared error of the KNN model on the test split.
# NOTE(review): this rebinds `mse`, replacing the linear model's value.
mse = mean_squared_error(ytest, kntest)

In [50]:
mse

124.0415871789253

In [51]:
# Mean absolute error of the KNN model on the test split.
# NOTE(review): this rebinds `mae`, replacing the linear model's value.
mae = mean_absolute_error(ytest, kntest)

In [52]:
mae

8.833802628117647