# Step 01 - Regressor

### Import Libraries

In [16]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split as TTS
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error
from sklearn.neighbors import KNeighborsRegressor,KNeighborsClassifier

### Import Dataset

In [17]:
df = pd.read_csv('weight-height.csv')
df.head()

Unnamed: 0,Gender,Height,Weight
0,Male,73.847017,241.893563
1,Male,68.781904,162.310473
2,Male,74.110105,212.740856
3,Male,71.730978,220.04247
4,Male,69.881796,206.349801


### Separate x(gender, height) and y(weight)

In [18]:
LE = LabelEncoder()
df.Gender = LE.fit_transform(df.Gender)
df.sample(5)

Unnamed: 0,Gender,Height,Weight
5524,0,59.968419,115.476813
5380,0,59.239199,111.181236
5520,0,62.622337,106.593986
6480,0,62.192176,119.824288
8542,0,65.48764,146.060156


In [19]:
x = df.drop('Weight',axis=1)
y = df[['Weight']]

In [20]:
x.head(5)

Unnamed: 0,Gender,Height
0,1,73.847017
1,1,68.781904
2,1,74.110105
3,1,71.730978
4,1,69.881796


In [21]:
y.head(5)

Unnamed: 0,Weight
0,241.893563
1,162.310473
2,212.740856
3,220.04247
4,206.349801


### Split dataset (70% training, 30%test)

In [22]:
xtrain, xtest, ytrain, ytest = TTS(x,y,test_size=0.3, random_state=1)

### Apply Linear Regression

In [23]:
LR_reg = LinearRegression() #creating a linear regression model
LR_reg.fit(xtrain, ytrain) #training model
m = LR_reg.coef_[0][0] #coefficient
c = LR_reg.intercept_[0] #interceptor
print(f"m : {m}\nc : {c}")

m : 19.34359322312506
c : -244.5528090325578


In [24]:
predicted = (m*162.94858) + c
predicted

2907.4582387732935

### Evaluate the Model (Testing and training Accuracy, MSE for testing)

In [25]:
#MSE
LR_MSE_test = mean_squared_error(ytest, LR_reg.predict(xtest)) 
print("Test MSE Linear Regressor: ",LR_MSE_test)

Test MSE Linear Regressor:  99.6725099807003


In [26]:
#r2 score
LR_R2_test = LR_reg.score(xtest,ytest)
LR_R2_train=LR_reg.score(xtrain,ytrain)
print("R2 score of Linear regressor(test)",LR_R2_test)
print("R2 score of Linear regressor(train)",LR_R2_train)

R2 score of Linear regressor(test) 0.9059959607091161
R2 score of Linear regressor(train) 0.8972135459668117


### Apply KNN Regressor

In [27]:
KNN_reg = KNeighborsRegressor() #KNN
KNN_reg.fit(xtrain, ytrain)

### Evaluate the Model (Testing and training Accuracy, MSE for testing)

In [28]:
#MSE
KNN_reg_MSE_test = mean_squared_error(ytest, KNN_reg.predict(xtest)) 
print("Test MSE for KNN Regressor: ",KNN_reg_MSE_test)

Test MSE for KNN Regressor:  123.15365863436983


In [29]:
#r2 score
KNN_reg_R2_test = KNN_reg.score(xtest,ytest)
KNN_reg_R2_train=KNN_reg.score(xtrain,ytrain)
print("R2 score of Linear regressor(test)",KNN_reg_R2_test)
print("R2 score of Linear regressor(train)",KNN_reg_R2_train)

R2 score of Linear regressor(test) 0.8838502073708883
R2 score of Linear regressor(train) 0.9170756955071054


### Compare KNN & Linear regression with the KNN model and Linear regression as well.

In [30]:
print("Comparison ==> ")
print("Linear Regression r2 score(test):", LR_R2_test)
print("KNN Regressor r2 score(test):", KNN_reg_R2_test)
print("\nLinear Regression r2 score(train):", LR_R2_train)
print("KNN Regressor r2 score(train):", KNN_reg_R2_train)
print("\nLinear Regression MSE:", LR_MSE_test)
print("KNN Regressor MSE:", KNN_reg_MSE_test)

Comparison ==> 
Linear Regression r2 score(test): 0.9059959607091161
KNN Regressor r2 score(test): 0.8838502073708883

Linear Regression r2 score(train): 0.8972135459668117
KNN Regressor r2 score(train): 0.9170756955071054

Linear Regression MSE: 99.6725099807003
KNN Regressor MSE: 123.15365863436983


# Step 02: Classification

### Separate x and (y=Gender)

In [31]:
xc = df.drop('Gender',axis=1)
yc = df[['Gender']]

### Split dataset (70% training, 30%test)

In [32]:
xctrain, xctest, yctrain, yctest = TTS(xc,yc,test_size=0.3, random_state=1)

### Apply KNN Classifier 

In [33]:
KNN = KNeighborsClassifier(n_neighbors=5)
KNN.fit(xctrain,yctrain['Gender'])

In [34]:
KNN.score(xctrain,yctrain) #train score

0.9263527054108216

In [35]:
KNN.score(xctest,yctest) #test score

0.9146864043630697

In [36]:
for i in range(1,10):
    KNN = KNeighborsClassifier(n_neighbors=i)
    KNN.fit(xctrain,yctrain['Gender'])
    print("For ",i)
    print("test: ",KNN.score(xctest,yctest))
    print("train: ",KNN.score(xctrain,yctrain))

For  1
test:  0.8792364627970394
train:  1.0
For  2
test:  0.8772886638098948
train:  0.9340347361389446
For  3
test:  0.9107908063887806
train:  0.9377087508350034
For  4
test:  0.9088430074016361
train:  0.9258517034068137
For  5
test:  0.9146864043630697
train:  0.9263527054108216
For  6
test:  0.9139072847682119
train:  0.9236806947227789
For  7
test:  0.9193611219322166
train:  0.9236806947227789
For  8
test:  0.9189715621347877
train:  0.9218436873747495
For  9
test:  0.9209193611219322
train:  0.9228456913827655


### odd values of neighbour gives a little bit better accuracy