# Step 01:
## KNN regression

### 1. Import data set

In [78]:
import pandas as pd
import matplotlib.pyplot as plt
import warnings as wr
wr.filterwarnings("ignore")

In [79]:
df = pd.read_csv("weight-height.csv")

In [80]:
df.head()

Unnamed: 0,Gender,Height,Weight
0,Male,73.847017,241.893563
1,Male,68.781904,162.310473
2,Male,74.110105,212.740856
3,Male,71.730978,220.04247
4,Male,69.881796,206.349801


In [81]:
#The shape of a DataFrame is a tuple of array dimensions that tells the number of rows and columns of a given DataFrame.
df.shape

(8555, 3)

In [82]:
df.isnull().sum() # check null row amount

Gender    0
Height    0
Weight    0
dtype: int64

In [83]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder() #create object for LabelEncoding

In [84]:
df.Gender = le.fit_transform(df[["Gender"]]) #gender column encoding

In [85]:
df.head()

Unnamed: 0,Gender,Height,Weight
0,1,73.847017,241.893563
1,1,68.781904,162.310473
2,1,74.110105,212.740856
3,1,71.730978,220.04247
4,1,69.881796,206.349801


In [86]:
df.tail()

Unnamed: 0,Gender,Height,Weight
8550,0,60.483946,110.565497
8551,0,63.423372,129.921671
8552,0,65.584057,155.942671
8553,0,67.429971,151.678405
8554,0,60.921791,131.253738


### 2. Seperate x(Gender, Height) and y (y=Weight)

In [87]:
x = df.drop("Weight", axis = 1) # x= gender,height

In [88]:
x.head()

Unnamed: 0,Gender,Height
0,1,73.847017
1,1,68.781904
2,1,74.110105
3,1,71.730978
4,1,69.881796


In [89]:
y = df[["Weight"]] #y = weight

In [90]:
y.head()

Unnamed: 0,Weight
0,241.893563
1,162.310473
2,212.740856
3,220.04247
4,206.349801


### 3. Train = 70%, Test = 30%

In [91]:
from sklearn.model_selection import train_test_split 

In [92]:
xtrain, xtest, ytrain, ytest = train_test_split(x,y, train_size=.70, random_state=1) 
#separate data for train and test
#train_size = 70%

In [93]:
xtrain.head() #xtrain value

Unnamed: 0,Gender,Height
1277,1,72.385301
5361,0,60.960147
5728,0,69.119292
4276,1,68.27105
3112,1,64.991405


In [94]:
ytrain.head() #ytrain value

Unnamed: 0,Weight
1277,189.165592
5361,124.876796
5728,168.864403
4276,172.941248
3112,144.551044


In [95]:
xtrain.shape, ytrain.shape # 70% data trained
# total trained row value

((5988, 2), (5988, 1))

In [96]:
xtest.shape, ytest.shape 

((2567, 2), (2567, 1))

### 4. Apply Linear Regression 

In [97]:
from sklearn.linear_model import LinearRegression

In [98]:
reg = LinearRegression() # creating object for linear regression 

In [99]:
reg.fit(xtrain, ytrain) ## training the maodel using training data

LinearRegression()

In [100]:
reg.coef_ # (y = mx + c) the value of m

array([[19.34359322,  5.96887438]])

In [101]:
reg.intercept_ # (y = mx + c) the value of c

array([-244.55280903])

In [102]:
reg.predict(xtest)

array([[143.83013436],
       [197.53417989],
       [184.63515112],
       ...,
       [160.7078525 ],
       [186.82852694],
       [201.4226542 ]])

In [103]:
reg.predict(xtrain) 

array([[206.84955294],
       [119.31064959],
       [168.01156136],
       ...,
       [171.95856487],
       [152.75798487],
       [195.92617133]])

In [104]:
reg.score(xtrain, ytrain)

0.8972135459668117

In [105]:
reg.score(xtest, ytest)

0.9059959607091161

In [106]:
reg.score(x,y) 

0.899942429187834

In [107]:
from sklearn.metrics import mean_squared_error, r2_score

In [108]:
mean_squared_error(ytest, reg.predict(xtest)) #MSE

99.67250998070027

In [109]:
r2_score(ytest, reg.predict(xtest))

0.9059959607091161

### 6. Apply KNN Regression 

In [110]:
from sklearn.neighbors import KNeighborsRegressor

In [111]:
knn = KNeighborsRegressor(n_neighbors = 99, weights = "distance")

In [112]:
knn.fit(xtrain, ytrain)

KNeighborsRegressor(n_neighbors=99, weights='distance')

### 7. Evaluate Model (Accuracy, MSE, Prediction)

In [113]:
knn.predict(xtest)

array([[147.34891636],
       [197.70696685],
       [182.72646495],
       ...,
       [159.10722409],
       [184.29815187],
       [201.40094163]])

In [114]:
knn.predict(xtrain)

array([[189.1655919],
       [124.8767957],
       [168.8644028],
       ...,
       [175.0932547],
       [156.832191 ],
       [198.4602486]])

In [115]:
mean_squared_error(ytest, knn.predict(xtest))

119.82783287285932

In [116]:
r2_score(ytest, knn.predict(xtest))

0.8869868902498507

In [117]:
knn.score(x,y)

0.9649794477731373

### 8. Compare 

linear regression

In [118]:
# Accuray


In [119]:
reg.score(xtrain, ytrain)

0.8972135459668117

In [120]:
reg.score(xtest, ytest) #testing accuracy

0.9059959607091161

In [121]:
reg.score(x,y) #training accuracy

0.899942429187834

KNN Model

In [122]:
# Accuracy
knn.score(xtrain, ytrain)

1.0

In [123]:
knn.score(xtest, ytest) #testing accuracy

0.8869868902498507

In [124]:
knn.score(x,y)

0.9649794477731373

linear regression

In [125]:
# MSE

In [126]:
mean_squared_error(ytest, reg.predict(xtest))

99.67250998070027

KNN Model

In [127]:
# MSE

In [128]:
mean_squared_error(ytest, knn.predict(xtest))

119.82783287285932

linear regression

In [129]:
# prediction

In [130]:
reg.predict(xtrain)

array([[206.84955294],
       [119.31064959],
       [168.01156136],
       ...,
       [171.95856487],
       [152.75798487],
       [195.92617133]])

In [131]:
reg.predict(xtest)

array([[143.83013436],
       [197.53417989],
       [184.63515112],
       ...,
       [160.7078525 ],
       [186.82852694],
       [201.4226542 ]])

KNN model

In [132]:
# prediction

In [133]:
knn.predict(xtrain)

array([[189.1655919],
       [124.8767957],
       [168.8644028],
       ...,
       [175.0932547],
       [156.832191 ],
       [198.4602486]])

In [134]:
knn.predict(xtest)

array([[147.34891636],
       [197.70696685],
       [182.72646495],
       ...,
       [159.10722409],
       [184.29815187],
       [201.40094163]])

# Step 02:

## KNN Classifier:

###  Import data set 

In [135]:
import pandas as pd
import matplotlib.pyplot as plt
import warnings as wr
wr.filterwarnings("ignore")

In [136]:
df2 = pd.read_csv("weight-height.csv")

In [137]:
df2.head()

Unnamed: 0,Gender,Height,Weight
0,Male,73.847017,241.893563
1,Male,68.781904,162.310473
2,Male,74.110105,212.740856
3,Male,71.730978,220.04247
4,Male,69.881796,206.349801


In [138]:
from sklearn.preprocessing import LabelEncoder
le2 = LabelEncoder() # create LabelEncoder object

In [139]:
df2.Gender = le2.fit_transform(df[["Gender"]]) #encode "Gender" column

In [140]:
df2.head()

Unnamed: 0,Gender,Height,Weight
0,1,73.847017,241.893563
1,1,68.781904,162.310473
2,1,74.110105,212.740856
3,1,71.730978,220.04247
4,1,69.881796,206.349801


### 2. Seperate x and (y=Gender)

In [141]:
x1 = df.drop("Gender", axis = 1) # x = 'height','weight'

In [142]:
x1.head()

Unnamed: 0,Height,Weight
0,73.847017,241.893563
1,68.781904,162.310473
2,74.110105,212.740856
3,71.730978,220.04247
4,69.881796,206.349801


In [143]:
y1 = df[["Gender"]] # y = gender

In [144]:
y1.head()

Unnamed: 0,Gender
0,1
1,1
2,1
3,1
4,1


### 3. Train = 70%, Test = 30% 

In [145]:
from sklearn.model_selection import train_test_split 

In [146]:
x1train, x1test, y1train, y1test = train_test_split(x1,y1, train_size=.70, random_state=1)

### 4. Apply KNN Classifier 

In [147]:
from sklearn.neighbors import KNeighborsClassifier

In [148]:
knnc = KNeighborsClassifier(n_neighbors=99, weights='uniform')
# create KNeighborsClassifier object

In [149]:
knnc.fit(x1train, y1train) 

KNeighborsClassifier(n_neighbors=99)

### 5. Evaluate Model by Accuracy 

In [150]:
knnc.predict(x1test) 

array([1, 1, 1, ..., 0, 1, 1])

In [151]:
knnc.predict(x1train) 

array([1, 0, 1, ..., 1, 0, 1])

In [152]:
mean_squared_error(y1test, knn.predict(x1test)) #MSE

50510.057738685035

In [153]:
r2_score(y1test, knnc.predict(x1test))

0.6652774643295971

In [154]:
knnc.score(x1,y1) #Accuracy

0.9152542372881356