# Step 01:
## KNN regression

### 1. Import data set

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import warnings as wr
wr.filterwarnings("ignore")

In [2]:
df = pd.read_csv("weight-height.csv")

In [3]:
df.head()

Unnamed: 0,Gender,Height,Weight
0,Male,73.847017,241.893563
1,Male,68.781904,162.310473
2,Male,74.110105,212.740856
3,Male,71.730978,220.04247
4,Male,69.881796,206.349801


In [4]:
#The shape of a DataFrame is a tuple of array dimensions that tells the number of rows and columns of a given DataFrame.
df.shape

(8555, 3)

In [5]:
df.isnull().sum() # check null row amount

Gender    0
Height    0
Weight    0
dtype: int64

In [6]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder() #create object for LabelEncoding

In [7]:
df.Gender = le.fit_transform(df[["Gender"]]) #gender column encoding

In [8]:
df.head()

Unnamed: 0,Gender,Height,Weight
0,1,73.847017,241.893563
1,1,68.781904,162.310473
2,1,74.110105,212.740856
3,1,71.730978,220.04247
4,1,69.881796,206.349801


In [9]:
df.tail()

Unnamed: 0,Gender,Height,Weight
8550,0,60.483946,110.565497
8551,0,63.423372,129.921671
8552,0,65.584057,155.942671
8553,0,67.429971,151.678405
8554,0,60.921791,131.253738


### 2. Seperate x(Gender, Height) and y (y=Weight)

In [10]:
x = df.drop("Weight", axis = 1) # x= gender,height

In [11]:
x.head()

Unnamed: 0,Gender,Height
0,1,73.847017
1,1,68.781904
2,1,74.110105
3,1,71.730978
4,1,69.881796


In [12]:
y = df[["Weight"]] #y = weight

In [13]:
y.head()

Unnamed: 0,Weight
0,241.893563
1,162.310473
2,212.740856
3,220.04247
4,206.349801


### 3. Train = 70%, Test = 30%

In [14]:
from sklearn.model_selection import train_test_split 

In [15]:
xtrain, xtest, ytrain, ytest = train_test_split(x,y, train_size=.70, random_state=1) 
#separate data for train and test
#train_size = 70%

In [16]:
xtrain.head() #xtrain value

Unnamed: 0,Gender,Height
1277,1,72.385301
5361,0,60.960147
5728,0,69.119292
4276,1,68.27105
3112,1,64.991405


In [17]:
ytrain.head() #ytrain value

Unnamed: 0,Weight
1277,189.165592
5361,124.876796
5728,168.864403
4276,172.941248
3112,144.551044


In [18]:
xtrain.shape, ytrain.shape # 70% data trained
# total trained row value

((5988, 2), (5988, 1))

In [19]:
xtest.shape, ytest.shape 

((2567, 2), (2567, 1))

### 4. Apply Linear Regression 

In [20]:
from sklearn.linear_model import LinearRegression

In [21]:
reg = LinearRegression() # creating object for linear regression 

In [22]:
reg.fit(xtrain, ytrain) ## training the maodel using training data

LinearRegression()

In [23]:
reg.coef_ # (y = mx + c) the value of m

array([[19.34359322,  5.96887438]])

In [24]:
reg.intercept_ # (y = mx + c) the value of c

array([-244.55280903])

In [25]:
reg.predict(xtest) # xtest prediction

array([[143.83013436],
       [197.53417989],
       [184.63515112],
       ...,
       [160.7078525 ],
       [186.82852694],
       [201.4226542 ]])

In [26]:
reg.predict(xtrain) # xtrain prediction result

array([[206.84955294],
       [119.31064959],
       [168.01156136],
       ...,
       [171.95856487],
       [152.75798487],
       [195.92617133]])

In [28]:
reg.score(xtrain, ytrain) #training accuracy

0.8972135459668117

In [29]:
reg.score(xtest, ytest) #testing accuracy

0.9059959607091161

In [30]:
reg.score(x,y) #training accuracy

0.899942429187834

In [31]:
from sklearn.metrics import mean_squared_error, r2_score

In [49]:
mean_squared_error(ytest, reg.predict(xtest)) #MSE

99.67250998070027

In [33]:
r2_score(ytest, reg.predict(xtest))

0.9059959607091161

### 6. Apply KNN Regression 

In [34]:
from sklearn.neighbors import KNeighborsRegressor

In [35]:
knn = KNeighborsRegressor(n_neighbors = 99, weights = "distance")

In [36]:
knn.fit(xtrain, ytrain)

KNeighborsRegressor(n_neighbors=99, weights='distance')

### 7. Evaluate Model (Accuracy, MSE, Prediction)

In [37]:
knn.predict(xtest)

array([[147.34891636],
       [197.70696685],
       [182.72646495],
       ...,
       [159.10722409],
       [184.29815187],
       [201.40094163]])

In [38]:
knn.predict(xtrain)

array([[189.1655919],
       [124.8767957],
       [168.8644028],
       ...,
       [175.0932547],
       [156.832191 ],
       [198.4602486]])

In [39]:
mean_squared_error(ytest, knn.predict(xtest))

119.82783287285932

In [40]:
r2_score(ytest, knn.predict(xtest))

0.8869868902498507

In [41]:
knn.score(x,y)

0.9649794477731373

### 8. Compare 

linear regression

In [None]:
# Accuray


In [42]:
reg.score(xtrain, ytrain)

0.8972135459668117

In [43]:
reg.score(xtest, ytest) #testing accuracy

0.9059959607091161

In [44]:
reg.score(x,y) #training accuracy

0.899942429187834

KNN Model

In [45]:
# Accuracy
knn.score(xtrain, ytrain)

1.0

In [46]:
knn.score(xtest, ytest) #testing accuracy

0.8869868902498507

In [48]:
knn.score(x,y)

0.9649794477731373

linear regression

In [None]:
# MSE

In [50]:
mean_squared_error(ytest, reg.predict(xtest))

99.67250998070027

KNN Model

In [None]:
# MSE

In [52]:
mean_squared_error(ytest, knn.predict(xtest))

119.82783287285932

linear regression

In [None]:
# prediction

In [53]:
reg.predict(xtrain)

array([[206.84955294],
       [119.31064959],
       [168.01156136],
       ...,
       [171.95856487],
       [152.75798487],
       [195.92617133]])

In [54]:
reg.predict(xtest)

array([[143.83013436],
       [197.53417989],
       [184.63515112],
       ...,
       [160.7078525 ],
       [186.82852694],
       [201.4226542 ]])

KNN model

In [None]:
# prediction

In [55]:
knn.predict(xtrain)

array([[189.1655919],
       [124.8767957],
       [168.8644028],
       ...,
       [175.0932547],
       [156.832191 ],
       [198.4602486]])

In [56]:
knn.predict(xtest)

array([[147.34891636],
       [197.70696685],
       [182.72646495],
       ...,
       [159.10722409],
       [184.29815187],
       [201.40094163]])

# Step 02:

## KNN Classifier:

###  Import data set 

In [90]:
import pandas as pd
import matplotlib.pyplot as plt
import warnings as wr
wr.filterwarnings("ignore")

In [91]:
df2 = pd.read_csv("weight-height.csv")

In [92]:
df2.head()

Unnamed: 0,Gender,Height,Weight
0,Male,73.847017,241.893563
1,Male,68.781904,162.310473
2,Male,74.110105,212.740856
3,Male,71.730978,220.04247
4,Male,69.881796,206.349801


In [93]:
from sklearn.preprocessing import LabelEncoder
le2 = LabelEncoder() # create LabelEncoder object

In [94]:
df2.Gender = le2.fit_transform(df[["Gender"]]) #encode "Gender" column

In [95]:
df2.head()

Unnamed: 0,Gender,Height,Weight
0,1,73.847017,241.893563
1,1,68.781904,162.310473
2,1,74.110105,212.740856
3,1,71.730978,220.04247
4,1,69.881796,206.349801


### 2. Seperate x and (y=Gender)

In [97]:
x1 = df.drop("Gender", axis = 1) # x = 'height','weight'

In [98]:
x1.head()

Unnamed: 0,Height,Weight
0,73.847017,241.893563
1,68.781904,162.310473
2,74.110105,212.740856
3,71.730978,220.04247
4,69.881796,206.349801


In [99]:
y1 = df[["Gender"]] # y = gender

In [100]:
y1.head()

Unnamed: 0,Gender
0,1
1,1
2,1
3,1
4,1


### 3. Train = 70%, Test = 30% 

In [101]:
from sklearn.model_selection import train_test_split 

In [102]:
x1train, x1test, y1train, y1test = train_test_split(x1,y1, train_size=.70, random_state=1)

### 4. Apply KNN Classifier 

In [103]:
from sklearn.neighbors import KNeighborsClassifier

In [107]:
knnc = KNeighborsClassifier(n_neighbors=99, weights='uniform')
# create KNeighborsClassifier object

In [108]:
knnc.fit(x1train, y1train) 

KNeighborsClassifier(n_neighbors=99)

### 5. Evaluate Model by Accuracy 

In [109]:
knnc.predict(x1test) # predict test value

array([1, 1, 1, ..., 0, 1, 1])

In [110]:
knnc.predict(x1train) # predict train value

array([1, 0, 1, ..., 1, 0, 1])

In [111]:
mean_squared_error(y1test, knn.predict(x1test)) #MSE

50510.057738685035

In [113]:
r2_score(y1test, knnc.predict(x1test))

0.6652774643295971

In [115]:
knnc.score(x1,y1) #Accuracy

0.9152542372881356