# 05 - Classification dengan KNN (K Nearest Neighbours)
- KNN adalah model machine learning yang dapat digunakan untuk melakukan prediksi berdasarkan kedekatan karakteristik dengan sejumlah tetangga terdekat.
- Prediksi yang dilakukan dapat diterapkan baik pada classification maupun regression tasks.

## Sample Dataset

In [1]:
import pandas as pd

# Small census sample: height (cm), weight (kg) and gender label per person.
sensus = dict(
    tinggi=[158, 170, 183, 191, 155, 163, 180, 158, 178],
    berat=[64, 86, 84, 80, 49, 59, 67, 54, 67],
    jk=['pria'] * 4 + ['wanita'] * 5,
)
sensus_df = pd.DataFrame(data=sensus)
sensus_df

Unnamed: 0,tinggi,berat,jk
0,158,64,pria
1,170,86,pria
2,183,84,pria
3,191,80,pria
4,155,49,wanita
5,163,59,wanita
6,180,67,wanita
7,158,54,wanita
8,178,67,wanita


## Visualisasi Data

In [None]:
import matplotlib.pyplot as plt

# Draw one scatter series per gender so each group gets its own legend entry.
fig, ax = plt.subplots()
for jk, grup in sensus_df.groupby('jk'):
    ax.scatter(grup['tinggi'], grup['berat'], label=jk)

# Decorate through the Axes object that was just created.
ax.legend(loc='upper left')
ax.set_title('Sebaran Data Tinggi Badan, Berat Badan, dan Jenis Kelamin')
ax.set_xlabel('Tinggi Badan (cm)')
ax.set_ylabel('Berat Badan (kg)')
ax.grid(True)
plt.show()

## Preprocessing Dataset

In [None]:
import numpy as np

# Features are the height and weight columns; the target is the gender column.
X_train = np.array(sensus_df.loc[:, ['tinggi', 'berat']])
y_train = np.array(sensus_df.loc[:, 'jk'])

print(f'X_train :\n{X_train}\n')
print(f'y_train :\n{y_train}\n')

In [None]:
from sklearn.preprocessing import LabelBinarizer

# Learn the set of labels first, then encode the gender strings numerically.
lb = LabelBinarizer().fit(y_train)
y_train = lb.transform(y_train)
print(f'y_train :\n{y_train}')

In [None]:
# Collapse the binarized labels into a flat 1-D vector before fitting.
y_train = y_train.flatten()
print(f'y_train : {y_train}')

## Training KNN Classification Model

In [None]:
from sklearn.neighbors import KNeighborsClassifier

# Number of nearest neighbours that vote on each prediction.
K=3
model = KNeighborsClassifier(n_neighbors=K)
# Fit on the height/weight features and the encoded gender labels.
model.fit(X_train,y_train)

## Prediksi Jenis Kelamin

In [None]:
# Height (cm) and weight (kg) of the person whose gender is to be predicted.
tinggi_badan = 155
berat_badan = 70

# Wrap the sample in an outer list so it forms a single-row 2-D array.
X_new = np.array([[tinggi_badan, berat_badan]])
X_new

In [None]:
# Predict the encoded class for the new sample.
y_new = model.predict(X_new)
y_new

In [None]:
# Map the encoded prediction back to its original label.
lb.inverse_transform(y_new)

## Visualisasi Nearest Neighbours

In [None]:
# Same per-gender scatter as before, plus the unknown sample as a red square.
fig, ax = plt.subplots()
for jk, grup in sensus_df.groupby('jk'):
    ax.scatter(grup['tinggi'], grup['berat'], label=jk)

ax.scatter(tinggi_badan, berat_badan, marker='s', color='red', label='misterius')

ax.legend(loc='upper left')
ax.set_title('Sebaran Data Tinggi Badan, Berat Badan, dan Jenis Kelamin')
ax.set_xlabel('Tinggi Badan (cm)')
ax.set_ylabel('Berat Badan (kg)')
ax.grid(True)
plt.show()

## Kalkulasi Distance (Euclidean Distance)

In [None]:
# The unknown sample as a flat (height, weight) vector for distance computation.
misterius = np.array([tinggi_badan,berat_badan])
misterius

In [None]:
# Show the training features the unknown sample is compared against.
X_train

In [None]:
from scipy.spatial.distance import euclidean

# Euclidean distance from the unknown sample to every training row, in row order.
data_jarak = list(map(lambda titik: euclidean(misterius, titik), X_train))
data_jarak

In [None]:
# Attach each training row's distance to the unknown sample, then list the
# rows ordered from nearest to farthest.
sensus_df['jarak'] = data_jarak
sensus_df.sort_values(by='jarak')

## Evaluasi KNN Classification Model

### Testing Set

In [None]:
# Held-out samples: one (height, weight) pair per row.
X_test = np.array([
    [168, 65],
    [180, 96],
    [160, 52],
    [169, 67],
])
# Encode the true labels with the binarizer fitted on the training labels,
# then flatten to a 1-D vector.
y_test = lb.transform(
    np.array(['pria', 'pria', 'wanita', 'wanita'])
).flatten()

print(f'X_test : {X_test}')
print(f'y_test : {y_test}')

### Prediksi Terhadap Testing Set

In [None]:
# Run the fitted model on the held-out test features.
y_pred = model.predict(X_test)
y_pred

## Accuracy

In [None]:
from sklearn.metrics import accuracy_score

# Fraction of test samples whose predicted label matches the true label.
acc = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f'accuracy : {acc}')

## Precision

In [None]:
from sklearn.metrics import precision_score

# Share of samples predicted as the positive class that truly belong to it
# (scikit-learn's default positive label, 1, is used).
prec = precision_score(y_true=y_test, y_pred=y_pred)
print(f'precision : {prec}')

## Recall

In [None]:
from sklearn.metrics import recall_score

# Share of actual positive-class samples that the model recovered.
rec = recall_score(y_true=y_test, y_pred=y_pred)
print(f'recall : {rec}')

## F1 Score

In [None]:
from sklearn.metrics import f1_score

# Harmonic mean of precision and recall for the positive class.
f1 = f1_score(y_true=y_test, y_pred=y_pred)
print(f'F1 score : {f1}')

## Classification Report

In [None]:
from sklearn.metrics import classification_report

# Per-class precision / recall / F1 / support summary for the test set.
cls_report = classification_report(y_test, y_pred)

# The report is a pre-aligned text table. Start it on its own line with no
# leading space; an extra space would shift the header row relative to the
# body rows.
print(f'Classification Report :\n{cls_report}')

## Matthews Correlation Coefficient (MCC)

In [None]:
from sklearn.metrics import matthews_corrcoef

# Correlation between the true and predicted labels on the test set.
mcc = matthews_corrcoef(y_true=y_test, y_pred=y_pred)
print(f'MCC : \n {mcc}')