# K-Nearest Neighbors Classifier
This notebook builds a K-Nearest Neighbors (KNN) classifier for the diabetes dataset using scikit-learn's K-Nearest Neighbors implementation.

In [1]:
import numpy as np 
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report

### Load the dataset from CSV file

In [2]:
def loadCSV(filename):
    """Read a CSV file into a pandas DataFrame.

    Parameters
    ----------
    filename : str or file-like
        Anything accepted by ``pandas.read_csv``.

    Returns
    -------
    pandas.DataFrame
        The parsed contents of the file.
    """
    return pd.read_csv(filename)

### Split the dataset into training and testing sets

In [3]:
def spliteDataset(dataset, test_size=0.2, random_state=0):
    """Split a dataset into training and testing features and labels.

    The first eight columns (positions 0-7) are used as features and the
    column at position 8 is used as the label.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Table with at least nine columns.
    test_size : float, optional
        Fraction of rows held out for testing. Defaults to 0.2.
    random_state : int, optional
        Seed forwarded to ``train_test_split`` so the split is reproducible.
        Defaults to 0.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test)``.
    """
    X = dataset.iloc[:, 0:8]
    y = dataset.iloc[:, 8]
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )
    return X_train, X_test, y_train, y_test

### Standardize the train and test datasets

In [4]:
def fitDataset(sc_X, X_train, X_test):
    """Scale the training and test features with the given scaler.

    The scaler is fitted on the training features only (via
    ``fit_transform``) and then applied to the test features (via
    ``transform``).

    Parameters
    ----------
    sc_X : object
        Scaler exposing ``fit_transform`` and ``transform``.
    X_train, X_test : array-like
        Training and test features.

    Returns
    -------
    tuple
        ``(scaled_train, scaled_test)``.
    """
    # Tuple elements are evaluated left to right, so the fit on the training
    # data happens before the test data is transformed.
    return sc_X.fit_transform(X_train), sc_X.transform(X_test)

### Create the model and train it on the training data

In [5]:
def KNN(num, X_train, y_train):
    """Create a K-Nearest Neighbors classifier and fit it on the training data.

    Parameters
    ----------
    num : int
        Number of neighbors (k) used for voting.
    X_train : array-like
        Training features.
    y_train : array-like
        Training labels.

    Returns
    -------
    KNeighborsClassifier
        The fitted classifier, using Euclidean distance.
    """
    # `p` is deliberately not passed: it is the power parameter of the
    # Minkowski metric and is not used when metric='euclidean', so supplying
    # p=1 would only suggest (wrongly) that Manhattan distance is in effect.
    classifier = KNeighborsClassifier(n_neighbors=num, metric='euclidean')
    classifier.fit(X_train, y_train)
    return classifier

### Test the algorithm on test dataset

In [6]:
def predict(classifier, X_test):
    """Return the classifier's predictions for the test features.

    Parameters
    ----------
    classifier : object
        Fitted model exposing a ``predict`` method.
    X_test : array-like
        Features to predict labels for.

    Returns
    -------
    The value returned by ``classifier.predict(X_test)``.
    """
    return classifier.predict(X_test)

### Report the accuracy, precision, and recall

In [7]:
def main():
    """Run the KNN experiment end to end and print the evaluation results.

    Loads the diabetes CSV, splits it into train/test sets, scales the
    features with a ``StandardScaler``, then for each k in 1, 3, 5 and 10
    trains a classifier, predicts on the test set, and prints the accuracy
    score followed by the classification report.
    """
    data = loadCSV('datasets/diabetes_csv.csv')
    X_train, X_test, y_train, y_test = spliteDataset(data)

    scaler = StandardScaler()
    X_train, X_test = fitDataset(scaler, X_train, X_test)

    for k in (1, 3, 5, 10):
        model = KNN(k, X_train, y_train)
        predictions = predict(model, X_test)
        print('Accuracy ', k, ' score: ', accuracy_score(y_test, predictions))
        print(classification_report(y_test, predictions))

In [8]:
# Run the experiment: load, split, scale, then train and evaluate KNN for each k.
main()

Accuracy  1  score:  0.7337662337662337
                 precision    recall  f1-score   support

tested_negative       0.79      0.84      0.81       107
tested_positive       0.57      0.49      0.53        47

       accuracy                           0.73       154
      macro avg       0.68      0.67      0.67       154
   weighted avg       0.72      0.73      0.73       154

Accuracy  3  score:  0.7402597402597403
                 precision    recall  f1-score   support

tested_negative       0.81      0.82      0.81       107
tested_positive       0.58      0.55      0.57        47

       accuracy                           0.74       154
      macro avg       0.69      0.69      0.69       154
   weighted avg       0.74      0.74      0.74       154

Accuracy  5  score:  0.7987012987012987
                 precision    recall  f1-score   support

tested_negative       0.85      0.87      0.86       107
tested_positive       0.68      0.64      0.66        47

       accuracy  

### Conclusion

> The highest accuracy is achieved with k=5, at about 79.9%.