# Classic KNN Classifier

In [13]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV

In [2]:
# Load the diabetes dataset from the CSV file in the working directory and
# preview the first five rows to confirm the columns parsed as expected.
df = pd.read_csv(filepath_or_buffer='DiabetesPrediction.csv')
df.head(n=5)

Unnamed: 0,pregnancies,glucose,diastolic,triceps,insulin,bmi,dpf,age,diabetes
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [3]:
# Dimensions of the loaded frame as a (row count, column count) tuple.
(len(df.index), len(df.columns))

(768, 9)

In [4]:
# Feature matrix: every column of df except the 'diabetes' label.
x = df.drop(columns=['diabetes'])
x.head(n=5)

Unnamed: 0,pregnancies,glucose,diastolic,triceps,insulin,bmi,dpf,age
0,6,148,72,35,0,33.6,0.627,50
1,1,85,66,29,0,26.6,0.351,31
2,8,183,64,0,0,23.3,0.672,32
3,1,89,66,23,94,28.1,0.167,21
4,0,137,40,35,168,43.1,2.288,33


In [5]:
# Target vector: the 'diabetes' column extracted as a NumPy array.
y = df['diabetes'].to_numpy()
y[0:5]

array([1, 0, 1, 0, 1], dtype=int64)

In [6]:
# Hold out 20% of the rows as a test set. The split is stratified on y and
# uses a fixed random_state so it is repeatable across runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    stratify=y,
    random_state=1,
)

In [7]:
# Baseline model: k-nearest neighbours with k=3, fitted on the training split.
# NOTE(review): x_train is passed in unscaled — confirm that is intended for a
# distance-based classifier.
knn = KNeighborsClassifier(n_neighbors=3).fit(x_train, y_train)
knn  # fit() returns the estimator itself; show its configuration

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=3, p=2,
                     weights='uniform')

In [9]:
# Predict a class label for every row of the held-out test set and peek at
# the first five predictions.
yhat = knn.predict(X=x_test)
yhat[0:5]

array([0, 0, 0, 0, 1], dtype=int64)

In [10]:
# Score the k=3 classifier on the held-out test split.
knn.score(X=x_test, y=y_test)

0.6688311688311688

# Model evaluation with Cross Validation

In [17]:
# 10-fold cross-validation of the k=3 classifier over the full dataset (x, y).
# Print the per-fold scores followed by their mean.
cross_val = cross_val_score(estimator=knn, X=x, y=y, cv=10)

print(cross_val)
print('cv_scores mean:{}'.format(cross_val.mean()))

[0.64935065 0.71428571 0.68831169 0.66233766 0.74025974 0.77922078
 0.71428571 0.72727273 0.67105263 0.68421053]
cv_scores mean:0.7030587833219413


# Hyperparameter tuning with GridSearchCV

In [19]:
# Exhaustive search over k = 1..44, scoring each candidate with 10-fold
# cross-validation.
# The search is fitted on the full x, y, so best_score_ is a cross-validated
# mean over all rows, not a score on the separate x_test split.
param_grid = dict(n_neighbors=np.arange(1, 45))
knn_gscv = GridSearchCV(estimator=knn, param_grid=param_grid, cv=10).fit(x, y)

print('Best k:', knn_gscv.best_params_)
print('Scores:', knn_gscv.best_score_)

Best k: {'n_neighbors': 17}
Scores: 0.7552083333333334
