In [12]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, KFold, cross_val_score
from sklearn.neighbors import KNeighborsClassifier, RadiusNeighborsClassifier

In [2]:
# loading data
data = pd.read_csv('diabetes.csv')
print('dataset shape {}'.format(data.shape))
data.head()

dataset shape (768, 9)


Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [3]:
data.groupby('Outcome').size()

Outcome
0    500
1    268
dtype: int64

In [5]:
x = data.iloc[:, :8]
y = data.iloc[:, 8]
print('shape of x {}; shape of y {}'.format(x.shape, y.shape))

shape of x (768, 8); shape of y (768,)


In [8]:
# Divide data
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3)

In [10]:
# Building Models
models = []
models.append(('KNN', KNeighborsClassifier(n_neighbors=2)))
models.append(('KNN with weights', KNeighborsClassifier(n_neighbors=2, weights='distance')))
models.append(('Radius Neightbors', RadiusNeighborsClassifier(n_neighbors=2, radius=500)))

In [11]:
# Training each model and score
results = []
for name, model in models:
    model.fit(x_train, y_train)
    results.append((name, model.score(x_test, y_test)))
for i in range(len(results)):
    print('name: {}; score: {}'.format(results[i][0], results[i][1]))

name: KNN; score: 0.658008658008658
name: KNN with weights; score: 0.6363636363636364
name: Radius Neightbors; score: 0.6320346320346321


In [13]:
# random divide training data and validation
results = []
for name, model in models:
    kfold = KFold(n_splits=10)
    cv_result = cross_val_score(model, x, y, cv=kfold)
    results.append((name, cv_result))
for i in range(len(results)):
    print('name: {}; cross validation score: {}'.format(results[i][0], results[i][1].mean()))

name: KNN; cross validation score: 0.7147641831852358
name: KNN with weights; cross validation score: 0.6770505809979495
name: Radius Neightbors; cross validation score: 0.6497265892002735
