In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score, classification_report, roc_curve, RocCurveDisplay, roc_auc_score, confusion_matrix
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split


In [2]:
diabets_classes = pd.read_csv('../../DATA/diabets_classes.csv', index_col=False)
diabets_x = diabets_classes.drop(['diabets'], axis=1)
diabets = diabets_classes['diabets']
pre_diabets_classes = pd.read_csv('../../DATA/pre_diabets_classes.csv', index_col=False)
pre_diabets_x = pre_diabets_classes.drop(['pre_diabets'], axis=1)
pre_diabets = pre_diabets_classes['pre_diabets']
non_diabets_classes = pd.read_csv('../../DATA/non_diabets_classes.csv', index_col=False)
non_diabets_x = non_diabets_classes.drop(['non_diabets'], axis=1)
non_diabets = non_diabets_classes['non_diabets']


In [4]:
x_diabets_train, x_diabets_test, y_diabets_train, y_diabets_test = train_test_split(diabets_x, 
                                                                                    diabets, 
                                                                                    stratify=diabets, 
                                                                                    test_size=0.25)
x_pre_diabets_train, x_pre_diabets_test, y_pre_diabets_train, y_pre_diabets_test = train_test_split(pre_diabets_x, 
                                                                                                    pre_diabets, 
                                                                                                    stratify=pre_diabets,
                                                                                                    test_size=0.25)
x_non_diabets_train, x_non_diabets_test, y_non_diabets_train, y_non_diabets_test = train_test_split(non_diabets_x, 
                                                                                                    non_diabets, 
                                                                                                    stratify=non_diabets, 
                                                                                                    test_size=0.25)

# Diabets KNN

In [5]:
diabets_classifier = KNeighborsClassifier(n_neighbors=7).fit(x_diabets_train, y_diabets_train)
# 6 min


In [6]:
diabets_predict = diabets_classifier.predict(x_diabets_test)
# 1 min


In [7]:
print(classification_report(diabets_predict, y_diabets_test))


              precision    recall  f1-score   support

         0.0       0.87      0.99      0.93     47254
         1.0       0.99      0.89      0.93     59598

    accuracy                           0.93    106852
   macro avg       0.93      0.94      0.93    106852
weighted avg       0.94      0.93      0.93    106852



# Prediabets KNN

In [8]:
pre_diabets_classifier = KNeighborsClassifier(n_neighbors=7).fit(x_pre_diabets_train, y_pre_diabets_train)
# 6 min


In [9]:
pre_diabets_predict = diabets_classifier.predict(x_pre_diabets_test)
# 1 min


In [10]:
print(classification_report(pre_diabets_predict, y_pre_diabets_test))


              precision    recall  f1-score   support

         0.0       0.43      0.32      0.37     72038
         1.0       0.09      0.13      0.10     34814

    accuracy                           0.26    106852
   macro avg       0.26      0.23      0.24    106852
weighted avg       0.32      0.26      0.28    106852



# Nondiabets KNN

In [11]:
x_non_diabets_train


Unnamed: 0,HighBP,HighChol,CholCheck,BMI,Smoker,Stroke,HeartDiseaseorAttack,PhysActivity,Fruits,Veggies,...,AnyHealthcare,NoDocbcCost,GenHlth,MentHlth,PhysHlth,DiffWalk,Sex,Age,Education,Income
392061,0.0,1.0,1.0,0.174419,1.0,0.0,0.0,1.0,1.0,1.0,...,1.0,0.0,0.25,0.166667,0.100000,0.0,0.0,0.333333,1.000000,1.000000
166446,0.0,0.0,1.0,0.127907,1.0,0.0,0.0,1.0,1.0,1.0,...,1.0,0.0,0.50,0.000000,1.000000,0.0,1.0,1.000000,1.000000,0.714286
127490,0.0,0.0,1.0,0.174419,0.0,0.0,0.0,0.0,0.0,1.0,...,1.0,0.0,0.25,0.000000,0.000000,0.0,0.0,0.416667,1.000000,1.000000
101798,0.0,0.0,1.0,0.104651,0.0,0.0,0.0,1.0,1.0,1.0,...,1.0,0.0,0.50,0.000000,0.000000,0.0,0.0,0.333333,0.800000,1.000000
341516,1.0,0.0,1.0,0.209302,0.0,0.0,1.0,0.0,0.0,1.0,...,1.0,0.0,0.50,0.000000,0.000000,0.0,1.0,0.916667,1.000000,0.428571
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
141886,1.0,1.0,1.0,0.372093,1.0,0.0,0.0,0.0,0.0,1.0,...,1.0,1.0,0.50,0.200000,0.000000,0.0,1.0,0.583333,0.600000,0.714286
392096,1.0,1.0,1.0,0.197674,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.50,0.466667,0.000000,1.0,0.0,0.666667,0.600000,0.285714
302452,1.0,1.0,1.0,0.209302,0.0,0.0,0.0,0.0,1.0,1.0,...,1.0,0.0,1.00,0.100000,1.000000,1.0,0.0,0.750000,0.800000,0.857143
311127,1.0,0.0,1.0,0.139535,0.0,0.0,0.0,1.0,1.0,0.0,...,1.0,0.0,0.50,0.026689,0.040034,1.0,0.0,0.966639,0.680068,0.314189


In [12]:
non_diabets_classifier = KNeighborsClassifier(n_neighbors=7).fit(x_non_diabets_train, y_non_diabets_train)
# 6 min


In [13]:
non_diabets_predict = diabets_classifier.predict(x_non_diabets_test)
# 1 min


In [14]:
confusion_matrix(non_diabets_predict, y_non_diabets_test)


array([[24683, 45915],
       [28603,  7651]], dtype=int64)

In [15]:
print(classification_report(non_diabets_predict, y_non_diabets_test))


              precision    recall  f1-score   support

         0.0       0.46      0.35      0.40     70598
         1.0       0.14      0.21      0.17     36254

    accuracy                           0.30    106852
   macro avg       0.30      0.28      0.28    106852
weighted avg       0.35      0.30      0.32    106852

