In [48]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score, classification_report, roc_curve, RocCurveDisplay, roc_auc_score, confusion_matrix
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split


In [35]:
diabets_classes = pd.read_csv('../../DATA/diabets_classes.csv', index_col=False)
diabets_x = diabets_classes.drop(['diabets'], axis=1)
diabets = diabets_classes['diabets']
pre_diabets_classes = pd.read_csv('../../DATA/pre_diabets_classes.csv', index_col=False)
pre_diabets_x = pre_diabets_classes.drop(['pre_diabets'], axis=1)
pre_diabets = pre_diabets_classes['pre_diabets']
non_diabets_classes = pd.read_csv('../../DATA/non_diabets_classes.csv', index_col=False)
non_diabets_x = non_diabets_classes.drop(['non_diabets'], axis=1)
non_diabets = non_diabets_classes['non_diabets']


In [36]:
x_diabets_train, x_diabets_test, y_diabets_train, y_diabets_test = train_test_split(diabets_x, diabets, test_size=0.25)
x_pre_diabets_train, x_pre_diabets_test, y_pre_diabets_train, y_pre_diabets_test = train_test_split(pre_diabets_x, pre_diabets, test_size=0.25)
x_non_diabets_train, x_non_diabets_test, y_non_diabets_train, y_non_diabets_test = train_test_split(non_diabets_x, non_diabets, test_size=0.25)

# Diabets KNN

In [37]:
diabets_classifier = GridSearchCV(KNeighborsClassifier(), 
                                  {'n_neighbors': np.arange(5, 21, 2)},
                                  n_jobs=4,
                                  scoring='f1',).fit(x_diabets_train, y_diabets_train)
# 6 min


In [38]:
diabets_predict = diabets_classifier.best_estimator_.predict(x_diabets_test)
# 1 min


In [39]:
print(classification_report(diabets_predict, y_diabets_test))


              precision    recall  f1-score   support

         0.0       0.57      0.65      0.61      1000
         1.0       0.70      0.63      0.67      1316

    accuracy                           0.64      2316
   macro avg       0.64      0.64      0.64      2316
weighted avg       0.65      0.64      0.64      2316



# Prediabets KNN

In [40]:
pre_diabets_classifier = GridSearchCV(KNeighborsClassifier(), 
                                  {'n_neighbors': np.arange(5, 21, 2)},
                                  n_jobs=4,
                                  scoring='f1',).fit(x_pre_diabets_train, y_pre_diabets_train)
# 6 min


In [41]:
pre_diabets_predict = diabets_classifier.best_estimator_.predict(x_pre_diabets_test)
# 1 min


In [42]:
print(classification_report(pre_diabets_predict, y_pre_diabets_test))


              precision    recall  f1-score   support

         0.0       0.61      0.74      0.67      7301
         1.0       0.78      0.67      0.72     10372

    accuracy                           0.70     17673
   macro avg       0.70      0.70      0.69     17673
weighted avg       0.71      0.70      0.70     17673



# Nondiabets KNN

In [43]:
x_non_diabets_train


Unnamed: 0,HighBP,HighChol,CholCheck,BMI,HvyAlcoholConsump,NoDocbcCost,GenHlth,MentHlth,Age,Income
65602,1.0,0.0,1.0,0.093023,0.0,0.0,0.50,0.000000,0.666667,1.000000
132862,1.0,0.0,1.0,0.255814,0.0,0.0,0.50,0.000000,0.666667,1.000000
219763,0.0,1.0,1.0,0.244186,0.0,0.0,0.25,0.000000,0.750000,0.714286
147534,0.0,1.0,1.0,0.395349,0.0,0.0,0.75,0.700000,0.416667,0.857143
3831,0.0,0.0,1.0,0.139535,0.0,1.0,0.25,0.000000,0.666667,0.857143
...,...,...,...,...,...,...,...,...,...,...
86531,1.0,0.0,1.0,0.279070,0.0,0.0,0.50,0.000000,0.750000,0.285714
102317,1.0,0.0,1.0,0.186047,0.0,0.0,0.75,0.066667,0.666667,0.000000
251663,0.0,0.0,0.0,0.093023,1.0,0.0,0.00,0.033333,0.666667,0.857143
132222,1.0,1.0,1.0,0.174419,0.0,0.0,0.25,0.166667,1.000000,0.714286


In [50]:
non_diabets_classifier = GridSearchCV(KNeighborsClassifier(), 
                                  {'n_neighbors': np.arange(5, 21, 2)},
                                  n_jobs=4,
                                  scoring='roc_auc',).fit(x_non_diabets_train, y_non_diabets_train)
# 6 min


In [51]:
non_diabets_predict = diabets_classifier.best_estimator_.predict(x_non_diabets_test)
# 1 min


In [52]:
confusion_matrix(non_diabets_predict, y_non_diabets_test)


array([[ 2189, 32433],
       [ 7841, 20957]], dtype=int64)

In [53]:
print(classification_report(non_diabets_predict, y_non_diabets_test))


              precision    recall  f1-score   support

         0.0       0.22      0.06      0.10     34622
         1.0       0.39      0.73      0.51     28798

    accuracy                           0.36     63420
   macro avg       0.31      0.40      0.30     63420
weighted avg       0.30      0.36      0.29     63420

