In [2]:
from sklearn.tree import DecisionTreeClassifier
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, roc_curve, RocCurveDisplay, auc
from sklearn.preprocessing import LabelEncoder


In [4]:
diabets_classes = pd.read_csv('../../DATA/diabets_classes.csv', index_col=False)
diabets_x = diabets_classes.drop(['diabets'], axis=1)
diabets = diabets_classes['diabets']
pre_diabets_classes = pd.read_csv('../../DATA/pre_diabets_classes.csv', index_col=False)
pre_diabets_x = pre_diabets_classes.drop(['pre_diabets'], axis=1)
pre_diabets = pre_diabets_classes['pre_diabets']
non_diabets_classes = pd.read_csv('../../DATA/non_diabets_classes.csv', index_col=False)
non_diabets_x = non_diabets_classes.drop(['non_diabets'], axis=1)
non_diabets = non_diabets_classes['non_diabets']


In [5]:
x_diabets_train, x_diabets_test, y_diabets_train, y_diabets_test = train_test_split(diabets_x, diabets, test_size=0.25)
x_pre_diabets_train, x_pre_diabets_test, y_pre_diabets_train, y_pre_diabets_test = train_test_split(pre_diabets_x, pre_diabets, test_size=0.25)
x_non_diabets_train, x_non_diabets_test, y_non_diabets_train, y_non_diabets_test = train_test_split(non_diabets_x, non_diabets, test_size=0.25)

# Diabets KNN

In [6]:
params = {'criterion': ['gini', 'entropy', 'log_loss'], 
          'max_depth': np.arange(3, 15)}


In [7]:
diabets_classifier = GridSearchCV(DecisionTreeClassifier(),
                                  params,
                                  n_jobs=4,
                                  scoring='accuracy'
                                  ).fit(x_diabets_train, y_diabets_train)

# 6 min


In [8]:
diabets_predict = diabets_classifier.predict(x_diabets_test)
# 1 min


In [9]:
print(classification_report(diabets_predict, y_diabets_test))


              precision    recall  f1-score   support

         0.0       0.51      0.69      0.59       880
         1.0       0.76      0.60      0.67      1436

    accuracy                           0.63      2316
   macro avg       0.64      0.65      0.63      2316
weighted avg       0.67      0.63      0.64      2316



# Prediabets KNN

In [12]:
pre_diabets_classifier = GridSearchCV(DecisionTreeClassifier(),
                                      params,
                                      n_jobs=4,
                                      scoring='accuracy'
                                      ).fit(x_pre_diabets_train, y_pre_diabets_train)
# 6 min


In [13]:
pre_diabets_predict = diabets_classifier.predict(x_pre_diabets_test)
# 1 min


In [14]:
print(classification_report(pre_diabets_predict, y_pre_diabets_test))


              precision    recall  f1-score   support

         0.0       0.56      0.76      0.65      6590
         1.0       0.82      0.65      0.73     11083

    accuracy                           0.69     17673
   macro avg       0.69      0.71      0.69     17673
weighted avg       0.73      0.69      0.70     17673



# Nondiabets KNN

In [15]:
non_diabets_classifier = GridSearchCV(DecisionTreeClassifier(),
                                      params,
                                      n_jobs=4,
                                      scoring='accuracy'
                                      ).fit(x_non_diabets_train, y_non_diabets_train)


In [16]:
non_diabets_predict = diabets_classifier.predict(x_non_diabets_test)
# 1 min


In [17]:
print(classification_report(non_diabets_predict, y_non_diabets_test))


              precision    recall  f1-score   support

         0.0       0.18      0.06      0.09     32367
         1.0       0.43      0.74      0.54     31053

    accuracy                           0.39     63420
   macro avg       0.31      0.40      0.31     63420
weighted avg       0.30      0.39      0.31     63420

