In [1]:
from sklearn.tree import DecisionTreeClassifier
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, roc_curve, RocCurveDisplay, auc
from sklearn.preprocessing import LabelEncoder


In [2]:
diabets_classes = pd.read_csv('../../DATA/diabets_classes.csv', index_col=False)
diabets_x = diabets_classes.drop(['diabets'], axis=1)
diabets = diabets_classes['diabets']
pre_diabets_classes = pd.read_csv('../../DATA/pre_diabets_classes.csv', index_col=False)
pre_diabets_x = pre_diabets_classes.drop(['pre_diabets'], axis=1)
pre_diabets = pre_diabets_classes['pre_diabets']
non_diabets_classes = pd.read_csv('../../DATA/non_diabets_classes.csv', index_col=False)
non_diabets_x = non_diabets_classes.drop(['non_diabets'], axis=1)
non_diabets = non_diabets_classes['non_diabets']


In [3]:
x_diabets_train, x_diabets_test, y_diabets_train, y_diabets_test = train_test_split(diabets_x, diabets, test_size=0.25)
x_pre_diabets_train, x_pre_diabets_test, y_pre_diabets_train, y_pre_diabets_test = train_test_split(pre_diabets_x, pre_diabets, test_size=0.25)
x_non_diabets_train, x_non_diabets_test, y_non_diabets_train, y_non_diabets_test = train_test_split(non_diabets_x, non_diabets, test_size=0.25)

# Diabets KNN

In [4]:
params = {'criterion': ['gini', 'entropy', 'log_loss'], 
          'max_depth': np.arange(3, 15)}


In [5]:
diabets_classifier = GridSearchCV(DecisionTreeClassifier(),
                                  params,
                                  n_jobs=4,
                                  scoring='accuracy'
                                  ).fit(x_diabets_train, y_diabets_train)

# 6 min


In [6]:
diabets_predict = diabets_classifier.predict(x_diabets_test)
# 1 min


In [7]:
print(classification_report(diabets_predict, y_diabets_test))


              precision    recall  f1-score   support

         0.0       0.75      0.82      0.79     49141
         1.0       0.84      0.77      0.80     57711

    accuracy                           0.80    106852
   macro avg       0.80      0.80      0.80    106852
weighted avg       0.80      0.80      0.80    106852



# Prediabets KNN

In [8]:
pre_diabets_classifier = GridSearchCV(DecisionTreeClassifier(),
                                      params,
                                      n_jobs=4,
                                      scoring='accuracy'
                                      ).fit(x_pre_diabets_train, y_pre_diabets_train)
# 6 min


In [9]:
pre_diabets_predict = diabets_classifier.predict(x_pre_diabets_test)
# 1 min


In [10]:
print(classification_report(pre_diabets_predict, y_pre_diabets_test))


              precision    recall  f1-score   support

         0.0       0.57      0.52      0.54     58987
         1.0       0.47      0.52      0.49     47865

    accuracy                           0.52    106852
   macro avg       0.52      0.52      0.52    106852
weighted avg       0.52      0.52      0.52    106852



# Nondiabets KNN

In [11]:
non_diabets_classifier = GridSearchCV(DecisionTreeClassifier(),
                                      params,
                                      n_jobs=4,
                                      scoring='accuracy'
                                      ).fit(x_non_diabets_train, y_non_diabets_train)


In [12]:
non_diabets_predict = diabets_classifier.predict(x_non_diabets_test)
# 1 min


In [13]:
print(classification_report(non_diabets_predict, y_non_diabets_test))


              precision    recall  f1-score   support

         0.0       0.34      0.26      0.30     70867
         1.0       0.02      0.02      0.02     35985

    accuracy                           0.18    106852
   macro avg       0.18      0.14      0.16    106852
weighted avg       0.23      0.18      0.20    106852

