In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
import tensorflow as tf
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, roc_curve, RocCurveDisplay, auc


In [2]:
diabets_classes = pd.read_csv('../../DATA/diabets_classes.csv', index_col=False)
diabets_x = diabets_classes.drop(['diabets'], axis=1)
diabets = diabets_classes['diabets']
pre_diabets_classes = pd.read_csv('../../DATA/pre_diabets_classes.csv', index_col=False)
pre_diabets_x = pre_diabets_classes.drop(['pre_diabets'], axis=1)
pre_diabets = pre_diabets_classes['pre_diabets']
non_diabets_classes = pd.read_csv('../../DATA/non_diabets_classes.csv', index_col=False)
non_diabets_x = non_diabets_classes.drop(['non_diabets'], axis=1)
non_diabets = non_diabets_classes['non_diabets']


In [3]:
x_diabets_train, x_diabets_test, y_diabets_train, y_diabets_test = train_test_split(diabets_x, diabets, test_size=0.25)
x_pre_diabets_train, x_pre_diabets_test, y_pre_diabets_train, y_pre_diabets_test = train_test_split(pre_diabets_x, pre_diabets, test_size=0.25)
x_non_diabets_train, x_non_diabets_test, y_non_diabets_train, y_non_diabets_test = train_test_split(non_diabets_x, non_diabets, test_size=0.25)

In [4]:
def get_model(X_train, y_train):
    diabets_classifier = tf.keras.models.Sequential()
    diabets_classifier.add(tf.keras.layers.Dense(X_train.shape[1],
                                                activation='sigmoid',
                                                input_shape=(X_train.shape[1],),
                                                name='input',
                                                # kernel_regularizer=tf.keras.regularizers.L2(l2=1),
                                                #   kernel_constraint=tf.keras.constraints.max_norm(1000.),
                                                # bias_initializer=tf.keras.initializers.HeNormal,
                                                use_bias=True
                                                ))
    diabets_classifier.add(tf.keras.layers.Dense(X_train.shape[1]*2,
                                                activation='sigmoid',
                                                name='hidden_layer_2',
                                                # kernel_regularizer=tf.keras.regularizers.L2(l2=1),
                                                #   kernel_constraint=tf.keras.constraints.max_norm(1000.),
                                                # bias_initializer=tf.keras.initializers.HeNormal,
                                                use_bias=True
                                                ))
    diabets_classifier.add(tf.keras.layers.Dense(X_train.shape[1]*3,
                                                activation='sigmoid',
                                                name='hidden_layer_3',
                                                # kernel_regularizer=tf.keras.regularizers.L2(l2=1),
                                                #   kernel_constraint=tf.keras.constraints.max_norm(1000.),
                                                # bias_initializer=tf.keras.initializers.HeNormal,
                                                use_bias=True
                                                ))
    diabets_classifier.add(tf.keras.layers.Dense(X_train.shape[1]*2,
                                                 activation='sigmoid',
                                                 name='hidden_layer_4',
                                                 # kernel_regularizer=tf.keras.regularizers.L2(l2=1),
                                                 #   kernel_constraint=tf.keras.constraints.max_norm(1000.),
                                                 # bias_initializer=tf.keras.initializers.HeNormal,
                                                 use_bias=True
                                                 ))
    diabets_classifier.add(tf.keras.layers.Dense(X_train.shape[1]*1,
                                                 activation='sigmoid',
                                                 name='hidden_layer_5',
                                                 # kernel_regularizer=tf.keras.regularizers.L2(l2=1),
                                                 #   kernel_constraint=tf.keras.constraints.max_norm(1000.),
                                                 # bias_initializer=tf.keras.initializers.HeNormal,
                                                 use_bias=True
                                                 ))
    diabets_classifier.add(tf.keras.layers.Dense(X_train.shape[1]//2,
                                                 activation='sigmoid',
                                                 name='hidden_layer_6',
                                                 # kernel_regularizer=tf.keras.regularizers.L2(l2=1),
                                                 #   kernel_constraint=tf.keras.constraints.max_norm(1000.),
                                                 # bias_initializer=tf.keras.initializers.HeNormal,
                                                 use_bias=True
                                                 ))
    diabets_classifier.add(tf.keras.layers.Dense(1,
                                                name='output',
                                                activation='sigmoid',
                                                # kernel_regularizer=tf.keras.regularizers.L2(l2=1),
                                                #   kernel_constraint=tf.keras.constraints.max_norm(1000.),
                                                # bias_initializer=tf.keras.initializers.HeNormal,
                                                use_bias=True
                                                ))
    diabets_classifier.summary()
    diabets_classifier.compile(
        optimizer='Adam',
        loss='binary_crossentropy',
        metrics=['mse', tf.keras.metrics.FalseNegatives(),
                tf.keras.metrics.Recall()]
    )
    diabets_classifier.fit(
        X_train.to_numpy(),
        y_train.to_numpy(),
        validation_split=0.5,
        epochs=20,
        batch_size=10
    )
    return diabets_classifier


# Diabets KNN

In [5]:
diabets_classifier = get_model(x_diabets_train, y_diabets_train)


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input (Dense)               (None, 17)                306       
                                                                 
 hidden_layer_2 (Dense)      (None, 34)                612       
                                                                 
 hidden_layer_3 (Dense)      (None, 51)                1785      
                                                                 
 hidden_layer_4 (Dense)      (None, 34)                1768      
                                                                 
 hidden_layer_5 (Dense)      (None, 17)                595       
                                                                 
 hidden_layer_6 (Dense)      (None, 8)                 144       
                                                                 
 output (Dense)              (None, 1)                 9

In [6]:
diabets_predict = diabets_classifier.predict(x_diabets_test).T[0]
diabets_predict
# 1 min




array([0.61975384, 0.5963406 , 0.25121632, ..., 0.5406073 , 0.6118195 ,
       0.110283  ], dtype=float32)

In [7]:
print(classification_report(diabets_predict > np.median(diabets_predict), y_diabets_test))


              precision    recall  f1-score   support

       False       0.66      0.65      0.65      1158
        True       0.65      0.66      0.66      1158

    accuracy                           0.66      2316
   macro avg       0.66      0.66      0.66      2316
weighted avg       0.66      0.66      0.66      2316



# Prediabets KNN

In [8]:
pre_diabets_classifier = get_model(x_diabets_train, y_diabets_train)
# 6 min


Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input (Dense)               (None, 17)                306       
                                                                 
 hidden_layer_2 (Dense)      (None, 34)                612       
                                                                 
 hidden_layer_3 (Dense)      (None, 51)                1785      
                                                                 
 hidden_layer_4 (Dense)      (None, 34)                1768      
                                                                 
 hidden_layer_5 (Dense)      (None, 17)                595       
                                                                 
 hidden_layer_6 (Dense)      (None, 8)                 144       
                                                                 
 output (Dense)              (None, 1)                

In [9]:
pre_diabets_predict = diabets_classifier.predict(x_pre_diabets_test).T[0]
# 1 min




In [10]:
print(classification_report(pre_diabets_predict > np.median(pre_diabets_predict), y_pre_diabets_test))


              precision    recall  f1-score   support

       False       0.72      0.73      0.72      8837
        True       0.72      0.72      0.72      8836

    accuracy                           0.72     17673
   macro avg       0.72      0.72      0.72     17673
weighted avg       0.72      0.72      0.72     17673



# Nondiabets KNN

In [11]:
non_diabets_classifier = get_model(x_non_diabets_train, y_non_diabets_train)
# 6 min


Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input (Dense)               (None, 17)                306       
                                                                 
 hidden_layer_2 (Dense)      (None, 34)                612       
                                                                 
 hidden_layer_3 (Dense)      (None, 51)                1785      
                                                                 
 hidden_layer_4 (Dense)      (None, 34)                1768      
                                                                 
 hidden_layer_5 (Dense)      (None, 17)                595       
                                                                 
 hidden_layer_6 (Dense)      (None, 8)                 144       
                                                                 
 output (Dense)              (None, 1)                

In [12]:
non_diabets_predict = diabets_classifier.predict(x_non_diabets_test).T[0]
# 1 min


   1/1982 [..............................] - ETA: 35s



In [13]:
print(classification_report(non_diabets_predict > np.median(x_non_diabets_test), y_non_diabets_test))


              precision    recall  f1-score   support

       False       0.01      0.01      0.01     11434
        True       0.79      0.81      0.80     51986

    accuracy                           0.67     63420
   macro avg       0.40      0.41      0.41     63420
weighted avg       0.65      0.67      0.66     63420

