<h1>Diabetes Classification</h1>

<h2>Dataset Info</h2>
<br>
Pima Indians Diabetes Database
<br>
<b>For Each Attribute:</b> (all numeric-valued)
<br>
   1. Number of times pregnant <br>
   2. Plasma glucose concentration a 2 hours in an oral glucose tolerance test <br>
   3. Diastolic blood pressure (mm Hg) <br>
   4. Triceps skin fold thickness (mm) <br>
   5. 2-Hour serum insulin (mu U/ml) <br>
   6. Body mass index (weight in kg/(height in m)^2) <br>
   7. Diabetes pedigree function <br>
   8. Age (years) <br>
   9. Class variable (0 or 1)<br>
   <br>
<b>Class Distribution: </b>
<br>
(class value 1 is interpreted as "tested positive for
   diabetes")

In [None]:
!pip install scikit-learn numpy pandas tensorflow

In [214]:
import numpy as np
import pandas as pd
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.model_selection import train_test_split

In [215]:
tf.random.set_seed(1234)  # for consistent results
np.random.seed(1234)

In [216]:
# load data
df = pd.read_csv('dataset.csv', header=None)

In [217]:
df.shape

(768, 9)

In [218]:
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [219]:
# try adding headers
# df.columns = ['Number of times pregnant','Plasma glucose concentration','Diastolic blood pressure',
#               'Triceps skin fold thickness','Hour serum insulin','Body mass index',
#               'Diabetes pedigree function','Age','Class variable']
# df.columns = df.columns.str.lower()

In [220]:
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [221]:
df.describe()

Unnamed: 0,0,1,2,3,4,5,6,7,8
count,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0
mean,3.845052,120.894531,69.105469,20.536458,79.799479,31.992578,0.471876,33.240885,0.348958
std,3.369578,31.972618,19.355807,15.952218,115.244002,7.88416,0.331329,11.760232,0.476951
min,0.0,0.0,0.0,0.0,0.0,0.0,0.078,21.0,0.0
25%,1.0,99.0,62.0,0.0,0.0,27.3,0.24375,24.0,0.0
50%,3.0,117.0,72.0,23.0,30.5,32.0,0.3725,29.0,0.0
75%,6.0,140.25,80.0,32.0,127.25,36.6,0.62625,41.0,1.0
max,17.0,199.0,122.0,99.0,846.0,67.1,2.42,81.0,1.0


In [222]:
df_norm = (df - df.min()) / (df.max() - df.min())
df_norm.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,0.352941,0.743719,0.590164,0.353535,0.0,0.500745,0.234415,0.483333,1.0
1,0.058824,0.427136,0.540984,0.292929,0.0,0.396423,0.116567,0.166667,0.0
2,0.470588,0.919598,0.52459,0.0,0.0,0.347243,0.253629,0.183333,1.0
3,0.058824,0.447236,0.540984,0.232323,0.111111,0.418778,0.038002,0.0,0.0
4,0.0,0.688442,0.327869,0.353535,0.198582,0.642325,0.943638,0.2,1.0


In [223]:
# check for missing data
df_norm.isnull().sum()

0    0
1    0
2    0
3    0
4    0
5    0
6    0
7    0
8    0
dtype: int64

In [224]:
# check for duplicates
df_norm.duplicated().sum()

0

In [225]:
X = np.array(df_norm)[:, 0:8]
y = np.array(df_norm)[:, 8]
print(f'X shape : {X.shape}\ny shape : {y.shape}')
print(f'X sample : {X[0]}\nits class: : {y[0]}')


X shape : (768, 8)
y shape : (768,)
X sample : [0.35294118 0.74371859 0.59016393 0.35353535 0.         0.50074516
 0.23441503 0.48333333]
its class: : 1.0


In [226]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
print(f'X_train shape : {X_train.shape}\ny_train shape : {y_train.shape}')
print(f'X_test shape : {X_test.shape}\ny_test shape : {y_test.shape}')


X_train shape : (614, 8)
y_train shape : (614,)
X_test shape : (154, 8)
y_test shape : (154,)


In [229]:
model = Sequential()
model.add(Dense(8, input_dim=8, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(16, activation='relu'))
model.add(Dense(1, activation='sigmoid'))


In [230]:
model.summary()

Model: "sequential_10"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_37 (Dense)            (None, 8)                 72        
                                                                 
 dense_38 (Dense)            (None, 32)                288       
                                                                 
 dense_39 (Dense)            (None, 16)                528       
                                                                 
 dense_40 (Dense)            (None, 1)                 17        
                                                                 
Total params: 905
Trainable params: 905
Non-trainable params: 0
_________________________________________________________________


In [231]:
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [232]:
model.fit(X_train, y_train, epochs=500, batch_size=20)

Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78

<keras.callbacks.History at 0x213e3697130>

In [236]:
model.save('diabets.model')
model = tf.keras.models.load_model('diabets.model')


INFO:tensorflow:Assets written to: diabets.model\assets


In [237]:
loss, accuracy = model.evaluate(X_test, y_test)



In [238]:
print(f'loss: {loss*100}, accuracy: {accuracy*100}')

loss: 52.480870485305786, accuracy: 74.02597665786743


In [239]:
y_predict = (model.predict(X_test) > 0.5).astype(int)



In [240]:
for i in range(10) :
    print(f'{X_test[i]} => predicted output : {y_predict[i]}, expected output : {y_test[i]}')

[0.         0.5678392  0.6557377  0.16161616 0.         0.46199702
 0.33988044 0.        ] => predicted output : [0], expected output : 0.0
[0.17647059 0.90452261 0.52459016 0.25252525 0.08274232 0.50670641
 0.0824082  0.08333333] => predicted output : [1], expected output : 0.0
[0.41176471 0.57286432 0.52459016 0.         0.         0.40834575
 0.27924851 0.21666667] => predicted output : [0], expected output : 1.0
[0.05882353 0.6281407  0.40983607 0.4040404  0.19739953 0.49627422
 0.37745517 0.11666667] => predicted output : [1], expected output : 1.0
[0.11764706 0.60301508 0.62295082 0.37373737 0.12411348 0.59165425
 0.05849701 0.13333333] => predicted output : [0], expected output : 0.0
[0.11764706 0.64321608 0.52459016 0.42424242 0.         0.59612519
 0.43680615 0.05      ] => predicted output : [1], expected output : 0.0
[0.23529412 0.68844221 0.68852459 0.         0.         0.46497765
 0.07429547 0.15      ] => predicted output : [1], expected output : 0.0
[0.17647059 0.517587