<a href="https://colab.research.google.com/github/EnayathShaik/AI_ML/blob/main/Diabetes_Prediction_Using_NN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Diabetes Prediction

In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from keras import Sequential
from keras.layers import Dense

In [4]:
df_diabetes = pd.read_csv('/content/diabetes.csv')
df_diabetes.shape

(768, 9)

In [5]:
df_diabetes.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 768 entries, 0 to 767
Data columns (total 9 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Pregnancies               768 non-null    int64  
 1   Glucose                   768 non-null    int64  
 2   BloodPressure             768 non-null    int64  
 3   SkinThickness             768 non-null    int64  
 4   Insulin                   768 non-null    int64  
 5   BMI                       768 non-null    float64
 6   DiabetesPedigreeFunction  768 non-null    float64
 7   Age                       768 non-null    int64  
 8   Outcome                   768 non-null    int64  
dtypes: float64(2), int64(7)
memory usage: 54.1 KB


In [6]:
assert df_diabetes.isnull().sum().sum() == 0, "Dataset contains null values"

In [7]:
df_diabetes.head(10)

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1
5,5,116,74,0,0,25.6,0.201,30,0
6,3,78,50,32,88,31.0,0.248,26,1
7,10,115,0,0,0,35.3,0.134,29,0
8,2,197,70,45,543,30.5,0.158,53,1
9,8,125,96,0,0,0.0,0.232,54,1


In [8]:
df_diabetes.describe()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
count,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0
mean,3.845052,120.894531,69.105469,20.536458,79.799479,31.992578,0.471876,33.240885,0.348958
std,3.369578,31.972618,19.355807,15.952218,115.244002,7.88416,0.331329,11.760232,0.476951
min,0.0,0.0,0.0,0.0,0.0,0.0,0.078,21.0,0.0
25%,1.0,99.0,62.0,0.0,0.0,27.3,0.24375,24.0,0.0
50%,3.0,117.0,72.0,23.0,30.5,32.0,0.3725,29.0,0.0
75%,6.0,140.25,80.0,32.0,127.25,36.6,0.62625,41.0,1.0
max,17.0,199.0,122.0,99.0,846.0,67.1,2.42,81.0,1.0


In [9]:
df_diabetes['Glucose'] = np.where(df_diabetes['Glucose']==0, df_diabetes['Glucose'].median(), df_diabetes['Glucose'])

In [10]:
df_diabetes['Glucose'].describe()

Unnamed: 0,Glucose
count,768.0
mean,121.65625
std,30.438286
min,44.0
25%,99.75
50%,117.0
75%,140.25
max,199.0


In [11]:
df_diabetes['BloodPressure'] = np.where(df_diabetes['BloodPressure']==0, df_diabetes['BloodPressure'].median(), df_diabetes['BloodPressure'])
df_diabetes['SkinThickness'] = np.where(df_diabetes['SkinThickness']==0, df_diabetes['SkinThickness'].median(), df_diabetes['SkinThickness'])
df_diabetes['Insulin'] = np.where(df_diabetes['Insulin']==0, df_diabetes['Insulin'].median(), df_diabetes['Insulin'])
df_diabetes['BMI'] = np.where(df_diabetes['BMI']==0, df_diabetes['BMI'].median(), df_diabetes['BMI'])


In [12]:
df_diabetes.describe()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
count,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0
mean,3.845052,121.65625,72.386719,27.334635,94.652344,32.450911,0.471876,33.240885,0.348958
std,3.369578,30.438286,12.096642,9.229014,105.547598,6.875366,0.331329,11.760232,0.476951
min,0.0,44.0,24.0,7.0,14.0,18.2,0.078,21.0,0.0
25%,1.0,99.75,64.0,23.0,30.5,27.5,0.24375,24.0,0.0
50%,3.0,117.0,72.0,23.0,31.25,32.0,0.3725,29.0,0.0
75%,6.0,140.25,80.0,32.0,127.25,36.6,0.62625,41.0,1.0
max,17.0,199.0,122.0,99.0,846.0,67.1,2.42,81.0,1.0


## Check duplicate rows

In [13]:
df_diabetes.duplicated().sum()

np.int64(0)

In [14]:
x = df_diabetes.iloc[:, :-1]
y = df_diabetes.iloc[:, -1]


In [15]:
x.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age
0,6,148.0,72.0,35.0,30.5,33.6,0.627,50
1,1,85.0,66.0,29.0,30.5,26.6,0.351,31
2,8,183.0,64.0,23.0,30.5,23.3,0.672,32
3,1,89.0,66.0,23.0,94.0,28.1,0.167,21
4,0,137.0,40.0,35.0,168.0,43.1,2.288,33


In [16]:
y.head()

Unnamed: 0,Outcome
0,1
1,0
2,1
3,0
4,1


In [17]:
print(x.shape, y.shape)

(768, 8) (768,)


## Class Imbalance Verification

In [18]:
y.value_counts()/len(y)*100

Unnamed: 0_level_0,count
Outcome,Unnamed: 1_level_1
0,65.104167
1,34.895833


In [19]:
x = df_diabetes.iloc[:, :-1]
y = df_diabetes.iloc[:, -1]

print(x.shape, y.shape)

(768, 8) (768,)


In [20]:
x.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age
0,6,148.0,72.0,35.0,30.5,33.6,0.627,50
1,1,85.0,66.0,29.0,30.5,26.6,0.351,31
2,8,183.0,64.0,23.0,30.5,23.3,0.672,32
3,1,89.0,66.0,23.0,94.0,28.1,0.167,21
4,0,137.0,40.0,35.0,168.0,43.1,2.288,33


In [21]:
y.head()

Unnamed: 0,Outcome
0,1
1,0
2,1
3,0
4,1


In [22]:
from sklearn.model_selection import train_test_split

In [23]:
x_train, x_test, y_train, y_test = train_test_split(x,y, test_size=.2, stratify=y)

In [24]:
print(x_train.shape, y_train.shape)
print(x_test.shape, y_test.shape)

(614, 8) (614,)
(154, 8) (154,)


In [25]:
from sklearn.preprocessing import StandardScaler

In [26]:
sc = StandardScaler()

In [27]:
x_train_sc = sc.fit_transform(x_train)
x_test_sc = sc.transform(x_test)

In [28]:
x_train_sc

array([[ 0.03767731, -0.89969208, -0.60828101, ..., -1.14296235,
        -0.9838162 , -1.04408585],
       [ 0.92744143, -0.24076569, -0.69124071, ..., -0.74866353,
         0.8149258 ,  0.08746788],
       [-0.85208682,  0.0886975 ,  0.13835623, ..., -0.69024889,
        -1.13165801, -0.26070249],
       ...,
       [-0.55549878, -0.73496048, -1.68675703, ..., -1.15756601,
         0.52232222, -1.04408585],
       [ 0.63085339, -0.33960465, -0.69124071, ...,  0.24438535,
        -0.63885198, -0.78295806],
       [ 0.63085339,  0.41816069,  0.63611439, ...,  1.99682456,
        -0.70661281,  1.13197902]])

In [29]:
x_test_sc

array([[-0.85208682, -0.0760341 , -1.52083764, ..., -1.49345019,
        -0.80825406, -0.78295806],
       [-0.55549878,  1.73601347,  1.29979195, ...,  1.74856234,
         0.55004256, -0.78295806],
       [ 1.81720556, -0.66906784,  1.13387256, ...,  1.9092026 ,
         2.05926102,  0.43563826],
       ...,
       [-0.85208682, -0.9326384 , -1.35491826, ..., -1.46424287,
        -0.15528607, -0.95704325],
       [ 0.92744143, -0.63612152,  0.13835623, ...,  0.68249515,
        -0.81133409,  1.04493643],
       [ 0.03767731, -0.10898041, -0.19348254, ...,  1.74856234,
         1.34469228, -0.60887287]])

In [30]:
from keras.layers import Dropout, BatchNormalization

In [31]:
import time

In [32]:
model = Sequential()
model.add(Dense(32, activation='relu', input_dim = 8))
model.add(BatchNormalization())
model.add(Dropout(rate=.25))
model.add(Dense(16, activation = 'relu'))
model.add(Dropout(rate=.2))
model.add(Dense(1, activation='sigmoid'))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [33]:
model.summary()

In [34]:
(32*8) + 32

288

In [35]:
model.compile(loss= 'binary_crossentropy', metrics=['accuracy'])

In [36]:
start = time.time()
model.fit(x_train_sc, y_train, batch_size= 32, epochs=10, validation_data= (x_test_sc, y_test))
end = time.time()
print(f"Time Taken:{end-start}")

Epoch 1/10
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 16ms/step - accuracy: 0.5641 - loss: 0.7710 - val_accuracy: 0.6234 - val_loss: 0.6648
Epoch 2/10
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.6775 - loss: 0.6395 - val_accuracy: 0.7338 - val_loss: 0.6184
Epoch 3/10
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.7032 - loss: 0.6205 - val_accuracy: 0.7338 - val_loss: 0.5854
Epoch 4/10
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.6981 - loss: 0.6204 - val_accuracy: 0.7532 - val_loss: 0.5552
Epoch 5/10
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.7422 - loss: 0.5255 - val_accuracy: 0.7792 - val_loss: 0.5335
Epoch 6/10
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.7099 - loss: 0.5444 - val_accuracy: 0.7792 - val_loss: 0.5151
Epoch 7/10
[1m20/20[0m [32m━━━━━━━━━

In [37]:
model_1 = Sequential()
model_1.add(Dense(32, activation='relu', input_dim = 8))
model_1.add(BatchNormalization())
model_1.add(Dropout(rate=.25))
model_1.add(Dense(16, activation = 'relu'))
model_1.add(Dropout(rate=.2))
model_1.add(Dense(1, activation='sigmoid'))

In [38]:
model_1.compile(loss= 'binary_crossentropy', metrics=['accuracy'])

In [39]:
start = time.time()
model_1.fit(x_train_sc, y_train, batch_size= 1, epochs=10, validation_data= (x_test_sc, y_test))
end = time.time()
print(f"Time Taken:{end-start}")

Epoch 1/10
[1m614/614[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.6597 - loss: 0.6867 - val_accuracy: 0.6234 - val_loss: 3.5464
Epoch 2/10
[1m614/614[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.6370 - loss: 0.6709 - val_accuracy: 0.6104 - val_loss: 6.1666
Epoch 3/10
[1m614/614[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.6237 - loss: 0.6662 - val_accuracy: 0.5974 - val_loss: 5.8894
Epoch 4/10
[1m614/614[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.6528 - loss: 0.6516 - val_accuracy: 0.6169 - val_loss: 5.6735
Epoch 5/10
[1m614/614[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.6597 - loss: 0.6454 - val_accuracy: 0.6104 - val_loss: 5.8661
Epoch 6/10
[1m614/614[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.6537 - loss: 0.6467 - val_accuracy: 0.5909 - val_loss: 6.2243
Epoch 7/10
[1m614/614[0m 

In [40]:
df_diabetes.head(10)

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148.0,72.0,35.0,30.5,33.6,0.627,50,1
1,1,85.0,66.0,29.0,30.5,26.6,0.351,31,0
2,8,183.0,64.0,23.0,30.5,23.3,0.672,32,1
3,1,89.0,66.0,23.0,94.0,28.1,0.167,21,0
4,0,137.0,40.0,35.0,168.0,43.1,2.288,33,1
5,5,116.0,74.0,23.0,30.5,25.6,0.201,30,0
6,3,78.0,50.0,32.0,88.0,31.0,0.248,26,1
7,10,115.0,72.0,23.0,30.5,35.3,0.134,29,0
8,2,197.0,70.0,45.0,543.0,30.5,0.158,53,1
9,8,125.0,96.0,23.0,30.5,32.0,0.232,54,1


In [42]:
pred_data = pd.DataFrame({"Pregnancies": [1],	"Glucose": [100],	"BloodPressure": [80],"SkinThickness": [29] ,
                          "Insulin":[120],"BMI":[36.2],"DiabetesPedigreeFunction": [0.56],"Age":[80]})


In [43]:
pred_data.shape

(1, 8)

In [44]:
#pred_data_sc = sc.transform(pred_data)

In [45]:
#res = model.predict(pred_data_sc)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 107ms/step


In [46]:
#res

array([[0.34851396]], dtype=float32)

In [61]:
def pred_result(x_input):
  x_input_sc = sc.transform(x_input)
  res = model.predict(x_input_sc)
  res = [1 if res > 0.5 else 0]
  return res


In [62]:
result = pred_result(pred_data)
print(result)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[0]


In [63]:
pred_data_2 = pd.DataFrame({"Pregnancies": [10],	"Glucose": [120],	"BloodPressure": [20],"SkinThickness": [22] ,
                          "Insulin":[30.6],"BMI":[32],"DiabetesPedigreeFunction": [0.22],"Age":[55]})
#pred_data_sc_2 = sc.transform(pred_data_2)
#res = model.predict(pred_data_sc_2)
#print(res)
result = pred_result(pred_data_2)
print(result)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step
[1]


In [66]:
pred_data_3 = pd.DataFrame({"Pregnancies": [10],	"Glucose": [120],	"BloodPressure": [20],"SkinThickness": [22] ,
                          "Insulin":[30.6],"BMI":[29],"DiabetesPedigreeFunction": [0.22],"Age":[60]})
result = pred_result(pred_data_3)
print(result)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step
[1]
