## Multi Layer Perceptron / DNN Model On Diabeties Data

#### Import Basic packages

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from keras.layers import Dense , BatchNormalization , Dropout
from keras import Sequential
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import warnings
warnings.filterwarnings('ignore')

#### Import Dataset

In [2]:
df = pd.read_csv('C:\\Users\\Lenovo\\Downloads\\diabetes.csv')

In [3]:
df.head(2)

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0


#### Shape 

In [4]:
df.shape

(768, 9)

### Exploratory Data Analysis

#### Info

In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 768 entries, 0 to 767
Data columns (total 9 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Pregnancies               768 non-null    int64  
 1   Glucose                   768 non-null    int64  
 2   BloodPressure             768 non-null    int64  
 3   SkinThickness             768 non-null    int64  
 4   Insulin                   768 non-null    int64  
 5   BMI                       768 non-null    float64
 6   DiabetesPedigreeFunction  768 non-null    float64
 7   Age                       768 non-null    int64  
 8   Outcome                   768 non-null    int64  
dtypes: float64(2), int64(7)
memory usage: 54.1 KB


#### Handling missing values

In [6]:
df.isnull().sum()

Pregnancies                 0
Glucose                     0
BloodPressure               0
SkinThickness               0
Insulin                     0
BMI                         0
DiabetesPedigreeFunction    0
Age                         0
Outcome                     0
dtype: int64

##### Describe

In [7]:
df.describe()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
count,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0
mean,3.845052,120.894531,69.105469,20.536458,79.799479,31.992578,0.471876,33.240885,0.348958
std,3.369578,31.972618,19.355807,15.952218,115.244002,7.88416,0.331329,11.760232,0.476951
min,0.0,0.0,0.0,0.0,0.0,0.0,0.078,21.0,0.0
25%,1.0,99.0,62.0,0.0,0.0,27.3,0.24375,24.0,0.0
50%,3.0,117.0,72.0,23.0,30.5,32.0,0.3725,29.0,0.0
75%,6.0,140.25,80.0,32.0,127.25,36.6,0.62625,41.0,1.0
max,17.0,199.0,122.0,99.0,846.0,67.1,2.42,81.0,1.0


##### Insights : In describe of features , We noticed there features like Glucose  , BloodPressure  , SkinThickness  , Insulin  ,BMI contains min value is 0.0000 . actually this thing is not possible. if any human have glucose zero or BloodPressure zero that human should not survive. so we need to take care of this data .

##### so we will relace this data with median data (for practice only . In real time , we will take help from SME (subject matter expert) or will discuss with client  about data.

### Data Preprocessing

#### use where function for to replace data

In [8]:
for feature in df.iloc[: , 1:6]:
    
    
    df[feature] = np.where(df[feature] == 0 , df[feature].median() , df[feature])

#### Describe after median imputation

In [9]:
df.describe()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
count,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0
mean,3.845052,121.65625,72.386719,27.334635,94.652344,32.450911,0.471876,33.240885,0.348958
std,3.369578,30.438286,12.096642,9.229014,105.547598,6.875366,0.331329,11.760232,0.476951
min,0.0,44.0,24.0,7.0,14.0,18.2,0.078,21.0,0.0
25%,1.0,99.75,64.0,23.0,30.5,27.5,0.24375,24.0,0.0
50%,3.0,117.0,72.0,23.0,31.25,32.0,0.3725,29.0,0.0
75%,6.0,140.25,80.0,32.0,127.25,36.6,0.62625,41.0,1.0
max,17.0,199.0,122.0,99.0,846.0,67.1,2.42,81.0,1.0


#### Split the data into independent and dependent variable

In [10]:
x = df.iloc[: , :-1]
y = df.iloc[: , -1]

In [11]:
x.head(2)

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age
0,6,148.0,72.0,35.0,30.5,33.6,0.627,50
1,1,85.0,66.0,29.0,30.5,26.6,0.351,31


In [12]:
y.head(2)

0    1
1    0
Name: Outcome, dtype: int64

#### Split the data into train and test

In [13]:
x_train , x_test , y_train , y_test = train_test_split(x , y , test_size = 0.2 , random_state = 20 , stratify = y)

In [14]:
print(x_train.shape , x_test.shape , y_train.shape , y_test.shape)

(614, 8) (154, 8) (614,) (154,)


#### Feature Scaling

In [15]:
scaler = StandardScaler()

In [16]:
x_train_scale = scaler.fit_transform(x_train)
x_test_scale = scaler.transform(x_test)

In [17]:
print(x_train_scale.shape , x_test_scale.shape)

(614, 8) (154, 8)


### Building MLP / DNN model

In [18]:
DNN = Sequential()

#### Fully Connected 1st hidden layer

In [19]:
DNN.add(Dense(units = 32 , activation = 'relu' , input_dim = 8 ))

### Add batch normalization for 1st hidden layer
##### BatchNormalization is make output featurescale(normalize)

In [20]:
#DNN.add(BatchNormalization())

##### Non trainable parameters are , after 1st hidden layer we have 32 neurons so 32 * (1(mean) + 1(variance)) = 64 , for 1 st hidden layer
##### after2nd hidden layer we have 16 neuron so 16 * (1(mean) + (variance)) = 32 for 2nd hidden layer.

### Add Dropout for 1st hidden layer

In [21]:
#DNN.add(Dropout(0.25))

#### Fully Connected 2nd hidden layer

In [22]:
DNN.add(Dense(units = 16 , activation = 'relu'))

### Add batch normalization for 2nd hidden layer
##### BatchNormalization is make output featurescale(normalize)

In [23]:
#DNN.add(BatchNormalization())

#### Non trainable parameters are , after 1st hidden layer we have 32 neurons so 32 * (1(mean) + 1(variance)) = 64 , for 1 st hidden layer
#### after2nd hidden layer we have 16 neuron so 16 * (1(mean) + (variance)) = 32 for 2nd hidden layer.

### Add Dropout for 2nd hidden layer

In [24]:
#DNN.add(Dropout(0.50))

#### Output layer

In [25]:
DNN.add(Dense(units = 1 , activation = 'sigmoid'))

#### Total operational parameters of sequential architecture

In [26]:
DNN.summary()

#### Total Operational parameters (8 * 32 + 32) + (32 * 16 + 16) + (16 * 1 + 1) = 833 (without BatchNormalization)

#### Total Operational parameters (9 * 32 + 32) + (32 * 16 + 16) + (16 * 1 + 1) + 128 +64(Batch Normalization for 1st hidden layer and 2nd hidden layer  = 1025(with BatchNormalization)

#### Total Operational parameters (8 * 32 + 32) + (32 * 16 + 16) + (16 * 1 + 1) + 0 + 0 (Dropout 1 and Dropout 2)  = 833(with Dropout)

#### Non trainable parameters means parameters of normalization is 96 . i.e. 32 * 2 + 16 * 2 = 96 (when we add Batch normalization)

#### Non trainable parameters are , after 1st hidden layer we have 32 neurons so 32 * (1(mean) + 1(variance)) = 64 , for 1 st hidden layer 
#### after2nd hidden layer we have 16 neuron so 16 * (1(mean) + (variance)) = 32 for 2nd hidden layer.
#### 64 + 32 = 96 total non trainable parameters

#### Compile the model

#### Compile is for setting loss fun as well as performance evaluation matrix and optimizer for improving performance of model very fast

In [27]:
DNN.compile(optimizer = 'adam' , loss = 'binary_crossentropy' , metrics = ['accuracy'])

##### If problem is binary classification then use loss is binary crossentropy
##### If problem is multiclass classification then use loss is categorical crossentropy
##### If problem is contains images then use loss is sparse categorical crossentropy

### fit the model or train the data

#### Till now we create model and now our model is ready for training so model.fit is nothing but only doing training of model and then after training we check performances and make prediction like y_train_pred and y_test_pred on training and testing data as well

In [28]:
DNN.fit(x_train_scale , y_train , validation_data = (x_test_scale , y_test) , epochs = 25)

Epoch 1/25
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 247ms/step - accuracy: 0.6305 - loss: 0.6754 - val_accuracy: 0.7078 - val_loss: 0.6207
Epoch 2/25
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - accuracy: 0.7116 - loss: 0.6135 - val_accuracy: 0.7403 - val_loss: 0.5687
Epoch 3/25
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 54ms/step - accuracy: 0.7229 - loss: 0.5737 - val_accuracy: 0.7532 - val_loss: 0.5306
Epoch 4/25
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 42ms/step - accuracy: 0.7245 - loss: 0.5323 - val_accuracy: 0.7987 - val_loss: 0.4954
Epoch 5/25
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 19ms/step - accuracy: 0.7700 - loss: 0.4921 - val_accuracy: 0.7987 - val_loss: 0.4660
Epoch 6/25
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 47ms/step - accuracy: 0.7506 - loss: 0.4782 - val_accuracy: 0.8052 - val_loss: 0.4574
Epoch 7/25
[1m20/20[0m [32m━━

<keras.src.callbacks.history.History at 0x1d1210207d0>

#### Insights : 
#### without Batchnormalization , we get accuracy is 0.8006 and testing accuracy is 0.7338
#### with Batchnormalization , we get accuracy is 0.8066 and testing accuracy is 0.7403
#### with Dropout  , we get accuracy is 0.7700 and testing accuracy is 0.7532