### Deep Neural Network Fundamental

#### Import Basic Libraries

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf               # backend
from tensorflow import keras          # frontend
from keras import Sequential          # architecture
from keras.layers import Dense        # fully connected
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import warnings
warnings.filterwarnings('ignore')

#### Import Dataset

In [2]:
df = pd.read_csv('C:\\Users\\Lenovo\\Downloads\\Social_Network_Ads.csv')

In [3]:
df.head(2)

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
0,15624510,Male,19.0,19000.0,0
1,15810944,Male,35.0,20000.0,0


#### Shape

In [4]:
df.shape

(400, 5)

#### We just considering only Gender , Age , EstimatedSalary , Purchased features.

In [5]:
df = df.iloc[: , 1:]

In [6]:
df.head()

Unnamed: 0,Gender,Age,EstimatedSalary,Purchased
0,Male,19.0,19000.0,0
1,Male,35.0,20000.0,0
2,Female,26.0,43000.0,0
3,Female,27.0,57000.0,0
4,Male,19.0,76000.0,0


In [7]:
df['Gender'] = df['Gender'].astype('category')

In [8]:
df['Gender'] = df['Gender'].cat.codes

In [9]:
df

Unnamed: 0,Gender,Age,EstimatedSalary,Purchased
0,1,19.0,19000.0,0
1,1,35.0,20000.0,0
2,0,26.0,43000.0,0
3,0,27.0,57000.0,0
4,1,19.0,76000.0,0
...,...,...,...,...
395,0,46.0,41000.0,1
396,1,51.0,23000.0,1
397,0,50.0,20000.0,1
398,1,36.0,33000.0,0


#### Check Missing Data

In [10]:
df.isnull().sum()

Gender             0
Age                0
EstimatedSalary    0
Purchased          0
dtype: int64

##### Insights : There is no any missing value

#### Data split into independent and dependent variable

In [11]:
x = df.iloc[: , :-1]

In [12]:
y = df.iloc[: , -1]

In [13]:
x.head(2)

Unnamed: 0,Gender,Age,EstimatedSalary
0,1,19.0,19000.0
1,1,35.0,20000.0


In [14]:
y.head(2)

0    0
1    0
Name: Purchased, dtype: int64

#### Split the data into train and test

In [15]:
x_train , x_test , y_train , y_test = train_test_split(x , y , test_size = 0.2 , random_state = 1 , stratify = y)

In [16]:
print(x_train.shape , x_test.shape , y_train.shape , y_test.shape)

(320, 3) (80, 3) (320,) (80,)


## MultiLayer Perceptron (DNN / ANN) without Feature Scaling

#### If your ask is more than one means output is more than one  then use functional and sequential is for ask is for one  

#### Use Sequential architecture

In [17]:
dnn = Sequential()

#### Hidden Layer 1

#### Activation function is use in two cases only 
#### 1.Hidden Layer
#### 2.Output Layer

#### When in hidden layer we always use relu as activation function

In [18]:
dnn.add(Dense(128 , activation = 'relu'  , input_dim = 3 ))

#### weight = 128 * 3 
#### Bias = 128

#### (128 * 3) + 128 = 512

#### Hidden Layer 2

In [19]:
dnn.add(Dense(128 , activation = 'relu'))

#### Weight = 128 * 128
#### Bias = 128

#### (128 * 128) + 128 = 16 , 512

#### Output Layer

#### Output Layer always use activation function is sigmoid because output is 1 only.

In [20]:
dnn.add(Dense(1 , activation = 'sigmoid'))

#### Weight = 128 *1 
#### Bias = 1

#### (128 * 1)  + 1 = 129

#### Total Parameters = 512 + 16 , 512 + 129  = 17153

In [21]:
dnn.summary()

#### Compile the DNN model

#### By default use adam as optimizer

#### optimization means performance get increase very fast

In [22]:
dnn.compile(optimizer = 'adam' , loss = 'binary_crossentropy' , metrics = ['accuracy'])

#### Binary Crossentropy is the synonym of log loss func. (it is loss func in classification )

#### Fit the DNN model without scaled data

#### Epochs means iteration
#### 1 epoch = 1 backprop + 1 forwordprop

In [23]:
history_without_feature_scaling =  dnn.fit(x_train , y_train , validation_data = (x_test , y_test) , epochs = 100)

Epoch 1/100
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 33ms/step - accuracy: 0.5079 - loss: 1593.7091 - val_accuracy: 0.6375 - val_loss: 590.2262
Epoch 2/100
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.4429 - loss: 358.5275 - val_accuracy: 0.6375 - val_loss: 181.6546
Epoch 3/100
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.5773 - loss: 232.2460 - val_accuracy: 0.6375 - val_loss: 111.0371
Epoch 4/100
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.5289 - loss: 200.0235 - val_accuracy: 0.3625 - val_loss: 298.2349
Epoch 5/100
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.4716 - loss: 155.6642 - val_accuracy: 0.3625 - val_loss: 275.3873
Epoch 6/100
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.4682 - loss: 269.9294 - val_accuracy: 0.6375 - val_loss: 205.8094
Epoch 7/

[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.5324 - loss: 89.5169 - val_accuracy: 0.6375 - val_loss: 118.1519
Epoch 51/100
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.5050 - loss: 187.6151 - val_accuracy: 0.6375 - val_loss: 317.0424
Epoch 52/100
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.5402 - loss: 217.3654 - val_accuracy: 0.6375 - val_loss: 143.9871
Epoch 53/100
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.4309 - loss: 250.3972 - val_accuracy: 0.3625 - val_loss: 265.8570
Epoch 54/100
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.4784 - loss: 147.3542 - val_accuracy: 0.6375 - val_loss: 8.5341
Epoch 55/100
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.4617 - loss: 223.2171 - val_accuracy: 0.6375 - val_loss: 39.6978
Epoch 56/100
[1m10/1

[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.4800 - loss: 49.3639 - val_accuracy: 0.3625 - val_loss: 59.2384
Epoch 100/100
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.5514 - loss: 75.8620 - val_accuracy: 0.6375 - val_loss: 245.4225


In [24]:
history_without_feature_scaling

<keras.src.callbacks.history.History at 0x2d8a3f8aad0>

#### Insights : We get very poor training and testing accuracy because of without feature scaling

## Case 2 : Mutltilayer Perceptron (DNN / ANN) with Feature Scaling

In [25]:
scaler = StandardScaler()

In [26]:
x_train_scaled = scaler.fit_transform(x_train)

In [27]:
x_test_scaled = scaler.transform(x_test)

#### Build DNN Model

In [28]:
dnn = Sequential()

#### Hidden Layer 1

In [29]:
dnn.add(Dense(128 , activation = 'relu' , input_dim = 3))

#### Hidden Layer 2

In [30]:
dnn.add(Dense(128 , activation = 'relu'))

#### Output Layer

In [31]:
dnn.add(Dense(1 , activation = 'sigmoid'))

#### Summary

In [32]:
dnn.summary()

#### Compile the DNN model

In [33]:
dnn.compile(optimizer = 'adam' , loss = 'binary_crossentropy' , metrics = ['accuracy'])

#### Fit the DNN model with scaled data

In [34]:
history_with_feature_scaling = dnn.fit(x_train_scaled , y_train , validation_data = (x_test_scaled , y_test) , epochs = 100)

Epoch 1/100
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 31ms/step - accuracy: 0.7184 - loss: 0.6642 - val_accuracy: 0.7875 - val_loss: 0.6595
Epoch 2/100
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.8318 - loss: 0.6478 - val_accuracy: 0.7750 - val_loss: 0.6481
Epoch 3/100
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.8367 - loss: 0.6328 - val_accuracy: 0.7375 - val_loss: 0.6375
Epoch 4/100
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.8194 - loss: 0.6233 - val_accuracy: 0.7500 - val_loss: 0.6274
Epoch 5/100
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.7947 - loss: 0.6118 - val_accuracy: 0.7500 - val_loss: 0.6179
Epoch 6/100
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.8126 - loss: 0.5975 - val_accuracy: 0.7500 - val_loss: 0.6088
Epoch 7/100
[1m10/10[0m [32m━━

[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.8833 - loss: 0.3113 - val_accuracy: 0.8250 - val_loss: 0.4150
Epoch 52/100
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.9059 - loss: 0.2979 - val_accuracy: 0.8250 - val_loss: 0.4139
Epoch 53/100
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.8862 - loss: 0.3047 - val_accuracy: 0.8375 - val_loss: 0.4129
Epoch 54/100
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.8849 - loss: 0.3137 - val_accuracy: 0.8375 - val_loss: 0.4119
Epoch 55/100
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.8840 - loss: 0.3271 - val_accuracy: 0.8375 - val_loss: 0.4110
Epoch 56/100
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.8794 - loss: 0.3109 - val_accuracy: 0.8375 - val_loss: 0.4102
Epoch 57/100
[1m10/10[0m [32m━━━━━━━━

In [35]:
history_with_feature_scaling

<keras.src.callbacks.history.History at 0x2d8a67e4350>

#### Insights : 

#### 1. performance without feature scaling is training accuracy  : 0.4397  and testing accuracy is val_accuracy: 0.6375 
#### 2. performance with feature scaling is training accuracy  : 0.8987  and testing accuracy is val_accuracy: 0.8625 

#### You must need to do feature scaling in DNN for better performance