### Batch Normalization

In [54]:
import os
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt 
import seaborn as sns 
import time 
plt.style.use("fivethirtyeight")
%load_ext tensorboard

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [55]:
(X_train_full, y_train_full), (X_test, y_test) = tf.keras.datasets.fashion_mnist.load_data()
X_train_full = X_train_full / 255.0
X_test = X_test / 255.0
X_valid, X_train = X_train_full[:5000], X_train_full[5000:]
y_valid, y_train = y_train_full[:5000], y_train_full[5000:]

In [56]:
tf.random.set_seed(42) # for reproducibility
np.random.seed(42) # for reproducibility

LAYERS = [
          tf.keras.layers.Flatten(input_shape=[28, 28]),
          tf.keras.layers.Dense(300, kernel_initializer="he_normal", name="hiddenLayer1"),
          tf.keras.layers.LeakyReLU(),
          tf.keras.layers.Dense(100, kernel_initializer="he_normal" ,name="hiddenLayer2"),
          tf.keras.layers.LeakyReLU(),
          tf.keras.layers.Dense(10, activation="softmax", name="outputLayer")
        ]

model = tf.keras.models.Sequential(LAYERS)


In [57]:
model.compile(loss="sparse_categorical_crossentropy",
              optimizer=tf.keras.optimizers.SGD(learning_rate=1e-3),
              metrics=["accuracy"])

In [58]:
model.summary()

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_8 (Flatten)         (None, 784)               0         
                                                                 
 hiddenLayer1 (Dense)        (None, 300)               235500    
                                                                 
 leaky_re_lu_6 (LeakyReLU)   (None, 300)               0         
                                                                 
 hiddenLayer2 (Dense)        (None, 100)               30100     
                                                                 
 leaky_re_lu_7 (LeakyReLU)   (None, 100)               0         
                                                                 
 outputLayer (Dense)         (None, 10)                1010      
                                                                 
Total params: 266610 (1.02 MB)
Trainable params: 26661

In [59]:
model.fit(X_train, y_train, epochs=10, validation_data=(X_valid, y_valid))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x21d97e74e10>

In [60]:
y_pred =  model.predict(X_test[:3])



In [61]:
model.evaluate(X_test, y_test)



[0.5138227939605713, 0.8227999806404114]

### Conclusion
- Runtime = 1m 4s
- Accuracy = 0.8321

## After applying  Batch Normalization

In [62]:
del model 

In [63]:
LAYERS = [
    tf.keras.layers.Flatten(input_shape=[28, 28]),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(300, activation="relu"),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(100, activation="relu"),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(10, activation="softmax")
]

model = tf.keras.models.Sequential(LAYERS)

In [64]:
## Normalization elements are added to the model 2 * 2 because one for gamma and one for beta, mue and sigma
784*2*2, 300*2*2, 100*2*2, (784*2*2 + 300*2*2 + 100*2*2)/2

(3136, 1200, 400, 2368.0)

In [65]:
model.summary()

Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_9 (Flatten)         (None, 784)               0         
                                                                 
 batch_normalization_6 (Bat  (None, 784)               3136      
 chNormalization)                                                
                                                                 
 dense_6 (Dense)             (None, 300)               235500    
                                                                 
 batch_normalization_7 (Bat  (None, 300)               1200      
 chNormalization)                                                
                                                                 
 dense_7 (Dense)             (None, 100)               30100     
                                                                 
 batch_normalization_8 (Bat  (None, 100)              

In [66]:
model.compile(loss="sparse_categorical_crossentropy",
              optimizer=tf.keras.optimizers.SGD(learning_rate=1e-3),
              metrics=["accuracy"])

In [67]:
model.fit(X_train, y_train, epochs=10, validation_data=(X_valid, y_valid))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x21d803b02d0>

In [68]:
model.evaluate(X_test, y_test)



[0.4071011543273926, 0.854200005531311]

### Conclusion
- Runtime = 1m 9 sec
- Accuracy = 0.8622