# Observing Before and After Applying Batch Normalization

In [1]:
import os
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import time
plt.style.use("fivethirtyeight")
%load_ext tensorboard

In [2]:
# Fetch Fashion-MNIST through Keras. The loader hands back two pairs:
# (training images, training labels) and (test images, test labels).
fashion_mnist = tf.keras.datasets.fashion_mnist
train_split, test_split = fashion_mnist.load_data()
X_train_full, y_train_full = train_split
X_test, y_test = test_split

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz


In [3]:
# Scale the pixel values into [0, 1] and hold out the first VALID_SIZE
# training images (with their labels) as a validation set.
PIXEL_MAX = 255     # divisor used to rescale the raw pixel values
VALID_SIZE = 5000   # number of leading training samples kept for validation

# Rescale only while the arrays are still integer-typed. The division turns
# them into floats, so re-running this cell cannot divide by 255 a second time.
if np.issubdtype(X_train_full.dtype, np.integer):
    X_train_full = X_train_full / PIXEL_MAX
if np.issubdtype(X_test.dtype, np.integer):
    X_test = X_test / PIXEL_MAX

X_valid, X_train = X_train_full[:VALID_SIZE], X_train_full[VALID_SIZE:]
y_valid, y_train = y_train_full[:VALID_SIZE], y_train_full[VALID_SIZE:]

In [4]:
# Baseline network (no batch normalization): flatten the 28x28 input, then two
# He-initialised Dense layers, each followed by a LeakyReLU, and finally a
# 10-unit softmax output.

# Fix the NumPy and TensorFlow seeds so repeated runs give similar output (optional).
np.random.seed(42)
tf.random.set_seed(42)

LAYERS = [
    tf.keras.layers.Flatten(input_shape=[28, 28]),
    tf.keras.layers.Dense(300, kernel_initializer="he_normal"),
    tf.keras.layers.LeakyReLU(),
    tf.keras.layers.Dense(100, kernel_initializer="he_normal"),
    tf.keras.layers.LeakyReLU(),
    tf.keras.layers.Dense(10, activation="softmax"),
]

model = tf.keras.models.Sequential(layers=LAYERS)

In [5]:
# Compile the baseline model: sparse categorical cross-entropy loss (the labels
# are passed as integer class ids), plain SGD, and accuracy as the metric.
# `learning_rate` is the supported keyword; `lr` is a deprecated alias that
# newer Keras releases no longer accept.
model.compile(loss = "sparse_categorical_crossentropy",
              optimizer = tf.keras.optimizers.SGD(learning_rate = 1e-3),
              metrics = ["accuracy"])



In [6]:
# Print the baseline model's layer-by-layer summary (output shapes and parameter counts).
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 300)               235500    
                                                                 
 leaky_re_lu (LeakyReLU)     (None, 300)               0         
                                                                 
 dense_1 (Dense)             (None, 100)               30100     
                                                                 
 leaky_re_lu_1 (LeakyReLU)   (None, 100)               0         
                                                                 
 dense_2 (Dense)             (None, 10)                1010      
                                                                 
Total params: 266610 (1.02 MB)
Trainable params: 266610 

In [7]:
# Train the baseline model for 10 epochs and measure how long training takes.

# perf_counter() is a monotonic, high-resolution clock intended for measuring
# elapsed intervals; time.time() follows the wall clock, which can be adjusted
# while the cell is running.
start = time.perf_counter()

history = model.fit(X_train, y_train, epochs = 10, validation_data = (X_valid, y_valid), verbose = 2)

end = time.perf_counter()

# total elapsed training time, in seconds
print(f"Runtime of the program: {end - start}")

Epoch 1/10
1719/1719 - 7s - loss: 0.6735 - accuracy: 0.7766 - val_loss: 0.5062 - val_accuracy: 0.8248 - 7s/epoch - 4ms/step
Epoch 2/10
1719/1719 - 8s - loss: 0.4818 - accuracy: 0.8308 - val_loss: 0.4369 - val_accuracy: 0.8538 - 8s/epoch - 5ms/step
Epoch 3/10
1719/1719 - 5s - loss: 0.4425 - accuracy: 0.8449 - val_loss: 0.4975 - val_accuracy: 0.8130 - 5s/epoch - 3ms/step
Epoch 4/10
1719/1719 - 6s - loss: 0.4198 - accuracy: 0.8540 - val_loss: 0.4030 - val_accuracy: 0.8656 - 6s/epoch - 4ms/step
Epoch 5/10
1719/1719 - 5s - loss: 0.4040 - accuracy: 0.8587 - val_loss: 0.3878 - val_accuracy: 0.8656 - 5s/epoch - 3ms/step
Epoch 6/10
1719/1719 - 6s - loss: 0.3876 - accuracy: 0.8634 - val_loss: 0.3860 - val_accuracy: 0.8708 - 6s/epoch - 3ms/step
Epoch 7/10
1719/1719 - 6s - loss: 0.3772 - accuracy: 0.8672 - val_loss: 0.3760 - val_accuracy: 0.8708 - 6s/epoch - 3ms/step
Epoch 8/10
1719/1719 - 7s - loss: 0.3671 - accuracy: 0.8699 - val_loss: 0.4003 - val_accuracy: 0.8558 - 7s/epoch - 4ms/step
Epoch 9/

**Baseline (without batch normalization)**

- Runtime of the program: 83.72 seconds
- Accuracy = 0.8688

# After applying batch normalization

In [8]:
# Drop the reference to the baseline model before building its replacement.
del model

# Same layer sizes as the baseline (300 -> 100 -> 10), with a BatchNormalization
# layer in front of every Dense layer.
# NOTE(review): this network uses activation="relu" with the default kernel
# initializer, whereas the baseline used he_normal + LeakyReLU, so the two
# models differ in more than batch normalization alone.
LAYERS_BN = [
    tf.keras.layers.Flatten(input_shape=[28, 28]),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(300, activation="relu"),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(100, activation="relu"),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(10, activation="softmax"),
]

model = tf.keras.models.Sequential(layers=LAYERS_BN)

In [9]:
# Print the batch-normalized model's summary; the BatchNormalization layers add their own parameters.
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_1 (Flatten)         (None, 784)               0         
                                                                 
 batch_normalization (Batch  (None, 784)               3136      
 Normalization)                                                  
                                                                 
 dense_3 (Dense)             (None, 300)               235500    
                                                                 
 batch_normalization_1 (Bat  (None, 300)               1200      
 chNormalization)                                                
                                                                 
 dense_4 (Dense)             (None, 100)               30100     
                                                                 
 batch_normalization_2 (Bat  (None, 100)              

In [11]:
# Index 1 is the first BatchNormalization layer (it sits right after Flatten in LAYERS_BN).
bn1 = model.layers[1]

In [12]:
# List each variable of the first batch-norm layer together with its trainable flag.
for bn_var in bn1.variables:
    print(f"{bn_var.name} {bn_var.trainable}")

batch_normalization/gamma:0 True
batch_normalization/beta:0 True
batch_normalization/moving_mean:0 False
batch_normalization/moving_variance:0 False


In [15]:
# Compile the batch-normalized model with the same loss, optimizer settings and
# metric as the baseline. `learning_rate` is the supported keyword; `lr` is a
# deprecated alias that newer Keras releases no longer accept.
model.compile(loss = "sparse_categorical_crossentropy",
              optimizer = tf.keras.optimizers.SGD(learning_rate = 1e-3),
              metrics = ["accuracy"])



In [18]:
# Train the batch-normalized model for 10 epochs and measure how long training takes.
# NOTE: fit() resumes from the model's current weights, so re-running this cell
# without rebuilding the model continues training instead of starting over.

# perf_counter() is a monotonic, high-resolution clock intended for measuring
# elapsed intervals; time.time() follows the wall clock, which can be adjusted
# while the cell is running.
start = time.perf_counter()

history = model.fit(X_train, y_train, epochs = 10, validation_data = (X_valid, y_valid), verbose = 2)

end = time.perf_counter()

# total elapsed training time, in seconds
print(f"total time taken: {end - start}")

Epoch 1/10
1719/1719 - 10s - loss: 0.2146 - accuracy: 0.9214 - val_loss: 0.3060 - val_accuracy: 0.8928 - 10s/epoch - 6ms/step
Epoch 2/10
1719/1719 - 10s - loss: 0.2097 - accuracy: 0.9235 - val_loss: 0.3125 - val_accuracy: 0.8930 - 10s/epoch - 6ms/step
Epoch 3/10
1719/1719 - 9s - loss: 0.2049 - accuracy: 0.9256 - val_loss: 0.3113 - val_accuracy: 0.8928 - 9s/epoch - 5ms/step
Epoch 4/10
1719/1719 - 10s - loss: 0.1929 - accuracy: 0.9296 - val_loss: 0.3151 - val_accuracy: 0.8916 - 10s/epoch - 6ms/step
Epoch 5/10
1719/1719 - 10s - loss: 0.1876 - accuracy: 0.9317 - val_loss: 0.3079 - val_accuracy: 0.8986 - 10s/epoch - 6ms/step
Epoch 6/10
1719/1719 - 10s - loss: 0.1785 - accuracy: 0.9351 - val_loss: 0.3203 - val_accuracy: 0.8904 - 10s/epoch - 6ms/step
Epoch 7/10
1719/1719 - 9s - loss: 0.1731 - accuracy: 0.9377 - val_loss: 0.3126 - val_accuracy: 0.8922 - 9s/epoch - 5ms/step
Epoch 8/10
1719/1719 - 10s - loss: 0.1669 - accuracy: 0.9394 - val_loss: 0.3268 - val_accuracy: 0.8906 - 10s/epoch - 6ms/s

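**With batch normalization**

- Time taken: 142.71 seconds
- Accuracy = 0.8948

Note: the first logged epoch of this run already reports a training accuracy of 0.9214 (the baseline's first epoch reported 0.7766), and the cell's execution count jumps from 15 to 18. This suggests the model had already been trained before the logged run. The two networks also differ in activation (ReLU vs. LeakyReLU) and kernel initializer, so these numbers are not a strict like-for-like comparison.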