In [1]:
import os
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
plt.style.use("fivethirtyeight")

### Downloading Fashion MNIST data

In [2]:
(X_train_full, y_train_full), (X_test, y_test) = tf.keras.datasets.fashion_mnist.load_data()
X_train_full = X_train_full / 255.0
X_test = X_test / 255.0
X_valid, X_train = X_train_full[:5000], X_train_full[5000:]
y_valid, y_train = y_train_full[:5000], y_train_full[5000:]

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz


In [3]:
print(X_train.shape, X_valid.shape)

(55000, 28, 28) (5000, 28, 28)


**Creating Simple Architecture**

In [4]:
tf.random.set_seed(42)
np.random.seed(42)

LAYERS = [ tf.keras.layers.Flatten(input_shape=[28, 28]),
    tf.keras.layers.Dense(300, kernel_initializer="he_normal"),
    tf.keras.layers.LeakyReLU(),
    tf.keras.layers.Dense(100, kernel_initializer="he_normal"),
    tf.keras.layers.LeakyReLU(),
    tf.keras.layers.Dense(10, activation="softmax")]


model = tf.keras.models.Sequential(LAYERS)

In [5]:
model.compile(loss="sparse_categorical_crossentropy",
              optimizer=tf.keras.optimizers.SGD(lr=1e-3),
              metrics=["accuracy"])

  super(SGD, self).__init__(name, **kwargs)


In [6]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 300)               235500    
                                                                 
 leaky_re_lu (LeakyReLU)     (None, 300)               0         
                                                                 
 dense_1 (Dense)             (None, 100)               30100     
                                                                 
 leaky_re_lu_1 (LeakyReLU)   (None, 100)               0         
                                                                 
 dense_2 (Dense)             (None, 10)                1010      
                                                                 
Total params: 266,610
Trainable params: 266,610
Non-trai

In [7]:
history = model.fit(X_train, y_train, epochs=10,
                    validation_data=(X_valid, y_valid), verbose=2)

Epoch 1/10
1719/1719 - 6s - loss: 1.2819 - accuracy: 0.6229 - val_loss: 0.8886 - val_accuracy: 0.7160 - 6s/epoch - 3ms/step
Epoch 2/10
1719/1719 - 5s - loss: 0.7955 - accuracy: 0.7361 - val_loss: 0.7130 - val_accuracy: 0.7656 - 5s/epoch - 3ms/step
Epoch 3/10
1719/1719 - 5s - loss: 0.6816 - accuracy: 0.7721 - val_loss: 0.6427 - val_accuracy: 0.7900 - 5s/epoch - 3ms/step
Epoch 4/10
1719/1719 - 5s - loss: 0.6217 - accuracy: 0.7944 - val_loss: 0.5900 - val_accuracy: 0.8064 - 5s/epoch - 3ms/step
Epoch 5/10
1719/1719 - 5s - loss: 0.5832 - accuracy: 0.8074 - val_loss: 0.5582 - val_accuracy: 0.8200 - 5s/epoch - 3ms/step
Epoch 6/10
1719/1719 - 5s - loss: 0.5553 - accuracy: 0.8156 - val_loss: 0.5350 - val_accuracy: 0.8238 - 5s/epoch - 3ms/step
Epoch 7/10
1719/1719 - 5s - loss: 0.5339 - accuracy: 0.8223 - val_loss: 0.5156 - val_accuracy: 0.8302 - 5s/epoch - 3ms/step
Epoch 8/10
1719/1719 - 5s - loss: 0.5173 - accuracy: 0.8272 - val_loss: 0.5079 - val_accuracy: 0.8284 - 5s/epoch - 3ms/step
Epoch 9/

## Batch Normalisation Approach One

In [8]:
LAYERS_BN_ONE = [ 
          tf.keras.layers.Flatten(input_shape=[28, 28]),
          tf.keras.layers.BatchNormalization(),
          tf.keras.layers.Dense(300, activation="relu", kernel_initializer="he_normal"),
          tf.keras.layers.BatchNormalization(),
          tf.keras.layers.Dense(100,activation="relu", kernel_initializer="he_normal"),
          tf.keras.layers.BatchNormalization(),
          tf.keras.layers.Dense(10, activation="softmax")
          ]

In [9]:
model_one = tf.keras.models.Sequential(LAYERS_BN_ONE)

In [10]:
model_one.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_1 (Flatten)         (None, 784)               0         
                                                                 
 batch_normalization (BatchN  (None, 784)              3136      
 ormalization)                                                   
                                                                 
 dense_3 (Dense)             (None, 300)               235500    
                                                                 
 batch_normalization_1 (Batc  (None, 300)              1200      
 hNormalization)                                                 
                                                                 
 dense_4 (Dense)             (None, 100)               30100     
                                                                 
 batch_normalization_2 (Batc  (None, 100)             

**Analyzing Extra Parameters imputed while considering Batch Normalisation**

In [11]:
784 * 4 ## gamma, beta, mu, sigma (Four Params are included in BN) Layer = batch_normalization

3136

In [12]:
300 * 4 ## Layer = batch_normalization_1

1200

In [13]:
100 * 4 ## Layer = batch_normalization_2

400

In [14]:
BN_para = 784 * 4 + 300 * 4 + 100 * 4 ## Total Parameters
BN_para

4736

In [15]:
trainable_BN_para = BN_para / 2
trainable_BN_para

2368.0

In [16]:
BN_para - trainable_BN_para ## non trainable

2368.0

In [17]:
model_one.layers

[<keras.layers.core.flatten.Flatten at 0x7f16f555d990>,
 <keras.layers.normalization.batch_normalization.BatchNormalization at 0x7f16f55bf750>,
 <keras.layers.core.dense.Dense at 0x7f16f8412510>,
 <keras.layers.normalization.batch_normalization.BatchNormalization at 0x7f16f555d8d0>,
 <keras.layers.core.dense.Dense at 0x7f16f65f91d0>,
 <keras.layers.normalization.batch_normalization.BatchNormalization at 0x7f16f9dcaa90>,
 <keras.layers.core.dense.Dense at 0x7f16f9e05290>]

In [18]:
bn1 = model_one.layers[1]
for variable in bn1.variables:
  print(variable.name, variable.trainable)

batch_normalization/gamma:0 True
batch_normalization/beta:0 True
batch_normalization/moving_mean:0 False
batch_normalization/moving_variance:0 False


In [19]:
model_one.compile(loss="sparse_categorical_crossentropy",
              optimizer=tf.keras.optimizers.SGD(lr=1e-3),
              metrics=["accuracy"])

  super(SGD, self).__init__(name, **kwargs)


In [20]:
history = model_one.fit(X_train, y_train, epochs=10,
                    validation_data=(X_valid, y_valid), verbose=2)

Epoch 1/10
1719/1719 - 9s - loss: 0.9123 - accuracy: 0.6961 - val_loss: 0.5906 - val_accuracy: 0.8032 - 9s/epoch - 5ms/step
Epoch 2/10
1719/1719 - 8s - loss: 0.6127 - accuracy: 0.7893 - val_loss: 0.5002 - val_accuracy: 0.8332 - 8s/epoch - 5ms/step
Epoch 3/10
1719/1719 - 8s - loss: 0.5483 - accuracy: 0.8105 - val_loss: 0.4617 - val_accuracy: 0.8424 - 8s/epoch - 4ms/step
Epoch 4/10
1719/1719 - 8s - loss: 0.5075 - accuracy: 0.8237 - val_loss: 0.4382 - val_accuracy: 0.8504 - 8s/epoch - 5ms/step
Epoch 5/10
1719/1719 - 8s - loss: 0.4800 - accuracy: 0.8329 - val_loss: 0.4223 - val_accuracy: 0.8552 - 8s/epoch - 5ms/step
Epoch 6/10
1719/1719 - 8s - loss: 0.4640 - accuracy: 0.8376 - val_loss: 0.4102 - val_accuracy: 0.8614 - 8s/epoch - 5ms/step
Epoch 7/10
1719/1719 - 8s - loss: 0.4495 - accuracy: 0.8427 - val_loss: 0.3989 - val_accuracy: 0.8616 - 8s/epoch - 4ms/step
Epoch 8/10
1719/1719 - 8s - loss: 0.4373 - accuracy: 0.8455 - val_loss: 0.3925 - val_accuracy: 0.8650 - 8s/epoch - 4ms/step
Epoch 9/

## BN Approach Two

In [None]:
LAYERS_BN_TWO = [ 
          tf.keras.layers.Flatten(input_shape=[28, 28]),
          tf.keras.layers.BatchNormalization(),
          tf.keras.layers.Dense(300, kernel_initializer="he_normal"),
          tf.keras.layers.BatchNormalization(),
          tf.keras.layers.Activation("relu"),
          tf.keras.layers.Dense(100,kernel_initializer="he_normal"),
          tf.keras.layers.BatchNormalization(),
          tf.keras.layers.Activation("relu"),          
          tf.keras.layers.Dense(10, activation="softmax")
          ]