In [None]:
#Importing packages
import time
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras.layers import Dense
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical

In [None]:
# MNIST: 70000 hand-written digit images (0-9), each 28*28 pixels,
# delivered as a (train, test) pair of (images, labels) tuples.
train_split, test_split = mnist.load_data()
x_train, y_train = train_split
x_test, y_test = test_split

In [None]:
# Prepare the data for a fully connected network.

# Images: cast to float32, flatten every 28*28 image into a 1-D vector of
# 784 features, then rescale pixel values from [0, 255] to [0, 1].
x_train = x_train.astype(np.float32).reshape(-1, 784) / 255.
x_test = x_test.astype(np.float32).reshape(-1, 784) / 255.

# Labels: one-hot encode the digit classes into 10 columns.
y_train, y_test = to_categorical(y_train, 10), to_categorical(y_test, 10)

In [None]:
def build_model(activation, initializer):
    """Build and compile a 784-512-256-128-64-10 fully connected classifier.

    Args:
        activation: Activation used by the four hidden Dense layers.
        initializer: Kernel initializer applied to every Dense layer,
            including the softmax output layer.

    Returns:
        A compiled Sequential model using the Adam optimizer, categorical
        cross-entropy loss and the 'acc' metric.
    """
    model = Sequential()

    # First hidden layer also declares the 784-feature input size.
    model.add(Dense(512, input_dim = 784, activation = activation, kernel_initializer = initializer))

    # Remaining hidden layers shrink towards the output.
    for units in (256, 128, 64):
        model.add(Dense(units, activation = activation, kernel_initializer = initializer))

    # One probability per digit class.
    model.add(Dense(10, activation = 'softmax', kernel_initializer = initializer))

    # The output is a 10-way softmax trained against one-hot labels, i.e. a
    # single-label multi-class problem, so the matching loss is categorical
    # cross-entropy. Binary cross-entropy would instead score each of the 10
    # outputs as an independent yes/no prediction.
    model.compile(loss = 'categorical_crossentropy', optimizer = tf.keras.optimizers.Adam(), metrics = ['acc'])
    return model

In [None]:
# RandomNormal(mean 0.0, stddev 0.01) kernels; the fit history is kept for plotting.
random_normal_model = build_model(
    activation = 'relu',
    initializer = tf.keras.initializers.RandomNormal(mean = 0.0, stddev = 0.01),
)
random_normal_hist = random_normal_model.fit(
    x_train,
    y_train,
    batch_size = 128,
    epochs = 15,
    validation_data = (x_test, y_test),
)

In [None]:
# RandomNormal(mean 0.0, stddev 0.1) kernels; the fit history is kept for plotting.
random_normal_model1 = build_model(
    activation = 'relu',
    initializer = tf.keras.initializers.RandomNormal(mean = 0.0, stddev = 0.1),
)
random_normal_hist1 = random_normal_model1.fit(
    x_train,
    y_train,
    batch_size = 128,
    epochs = 15,
    validation_data = (x_test, y_test),
)

In [None]:
# RandomNormal(mean 0.0, stddev 1.0) kernels; the fit history is kept for plotting.
random_normal_model2 = build_model(
    activation = 'relu',
    initializer = tf.keras.initializers.RandomNormal(mean = 0.0, stddev = 1.0),
)
random_normal_hist2 = random_normal_model2.fit(
    x_train,
    y_train,
    batch_size = 128,
    epochs = 15,
    validation_data = (x_test, y_test),
)

In [None]:
# RandomUniform kernels drawn from [-1, 1]; the fit history is kept for plotting.
random_uniform_model = build_model(
    activation = 'relu',
    initializer = tf.keras.initializers.RandomUniform(minval = -1, maxval = 1),
)
random_uniform_hist = random_uniform_model.fit(
    x_train,
    y_train,
    batch_size = 128,
    epochs = 15,
    validation_data = (x_test, y_test),
)

In [None]:
# RandomUniform kernels drawn from [0, 1]; the fit history is kept for plotting.
random_uniform_model1 = build_model(
    activation = 'relu',
    initializer = tf.keras.initializers.RandomUniform(minval = 0, maxval = 1),
)
random_uniform_hist1 = random_uniform_model1.fit(
    x_train,
    y_train,
    batch_size = 128,
    epochs = 15,
    validation_data = (x_test, y_test),
)

In [None]:
# Every kernel weight initialized to one; the fit history is kept for plotting.
ones_model = build_model(
    activation = 'relu',
    initializer = tf.keras.initializers.Ones(),
)
ones_hist = ones_model.fit(
    x_train,
    y_train,
    batch_size = 128,
    epochs = 15,
    validation_data = (x_test, y_test),
)

In [None]:
# Every kernel weight initialized to zero; the fit history is kept for plotting.
zeros_model = build_model(
    activation = 'relu',
    initializer = tf.keras.initializers.Zeros(),
)
zeros_hist = zeros_model.fit(
    x_train,
    y_train,
    batch_size = 128,
    epochs = 15,
    validation_data = (x_test, y_test),
)

In [None]:
# Glorot (Xavier) normal initialization.
glorot_normal_model = build_model(activation = 'relu', initializer = tf.keras.initializers.GlorotNormal())
# Train the model built on the line above (not an unrelated global `model`).
glorot_normal_hist = glorot_normal_model.fit(x_train, y_train, epochs = 15, validation_data = (x_test, y_test), batch_size = 128)

In [None]:
# Glorot (Xavier) uniform initialization.
glorot_uniform_model1 = build_model(activation = 'relu', initializer = tf.keras.initializers.GlorotUniform())
# Train the model built on the line above (not an unrelated global `model`).
glorot_uniform_hist1 = glorot_uniform_model1.fit(x_train, y_train, epochs = 15, validation_data = (x_test, y_test), batch_size = 128)

In [None]:
# He normal initialization.
he_normal_model = build_model(activation = 'relu', initializer = tf.keras.initializers.HeNormal())
# Train the model built on the line above (not an unrelated global `model`).
he_normal_hist = he_normal_model.fit(x_train, y_train, epochs = 15, validation_data = (x_test, y_test), batch_size = 128)

In [None]:
# He uniform initialization.
he_uniform_model = build_model(activation = 'relu', initializer = tf.keras.initializers.HeUniform())
# Train the model built on the line above (not an unrelated global `model`).
he_uniform_hist = he_uniform_model.fit(x_train, y_train, epochs = 15, validation_data = (x_test, y_test), batch_size = 128)

In [None]:
# LeCun uniform initialization.
lecun_uniform_model = build_model(activation = 'relu', initializer = tf.keras.initializers.LecunUniform())
# Train the model built on the line above (not an unrelated global `model`).
lecun_uniform_hist = lecun_uniform_model.fit(x_train, y_train, epochs = 15, validation_data = (x_test, y_test), batch_size = 128)

In [None]:
# LeCun normal initialization.
lecun_normal_model = build_model(activation = 'relu', initializer = tf.keras.initializers.LecunNormal())
# Train the model built on the line above (not an unrelated global `model`).
lecun_normal_hist = lecun_normal_model.fit(x_train, y_train, epochs = 15, validation_data = (x_test, y_test), batch_size = 128)

In [None]:
# Accuracy (left column) and loss (right column) curves for the runs that
# used random-normal / random-uniform initializers, one row per run.
# Each title states the initializer arguments that run was built with.
random_init_runs = [
    (random_normal_hist, 'Random Normal - StdDev of 0.01'),
    (random_normal_hist1, 'Random Normal - StdDev of 0.1'),
    (random_normal_hist2, 'Random Normal - StdDev of 1.0'),
    (random_uniform_hist, 'Random Uniform - [-1, 1]'),
    (random_uniform_hist1, 'Random Uniform - [0, 1]'),
]

# (history key, y-axis label) for each column; the validation curve is stored
# in the history under the same key prefixed with 'val_'.
curves = [('acc', 'Accuracy'), ('loss', 'Loss')]

fig, ax = plt.subplots(len(random_init_runs), len(curves), figsize = (12, 28))

for row, (hist, title) in enumerate(random_init_runs):
    for col, (metric, ylabel) in enumerate(curves):
        panel = ax[row, col]
        panel.plot(hist.history[metric])
        panel.plot(hist.history['val_' + metric])
        panel.set_ylabel(ylabel)
        panel.set_xlabel('Epochs')
        # Validation data passed to fit() is the test split, hence 'Test'.
        panel.legend(['Train', 'Test'], loc='upper left')
        panel.set_title(title)

plt.show()

In [None]:
# Training curves for the constant initializers: one row per run,
# accuracy in the left column and loss in the right column.
constant_runs = (('Ones', ones_hist), ('Zeros', zeros_hist))

# (history key, y-axis label, x-axis label) for the left and right columns.
panel_specs = (('acc', 'Accuracy', 'Epochs'), ('loss', 'Loss', 'Epoch'))

fig, ax = plt.subplots(2, 2, figsize = (12, 10))

for row, (run_title, run) in enumerate(constant_runs):
    for col, (key, y_text, x_text) in enumerate(panel_specs):
        axes = ax[row, col]
        # Training curve first, then the validation curve, to match the legend order.
        axes.plot(run.history[key])
        axes.plot(run.history['val_' + key])
        axes.set_ylabel(y_text)
        axes.set_xlabel(x_text)
        axes.legend(['Train', 'Test'], loc='upper left')
        axes.set_title(run_title)

plt.show()

## The cell below shows how to initialize weights manually, giving you complete control when a custom weight initialization scheme is required

In [None]:
def apply_uniform_fan_in_init(model):
    """Manually re-initialize every Dense layer of `model` in place.

    Kernels are drawn uniformly from [-y, y] with y = 1/sqrt(n), where n is
    the number of inputs feeding the layer (the first dimension of its kernel
    matrix). Biases, when the layer has one, are reset to zero.

    Args:
        model: A built Keras model whose `layers` are inspected.

    Returns:
        The same model, to allow chaining.
    """
    #Iterate over the layers of the given model
    for layer in model.layers:

        if not isinstance(layer, tf.keras.layers.Dense):
            continue

        # Current values as NumPy arrays: [kernel] or [kernel, bias].
        current = layer.get_weights()
        kernel = current[0]

        #Initialize weights in the range of [-y, y], where y = 1/sqrt(n)
        y = 1.0/np.sqrt(kernel.shape[0])
        new_values = [np.random.uniform(-y, y, kernel.shape).astype(kernel.dtype)]

        #Bias
        if len(current) > 1:
            new_values.append(np.zeros_like(current[1]))

        # Assigning into the list returned by `layer.weights` only changes that
        # temporary list; set_weights() writes into the layer's variables.
        layer.set_weights(new_values)

    return model


# Demonstrate on a freshly built model so the cell runs on its own.
custom_init_model = build_model(activation = 'relu', initializer = tf.keras.initializers.GlorotUniform())
apply_uniform_fan_in_init(custom_init_model)

## You can also visualize the distribution of the initialized layer weights graphically using TensorBoard - head over to the notebook [Tensorboard Demo]()

## Feel free to modify and run any of the weight init techniques given above, and make slight changes to the matplotlib cells to accommodate the weight init graphs you are interested in visualizing

## You can also combine various weight inits in a single graph for visualization and comparison