# Types of weight initialization:

- a. Constant
- b. Uniform
- c. Normal
- d. LeCun - uniform & normal
- e. Glorot/Xavier - uniform & normal
- f. He - uniform & normal


In [1]:
import numpy as np

In [2]:
# Constant initialization: every entry of the 64x32 weight matrix holds the same value.
W1 = np.zeros(shape=(64, 32))   # all zeros
W2 = np.ones(shape=(64, 32))    # all ones
W3 = np.full((64, 32), 5.0)     # an arbitrary constant (5.0), float64 like W1 and W2


In [5]:
# Uniform initialization: each weight is drawn independently from U(-1, 1).
W_uniform = np.random.uniform(-1, 1, (64, 32))
# Normal initialization: each weight is drawn from a Gaussian with mean 0.0 and std 0.5.
W_normal = np.random.normal(loc=0.0, scale=0.5, size=(64, 32))

In [7]:
# LeCun initialization (uniform & normal): the spread depends only on the layer's fan-in.
F_in, F_out = 64, 32  # no. of inputs to / outputs from the layer

# Uniform variant: U(-limit, limit) with limit = sqrt(3 / fan_in), i.e. variance 1 / fan_in.
# NOTE(review): the original note called this the torch default -- confirm for the torch version in use.
limit = np.sqrt(3.0 / F_in)
W_LeCun_uniform = np.random.uniform(-limit, limit, (F_in, F_out))

# Normal variant: Gaussian with mean 0 and std = sqrt(1 / fan_in); `limit` holds the std here.
# Keras/TensorFlow's lecun_normal samples a *truncated* normal; this cell draws from a plain
# (untruncated) normal with the same target std.
limit = np.sqrt(1.0 / F_in)
W_LeCun_normal = np.random.normal(loc=0.0, scale=limit, size=(F_in, F_out))


In [9]:
# Glorot / Xavier initialization: the spread depends on fan-in + fan-out.
# Glorot is the default family in Keras/TensorFlow (the default Dense kernel initializer
# is the uniform variant, glorot_uniform).
F_in, F_out = 64, 32  # no. of inputs to / outputs from the layer

# Normal variant: Gaussian with mean 0 and std = sqrt(2 / (fan_in + fan_out));
# `limit` holds the std here.
limit = np.sqrt(2.0 / (F_in + F_out))
W_GX_normal = np.random.normal(loc=0.0, scale=limit, size=(F_in, F_out))

# Uniform variant: samples are strictly bounded to [-limit, limit) with
# limit = sqrt(6 / (fan_in + fan_out)), which gives the same variance as the normal variant.
limit = np.sqrt(6.0 / (F_in + F_out))
W_GX_uniform = np.random.uniform(-limit, limit, (F_in, F_out))

# Best practice (per the original notes): use Glorot for non-residual neural networks.

In [10]:
# He et al. / Kaiming / MSRA initialization (uniform & normal).
# Per the original notes: used for deep residual networks and with ReLU-family activations.
F_in, F_out = 64, 32  # no. of inputs to / outputs from the layer

# Uniform variant: U(-limit, limit) with limit = sqrt(6 / fan_in), i.e. variance 2 / fan_in.
limit = np.sqrt(6.0 / F_in)
W_He_uniform = np.random.uniform(-limit, limit, (F_in, F_out))

# Normal variant: Gaussian with mean 0 and std = sqrt(2 / fan_in); `limit` holds the std here.
limit = np.sqrt(2.0 / F_in)
W_He_normal = np.random.normal(loc=0.0, scale=limit, size=(F_in, F_out))
