In [1]:
# Compare several parameter-initialization methods
import numpy as np
import matplotlib.pyplot as plt

# Zero initialization
def initialize_parameters_zeros(layers_dims):
	"""
	Build an all-zero parameter set for a fully connected network.

	Arguments:
	layers_dims -- python array (list) holding the size of each layer, first entry first.

	Returns:
	parameters -- python dictionary with keys "W1", "b1", "W2", "b2", ...,
					one (W, b) pair per consecutive pair of entries in layers_dims:
					Wl -- zero matrix of shape (layers_dims[l], layers_dims[l-1])
					bl -- zero column vector of shape (layers_dims[l], 1)
					Fewer than two entries yields an empty dictionary.
	"""
	parameters = {}
	# Pair each layer size with the size of the layer feeding it; numbering starts at 1.
	size_pairs = zip(layers_dims[:-1], layers_dims[1:])
	for idx, (n_prev, n_curr) in enumerate(size_pairs, start=1):
		parameters['W{}'.format(idx)] = np.zeros((n_curr, n_prev))
		parameters['b{}'.format(idx)] = np.zeros((n_curr, 1))
	return parameters

In [7]:
# Demo: five layers of width 5 give four (W, b) pairs, every entry zero.
initialize_parameters_zeros(layers_dims=[5] * 5)

{'W1': array([[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]]),
 'b1': array([[0.],
        [0.],
        [0.],
        [0.],
        [0.]]),
 'W2': array([[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]]),
 'b2': array([[0.],
        [0.],
        [0.],
        [0.],
        [0.]]),
 'W3': array([[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]]),
 'b3': array([[0.],
        [0.],
        [0.],
        [0.],
        [0.]]),
 'W4': array([[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]]),
 'b4': array([[0.],
        [0.],
        [0.],
        [0.],
        [0.]])}

In [23]:
def initialize_parameters_random(layers_dims, scale=0.01, seed=3):
	"""
	Initialize weights with scaled standard-normal noise and biases with zeros.

	Arguments:
	layers_dims -- python array (list) containing the size of each layer.
	scale -- factor multiplied into the standard-normal draws of every weight
					matrix. The default 0.01 matches the previously hard-coded value.
	seed -- value passed to np.random.seed before any weights are drawn. The
					default 3 matches the previously hard-coded seed, so default calls
					return the same numbers as before. Pass None to skip reseeding and
					leave NumPy's global random state as the caller set it.

	Returns:
	parameters -- python dictionary containing your parameters "W1", "b1", ..., "WL", "bL":
					W1 -- weight matrix of shape (layers_dims[1], layers_dims[0])
					b1 -- bias vector of shape (layers_dims[1], 1)
					...
					WL -- weight matrix of shape (layers_dims[L], layers_dims[L-1])
					bL -- bias vector of shape (layers_dims[L], 1)

	Note: when seed is not None this reseeds NumPy's *global* generator, which
	also affects any later np.random calls made by the caller.
	"""
	if seed is not None:
		# Fixed seed makes the "random" weights reproducible from run to run.
		np.random.seed(seed)
	parameters = {}
	L = len(layers_dims)  # integer representing the number of layers
	for l in range(1, L):
		parameters['W' + str(l)] = np.random.randn(layers_dims[l], layers_dims[l - 1]) * scale
		parameters['b' + str(l)] = np.zeros((layers_dims[l], 1))
	return parameters


In [25]:
# Demo: five layers of width 5 give four small-random W matrices and four zero b vectors.
initialize_parameters_random(layers_dims=[5] * 5)

{'W1': array([[ 0.01788628,  0.0043651 ,  0.00096497, -0.01863493, -0.00277388],
        [-0.00354759, -0.00082741, -0.00627001, -0.00043818, -0.00477218],
        [-0.01313865,  0.00884622,  0.00881318,  0.01709573,  0.00050034],
        [-0.00404677, -0.0054536 , -0.01546477,  0.00982367, -0.01101068],
        [-0.01185047, -0.0020565 ,  0.01486148,  0.00236716, -0.01023785]]),
 'b1': array([[0.],
        [0.],
        [0.],
        [0.],
        [0.]]),
 'W2': array([[-0.00712993,  0.00625245, -0.00160513, -0.00768836, -0.00230031],
        [ 0.00745056,  0.01976111, -0.01244123, -0.00626417, -0.00803766],
        [-0.02419083, -0.00923792, -0.01023876,  0.01123978, -0.00131914],
        [-0.01623285,  0.00646675, -0.00356271, -0.01743141, -0.0059665 ],
        [-0.00588594, -0.00873882,  0.00029714, -0.02248258, -0.00267762]]),
 'b2': array([[0.],
        [0.],
        [0.],
        [0.],
        [0.]]),
 'W3': array([[ 0.01013183,  0.00852798,  0.01108187,  0.01119391,  0.01487543

In [29]:
# Xavier initialization (weights scaled by sqrt(1 / size of the previous layer))
def initialize_parameters_xavier(layers_dims, seed=3):
	"""
	Initialize weights with standard-normal noise scaled by sqrt(1 / fan_in), biases with zeros.

	Here fan_in is layers_dims[l - 1], the size of the layer feeding layer l.

	Arguments:
	layers_dims -- python array (list) containing the size of each layer.
	seed -- value passed to np.random.seed before any weights are drawn. The
					default 3 matches the previously hard-coded seed, so default calls
					return the same numbers as before. Pass None to skip reseeding and
					leave NumPy's global random state as the caller set it.

	Returns:
	parameters -- python dictionary containing your parameters "W1", "b1", ..., "WL", "bL":
					W1 -- weight matrix of shape (layers_dims[1], layers_dims[0])
					b1 -- bias vector of shape (layers_dims[1], 1)
					...
					WL -- weight matrix of shape (layers_dims[L], layers_dims[L-1])
					bL -- bias vector of shape (layers_dims[L], 1)

	Note: when seed is not None this reseeds NumPy's *global* generator, which
	also affects any later np.random calls made by the caller.
	"""
	if seed is not None:
		# Fixed seed makes the "random" weights reproducible from run to run.
		np.random.seed(seed)
	parameters = {}
	L = len(layers_dims)  # integer representing the number of layers
	for l in range(1, L):
		fan_in = layers_dims[l - 1]
		# 1.0 forces true division, so the scale cannot collapse to 0 for integer sizes.
		weight_scale = np.sqrt(1.0 / fan_in)
		parameters['W' + str(l)] = np.random.randn(layers_dims[l], fan_in) * weight_scale
		parameters['b' + str(l)] = np.zeros((layers_dims[l], 1))
	return parameters

In [31]:
# Demo: five layers of width 5; each W is scaled by sqrt(1/5), each b is zero.
initialize_parameters_xavier(layers_dims=[5] * 5)

{'W1': array([[ 0.79989897,  0.19521314,  0.04315498, -0.83337927, -0.12405178],
        [-0.15865304, -0.03700312, -0.28040323, -0.01959608, -0.21341839],
        [-0.58757818,  0.39561516,  0.39413741,  0.76454432,  0.02237573],
        [-0.18097724, -0.24389238, -0.69160568,  0.43932807, -0.49241241],
        [-0.52996892, -0.09196943,  0.66462575,  0.10586273, -0.45785063]]),
 'b1': array([[0.],
        [0.],
        [0.],
        [0.],
        [0.]]),
 'W2': array([[-0.31886025,  0.27961805, -0.07178376, -0.34383407, -0.10287287],
        [ 0.33319929,  0.88374361, -0.55638887, -0.28014216, -0.35945513],
        [-1.08184688, -0.41313235, -0.45789116,  0.50265822, -0.05899384],
        [-0.72595532,  0.28920205, -0.15932913, -0.77955637, -0.26682983],
        [-0.26322741, -0.39081204,  0.01328842, -1.00545144, -0.11974675]]),
 'b2': array([[0.],
        [0.],
        [0.],
        [0.],
        [0.]]),
 'W3': array([[ 0.45310941,  0.38138279,  0.49559652,  0.50060672,  0.66524951