In [1]:
import numpy as np
import h5py
import matplotlib.pyplot as plt
from testCases_v3 import *
from dnn_utils_v2 import sigmoid, sigmoid_backward, relu, relu_backward

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Default plotting settings: figure size, no image interpolation smoothing, grayscale colormap.
plt.rcParams['figure.figsize'] = (5.0, 4.0)
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

# Re-import edited modules automatically before each cell execution.
%load_ext autoreload
%autoreload 2

# Seed NumPy's global RNG so the random initialisations below are repeatable.
np.random.seed(1)

In [2]:
# Initialise a 2-layer network: input layer [nx] + hidden layer [nh] + output layer [ny]
def initialize_parameters(nx, nh, ny):
    """Create small random weights and zero biases for a two-layer network.

    Args:
        nx: number of input units.
        nh: number of hidden units.
        ny: number of output units.

    Returns:
        dict with keys "W1" (shape (nh, nx)), "b1" (shape (nh, 1)),
        "W2" (shape (ny, nh)) and "b2" (shape (ny, 1)). Weights are drawn
        from np.random.randn and scaled by 0.01; biases are all zeros.
    """
    weight_scale = 0.01

    # Draw order (W1 before W2) is kept so results under a fixed seed do not change.
    hidden_weights = np.random.randn(nh, nx) * weight_scale
    output_weights = np.random.randn(ny, nh) * weight_scale

    return {
        "W1": hidden_weights,
        "b1": np.zeros((nh, 1)),
        "W2": output_weights,
        "b2": np.zeros((ny, 1)),
    }

In [3]:
# Quick check: 2 inputs, 2 hidden units, 1 output; print the resulting parameter dict.
params = initialize_parameters(2, 2, 1)
print(params)

{'W1': array([[ 0.01624345, -0.00611756],
       [-0.00528172, -0.01072969]]), 'b1': array([[0.],
       [0.]]), 'W2': array([[ 0.00865408, -0.02301539]]), 'b2': array([[0.]])}


In [5]:
def initialize_parameters_deep(layer_dims, seed=3):
    """Initialise weights and biases for a network with any number of layers.

    Args:
        layer_dims: sequence of layer sizes, input layer first. Entry l-1 and
            entry l give the shape of layer l's weight matrix.
        seed: value passed to np.random.seed before any weights are drawn.
            Defaults to 3, which reproduces the previously hard-coded
            behaviour. Pass None to leave the global NumPy RNG untouched, so
            repeated calls give different weights and a seed set by the
            caller is respected.

    Returns:
        dict with, for every l in 1..len(layer_dims)-1:
            "W<l>": array of shape (layer_dims[l], layer_dims[l-1]),
                    np.random.randn values scaled by 0.01
            "b<l>": zeros of shape (layer_dims[l], 1)
    """
    # Reseeding mutates NumPy's *global* RNG state, so it is opt-out.
    if seed is not None:
        np.random.seed(seed)

    params = {}
    layers = len(layer_dims)

    for l in range(1, layers):
        params["W"+str(l)] = np.random.randn(layer_dims[l], layer_dims[l-1]) * 0.01
        params["b"+str(l)] = np.zeros((layer_dims[l], 1))

    return params

In [6]:
# Quick check with layer sizes 3 -> 2 -> 2 -> 1; print the generated W/b entries.
params = initialize_parameters_deep([3, 2, 2, 1])
print(params)

{'W1': array([[ 0.01788628,  0.0043651 ,  0.00096497],
       [-0.01863493, -0.00277388, -0.00354759]]), 'b1': array([[0.],
       [0.]]), 'W2': array([[-0.00082741, -0.00627001],
       [-0.00043818, -0.00477218]]), 'b2': array([[0.],
       [0.]]), 'W3': array([[-0.01313865,  0.00884622]]), 'b3': array([[0.]])}


## 正向传播

In [7]:
# Z[l] = W[l] A[l-1] + b[l]
def linear_forward(A, W, b):
    """Compute the linear (pre-activation) part of one layer's forward step.

    Args:
        A: input to the layer (activations of the previous layer, or the data).
        W: weight matrix of the layer.
        b: bias of the layer; added to the product of W and A.

    Returns:
        (Z, cache) where Z = np.dot(W, A) + b and cache is the tuple
        (A, W, b) holding the very same objects that were passed in.
    """
    linear_cache = (A, W, b)
    pre_activation = np.dot(W, A) + b
    return pre_activation, linear_cache

In [8]:
# Fixture inputs; linear_forward_test_case is not defined in this notebook,
# presumably it comes from the testCases_v3 star import.
A, W, b = linear_forward_test_case()

# Run the linear step on the fixture and show Z.
Z, cache = linear_forward(A, W, b)
print(Z)

[[ 3.26295337 -1.23429987]]


In [9]:
# A[l] = g[l](Z[l])
def linear_activation_forward(A_prev, W, b, act):
    """Run one full layer: linear step followed by the chosen activation.

    Args:
        A_prev: input to the layer (previous layer's activations, or the data).
        W: weight matrix of the layer.
        b: bias of the layer.
        act: name of the activation to apply, either "sigmoid" or "relu".

    Returns:
        (A, cache) where A is the activation output and cache is the pair
        (linear_cache, activation_cache): linear_cache is what linear_forward
        returned, activation_cache is the second value returned by the
        activation function.

    Raises:
        ValueError: if act is not "sigmoid" or "relu". Previously an unknown
            name fell through both branches and crashed later with an
            UnboundLocalError on lnr_cache.
    """
    # Validate the activation name first so a bad value fails fast and clearly.
    # sigmoid / relu are imported from dnn_utils_v2 and are used here as
    # functions returning an (activation, cache) pair.
    if act == "sigmoid":
        activation_fn = sigmoid
    elif act == "relu":
        activation_fn = relu
    else:
        raise ValueError(
            "unsupported activation %r; expected 'sigmoid' or 'relu'" % (act,)
        )

    # The linear step is identical for every activation, so it is done once.
    Z, lnr_cache = linear_forward(A_prev, W, b)
    A, act_cache = activation_fn(Z)

    cache = (lnr_cache, act_cache)
    return A, cache

In [10]:
# Fixture inputs; linear_activation_forward_test_case is not defined in this
# notebook, presumably it comes from the testCases_v3 star import.
A_prev, W, b = linear_activation_forward_test_case()
# Same inputs through both supported activations, to compare the outputs.
A, cache = linear_activation_forward(A_prev, W, b, act="sigmoid")
print("sigmoid A="+str(A))
A, cache = linear_activation_forward(A_prev, W, b, act="relu")
print("relu A="+str(A))

sigmoid A=[[0.96890023 0.11013289]]
relu A=[[3.43896131 0.        ]]


In [13]:
def model_forward(X, params):
    """Forward pass through the whole network: relu layers, then one sigmoid layer.

    Args:
        X: input data fed to the first layer.
        params: dict holding "W1", "b1", ..., "W<L>", "b<L>". The layer count
            L is taken as len(params) // 2, so the dict is expected to contain
            exactly one W and one b entry per layer.

    Returns:
        (AL, caches) where AL is the output of the final sigmoid layer and
        caches is a list with one cache per layer, in layer order, as returned
        by linear_activation_forward.
    """
    num_layers = len(params) // 2
    caches = []
    activation = X

    # Layers 1 .. L-1 use relu.
    for idx in range(1, num_layers):
        layer_id = str(idx)
        activation, layer_cache = linear_activation_forward(
            activation, params["W" + layer_id], params["b" + layer_id], act="relu"
        )
        caches.append(layer_cache)

    # Layer L uses sigmoid.
    last_id = str(num_layers)
    AL, output_cache = linear_activation_forward(
        activation, params["W" + last_id], params["b" + last_id], act="sigmoid"
    )
    caches.append(output_cache)

    return AL, caches

In [14]:
# Fixture inputs; L_model_forward_test_case is not defined in this notebook,
# presumably it comes from the testCases_v3 star import.
X, params = L_model_forward_test_case()
# Full forward pass; show the final output and how many per-layer caches were collected.
AL, caches = model_forward(X, params)
print("AL = "+str(AL))
print("caches length: "+str(len(caches)))

AL = [[0.17007265 0.2524272 ]]
caches length: 2
