In [1]:
import numpy as np

In [2]:
def structure(X, Y, nh=4):
    """
    Derive the layer sizes of a single-hidden-layer network from the data.

    :param X: training inputs; only ``X.shape[0]`` is read and used as the
              input-layer size
    :param Y: training targets; only ``Y.shape[0]`` is read and used as the
              output-layer size
    :param nh: number of hidden units. Defaults to 4, the value that used to
               be hard-coded, so existing two-argument calls are unaffected.
    :return: tuple ``(nx, nh, ny)`` -- input, hidden and output layer sizes
    """
    nx = X.shape[0]
    ny = Y.shape[0]
    return nx, nh, ny

In [5]:
def init_para(nx, nh, ny, seed=2):
    """
    Randomly initialise the weights and biases of the two-layer network.

    All four arrays are drawn from a standard normal distribution via
    ``np.random.randn``.

    :param nx: size of the input layer
    :param nh: size of the hidden layer
    :param ny: size of the output layer
    :param seed: value passed to ``np.random.seed`` before drawing, which makes
                 the result reproducible. Defaults to 2 (the previously
                 hard-coded value). Pass ``None`` to leave NumPy's global RNG
                 state untouched.
    :return: dict with keys ``"W1"`` (nh, nx), ``"b1"`` (nh, 1),
             ``"W2"`` (ny, nh) and ``"b2"`` (ny, 1)
    """
    # Reseeding mutates NumPy's *global* RNG, so it is opt-out: callers that
    # manage randomness themselves can pass seed=None.
    if seed is not None:
        np.random.seed(seed)

    # The draw order (W1, b1, W2, b2) is kept fixed so a given seed always
    # produces the same parameters.
    W1 = np.random.randn(nh, nx)
    b1 = np.random.randn(nh, 1)
    W2 = np.random.randn(ny, nh)
    b2 = np.random.randn(ny, 1)

    # Sanity-check every array's dimensions, not just W1.
    assert W1.shape == (nh, nx)
    assert b1.shape == (nh, 1)
    assert W2.shape == (ny, nh)
    assert b2.shape == (ny, 1)

    paras = {
        "W1": W1,
        "b1": b1,
        "W2": W2,
        "b2": b2
    }
    return paras

In [6]:
def sigmoid(x):
    """
    Compute the sigmoid of x in a numerically stable way.

    Arguments:
    x -- A scalar or numpy array of any size.

    Return:
    s -- sigmoid(x); a numpy scalar for scalar input, otherwise an array with
         the same shape as x.
    """
    x = np.asarray(x)
    # exp(-|x|) always lies in (0, 1], so it can never overflow; the naive
    # 1 / (1 + exp(-x)) overflows in exp() for large negative x.
    z = np.exp(-np.abs(x))
    # x >= 0: 1 / (1 + e^-x)            (same expression as the naive form)
    # x <  0: e^x / (1 + e^x)           (algebraically identical, but stable)
    s = np.where(x >= 0, 1 / (1 + z), z / (1 + z))
    if s.ndim == 0:
        # np.where returns a 0-d array for scalar input; unwrap it so scalars
        # still come back as numpy scalars.
        return s[()]
    return s

In [9]:
def forward_propagation(X, para):
    """
    Run one forward pass through the two-layer network.

    Layer 1 applies tanh, layer 2 applies the sigmoid.

    :param X: input samples with one column per sample, i.e. shape (nx, m).
              ``init_para`` creates ``W1`` with shape (nh, nx), so ``W1 . X``
              is the product that lines up.
    :param para: dict holding ``"W1"``, ``"b1"``, ``"W2"`` and ``"b2"``
    :return: tuple ``(A2, cache)`` where ``A2`` is the sigmoid output of the
             second layer and ``cache`` is a dict with the intermediate values
             ``"Z1"``, ``"A1"``, ``"Z2"`` and ``"A2"``
    """
    W1 = para["W1"]
    b1 = para["b1"]
    W2 = para["W2"]
    b2 = para["b2"]

    # Hidden layer: (nh, nx) . (nx, m) -> (nh, m). No transpose on W1.
    Z1 = np.dot(W1, X) + b1
    A1 = np.tanh(Z1)
    # Output layer consumes the hidden activations A1, not the raw input X:
    # (ny, nh) . (nh, m) -> (ny, m).
    Z2 = np.dot(W2, A1) + b2
    A2 = sigmoid(Z2)

    cache = {
        "Z1": Z1,
        "A1": A1,
        "Z2": Z2,
        "A2": A2
    }
    return A2, cache

In [12]:
def compute_cost(A2, Y):
    """
    Compute the mean binary cross-entropy between predictions and labels.

    :param A2: predicted probabilities (the sigmoid output of the network),
               same shape as ``Y``
    :param Y: true labels; ``Y.shape[1]`` is used as the number of samples
    :return: the cost ``-1/m * sum(Y*log(A2) + (1-Y)*log(1-A2))`` after
             ``np.squeeze``
    """
    m = Y.shape[1]
    # A saturated sigmoid yields exactly 0.0 or 1.0; log(0) is -inf and
    # 0 * -inf is nan, which would poison the cost. Keep the probabilities a
    # hair away from both ends. Work in float64 so that 1 - eps != 1.
    eps = 1e-15
    probs = np.clip(np.asarray(A2, dtype=float), eps, 1 - eps)
    logprobs = np.multiply(Y, np.log(probs)) + np.multiply(1 - Y, np.log(1 - probs))
    # Divide the summed value by m (rather than multiplying by -1/m) so the
    # result does not depend on integer-division semantics.
    cost = -np.sum(logprobs) / m
    cost = np.squeeze(cost)
    return cost

In [13]:
def back_propagation(cache, para, X, Y):
    """
    Compute the gradients of the cross-entropy cost for the two-layer network
    (tanh hidden layer, sigmoid output layer).

    :param cache: dict holding the forward-pass activations ``"A1"`` and ``"A2"``
    :param para: dict of parameters; only ``"W2"`` is read here
    :param X: input samples; ``X.shape[1]`` is used as the number of samples m
    :param Y: true labels, same shape as ``cache["A2"]``
    :return: dict with ``"dW1"``, ``"db1"``, ``"dW2"`` and ``"db2"``; each
             gradient has the same shape as the parameter it belongs to
    """
    m = X.shape[1]
    # Only W2 is needed to push the error back to the hidden layer.
    W2 = para['W2']
    # Activations from the forward pass.
    A1 = cache['A1']
    A2 = cache['A2']

    # Sigmoid output + cross-entropy loss: the output-layer error is A2 - Y.
    dZ2 = A2 - Y
    # (ny, m) . (m, nh) -> (ny, nh), matching W2.
    dW2 = np.dot(dZ2, A1.T) / m
    db2 = np.sum(dZ2, axis=1, keepdims=True) / m
    # Back through tanh: d/dz tanh(z) = 1 - tanh(z)^2 = 1 - A1^2.
    dZ1 = np.dot(W2.T, dZ2) * (1 - np.power(A1, 2))
    # (nh, m) . (m, nx) -> (nh, nx), matching W1.
    dW1 = np.dot(dZ1, X.T) / m
    db1 = np.sum(dZ1, axis=1, keepdims=True) / m

    grads = {"dW1": dW1,
             "db1": db1,
             "dW2": dW2,
             "db2": db2}

    return grads