## Q2: 
1. sigmoid
2. (梯度检查)
3. 双层神经网络

### 1.1 实现sigmoid激活函数

$$
sigmoid(z) = \frac{1}{1+e^{-z}}
$$

### 1.2 sigmoid求导

$$
\begin{aligned}
s &= sigmoid(z) \\
s^{\prime} &= s \cdot (1-s)
\end{aligned}
$$

In [1]:
import numpy as np
import random
import os
from q1_softmax import softmax
from q2_gradcheck import gradcheck_naive

In [2]:
def sigmoid(z):
    """
    Compute the logistic sigmoid 1 / (1 + exp(-z)) element-wise.

    The value is evaluated as exp(min(z, 0)) / (1 + exp(-|z|)). This is
    algebraically the same function, but it only ever exponentiates
    non-positive numbers, so inputs of large magnitude cannot overflow.

    Arguments:
    z -- A scalar or numpy array.

    Return:
    s -- sigmoid(z)
    """
    # z >= 0: numerator is exp(0) = 1 and the denominator is 1 + exp(-z).
    # z <  0: the expression becomes exp(z) / (1 + exp(z)).
    numerator = np.exp(np.minimum(z, 0))
    denominator = 1 + np.exp(-np.abs(z))
    s = numerator / denominator

    return s

In [7]:
def sigmoid_grad(s):
    """
    Derivative of the sigmoid, written in terms of the sigmoid's output.

    Arguments:
    s -- A scalar or numpy array. The formula below treats it as an
         already-computed sigmoid value rather than the raw input.

    Return:
    ds -- s * (1 - s), computed element-wise.
    """
    complement = 1 - s
    return s * complement

In [8]:
def sigmoid_basic():
    """
    Smoke-test sigmoid and sigmoid_grad on a small 2x2 input.

    Prints each computed array and asserts that it matches precomputed
    reference values. Warning: these checks are not exhaustive.
    """
    print("Running basic tests...")
    inputs = np.array([[1, 2], [-1, -2]])
    activations = sigmoid(inputs)
    gradients = sigmoid_grad(activations)

    expected_activations = np.array([
        [0.73105858, 0.88079708],
        [0.26894142, 0.11920292]])
    expected_gradients = np.array([
        [0.19661193, 0.10499359],
        [0.19661193, 0.10499359]])

    # Print first, then check, so a failing value is visible in the output.
    for actual, reference in ((activations, expected_activations),
                              (gradients, expected_gradients)):
        print(actual)
        assert np.allclose(actual, reference, rtol=1e-05, atol=1e-06)

    print("You should verify these results by hand!\n")

In [9]:
# Run the basic sigmoid / sigmoid_grad checks; the printed arrays follow below.
sigmoid_basic()

Running basic tests...
[[0.73105858 0.88079708]
 [0.26894142 0.11920292]]
[[0.19661193 0.10499359]
 [0.19661193 0.10499359]]
You should verify these results by hand!



### 3.双层神经网络

In [36]:
def forward_backward_prop(data, labels, params, dimensions):
    """
    Forward and backward propagation for a two-layer sigmoidal network

    Compute the forward propagation and the cross entropy cost,
    and backward propagation for the gradients for all parameters.
    The function draws no random numbers and does not touch NumPy's
    global random state.

    Arguments:
    data -- N x Dx matrix, where each row is a training example.
    labels -- N x Dy matrix, where each row is a one-hot vector.
    params -- Flat vector of model parameters, laid out as W1, b1, W2, b2;
              these are unpacked for you.
    dimensions -- A tuple of input dimension, number of hidden units
                  and output dimension

    Return:
    cost -- Array of shape (1,): the cross entropy averaged over the N rows.
    grad -- 1-D array holding dW1, db1, dW2, db2 flattened and concatenated,
            i.e. the same layout as params.
    """
    ### Unpack network parameters (do not modify)
    ofs = 0
    Dx, H, Dy = (dimensions[0], dimensions[1], dimensions[2])

    W1 = np.reshape(params[ofs:ofs+ Dx * H], (Dx, H))
    ofs += Dx * H
    b1 = np.reshape(params[ofs:ofs + H], (1, H))
    ofs += H
    W2 = np.reshape(params[ofs:ofs + H * Dy], (H, Dy))
    ofs += H * Dy
    b2 = np.reshape(params[ofs:ofs + Dy], (1, Dy))
    # Resulting shapes:
    #   W1 (Dx, H)   input features -> hidden units
    #   b1 (1, H)
    #   W2 (H, Dy)   hidden units -> classes
    #   b2 (1, Dy)

    ###################################################
    N = data.shape[0]

    # Forward propagation
    A0 = data                             # (N, Dx)
    Z1 = np.dot(A0, W1) + b1              # (N, H)
    A1 = sigmoid(Z1)                      # (N, H)
    Z2 = np.dot(A1, W2) + b2              # (N, Dy)
    # NOTE(review): assumes softmax (from q1_softmax) normalizes each row
    # -- confirm against that module.
    A2 = softmax(Z2)                      # (N, Dy)

    # Cost: negative log of the probability assigned to each row's labeled
    # class, averaged over the batch.
    target = np.argmax(labels, axis=1)
    cost_each = -np.log(A2[range(N), target]).reshape(-1, 1)   # (N, 1), per-example loss
    cost = np.mean(cost_each, axis=0)                          # (1,), mean loss

    # Backward propagation. The 1/N factor comes from the mean in the cost.
    dZ2 = (A2 - labels) / N                     # (N, Dy)
    dW2 = np.dot(A1.T, dZ2)                     # (H, Dy)
    db2 = np.sum(dZ2, axis=0, keepdims=True)    # (1, Dy)

    # sigmoid_grad takes the activation A1 (not Z1) as its argument.
    dZ1 = np.dot(dZ2, W2.T) * sigmoid_grad(A1)  # (N, H)
    dW1 = np.dot(A0.T, dZ1)                     # (Dx, H)
    db1 = np.sum(dZ1, axis=0, keepdims=True)    # (1, H)
    ###################################################

    ### Stack gradients (do not modify)
    grad = np.concatenate((dW1.flatten(), db1.flatten(),
        dW2.flatten(), db2.flatten()))

    return cost, grad

In [37]:
def sanity_check():
    """
    Build random inputs and parameters for a small network and hand
    forward_backward_prop to gradcheck_naive.
    """
    print("Running sanity check...")

    N = 20
    dimensions = [10, 5, 10]
    Dx, H, Dy = dimensions

    data = np.random.randn(N, Dx)   # each row is one datum
    labels = np.zeros((N, Dy))
    for row in labels:
        # Each row is a view into `labels`; mark one random class per row.
        row[random.randint(0, Dy - 1)] = 1

    # One weight matrix plus one bias row per layer.
    n_params = (Dx + 1) * H + (H + 1) * Dy
    params = np.random.randn(n_params)

    def cost_and_grad(theta):
        return forward_backward_prop(data, labels, theta, dimensions)

    gradcheck_naive(cost_and_grad, params)


In [38]:
# Run the gradient check on randomly generated data and parameters.
sanity_check()

Running sanity check...
Gradient check passed!


In [11]:
############# Initialization #############
# Seed BOTH generators used in this cell: NumPy draws `data` and `params`,
# while the standard-library `random` module picks each label position.
# Seeding only NumPy would leave `labels` different on every run.
np.random.seed(1)
random.seed(1)
N = 20
dimensions = [10, 5, 10]
data = np.random.randn(N, dimensions[0])   # each row will be a datum
labels = np.zeros((N, dimensions[2]))
for i in range(N):
    labels[i, random.randint(0,dimensions[2]-1)] = 1   # one-hot row

# Flat parameter vector: (Dx + 1) * H entries for the first layer's weights
# and bias, (H + 1) * Dy for the second layer's.
params = np.random.randn((dimensions[0] + 1) * dimensions[1] + (
    dimensions[1] + 1) * dimensions[2], )

In [12]:
#########################参数初始化########################
ofs = 0
Dx, H, Dy = (dimensions[0], dimensions[1], dimensions[2])

W1 = np.reshape(params[ofs:ofs+ Dx * H], (Dx, H))
ofs += Dx * H
b1 = np.reshape(params[ofs:ofs + H], (1, H))
ofs += H
W2 = np.reshape(params[ofs:ofs + H * Dy], (H, Dy))
ofs += H * Dy
b2 = np.reshape(params[ofs:ofs + Dy], (1, Dy))
#########################参数初始化########################

In [17]:
# Number of rows (examples) in `data`.
data.shape[0]

20

In [33]:
"""
W1  # shape = (特征, 节点)  (10, 5)
b1  # shape = (1, 节点)     (1, 5)
W2  # shape = (节点, 类别)  (5, 10)
b2  # shape = (1, 类别)     (1, 10)
"""
###################################################
N = data.shape[0]
# samples = 20, features = 10, hidden units = 5, classes = 10

# Forward propagation
A0 = data                       # (samples, features)      (20, 10)
Z1 = A0.dot(W1) + b1            # (samples, hidden units)  (20, 5)
A1 = sigmoid(Z1)                # (samples, hidden units)  (20, 5)
Z2 = A1.dot(W2) + b2            # (samples, classes)       (20, 10)
A2 = softmax(Z2)                # (samples, classes)       (20, 10)

# Cost: negative log-probability of each row's labeled class, then the mean.
target = labels.argmax(axis=1)
cost_each = -np.log(A2[np.arange(N), target]).reshape(-1, 1)   # (samples, 1), per-example loss
cost = cost_each.mean(axis=0)                                  # (1,), mean loss

# Backward propagation
dZ2 = (A2 - labels) / N                       # (samples, classes)       (20, 10)
dW2 = A1.T.dot(dZ2)                           # (hidden units, classes)  (5, 10)
db2 = dZ2.sum(axis=0, keepdims=True)          # (1, classes)             (1, 10)

dZ1 = dZ2.dot(W2.T) * sigmoid_grad(A1)        # (samples, hidden units)  (20, 5)
dW1 = A0.T.dot(dZ1)                           # (features, hidden units) (10, 5)
db1 = dZ1.sum(axis=0, keepdims=True)          # (1, hidden units)        (1, 5)

###################################################