In [207]:
import numpy as np

## NumPy

In [208]:
def init():
    """Reset the module-level network state to its fixed starting values.

    Rebinds these globals and returns nothing:

    * ``x``     -- input vector, two ones.
    * ``y``     -- target vector, a single one.
    * ``w``     -- list of three weight matrices with shapes (3, 2), (2, 3)
      and (1, 2), every entry 0.5.
    * ``z``     -- list of per-layer output buffers (lengths 3, 2, 1), all ones.
    * ``delta`` -- list of per-layer error buffers (lengths 3, 2, 1), all ones.
    """
    global x, w, z, y, delta

    # Input width followed by the width of each of the three layers.
    widths = (2, 3, 2, 1)
    layer_widths = widths[1:]

    x = np.ones(widths[0])
    y = np.ones(widths[-1])
    # Each matrix maps the previous width (columns) to the next width (rows).
    w = [np.full((n_out, n_in), 0.5) for n_in, n_out in zip(widths, layer_widths)]
    z = [np.ones(n) for n in layer_widths]
    delta = [np.ones(n) for n in layer_widths]

# Create the module-level state (x, w, z, y, delta) that the cells below read.
init()

In [209]:
def f(x):
    """Identity map: return ``x`` unchanged.

    NOTE(review): presumably a placeholder activation function; none of the
    visible cells call it -- confirm before relying on it.
    """
    return x

In [210]:
def forward(x, w, z):
    """Propagate ``x`` through the chain of linear layers.

    Layer 0 computes ``dot(w[0], x)``; every later layer ``k`` computes
    ``dot(w[k], z[k-1])``.  No bias and no activation are applied.

    Parameters
    ----------
    x : input fed to the first layer.
    w : sequence of weight matrices, one per layer.
    z : list that receives each layer's output; overwritten in place.

    Returns
    -------
    The same ``z`` list that was passed in.
    """
    z[0] = np.dot(w[0], x)
    # Each remaining layer consumes the output just written for the one before it.
    for layer, weights in enumerate(w[1:], start=1):
        z[layer] = np.dot(weights, z[layer - 1])
    return z

# Forward pass on the initial state; z is filled in place and echoed as the cell output.
forward(x, w, z)

[array([1., 1., 1.]), array([1.5, 1.5]), array([1.5])]

In [211]:
def backward(y, w, z, delta=None):
    """Propagate the output error back through the layers.

    The last layer's error is the residual ``z[-1] - y``; each earlier
    layer ``k-1`` receives ``dot(w[k].T, delta[k])``.  (The original author
    left an open question on the residual line: "l1/l2?".)

    Parameters
    ----------
    y : target for the last layer's output.
    w : sequence of weight matrices, one per layer.
    z : per-layer outputs; only ``z[-1]`` is read.
    delta : optional list that receives the per-layer errors, overwritten in
        place.  When omitted, the module-level ``delta`` list is used, which
        is what this function always did before the parameter existed.

    Returns
    -------
    The ``delta`` list that was written to.
    """
    if delta is None:
        # Backward-compatible default: fall back to the notebook's global buffer.
        delta = globals()["delta"]

    delta[-1] = z[-1] - y
    # Walk from the last layer down to layer 1, filling the layer below each time.
    for layer in range(len(w) - 1, 0, -1):
        delta[layer - 1] = np.dot(w[layer].T, delta[layer])
    return delta

# Back-propagate the output error; this overwrites the module-level delta list.
backward(y, w, z)

[array([0.25, 0.25, 0.25]), array([0.25, 0.25]), array([0.5])]

In [212]:
def update(x, w, z, delta, eta=1):
    """Take one step on every weight matrix, modifying them in place.

    Layer 0 is changed by ``-eta * outer(delta[0], x)`` and each later layer
    ``k`` by ``-eta * outer(delta[k], z[k-1])``.

    Parameters
    ----------
    x : input that was fed to the first layer.
    w : list of weight matrices; updated in place.
    z : per-layer outputs from the forward pass.
    delta : per-layer errors from the backward pass.
    eta : step size (default 1).

    Returns
    -------
    The same ``w`` list that was passed in.
    """
    def _descend(layer, layer_input):
        # Original author's open question on this sign: "+=?".  The step is subtracted.
        w[layer] -= eta * np.outer(delta[layer], layer_input)

    _descend(0, x)
    for layer in range(1, len(w)):
        _descend(layer, z[layer - 1])
    return w

# One step with the default eta=1; the arrays in w are modified in place.
update(x, w, z, delta)

[array([[0.25, 0.25],
        [0.25, 0.25],
        [0.25, 0.25]]),
 array([[0.25, 0.25, 0.25],
        [0.25, 0.25, 0.25]]),
 array([[-0.25, -0.25]])]

In [213]:
# Rebind x, w, z, y and delta to their starting values before training.
init()

# 100 rounds of forward pass, error back-propagation and weight update (eta=0.1).
for i in range(100):
    z = forward(x, w, z)
    delta = backward(y, w, z)
    w = update(x, w, z, delta, eta=0.1)
    # Per-iteration debug trace, left disabled:
    #print('z:', z, 'delta:', delta, 'w:', w, sep='\n')
    #print()

# Echo the final layer outputs, errors and weights as the cell output.
z, delta, w

([array([0.92790055, 0.92790055, 0.92790055]),
  array([1.29149914, 1.29149914]),
  array([1.])],
 [array([0., 0., 0.]), array([0., 0.]), array([0.])],
 [array([[0.46395027, 0.46395027],
         [0.46395027, 0.46395027],
         [0.46395027, 0.46395027]]),
  array([[0.46395027, 0.46395027, 0.46395027],
         [0.46395027, 0.46395027, 0.46395027]]),
  array([[0.38714699, 0.38714699]])])

In [214]:
# Run the current weights on the input [1, 1]; z is overwritten with the new outputs.
forward(np.array([1, 1]), w, z)

[array([0.92790055, 0.92790055, 0.92790055]),
 array([1.29149914, 1.29149914]),
 array([1.])]

In [215]:
# Same weights, input changed to [2, 2]; z is overwritten again.
forward(np.array([2, 2]), w, z)

[array([1.85580109, 1.85580109, 1.85580109]),
 array([2.58299827, 2.58299827]),
 array([2.])]

- 权重之和不必为 1（训练后的权重约为 0.46 和 0.39，每行之和都不是 1）。
- 学习率太高时训练会发散，无法收敛。
- 偏置不是必需的：上面不带偏置的网络已经能拟合这个样本。

### Bias

In [216]:
def init():
    """Reset the module-level network state, including biases, to fixed values.

    Rebinds these globals and returns nothing:

    * ``x``     -- input vector, two ones.
    * ``y``     -- target vector, a single one.
    * ``w``     -- list of three weight matrices with shapes (3, 2), (2, 3)
      and (1, 2), every entry 0.5.
    * ``b``     -- list of three bias vectors (lengths 3, 2, 1), every entry 0.5.
    * ``z``     -- list of per-layer output buffers (lengths 3, 2, 1), all ones.
    * ``delta`` -- list of per-layer error buffers (lengths 3, 2, 1), all ones.
    """
    global x, w, b, z, y, delta

    # Input width followed by the width of each of the three layers.
    widths = (2, 3, 2, 1)
    layer_widths = widths[1:]

    x = np.ones(widths[0])
    y = np.ones(widths[-1])
    # Each matrix maps the previous width (columns) to the next width (rows).
    w = [np.full((n_out, n_in), 0.5) for n_in, n_out in zip(widths, layer_widths)]
    b = [np.full(n, 0.5) for n in layer_widths]
    z = [np.ones(n) for n in layer_widths]
    delta = [np.ones(n) for n in layer_widths]

# Rebuild the module-level state, this time including the bias list b.
init()

In [218]:
def forward(x, w, z, b=None):
    """Propagate ``x`` through the chain of affine layers.

    Layer 0 computes ``dot(w[0], x) + b[0]``; every later layer ``k``
    computes ``dot(w[k], z[k-1]) + b[k]``.  No activation is applied.

    Parameters
    ----------
    x : input fed to the first layer.
    w : sequence of weight matrices, one per layer.
    z : list that receives each layer's output; overwritten in place.
    b : optional sequence of bias vectors, one per layer.  When omitted, the
        module-level ``b`` is used, which is what this function always read
        before the parameter existed.

    Returns
    -------
    The same ``z`` list that was passed in.
    """
    if b is None:
        # Backward-compatible default: fall back to the notebook's global biases.
        b = globals()["b"]

    z[0] = np.dot(w[0], x) + b[0]
    # Each remaining layer consumes the output just written for the one before it.
    for layer in range(1, len(w)):
        z[layer] = np.dot(w[layer], z[layer - 1]) + b[layer]
    return z

# Forward pass with the bias terms; z is filled in place and echoed as the cell output.
forward(x, w, z)

[array([1.5, 1.5, 1.5]), array([2.75, 2.75]), array([3.25])]

In [219]:
def backward(y, w, z, delta=None):
    """Propagate the output error back through the layers.

    The last layer's error is the residual ``z[-1] - y``; each earlier
    layer ``k-1`` receives ``dot(w[k].T, delta[k])``.  The biases do not
    appear in this computation.

    Parameters
    ----------
    y : target for the last layer's output.
    w : sequence of weight matrices, one per layer.
    z : per-layer outputs; only ``z[-1]`` is read.
    delta : optional list that receives the per-layer errors, overwritten in
        place.  When omitted, the module-level ``delta`` list is used, which
        is what this function always did before the parameter existed.

    Returns
    -------
    The ``delta`` list that was written to.
    """
    if delta is None:
        # Backward-compatible default: fall back to the notebook's global buffer.
        delta = globals()["delta"]

    delta[-1] = z[-1] - y
    # Walk from the last layer down to layer 1, filling the layer below each time.
    for layer in range(len(w) - 1, 0, -1):
        delta[layer - 1] = np.dot(w[layer].T, delta[layer])
    return delta

# Back-propagate the output error; this overwrites the module-level delta list.
backward(y, w, z)

[array([1.125, 1.125, 1.125]), array([1.125, 1.125]), array([2.25])]

In [220]:
def update(x, w, b, z, delta, eta=1):
    """Take one step on every weight matrix and bias vector, in place.

    For layer 0 the weight change is ``-eta * outer(delta[0], x)``; for each
    later layer ``k`` it is ``-eta * outer(delta[k], z[k-1])``.  Every bias
    ``b[k]`` changes by ``-eta * delta[k]``.

    Parameters
    ----------
    x : input that was fed to the first layer.
    w : list of weight matrices; updated in place.
    b : list of bias vectors; updated in place.
    z : per-layer outputs from the forward pass.
    delta : per-layer errors from the backward pass.
    eta : step size (default 1).

    Returns
    -------
    The tuple ``(w, b)`` holding the same two lists that were passed in.
    """
    def _descend(layer, layer_input):
        # Weights first, then the bias, matching the per-layer order of the update.
        w[layer] -= eta * np.outer(delta[layer], layer_input)
        b[layer] -= eta * delta[layer]

    _descend(0, x)
    for layer in range(1, len(w)):
        _descend(layer, z[layer - 1])
    return w, b

# One step with the default eta=1; the arrays in w and b are modified in place.
update(x, w, b, z, delta)

([array([[-0.625, -0.625],
         [-0.625, -0.625],
         [-0.625, -0.625]]),
  array([[-1.1875, -1.1875, -1.1875],
         [-1.1875, -1.1875, -1.1875]]),
  array([[-5.6875, -5.6875]])],
 [array([-0.625, -0.625, -0.625]), array([-0.625, -0.625]), array([-1.75])])

In [221]:
# Rebind x, w, b, z, y and delta to their starting values.
init()

# A single iteration: print the state before the step, then run
# forward pass, error back-propagation and update (eta=0.1).
for i in range(1):
    print(i)
    # NOTE(review): the 'b' label has no trailing colon, unlike 'z:', 'delta:' and 'w:'.
    print('z:', z, 'delta:', delta, 'w:', w, 'b', b, sep='\n')
    print()
    z = forward(x, w, z)
    delta = backward(y, w, z)
    w, b = update(x, w, b, z, delta, eta=0.1)

# Print the state again after the step.
print('z:', z, 'delta:', delta, 'w:', w, 'b', b, sep='\n')

0
z:
[array([1., 1., 1.]), array([1., 1.]), array([1.])]
delta:
[array([1., 1., 1.]), array([1., 1.]), array([1.])]
w:
[array([[0.5, 0.5],
       [0.5, 0.5],
       [0.5, 0.5]]), array([[0.5, 0.5, 0.5],
       [0.5, 0.5, 0.5]]), array([[0.5, 0.5]])]
b
[array([0.5, 0.5, 0.5]), array([0.5, 0.5]), array([0.5])]

z:
[array([1.5, 1.5, 1.5]), array([2.75, 2.75]), array([3.25])]
delta:
[array([1.125, 1.125, 1.125]), array([1.125, 1.125]), array([2.25])]
w:
[array([[0.3875, 0.3875],
       [0.3875, 0.3875],
       [0.3875, 0.3875]]), array([[0.33125, 0.33125, 0.33125],
       [0.33125, 0.33125, 0.33125]]), array([[-0.11875, -0.11875]])]
b
[array([0.3875, 0.3875, 0.3875]), array([0.3875, 0.3875]), array([0.275])]
