In [20]:


# 前向传播函数
def forward(X):
    # 第一层
    z1 = np.dot(X, W1)
    a1 = np.tanh(z1)

    # 输出层
    z2 = np.dot(a1, W2)
    a2 = np.sigmoid(z2)

    return a2, (a1, z1, z2)

# 反向传播函数
def backward(X, Y, cache):
    a1, z1, z2 = cache

    # 计算输出层误差
    delta2 = a2 - Y
    dW2 = np.dot(a1.T, delta2)

    # 计算第一层误差
    delta1 = np.dot(delta2, W2.T) * (1 - np.power(a1, 2))
    dW1 = np.dot(X.T, delta1)

    return dW1, dW2

# 更新权重函数
def update_weights(W1, W2, dW1, dW2, learning_rate):
    W1 -= learning_rate * dW1
    W2 -= learning_rate * dW2
    return W1, W2

# 训练函数
def train(X, Y, num_epochs, learning_rate):
    for i in range(num_epochs):
        # 前向传播
        a2, cache = forward(X)

        # 反向传播
        dW1, dW2 = backward(X, Y, cache)

        # 更新权重
        W1, W2 = update_weights(W1, W2, dW1, dW2, learning_rate)

        # 打印损失
        loss = np.mean(np.square(a2 - Y))
        print('Epoch:', i, 'Loss:', loss)



# 训练网络
train(X, Y, 1000, 0.1)


In [22]:
# 定义神经网络结构和权重
import numpy as np

# 网络结构
# 输入数据维度为2
# 隐藏层维度为4
# 输出层维度为1

input_size = 2
hidden_size = 4
output_size = 1

# 随机初始化权重
# 每个W中第一行只是为了能够进行计算，和前面一层保持一致
W1 = np.random.randn(input_size, hidden_size)
W2 = np.random.randn(hidden_size, output_size)
print(W1)
print(W2)

[[-0.15757929 -0.14859494 -0.20527248 -0.15234392]
 [-0.75307366  1.50959478  0.79336134  0.01510461]]
[[ 1.40063196]
 [-0.60462767]
 [-0.25547128]
 [ 0.89567958]]


W1 2x4的意义：只要input是nx2 就输出nx4的结果
等于是输入中的每一行都和W中的元素做了形式相同的运算
W1中每个元素值都融入到了输出的结果中

In [49]:
# 生成数据
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
Y = np.array([[0], [1], [1], [0]])

# 学习率
lr = 0.02

In [48]:
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_derivative(x):
    return sigmoid(x)*(1-sigmoid(x))

def tanh_derivative(x):
    return (1 - np.power(np.tanh(x), 2))

In [50]:
# 前向传播


z1 = np.dot(X, W1)
a1 = np.tanh(z1)
print("z1：",z1)
print("a1：",a1)

# 输出层
z2 = np.dot(a1, W2)
a2 = sigmoid(z2)
print("z2：",z1)
print("a2：",a2)

z1： [[ 0.          0.          0.          0.        ]
 [-0.74114261  1.50820724  0.79148885  0.027496  ]
 [-0.14437181 -0.15474043 -0.20745957 -0.14516025]
 [-0.88551442  1.3534668   0.58402928 -0.11766425]]
a1： [[ 0.          0.          0.          0.        ]
 [-0.629835    0.90662039  0.65925164  0.02748907]
 [-0.14337705 -0.15351708 -0.20453362 -0.14414919]
 [-0.70917114  0.87486908  0.52558782 -0.11712422]]
z2： [[ 0.          0.          0.          0.        ]
 [-0.74114261  1.50820724  0.79148885  0.027496  ]
 [-0.14437181 -0.15474043 -0.20745957 -0.14516025]
 [-0.88551442  1.3534668   0.58402928 -0.11766425]]
a2： [[0.5       ]
 [0.17465497]
 [0.45353505]
 [0.14923128]]


In [51]:
# 反向传播函数
a1, z1, z2 = cache

# 计算 输出层误差
delta2 = (a2 - Y)*sigmoid_derivative(z2)

# W2的梯度 (a2 - Y)*a1
dW2 = np.dot(a1.T, delta2)


$$
\frac {\partial{L}} {\partial{w_1}} \,\,\,\,\,\,\,\,\,\,\,\,\,\,w_1 \gets w_1 - \varepsilon \frac {\partial{L}} {\partial{w_1}}
$$

$$
\frac {\partial{L}} {\partial{w_1}} = \frac {\partial{L}} {\partial{y}} \frac {\partial{y}} {\partial{w_1}} = (a2 - Y)*a1.T
$$

In [52]:

# 计算 第一层误差
delta1 = np.dot(delta2, W2.T) * tanh_derivative(z1)
dW1 = np.dot(X.T, delta1)

In [53]:
lr = 0.02
W1 -= lr * dW1
W2 -= lr * dW2

In [54]:
print(W1)
print(W2)

[[-0.14095018 -0.15625929 -0.20803758 -0.14311816]
 [-0.73947355  1.50801332  0.79123002  0.02924598]]
[[ 1.3887285 ]
 [-0.59229122]
 [-0.24744492]
 [ 0.89432214]]


In [55]:
loss = np.mean(np.square(a2 - Y))
print('Epoch:', 1, 'Loss:', loss)

Epoch: 1 Loss: 0.3130220807011879


In [56]:
np.square(a2 - Y)

array([[0.25      ],
       [0.68119441],
       [0.29862394],
       [0.02226997]])

In [13]:
# 定义神经网络结构和权重
import numpy as np


# 前向传播函数
def forward(X, W1, W2):
    # 第一层
    z1 = np.dot(X, W1)
    a1 = np.tanh(z1)

    # 输出层
    z2 = np.dot(a1, W2)
    a2 = sigmoid(z2)

    return a2, (a1, z1, z2)

# 反向传播函数
def backward(X, Y, a2, cache):
    a1, z1, z2 = cache

    # 计算输出层误差
    delta2 = (a2 - Y)*sigmoid_derivative(z2)
    dW2 = np.dot(a1.T, delta2)

    # 计算第一层误差
    delta1 = np.dot(delta2, W2.T) * tanh_derivative(z1)
    dW1 = np.dot(X.T, delta1)

    return dW1, dW2

# 更新权重函数
def update_weights(W1, W2, dW1, dW2, learning_rate):
    W1 -= learning_rate * dW1
    W2 -= learning_rate * dW2
    return W1, W2


def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_derivative(x):
    return sigmoid(x)*(1-sigmoid(x))

def tanh_derivative(x):
    return (1 - np.power(np.tanh(x), 2))


# 训练函数
def train(X, Y, num_epochs, learning_rate, W1, W2):
    
    W1 = W1 
    W2 = W2

    for i in range(num_epochs):
        # 前向传播
        a2, cache = forward(X, W1, W2)

        # 反向传播
        dW1, dW2 = backward(X, Y, a2, cache)

        # 更新权重
        W1, W2 = update_weights(W1, W2, dW1, dW2, learning_rate)

        # 打印损失
        loss = np.mean(np.square(a2 - Y))
        print('Epoch:', i, 'Loss:', loss)
    
    return W1, W2

    

# 生成数据
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
Y = np.array([[0], [1], [1], [0]])


# 网络结构
input_size = 2
hidden_size = 4
output_size = 1

# 随机初始化权重
W1 = np.random.randn(input_size, hidden_size)
W2 = np.random.randn(hidden_size, output_size)

print('Before:',W1)
print('Before:',W2)


# 训练网络
train(X, Y, 5000, 0.1, W1, W2)

    
print('After:',W1)
print('After:',W2)

Before: [[ 1.45917368  0.8124977  -0.21178796 -0.24443374]
 [ 0.24337333 -1.58436856 -1.58103183  0.62320021]]
Before: [[ 1.03504538]
 [-0.23389856]
 [ 0.23391025]
 [-0.65441071]]
Epoch: 0 Loss: 0.2626848822777315
Epoch: 1 Loss: 0.2617718762536867
Epoch: 2 Loss: 0.26088392734894394
Epoch: 3 Loss: 0.26002018889736056
Epoch: 4 Loss: 0.2591798430043293
Epoch: 5 Loss: 0.2583620999617163
Epoch: 6 Loss: 0.25756619755558213
Epoch: 7 Loss: 0.25679140029430253
Epoch: 8 Loss: 0.25603699858078155
Epoch: 9 Loss: 0.25530230784884345
Epoch: 10 Loss: 0.254586667680614
Epoch: 11 Loss: 0.253889440918759
Epoch: 12 Loss: 0.253210012784825
Epoch: 13 Loss: 0.2525477900126268
Epoch: 14 Loss: 0.2519022000036073
Epoch: 15 Loss: 0.2512726900093645
Epoch: 16 Loss: 0.2506587263450514
Epoch: 17 Loss: 0.2500597936361072
Epoch: 18 Loss: 0.24947539409972488
Epoch: 19 Loss: 0.2489050468616104
Epoch: 20 Loss: 0.24834828730788497
Epoch: 21 Loss: 0.24780466647143717
Epoch: 22 Loss: 0.24727375045160443
Epoch: 23 Loss: 0.

Epoch: 2530 Loss: 0.06501192393214884
Epoch: 2531 Loss: 0.06501049259286806
Epoch: 2532 Loss: 0.0650090627853416
Epoch: 2533 Loss: 0.06500763450718841
Epoch: 2534 Loss: 0.06500620775603226
Epoch: 2535 Loss: 0.06500478252950173
Epoch: 2536 Loss: 0.06500335882523024
Epoch: 2537 Loss: 0.06500193664085598
Epoch: 2538 Loss: 0.0650005159740219
Epoch: 2539 Loss: 0.06499909682237577
Epoch: 2540 Loss: 0.06499767918357006
Epoch: 2541 Loss: 0.06499626305526204
Epoch: 2542 Loss: 0.06499484843511368
Epoch: 2543 Loss: 0.06499343532079167
Epoch: 2544 Loss: 0.06499202370996746
Epoch: 2545 Loss: 0.06499061360031715
Epoch: 2546 Loss: 0.0649892049895215
Epoch: 2547 Loss: 0.06498779787526604
Epoch: 2548 Loss: 0.0649863922552409
Epoch: 2549 Loss: 0.06498498812714083
Epoch: 2550 Loss: 0.06498358548866534
Epoch: 2551 Loss: 0.06498218433751841
Epoch: 2552 Loss: 0.06498078467140879
Epoch: 2553 Loss: 0.06497938648804974
Epoch: 2554 Loss: 0.06497798978515916
Epoch: 2555 Loss: 0.06497659456045951
Epoch: 2556 Loss

Epoch: 4861 Loss: 0.06354582830595129
Epoch: 4862 Loss: 0.06354555685144321
Epoch: 4863 Loss: 0.06354528553171679
Epoch: 4864 Loss: 0.06354501434667373
Epoch: 4865 Loss: 0.06354474329621579
Epoch: 4866 Loss: 0.06354447238024492
Epoch: 4867 Loss: 0.06354420159866307
Epoch: 4868 Loss: 0.06354393095137234
Epoch: 4869 Loss: 0.06354366043827489
Epoch: 4870 Loss: 0.06354339005927298
Epoch: 4871 Loss: 0.063543119814269
Epoch: 4872 Loss: 0.06354284970316537
Epoch: 4873 Loss: 0.06354257972586466
Epoch: 4874 Loss: 0.06354230988226953
Epoch: 4875 Loss: 0.06354204017228265
Epoch: 4876 Loss: 0.06354177059580693
Epoch: 4877 Loss: 0.06354150115274523
Epoch: 4878 Loss: 0.06354123184300059
Epoch: 4879 Loss: 0.06354096266647612
Epoch: 4880 Loss: 0.063540693623075
Epoch: 4881 Loss: 0.06354042471270054
Epoch: 4882 Loss: 0.06354015593525612
Epoch: 4883 Loss: 0.06353988729064522
Epoch: 4884 Loss: 0.06353961877877139
Epoch: 4885 Loss: 0.0635393503995383
Epoch: 4886 Loss: 0.06353908215284972
Epoch: 4887 Loss: