In [2]:
import numpy as np

In [1]:
def sigmoid(x):
  """Logistic function 1 / (1 + exp(-x)), evaluated without overflow.

  The textbook form calls ``np.exp(-x)``, which overflows (and emits a
  RuntimeWarning) when ``x`` is a large negative number. Here the exponential
  is only ever taken of a non-positive value, so it stays within [0, 1].

  Args:
    x: real scalar or array-like.

  Returns:
    The element-wise sigmoid of ``x``: a NumPy scalar for scalar input,
    otherwise an array with the same shape as ``x``.
  """
  x = np.asarray(x)
  decay = np.exp(-np.abs(x))  # exponent is <= 0, so this cannot overflow
  # For x >= 0 use 1 / (1 + e^-x); for x < 0 use the algebraically equal
  # e^x / (1 + e^x). Both are expressed through the same bounded `decay`.
  result = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))
  return result[()]  # unwrap 0-d arrays so scalar input yields a scalar

def sigmoid_derivative(x):
  """Slope of the sigmoid, expressed through the sigmoid's own output.

  Args:
    x: value(s) that are already the *result* of a sigmoid (an activation),
      not the pre-activation input. For s = sigmoid(z), ds/dz = s * (1 - s).

  Returns:
    ``x * (1 - x)``, element-wise.
  """
  complement = 1 - x
  return x * complement

def mse_loss(y_true, y_pred):
  """Mean squared error between targets and predictions.

  Args:
    y_true: target values.
    y_pred: predicted values; must be subtractable from ``y_true``
      (NumPy broadcasting rules apply).

  Returns:
    The mean, over every element, of the squared difference.
  """
  residual = y_true - y_pred
  return np.mean(residual ** 2)

def forward(X, weights_input_hidden1, weights_hidden1_hidden2, weights_hidden2_output,
            bias_input_hidden1, bias_hidden1_hidden2, bias_hidden2_output):
    """Run one forward pass through the two hidden layers and the output layer.

    Every layer computes ``sigmoid(np.dot(layer_input, weights) + bias)``.

    Args:
        X: input passed to the first layer.
        weights_input_hidden1, bias_input_hidden1: first hidden layer.
        weights_hidden1_hidden2, bias_hidden1_hidden2: second hidden layer.
        weights_hidden2_output, bias_hidden2_output: output layer.

    Returns:
        Tuple ``(output, hidden1_output, hidden2_output)`` holding the
        activation of the output layer followed by those of the two hidden
        layers.
    """
    # (weights, bias) pairs in the order the signal flows through them.
    layers = (
        (weights_input_hidden1, bias_input_hidden1),      # hidden layer 1
        (weights_hidden1_hidden2, bias_hidden1_hidden2),  # hidden layer 2
        (weights_hidden2_output, bias_hidden2_output),    # output layer
    )

    activations = []
    signal = X
    for layer_weights, layer_bias in layers:
        signal = sigmoid(np.dot(signal, layer_weights) + layer_bias)
        activations.append(signal)

    hidden1_output, hidden2_output, output = activations
    return output, hidden1_output, hidden2_output


def backward(X, y_true, output, hidden1_output, hidden2_output, weights_input_hidden1,
             weights_hidden1_hidden2, weights_hidden2_output, bias_input_hidden1,
             bias_hidden1_hidden2, bias_hidden2_output, learning_rate):
    """Back-propagate the output error and update every parameter in place.

    The weight and bias arguments are modified with ``+=``; nothing is
    returned, so callers see the update only through that mutation.

    Args:
        X: input that was fed to the first layer.
        y_true: target values for ``output``.
        output, hidden1_output, hidden2_output: activations from the forward
            pass that produced ``output``.
        weights_*, bias_*: parameters of the three layers (updated in place).
        learning_rate: step size applied to every update.
    """
    # Output layer: (target - prediction) scaled by the sigmoid slope there.
    delta_out = (y_true - output) * sigmoid_derivative(output)

    # Hidden layer 2: push the output delta back through the output weights,
    # then scale by this layer's sigmoid slope.
    delta_h2 = delta_out.dot(weights_hidden2_output.T) * sigmoid_derivative(hidden2_output)

    # Hidden layer 1: same step, one layer further back.
    delta_h1 = delta_h2.dot(weights_hidden1_hidden2.T) * sigmoid_derivative(hidden1_output)

    # Every delta above was computed from the *old* weights; only now are the
    # parameters touched. Rule: W' = W + LR * (layer_input^T . delta),
    # b' = b + LR * (delta summed over axis 0).
    updates = (
        (weights_hidden2_output, bias_hidden2_output, hidden2_output, delta_out),
        (weights_hidden1_hidden2, bias_hidden1_hidden2, hidden1_output, delta_h2),
        (weights_input_hidden1, bias_input_hidden1, X, delta_h1),
    )
    for layer_weights, layer_bias, layer_input, delta in updates:
        layer_weights += layer_input.T.dot(delta) * learning_rate
        layer_bias += np.sum(delta, axis=0) * learning_rate

def initialize_parameters(input_size, hidden1_size, hidden2_size, output_size, seed=None):
    """Create the initial weights and biases for the three layers.

    Weights are drawn from a standard normal distribution; biases start at
    zero.

    Args:
        input_size: number of columns of the first weight matrix's input side.
        hidden1_size: width of the first hidden layer.
        hidden2_size: width of the second hidden layer.
        output_size: width of the output layer.
        seed: optional seed (or ``numpy.random.Generator``) for a reproducible
            initialization. When ``None`` (the default) the weights come from
            NumPy's global random state via ``np.random.randn``, exactly as
            before, so ``np.random.seed(...)`` set by the caller is honoured.

    Returns:
        Tuple ``(weights_input_hidden1, weights_hidden1_hidden2,
        weights_hidden2_output, bias_input_hidden1, bias_hidden1_hidden2,
        bias_hidden2_output)``. Weight matrices have shape
        ``(fan_in, fan_out)``; biases have shape ``(1, fan_out)``.
    """
    if seed is None:
        draw = np.random.randn  # legacy global-state behaviour
    else:
        # A private generator keeps the global random state untouched.
        rng = np.random.default_rng(seed)

        def draw(rows, cols):
            return rng.standard_normal((rows, cols))

    # Initialize weights (drawn in this fixed order so a seed is repeatable).
    weights_input_hidden1 = draw(input_size, hidden1_size)
    weights_hidden1_hidden2 = draw(hidden1_size, hidden2_size)
    weights_hidden2_output = draw(hidden2_size, output_size)

    # Initialize biases.
    bias_input_hidden1 = np.zeros((1, hidden1_size))
    bias_hidden1_hidden2 = np.zeros((1, hidden2_size))
    bias_hidden2_output = np.zeros((1, output_size))

    return weights_input_hidden1, weights_hidden1_hidden2, weights_hidden2_output, \
           bias_input_hidden1, bias_hidden1_hidden2, bias_hidden2_output


def train(X, y_true, epochs, learning_rate, input_size, hidden1_size, hidden2_size, output_size,
          log_every=1000):
    """Train the three-layer sigmoid network with full-batch updates.

    Each epoch runs one forward pass over all of ``X`` followed by one
    backward pass that updates the parameters in place.

    Args:
        X: training inputs.
        y_true: training targets.
        epochs: number of forward/backward iterations to run.
        learning_rate: step size handed to ``backward``.
        input_size, hidden1_size, hidden2_size, output_size: layer widths
            handed to ``initialize_parameters``.
        log_every: print the loss whenever ``epoch % log_every == 0``.
            Defaults to 1000; pass ``0`` or ``None`` to disable printing.

    Returns:
        Tuple ``(weights_input_hidden1, weights_hidden1_hidden2,
        weights_hidden2_output, bias_input_hidden1, bias_hidden1_hidden2,
        bias_hidden2_output)`` holding the trained parameters.
    """
    # Initialize weights and biases. The six arrays stay in one tuple because
    # `forward`, `backward` and the return value all use this same order.
    params = initialize_parameters(input_size, hidden1_size, hidden2_size, output_size)

    for epoch in range(epochs):
        # Forward pass with the current parameters.
        output, hidden1_output, hidden2_output = forward(X, *params)

        # Report the loss periodically. It is measured here, before this
        # epoch's update, so it describes the parameters the epoch started with.
        if log_every and epoch % log_every == 0:
            loss = mse_loss(y_true, output)
            print(f'Epoch {epoch}, Loss: {loss}')

        # Backward pass: mutates the arrays inside `params` in place.
        backward(X, y_true, output, hidden1_output, hidden2_output, *params, learning_rate)

    return params


In [3]:
# Example: fit the network to the XOR truth table.
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y_true = np.array([[0], [1], [1], [0]])

# Input/output widths follow the data; the hidden widths are free choices.
input_size = X.shape[1]
hidden1_size = 4
hidden2_size = 3
output_size = y_true.shape[1]

epochs = 10000
learning_rate = 0.1

# The initial weights are drawn from NumPy's global random state, so fix the
# seed to make the loss curve identical on every Restart & Run All.
# NOTE: loss values recorded from an earlier, unseeded run will not match.
SEED = 42
np.random.seed(SEED)

trained_params = train(X, y_true, epochs, learning_rate, input_size, hidden1_size, hidden2_size, output_size)


Epoch 0, Loss: 0.2599325613535397
Epoch 1000, Loss: 0.24006020838744463
Epoch 2000, Loss: 0.11960399889780675
Epoch 3000, Loss: 0.01235012486780169
Epoch 4000, Loss: 0.004335372905750736
Epoch 5000, Loss: 0.002417037313370791
Epoch 6000, Loss: 0.0016245719381712295
Epoch 7000, Loss: 0.0012049753194590634
Epoch 8000, Loss: 0.0009492663680188012
Epoch 9000, Loss: 0.0007787095059457619
