**工程完整信息与更多演示：**
- 博主个人主页：[三余知行官方网站](https://threefish.site/trend)
- GitHub Repo：[ThreeFish-AI/deep-learning](https://github.com/ThreeFish-AI/deep-learning)

# 误差逆传播

In [1]:
import numpy as np

## AddLayer

In [2]:
class AddLayer:
    """Addition node of a computational graph: forward and backward pass."""

    def __init__(self):
        # Placeholders only: neither forward() nor backward() reads or
        # writes these attributes.
        self.x, self.y = None, None

    def forward(self, x, y):
        """Forward pass.

        Args:
            x: First operand.
            y: Second operand.

        Returns:
            The result of ``x + y``.
        """
        return x + y

    def backward(self, dout):
        """Backward pass.

        The local derivative of ``x + y`` with respect to either operand
        is 1, so the upstream gradient is handed to both inputs unchanged.

        Args:
            dout: Upstream gradient.

        Returns:
            A tuple ``(dx, dy)`` holding the gradient for ``x`` and for ``y``.
        """
        return dout * 1, dout * 1

## MulLayer

In [3]:
class MulLayer:
    """Multiplication node of a computational graph: forward and backward pass."""

    def __init__(self):
        # Operands of the most recent forward() call; backward() needs them.
        self.x, self.y = None, None

    def forward(self, x, y):
        """Forward pass.

        Stores both operands on the instance so that ``backward`` can use
        them later.

        Args:
            x: First operand.
            y: Second operand.

        Returns:
            The result of ``x * y``.
        """
        self.x, self.y = x, y
        return x * y

    def backward(self, dout):
        """Backward pass.

        For a product, the gradient of each operand is the upstream
        gradient times the *other* operand.

        Args:
            dout: Upstream gradient.

        Returns:
            A tuple ``(dx, dy)`` holding the gradient for ``x`` and for ``y``.
        """
        grad_x = dout * self.y
        grad_y = dout * self.x
        return grad_x, grad_y

In [4]:
# Scalar demo of the graph y = x1 * x2 + x3.
x1, x2, x3 = 100, 2, 300
mul_layer = MulLayer()
add_layer = AddLayer()

# Forward pass: multiply first, then add.
a = mul_layer.forward(x1, x2)
y = add_layer.forward(a, x3)
print(y)                # 500

# Backward pass: visit the layers in reverse order, seeding with dy/dy = 1.
da, dx3 = add_layer.backward(1)
dx1, dx2 = mul_layer.backward(da)
print(dx1, dx2, dx3)    # (x2, x1, 1) = (2, 100, 1)

500
2 100 1


In [5]:
# Same graph y = x1 * x2 + x3, this time with element-wise NumPy arrays.
x1, x2, x3 = np.array([100, 101, 102]), np.array([2, 3, 4]), np.array([300, 301, 302])
mul_layer = MulLayer()
add_layer = AddLayer()

# Forward pass: multiply first, then add (both element-wise).
a = mul_layer.forward(x1, x2)
y = add_layer.forward(a, x3)
print(y)                # [500 604 710]

# Backward pass: visit the layers in reverse order. The seed gradient is the
# scalar 1, so dx3 comes back as a scalar while dx1 and dx2 are arrays.
da, dx3 = add_layer.backward(1)
dx1, dx2 = mul_layer.backward(da)
print(dx1, dx2, dx3)    # (x2, x1, 1) = ([2 3 4], [100 101 102], 1)

[500 604 710]
[2 3 4] [100 101 102] 1


## SoftmaxWithLoss

In [6]:
def softmax(x):
    """Softmax (normalized exponential) taken along the last axis.

    A 1-D input is treated as a single score vector. For inputs with more
    dimensions, every slice along the last axis is normalized independently,
    so a ``(batch, classes)`` array is normalized row by row.

    Args:
        x: Array-like of scores; the last axis indexes the classes.

    Returns:
        np.ndarray: Array with the same shape as ``x`` whose values along
        the last axis are non-negative and sum to 1.
    """
    x = np.asarray(x)
    # Overflow guard: softmax is invariant to subtracting a constant from a
    # score vector, so shift by the per-vector maximum before exponentiating.
    shifted = x - np.max(x, axis=-1, keepdims=True)
    # Exponentiate once and reuse the result for numerator and denominator.
    exp_x = np.exp(shifted)
    return exp_x / np.sum(exp_x, axis=-1, keepdims=True)

In [7]:
def cross_entropy_error(y, t):
    """Mean cross-entropy error over a batch.

    Args:
        y: Network output. Either ``(batch, classes)`` or, for a single
            sample, a 1-D vector of length ``classes``.
        t: Supervision data, given either as one-hot vectors with the same
            number of elements as ``y`` or as integer class labels (one
            per sample).

    Returns:
        float: Cross-entropy error averaged over the batch.
    """
    y = np.asarray(y)
    t = np.asarray(t)

    # A single 1-D sample is promoted to a batch of one so that the batched
    # indexing below (and ``argmax(axis=1)``) applies unchanged.
    if y.ndim == 1:
        t = t.reshape(1, t.size)
        y = y.reshape(1, y.size)

    # If the supervision data is one-hot, convert it to the indices of the
    # correct labels.
    if t.size == y.size:
        t = t.argmax(axis=1)

    batch_size = y.shape[0]
    # Pick the predicted value of the correct class for every sample; the
    # small constant keeps log() finite when that value is exactly 0.
    return -np.sum(np.log(y[np.arange(batch_size), t] + 1e-7)) / batch_size

In [8]:
class SoftmaxWithLoss:
    """Softmax followed by cross-entropy error, fused into a single layer."""

    def __init__(self):
        self.loss = None
        self.y = None  # output of softmax
        self.t = None  # supervision data

    def forward(self, x, t):
        """Forward pass.

        Caches the supervision data and the softmax output on the instance
        so that ``backward`` can reuse them.

        Args:
            x: Input data (unnormalized scores).
            t: Supervision data, either one-hot vectors or class labels.

        Returns:
            float: The loss.
        """
        self.t = t
        self.y = softmax(x)
        self.loss = cross_entropy_error(self.y, self.t)

        return self.loss

    def backward(self, dout=1):
        """Backward pass.

        Must be called after ``forward``; it reads the cached ``self.y``
        and ``self.t``.

        Args:
            dout: Upstream gradient. Defaults to 1.

        Returns:
            np.ndarray: Gradient of the loss with respect to the input ``x``,
            scaled by ``dout``.
        """
        batch_size = self.t.shape[0]
        if self.t.size == self.y.size:
            # One-hot supervision data: the gradient is simply (y - t).
            dx = (self.y - self.t) / batch_size
        else:
            # Label supervision data: subtract 1 at each sample's correct
            # class. Work on a copy so the cached softmax output is kept.
            dx = self.y.copy()
            dx[np.arange(batch_size), self.t] -= 1
            dx = dx / batch_size

        # Chain rule: apply the upstream gradient instead of ignoring it.
        return dx * dout

In [9]:
# End-to-end demo of SoftmaxWithLoss on a batch of two samples.
softmax_with_loss = SoftmaxWithLoss()

# Example data
a = np.array([[1.0, 2.0, 0.5], [0.0, 1.0, 1.0]])  # unnormalized outputs, one row per sample
t = np.array([[1, 0, 0], [0, 1, 0]])  # one-hot encoded labels

# Forward pass
loss = softmax_with_loss.forward(a, t)

# Backward pass (uses the default upstream gradient dout=1)
da = softmax_with_loss.backward()

print("Softmax Result:\n", softmax_with_loss.y)
print("Cross Entropy Loss:", loss)
print("Gradient:\n", da)

Softmax Result:
 [[0.2312239  0.62853172 0.14024438]
 [0.1553624  0.4223188  0.4223188 ]]
Cross Entropy Loss: 1.1631814594485623
Gradient:
 [[-0.38438805  0.31426586  0.07012219]
 [ 0.0776812  -0.2888406   0.2111594 ]]


**工程完整信息与更多演示：**
- 博主个人主页：[三余知行官方网站](https://threefish.site/trend)
- GitHub Repo：[ThreeFish-AI/deep-learning](https://github.com/ThreeFish-AI/deep-learning)