In [2]:
import numpy as np

# ============ 1. Generate random data ============
# Seed NumPy's global RNG so the data and initial weights are reproducible.
np.random.seed(42)

# Problem dimensions.
N = 100                # number of samples
C_in = 1               # input channels (grayscale images in this example)
H, W = 5, 5            # image height and width
out_channels = 1       # conv-layer output channels (this example uses one)
kernel_size = 3        # conv kernel height and width (3x3)
hidden_dim_fc = 1      # fully connected output width = 1 (regression)

# Optimisation hyper-parameters.
learning_rate = 0.01
num_epochs = 2000

# Inputs X: uniform samples in [0, 1), shape (N, C_in, H, W) = (100, 1, 5, 5).
X = np.random.rand(N, C_in, H, W)

# Targets y, shape (N, 1): a simple regression task where
# y = sum of all pixels of the sample + a little Gaussian noise.
pixel_sums = np.sum(X, axis=(1, 2, 3)).reshape(N, 1)
target_noise = 0.1 * np.random.randn(N, 1)
y = pixel_sums + target_noise


# ============ 2. Initialise parameters ============
# Conv kernel shape: (out_channels, C_in, kernel_size, kernel_size);
# with out_channels=1, C_in=1, kernel_size=3 this is (1, 1, 3, 3).
kernel_shape = (out_channels, C_in, kernel_size, kernel_size)
conv_kernel = 0.1 * np.random.randn(*kernel_shape)
conv_bias = np.zeros((out_channels, 1))  # one bias per output channel

# Fully connected layer parameters.
# The conv output has shape (out_channels, H - kernel_size + 1, W - kernel_size + 1),
# here (1, 3, 3), so the flattened feature vector has 1*3*3 = 9 entries.
conv_out_h = H - kernel_size + 1
conv_out_w = W - kernel_size + 1
fc_in_dim = out_channels * conv_out_h * conv_out_w      # 9
fc_W = 0.1 * np.random.randn(fc_in_dim, hidden_dim_fc)  # (9, 1)
fc_b = np.zeros((1, hidden_dim_fc))                     # (1, 1)


# ============ 3. 定义辅助函数 ============

def relu(x):
    """Apply the rectified linear unit element-wise, i.e. max(0, x)."""
    rectified = np.maximum(0, x)
    return rectified

def relu_deriv(x):
    """Return the ReLU derivative at x: 1.0 where x > 0, 0.0 elsewhere (including x == 0)."""
    positive_mask = x > 0
    return positive_mask.astype(float)

def mean_squared_error(pred, true):
    """Return the mean of the squared differences between pred and true."""
    residual = pred - true
    return np.mean(residual ** 2)

# Convolution forward + backward passes (simplified version: stride=1, no padding)
def conv2d_forward(X, kernel, bias):
    """
    Valid (no padding), stride-1 2-D cross-correlation of a batch of images.

    Parameters:
        X:      array of shape (N, C_in, H, W)
        kernel: array of shape (out_channels, C_in, kH, kW)
        bias:   one value per output channel; shape (out_channels, 1) or
                (out_channels,)

    Returns:
        Array of shape (N, out_channels, H_out, W_out) where
        H_out = H - kH + 1 and W_out = W - kW + 1.
    """
    N, C_in, H, W = X.shape
    out_channels, _, kH, kW = kernel.shape
    H_out = H - kH + 1
    W_out = W - kW + 1

    # Flatten the bias so each channel contributes a true scalar. Indexing a
    # (out_channels, 1) array with bias[oc] yields a 1-element array, and
    # storing that into a single output element relies on NumPy's deprecated
    # implicit array-to-scalar conversion.
    bias_per_channel = np.asarray(bias).reshape(-1)

    out = np.zeros((N, out_channels, H_out, W_out))
    for i in range(H_out):
        for j in range(W_out):
            # Local window at (i:i+kH, j:j+kW) for every sample: (N, C_in, kH, kW)
            patch = X[:, :, i:i + kH, j:j + kW]
            # Broadcast against every kernel -> (N, out_channels, C_in, kH, kW),
            # then reduce over channel and window axes -> (N, out_channels).
            response = np.sum(patch[:, None] * kernel[None], axis=(2, 3, 4))
            out[:, :, i, j] = response + bias_per_channel
    return out

def conv2d_backward(dout, X, kernel):
    """
    Back-propagate through the stride-1, no-padding convolution (simple loop version).

    Parameters:
        dout:   upstream gradient, shape (N, out_channels, H_out, W_out)
        X:      layer input,       shape (N, C_in, H, W)
        kernel: conv weights,      shape (out_channels, C_in, kH, kW)

    Returns:
        (dX, dKernel, dBias): gradients shaped like X, like kernel, and
        (out_channels, 1) respectively.
    """
    batch, _in_ch, _in_h, _in_w = X.shape
    n_filters, _, kH, kW = kernel.shape
    _, _, out_h, out_w = dout.shape

    grad_X = np.zeros_like(X)
    grad_kernel = np.zeros_like(kernel)
    grad_bias = np.zeros((n_filters, 1))

    # Visit every output position in (sample, channel, row, col) order.
    for n, oc, i, j in np.ndindex(batch, n_filters, out_h, out_w):
        upstream = dout[n, oc, i, j]
        rows = slice(i, i + kH)
        cols = slice(j, j + kW)

        # Bias: every output element of a channel depends on it with weight 1.
        grad_bias[oc] += upstream
        # Kernel: weighted by the input window that produced this output.
        grad_kernel[oc] += X[n, :, rows, cols] * upstream
        # Input: scatter the kernel back onto the window it was applied to.
        grad_X[n, :, rows, cols] += kernel[oc] * upstream

    return grad_X, grad_kernel, grad_bias


# ============ 4. Training loop ============
# Full-batch gradient descent on: conv -> ReLU -> flatten -> fully connected -> MSE.
loss_history = []
for epoch in range(num_epochs):

    # ---- (1) Forward pass ----
    # 1) Convolution layer
    conv_out = conv2d_forward(X, conv_kernel, conv_bias)  # (N,1,3,3)

    # 2) ReLU
    relu_out = relu(conv_out)  # (N,1,3,3)

    # 3) Flatten: (N,1,3,3) -> (N,9); keep the 4-D shape for the backward pass
    N_, oc_, h_, w_ = relu_out.shape
    flatten_out = relu_out.reshape(N_, -1)  # (N, 9)

    # 4) Fully connected layer: (N,9) x (9,1) => (N,1)
    fc_out = flatten_out.dot(fc_W) + fc_b  # (N,1)

    # 5) Loss (MSE), recorded before this epoch's parameter update
    loss = mean_squared_error(fc_out, y)
    loss_history.append(loss)

    # ---- (2) Backward pass ----
    # dL/d(fc_out). The loss is a mean over every element of fc_out, so the
    # normaliser is fc_out.size; this equals N only when the FC layer has a
    # single output, as in the current configuration.
    dL_dfc_out = 2 * (fc_out - y) / fc_out.size  # shape (N,1)

    # 1) Fully connected layer backward
    # fc_out = flatten_out.dot(fc_W) + fc_b
    d_fc_W = flatten_out.T.dot(dL_dfc_out)  # (9,N) x (N,1)->(9,1)
    d_fc_b = np.sum(dL_dfc_out, axis=0, keepdims=True)  # (1,1)

    d_flatten_out = dL_dfc_out.dot(fc_W.T)  # (N,1) x (1,9)->(N,9)

    # 2) Undo the flatten to get the gradient w.r.t. the ReLU output
    d_relu_out = d_flatten_out.reshape(N_, oc_, h_, w_)  # (N,1,3,3)

    # 3) ReLU backward: gradient passes only where the pre-activation was > 0
    d_conv_out = d_relu_out * relu_deriv(conv_out)  # (N,1,3,3)

    # 4) Convolution layer backward (dX is returned but not used for any update)
    dX, dKernel, dBias = conv2d_backward(d_conv_out, X, conv_kernel)

    # ---- (3) Parameter update (in-place gradient descent step) ----
    conv_kernel -= learning_rate * dKernel
    conv_bias   -= learning_rate * dBias
    fc_W        -= learning_rate * d_fc_W
    fc_b        -= learning_rate * d_fc_b

    # ---- (4) Progress report every 200 epochs ----
    if (epoch+1) % 200 == 0:
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss = {loss:.6f}")

# ============ 5. Inspect the results ============
final_loss = loss_history[-1]
print("\n训练完成后最终Loss: {:.6f}".format(final_loss))

# Compare targets and predictions for the first few samples.
# fc_out holds the forward-pass output of the last epoch (computed before
# that epoch's parameter update).
pred = fc_out
test_indices = list(range(5))
for i in test_indices:
    print(f"Sample[{i}]: True={y[i][0]:.3f}, Pred={pred[i][0]:.3f}")


  out[n, oc, i, j] = np.sum(region * kernel[oc]) + bias[oc]


Epoch [200/2000], Loss = 3.403006
Epoch [400/2000], Loss = 1.091970
Epoch [600/2000], Loss = 0.660530
Epoch [800/2000], Loss = 0.669164
Epoch [1000/2000], Loss = 0.700347
Epoch [1200/2000], Loss = 0.697723
Epoch [1400/2000], Loss = 0.629579
Epoch [1600/2000], Loss = 0.613865
Epoch [1800/2000], Loss = 0.507784
Epoch [2000/2000], Loss = 0.435224

训练完成后最终Loss: 0.435224
Sample[0]: True=11.072, Pred=11.638
Sample[1]: True=11.341, Pred=11.967
Sample[2]: True=12.982, Pred=12.960
Sample[3]: True=11.804, Pred=12.544
Sample[4]: True=12.575, Pred=13.539
