# Batch Norm

## 从零实现

In [1]:
import numpy as np

In [2]:
class BatchNorm:
    """Minimal NumPy batch-normalization layer (forward pass only).

    Axis 1 of the input is the feature/channel axis; every other axis is
    treated as a batch axis and reduced over. A 2-D ``(N, C)`` input is
    therefore normalized per column, and an N-D input such as
    ``(N, C, H, W)`` is normalized per channel.

    Attributes:
        gamma: learnable scale, shape ``(num_features,)``, initialized to 1.
        beta: learnable shift, shape ``(num_features,)``, initialized to 0.
        momentum: weight given to the *previous* running estimate, i.e.
            ``running = momentum * running + (1 - momentum) * batch``.
            Note this is the complement of the ``momentum`` argument of
            ``torch.nn.BatchNorm1d`` (0.9 here corresponds to 0.1 there).
        epsilon: small constant added to the variance for numerical stability.
        running_mean / running_var: exponential moving averages of the batch
            statistics, used when ``training`` is False. ``running_var`` is
            accumulated from the biased (``ddof=0``) batch variance.
        training: when True, ``forward`` normalizes with batch statistics and
            updates the running estimates; when False it uses the running
            estimates and leaves them untouched.
    """

    def __init__(self, num_features, momentum=0.9, epsilon=1e-5):
        self.num_features = num_features
        self.gamma = np.ones(num_features)   # scale parameter
        self.beta = np.zeros(num_features)   # shift parameter
        self.momentum = momentum
        self.epsilon = epsilon
        self.running_mean = np.zeros(num_features)
        self.running_var = np.ones(num_features)
        self.training = True  # training-mode flag

    def forward(self, x):
        """Normalize ``x`` and apply the affine transform ``gamma * x_hat + beta``.

        Args:
            x: array-like of shape ``(N, num_features)`` or
                ``(N, num_features, ...)``.

        Returns:
            ``numpy.ndarray`` with the same shape as ``x``.

        Raises:
            ValueError: if ``x`` has fewer than 2 dimensions or its axis 1
                does not have ``num_features`` entries.
        """
        x = np.asarray(x)
        if x.ndim < 2:
            raise ValueError(
                f"expected input with at least 2 dimensions, got shape {x.shape}"
            )
        if x.shape[1] != self.num_features:
            raise ValueError(
                f"expected {self.num_features} features on axis 1, got shape {x.shape}"
            )

        # Reduce over every axis except the feature axis; for 2-D input this
        # is just axis 0, matching the plain (N, C) case.
        reduce_axes = (0,) + tuple(range(2, x.ndim))
        # Shape that lets the per-feature vectors broadcast against x.
        param_shape = (1, self.num_features) + (1,) * (x.ndim - 2)

        if self.training:
            # Statistics of the current batch.
            mean = np.mean(x, axis=reduce_axes)
            var = np.var(x, axis=reduce_axes)

            # Update the exponential moving averages used at inference time.
            self.running_mean = self.momentum * self.running_mean + (1 - self.momentum) * mean
            self.running_var = self.momentum * self.running_var + (1 - self.momentum) * var
        else:
            # Inference: rely on the accumulated running statistics.
            mean = self.running_mean
            var = self.running_var

        mean = np.reshape(mean, param_shape)
        var = np.reshape(var, param_shape)
        x_normalized = (x - mean) / np.sqrt(var + self.epsilon)

        # Scale and shift.
        gamma = np.reshape(self.gamma, param_shape)
        beta = np.reshape(self.beta, param_shape)
        return gamma * x_normalized + beta

In [3]:
# Simulated input data (seeded so the printed values are reproducible).
np.random.seed(42)
data = np.random.rand(10, 5)  # 10 samples, 5 features

# Create the BatchNorm layer.
batch_norm = BatchNorm(num_features=5)

# Training mode: normalizes with this batch's statistics and updates the
# running mean/variance once.
output_train = batch_norm.forward(data)
print("训练模式输出:\n", output_train)

# Switch to inference mode: the same data is now normalized with the running
# statistics. After a single training step these are still dominated by their
# initial values (mean 0, variance 1), so the output differs from the
# training-mode output above.
batch_norm.training = False
output_infer = batch_norm.forward(data)
print("推理模式输出:\n", output_infer)

训练模式输出:\n [[-0.20879924  1.82754911  0.9557318   0.04413511 -0.79327021]
 [-0.98663681 -1.0839743   1.40712816  0.05559572  1.26379352]
 [-1.46858203  1.89015986  1.29364678 -1.75818707 -0.69711066]
 [-0.88908042 -0.28107046  0.25857334 -0.73364462 -0.28944876]
 [ 0.63583362 -0.81843577 -0.52394557 -1.03961429  0.32478129]
 [ 1.25271721 -0.62214491  0.22317678  0.01500495 -1.20154341]
 [ 0.62050063 -0.71722332 -1.28789917  1.67807333  2.22351172]
 [ 1.33536572 -0.27985864 -1.17816194  0.44337187  0.26546953]
 [-1.10749248  0.34170754 -1.39105144  1.4934874  -0.41036084]
 [ 0.81617381 -0.25670911  0.24280127 -0.1982224  -0.68582218]]
推理模式输出:\n [[ 0.34761317  0.95600362  0.72084368  0.56748504  0.12507165]
 [ 0.11825073  0.01996812  0.86159417  0.57006785  0.70467022]
 [-0.02386137  0.97613256  0.82620939  0.16130611  0.15216558]
 [ 0.14701737  0.27809639  0.50346179  0.39220132  0.26702845]
 [ 0.59667166  0.105337    0.25946354  0.3232467   0.440094  ]
 [ 0.77857329  0.16844321  0.49242

## 框架实现

In [10]:
import torch  
import torch.nn as nn  
import torch.optim as optim

In [11]:
# Small MLP regressor: 10 -> 50 -> 20 -> 1, with batch normalization and ReLU
# after each hidden linear layer. The hidden Linear layers are created with
# bias=False, presumably because the BatchNorm1d that follows provides its
# own learnable shift.
layers = [
    # hidden block 1
    nn.Linear(10, 50, bias=False),
    nn.BatchNorm1d(50),
    nn.ReLU(),
    # hidden block 2
    nn.Linear(50, 20, bias=False),
    nn.BatchNorm1d(20),
    nn.ReLU(),
    # output head
    nn.Linear(20, 1),
]
net = nn.Sequential(*layers)

In [12]:
# Loss function: mean squared error.
criterion = nn.MSELoss()

# The model is the network built above (same object, not a copy).
model = net

# Optimizer: plain SGD over all of the model's parameters.
optimizer = optim.SGD(params=model.parameters(), lr=0.01)

In [13]:
# Random data standing in for inputs and regression targets.
input_data = torch.randn(100, 10)  # 100 samples, 10 features each
target_data = torch.randn(100, 1)  # 100 target values

# Loop settings in one place so the range and the progress message cannot
# drift apart.
NUM_EPOCHS = 100
LOG_EVERY = 10

# Training mode makes the BatchNorm layers use batch statistics and update
# their running estimates. Nothing inside the loop leaves training mode, so
# setting it once before the loop is enough.
model.train()

for epoch in range(NUM_EPOCHS):
    # Forward pass (full-batch: every step uses all samples).
    optimizer.zero_grad()  # clear gradients left over from the previous step
    output = model(input_data)
    loss = criterion(output, target_data)

    # Backward pass and parameter update.
    loss.backward()
    optimizer.step()

    # Report the loss periodically.
    if (epoch + 1) % LOG_EVERY == 0:
        print(f'Epoch [{epoch + 1}/{NUM_EPOCHS}], Loss: {loss.item():.4f}')

Epoch [10/100], Loss: 0.7255
Epoch [20/100], Loss: 0.6408
Epoch [30/100], Loss: 0.5818
Epoch [40/100], Loss: 0.5271
Epoch [50/100], Loss: 0.4738
Epoch [60/100], Loss: 0.4246
Epoch [70/100], Loss: 0.3807
Epoch [80/100], Loss: 0.3407
Epoch [90/100], Loss: 0.3058
Epoch [100/100], Loss: 0.2747


In [14]:
# Inference on fresh random inputs.
# Evaluation mode makes the BatchNorm layers normalize with their running
# statistics instead of the statistics of the current batch.
model.eval()

test_input = torch.randn(5, 10)  # 5 test samples

# Gradients are not needed for inference, so disable autograd tracking.
with torch.no_grad():
    test_output = model(test_input)

print("推理输出:\n", test_output)

推理输出:
 tensor([[-0.2790],
        [ 0.0336],
        [-0.9290],
        [ 0.4044],
        [-0.6063]])
