In [1]:
print('批标准化')


批标准化


In [2]:
# 一维的情况
import torch
def simple_batch_norm_1d(x, gamma, beta):
    eps = 1e-5
    x_mean = torch.mean(x, dim=0, keepdim=True)
    x_var = torch.mean((x-x_mean)**2, dim=0, keepdim=True)
    x_hat = (x - x_mean) / torch.sqrt(x_var + eps)
    return gamma.view_as(x_mean) * x_hat + beta.view_as(x_mean)



In [4]:
x = torch.arange(15).view(5, 3).to(torch.float)
gamma = torch.ones(x.shape[1])
beta = torch.zeros(x.shape[1])
print('before bn:')
print(x)
y = simple_batch_norm_1d(x, gamma, beta)
print('after bn:')
print(y)


before bn:
tensor([[ 0.,  1.,  2.],
        [ 3.,  4.,  5.],
        [ 6.,  7.,  8.],
        [ 9., 10., 11.],
        [12., 13., 14.]])
after bn:
tensor([[-1.4142, -1.4142, -1.4142],
        [-0.7071, -0.7071, -0.7071],
        [ 0.0000,  0.0000,  0.0000],
        [ 0.7071,  0.7071,  0.7071],
        [ 1.4142,  1.4142,  1.4142]])


In [23]:
print(x.shape)
x_m = torch.mean(x, dim=0, keepdim=True)
print(x_m.shape)
x_v = torch.mean((x-x_m)**2, dim=0, keepdim=True)
print(x_v.shape)
print(gamma.view_as(x_m).shape)

torch.Size([5, 3])
torch.Size([1, 3])
torch.Size([1, 3])
torch.Size([1, 3])


In [44]:
# 能够区分训练状态和测试状态的批标准化方法
def batch_norm_1d(x, gamma, beta, is_training, moving_mean, moving_var, moving_momentum=0.1):
    eps = 1e-5
    x_mean = torch.mean(x, dim=0, keepdim=True) # 保留维度进行broadcast
    x_var = torch.mean((x - x_mean) ** 2, dim=0, keepdim=True)
    if is_training:
        x_hat = (x - x_mean) / torch.sqrt(x_var + eps)
        moving_mean[:] = moving_momentum * moving_mean + (1. - moving_momentum) * x_mean.cpu()
        moving_var[:] = moving_momentum * moving_var + (1. - moving_momentum) * x_var.cpu()
    else:
        x_hat = (x - moving_mean) / torch.sqrt(moving_var + eps)
    return gamma.view_as(x_mean) * x_hat + beta.view_as(x_mean)


In [45]:
# 用MNIST验证是否可用
import numpy as np
from torchvision.datasets import mnist
from torch.utils.data import DataLoader
from torch import nn
from torch.autograd import Variable

# 使用内置函数下载MNIST数据集
train_set = mnist.MNIST('../data', train=True)
test_set = mnist.MNIST('../data', train=False)

def data_tf(x):
    x = np.array(x, dtype='float32') / 255
    x = (x - 0.5) / 0.5
    x = x.reshape((-1,))
    x = torch.from_numpy(x)
    return x

train_set = mnist.MNIST('../data', train=True, transform=data_tf, download=True)
test_set = mnist.MNIST('../data', train=False, transform=data_tf, download=True)
train_data = DataLoader(train_set, batch_size=32, shuffle=True)
test_data = DataLoader(test_set, batch_size=32, shuffle=False)


In [46]:
class multi_network(nn.Module):
    def __init__(self):
        super(multi_network, self).__init__()
        self.layer1 = nn.Linear(784, 100)
        self.relu = nn.ReLU(True)
        self.layer2 = nn.Linear(100, 10)
        
        self.gamma = nn.Parameter(torch.randn(100))
        self.beta = nn.Parameter(torch.randn(100))
        
        self.moving_mean = Variable(torch.zeros(100))
        self.moving_var = Variable(torch.zeros(100))
        
    def forward(self, x, is_train=True):
        x = self.layer1(x)
        x = batch_norm_1d(x, self.gamma, self.beta, is_train, self.moving_mean, self.moving_var)
        x = self.relu(x)
        x = self.layer2(x)
        return x
    


In [47]:
# 训练
net = multi_network()
# loss
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(net.parameters(), 1e-1)
from C5_CNN.utils import train
train(net, train_data, test_data, 10, optimizer, criterion)


Epoch 0. Train Loss: 0.300847, Train Acc: 0.911950, Valid Loss: 0.188840, Valid Acc: 0.943890, Time 00:00:26
Epoch 1. Train Loss: 0.168357, Train Acc: 0.949817, Valid Loss: 0.148208, Valid Acc: 0.956769, Time 00:00:30
Epoch 2. Train Loss: 0.131344, Train Acc: 0.960033, Valid Loss: 0.130643, Valid Acc: 0.961861, Time 00:00:31
Epoch 3. Train Loss: 0.110123, Train Acc: 0.966550, Valid Loss: 0.125073, Valid Acc: 0.960563, Time 00:00:34
Epoch 4. Train Loss: 0.093588, Train Acc: 0.971133, Valid Loss: 0.120028, Valid Acc: 0.963159, Time 00:00:37
Epoch 5. Train Loss: 0.083763, Train Acc: 0.974333, Valid Loss: 0.111815, Valid Acc: 0.966753, Time 00:00:38
Epoch 6. Train Loss: 0.075459, Train Acc: 0.976283, Valid Loss: 0.109438, Valid Acc: 0.967552, Time 00:00:37
Epoch 7. Train Loss: 0.069377, Train Acc: 0.978217, Valid Loss: 0.105319, Valid Acc: 0.967552, Time 00:00:39
Epoch 8. Train Loss: 0.065798, Train Acc: 0.979300, Valid Loss: 0.108963, Valid Acc: 0.967652, Time 00:00:40
Epoch 9. Train Loss

In [48]:
print(net.moving_mean[:10])

tensor([-3.3059, -0.6155,  0.0912, -0.3872,  5.9877,  3.4052,  0.5311,  0.1161,
        -3.1937,  0.6356], grad_fn=<SliceBackward>)


In [49]:
# 不用批标准化的结果
no_bn_net = nn.Sequential(
    nn.Linear(784, 100),
    nn.ReLU(True),
    nn.Linear(100, 10)
)
optimizer = torch.optim.SGD(no_bn_net.parameters(), 1e-1)
train(no_bn_net, train_data, test_data, 10, optimizer, criterion)


Epoch 0. Train Loss: 0.366584, Train Acc: 0.882483, Valid Loss: 0.163591, Valid Acc: 0.948782, Time 00:00:20
Epoch 1. Train Loss: 0.173393, Train Acc: 0.946367, Valid Loss: 0.148600, Valid Acc: 0.953175, Time 00:00:23
Epoch 2. Train Loss: 0.134626, Train Acc: 0.958833, Valid Loss: 0.144682, Valid Acc: 0.956569, Time 00:00:26
Epoch 3. Train Loss: 0.116492, Train Acc: 0.963867, Valid Loss: 0.150574, Valid Acc: 0.956170, Time 00:00:25
Epoch 4. Train Loss: 0.100459, Train Acc: 0.968517, Valid Loss: 0.122538, Valid Acc: 0.964357, Time 00:00:25
Epoch 5. Train Loss: 0.091769, Train Acc: 0.971233, Valid Loss: 0.132585, Valid Acc: 0.962260, Time 00:00:24
Epoch 6. Train Loss: 0.082237, Train Acc: 0.973750, Valid Loss: 0.125345, Valid Acc: 0.962260, Time 00:00:24
Epoch 7. Train Loss: 0.077787, Train Acc: 0.975967, Valid Loss: 0.103685, Valid Acc: 0.971046, Time 00:00:24
Epoch 8. Train Loss: 0.070292, Train Acc: 0.977117, Valid Loss: 0.102938, Valid Acc: 0.969948, Time 00:00:24
Epoch 9. Train Loss

In [50]:
# 卷积网络 批标准化 用pytorch自带的批标准化函数
def data_tf(x):
    x = np.array(x, dtype='float32') / 255
    x = (x - 0.5) / 0.5
    x = torch.from_numpy(x)
    x = x.unsqueeze(0)
    return x

train_set = mnist.MNIST('../data', train=True, transform=data_tf, download=True)
test_set = mnist.MNIST('../data', train=False, transform=data_tf, download=True)
train_data = DataLoader(train_set, batch_size=32, shuffle=True)
test_data = DataLoader(test_set, batch_size=32, shuffle=False)

# 使用批标准化
class conv_bn_net(nn.Module):
    def __init__(self):
        super(conv_bn_net, self).__init__()
        self.stage1 = nn.Sequential(
            nn.Conv2d(1, 6, 3, padding=1),
            nn.BatchNorm2d(6),
            nn.ReLU(True),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(6, 16, 5),
            nn.BatchNorm2d(16),
            nn.ReLU(True),
            nn.MaxPool2d(2, 2)
        )
        
        self.classify = nn.Linear(400, 10)
        
    def forward(self, x):
        x = self.stage1(x)
        x = x.view(x.shape[0], -1)
        x = self.classify(x)
        return x

net = conv_bn_net()
optimizer = torch.optim.SGD(net.parameters(), 1e-1)
train(net, train_data, test_data, 5, optimizer, criterion)


Epoch 0. Train Loss: 0.130878, Train Acc: 0.961333, Valid Loss: 0.093079, Valid Acc: 0.970647, Time 00:00:50
Epoch 1. Train Loss: 0.053629, Train Acc: 0.983500, Valid Loss: 0.039332, Valid Acc: 0.986222, Time 00:00:51
Epoch 2. Train Loss: 0.041747, Train Acc: 0.987200, Valid Loss: 0.039187, Valid Acc: 0.987220, Time 00:00:50
Epoch 3. Train Loss: 0.035006, Train Acc: 0.988600, Valid Loss: 0.035332, Valid Acc: 0.989117, Time 00:00:49
Epoch 4. Train Loss: 0.030113, Train Acc: 0.990417, Valid Loss: 0.043766, Valid Acc: 0.986621, Time 00:00:48


In [52]:
# 不使用批标准化
class conv_no_bn_net(nn.Module):
    def __init__(self):
        super(conv_no_bn_net, self).__init__()
        self.stage1 = nn.Sequential(
            nn.Conv2d(1, 6, 3, padding=1),
            # nn.BatchNorm2d(6),
            nn.ReLU(True),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(6, 16, 5),
            #nn.BatchNorm2d(16),
            nn.ReLU(True),
            nn.MaxPool2d(2, 2)
        )
        
        self.classify = nn.Linear(400, 10)
        
    def forward(self, x):
        x = self.stage1(x)
        x = x.view(x.shape[0], -1)
        x = self.classify(x)
        return x

net = conv_no_bn_net()
optimizer = torch.optim.SGD(net.parameters(), 1e-1)
train(net, train_data, test_data, 5, optimizer, criterion)

Epoch 0. Train Loss: 0.174324, Train Acc: 0.944900, Valid Loss: 0.065926, Valid Acc: 0.977736, Time 00:00:35
Epoch 1. Train Loss: 0.064510, Train Acc: 0.979967, Valid Loss: 0.049920, Valid Acc: 0.984125, Time 00:00:39
Epoch 2. Train Loss: 0.049106, Train Acc: 0.984950, Valid Loss: 0.053070, Valid Acc: 0.983227, Time 00:00:38
Epoch 3. Train Loss: 0.042375, Train Acc: 0.986967, Valid Loss: 0.050653, Valid Acc: 0.983926, Time 00:00:38
Epoch 4. Train Loss: 0.036420, Train Acc: 0.988133, Valid Loss: 0.052674, Valid Acc: 0.982328, Time 00:00:38
