In [1]:
import sys
sys.path.append('..')
import torch

In [2]:
def simple_batch_norm_1d(x,gama,beta):
    eps=1e-5
    x_mean=torch.mean(x,dim=0,keepdim=True)
    x_var=torch.mean((x-x_mean)**2,dim=0,keepdim=True)
    x_hat=(x-x_mean)/torch.sqrt(x_var+eps)
    return gama.view_as(x_mean)*x_hat+beta.view_as(x_mean)

In [3]:
x= torch.arange(15).view(5,3)
x=x.type(torch.FloatTensor)
gama=torch.ones(x.shape[1])
beta=torch.zeros(x.shape[1])
print('before bn:')
print(x)
y=simple_batch_norm_1d(x,gama,beta)
print('after bn:')
print(y)

before bn:
tensor([[ 0.,  1.,  2.],
        [ 3.,  4.,  5.],
        [ 6.,  7.,  8.],
        [ 9., 10., 11.],
        [12., 13., 14.]])
after bn:
tensor([[-1.4142, -1.4142, -1.4142],
        [-0.7071, -0.7071, -0.7071],
        [ 0.0000,  0.0000,  0.0000],
        [ 0.7071,  0.7071,  0.7071],
        [ 1.4142,  1.4142,  1.4142]])


In [4]:
def batch_norm_1d(x,gamma,beta,is_traing,moving_mean,moving_var,moving_momentum=0.1):
    eps=1e-5
    x_mean=torch.mean(x,dim=0,keepdim=True)
    x_var=torch.mean((x-x_mean)**2,dim=0,keepdim=True)
    if is_traing:
        x_hat=(x-x_mean)/torch.sqrt(x_var+eps)
        moving_mean[:]=moving_momentum*moving_mean+(1.-moving_momentum)*x_mean
        moving_var[:]=moving_momentum*moving_var+(1.-moving_momentum)*x_var
    else:
        x_hat=(x-moving_mean)/torch.sqrt(moving_var+eps)
    return gamma.view_as(x_mean)*x_hat+beta.view_as(x_mean)

In [5]:
import numpy as np
from torchvision.datasets import mnist # 导入 pytorch 内置的 mnist 数据
from torch.utils.data import DataLoader
from torch import nn
from torch.autograd import Variable

In [6]:
# 使用内置函数下载 mnist 数据集
train_set = mnist.MNIST('./data', train=True)
test_set = mnist.MNIST('./data', train=False)

def data_tf(x):
    x = np.array(x, dtype='float32') / 255
    x = (x - 0.5) / 0.5 # 数据预处理，标准化
    x = x.reshape((-1,)) # 拉平
    x = torch.from_numpy(x)
    return x

train_set = mnist.MNIST('./data', train=True, transform=data_tf, download=True) # 重新载入数据集，申明定义的数据变换
test_set = mnist.MNIST('./data', train=False, transform=data_tf, download=True)
train_data = DataLoader(train_set, batch_size=64, shuffle=True)
test_data = DataLoader(test_set, batch_size=128, shuffle=False)

In [7]:
class multi_network(nn.Module):
    def __init__(self):
        super(multi_network,self).__init__()
        self.layer1=nn.Linear(784,100)
        self.relu=nn.ReLU(True)
        self.layer2=nn.Linear(100,10)
        
        self.gamma=nn.Parameter(torch.randn(100))
        self.beta=nn.Parameter(torch.randn(100))
        
        self.moving_mean=Variable(torch.zeros(100))
        self.moving_var=Variable(torch.zeros(100))
    def forward(self,x,is_train=True):
        x=self.layer1(x)
        x=batch_norm_1d(x,self.gamma,self.beta,is_train,self.moving_mean,self.moving_var)
        x=self.relu(x)
        x=self.layer2(x)
        return x

In [8]:
net=multi_network()
print(net)

multi_network(
  (layer1): Linear(in_features=784, out_features=100, bias=True)
  (relu): ReLU(inplace)
  (layer2): Linear(in_features=100, out_features=10, bias=True)
)


In [9]:
criterion=nn.CrossEntropyLoss()
optimizer=torch.optim.SGD(net.parameters(),lr=1e-1)

In [10]:
from utils import train

In [11]:
train(net, train_data, test_data, 10, optimizer, criterion)

  im = Variable(im, volatile=True)
  label = Variable(label, volatile=True)


Epoch 0. Train Loss: 0.307045, Train Acc: 0.914729, Valid Loss: 0.181659, Valid Acc: 0.946499, Time 00:00:08
Epoch 1. Train Loss: 0.166932, Train Acc: 0.952225, Valid Loss: 0.137236, Valid Acc: 0.958366, Time 00:00:09
Epoch 2. Train Loss: 0.129827, Train Acc: 0.962836, Valid Loss: 0.116029, Valid Acc: 0.965981, Time 00:00:08
Epoch 3. Train Loss: 0.106928, Train Acc: 0.969650, Valid Loss: 0.110975, Valid Acc: 0.967959, Time 00:00:09
Epoch 4. Train Loss: 0.092049, Train Acc: 0.973847, Valid Loss: 0.096674, Valid Acc: 0.970728, Time 00:00:09
Epoch 5. Train Loss: 0.081043, Train Acc: 0.976396, Valid Loss: 0.097490, Valid Acc: 0.970629, Time 00:00:10
Epoch 6. Train Loss: 0.072950, Train Acc: 0.978628, Valid Loss: 0.091845, Valid Acc: 0.972112, Time 00:00:09
Epoch 7. Train Loss: 0.066913, Train Acc: 0.979944, Valid Loss: 0.092770, Valid Acc: 0.970827, Time 00:00:09
Epoch 8. Train Loss: 0.059466, Train Acc: 0.982060, Valid Loss: 0.091550, Valid Acc: 0.971519, Time 00:00:09
Epoch 9. Train Loss

In [12]:
# 打出 moving_mean 的前 10 项
print(net.moving_mean[:10])

tensor([ 0.9455,  1.7531, -1.0747, -1.0521,  1.1635,  0.7730,  0.3855, -1.4986,
        -1.3545, -0.6360], grad_fn=<SliceBackward>)


In [13]:
no_bn_net = nn.Sequential(
    nn.Linear(784, 100),
    nn.ReLU(True),
    nn.Linear(100, 10)
)

optimizer = torch.optim.SGD(no_bn_net.parameters(), 1e-1) # 使用随机梯度下降，学习率 0.1
train(no_bn_net, train_data, test_data, 10, optimizer, criterion)

  im = Variable(im, volatile=True)
  label = Variable(label, volatile=True)


Epoch 0. Train Loss: 0.402252, Train Acc: 0.873567, Valid Loss: 0.220926, Valid Acc: 0.935918, Time 00:00:07
Epoch 1. Train Loss: 0.184110, Train Acc: 0.944313, Valid Loss: 0.147016, Valid Acc: 0.957377, Time 00:00:09
Epoch 2. Train Loss: 0.136039, Train Acc: 0.959322, Valid Loss: 0.147227, Valid Acc: 0.955696, Time 00:00:08
Epoch 3. Train Loss: 0.109659, Train Acc: 0.966801, Valid Loss: 0.138487, Valid Acc: 0.955498, Time 00:00:09
Epoch 4. Train Loss: 0.094737, Train Acc: 0.970999, Valid Loss: 0.111056, Valid Acc: 0.965981, Time 00:00:09
Epoch 5. Train Loss: 0.083283, Train Acc: 0.974530, Valid Loss: 0.110512, Valid Acc: 0.965684, Time 00:00:09
Epoch 6. Train Loss: 0.073668, Train Acc: 0.977412, Valid Loss: 0.093237, Valid Acc: 0.972508, Time 00:00:08
Epoch 7. Train Loss: 0.066807, Train Acc: 0.978911, Valid Loss: 0.138552, Valid Acc: 0.958663, Time 00:00:08
Epoch 8. Train Loss: 0.061216, Train Acc: 0.980877, Valid Loss: 0.122029, Valid Acc: 0.961926, Time 00:00:08
Epoch 9. Train Loss

In [14]:
def data_tf(x):
    x = np.array(x, dtype='float32') / 255
    x = (x - 0.5) / 0.5 # 数据预处理，标准化
    x = torch.from_numpy(x)
    x = x.unsqueeze(0)
    return x

train_set = mnist.MNIST('./data', train=True, transform=data_tf, download=True) # 重新载入数据集，申明定义的数据变换
test_set = mnist.MNIST('./data', train=False, transform=data_tf, download=True)
train_data = DataLoader(train_set, batch_size=64, shuffle=True)
test_data = DataLoader(test_set, batch_size=128, shuffle=False)

In [24]:
from torch import nn
class conv_bn_net(nn.Module):
    def __init__(self):
        super(conv_bn_net,self).__init__()
        self.stage1=nn.Sequential(
            nn.Conv2d(1,6,3,padding=1),
            nn.BatchNorm2d(6),
            nn.ReLU(True),
            nn.MaxPool2d(2,2),
            nn.Conv2d(6,16,5),
            nn.BatchNorm2d(16),
            nn.ReLU(True),
            nn.MaxPool2d(2,2)
        )
        self.classfy=nn.Linear(400,10)
    def forward(self,x):
        x=self.stage1(x)
        x=x.view(x.shape[0],-1)
        x=self.classfy(x)
        return x
    
net=conv_bn_net()
optimizer=torch.optim.SGD(net.parameters(),lr=1e-1)

In [25]:
train(net, train_data, test_data, 5, optimizer, criterion)

  im = Variable(im, volatile=True)
  label = Variable(label, volatile=True)


Epoch 0. Train Loss: 0.158638, Train Acc: 0.953242, Valid Loss: 0.064446, Valid Acc: 0.979628, Time 00:00:53
Epoch 1. Train Loss: 0.065400, Train Acc: 0.979994, Valid Loss: 0.059181, Valid Acc: 0.980222, Time 00:00:56
Epoch 2. Train Loss: 0.052123, Train Acc: 0.984025, Valid Loss: 0.050308, Valid Acc: 0.983386, Time 00:00:58
Epoch 3. Train Loss: 0.044278, Train Acc: 0.986291, Valid Loss: 0.040967, Valid Acc: 0.986056, Time 00:00:56
Epoch 4. Train Loss: 0.039090, Train Acc: 0.987940, Valid Loss: 0.038483, Valid Acc: 0.987441, Time 00:00:57


In [26]:
# 不使用批标准化
class conv_no_bn_net(nn.Module):
    def __init__(self):
        super(conv_no_bn_net, self).__init__()
        self.stage1 = nn.Sequential(
            nn.Conv2d(1, 6, 3, padding=1),
            nn.ReLU(True),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(6, 16, 5),
            nn.ReLU(True),
            nn.MaxPool2d(2, 2)
        )
        
        self.classfy = nn.Linear(400, 10)
    def forward(self, x):
        x = self.stage1(x)
        x = x.view(x.shape[0], -1)
        x = self.classfy(x)
        return x

net = conv_no_bn_net()
optimizer = torch.optim.SGD(net.parameters(), 1e-1) # 使用随机梯度下降，学习率 0.1

In [27]:
train(net, train_data, test_data, 5, optimizer, criterion)

  im = Variable(im, volatile=True)
  label = Variable(label, volatile=True)


Epoch 0. Train Loss: 0.212905, Train Acc: 0.934768, Valid Loss: 0.109351, Valid Acc: 0.963509, Time 00:00:37
Epoch 1. Train Loss: 0.071837, Train Acc: 0.977978, Valid Loss: 0.047891, Valid Acc: 0.984078, Time 00:00:39
Epoch 2. Train Loss: 0.054570, Train Acc: 0.982959, Valid Loss: 0.045030, Valid Acc: 0.985562, Time 00:00:43
Epoch 3. Train Loss: 0.044837, Train Acc: 0.986057, Valid Loss: 0.042893, Valid Acc: 0.986748, Time 00:00:38
Epoch 4. Train Loss: 0.039101, Train Acc: 0.987807, Valid Loss: 0.038874, Valid Acc: 0.986353, Time 00:00:37
