In [1]:
%matplotlib inline


神经网络构建与训练
===============

神经网络的典型训练过程如下：

·定义神经网络的参数和前向传播过程

·网络前向传播处理输入

·计算损失函数

·将梯度传播回网络参数

·使用优化器来更新网络的权重


定义网络
--------

首先让我们借助torch.nn定义网络



In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F


class Net(nn.Module):
    """Small CNN mapping single-channel 32x32 images to 10 output scores.

    Attributes:
        features: Two ``Conv2d(3x3, padding=1) -> ReLU -> MaxPool2d(2x2)``
            stages (1 -> 6 -> 16 channels). The padded convolutions keep the
            spatial size and each pooling halves it, so a 32x32 input ends
            up as a 16x8x8 feature map.
        classifier: ``Linear(16*8*8, 120) -> ReLU -> Linear(120, 10)``.
        my_para: Learnable tensor of shape ``(1,)`` initialised to one; it is
            broadcast-added to the feature map in ``forward``.
    """

    def __init__(self):
        # Subclasses of nn.Module must run the parent constructor before
        # registering sub-modules or parameters.
        super().__init__()

        # Signature reminder:
        # nn.Conv2d(in_channels, out_channels, kernel_size, stride=1,
        #           padding=0, dilation=1, bias=True)
        self.features = nn.Sequential(
            # 1 input image channel, 6 output channels, 3x3 square convolution
            nn.Conv2d(in_channels=1, out_channels=6, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d((2, 2)),
            nn.Conv2d(6, 16, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d((2, 2)),
        )
        self.classifier = nn.Sequential(
            # 16 channels * 8 * 8 spatial positions -> requires 32x32 inputs.
            nn.Linear(16 * 8 * 8, 120),
            nn.ReLU(),
            nn.Linear(120, 10),
        )
        # nn.Parameter already enables requires_grad, so it is not repeated
        # on the wrapped tensor.
        self.my_para = nn.Parameter(torch.ones(1))

    def forward(self, x):
        """Run the network on a batch of images.

        Args:
            x: Tensor of shape ``(B, 1, 32, 32)``.

        Returns:
            Tensor of shape ``(B, 10)`` holding the raw classifier outputs
            (no softmax is applied).
        """
        x = self.features(x) + self.my_para
        # Collapse (C, H, W) into one axis while keeping the batch axis.
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x


net = Net()
print(net)

Net(
  (features): Sequential(
    (0): Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Linear(in_features=1024, out_features=120, bias=True)
    (1): ReLU()
    (2): Linear(in_features=120, out_features=10, bias=True)
  )
)


查看模型内的可学习参数

In [3]:
# Walk over every learnable tensor registered on the model and show its
# qualified name followed by its shape, one per line.
for param_name, param in net.named_parameters():
    print(param_name, param.size(), sep='\n')

my_para
torch.Size([1])
features.0.weight
torch.Size([6, 1, 3, 3])
features.0.bias
torch.Size([6])
features.3.weight
torch.Size([16, 6, 3, 3])
features.3.bias
torch.Size([16])
classifier.0.weight
torch.Size([120, 1024])
classifier.0.bias
torch.Size([120])
classifier.2.weight
torch.Size([10, 120])
classifier.2.bias
torch.Size([10])


尝试输入一张图片并获取输出（注意输入必须是形状为 (B, C, H, W) 的 4 维张量；本网络要求 C=1、H=W=32）


In [4]:
# Random stand-in for one single-channel 32x32 image, laid out as (B, C, H, W).
# NOTE: `input` shadows the Python builtin; the name is kept because later
# cells reuse it.
input = torch.randn((1, 1, 32, 32))
out = net(input)
print(out)

tensor([[ 0.1219,  0.1282,  0.0808,  0.7262, -0.6104, -0.2138, -0.1289,  0.0475,
          0.0816,  0.0085]], grad_fn=<AddmmBackward0>)


目前我们已经完成了神经网络的定义与前向传播，并得到了一组网络输出 out

为完成一次训练，还需要

·计算LOSS并进行反向传播取得梯度

·更新权重参数

In [5]:
output = net(input)

# Dummy target for illustration: 10 random values reshaped to (1, 10) so the
# shape lines up with the network output.
target = torch.randn(10).view(1, -1)

# Mean-squared error between prediction and target.
criterion = nn.MSELoss()
loss = criterion(output, target)
print(loss)

tensor(1.8914, grad_fn=<MseLossBackward0>)


Backprop
-------------



In [6]:
# Clear any existing gradients so this backward pass starts from a clean slate.
net.zero_grad()

# Gradient of the first convolution's bias before back-propagation.
print('conv1.bias.grad before backward', net.features[0].bias.grad, sep='\n')

# Back-propagate the loss; this populates .grad on the parameters.
loss.backward()

# Same gradient after back-propagation.
print('conv1.bias.grad after backward', net.features[0].bias.grad, sep='\n')

conv1.bias.grad before backward
None
conv1.bias.grad after backward
tensor([-0.0126,  0.0281, -0.0075,  0.0085, -0.0369,  0.0201])


In [7]:
# Shows what happens when zero_grad() is skipped: a second backward pass is
# run while the gradients from the previous one are still stored.

# Rebuild the computation graph with a fresh forward pass.
output = net(input)
loss = criterion(output, target)
# net.zero_grad() is deliberately NOT called here.

print('conv1.bias.grad before backward', net.features[0].bias.grad, sep='\n')

# The new gradients are added on top of the ones already in .grad.
loss.backward()

print('conv1.bias.grad after backward', net.features[0].bias.grad, sep='\n')

conv1.bias.grad before backward
tensor([-0.0126,  0.0281, -0.0075,  0.0085, -0.0369,  0.0201])
conv1.bias.grad after backward
tensor([-0.0253,  0.0561, -0.0150,  0.0171, -0.0737,  0.0402])


更新权重
-------



In [8]:
learning_rate = 0.01

# Hand-written SGD step: p <- p - learning_rate * p.grad.
# NOTE: the preceding cell called backward() without zero_grad(), so .grad
# currently holds the sum of two backward passes.
# The update runs under no_grad() so autograd does not record the in-place
# change to the parameters (preferred over touching `.data`).
with torch.no_grad():
    for param in net.parameters():
        if param.grad is None:
            # No gradient was computed for this parameter; leave it as is.
            continue
        param.sub_(param.grad * learning_rate)

In [9]:
import torch.optim as optim

# Build an SGD optimizer that manages all of the model's parameters.
optimizer = optim.SGD(params=net.parameters(), lr=0.01)

# One training iteration; in practice these steps form the body of the
# training loop.
optimizer.zero_grad()             # reset the gradient buffers
output = net(input)               # forward pass
loss = criterion(output, target)  # loss for this batch
loss.backward()                   # back-propagate to fill .grad
optimizer.step()                  # apply the parameter update