# 3 pytorch初步应用
使用 torch.nn 构建神经网络；torch.nn 依赖 autograd 来定义模型并完成自动求导。

流程：
- 定义一个拥有可学习参数的神经网络
- 遍历训练数据集，处理输入数据使其流经神经网络
- 计算损失值
- 通过反向传播计算网络各参数的梯度
- 以一定的规则更新网络的权重

## 1.构建网络

In [8]:

import torch
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    """Small LeNet-style CNN: two conv/ReLU/max-pool stages and three linear layers.

    ``fc1`` expects 16 * 6 * 6 = 576 input features, which is what the two
    3x3 convolutions and 2x2 poolings produce for a 1-channel 32x32 input
    (32 -> 30 -> 15 -> 13 -> 6). The final layer emits 10 values per sample.
    """

    def __init__(self):
        super().__init__()
        # First convolution: 1 input channel, 6 output channels, 3x3 kernel.
        self.conv1 = nn.Conv2d(1, 6, 3)
        # Second convolution: 6 input channels, 16 output channels, 3x3 kernel.
        self.conv2 = nn.Conv2d(6, 16, 3)
        # Three fully connected layers: 576 -> 120 -> 84 -> 10.
        self.fc1 = nn.Linear(16 * 6 * 6, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Run the network on a batch ``x`` and return the raw ``fc3`` outputs.

        Args:
            x: Input tensor; the layer sizes above match inputs of shape
                (batch, 1, 32, 32).

        Returns:
            Tensor of shape (batch, 10); no activation is applied to it.
        """
        # Each stage: convolution -> ReLU -> max pooling over a (2, 2) window.
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        x = F.max_pool2d(F.relu(self.conv2(x)), (2, 2))
        # Flatten everything except the batch dimension. Unlike
        # ``view(-1, n)``, this stays well-defined for an empty batch, where
        # ``-1`` cannot be inferred from zero elements.
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)

    def num_flat_features(self, x):
        """Return the number of elements per sample in ``x``.

        This is the product of every dimension except dimension 0 (the batch
        dimension); it is 1 when ``x`` has no further dimensions.
        """
        return x.size()[1:].numel()


# Instantiate the network and print its layer summary.
net = Net()
print(net)

Net(
  (conv1): Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=576, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


## 2. 构建输入

In [9]:
# List every learnable parameter tensor of the network and print its shape.
params = list(net.parameters())
print(len(params))
for p in params:
    print(p.size())

# Networks built with torch.nn only accept mini-batches, not single samples.
# For example, nn.Conv2d expects a 4D tensor shaped
# (nSamples, nChannels, Height, Width). If you only have a single sample,
# call input.unsqueeze(0) to expand the 3D tensor into a 4D one.
# NOTE: the name `input` shadows the builtin; it is kept because the
# weight-update cell further down reads this variable.
input = torch.randn(1, 1, 32, 32)
# Call the module itself instead of .forward() so that any registered
# hooks are executed as part of the call.
out = net(input)
print(out)

10
torch.Size([6, 1, 3, 3])
torch.Size([6])
torch.Size([16, 6, 3, 3])
torch.Size([16])
torch.Size([120, 576])
torch.Size([120])
torch.Size([84, 120])
torch.Size([84])
torch.Size([10, 84])
torch.Size([10])
tensor([[-0.0390, -0.1032, -0.1672,  0.0213, -0.0284,  0.0307, -0.0461,  0.0201,
         -0.0496, -0.0904]], grad_fn=<AddmmBackward0>)


## 3. 计算loss

In [10]:
# Build a random target, reshape it to match the network output (1, 10),
# and measure the mean squared error between the two.
print(out.shape)
target = torch.randn(10)
print(target.shape)
# Add a leading batch dimension: (10,) -> (1, 10).
target = target.unsqueeze(0)
print(target.shape)
c = nn.MSELoss()
loss = c(out, target)
print(loss)

torch.Size([1, 10])
torch.Size([10])
torch.Size([1, 10])
tensor(0.9437, grad_fn=<MseLossBackward0>)


In [11]:
# Walk two steps back through the autograd graph, starting at the loss node
# and following the first input of each node.
_loss_node = loss.grad_fn
print(_loss_node)
_prev_node = _loss_node.next_functions[0][0]
print(_prev_node)
print(_prev_node.next_functions[0][0])

<MseLossBackward0 object at 0x7f136ce777f0>
<AddmmBackward0 object at 0x7f136ce77ee0>
<AccumulateGrad object at 0x7f136ce777f0>


## 4.反向传播

In [12]:
# Reset accumulated gradients, then show conv1's bias gradient before and
# after back-propagating the loss.
net.zero_grad()
_conv1_bias = net.conv1.bias
print(_conv1_bias.grad)
loss.backward()
print(_conv1_bias.grad)

None
tensor([ 0.0127, -0.0005, -0.0009,  0.0025, -0.0058,  0.0057])


## 5.更新权重

In [13]:
# Perform one SGD update and print the first slice of the first parameter
# tensor before and after the step so the change is visible.
import torch.optim as optim

net.zero_grad()
print(next(net.parameters())[0])

optimizer = optim.SGD(net.parameters(), lr=0.01)
optimizer.zero_grad()

# Forward pass, loss, backward pass, then apply the update.
output = net(input)
loss = c(output, target)
loss.backward()
optimizer.step()

print(next(net.parameters())[0])

tensor([[[-0.2442, -0.1785, -0.0841],
         [ 0.0188, -0.1052,  0.1658],
         [ 0.2443,  0.3327,  0.3157]]], grad_fn=<SelectBackward0>)
tensor([[[-0.2441, -0.1783, -0.0840],
         [ 0.0189, -0.1050,  0.1655],
         [ 0.2440,  0.3328,  0.3157]]], grad_fn=<SelectBackward0>)
