# PyTorch 神经网络
  
一个典型的神经网络训练过程包括以下几点：  
1.定义一个包含可训练参数的神经网络  
2.在输入数据集上进行迭代  
3.通过神经网络处理输入  
4.计算损失(loss)  
5.反向传播梯度到神经网络的参数  
6.更新网络的参数，通常使用一个简单的更新规则：weight = weight - learning_rate * gradient  


In [8]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    """Small convolutional network: two conv/pool stages, then three linear layers.

    The first linear layer takes 16 * 5 * 5 features, which is what the two
    5x5 convolutions and 2x2 poolings produce for a 1-channel 32x32 input.
    The network emits 10 values per sample.
    """

    def __init__(self):
        super().__init__()
        # 1 input image channel, 6 output channels, 5x5 square convolution kernel.
        self.conv1 = nn.Conv2d(1, 6, 5)
        # 6 input channels, 16 output channels, 5x5 square convolution kernel.
        self.conv2 = nn.Conv2d(6, 16, 5)
        # nn.Linear(in_features, out_features, bias=True) maps a tensor of shape
        # [batch_size, in_features] to one of shape [batch_size, out_features].
        # See https://blog.csdn.net/qq_42079689/article/details/102873766
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Run the network on ``x`` and return the output of the last linear layer.

        Also prints the size of the flattened feature tensor on every call.
        """
        # Stage 1: convolution -> ReLU -> max pooling over a (2, 2) window.
        activated = F.relu(self.conv1(x))
        pooled = F.max_pool2d(activated, (2, 2))
        # Stage 2: same pattern; a single int gives a square pooling window.
        activated = F.relu(self.conv2(pooled))
        pooled = F.max_pool2d(activated, 2)
        # Collapse every non-batch dimension so the tensor becomes 2-D.
        flat = pooled.view(-1, self.num_flat_features(pooled))
        print(flat.size())
        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)

    def num_flat_features(self, x):
        """Return the product of all dimensions of ``x`` except the first (batch) one."""
        count = 1
        for dim in x.size()[1:]:
            count *= dim
        return count
        

#### 1.定义一个神经网络

In [9]:
# Instantiate the network and print its layer structure.
net = Net()
print(net)

Net(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


In [10]:
# Only the forward function is written by hand; the backward function is defined
# automatically by autograd. Any tensor operation may be used in the forward function.
params = list(net.parameters())  # materialise the parameter iterator as a list
print(params)
print(len(params))
print(params[0].size())  # conv1's .weight

[Parameter containing:
tensor([[[[-0.1349,  0.1467,  0.1184, -0.0255,  0.0521],
          [ 0.1381, -0.1011, -0.0850, -0.0293, -0.0547],
          [ 0.2000,  0.0971,  0.0777,  0.0252,  0.1191],
          [-0.1058,  0.0364,  0.0198, -0.0917, -0.0028],
          [-0.0229,  0.0083, -0.1244, -0.1101, -0.0546]]],


        [[[-0.1025,  0.0980,  0.1687, -0.1677, -0.0193],
          [-0.1645,  0.0033, -0.1953,  0.1175, -0.0983],
          [ 0.1003, -0.0635,  0.0410, -0.1394, -0.0530],
          [ 0.0878,  0.0984, -0.1403,  0.0196,  0.0905],
          [-0.1773,  0.0294,  0.0782,  0.0754, -0.0497]]],


        [[[-0.0563,  0.1161, -0.0245, -0.1300,  0.0774],
          [ 0.1658,  0.0431, -0.0231, -0.0575, -0.0158],
          [ 0.1284, -0.1402,  0.1414,  0.0412, -0.1003],
          [ 0.0346, -0.0797, -0.0516, -0.0537, -0.0405],
          [ 0.0761, -0.0554,  0.1701, -0.0086, -0.0913]]],


        [[[ 0.1607, -0.1700, -0.0649,  0.1922,  0.0293],
          [-0.1091,  0.0923, -0.0765,  0.0903,  0.071

#### 2.处理输入，调用反向传播

In [11]:
# Randomly generate a 32x32 input (batch of 1, 1 channel).
# NOTE: the name `input` shadows the Python builtin; it is kept because later
# cells reuse it.
input = torch.randn(1,1,32,32)
out = net(input)  # forward pass; also prints the flattened feature size
print(input)
print(out)

torch.Size([1, 400])
tensor([[[[ 0.1410,  0.0656,  0.8847,  ...,  1.0742, -2.1916,  0.0191],
          [ 0.2862, -1.4059,  2.5971,  ...,  0.0371,  1.0758, -0.3404],
          [ 1.3754,  1.0421,  0.5606,  ...,  0.3113,  0.6303,  0.0870],
          ...,
          [ 1.1281,  0.0157, -0.5550,  ...,  0.4896, -0.3716,  2.1073],
          [ 0.6121,  2.4070, -0.1312,  ..., -0.2073, -1.5819,  1.3727],
          [-1.1189,  2.3323, -1.2708,  ..., -1.1971,  0.3736, -1.8210]]]])
tensor([[ 0.0246,  0.0646, -0.1019, -0.0359,  0.0387,  0.0608,  0.0526,  0.1081,
         -0.0086, -0.0436]], grad_fn=<AddmmBackward>)


In [12]:
# Zero the gradient buffers of all parameters, then backpropagate a random gradient.
net.zero_grad()
# Keep a handle on the gradient tensor so the values printed below are the ones
# actually fed to backward(); sampling torch.randn again would show unrelated numbers.
grad_output = torch.randn(1, 10)
out.backward(grad_output)
print(grad_output)

tensor([[-0.2264, -0.4808, -0.4831, -1.4269, -0.3363, -0.7751, -1.4564,  1.0399,
          0.2601,  1.3453]])


#### 3.计算loss

In [13]:
# A simple loss function: nn.MSELoss, the mean squared error.
output = net(input)
target = torch.randn(10)  # random stand-in target with 10 values
print(target)
target = target.view(1, -1)  # reshape to 2-D (1 row) so it lines up with the network output
print(target)
criterion = nn.MSELoss()

loss = criterion(output, target)
print(loss)

torch.Size([1, 400])
tensor([-0.6647,  0.4939, -0.1575,  0.2591,  1.1398,  0.5038, -0.5245, -0.9105,
        -0.5328, -0.2107])
tensor([[-0.6647,  0.4939, -0.1575,  0.2591,  1.1398,  0.5038, -0.5245, -0.9105,
         -0.5328, -0.2107]])
tensor(0.3831, grad_fn=<MseLossBackward>)


In [14]:
# When loss.backward() is called, the whole graph is differentiated;
# every tensor in the graph with requires_grad=True accumulates the gradient
# into its .grad tensor.
print(loss.grad_fn)  # MSELoss
print(loss.grad_fn.next_functions[0][0])  # Linear (AddmmBackward in the recorded output)
print(loss.grad_fn.next_functions[0][0].next_functions[0][0])  # originally labelled ReLU, but the recorded output shows AccumulateGrad — NOTE(review): node order may depend on the PyTorch version, confirm

<MseLossBackward object at 0x0000015944F944C8>
<AddmmBackward object at 0x0000015944F94088>
<AccumulateGrad object at 0x0000015944F944C8>


In [16]:
'''反向传播'''
# Backpropagation: compare conv1's bias gradient before and after loss.backward().
net.zero_grad()     # zeroes the gradient buffers of all parameters
print('conv1.bias.grad before backward')
print(net.conv1.bias.grad)
loss.backward()  # backpropagate the loss; conv1.bias.grad is non-zero afterwards
print('conv1.bias.grad after backward')
print(net.conv1.bias.grad)

conv1.bias.grad before backward
tensor([0., 0., 0., 0., 0., 0.])
conv1.bias.grad after backward
tensor([-0.0025,  0.0041, -0.0003, -0.0119, -0.0018,  0.0049])


#### 4.更新神经网络参数

In [17]:
# Stochastic gradient descent by hand: weight = weight - learning_rate * gradient.
learning_rate = 0.01
# Do the in-place update under no_grad() instead of going through `.data`, so the
# update itself is not recorded by autograd while the tensors stay properly tracked.
with torch.no_grad():
    for f in net.parameters():
        if f.grad is None:
            # This parameter received no gradient; there is nothing to apply.
            continue
        f.sub_(f.grad * learning_rate)

In [18]:
# Other update rules: use an optimizer from torch.optim instead of the manual loop.
import torch.optim as optim

optimizer = optim.SGD(net.parameters(), lr = 0.01)
optimizer.zero_grad()  # zero the gradient buffers
output = net(input)  # forward pass; also prints the flattened feature size
loss = criterion(output, target)
loss.backward()
optimizer.step()  # does the update

torch.Size([1, 400])
