## 建立一个神经网络

In [18]:
import torch
import torch.nn as nn
import torch.nn.functional as F 

In [19]:
# 定义网络结构

class Net(nn.Module):
    """LeNet-style CNN: two conv + max-pool stages followed by three linear layers.

    ``fc1`` takes 16 * 5 * 5 input features, so the second pooling stage has to
    emit 16 feature maps of size 5 x 5. A single-channel 32 x 32 input does this
    (32 -> 28 -> 14 -> 10 -> 5 along each spatial axis).
    """

    def __init__(self):
        super(Net, self).__init__()
        # Convolutional feature extractor: 1 -> 6 -> 16 channels, 5 x 5 kernels.
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        # Fully connected head: 400 -> 120 -> 84 -> 10.
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Run the network on ``x`` and return the raw output of ``fc3``."""
        # Stage 1: convolution -> ReLU -> 2 x 2 max-pooling.
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, (2, 2))
        # Stage 2: same pattern; a scalar pool size means a square window.
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, 2)
        # Flatten every sample into a 1-D feature vector for the linear layers.
        flat_width = self.num_flat_features(x)
        x = x.view(-1, flat_width)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)

    def num_flat_features(self, x):
        """Return the product of all dimensions of ``x`` except the first."""
        count = 1
        for dim in x.size()[1:]:
            count *= dim
        return count

卷积层 Conv2d 里面常用的参数有五个：in_channels、out_channels、kernel_size、stride、padding

* in_channels表示的是输入卷积层的图片厚度
* out_channels表示的是要输出的厚度
* kernel_size表示的是卷积核的大小，可以用一个数字表示长宽相等的卷积核，比如kernel_size=3，也可以用不同的数字表示长宽不同的卷积核，比如kernel_size=(3, 2)
* stride表示卷积核滑动的步长
* padding表示的是在图片四周填充0的圈数，padding=0表示不填充，padding=1表示在四周各填充一圈（1个像素宽）的0

In [20]:
# Instantiate the network and print its layer summary.
net = Net()
print(net)

Net(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


关于参数：

In [21]:
params = list(net.parameters())
print(len(params) ) # number of parameter tensors (a weight and a bias for each of the 5 layers), not the number of scalar parameters
print(params[0].size()) # parameter size of the conv1 layer
# conv1 weight layout: 6 output filters, 1 input channel, 5 x 5 kernel

10
torch.Size([6, 1, 5, 5])


In [22]:
# Random input batch: 1 sample, 1 channel, 32 x 32 pixels. With the two
# 5 x 5 convolutions and two 2 x 2 poolings in Net, this size yields the
# 16 * 5 * 5 features that fc1 expects.
# NOTE(review): the name `input` shadows Python's builtin input(); later cells
# reference it, so it is left unchanged here.
input = torch.randn(1, 1, 32, 32)
out = net(input)
print(out)

tensor([[-0.1695,  0.0876, -0.0661,  0.0528,  0.0761,  0.0028, -0.1067,  0.0255,
         -0.0847,  0.1321]], grad_fn=<AddmmBackward>)


清零所有参数的梯度缓存，然后进行随机梯度的反向传播：

In [23]:
net.zero_grad()
print(input.grad)
out.backward(torch.randn(1, 10))
print(input.grad)

# Why are both prints None?
# Not because a loss function is missing: backward() is called here directly
# with an explicit upstream gradient of the same shape as `out`.
# The reason is that `input` was created by torch.randn(...) without
# requires_grad=True, so autograd does not track it and never fills in
# input.grad; only the network's parameters receive gradients.
# To get a gradient w.r.t. the input, call input.requires_grad_() before the
# forward pass.

None
None


## 设定损失函数

In [24]:
output = net(input)

# Dummy regression target: ten random values reshaped into a single row so
# that it has the same shape as `output`.
target = torch.randn(10).view(1, -1)

# Mean-squared error between the prediction and the dummy target.
criterion = nn.MSELoss()
loss = criterion(output, target)
print(loss)

tensor(0.5222, grad_fn=<MseLossBackward>)


In [25]:
# Walk backwards through the autograd graph, starting at the loss node and
# following the first input edge at each step.
print(loss.grad_fn)  # backward node of MSELoss
print(loss.grad_fn.next_functions[0][0])  # backward node of the last Linear layer (addmm)
print(loss.grad_fn.next_functions[0][0].next_functions[0][0])  # NOTE(review): in the recorded run this prints an AccumulateGrad node (a leaf, presumably the fc3 bias), not ReLU

<MseLossBackward object at 0x105308780>
<AddmmBackward object at 0x105308710>
<AccumulateGrad object at 0x105308780>


## 反向传播

调用loss.backward()来反向传播误差，计算各参数的梯度。

需要清零现有的梯度，否则梯度将会  **与已有的梯度累加** 。

In [26]:
net.zero_grad()     # clear the gradient buffers of all parameters

print('conv1.bias.grad before backward')
print(net.conv1.bias.grad)

# Backpropagate from the loss; this fills in .grad on the parameters that
# contributed to it, including conv1.bias.
loss.backward()

print('conv1.bias.grad after backward')
print(net.conv1.bias.grad)

conv1.bias.grad before backward
tensor([0., 0., 0., 0., 0., 0.])
conv1.bias.grad after backward
tensor([0.0007, 0.0035, 0.0043, 0.0054, 0.0103, 0.0055])


## 更新权重

`weight = weight - learning_rate * gradient`


In [27]:
learning_rate = 0.01

# One step of plain SGD: weight = weight - learning_rate * gradient.
# The update runs under torch.no_grad() so the in-place subtraction is not
# recorded by autograd; this is the supported replacement for going through
# `.data`, which bypasses autograd's correctness checks.
with torch.no_grad():
    for param in net.parameters():
        # Parameters that took no part in the last backward pass have no
        # gradient; skip them instead of failing on `None`.
        if param.grad is None:
            continue
        param.sub_(learning_rate * param.grad)