In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [2]:
class Net(nn.Module):
    """Small LeNet-style CNN: two 5x5 convolutions followed by three linear layers.

    ``fc1`` expects 16*5*5 = 400 features per sample, which is what a
    single-channel 32x32 input flattens to after the two conv + pool stages.
    ``forward`` returns the raw output of ``fc3`` (10 values per sample).
    """

    def __init__(self):
        super().__init__()
        # Conv2d arguments: in_channels, out_channels, kernel_size
        # (see the PyTorch docs for the remaining options).
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)

        # Fully connected head: 400 -> 120 -> 84 -> 10.
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def num_flat_features(self, x):
        """Return the number of elements in one sample of ``x``.

        This is the product of every dimension except the first (batch) one,
        i.e. the row length needed to flatten each sample to 1-D.
        """
        return x.size()[1:].numel()

    def forward(self, x):
        """Run ``x`` through both conv/pool stages and the linear head."""
        # conv -> ReLU -> 2x2 max-pool, twice. A square pooling window could
        # also be given as a single int instead of a tuple.
        pooled1 = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        pooled2 = F.max_pool2d(F.relu(self.conv2(pooled1)), (2, 2))
        # Collapse each sample to one row; -1 lets view() infer the batch size.
        flat = pooled2.view(-1, self.num_flat_features(pooled2))
        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        # No activation on the last layer: raw outputs are returned.
        return self.fc3(hidden)

# Build the model and print its layer summary.
net = Net()
print(net)
        

Net(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


In [3]:
# Collect the learnable tensors. Each of the five layers contributes a weight
# and a bias, hence 10 entries in total.
params = list(net.parameters())
print(len(params))
# Index 2 is the third parameter tensor; show its shape.
print(params[2].shape)

10
torch.Size([16, 6, 5, 5])


In [4]:
# One random single-channel 32x32 sample.
# NOTE: the name `input` shadows the Python builtin; it is kept because later
# cells read this variable.
input = torch.rand((1, 1, 32, 32))
# Forward pass through the network.
out = net(input)
print(out)

tensor([[-0.0334,  0.0367, -0.0327,  0.0089,  0.0147,  0.0279, -0.0456, -0.0005,
          0.0085, -0.0416]], grad_fn=<AddmmBackward>)


In [5]:
# Clear any previously stored gradients before back-propagating.
net.zero_grad()
# `out` is not a scalar, so backward() needs an upstream gradient of the same
# shape; a random one is used here.
upstream_grad = torch.randn(1, 10)
# backward() returns nothing, so this first print shows `None`.
backward_result = out.backward(upstream_grad)
print(backward_result)
# Gradient stored on the third parameter tensor after the backward pass.
print(params[2].grad)

None
tensor([[[[ 2.6203e-03,  4.7484e-03, -1.1624e-03,  2.8907e-03,  2.5488e-03],
          [ 1.1749e-03,  3.2801e-03,  3.9844e-03,  2.1648e-03,  1.4257e-03],
          [ 2.1373e-03,  6.6446e-04,  1.6895e-03,  1.6092e-03,  1.5504e-03],
          [ 3.6784e-03,  7.5188e-03,  3.7309e-03,  2.2690e-03,  5.4030e-03],
          [ 4.0631e-03,  4.1616e-03, -6.3834e-04,  1.0305e-03,  3.5159e-03]],

         [[ 1.4677e-02,  8.0485e-03,  8.5866e-03,  1.2183e-02,  9.7600e-03],
          [ 7.4713e-03,  1.0632e-02,  7.6905e-03,  3.8951e-03,  1.3115e-02],
          [ 1.3668e-02,  9.3151e-03,  7.8545e-03,  4.1869e-03,  2.4774e-03],
          [ 1.0270e-02,  7.2276e-03,  6.3357e-03,  1.2257e-02,  1.1213e-02],
          [ 8.5768e-03,  9.3000e-03,  9.7055e-03,  1.3264e-02,  8.7156e-03]],

         [[ 1.9636e-02,  1.7042e-02,  1.8843e-02,  1.6429e-02,  1.3463e-02],
          [ 2.0527e-02,  1.3296e-02,  1.5946e-02,  1.6787e-02,  1.5918e-02],
          [ 1.2871e-02,  1.2722e-02,  1.4952e-02,  1.3124e-02,  2.0

In [6]:
# Fresh forward pass, so a new autograd graph is available for the loss.
output = net(input)
# Random target with 10 values, reshaped to a single row of shape (1, 10).
target = torch.randn(10).view(1, -1)
# Mean squared error between the network output and the target.
criterion = nn.MSELoss()
loss = criterion(output, target)
print(loss)

tensor(1.4127, grad_fn=<MseLossBackward>)


In [7]:
# Reset stored gradients so the values printed below come from this loss only.
# NOTE(review): the recorded output shows zeros before backward; other PyTorch
# versions may reset gradients to None instead - confirm.
net.zero_grad()

conv1_bias = net.conv1.bias

print('conv1.bias.grad before backward')
print(conv1_bias.grad)

# Back-propagate the loss; this fills in .grad on the parameters.
loss.backward()

print('conv1.bias.grad after backward')
print(conv1_bias.grad)

conv1.bias.grad before backward
tensor([0., 0., 0., 0., 0., 0.])
conv1.bias.grad after backward
tensor([-0.0211, -0.0106,  0.0073,  0.0094, -0.0058,  0.0115])


In [8]:
# Manual SGD step: weight <- weight - learning_rate * gradient.
learning_rate = 0.01
# no_grad() keeps the in-place update itself out of the autograd graph.
with torch.no_grad():
    for param in net.parameters():
        # Skip parameters that have no gradient stored.
        if param.grad is not None:
            # sub_ (trailing underscore) modifies the tensor in place. Plain
            # sub() returns a new tensor, so its result would be thrown away
            # and the weights would never change.
            param.sub_(param.grad * learning_rate)

利用优化器优化网络

In [9]:
# One training step driven by an optimizer instead of a hand-written update.
# `optim` (torch.optim) is imported with the other imports at the top of the
# notebook.
optimizer = optim.Adam(net.parameters(), lr=0.01)

# Clear gradients left over from earlier backward passes.
optimizer.zero_grad()
# Forward pass and loss on the same input/target pair used above.
output = net(input)
loss = criterion(output, target)
# Back-propagate, then let the optimizer apply the parameter update.
loss.backward()
optimizer.step()