In [1]:
import torch
import numpy as np
import torch.nn as nn
import torch.nn.functional as F

In [2]:
class Net(nn.Module):
    """LeNet-style CNN: two conv -> ReLU -> max-pool stages, then three linear layers.

    ``fc1`` expects 16 * 5 * 5 = 400 flattened features. With unpadded 5x5
    convolutions and 2x2 pooling that is what a 1 x 32 x 32 input produces
    (32 -> 28 -> 14 -> 10 -> 5 per spatial side).
    """

    def __init__(self):
        super().__init__()
        # Convolutional stages: 1 -> 6 -> 16 channels, 5x5 kernels.
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        # Fully connected head: 400 -> 120 -> 84 -> 10 outputs.
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Run a batch through the network and return the raw 10-way outputs.

        No activation is applied after ``fc3``.
        """
        pooled = F.max_pool2d(F.relu(self.conv1(x)), 2)
        pooled = F.max_pool2d(F.relu(self.conv2(pooled)), 2)
        # Collapse every non-batch dimension into one feature axis for fc1.
        flat = pooled.view(-1, self.num_flat_features(pooled))
        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)

    def num_flat_features(self, x):
        """Return the product of all dimensions of ``x`` except the first, as an int."""
        return x.shape[1:].numel()

In [3]:
# Build the network; ending the cell with the bare name displays its layer summary.
net = Net()
net

Net(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)

In [4]:
# Materialise the learnable tensors; the recorded output shows each layer's
# weight followed by its bias.
parms = list(net.parameters())
# The first entry is conv1's weight: (out_channels, in_channels, kernel_h, kernel_w).
for parm in parms:
    print(parm.shape)

torch.Size([6, 1, 5, 5])
torch.Size([6])
torch.Size([16, 6, 5, 5])
torch.Size([16])
torch.Size([120, 400])
torch.Size([120])
torch.Size([84, 120])
torch.Size([84])
torch.Size([10, 84])
torch.Size([10])


In [6]:
# Push a random batch through the network and display the raw outputs.
# NOTE(review): `input` shadows the Python builtin of the same name in this notebook.
input = torch.randn(2, 1, 32, 32) # (batch size, channels, height, width)
out = net(input)
out

tensor([[-0.0923,  0.1038,  0.0516,  0.0857, -0.0774,  0.0847, -0.0476, -0.0178,
         -0.0751, -0.0169],
        [-0.1000,  0.0970,  0.0635,  0.0869, -0.0702,  0.0855, -0.0310, -0.0130,
         -0.0787, -0.0178]], grad_fn=<AddmmBackward>)

In [7]:
# Reset the gradients of every parameter of net.
net.zero_grad()
# Optional: backpropagate an arbitrary upstream gradient. It has to match the
# shape of `out`, which is (2, 10) for the batch above, not (1, 10):
# out.backward(torch.randn(2, 10))

In [8]:
# Dummy regression target. It is built with the same shape as `out` (one row
# per sample) so MSELoss compares element-for-element; a (1, 10) target against
# a (2, 10) output is silently broadcast and triggers a size-mismatch warning.
target = torch.randn(out.size())
criterion = nn.MSELoss()

loss = criterion(out, target)

# Display the loss and walk three steps back through the autograd graph.
(
    loss,
    loss.grad_fn,
    loss.grad_fn.next_functions[0][0],
    loss.grad_fn.next_functions[0][0].next_functions[0][0],
)

(tensor(1.2605, grad_fn=<MseLossBackward>),
 <MseLossBackward at 0x118989240>,
 <AddmmBackward at 0x1189892b0>,
 <AccumulateGrad at 0x118989860>)

In [9]:
# Show conv1's bias gradient before and after backpropagating the loss.
net.zero_grad() # clear any cached gradients
print(net.conv1.bias.grad)

loss.backward()

print(net.conv1.bias.grad) # gradient of conv1's bias after backpropagation

None
tensor([-0.0019,  0.0077,  0.0106, -0.0095, -0.0073,  0.0044])


In [10]:
import torch.optim as optim

# Plain SGD over all of the network's parameters.
optimizer = optim.SGD(net.parameters(), lr=0.01)

# One complete update step. Gradients are cleared first, then recomputed by a
# fresh forward/backward pass, and only then applied. Calling zero_grad()
# directly before step() would discard the gradients step() needs.
optimizer.zero_grad()
output = net(input)
loss = criterion(output, target)
loss.backward()
optimizer.step()