In [1]:
# Import PyTorch under the short alias `t`; the bare expression below is the
# cell's last line, so the notebook displays the installed version string.
import torch as t
t.__version__

'0.2.0_3'

In [1]:
import torch.nn as nn
import torch.nn.functional as F

In [2]:
class Net(nn.Module):
    """Small convolutional classifier: two conv/ReLU/max-pool stages, then three linear layers.

    With 5x5 unpadded convolutions and 2x2 pooling, a 32x32 input shrinks
    32 -> 28 -> 14 -> 10 -> 5, which is why the first linear layer takes
    16 * 5 * 5 features. The network emits 10 values per sample.
    """

    def __init__(self):
        # An nn.Module subclass must run the parent constructor in its own
        # constructor; this is equivalent to nn.Module.__init__(self).
        super().__init__()

        # Convolution: 1 input channel (single-channel image), 6 output channels, 5x5 kernel.
        self.conv1 = nn.Conv2d(1, 6, 5)
        # Convolution: 6 input channels, 16 output channels, 5x5 kernel.
        self.conv2 = nn.Conv2d(6, 16, 5)

        # Affine / fully connected layers, y = Wx + b.
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Run the forward pass and return the output of the last linear layer.

        No activation is applied after ``fc3``.
        """
        # Convolution -> activation -> pooling, twice. A pooling window given
        # as (2, 2) or as 2 means the same square 2x2 window.
        stage1 = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        stage2 = F.max_pool2d(F.relu(self.conv2(stage1)), 2)

        # Flatten everything except the batch dimension; -1 lets view() infer the size.
        flat = stage2.view(stage2.size(0), -1)

        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)
# Instantiate the network; ending the cell with the bare name makes the
# notebook display the module's layer summary.
net = Net()
net

Net (
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear (400 -> 120)
  (fc2): Linear (120 -> 84)
  (fc3): Linear (84 -> 10)
)

In [3]:
# net.parameters() yields the learnable tensors one at a time; collect them
# into a list so they can be counted (each layer contributes a weight and a bias).
params = [p for p in net.parameters()]
len(params)

10

In [4]:
# Print every learnable parameter's qualified name next to its shape.
# Each line starts with the literal prefix '66'.
for name, param in net.named_parameters():
    print('66', name, ':', param.size())

66 conv1.weight : torch.Size([6, 1, 5, 5])
66 conv1.bias : torch.Size([6])
66 conv2.weight : torch.Size([16, 6, 5, 5])
66 conv2.bias : torch.Size([16])
66 fc1.weight : torch.Size([120, 400])
66 fc1.bias : torch.Size([120])
66 fc2.weight : torch.Size([84, 120])
66 fc2.bias : torch.Size([84])
66 fc3.weight : torch.Size([10, 84])
66 fc3.bias : torch.Size([10])


In [5]:
from torch.autograd import Variable
import torch as t
# Random input of size (1, 1, 32, 32); the second dimension matches conv1's
# single input channel.
# NOTE: the name `input` shadows Python's built-in input() in this notebook.
input = Variable(t.randn(1, 1, 32, 32))
# Calling the module runs Net.forward on the input.
out = net(input)
# Last expression of the cell, so the output size is displayed.
out.size()

torch.Size([1, 10])

In [6]:
net.zero_grad() # reset the gradients of all parameters
out.backward(Variable(t.ones(1, 10))) # backpropagate, seeding with a (1, 10) gradient of ones

In [7]:
# Forward pass, then score the prediction against a dummy target 0..9 with
# mean-squared error.
output = net(input)
# Give the target the same (1, 10) shape as `output` and force a float dtype.
# A flat (10,) target only matches by element count, and depending on the
# PyTorch version arange() may produce an integer tensor, which MSELoss
# cannot combine with the float network output.
target = Variable(t.arange(0, 10).view(1, 10).float())
criterion = nn.MSELoss()
loss = criterion(output, target)
# Last expression of the cell, so the loss value is displayed.
loss

Variable containing:
 28.4529
[torch.FloatTensor of size 1]

In [8]:
# Compare conv1's bias gradient before and after calling .backward().
net.zero_grad() # reset the gradients of all learnable parameters in net

# Only the last expression of a cell is displayed automatically, so the
# "before" value has to be printed explicitly to be visible.
print('conv1.bias.grad before backward:')
print(net.conv1.bias.grad)
loss.backward()
# Last expression of the cell: the gradient after backpropagation is displayed.
net.conv1.bias.grad

Variable containing:
-0.1200
 0.0039
 0.0733
 0.0142
 0.0350
-0.0554
[torch.FloatTensor of size 6]

In [11]:
import torch.optim as optim
# Create an optimizer, telling it which parameters to adjust and the learning rate.
# NOTE: the variable name `optimzer` (sic) is kept unchanged because other
# cells may refer to it.
optimzer = optim.SGD(net.parameters(), lr=0.01)

# One training step:
# 1. clear the gradients first
optimzer.zero_grad()

# 2. forward pass and loss computation
output = net(input)
loss = criterion(output, target)

# 3. backpropagation
loss.backward()

# 4. parameter update
optimzer.step()