In [5]:
import torch as t
from torch import nn
from torch.autograd import Variable as V

# ReLU keeps values >= 0 and zeroes the rest. With inplace=True the result is
# written back into the input's own storage instead of a new tensor.
input = V(t.randn(2, 3))
relu = nn.ReLU(inplace=True)

print(input)  # values before the activation
output = relu(input)
# Both prints show the same values: the input has been overwritten in place.
print(output, input, sep="\n")

Variable containing:
 1.2263 -1.5229 -0.7173
-0.6483 -0.3258  0.2499
[torch.FloatTensor of size 2x3]

Variable containing:
 1.2263  0.0000  0.0000
 0.0000  0.0000  0.2499
[torch.FloatTensor of size 2x3]

Variable containing:
 1.2263  0.0000  0.0000
 0.0000  0.0000  0.2499
[torch.FloatTensor of size 2x3]



In [16]:
from collections import OrderedDict

# 0. The plain way of defining a network was shown earlier: there, pooling and
#    activation only appear inside forward().
# 1. nn.Sequential is a shortcut for the __init__ part of a model; a forward()
#    still has to be written for the enclosing module.

# Variant A: start from an empty container and register layers one by one with
# add_module(), which gives every layer its own name.
net1 = nn.Sequential()
net1.add_module('conv', nn.Conv2d(in_channels=3, out_channels=3, kernel_size=3))
net1.add_module('batchnorm', nn.BatchNorm2d(num_features=3))
# Activations must be listed explicitly as layers in a Sequential.
net1.add_module('activation_layer', nn.ReLU())

print(net1)

# Variant B: pass the layers positionally. They are applied in the order given.
# Drawback: the layers are only identified by their index (0, 1, 2, ...), which
# makes them harder to tell apart.
net2 = nn.Sequential(
    nn.Conv2d(in_channels=3, out_channels=3, kernel_size=3),
    nn.BatchNorm2d(num_features=3),
    nn.ReLU(),  # activation listed explicitly here as well
)

print(net2)

# Variant C: pass an OrderedDict so that every layer gets an explicit name,
# as with add_module(), while still building the container in one expression.
net3 = nn.Sequential(OrderedDict((
    ('conv1', nn.Conv2d(in_channels=3, out_channels=3, kernel_size=3)),
    ('bn1', nn.BatchNorm2d(num_features=3)),
    ('relu1', nn.ReLU()),
)))

print(net3)

Sequential (
  (conv): Conv2d(3, 3, kernel_size=(3, 3), stride=(1, 1))
  (batchnorm): BatchNorm2d(3, eps=1e-05, momentum=0.1, affine=True)
  (activation_layer): ReLU ()
)
Sequential (
  (0): Conv2d(3, 3, kernel_size=(3, 3), stride=(1, 1))
  (1): BatchNorm2d(3, eps=1e-05, momentum=0.1, affine=True)
  (2): ReLU ()
)
Sequential (
  (conv1): Conv2d(3, 3, kernel_size=(3, 3), stride=(1, 1))
  (bn1): BatchNorm2d(3, eps=1e-05, momentum=0.1, affine=True)
  (relu1): ReLU ()
)


In [22]:
# 我们使用第三种方法，实现先前的LeNet。通过字典的形式添加每一层，并且设置单独的层名称
import torch as t
from torch import nn
import torch.optim as optim
from torch.autograd import Variable as V

class Net(nn.Module):
    """LeNet-style CNN assembled from two named ``nn.Sequential`` stages.

    ``conv`` holds two conv -> ReLU -> max-pool groups; ``dense`` holds a
    three-layer fully connected head that ends in 10 outputs. The first
    linear layer expects 16*5*5 features, which is what the conv stage
    yields for a 1x32x32 input.
    """

    def __init__(self):
        super().__init__()

        # Feature extractor. The dictionary keys become the sub-module names.
        feature_layers = OrderedDict()
        feature_layers["conv1"] = nn.Conv2d(1, 6, 5)
        feature_layers["rel_c1"] = nn.ReLU()
        feature_layers["pool1"] = nn.MaxPool2d((2,2))
        feature_layers["conv2"] = nn.Conv2d(6, 16, 5)
        feature_layers["rel_c2"] = nn.ReLU()
        feature_layers["pool2"] = nn.MaxPool2d(2)
        self.conv = nn.Sequential(feature_layers)

        # Classifier head operating on the flattened feature maps.
        head_layers = OrderedDict()
        head_layers["line1"] = nn.Linear(16*5*5, 120)
        head_layers["rel_d1"] = nn.ReLU()
        head_layers["line2"] = nn.Linear(120, 84)
        head_layers["rel_d2"] = nn.ReLU()
        head_layers["line3"] = nn.Linear(84, 10)
        self.dense = nn.Sequential(head_layers)

    def forward(self, x):
        """Run ``x`` through the conv stage, flatten per sample, apply the head."""
        features = self.conv(x)
        # Keep the batch dimension and collapse everything else into one axis.
        flattened = features.view(features.size(0), -1)
        return self.dense(flattened)

net = Net()
print(net)

# 3. Inspect the learnable parameters
params = list(net.parameters())
print(len(params))

# Every conv / linear layer contributes one weight tensor and one bias tensor.
for name, parameters in net.named_parameters():
    print(name, ':', parameters.size())

# 4. Training
optimizer = optim.SGD(net.parameters(), lr=0.01)  # plain SGD, learning rate 0.01

# The loss module and the regression target never change, so they are built
# once instead of on every iteration.
criterion = nn.MSELoss()
# The target must match the network output exactly: float dtype and shape
# (1, 10). A bare arange(0, 10) is an integer tensor of shape (10,) on current
# PyTorch, which makes MSELoss rely on broadcasting and fail in backward().
target = V(t.arange(0, 10).float().view(1, -1))

for i in range(10000):
    # 4.1 Forward pass on a single random 1x32x32 sample (batch size 1)
    input = V(t.randn(1, 1, 32, 32))
    output = net(input)

    # 4.2 Loss between the prediction and the fixed target
    loss = criterion(output, target)

    # 4.3 Clear stale gradients, then back-propagate the loss
    optimizer.zero_grad()
    loss.backward()

    # 4.4 Update the parameters
    optimizer.step()

# 5. Test: one more forward pass on a fresh random sample
input = V(t.randn(1, 1, 32, 32))
output = net(input)
print(output)

Net (
  (conv): Sequential (
    (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
    (rel_c1): ReLU ()
    (pool1): MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))
    (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
    (rel_c2): ReLU ()
    (pool2): MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))
  )
  (dense): Sequential (
    (line1): Linear (400 -> 120)
    (rel_d1): ReLU ()
    (line2): Linear (120 -> 84)
    (rel_d2): ReLU ()
    (line3): Linear (84 -> 10)
  )
)
10
conv.conv1.weight : torch.Size([6, 1, 5, 5])
conv.conv1.bias : torch.Size([6])
conv.conv2.weight : torch.Size([16, 6, 5, 5])
conv.conv2.bias : torch.Size([16])
dense.line1.weight : torch.Size([120, 400])
dense.line1.bias : torch.Size([120])
dense.line2.weight : torch.Size([84, 120])
dense.line2.bias : torch.Size([84])
dense.line3.weight : torch.Size([10, 84])
dense.line3.bias : torch.Size([10])


RuntimeError: size mismatch, m1: [400 x 1], m2: [400 x 120] at /pytorch/torch/lib/TH/generic/THTensorMath.c:1293