In [3]:
import torch
import torchvision
import onnx
import torch.nn as nn
import torch.nn.functional as F

In [4]:
# Fix the global RNG seed so the random tensors created later in the notebook
# are reproducible. The trailing semicolon keeps the notebook from echoing the
# returned Generator object as cell output.
SEED = 1
torch.manual_seed(SEED);

<torch._C.Generator at 0x7fb3ec387530>

In [21]:
class Net(nn.Module):
    """Small CNN without pooling: two 5x5 convolutions, then three linear layers.

    The first linear layer is sized for a 1-channel 32x32 input: each
    un-padded 5x5 convolution trims 4 pixels from every spatial dimension
    (32 -> 28 -> 24), leaving 16 * 24 * 24 = 9216 features per sample.

    Args:
        verbose: When True (the default, which matches the original
            behaviour) ``forward`` prints the activation shape after each
            convolution. Pass False to silence the prints, e.g. when the
            model is called repeatedly in a training loop.
    """

    def __init__(self, verbose=True):
        super().__init__()
        self.verbose = verbose
        self.conv1 = nn.Conv2d(1, 6, 5)   # 1 -> 6 channels, 32x32 -> 28x28
        self.conv2 = nn.Conv2d(6, 16, 5)  # 6 -> 16 channels, 28x28 -> 24x24

        self.fc1 = nn.Linear(16 * 24 * 24, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Run the network on ``x`` and return one row of 10 outputs per sample.

        ``x`` must flatten to 9216 features after the two convolutions,
        which is the case for input of shape (batch, 1, 32, 32).
        """
        # Instances restored from a whole-module pickle written before the
        # `verbose` flag existed have no such attribute; fall back to the
        # original always-print behaviour for them.
        verbose = getattr(self, "verbose", True)

        x = F.relu(self.conv1(x))
        if verbose:
            print("after 1 conv:", x.size())  # [1, 6, 28, 28] for a 1x1x32x32 input
        x = F.relu(self.conv2(x))
        if verbose:
            print("after 2 conv:", x.size())  # [1, 16, 24, 24] for a 1x1x32x32 input

        # Collapse everything except the batch dimension before the linear layers.
        x = x.view(-1, self.num_flat_features(x))
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

    def num_flat_features(self, x):
        """Return the number of features per sample in ``x``.

        Every scalar is counted as one feature, i.e. the result is the
        product of all dimensions of ``x`` except the first (batch) one.
        """
        size = x.size()[1:]  # all dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features
# Instantiate the network and print its layer summary.
net = Net()
print(net)

Net(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=9216, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


In [22]:
# Materialise the learnable parameters of the network so they can be counted
# and indexed.
params = list(net.parameters())
print(len(params))
print(type(params[0]))
# Walk the parameter list itself instead of a hard-coded range(10): the loop
# then stays correct if layers are added to or removed from the network.
for i, param in enumerate(params):
    print("第{}个参数形状为{}".format(i, param.size()))

10
<class 'torch.nn.parameter.Parameter'>
第0个参数形状为torch.Size([6, 1, 5, 5])
第1个参数形状为torch.Size([6])
第2个参数形状为torch.Size([16, 6, 5, 5])
第3个参数形状为torch.Size([16])
第4个参数形状为torch.Size([120, 9216])
第5个参数形状为torch.Size([120])
第6个参数形状为torch.Size([84, 120])
第7个参数形状为torch.Size([84])
第8个参数形状为torch.Size([10, 84])
第9个参数形状为torch.Size([10])


In [23]:
# One random sample shaped (batch=1, channels=1, 32, 32).
# NOTE: the name `input` shadows the Python builtin; it is kept because later
# cells refer to it.
input = torch.randn(1, 1, 32, 32)
print(input)

# Shape produced by the first convolution alone.
conv1_out = net.conv1(input)
print('conv1输出的形状：', conv1_out.size())

# Full forward pass through the network.
out = net(input)
print('输出层形状', out.size())
print('输出层结果：', out)

tensor([[[[ 0.9131,  1.5890, -0.3152,  ...,  0.4767, -0.0083,  0.6722],
          [ 0.3228, -0.7687, -0.3986,  ..., -0.9400, -0.5485, -0.9605],
          [ 0.0745, -1.2055, -1.8201,  ...,  0.6705,  0.3617, -0.3346],
          ...,
          [ 0.5745, -0.6045, -0.4176,  ..., -0.7851, -0.5122, -1.5070],
          [-1.2280, -0.5470, -0.7053,  ..., -0.4143, -0.6768,  1.0649],
          [-0.6395, -0.4630, -0.5645,  ..., -0.5569, -0.4479,  1.6299]]]])
conv1输出的形状： torch.Size([1, 6, 28, 28])
after 1 conv: torch.Size([1, 6, 28, 28])
after 2 conv: torch.Size([1, 16, 24, 24])
输出层形状 torch.Size([1, 10])
输出层结果： tensor([[-0.0583, -0.0590,  0.0269,  0.1262, -0.0282, -0.0732,  0.0569, -0.0128,
         -0.0683,  0.0967]], grad_fn=<AddmmBackward>)


In [24]:
# Forward pass plus a dummy regression target, used to demonstrate the loss
# and the autograd graph hanging off it.
output = net(input)
target = torch.randn(10)  # a dummy target, for example
target = target.view(1, -1)  # make it the same shape as output
criterion = nn.MSELoss()
print("target:", target)

loss = criterion(output, target)
print('loss:',loss)
# The graph description printed here lists only the operations this network
# actually performs; it has no pooling layers, so no maxpool2d nodes appear.
print('''
反向跟踪loss,使用它的.grad_fn属性,你会看到向下面这样的一个计算图:
input -> conv2d -> relu -> conv2d -> relu -> view -> linear -> relu -> linear -> relu -> linear -> MSELoss -> loss''')
print(loss.grad_fn)  # MSELoss
print(loss.grad_fn.next_functions[0][0])  # Linear (fc3), reported as an Addmm backward node
# Input 0 of that node is a leaf AccumulateGrad node, not the preceding ReLU
# (presumably it belongs to the fc3 bias -- TODO confirm).
print(loss.grad_fn.next_functions[0][0].next_functions[0][0])  # AccumulateGrad

after 1 conv: torch.Size([1, 6, 28, 28])
after 2 conv: torch.Size([1, 16, 24, 24])
target: tensor([[-0.0427,  1.2368, -0.7777,  0.4782, -0.9152,  0.6702, -0.0835, -0.7565,
         -1.4372,  0.0444]])
loss: tensor(0.6239, grad_fn=<MseLossBackward>)

反向跟踪loss,使用它的.grad_fn属性,你会看到向下面这样的一个计算图:
input -> conv2d -> relu -> maxpool2d -> conv2d -> relu -> maxpool2d -> view -> linear -> relu -> linear -> relu -> linear -> MSELoss -> loss
<MseLossBackward object at 0x7fb3844d3eb8>
<AddmmBackward object at 0x7fb3844cccc0>
<AccumulateGrad object at 0x7fb3844d3eb8>


In [25]:
# Optimizer: stochastic gradient descent over all parameters of the network.
optimizer = torch.optim.SGD(
    params=net.parameters(),
    lr=0.1,
)

In [26]:
# Fit the network to the single (input, target) pair defined in earlier cells
# by running 100 optimisation steps.
for epoch in range(100):
    optimizer.zero_grad()  # reset gradients before this iteration's backward pass
    output = net(input)
    loss = criterion(output, target)
    print(loss)  # prints the loss tensor itself (value and grad_fn) every iteration
    loss.backward()
    optimizer.step()

after 1 conv: torch.Size([1, 6, 28, 28])
after 2 conv: torch.Size([1, 16, 24, 24])
tensor(0.6239, grad_fn=<MseLossBackward>)
after 1 conv: torch.Size([1, 6, 28, 28])
after 2 conv: torch.Size([1, 16, 24, 24])
tensor(0.5102, grad_fn=<MseLossBackward>)
after 1 conv: torch.Size([1, 6, 28, 28])
after 2 conv: torch.Size([1, 16, 24, 24])
tensor(0.3737, grad_fn=<MseLossBackward>)
after 1 conv: torch.Size([1, 6, 28, 28])
after 2 conv: torch.Size([1, 16, 24, 24])
tensor(0.1650, grad_fn=<MseLossBackward>)
after 1 conv: torch.Size([1, 6, 28, 28])
after 2 conv: torch.Size([1, 16, 24, 24])
tensor(0.0263, grad_fn=<MseLossBackward>)
after 1 conv: torch.Size([1, 6, 28, 28])
after 2 conv: torch.Size([1, 16, 24, 24])
tensor(0.0144, grad_fn=<MseLossBackward>)
after 1 conv: torch.Size([1, 6, 28, 28])
after 2 conv: torch.Size([1, 16, 24, 24])
tensor(0.0229, grad_fn=<MseLossBackward>)
after 1 conv: torch.Size([1, 6, 28, 28])
after 2 conv: torch.Size([1, 16, 24, 24])
tensor(0.0681, grad_fn=<MseLossBackward>)


after 1 conv: torch.Size([1, 6, 28, 28])
after 2 conv: torch.Size([1, 16, 24, 24])
tensor(0.0018, grad_fn=<MseLossBackward>)
after 1 conv: torch.Size([1, 6, 28, 28])
after 2 conv: torch.Size([1, 16, 24, 24])
tensor(0.0014, grad_fn=<MseLossBackward>)
after 1 conv: torch.Size([1, 6, 28, 28])
after 2 conv: torch.Size([1, 16, 24, 24])
tensor(0.0016, grad_fn=<MseLossBackward>)
after 1 conv: torch.Size([1, 6, 28, 28])
after 2 conv: torch.Size([1, 16, 24, 24])
tensor(0.0013, grad_fn=<MseLossBackward>)
after 1 conv: torch.Size([1, 6, 28, 28])
after 2 conv: torch.Size([1, 16, 24, 24])
tensor(0.0014, grad_fn=<MseLossBackward>)
after 1 conv: torch.Size([1, 6, 28, 28])
after 2 conv: torch.Size([1, 16, 24, 24])
tensor(0.0012, grad_fn=<MseLossBackward>)
after 1 conv: torch.Size([1, 6, 28, 28])
after 2 conv: torch.Size([1, 16, 24, 24])
tensor(0.0013, grad_fn=<MseLossBackward>)
after 1 conv: torch.Size([1, 6, 28, 28])
after 2 conv: torch.Size([1, 16, 24, 24])
tensor(0.0011, grad_fn=<MseLossBackward>)


In [27]:
# Persist the trained model in two forms, both written to the current working directory.
# NOTE(review): saving the whole module object relies on pickle, so loading it
# presumably requires the Net class to be importable -- confirm against the
# torch.save documentation.
torch.save(net, 'net_without_pool.pth')  # save the entire network object
torch.save(net.state_dict(), 'net_param_without_pool.pth')   # save only the network's parameters (faster, smaller)

  "type " + obj.__name__ + ". It won't be checked "


In [28]:
# Run the trained network on an all-ones 1x1x32x32 input and show its output.
input2 = torch.ones(1, 1, 32, 32)
ones_output = net(input2)
print(ones_output)

after 1 conv: torch.Size([1, 6, 28, 28])
after 2 conv: torch.Size([1, 16, 24, 24])
tensor([[-0.0633,  0.4135, -0.2653,  0.2503, -0.3490,  0.2363, -0.0144, -0.2845,
         -0.5569,  0.1053]], grad_fn=<AddmmBackward>)
