In [3]:
import torch
import torchvision
import onnx
import torch.nn as nn
import torch.nn.functional as F

In [4]:
torch.manual_seed(1)

<torch._C.Generator at 0x7fb3ec387530>

In [11]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        
        self.fc1 = nn.Linear(16*5*5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)
        
    def forward(self, x):
        x = F.relu(self.conv1(x))
        print("after 1 conv:", x.size())  # [1, 6, 28, 28]
        x = F.relu(self.conv2(x)) 
        print("after 2 conv:", x.size())   # [1, 6, 24, 24]
        
        x = x.view(-1, self.num_flat_features(x))
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

net = Net()
print(net)

Net(
  (conv1): Conv2d(1, 6, kernel_size=(4, 4), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


In [12]:
params = list(net.parameters())
print(len(params))
print(type(params[0]))
for i in range(10):
    print("第{}个参数形状为{}".format(i,params[i].size()))

10
<class 'torch.nn.parameter.Parameter'>
第0个参数形状为torch.Size([6, 1, 4, 4])
第1个参数形状为torch.Size([6])
第2个参数形状为torch.Size([16, 6, 5, 5])
第3个参数形状为torch.Size([16])
第4个参数形状为torch.Size([120, 400])
第5个参数形状为torch.Size([120])
第6个参数形状为torch.Size([84, 120])
第7个参数形状为torch.Size([84])
第8个参数形状为torch.Size([10, 84])
第9个参数形状为torch.Size([10])


In [13]:
input = torch.randn(1, 1, 32, 32)
print(input)
print('conv1输出的形状：',net.conv1(input).size()) # conv1输出的形状

out = net(input)
print('输出层形状',out.size())
print('输出层结果：',out)

tensor([[[[-1.5158, -0.3849, -0.7996,  ...,  0.7700, -1.1614, -0.8911],
          [ 0.9729, -1.4265, -0.3596,  ..., -1.6052,  0.6524,  0.0380],
          [-1.7911,  1.2447,  0.8333,  ...,  1.2385, -0.9725,  2.3731],
          ...,
          [-0.7393, -0.4844, -0.3090,  ...,  1.8053, -0.2067,  2.2725],
          [-0.5636, -0.4947, -0.9229,  ...,  0.9189, -0.8116,  0.2147],
          [-0.9190,  0.0750,  0.8289,  ...,  0.9475, -1.3660, -1.1423]]]])
conv1输出的形状： torch.Size([1, 6, 29, 29])
after 1 conv: torch.Size([1, 6, 29, 29])
after 2 conv: torch.Size([1, 16, 25, 25])


AttributeError: 'Net' object has no attribute 'num_flat_features'

In [31]:
output = net(input)
target = torch.randn(10)  # a dummy target, for example
target = target.view(1, -1)  # make it the same shape as output
criterion = nn.MSELoss() 
print("target:", target)

loss = criterion(output, target)
print('loss:',loss)
print('''
反向跟踪loss,使用它的.grad_fn属性,你会看到向下面这样的一个计算图:
input -> conv2d -> relu -> maxpool2d -> conv2d -> relu -> maxpool2d -> view -> linear -> relu -> linear -> relu -> linear -> MSELoss -> loss''')
print(loss.grad_fn)  # MSELoss
print(loss.grad_fn.next_functions[0][0])  # Linear
print(loss.grad_fn.next_functions[0][0].next_functions[0][0]) # ReLU

target: tensor([[ 0.3406,  0.6965, -1.0862, -1.0394,  0.1721,  1.2643, -0.6560, -0.4747,
          1.8006,  1.3724]])
loss: tensor(1.1409, grad_fn=<MseLossBackward>)

反向跟踪loss,使用它的.grad_fn属性,你会看到向下面这样的一个计算图:
input -> conv2d -> relu -> maxpool2d -> conv2d -> relu -> maxpool2d -> view -> linear -> relu -> linear -> relu -> linear -> MSELoss -> loss
<MseLossBackward object at 0x7f353c0f0390>
<AddmmBackward object at 0x7f353c101978>
<AccumulateGrad object at 0x7f353c0f0390>


In [32]:
# 优化器
optimizer = torch.optim.SGD(net.parameters(),lr=0.1)

In [33]:
for epoch in range(100):
    optimizer.zero_grad()
    output = net(input)
    loss = criterion(output, target)
    print(loss)
    loss.backward()
    optimizer.step()

tensor(1.1409, grad_fn=<MseLossBackward>)
tensor(0.6371, grad_fn=<MseLossBackward>)
tensor(0.0670, grad_fn=<MseLossBackward>)
tensor(0.0210, grad_fn=<MseLossBackward>)
tensor(0.0683, grad_fn=<MseLossBackward>)
tensor(0.1396, grad_fn=<MseLossBackward>)
tensor(0.4719, grad_fn=<MseLossBackward>)
tensor(0.0028, grad_fn=<MseLossBackward>)
tensor(0.0025, grad_fn=<MseLossBackward>)
tensor(0.0064, grad_fn=<MseLossBackward>)
tensor(0.0142, grad_fn=<MseLossBackward>)
tensor(0.0419, grad_fn=<MseLossBackward>)
tensor(0.0655, grad_fn=<MseLossBackward>)
tensor(0.1972, grad_fn=<MseLossBackward>)
tensor(0.0609, grad_fn=<MseLossBackward>)
tensor(0.1740, grad_fn=<MseLossBackward>)
tensor(0.0595, grad_fn=<MseLossBackward>)
tensor(0.1634, grad_fn=<MseLossBackward>)
tensor(0.0552, grad_fn=<MseLossBackward>)
tensor(0.1478, grad_fn=<MseLossBackward>)
tensor(0.0556, grad_fn=<MseLossBackward>)
tensor(0.1443, grad_fn=<MseLossBackward>)
tensor(0.0512, grad_fn=<MseLossBackward>)
tensor(0.1295, grad_fn=<MseLossBac

In [34]:
torch.save(net, 'net.pth')  # 保存整个网络
torch.save(net.state_dict(), 'net_params.pth')   # 只保存网络中的参数 (速度快, 占内存少)

In [35]:
input2 = torch.ones(1,1,32,32)
print(net(input2))

tensor([[ 0.1206,  0.2151, -0.1010, -0.0846,  0.0959,  0.1254, -0.0207, -0.1024,
          0.2443,  0.3066]], grad_fn=<AddmmBackward>)
