In [3]:
import torch
import torchvision
import onnx
import torch.nn as nn
import torch.nn.functional as F

In [4]:
torch.manual_seed(1)

<torch._C.Generator at 0x7f23e67b2cb0>

In [5]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        
        self.fc1 = nn.Linear(16*5*5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)
        
    def forward(self, x):
        x = F.max_pool2d(F.relu(self.conv1(x)), (2,2))
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        x = x.view(-1, self.num_flat_features(x))
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
    
    def num_flat_features(self, x):
        size = x.size()[1:]  # all dimensions except the batch dimension(批大小维度)
        num_features = 1
        for s in size:
            num_features *= s
        return num_features


net = Net()
print(net)

Net(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


In [6]:
params = list(net.parameters())
print(len(params))
print(type(params[0]))
for i in range(10):
    print("第{}个参数形状为{}".format(i,params[i].size()))

10
<class 'torch.nn.parameter.Parameter'>
第0个参数形状为torch.Size([6, 1, 5, 5])
第1个参数形状为torch.Size([6])
第2个参数形状为torch.Size([16, 6, 5, 5])
第3个参数形状为torch.Size([16])
第4个参数形状为torch.Size([120, 400])
第5个参数形状为torch.Size([120])
第6个参数形状为torch.Size([84, 120])
第7个参数形状为torch.Size([84])
第8个参数形状为torch.Size([10, 84])
第9个参数形状为torch.Size([10])


In [7]:
input = torch.randn(1, 1, 32, 32)
print(input)
print('conv1输出的形状：',net.conv1(input).size()) # conv1输出的形状

out = net(input)
print('输出层形状',out.size())
print('输出层结果：',out)

tensor([[[[ 0.3889,  0.2365,  0.6433,  ...,  0.4024, -1.7507,  0.7494],
          [ 0.1942, -0.5817,  1.0935,  ...,  1.0392, -0.9482,  0.5560],
          [ 0.1490,  0.4119, -0.2119,  ...,  0.6395, -0.3441,  0.1987],
          ...,
          [ 0.3519, -0.4162,  0.4879,  ...,  1.9595,  0.4943, -1.0916],
          [-0.8808, -0.8463,  1.2546,  ..., -0.7825, -1.3408, -0.8927],
          [-0.2942, -0.1980,  0.0215,  ..., -0.8730, -0.2482, -0.9396]]]])
conv1输出的形状： torch.Size([1, 6, 28, 28])
输出层形状 torch.Size([1, 10])
输出层结果： tensor([[ 0.0457, -0.0500,  0.0811, -0.0702, -0.0676, -0.0173,  0.0802, -0.0525,
         -0.1420,  0.0894]], grad_fn=<AddmmBackward>)


In [14]:
output = net(input)
target = torch.randn(10)  # a dummy target, for example
target = target.view(1, -1)  # make it the same shape as output
criterion = nn.MSELoss() 
print("target:", target)

loss = criterion(output, target)
print('loss:',loss)
print('''
反向跟踪loss,使用它的.grad_fn属性,你会看到向下面这样的一个计算图:
input -> conv2d -> relu -> maxpool2d -> conv2d -> relu -> maxpool2d -> view -> linear -> relu -> linear -> relu -> linear -> MSELoss -> loss''')
print(loss.grad_fn)  # MSELoss
print(loss.grad_fn.next_functions[0][0])  # Linear
print(loss.grad_fn.next_functions[0][0].next_functions[0][0]) # ReLU

target: tensor([[-0.4072,  1.1454,  1.0253, -0.6259, -0.3500,  0.0630,  1.2650, -2.1565,
          0.1425,  1.4169]])
loss: tensor(1.4198, grad_fn=<MseLossBackward>)

反向跟踪loss,使用它的.grad_fn属性,你会看到向下面这样的一个计算图:
input -> conv2d -> relu -> maxpool2d -> conv2d -> relu -> maxpool2d -> view -> linear -> relu -> linear -> relu -> linear -> MSELoss -> loss
<MseLossBackward object at 0x7f236226fd68>
<AddmmBackward object at 0x7f23e6f26908>
<AccumulateGrad object at 0x7f236226fd68>


In [9]:
# 优化器
optimizer = torch.optim.SGD(net.parameters(),lr=0.1)

In [10]:
for epoch in range(100):
    optimizer.zero_grad()
    output = net(input)
    loss = criterion(output, target)
    print(loss)
    loss.backward()
    optimizer.step()

tensor(0.7358, grad_fn=<MseLossBackward>)
tensor(0.6271, grad_fn=<MseLossBackward>)
tensor(0.5315, grad_fn=<MseLossBackward>)
tensor(0.3964, grad_fn=<MseLossBackward>)
tensor(0.1981, grad_fn=<MseLossBackward>)
tensor(0.0365, grad_fn=<MseLossBackward>)
tensor(0.0069, grad_fn=<MseLossBackward>)
tensor(0.0015, grad_fn=<MseLossBackward>)
tensor(0.0007, grad_fn=<MseLossBackward>)
tensor(0.0008, grad_fn=<MseLossBackward>)
tensor(0.0009, grad_fn=<MseLossBackward>)
tensor(0.0013, grad_fn=<MseLossBackward>)
tensor(0.0015, grad_fn=<MseLossBackward>)
tensor(0.0022, grad_fn=<MseLossBackward>)
tensor(0.0025, grad_fn=<MseLossBackward>)
tensor(0.0038, grad_fn=<MseLossBackward>)
tensor(0.0039, grad_fn=<MseLossBackward>)
tensor(0.0061, grad_fn=<MseLossBackward>)
tensor(0.0059, grad_fn=<MseLossBackward>)
tensor(0.0094, grad_fn=<MseLossBackward>)
tensor(0.0081, grad_fn=<MseLossBackward>)
tensor(0.0133, grad_fn=<MseLossBackward>)
tensor(0.0103, grad_fn=<MseLossBackward>)
tensor(0.0171, grad_fn=<MseLossBac

In [11]:
torch.save(net, 'net.pth')  # 保存整个网络
torch.save(net.state_dict(), 'net_params.pth')   # 只保存网络中的参数 (速度快, 占内存少)

  "type " + obj.__name__ + ". It won't be checked "


In [12]:
input2 = torch.ones(1,1,32,32)
print(net(input2))

tensor([[-0.7142,  0.4877,  0.4872,  0.0903, -0.2065, -0.4697, -0.4465, -0.1339,
         -0.3003,  0.1789]], grad_fn=<AddmmBackward>)
