In [None]:
# 参考链接：https://github.com/chenyuntc/pytorch-book/blob/master/chapter2-%E5%BF%AB%E9%80%9F%E5%85%A5%E9%97%A8/chapter2%3A%20PyTorch%E5%BF%AB%E9%80%9F%E5%85%A5%E9%97%A8.ipynb
# 官方文档（英文）：https://pytorch.org/tutorials/beginner/blitz/neural_networks_tutorial.html#sphx-glr-beginner-blitz-neural-networks-tutorial-py



In [19]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        # 1 input image channel, 6 output channels, 3x3 square convolution
        self.conv1 = nn.Conv2d(1,6,3)
        self.conv2 = nn.Conv2d(6,16,3)
        # an affine operation: y = Wx + b
        self.fc1 = nn.Linear(16 * 6 * 6, 120) # 6*6 from image dimension
        self.fc2 = nn.Linear(120,84)
        self.fc3 = nn.Linear(84,10)
        
    def forward(self,x):
        # 卷积--》激活--》池化
        # Max pooling over a (2, 2) window
        x = F.max_pool2d(F.relu(self.conv1(x)),(2,2))
        # If the size is a square you can only specify a single number
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        # reshape，‘-1’表示自适应
        # x = x.view(-1,slef.num_flat_features(x))
        x = x.view(x.size()[0], -1) 
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        return x 
    
#     def num_flat_features(self,x):
#         size = x.size()[1:] # all dimensions except the batch dimension
#         num_features = 1
#         for s in size:
#             num_features *= s
#         return num_features

# 为什么效果是一样的 num_features怎么没变化，这个函数那不就是多写的了吗？

net = Net()
print(net)
        

Net(
  (conv1): Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=576, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


In [20]:
# The learnable parameters of a model are returned by net.parameters()

params = list(net.parameters())
print(len(params))
print(params[0].size())   # conv1's .weight

10
torch.Size([6, 1, 3, 3])


In [21]:
for name,parameters in net.named_parameters():
    print(name,':',parameters.size())

conv1.weight : torch.Size([6, 1, 3, 3])
conv1.bias : torch.Size([6])
conv2.weight : torch.Size([16, 6, 3, 3])
conv2.bias : torch.Size([16])
fc1.weight : torch.Size([120, 576])
fc1.bias : torch.Size([120])
fc2.weight : torch.Size([84, 120])
fc2.bias : torch.Size([84])
fc3.weight : torch.Size([10, 84])
fc3.bias : torch.Size([10])


In [22]:
# forward函数的输入和输出都是Tensor。

input = torch.randn(1,1,32,32)
out = net(input)
print(out)
out.size()

tensor([[0.0000, 0.0984, 0.0000, 0.0017, 0.0000, 0.0000, 0.0921, 0.0000, 0.0000,
         0.0713]], grad_fn=<ReluBackward0>)


torch.Size([1, 10])

In [23]:
net.zero_grad()  # 所有参数的梯度清零
out.backward(t.ones(1,10))   #反向传播

# 需要注意的是，torch.nn只支持mini-batches，不支持一次只输入一个样本，即一次必须是一个batch。
# 但如果只想输入一个样本，则用 input.unsqueeze(0)将batch_size设为１。
# 例如 nn.Conv2d 输入必须是4维的，形如 nSamples * nChannels * Height * Width$。
# 可将nSample设为1，即 1 * nChannels * Height * Width$。



NameError: name 't' is not defined

In [24]:
# 损失函数

output = net(input)
target = torch.randn(10)
target = target.view(1,-1)
criterion = nn.MSELoss()

loss = criterion(output,target)
print(loss)

tensor(1.6763, grad_fn=<MseLossBackward>)


In [25]:
net.zero_grad()
print('反向传播之前 conv1.bias的梯度')
print(net.conv1.bias.grad)
loss.backward()
print('反向传播之后 conv1.bias的梯度')
print(net.conv1.bias.grad)

反向传播之前 conv1.bias的梯度
None
反向传播之后 conv1.bias的梯度
tensor([-0.0028, -0.0003,  0.0008,  0.0008,  0.0029,  0.0015])


In [28]:
# The simplest update rule used in practice is the Stochastic Gradient Descent (SGD):
# weight = weight - learning_rate * gradient

learning_rate = 0.01
for f in net.parameters():
    f.data.sub_(f.grad.data * learning_rate)

In [29]:
# However, as you use neural networks, you want to use various different update rules such as SGD, Nesterov-SGD, Adam, RMSProp, etc. 
# To enable this, we built a small package: torch.optim that implements all these methods. Using it is very simple:

import torch.optim as optim

optimizer = optim.SGD(net.parameters(),lr=0.01)

optimizer.zero_grad()   # 梯度清零

# 计算损失
output = net(input)  
loss = criterion(output, target)

#反向传播
loss.backward()

#更新参数
optimizer.step()