In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import matplotlib.pyplot as plt
import torchvision.datasets as dsets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

from bokeh.io import show, output_notebook
from bokeh.plotting import figure, gridplot
from bokeh.models import LinearAxis, Range1d
output_notebook()

In [2]:
class MNISTConvNet(nn.Module):
    """Two-stage convolutional feature extractor.

    Each stage is convolution -> ReLU -> 2x2 max-pooling. The two linear
    layers ``fc1`` and ``fc2`` are registered as sub-modules (so they appear
    in ``parameters()`` and in the module repr) but ``forward`` does not call
    them: the network returns the pooled feature maps of the second stage.
    """

    def __init__(self):
        super().__init__()
        # Sub-modules are registered in this exact order; the order fixes the
        # printed repr and the order of ``parameters()``.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=10, kernel_size=5)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=10, out_channels=20, kernel_size=5)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(in_features=320, out_features=50)
        self.fc2 = nn.Linear(in_features=50, out_features=10)

    def forward(self, input):
        """Run both conv/ReLU/pool stages and return the resulting feature maps.

        Args:
            input: batch of single-channel images, e.g. shape (N, 1, 28, 28).

        Returns:
            Tensor with 20 channels; (N, 20, 4, 4) for 28x28 inputs.
        """
        features = input
        stages = ((self.conv1, self.pool1), (self.conv2, self.pool2))
        for conv, pool in stages:
            features = pool(F.relu(conv(features)))
        return features


# Bare expression: the notebook displays the module structure as the cell output.
MNISTConvNet()

MNISTConvNet(
  (conv1): Conv2d(1, 10, kernel_size=(5, 5), stride=(1, 1))
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(10, 20, kernel_size=(5, 5), stride=(1, 1))
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=320, out_features=50, bias=True)
  (fc2): Linear(in_features=50, out_features=10, bias=True)
)

In [3]:
# Model in double precision, a mean-squared-error loss, and plain SGD over
# every parameter of the model.
net = MNISTConvNet().double()
criterion = nn.MSELoss()
optim = torch.optim.SGD(params=net.parameters(), lr=0.001)

# Random float64 input image and a random regression target whose shape
# matches what the network returns for a 28x28 input.
x = torch.randn((1, 1, 28, 28)).double()
target = torch.randn((1, 20, 4, 4)).double()

def closure():
    """Re-evaluate the loss and its gradients for one optimizer step.

    Clears the gradients held by ``optim``, runs ``net`` on ``x``, scores the
    prediction against ``target`` with ``criterion`` and back-propagates.

    Returns:
        The loss tensor, so the function can be passed to ``optim.step``.
    """
    optim.zero_grad()
    loss = criterion(net(x), target)
    loss.backward()
    return loss

### 查看参数值
查看每次更新后，conv1 权重的范数及其梯度的范数。

In [4]:
# Take three SGD steps; after each one report the norm of conv1's weight and
# the norm of the gradient that produced the update.
for i in range(3):
    optim.step(closure)
    weight_norm = net.conv1.weight.data.norm()
    grad_norm = net.conv1.weight.grad.data.norm()
    print('weight:', i, weight_norm)
    print('grad:', i, grad_norm, '\n')

weight: 0 tensor(1.8285, dtype=torch.float64)
grad: 0 tensor(0.3722, dtype=torch.float64) 

weight: 1 tensor(1.8283, dtype=torch.float64)
grad: 1 tensor(0.3685, dtype=torch.float64) 

weight: 2 tensor(1.8282, dtype=torch.float64)
grad: 2 tensor(0.3653, dtype=torch.float64) 



### 冻结参数

In [5]:
# Freeze conv1's weight: autograd no longer computes a gradient for it, so the
# optimizer leaves it untouched and its norm stays constant across steps.
net.conv1.weight.requires_grad = False
for i in range(3):
    optim.step(closure)
    grad = net.conv1.weight.grad
    # closure() calls optim.zero_grad(). On PyTorch >= 2.0 that resets .grad to
    # None by default (set_to_none=True), and backward() never refills it for a
    # frozen parameter, so calling .norm() on it would raise AttributeError.
    # Report "no gradient" as a zero norm, which is also what older releases
    # (zero-filled gradients) print here.
    if grad is None:
        grad_norm = net.conv1.weight.new_zeros(())
    else:
        grad_norm = grad.detach().norm()
    print('weight:', i, net.conv1.weight.detach().norm())
    print('grad:', i, grad_norm, '\n')

weight: 0 tensor(1.8282, dtype=torch.float64)
grad: 0 tensor(0., dtype=torch.float64) 

weight: 1 tensor(1.8282, dtype=torch.float64)
grad: 1 tensor(0., dtype=torch.float64) 

weight: 2 tensor(1.8282, dtype=torch.float64)
grad: 2 tensor(0., dtype=torch.float64) 



### 指定学习率
为不同的参数组指定不同的学习率：未单独指定 `lr` 的参数组使用优化器的默认学习率。

In [6]:
# Two parameter groups: conv1 sets no 'lr' of its own and therefore uses the
# optimizer-level lr (0.01); conv2 overrides it with 0.001. Momentum is an
# optimizer-level setting as well and applies to both groups.
multi_optim = torch.optim.SGD(
    params=[
        dict(params=net.conv1.parameters()),
        dict(params=net.conv2.parameters(), lr=0.001),
    ],
    lr=0.01,
    momentum=0.9,
)