### 模型保存

In [3]:
import torch
from torch import nn

In [4]:
class Net(nn.Module):
    """LeNet-style CNN: two conv + max-pool stages followed by three linear layers.

    ``forward`` reshapes the output of the second pooling stage to
    ``16 * 5 * 5`` features per sample, so inputs must reduce to a 5x5 map
    after both stages (e.g. 3-channel 32x32 images). The final layer produces
    10 values per sample.
    """

    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool1 = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.pool2 = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Run the network on an image batch ``x`` and return the fc3 outputs."""
        # Reach relu through ``nn`` so the class does not depend on a separate
        # ``F`` alias being imported somewhere else in the notebook.
        relu = nn.functional.relu
        x = self.pool1(relu(self.conv1(x)))
        x = self.pool2(relu(self.conv2(x)))
        # Collapse channel and spatial dimensions into one feature vector per sample.
        x = x.view(-1, 16 * 5 * 5)
        x = relu(self.fc1(x))
        x = relu(self.fc2(x))
        x = self.fc3(x)
        return x

    # Weight initialisation
    def initialize_weights(self):
        """Re-initialise every Conv2d, BatchNorm2d and Linear submodule in place.

        * Conv2d: Xavier-normal weights, zero bias.
        * BatchNorm2d: weight filled with 1, bias with 0.
        * Linear: weights drawn from a normal with mean 0 and std 0.01, zero bias.

        Parameters that are absent (``bias=False`` layers, non-affine
        BatchNorm2d) are skipped instead of raising ``AttributeError``.
        """
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.xavier_normal_(m.weight)
                if m.bias is not None:
                    nn.init.zeros_(m.bias)
            elif isinstance(m, nn.BatchNorm2d):
                if m.weight is not None:
                    nn.init.ones_(m.weight)
                if m.bias is not None:
                    nn.init.zeros_(m.bias)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                if m.bias is not None:
                    nn.init.zeros_(m.bias)

In [5]:
# Build a Net instance; initialize_weights() is not called here.
net = Net()

In [6]:
# Save the model: write net's state_dict to 'net_params.pkl'
torch.save(net.state_dict(), 'net_params.pkl')

### 获取模型参数

In [7]:
# Read back the object stored in 'net_params.pkl' (the state_dict written by the save cell).
# NOTE(review): torch.load presumably unpickles the file -- only load files from a trusted source; confirm for the installed torch version.
pretrained_dict = torch.load('net_params.pkl')

In [8]:
pretrained_dict # display the loaded parameters

OrderedDict([('conv1.weight',
              tensor([[[[-2.2161e-02, -4.4090e-02, -9.2931e-02,  1.2507e-02,  4.6231e-02],
                        [ 4.9560e-02, -1.0323e-01,  7.2356e-02,  7.5136e-02,  4.2600e-02],
                        [ 8.0681e-02,  1.1233e-01,  1.1389e-01, -1.0807e-01,  3.6007e-03],
                        [-8.6756e-02, -6.7329e-02, -1.0309e-01,  1.6573e-02,  2.5528e-02],
                        [ 9.7430e-03, -9.9515e-03, -2.5553e-02,  6.3250e-02,  8.0874e-02]],
              
                       [[ 2.9401e-02,  3.5484e-02, -6.8339e-02,  1.0939e-01, -6.2919e-02],
                        [ 6.1721e-02,  1.1007e-01,  7.5689e-02, -1.6020e-02,  2.4713e-02],
                        [-2.9354e-02, -6.6847e-02, -6.7151e-02, -2.5507e-04, -1.7680e-02],
                        [-2.6186e-02,  1.0538e-01,  6.7284e-02, -9.2069e-02, -3.8072e-02],
                        [ 2.8348e-02, -7.8537e-02, -2.3559e-02, -6.6372e-03,  9.6753e-02]],
              
                       [[ 8.

### 使用参数初始化模型

In [9]:
# Create a fresh network that will receive the saved parameters.
net = Net()
net_state_dict = net.state_dict() # state_dict of the newly created net

In [11]:
# Display the new network's state_dict before it is overwritten.
net_state_dict

OrderedDict([('conv1.weight',
              tensor([[[[-0.0612,  0.1036, -0.0866,  0.0916,  0.0710],
                        [-0.1034,  0.0659,  0.0553,  0.0011,  0.0985],
                        [ 0.0452,  0.0610,  0.0974,  0.1138, -0.0888],
                        [ 0.0599, -0.0913,  0.1015, -0.1111,  0.0254],
                        [ 0.1018, -0.0550, -0.0183,  0.0463,  0.0978]],
              
                       [[-0.0403, -0.0379,  0.1082, -0.0981,  0.0028],
                        [-0.1094,  0.0952,  0.0061,  0.0439,  0.1045],
                        [-0.0624, -0.0761,  0.0819,  0.0182, -0.0797],
                        [-0.0572,  0.0824, -0.0490,  0.0994, -0.1026],
                        [-0.1089, -0.0384, -0.0727,  0.0219, -0.0366]],
              
                       [[ 0.0310,  0.0649, -0.0188,  0.0055,  0.1147],
                        [-0.0667, -0.0801, -0.0846, -0.0080, -0.0113],
                        [-0.0193, -0.0908, -0.0248, -0.0921,  0.0163],
               

In [12]:
# Next, drop every pretrained entry whose key does not exist in net_state_dict
pretrained_dict_1 = {
    name: tensor
    for name, tensor in pretrained_dict.items()
    if name in net_state_dict
}

In [13]:
# Display the filtered pretrained parameters.
pretrained_dict_1

{'conv1.weight': tensor([[[[-2.2161e-02, -4.4090e-02, -9.2931e-02,  1.2507e-02,  4.6231e-02],
           [ 4.9560e-02, -1.0323e-01,  7.2356e-02,  7.5136e-02,  4.2600e-02],
           [ 8.0681e-02,  1.1233e-01,  1.1389e-01, -1.0807e-01,  3.6007e-03],
           [-8.6756e-02, -6.7329e-02, -1.0309e-01,  1.6573e-02,  2.5528e-02],
           [ 9.7430e-03, -9.9515e-03, -2.5553e-02,  6.3250e-02,  8.0874e-02]],
 
          [[ 2.9401e-02,  3.5484e-02, -6.8339e-02,  1.0939e-01, -6.2919e-02],
           [ 6.1721e-02,  1.1007e-01,  7.5689e-02, -1.6020e-02,  2.4713e-02],
           [-2.9354e-02, -6.6847e-02, -6.7151e-02, -2.5507e-04, -1.7680e-02],
           [-2.6186e-02,  1.0538e-01,  6.7284e-02, -9.2069e-02, -3.8072e-02],
           [ 2.8348e-02, -7.8537e-02, -2.3559e-02, -6.6372e-03,  9.6753e-02]],
 
          [[ 8.0544e-02, -5.3512e-02, -9.4615e-02, -2.0617e-02,  2.1160e-03],
           [ 9.0646e-02,  1.0274e-01, -7.2114e-02, -1.9761e-02, -4.6731e-02],
           [-6.9136e-02,  8.4829e-02,  4.6

In [14]:
# Then overwrite the new model's net_state_dict entries with the filtered pretrained parameters
net_state_dict.update(pretrained_dict_1)

In [15]:
# Finally, load the updated dictionary back into the network
net.load_state_dict(net_state_dict)

<All keys matched successfully>

### 不同层使用不同的学习率

    希望 fc 层的权值更新相对快一些，而前面各层的权值更新慢一些

为不同层设置不同的学习率，主要是通过优化器为多个参数组分别设置不同的参数来实现的。所以，只需要将原始的参数组划分成两个甚至更多的参数组，然后分别为它们设置学习率。这里将原始参数“切分”成 fc3 层参数和其余参数，并为 fc3 层设置更大的学习率。

In [17]:
import torch.optim as optim

# Give fc3 its own learning rate while every other layer shares the base one.
BASE_LR = 0.001        # learning rate for all parameters except fc3
FC3_LR = BASE_LR * 10  # fc3 is updated with a 10x larger learning rate

# Separate fc3's parameters from the rest of net.parameters().
# Parameters are matched by object identity (id). Both collections are
# materialised: unlike the one-shot iterator returned by filter(), a list can
# be inspected or reused to rebuild the optimizer without silently being empty.
ignored_params = set(map(id, net.fc3.parameters()))
base_params = [p for p in net.parameters() if id(p) not in ignored_params]

# Two parameter groups: the group without its own 'lr' uses the optimizer-level
# lr (BASE_LR); momentum and weight_decay are shared by both groups.
optimizer = optim.SGD(
    [
        {'params': base_params},
        {'params': net.fc3.parameters(), 'lr': FC3_LR},
    ],
    lr=BASE_LR,
    momentum=0.9,
    weight_decay=1e-4,
)