## 设置模型存放在cpu/gpu

基础使用

In [1]:
import torch
import torch.nn as nn

# Build a tiny one-layer model and report which device its parameters live on.
net = nn.Sequential(nn.Linear(3, 3))

print("\nid:{} is_cuda: {}".format(id(net), next(net.parameters()).is_cuda))

# Module.cuda() moves the parameters in place and returns the same object, so
# id(net) does not change. The call is guarded so that the notebook still runs
# top to bottom on a machine without a usable CUDA device.
if torch.cuda.is_available():
    net.cuda()
    print("\nid:{} is_cuda: {}".format(id(net), next(net.parameters()).is_cuda))
else:
    print("\nCUDA is not available, skipping net.cuda()")

# Module.cpu() moves the parameters back to host memory, again on the same object.
net.cpu()
print("\nid:{} is_cuda: {}".format(id(net), next(net.parameters()).is_cuda))


id:2235286002224 is_cuda: False

id:2235286002224 is_cuda: True

id:2235286002224 is_cuda: False


`to` 方法的妙用：根据当前平台是否支持 CUDA 加速，自动选择模型存放的设备（GPU 或 CPU）

In [2]:
# Imports come first so that building `device` below does not depend on an
# earlier cell having already imported torch.
import torch
import torch.nn as nn

# Pick the GPU when CUDA is usable, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

net = nn.Sequential(nn.Linear(3, 3))
print("\nid:{} is_cuda: {}".format(id(net), next(net.parameters()).is_cuda))

# Module.to(device) moves the parameters in place, so id(net) stays the same.
net.to(device)
print("\nid:{} is_cuda: {}".format(id(net), next(net.parameters()).is_cuda))


id:1329468901248 is_cuda: False

id:1329468901248 is_cuda: True


## 获取模型参数、加载权重参数

### state_dict

In [3]:
class TinnyCNN(nn.Module):
    """Minimal CNN: one 3x3 convolution followed by one fully connected layer.

    Args:
        cls_num: Number of output classes (output size of the linear layer).
    """

    def __init__(self, cls_num=2):
        super().__init__()
        # One input channel, one output channel, 3x3 kernel.
        self.convolution_layer = nn.Conv2d(
            in_channels=1, out_channels=1, kernel_size=(3, 3)
        )
        # The linear layer expects 36 flattened features per sample
        # (e.g. a 1x8x8 input becomes a 1x6x6 map after the convolution).
        self.fc = nn.Linear(in_features=36, out_features=cls_num)

    def forward(self, x):
        """Convolve ``x``, flatten each sample and return the linear layer output."""
        feature_map = self.convolution_layer(x)
        batch_size = feature_map.size(0)
        flattened = feature_map.view(batch_size, -1)
        return self.fc(flattened)

# Instantiate the two-class model and print every state_dict entry:
# the key first, then the tensor stored under it.
model = TinnyCNN(2)

state_dict = model.state_dict()
for key in state_dict:
    print(key)
    print(state_dict[key], end="\n\n")

convolution_layer.weight
tensor([[[[-0.0586,  0.2958,  0.0794],
          [-0.0975, -0.1863, -0.2886],
          [ 0.0428, -0.0775, -0.0212]]]])

convolution_layer.bias
tensor([-0.0820])

fc.weight
tensor([[ 0.1165,  0.0235, -0.0438,  0.1110,  0.1489, -0.0167, -0.0951,  0.0331,
          0.1470, -0.0072,  0.0420, -0.0689,  0.0828,  0.0833,  0.1161,  0.0625,
          0.1383,  0.0415, -0.0853, -0.0155, -0.0974,  0.1354, -0.0970,  0.1201,
          0.0051,  0.1168, -0.0631, -0.0012, -0.1244,  0.0307,  0.1647,  0.1610,
         -0.0011, -0.0883,  0.0176, -0.1347],
        [ 0.1008, -0.0250,  0.0644,  0.0470,  0.1096, -0.0654,  0.0835,  0.1284,
         -0.0624,  0.0111, -0.0449, -0.1071,  0.1286, -0.1086, -0.0395,  0.1416,
          0.1321, -0.1113, -0.0998, -0.0708, -0.0049,  0.0485, -0.1228,  0.0476,
          0.1246,  0.1118, -0.1652, -0.0927, -0.1385,  0.1040, -0.1084, -0.0119,
         -0.0311, -0.0928, -0.0599,  0.1003]])

fc.bias
tensor([0.0320, 0.1332])



In [4]:
from torchvision import models

# Build torchvision's ResNet-18 with default arguments and print every
# state_dict entry (key, then tensor).
resnet18 = models.resnet18()
state_dict = resnet18.state_dict()
for key in state_dict.keys():
    parameter_value = state_dict[key]
    print(key)
    print(parameter_value, end="\n\n")

conv1.weight
tensor([[[[-5.6967e-02, -2.8291e-05, -2.1978e-02,  ..., -9.8702e-03,
           -1.1116e-02,  4.1635e-02],
          [-3.3665e-02,  7.7276e-03, -2.7842e-02,  ...,  4.2969e-03,
           -2.6130e-02,  9.6772e-03],
          [-2.4401e-02,  5.5175e-03, -8.9807e-03,  ...,  1.4303e-02,
           -7.0192e-02, -2.5800e-02],
          ...,
          [-4.8629e-03, -4.8208e-02,  4.3374e-02,  ..., -1.1308e-02,
            3.0745e-02, -4.5938e-02],
          [-6.3373e-02, -1.5671e-03,  5.0147e-03,  ...,  5.8759e-03,
           -2.5620e-02,  5.4922e-03],
          [-1.5900e-02,  1.3739e-02,  4.0720e-02,  ..., -3.9768e-03,
            2.8076e-02, -2.4102e-02]],

         [[-9.9695e-03, -3.4427e-02, -1.8891e-02,  ..., -3.5531e-02,
           -3.3229e-02,  3.7305e-02],
          [ 6.4337e-04, -1.4819e-02,  2.6763e-02,  ..., -1.5438e-02,
            6.9425e-02,  5.3044e-03],
          [ 1.7078e-02, -5.2597e-02, -4.0298e-02,  ...,  8.4107e-03,
            3.8125e-02,  2.3045e-02],
       

tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 

In [5]:
from torchvision import models
# Build torchvision's AlexNet with default arguments and print every
# state_dict entry (key, then tensor).
alexnet = models.AlexNet()
state_dict = alexnet.state_dict()
for entry in state_dict.items():
    key, parameter_value = entry
    print(key)
    print(parameter_value, end="\n\n")

features.0.weight
tensor([[[[ 4.5314e-02,  2.0033e-02,  3.5577e-02,  ..., -4.9191e-02,
           -6.0317e-03, -3.1011e-02],
          [ 4.0504e-02, -4.4491e-02,  3.2304e-02,  ...,  3.4372e-02,
           -3.1354e-02, -1.9714e-03],
          [ 2.1081e-02, -1.3087e-02, -2.8076e-02,  ...,  3.6429e-02,
           -4.0342e-02, -4.4726e-02],
          ...,
          [-2.9946e-02,  3.7663e-02,  1.4073e-02,  ..., -4.6443e-02,
            1.0959e-02,  4.6729e-02],
          [ 2.0678e-02, -2.0051e-02,  1.2421e-02,  ...,  5.1292e-02,
           -4.6548e-02,  3.1067e-02],
          [ 5.1696e-02, -4.2718e-03, -3.9379e-02,  ...,  4.4402e-02,
           -4.7266e-03, -3.0092e-02]],

         [[ 4.9553e-02, -5.1806e-02, -2.8658e-02,  ...,  4.4496e-02,
           -3.9197e-02, -3.9776e-02],
          [-2.2930e-02,  4.9850e-02,  5.1561e-02,  ...,  2.9524e-02,
           -2.1302e-02, -2.1810e-02],
          [ 4.5847e-02,  3.9873e-02,  3.5166e-02,  ..., -4.3211e-02,
           -4.7453e-02, -2.2792e-02],
  

### load_state_dict

In [6]:
import copy


class TinnyCNN(nn.Module):
    """Minimal CNN: one 3x3 convolution followed by one fully connected layer.

    Args:
        cls_num: Number of output classes (output size of the linear layer).
    """

    def __init__(self, cls_num=2):
        super().__init__()
        # One input channel, one output channel, 3x3 kernel.
        self.convolution_layer = nn.Conv2d(1, 1, kernel_size=(3, 3))
        # The linear layer expects 36 flattened features per sample.
        self.fc = nn.Linear(36, cls_num)

    def forward(self, x):
        """Convolve ``x``, flatten each sample and return the linear layer output."""
        x = self.convolution_layer(x)
        x = x.view(x.size(0), -1)
        out = self.fc(x)
        return out


model = TinnyCNN(2)

# state_dict() hands back tensors that share storage with the live parameters.
# Editing those in place would already change the model and turn the
# load_state_dict() call below into a no-op, so take an independent deep copy.
state_dict_tinnycnn = copy.deepcopy(model.state_dict())

# Pretend that training changed one of the weights.
state_dict_tinnycnn["convolution_layer.weight"][0, 0, 0, 0] = 12345.

# Load the modified snapshot into the model, then inspect the parameters again.
model.load_state_dict(state_dict_tinnycnn)

for key, parameter_value in model.state_dict().items():
    print(key)
    print(parameter_value, end="\n\n")

convolution_layer.weight
tensor([[[[ 1.2345e+04, -1.3605e-01, -9.4785e-02],
          [ 2.2336e-01,  2.4534e-01, -2.0257e-01],
          [-1.0476e-01, -2.3887e-01, -6.0740e-02]]]])

convolution_layer.bias
tensor([-0.1950])

fc.weight
tensor([[ 0.0374, -0.1247, -0.0989,  0.1309, -0.1088,  0.0429,  0.1109,  0.1286,
          0.0574, -0.0468, -0.0367, -0.0170,  0.0444, -0.1594, -0.0038,  0.1352,
         -0.0714, -0.1400,  0.0942, -0.1413, -0.1531, -0.0405,  0.0347, -0.0797,
         -0.1182,  0.1191, -0.1489, -0.1434,  0.0179, -0.0541,  0.0506,  0.0643,
         -0.0544, -0.1617, -0.0443,  0.0307],
        [ 0.1272,  0.0803, -0.1073,  0.1587,  0.1110, -0.1121, -0.1207, -0.0549,
          0.0478,  0.1658,  0.0312,  0.0558,  0.1103,  0.0687,  0.0382,  0.0753,
         -0.1332,  0.0949, -0.0379,  0.0353, -0.0063, -0.1196, -0.1295,  0.0591,
          0.0372,  0.1542,  0.1590,  0.1328, -0.1524,  0.1462, -0.1419,  0.0449,
         -0.1299, -0.1022,  0.0896,  0.1011]])

fc.bias
tensor([0.1021, 

### load_state_dict常见报错

In [7]:
from torchvision import models
alexnet = models.AlexNet()

# Loading TinnyCNN's weights into AlexNet fails because the two key sets do
# not match. The failure is the point of this demo, so catch it and show the
# message instead of letting the exception stop a "Run All".
try:
    alexnet.load_state_dict(state_dict_tinnycnn)
except RuntimeError as err:
    print("RuntimeError: {}".format(err))

RuntimeError: Error(s) in loading state_dict for AlexNet:
	Missing key(s) in state_dict: "features.0.weight", "features.0.bias", "features.3.weight", "features.3.bias", "features.6.weight", "features.6.bias", "features.8.weight", "features.8.bias", "features.10.weight", "features.10.bias", "classifier.1.weight", "classifier.1.bias", "classifier.4.weight", "classifier.4.bias", "classifier.6.weight", "classifier.6.bias". 
	Unexpected key(s) in state_dict: "convolution_layer.weight", "convolution_layer.bias", "fc.weight", "fc.bias". 

可以看到，对 alexnet 这个模型传入 TinnyCNN 的 state_dict，会得到两类报错：
* 第一类（Missing key(s)）：alexnet 需要、但传进来的字典里没有的键，例如 "features.0.weight"、"features.0.bias" 等等
* 第二类（Unexpected key(s)）：传进来的字典里有、但 alexnet 并不需要的键，分别是 "convolution_layer.weight"、"convolution_layer.bias"、"fc.weight"、"fc.bias"

## Module的模块、参数管理

### parameters、named_parameters

In [None]:
class TinnyCNN(nn.Module):
    """Minimal CNN: one 3x3 convolution followed by one fully connected layer.

    Args:
        cls_num: Number of output classes (output size of the linear layer).
    """

    def __init__(self, cls_num=2):
        super().__init__()
        # One input channel, one output channel, 3x3 kernel.
        self.convolution_layer = nn.Conv2d(
            in_channels=1, out_channels=1, kernel_size=(3, 3)
        )
        # The linear layer expects 36 flattened features per sample
        # (e.g. a 1x8x8 input becomes a 1x6x6 map after the convolution).
        self.fc = nn.Linear(in_features=36, out_features=cls_num)

    def forward(self, x):
        """Convolve ``x``, flatten each sample and return the linear layer output."""
        feature_map = self.convolution_layer(x)
        batch_size = feature_map.size(0)
        flattened = feature_map.view(batch_size, -1)
        return self.fc(flattened)


# Two-class instance used by the parameter / module inspection cells.
model = TinnyCNN(cls_num=2)

In [None]:
# Walk every parameter of the model: print its Python type and size,
# then the parameter itself.
for weight in model.parameters():
    weight_size = weight.size()
    print(type(weight), weight_size)
    print(weight, end="\n\n")

In [None]:
# named_parameters() yields (name, parameter) pairs; print both.
for entry in model.named_parameters():
    param_name, weight = entry
    print(param_name)
    print(weight, end="\n\n")

### modules、named_modules

In [None]:
# Print every module yielded by model.modules(), one blank line apart.
for module in model.modules():
    print(module, end="\n\n")

In [None]:
# named_modules() yields (name, module) pairs; print the name, then the module.
for entry in model.named_modules():
    module_name, module = entry
    print(module_name)
    print(module, end="\n\n")

### children、named_children

In [None]:
# Print every module yielded by model.children(), one blank line apart.
for child in model.children():
    print(child, end="\n\n")

In [None]:
# named_children() yields (name, module) pairs; print the name, then the module.
for entry in model.named_children():
    child_name, child = entry
    print(child_name)
    print(child, end="\n\n")

### get_parameter、get_submodule

In [None]:
# Fetch a single parameter by its dotted path.
fc_bias = model.get_parameter("fc.bias")
print(fc_bias)

# Fetch a sub-module by name.
conv_layer = model.get_submodule("convolution_layer")
print(conv_layer)

# The returned sub-module is itself a module, so get_parameter can be called on it too.
conv_bias = model.get_submodule("convolution_layer").get_parameter("bias")
print(conv_bias)

## 设置模型的参数精度，可选半精度、单精度、双精度等

In [8]:
# Fresh model: print the dtype of each parameter, in registration order.
model = TinnyCNN(2)
for weight in model.parameters():
    print(weight.dtype)

torch.float32
torch.float32
torch.float32
torch.float32


In [None]:
# Cast the model with half() (in place; it returns the same module),
# then print the dtype of each parameter.
for weight in model.half().parameters():
    print(weight.dtype)

In [None]:
# Cast the model with float() (in place; it returns the same module),
# then print the dtype of each parameter.
for weight in model.float().parameters():
    print(weight.dtype)

In [None]:
# Cast the model with double() (in place; it returns the same module),
# then print the dtype of each parameter.
for weight in model.double().parameters():
    print(weight.dtype)

In [None]:
# Cast the model with bfloat16() (in place; it returns the same module),
# then print the dtype of each parameter.
for weight in model.bfloat16().parameters():
    print(weight.dtype)

## 对子模块执行特定功能

`apply`：对模型自身及其所有子模块依次调用传入的函数，常用于参数初始化

In [20]:
@torch.no_grad()
def init_weights(m):
    """Fill the weight of a linear layer with ones; leave any other module untouched.

    Meant to be handed to ``Module.apply``, which calls it once per module.
    Gradient tracking is disabled by the decorator so the in-place fill is
    not recorded by autograd.

    Args:
        m: The module to inspect and, if it is a linear layer, initialise.
    """
    # isinstance (rather than an exact type comparison) also matches
    # subclasses of nn.Linear, which would otherwise be skipped silently.
    if isinstance(m, nn.Linear):
        m.weight.fill_(1.0)
        
#         print(m.weight)
# Two stacked 2x2 linear layers; show their parameters before initialisation.
net = nn.Sequential(nn.Linear(2, 2), nn.Linear(2, 2))

for weight in net.parameters():
    print(weight, end="\n\n")

# Run init_weights over the network via Module.apply.
net.apply(init_weights)

# Show the parameters again, this time with their names.
print("执行apply之后:")
for entry in net.named_parameters():
    param_name, weight = entry
    print(param_name)
    print(weight, end="\n\n")

Parameter containing:
tensor([[-0.2534,  0.2375],
        [-0.4776, -0.2796]], requires_grad=True)

Parameter containing:
tensor([-0.5713,  0.1877], requires_grad=True)

Parameter containing:
tensor([[ 0.4486, -0.1789],
        [-0.4061, -0.0006]], requires_grad=True)

Parameter containing:
tensor([-0.6580,  0.6763], requires_grad=True)

执行apply之后:
0.weight
Parameter containing:
tensor([[1., 1.],
        [1., 1.]], requires_grad=True)

0.bias
Parameter containing:
tensor([-0.5713,  0.1877], requires_grad=True)

1.weight
Parameter containing:
tensor([[1., 1.],
        [1., 1.]], requires_grad=True)

1.bias
Parameter containing:
tensor([-0.6580,  0.6763], requires_grad=True)

