## 设置模型存放在 cpu/gpu


基础使用


In [2]:
import torch
import torch.nn as nn
net = nn.Sequential(nn.Linear(3, 3))
# Move the model to the GPU only when one is actually available, so the cell
# runs unchanged on CPU-only machines instead of relying on commented-out code.
if torch.cuda.is_available():
    net.cuda()
# cuda()/cpu() relocate the parameters of the same module object, which is why
# id(net) is printed next to the device flag.
print("\nid:{} is_cuda: {}".format(id(net), next(net.parameters()).is_cuda))
# Bring the parameters back to host memory (a no-op if they never left it).
net.cpu()
print("\nid:{} is_cuda: {}".format(id(net), next(net.parameters()).is_cuda))


id:3030190484752 is_cuda: False

id:3030190484752 is_cuda: False


to 方法的妙用：根据当前平台是否支持 cuda 加速，自动选择模型所在的设备（GPU 或 CPU）


In [7]:
import torch.nn as nn
import torch
# Prefer the CUDA device when it is usable; otherwise stay on the CPU.
device = torch.device("cpu")
if torch.cuda.is_available():
    device = torch.device("cuda")
net = nn.Sequential(nn.Linear(3, 3))
report = "\nid:{} is_cuda: {}"
print(report.format(id(net), next(net.parameters()).is_cuda))
# The return value of Module.to() is ignored here: the second print inspects
# the very same `net` object after the move.
net.to(device)
print(report.format(id(net), next(net.parameters()).is_cuda))


id:3032230700816 is_cuda: False

id:3032230700816 is_cuda: False


## 获取模型参数、加载权重参数


### state_dict


In [None]:
class TinnyCNN(nn.Module):
    """Minimal CNN: one 3x3 convolution followed by one linear classifier.

    Args:
        cls_num: Number of output classes (defaults to 2).
        input_size: ``(height, width)`` of the single-channel input images.
            The default ``(8, 8)`` reproduces the original hard-coded
            ``nn.Linear(36, cls_num)`` head: the unpadded, stride-1 3x3
            convolution shrinks each spatial side by 2, and 6 * 6 = 36.

    Raises:
        ValueError: If either side of ``input_size`` is smaller than the
            3x3 kernel.
    """

    def __init__(self, cls_num=2, input_size=(8, 8)):
        # Initialise nn.Module first so sub-modules assigned below are registered.
        super().__init__()
        height, width = input_size
        if height < 3 or width < 3:
            raise ValueError(
                "input_size must be at least (3, 3), got {}".format(input_size)
            )
        self.convolution_layer = nn.Conv2d(1, 1, kernel_size=(3, 3))
        # One output channel; each side loses kernel_size - 1 = 2 pixels.
        self.fc = nn.Linear((height - 2) * (width - 2), cls_num)

    def forward(self, x):
        """Map a ``(batch, 1, height, width)`` tensor to ``(batch, cls_num)`` scores."""
        x = self.convolution_layer(x)
        # Keep the batch axis and merge the rest; unlike view(), flatten()
        # also accepts activations that are not contiguous in memory.
        x = x.flatten(1)
        return self.fc(x)


model = TinnyCNN(2)
# state_dict() maps every parameter name to its tensor (weights and biases).
state_dict = model.state_dict()
for key in state_dict:
    parameter_value = state_dict[key]
    print(key)
    print(parameter_value, end="\n\n")

convolution_layer.weight
tensor([[[[-0.1719,  0.2647, -0.1531],
          [-0.0845,  0.0746, -0.0976],
          [-0.3259,  0.0336, -0.0307]]]])

convolution_layer.bias
tensor([0.1294])

fc.weight
tensor([[-0.1427,  0.0909,  0.1088,  0.1028, -0.0374, -0.1586,  0.1466,  0.0317,
          0.0233,  0.0300, -0.0866, -0.0650, -0.0736,  0.1242, -0.0554, -0.0564,
          0.1090,  0.0185,  0.1025, -0.0702,  0.1365,  0.0391, -0.1307, -0.0819,
          0.1258,  0.0056, -0.1164,  0.0834,  0.1543,  0.0600, -0.1352, -0.1153,
          0.0654, -0.0602,  0.0091, -0.1110],
        [ 0.1370, -0.0724,  0.0198,  0.1204,  0.0907, -0.0738, -0.0507, -0.0423,
          0.1641,  0.0781,  0.0591,  0.0159,  0.1110, -0.1468,  0.0724, -0.0088,
         -0.0982,  0.0831, -0.1334, -0.1327, -0.1425, -0.0981,  0.0332,  0.0587,
         -0.0503, -0.0682, -0.0329,  0.1051, -0.1557,  0.1527, -0.0383,  0.0895,
         -0.1190,  0.0990, -0.0425,  0.0331]])

fc.bias
tensor([-0.0046,  0.1499])



In [9]:
from torchvision import models
# Build a ResNet-18 with the standard architecture. No weights argument is
# passed, so the parameters printed below are freshly initialised values,
# not pretrained ones.
resnet18 = models.resnet18()
state_dict = resnet18.state_dict()
for key in state_dict.keys():
    parameter_value = state_dict[key]
    print(key)
    print(parameter_value, end="\n\n")

conv1.weight
tensor([[[[ 7.8622e-03,  1.5781e-02,  4.6311e-02,  ...,  2.7361e-02,
            3.1056e-02,  3.3218e-03],
          [-4.8565e-02, -1.0991e-02, -9.7051e-03,  ...,  3.4322e-03,
            3.7113e-02,  3.0121e-03],
          [ 8.1790e-03, -3.6568e-03,  1.6216e-02,  ...,  5.8446e-03,
           -2.7615e-02, -3.8544e-02],
          ...,
          [-5.1055e-02,  1.0400e-03,  2.0462e-02,  ..., -3.4593e-02,
           -5.1211e-03,  1.0300e-02],
          [-1.7513e-03, -4.6183e-02, -7.2256e-03,  ..., -1.1755e-02,
           -1.6508e-03, -2.8684e-03],
          [-1.2402e-02,  2.1343e-02, -2.3400e-02,  ..., -5.1887e-02,
            3.9576e-02, -3.5472e-02]],

         [[ 3.7589e-02, -4.0719e-02, -3.1328e-02,  ...,  7.3762e-03,
           -1.2285e-02, -2.4667e-03],
          [-2.1009e-03,  7.5441e-03,  8.6089e-04,  ..., -1.2552e-02,
            3.2762e-03, -4.8166e-03],
          [ 3.9028e-02, -1.1262e-02, -7.8883e-03,  ..., -8.2435e-03,
           -1.8844e-03, -5.2788e-03],
       

In [5]:
from torchvision import models
# Instantiate the AlexNet class directly and list every entry of its state dict.
alexnet = models.AlexNet()
state_dict = alexnet.state_dict()
for entry in state_dict.items():
    key, parameter_value = entry
    print(key)
    print(parameter_value, end="\n\n")

features.0.weight
tensor([[[[ 4.5314e-02,  2.0033e-02,  3.5577e-02,  ..., -4.9191e-02,
           -6.0317e-03, -3.1011e-02],
          [ 4.0504e-02, -4.4491e-02,  3.2304e-02,  ...,  3.4372e-02,
           -3.1354e-02, -1.9714e-03],
          [ 2.1081e-02, -1.3087e-02, -2.8076e-02,  ...,  3.6429e-02,
           -4.0342e-02, -4.4726e-02],
          ...,
          [-2.9946e-02,  3.7663e-02,  1.4073e-02,  ..., -4.6443e-02,
            1.0959e-02,  4.6729e-02],
          [ 2.0678e-02, -2.0051e-02,  1.2421e-02,  ...,  5.1292e-02,
           -4.6548e-02,  3.1067e-02],
          [ 5.1696e-02, -4.2718e-03, -3.9379e-02,  ...,  4.4402e-02,
           -4.7266e-03, -3.0092e-02]],

         [[ 4.9553e-02, -5.1806e-02, -2.8658e-02,  ...,  4.4496e-02,
           -3.9197e-02, -3.9776e-02],
          [-2.2930e-02,  4.9850e-02,  5.1561e-02,  ...,  2.9524e-02,
           -2.1302e-02, -2.1810e-02],
          [ 4.5847e-02,  3.9873e-02,  3.5166e-02,  ..., -4.3211e-02,
           -4.7453e-02, -2.2792e-02],
  

### load_state_dict


In [11]:
class TinnyCNN(nn.Module):
    """Minimal CNN: one 3x3 convolution followed by one linear classifier.

    Args:
        cls_num: Number of output classes (defaults to 2).
        input_size: ``(height, width)`` of the single-channel input images.
            The default ``(8, 8)`` reproduces the original hard-coded
            ``nn.Linear(36, cls_num)`` head: the unpadded, stride-1 3x3
            convolution shrinks each spatial side by 2, and 6 * 6 = 36.

    Raises:
        ValueError: If either side of ``input_size`` is smaller than the
            3x3 kernel.
    """

    def __init__(self, cls_num=2, input_size=(8, 8)):
        # Initialise nn.Module first so sub-modules assigned below are registered.
        super().__init__()
        height, width = input_size
        if height < 3 or width < 3:
            raise ValueError(
                "input_size must be at least (3, 3), got {}".format(input_size)
            )
        self.convolution_layer = nn.Conv2d(1, 1, kernel_size=(3, 3))
        # One output channel; each side loses kernel_size - 1 = 2 pixels.
        self.fc = nn.Linear((height - 2) * (width - 2), cls_num)

    def forward(self, x):
        """Map a ``(batch, 1, height, width)`` tensor to ``(batch, cls_num)`` scores."""
        x = self.convolution_layer(x)
        # Keep the batch axis and merge the rest; unlike view(), flatten()
        # also accepts activations that are not contiguous in memory.
        x = x.flatten(1)
        return self.fc(x)


model = TinnyCNN(2)
# state_dict() hands back references to the live parameter storage, so clone
# every tensor first. Without the copy, the in-place edit below would already
# change the model and load_state_dict() would have nothing left to load.
state_dict_tinnycnn = {
    key: value.clone() for key, value in model.state_dict().items()
}
# Overwrite one convolution weight to mimic a value changed by training.
state_dict_tinnycnn["convolution_layer.weight"][0, 0, 0, 0] = 12345.0
# Copy the edited values into the model, then inspect the model again.
model.load_state_dict(state_dict_tinnycnn)
for key, parameter_value in model.state_dict().items():
    print(key)
    print(parameter_value, end="\n\n")

convolution_layer.weight
tensor([[[[ 1.2345e+04, -1.3855e-01, -2.0814e-02],
          [ 1.4227e-01,  1.1783e-01, -1.4465e-01],
          [-1.9147e-01,  5.8554e-02, -1.2950e-01]]]])

convolution_layer.bias
tensor([-0.1955])

fc.weight
tensor([[ 0.1301,  0.1348, -0.0262, -0.1052,  0.0420,  0.1563,  0.0998, -0.0064,
         -0.1305,  0.1111, -0.1204, -0.0270, -0.0116, -0.0050, -0.0310, -0.0275,
         -0.1411,  0.1253, -0.1181,  0.0784,  0.1395,  0.1235,  0.1601,  0.1622,
          0.1623, -0.1208,  0.0061, -0.0756, -0.0188, -0.0187, -0.1020, -0.0394,
          0.0307,  0.1466, -0.0445,  0.0296],
        [-0.0797,  0.0544, -0.1413,  0.1081, -0.1308, -0.1193,  0.0368,  0.1636,
         -0.1416, -0.1470, -0.0158,  0.1592, -0.0354, -0.0539,  0.1015,  0.0845,
          0.0993, -0.0107,  0.1465,  0.0034,  0.1191, -0.1340, -0.0465, -0.1336,
          0.0331,  0.0118, -0.0979, -0.0693,  0.1556, -0.0984,  0.1308, -0.0860,
          0.1527,  0.1484, -0.0326,  0.0929]])

fc.bias
tensor([-0.0596,

### load_state_dict 常见报错


In [None]:
from torchvision import models
alexnet = models.AlexNet()
# TinnyCNN and AlexNet use different parameter names, so this load is expected
# to fail. The error is caught and printed so that the failure stays visible
# while the notebook can still be run from top to bottom.
try:
    alexnet.load_state_dict(state_dict_tinnycnn)
except RuntimeError as err:
    print("{}: {}".format(type(err).__name__, err))

RuntimeError: Error(s) in loading state_dict for AlexNet:
	Missing key(s) in state_dict: "features.0.weight", "features.0.bias", "features.3.weight", "features.3.bias", "features.6.weight", "features.6.bias", "features.8.weight", "features.8.bias", "features.10.weight", "features.10.bias", "classifier.1.weight", "classifier.1.bias", "classifier.4.weight", "classifier.4.bias", "classifier.6.weight", "classifier.6.bias". 
	Unexpected key(s) in state_dict: "convolution_layer.weight", "convolution_layer.bias", "fc.weight", "fc.bias". 

可以看到，把 TinnyCNN 的 state_dict 传给 alexnet 模型时，会得到两类报错：

- 第一种是 alexnet 需要的，但传进来的字典里没找到：分别是"features.0.weight", "features.0.bias"等等
- 第二种是传进来的不是 alexnet 想要的，分别是"convolution_layer.weight", "convolution_layer.bias", "fc.weight", "fc.bias".


## Module 的模块、参数管理


### parameters、named_parameters


In [None]:
class TinnyCNN(nn.Module):
    """Minimal CNN: one 3x3 convolution followed by one linear classifier.

    Args:
        cls_num: Number of output classes (defaults to 2).
        input_size: ``(height, width)`` of the single-channel input images.
            The default ``(8, 8)`` reproduces the original hard-coded
            ``nn.Linear(36, cls_num)`` head: the unpadded, stride-1 3x3
            convolution shrinks each spatial side by 2, and 6 * 6 = 36.

    Raises:
        ValueError: If either side of ``input_size`` is smaller than the
            3x3 kernel.
    """

    def __init__(self, cls_num=2, input_size=(8, 8)):
        # Initialise nn.Module first so sub-modules assigned below are registered.
        super().__init__()
        height, width = input_size
        if height < 3 or width < 3:
            raise ValueError(
                "input_size must be at least (3, 3), got {}".format(input_size)
            )
        self.convolution_layer = nn.Conv2d(1, 1, kernel_size=(3, 3))
        # One output channel; each side loses kernel_size - 1 = 2 pixels.
        self.fc = nn.Linear((height - 2) * (width - 2), cls_num)

    def forward(self, x):
        """Map a ``(batch, 1, height, width)`` tensor to ``(batch, cls_num)`` scores."""
        x = self.convolution_layer(x)
        # Keep the batch axis and merge the rest; unlike view(), flatten()
        # also accepts activations that are not contiguous in memory.
        x = x.flatten(1)
        return self.fc(x)


# Instance shared by the inspection cells that follow.
model = TinnyCNN(2)

In [14]:
# parameters() yields the Parameter objects themselves, without their names.
for weight in model.parameters():
    kind, dims = type(weight), weight.size()
    print(kind, dims)
    print(weight, end="\n\n")

<class 'torch.nn.parameter.Parameter'> torch.Size([1, 1, 3, 3])
Parameter containing:
tensor([[[[ 0.3201,  0.0280,  0.2331],
          [-0.0950,  0.1594,  0.2902],
          [-0.2684, -0.2074,  0.1126]]]], requires_grad=True)

<class 'torch.nn.parameter.Parameter'> torch.Size([1])
Parameter containing:
tensor([-0.3181], requires_grad=True)

<class 'torch.nn.parameter.Parameter'> torch.Size([2, 36])
Parameter containing:
tensor([[ 0.0330,  0.1069,  0.0066, -0.0552, -0.0186,  0.0681, -0.1436, -0.0070,
         -0.1246,  0.0076,  0.1501,  0.1047,  0.0742, -0.0371, -0.0863, -0.1277,
          0.0769, -0.0223, -0.0055, -0.1035, -0.1130, -0.0667, -0.1480, -0.0431,
         -0.0206,  0.0990,  0.0063, -0.0023,  0.1482,  0.0861,  0.1119, -0.1144,
         -0.1556, -0.1190,  0.0820,  0.0465],
        [ 0.1428,  0.0467,  0.0826,  0.1561,  0.0285,  0.0678,  0.0667,  0.1085,
         -0.0353, -0.0864,  0.1344,  0.1401,  0.1195, -0.0368, -0.1654, -0.0684,
          0.1618, -0.1364, -0.1214,  0.1414,

In [15]:
# named_parameters() yields (name, Parameter) pairs.
for named_param in model.named_parameters():
    label, tensor = named_param
    print(label)
    print(tensor, end="\n\n")

convolution_layer.weight
Parameter containing:
tensor([[[[ 0.3201,  0.0280,  0.2331],
          [-0.0950,  0.1594,  0.2902],
          [-0.2684, -0.2074,  0.1126]]]], requires_grad=True)

convolution_layer.bias
Parameter containing:
tensor([-0.3181], requires_grad=True)

fc.weight
Parameter containing:
tensor([[ 0.0330,  0.1069,  0.0066, -0.0552, -0.0186,  0.0681, -0.1436, -0.0070,
         -0.1246,  0.0076,  0.1501,  0.1047,  0.0742, -0.0371, -0.0863, -0.1277,
          0.0769, -0.0223, -0.0055, -0.1035, -0.1130, -0.0667, -0.1480, -0.0431,
         -0.0206,  0.0990,  0.0063, -0.0023,  0.1482,  0.0861,  0.1119, -0.1144,
         -0.1556, -0.1190,  0.0820,  0.0465],
        [ 0.1428,  0.0467,  0.0826,  0.1561,  0.0285,  0.0678,  0.0667,  0.1085,
         -0.0353, -0.0864,  0.1344,  0.1401,  0.1195, -0.0368, -0.1654, -0.0684,
          0.1618, -0.1364, -0.1214,  0.1414, -0.1346,  0.0047, -0.1371, -0.0334,
          0.1280, -0.0527,  0.1234,  0.1453, -0.1061,  0.1251,  0.1078, -0.1009,
  

### modules、named_modules


In [16]:
# modules() iterates over the model itself followed by its sub-modules.
module_iter = model.modules()
for layer in module_iter:
    print(layer, end="\n\n")

TinnyCNN(
  (convolution_layer): Conv2d(1, 1, kernel_size=(3, 3), stride=(1, 1))
  (fc): Linear(in_features=36, out_features=2, bias=True)
)

Conv2d(1, 1, kernel_size=(3, 3), stride=(1, 1))

Linear(in_features=36, out_features=2, bias=True)



In [17]:
# named_modules() pairs every module with its name; the model itself comes
# first under the empty name, which shows up as a blank line.
for path, layer in model.named_modules():
    print(path, layer, sep="\n", end="\n\n")


TinnyCNN(
  (convolution_layer): Conv2d(1, 1, kernel_size=(3, 3), stride=(1, 1))
  (fc): Linear(in_features=36, out_features=2, bias=True)
)

convolution_layer
Conv2d(1, 1, kernel_size=(3, 3), stride=(1, 1))

fc
Linear(in_features=36, out_features=2, bias=True)



### children、named_children


In [18]:
# children() lists only the direct sub-modules; the model itself is not included.
direct_layers = model.children()
for layer in direct_layers:
    print(layer, end="\n\n")

Conv2d(1, 1, kernel_size=(3, 3), stride=(1, 1))

Linear(in_features=36, out_features=2, bias=True)



In [19]:
# named_children() yields (attribute name, direct sub-module) pairs.
for child in model.named_children():
    attr_name, layer = child
    print(attr_name)
    print(layer, end="\n\n")

convolution_layer
Conv2d(1, 1, kernel_size=(3, 3), stride=(1, 1))

fc
Linear(in_features=36, out_features=2, bias=True)



### get_parameter、get_submodule


In [20]:
# Look up one parameter and one sub-module by name.
fc_bias = model.get_parameter("fc.bias")
print(fc_bias)
conv = model.get_submodule("convolution_layer")
print(conv)
# The returned sub-module is itself a Module, so get_parameter can be called on it too.
print(conv.get_parameter("bias"))

Parameter containing:
tensor([0.0909, 0.0142], requires_grad=True)
Conv2d(1, 1, kernel_size=(3, 3), stride=(1, 1))
Parameter containing:
tensor([-0.3181], requires_grad=True)


## 设置模型的参数精度，可选半精度、单精度、双精度等


In [21]:
model = TinnyCNN(2)
# Print the dtype of every parameter of the freshly built model.
for param in model.parameters():
    print(param.dtype)

torch.float32
torch.float32
torch.float32
torch.float32


In [None]:
# half() converts all parameters of the model to float16.
model.half()
for _, param in model.named_parameters():
    print(param.dtype)

torch.float16
torch.float16
torch.float16
torch.float16


In [23]:
# float() converts the parameters to single precision (float32).
model.float()
for dtype in (param.dtype for param in model.parameters()):
    print(dtype)

torch.float32
torch.float32
torch.float32
torch.float32


In [24]:
# double() converts the parameters to double precision (float64).
model.double()
for param in model.parameters():
    precision = param.dtype
    print(precision)

torch.float64
torch.float64
torch.float64
torch.float64


In [25]:
# bfloat16() converts the parameters to the bfloat16 format.
model.bfloat16()
param_dtypes = [param.dtype for param in model.parameters()]
for dtype in param_dtypes:
    print(dtype)

torch.bfloat16
torch.bfloat16
torch.bfloat16
torch.bfloat16


## 对子模块执行特定功能


apply


In [None]:
@torch.no_grad()
def init_weights(m):
    """Set every weight of ``m`` to 1.0 when ``m`` is exactly an ``nn.Linear``.

    Gradient tracking is switched off by the decorator, so the in-place
    ``fill_`` on the weight is not recorded by autograd. Modules of any other
    type, including subclasses of ``nn.Linear``, are left untouched. Biases
    are never modified.
    """
    if type(m) is not nn.Linear:
        return
    m.weight.fill_(1.0)


net = nn.Sequential(nn.Linear(2, 2), nn.Linear(2, 2))
# Before: four parameters are listed, the weight and the bias of each of the
# two Linear layers.
for param in net.parameters():
    print(param, end="\n\n")
# apply() calls init_weights on every sub-module and on net itself.
net.apply(init_weights)
print("执行apply之后:")
# After: the same four parameters, now shown together with their names.
for name, param in net.named_parameters():
    print(name, param, sep="\n", end="\n\n")

Parameter containing:
tensor([[-0.1522,  0.4607],
        [-0.1416, -0.4075]], requires_grad=True)

Parameter containing:
tensor([-0.0606, -0.4303], requires_grad=True)

Parameter containing:
tensor([[-0.0275,  0.6632],
        [-0.0637, -0.1339]], requires_grad=True)

Parameter containing:
tensor([0.6056, 0.7064], requires_grad=True)

执行apply之后:
0.weight
Parameter containing:
tensor([[1., 1.],
        [1., 1.]], requires_grad=True)

0.bias
Parameter containing:
tensor([-0.0606, -0.4303], requires_grad=True)

1.weight
Parameter containing:
tensor([[1., 1.],
        [1., 1.]], requires_grad=True)

1.bias
Parameter containing:
tensor([0.6056, 0.7064], requires_grad=True)

