In [2]:
import torch
import numpy as np
import torch.nn as nn
import torch.nn.functional as F

  from .autonotebook import tqdm as notebook_tqdm


In [2]:

class Net(nn.Module):
    """LeNet-style CNN: two 5x5 convolutions followed by three linear layers."""

    def __init__(self):
        super(Net, self).__init__()
        # Convolutions: 1 input channel -> 6 channels -> 16 channels, 5x5 kernels.
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        # Affine layers (y = Wx + b): 16*5*5 -> 120 -> 84 -> 10.
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Apply conv/ReLU/pool twice, flatten, then run the three linear layers."""
        # 2x2 max pooling, window given as a tuple.
        hidden = F.relu(self.conv1(x))
        hidden = F.max_pool2d(hidden, (2, 2))
        # A square window can be given as a single number.
        hidden = F.relu(self.conv2(hidden))
        hidden = F.max_pool2d(hidden, 2)
        # Collapse everything except the batch dimension.
        flat_width = self.num_flat_features(hidden)
        hidden = hidden.view(-1, flat_width)
        hidden = F.relu(self.fc1(hidden))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)

    def num_flat_features(self, x):
        """Return the product of all dimensions of ``x`` except the first (batch) one."""
        count = 1
        for dim in x.size()[1:]:
            count = count * dim
        return count


# Instantiate the network and print its layer summary.
net = Net()
print(net)

Net(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


In [5]:
# Collect the network's learnable tensors into a list and inspect them.
params = list(net.parameters())
print(len(params))
print(params[0].size())  # weight of conv1

10
torch.Size([6, 1, 5, 5])


In [6]:
# Run one forward pass on a random tensor of shape (1, 1, 32, 32).
# NOTE(review): the name `input` shadows the Python builtin of the same name.
input = torch.randn(1, 1, 32, 32)
out = net(input)
print(out)

tensor([[-0.0921,  0.0474,  0.0372, -0.0836, -0.1015, -0.0173,  0.0596,  0.0553,
         -0.1055,  0.1675]], grad_fn=<AddmmBackward0>)


## 模型参数

### nn.Parameter

In [10]:
class MyModel(nn.Module):
    """Minimal module whose weight and bias are declared directly as nn.Parameter."""

    def __init__(self):
        super(MyModel, self).__init__()
        # Wrapping tensors in nn.Parameter registers them as model parameters.
        initial_weight = torch.randn(2, 2)
        initial_bias = torch.zeros(2)
        self.weight = nn.Parameter(initial_weight)
        self.bias = nn.Parameter(initial_bias)

    def forward(self, x):
        """Return the affine map ``x @ weight + bias``."""
        projected = x @ self.weight
        return projected + self.bias

# Instantiate the model
model = MyModel()

# Print the name and size of every model parameter
for name, param in model.named_parameters():
    print(name, param.size())

weight torch.Size([2, 2])
bias torch.Size([2])


### model.named_parameters()

In [11]:
# Define a simple model
class SimpleModel(nn.Module):
    """Small CNN with two 5x5 convolutions and two linear layers.

    ``fc1`` expects 320 = 20 * 4 * 4 input features, which is what the two
    convolutions produce from a 1-channel 28x28 image when each is followed
    by 2x2 max pooling.
    """

    def __init__(self):
        super(SimpleModel, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        """Map a batch of shape (N, 1, 28, 28) to scores of shape (N, 10).

        Previously this method was a bare ``pass``, so calling the model
        silently returned ``None``.
        """
        # 28x28 -> conv 24x24 -> pool 12x12
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        # 12x12 -> conv 8x8 -> pool 4x4
        x = F.relu(F.max_pool2d(self.conv2(x), 2))
        # Flatten everything except the batch dimension (20 * 4 * 4 = 320).
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        return self.fc2(x)

# Instantiate the model
model = SimpleModel()

# Iterate over the model parameters with named_parameters(),
# which yields (name, tensor) pairs
for name, param in model.named_parameters():
    print(name, param.size())

conv1.weight torch.Size([10, 1, 5, 5])
conv1.bias torch.Size([10])
conv2.weight torch.Size([20, 10, 5, 5])
conv2.bias torch.Size([20])
fc1.weight torch.Size([50, 320])
fc1.bias torch.Size([50])
fc2.weight torch.Size([10, 50])
fc2.bias torch.Size([10])


### model.parameters()

In [12]:
# parameters() yields only the tensors, without their names.
for param in model.parameters():
    print(param.size())

torch.Size([10, 1, 5, 5])
torch.Size([10])
torch.Size([20, 10, 5, 5])
torch.Size([20])
torch.Size([50, 320])
torch.Size([50])
torch.Size([10, 50])
torch.Size([10])


## 权值初始化

In [15]:
import torch
import torch.nn as nn

# Two standalone layers used by the initialization examples.
conv = nn.Conv2d(1,3,3)
linear = nn.Linear(10,1)

print(isinstance(conv,nn.Conv2d)) # check whether conv is an nn.Conv2d
print(isinstance(linear,nn.Conv2d)) # check whether linear is an nn.Conv2d

True
False


In [21]:
# Inspect the raw weight tensors. A notebook cell displays only its last
# expression, so conv.weight.data is evaluated here but not shown.
conv.weight.data
linear.weight.data

tensor([[-0.0866,  0.2790, -0.0341, -0.2344,  0.0400, -0.1183, -0.2698, -0.0791,
         -0.2824, -0.1778]])

In [22]:
# Apply Kaiming normal initialization to conv's weight (in place)
torch.nn.init.kaiming_normal_(conv.weight.data)
conv.weight.data
# Fill linear's weight with the constant 0.3 (in place)
torch.nn.init.constant_(linear.weight.data,0.3)
linear.weight.data

tensor([[0.3000, 0.3000, 0.3000, 0.3000, 0.3000, 0.3000, 0.3000, 0.3000, 0.3000,
         0.3000]])

## 模型加载与保存

In [23]:
class LeNet2(nn.Module):
    """LeNet-style classifier for 3-channel input, built from two nn.Sequential stages.

    Args:
        classes: number of output scores produced by the last linear layer.
    """

    def __init__(self, classes):
        super(LeNet2, self).__init__()
        # Convolutional feature extractor.
        feature_layers = [
            nn.Conv2d(3, 6, 5),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(6, 16, 5),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
        ]
        self.features = nn.Sequential(*feature_layers)
        # Fully connected head operating on the flattened 16*5*5 features.
        classifier_layers = [
            nn.Linear(16*5*5, 120),
            nn.ReLU(),
            nn.Linear(120, 84),
            nn.ReLU(),
            nn.Linear(84, classes),
        ]
        self.classifier = nn.Sequential(*classifier_layers)

    def forward(self, x):
        """Extract features, flatten per sample, and classify."""
        feats = self.features(x)
        batch = feats.size(0)
        flat = feats.view(batch, -1)
        return self.classifier(flat)

    def initialize(self):
        """Overwrite every parameter with the constant 2020 (a stand-in for training)."""
        for tensor in self.parameters():
            tensor.data.fill_(2020)

net = LeNet2(classes=2019)

# "Training": initialize() simply overwrites every parameter with a constant,
# so the before/after prints show that the weights changed.
print("训练前: ", net.features[0].weight[0, ...])
net.initialize()
print("训练后: ", net.features[0].weight[0, ...])

path_model = "./model.pkl"
path_state_dict = "./model_state_dict.pkl"

# Save the whole model object (pickles the module itself)
torch.save(net, path_model)

# Save only the model parameters (the state_dict)
net_state_dict = net.state_dict()
torch.save(net_state_dict, path_state_dict)

训练前:  tensor([[[ 0.0663,  0.1155,  0.0588, -0.0793, -0.0634],
         [-0.0235,  0.1004,  0.0122, -0.0956,  0.0776],
         [ 0.0357, -0.1122,  0.0310,  0.0453, -0.0823],
         [-0.1140, -0.0801, -0.1007, -0.0095, -0.0445],
         [-0.0524, -0.0345,  0.0624,  0.0884, -0.0357]],

        [[-0.0626,  0.0563,  0.0243, -0.1103,  0.0023],
         [ 0.0951, -0.0181, -0.1153, -0.0853,  0.0355],
         [ 0.0713, -0.0709, -0.0379,  0.0242,  0.0476],
         [-0.0903,  0.0322,  0.0994,  0.0152, -0.0590],
         [ 0.0324,  0.0886,  0.0415,  0.0373, -0.0832]],

        [[-0.0949,  0.0828, -0.0199, -0.0991, -0.0592],
         [ 0.0776, -0.0896,  0.0081, -0.0105, -0.0269],
         [ 0.1046, -0.0198, -0.0831,  0.0023,  0.0971],
         [-0.0556, -0.0557, -0.1149,  0.0150,  0.0603],
         [-0.0686,  0.0720, -0.0763, -0.0072,  0.1149]]],
       grad_fn=<SelectBackward0>)
训练后:  tensor([[[2020., 2020., 2020., 2020., 2020.],
         [2020., 2020., 2020., 2020., 2020.],
         [2020.,

In [24]:
path_model = "./model.pkl"
# The file contains a fully pickled model object, not just tensors. Since
# PyTorch 2.6 torch.load defaults to weights_only=True, which refuses to
# unpickle arbitrary classes, so weights_only=False must be passed explicitly.
# SECURITY: unpickling can execute arbitrary code -- only load files that you
# created yourself or otherwise trust. The model's class definition must also
# be available in the current session for the load to succeed.
net_load = torch.load(path_model, weights_only=False)

print(net_load)

LeNet2(
  (features): Sequential(
    (0): Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Linear(in_features=400, out_features=120, bias=True)
    (1): ReLU()
    (2): Linear(in_features=120, out_features=84, bias=True)
    (3): ReLU()
    (4): Linear(in_features=84, out_features=2019, bias=True)
  )
)


In [25]:
# Load the saved parameters and copy them into a freshly constructed model.
path_state_dict = "./model_state_dict.pkl"
state_dict_load = torch.load(path_state_dict)
net_new = LeNet2(classes=2019)

# The first print shows the fresh initialization; the second shows the
# values after load_state_dict has copied the saved parameters in.
print("加载前: ", net_new.features[0].weight[0, ...])
net_new.load_state_dict(state_dict_load)
print("加载后: ", net_new.features[0].weight[0, ...])

加载前:  tensor([[[ 0.0775,  0.0374,  0.0163,  0.0196, -0.0884],
         [ 0.0293, -0.1051, -0.0362,  0.1122, -0.0616],
         [ 0.0083,  0.0274,  0.0158,  0.0301,  0.0937],
         [-0.0459, -0.1062,  0.0510, -0.0058,  0.1046],
         [-0.0672, -0.0204,  0.0134,  0.0594,  0.0421]],

        [[ 0.0058, -0.0435, -0.0550,  0.0591, -0.1067],
         [ 0.0929,  0.0202, -0.0027,  0.0264,  0.0409],
         [ 0.0038, -0.0219, -0.0522, -0.0065,  0.0717],
         [-0.0300, -0.0819, -0.0238, -0.0132, -0.0364],
         [ 0.0258, -0.0238, -0.0680, -0.0172,  0.0902]],

        [[-0.1087,  0.0948, -0.0848,  0.1148, -0.0212],
         [-0.0634,  0.0479,  0.0064, -0.0287,  0.0732],
         [-0.1080,  0.0522, -0.0891, -0.1137,  0.0838],
         [ 0.0740,  0.0965,  0.0893, -0.1075,  0.0277],
         [-0.0060, -0.0713,  0.0996,  0.0865, -0.0181]]],
       grad_fn=<SelectBackward0>)
加载后:  tensor([[[2020., 2020., 2020., 2020., 2020.],
         [2020., 2020., 2020., 2020., 2020.],
         [2020.,

In [27]:
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
 
#define model
class TheModelClass(nn.Module):
    """LeNet-style CNN for 3-channel input producing 10 output scores."""

    def __init__(self):
        super(TheModelClass, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        # One pooling layer, reused after both convolutions.
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Run conv -> ReLU -> pool twice, flatten to 16*5*5, then three linear layers."""
        for conv in (self.conv1, self.conv2):
            x = self.pool(F.relu(conv(x)))
        flat = x.view(-1, 16 * 5 * 5)
        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)
 
def main():
    """Build a TheModelClass and an SGD optimizer, then print both state_dicts."""
    model = TheModelClass()
    optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

    # Model state_dict: one line per entry with its name and tensor size.
    print('Model.state_dict:')
    for entry_name, tensor in model.state_dict().items():
        print(entry_name, '\t', tensor.size())

    # Optimizer state_dict: one line per top-level key with its value.
    print('Optimizer`s state_dict:')
    for entry_name, value in optimizer.state_dict().items():
        print(entry_name, '\t', value)


if __name__ == '__main__':
    main()

Model.state_dict:
conv1.weight 	 torch.Size([6, 3, 5, 5])
conv1.bias 	 torch.Size([6])
conv2.weight 	 torch.Size([16, 6, 5, 5])
conv2.bias 	 torch.Size([16])
fc1.weight 	 torch.Size([120, 400])
fc1.bias 	 torch.Size([120])
fc2.weight 	 torch.Size([84, 120])
fc2.bias 	 torch.Size([84])
fc3.weight 	 torch.Size([10, 84])
fc3.bias 	 torch.Size([10])
Optimizer`s state_dict:
state 	 {}
param_groups 	 [{'lr': 0.001, 'momentum': 0.9, 'dampening': 0, 'weight_decay': 0, 'nesterov': False, 'maximize': False, 'foreach': None, 'differentiable': False, 'params': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]}]


## 优化器

In [None]:
import os
import torch

# Create a 2 x 2 weight drawn from a standard normal distribution
weight = torch.randn((2, 2), requires_grad=True)
# Manually set its gradient to an all-ones 2 x 2 matrix
weight.grad = torch.ones((2, 2))
# Show the current values and gradient of the weight
print("The data of weight before step:\n{}".format(weight.data))
print("The grad of weight before step:\n{}".format(weight.grad))
# Instantiate the optimizer
optimizer = torch.optim.SGD([weight], lr=0.1, momentum=0.9)
# Perform a single optimization step
optimizer.step()
# Show the values and gradient after that step
print("The data of weight after step:\n{}".format(weight.data))
print("The grad of weight after step:\n{}".format(weight.grad))
# Clear the gradient (this resets the gradient, not the weight itself)
optimizer.zero_grad()
# Check that the gradient was cleared. Recent PyTorch releases default to
# zero_grad(set_to_none=True), in which case this prints None rather than zeros.
print("The grad of weight after optimizer.zero_grad():\n{}".format(weight.grad))
# Show the parameter groups
print("optimizer.params_group is \n{}".format(optimizer.param_groups))
# The optimizer keeps a reference to the very same tensor object, so both ids match
print("weight in optimizer:{}\nweight in weight:{}\n".format(id(optimizer.param_groups[0]['params'][0]), id(weight)))
# Add a second parameter group containing weight2, with its own options
weight2 = torch.randn((3, 3), requires_grad=True)
optimizer.add_param_group({"params": weight2, 'lr': 0.0001, 'nesterov': True})
# Show the parameter groups again
print("optimizer.param_groups is\n{}".format(optimizer.param_groups))
# Inspect the current optimizer state
opt_state_dict = optimizer.state_dict()
print("state_dict before step:\n", opt_state_dict)
# Perform 50 step operations (parameters whose .grad is None are skipped by step())
for _ in range(50):
    optimizer.step()
# Show the state after stepping
print("state_dict after step:\n", optimizer.state_dict())
# Save the optimizer state. A relative path is used (like the model files
# saved earlier in this notebook) so the cell runs on any machine; the
# original hard-coded an absolute Windows directory.
path_optimizer_state = "./optimizer_state_dict.pkl"
torch.save(optimizer.state_dict(), path_optimizer_state)
print("----------done-----------")
# Load the optimizer state back from the same file
state_dict = torch.load(path_optimizer_state)
optimizer.load_state_dict(state_dict)
print("load state_dict successfully\n{}".format(state_dict))
# Print the optimizer's final attributes
print("\n{}".format(optimizer.defaults))
print("\n{}".format(optimizer.state))
print("\n{}".format(optimizer.param_groups))

## 模型容器

### nn.Sequential

In [3]:
class LeNetSequetial(nn.Module):
    """LeNet-style classifier assembled from two nn.Sequential containers.

    ``features`` is the convolutional stage (with average pooling) and
    ``classifier`` is the fully connected head. The class name is kept
    as-is so existing references keep working.

    Args:
        classes: number of output scores produced by the last linear layer.
    """

    def __init__(self, classes):
        # Must name this class: the original called super(LeNet2, self), which
        # raises because self is not a LeNet2 instance (or LeNet2 is undefined).
        super(LeNetSequetial, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 6, 5),
            nn.ReLU(),
            nn.AvgPool2d(2, 2),
            nn.Conv2d(6, 16, 5),
            nn.ReLU(),
            nn.AvgPool2d(2, 2)
        )
        self.classifier = nn.Sequential(
            nn.Linear(16*5*5, 120),
            nn.ReLU(),
            nn.Linear(120, 84),
            nn.ReLU(),
            nn.Linear(84, classes)
        )

    def forward(self, x):
        """Extract features, flatten per sample, and classify."""
        x = self.features(x)
        # Keep the batch dimension and flatten the rest for the linear layers.
        x = x.view(x.size()[0], -1)
        x = self.classifier(x)
        return x

In [3]:
class ModuleList(nn.Module):
    """Stack of 20 Linear(10, 10) layers held in an nn.ModuleList and applied in order."""

    def __init__(self):
        super(ModuleList, self).__init__()
        layers = []
        for _ in range(20):
            layers.append(nn.Linear(10, 10))
        self.linears = nn.ModuleList(layers)

    def forward(self, x):
        """Feed ``x`` through every linear layer in sequence."""
        for layer in self.linears:
            x = layer(x)
        return x

# Build the stacked-linear model and show its structure.
net = ModuleList()

print(net)

# All-ones input of shape (10, 10).
fake_data = torch.ones((10, 10))

output = net(fake_data)

print(output)

ModuleList(
  (linears): ModuleList(
    (0-19): 20 x Linear(in_features=10, out_features=10, bias=True)
  )
)
tensor([[-0.1253, -0.1615, -0.3969, -0.2678,  0.2628, -0.0124, -0.0215, -0.0008,
         -0.1870,  0.1515],
        [-0.1253, -0.1615, -0.3969, -0.2678,  0.2628, -0.0124, -0.0215, -0.0008,
         -0.1870,  0.1515],
        [-0.1253, -0.1615, -0.3969, -0.2678,  0.2628, -0.0124, -0.0215, -0.0008,
         -0.1870,  0.1515],
        [-0.1253, -0.1615, -0.3969, -0.2678,  0.2628, -0.0124, -0.0215, -0.0008,
         -0.1870,  0.1515],
        [-0.1253, -0.1615, -0.3969, -0.2678,  0.2628, -0.0124, -0.0215, -0.0008,
         -0.1870,  0.1515],
        [-0.1253, -0.1615, -0.3969, -0.2678,  0.2628, -0.0124, -0.0215, -0.0008,
         -0.1870,  0.1515],
        [-0.1253, -0.1615, -0.3969, -0.2678,  0.2628, -0.0124, -0.0215, -0.0008,
         -0.1870,  0.1515],
        [-0.1253, -0.1615, -0.3969, -0.2678,  0.2628, -0.0124, -0.0215, -0.0008,
         -0.1870,  0.1515],
        [-0.1253,

In [4]:
class ModuleDict(nn.Module):
    """Module that picks its layer and its activation by string key at call time."""

    def __init__(self):
        super(ModuleDict, self).__init__()
        # Selectable main operation.
        operations = {
            'conv': nn.Conv2d(10, 10, 3),
            'pool': nn.MaxPool2d(3)
        }
        self.choices = nn.ModuleDict(operations)

        # Selectable activation function.
        nonlinearities = {
            'relu': nn.ReLU(),
            'prelu': nn.PReLU()
        }
        self.activations = nn.ModuleDict(nonlinearities)

    def forward(self, x, choice, act):
        """Apply ``self.choices[choice]`` and then ``self.activations[act]`` to ``x``."""
        operation = self.choices[choice]
        activation = self.activations[act]
        return activation(operation(x))

net = ModuleDict()

# Random input of shape (4, 10, 32, 32).
fake_img = torch.randn((4, 10, 32, 32))

# Select the 'conv' layer and the 'relu' activation by key.
output = net(fake_img, 'conv', 'relu')
# output = net(fake_img, 'conv', 'prelu')
print(output)

tensor([[[[0.0000, 0.0000, 0.0000,  ..., 0.4056, 0.1909, 0.0000],
          [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.9579, 0.4126],
          [0.0000, 0.6611, 0.0000,  ..., 0.0000, 0.3082, 0.3409],
          ...,
          [0.0000, 0.3477, 0.0000,  ..., 0.1169, 0.0000, 0.1187],
          [0.0000, 0.0000, 0.1221,  ..., 0.0000, 0.0000, 0.1232],
          [0.0000, 0.1271, 0.0539,  ..., 0.0000, 0.0000, 0.0387]],

         [[0.0000, 1.1290, 0.0000,  ..., 0.0983, 0.0000, 0.4820],
          [1.2758, 0.0000, 0.0000,  ..., 0.0637, 0.3755, 0.0000],
          [0.0000, 0.0000, 0.0000,  ..., 0.0236, 0.9434, 0.0000],
          ...,
          [0.0000, 0.2883, 0.0000,  ..., 0.6844, 0.6549, 0.0085],
          [0.0000, 0.2142, 0.7710,  ..., 0.2872, 0.5901, 0.1529],
          [0.6774, 0.0000, 0.6228,  ..., 0.4654, 0.0000, 0.7522]],

         [[0.0066, 0.5943, 0.1602,  ..., 0.0000, 0.0000, 0.0000],
          [0.3983, 0.0000, 0.1716,  ..., 0.2854, 0.8466, 1.0427],
          [0.0000, 0.2751, 0.0000,  ..., 0

## 修改模型

In [5]:
# Build a torchvision ResNet-50 (called with no arguments) and print its architecture.
import torchvision.models as models
net = models.resnet50()
print(net)

  warn(f"Failed to load image Python extension: {e}")


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 