In [1]:
import torch
from torch import nn

In [52]:
data = torch.rand(3, 8, 10)
# 线性层
linear1 = nn.Linear(in_features=10, out_features=10)
print(f'out_con1.shape:{linear1(data).shape}')

# 1D卷积层
conv1 = nn.Conv1d(in_channels=8, out_channels=10, kernel_size=3)
print(f'out_con1.shape:{conv1(data).shape}')

out_con1.shape:torch.Size([3, 8, 10])
out_con1.shape:torch.Size([3, 10, 8])


In [67]:
data = torch.rand(size=(4, 3, 16, 16))
# 2D卷积层：卷积为图片
conv2 = nn.Conv2d(in_channels=3, out_channels=4, kernel_size=3)
# 3D卷积层：卷积为视频，比图片多个时间轴
conv3 = nn.Conv3d(in_channels=4, out_channels=2, kernel_size=3)

out_conv2 = conv2(data)
out_conv3 = conv3(data)

print(f'out_conv2:{out_conv2.shape}; conv2.weight:{conv2.weight.shape}; conv2.bias:{conv2.bias.shape}')
print(f'out_conv3:{out_conv3.shape}; conv3.weight:{conv3.weight.shape}; conv3.bias:{conv3.bias.shape}')

out_conv2:torch.Size([4, 4, 14, 14]); conv2.weight:torch.Size([4, 3, 3, 3]); conv2.bias:torch.Size([4])
out_conv3:torch.Size([2, 1, 14, 14]); conv3.weight:torch.Size([2, 4, 3, 3, 3]); conv3.bias:torch.Size([2])


In [96]:
data = torch.rand(size=(4, 3, 16, 16))
# 激活函数
relu = nn.ReLU()
print(f'out_relu: {relu(data).shape}')

leaky_relu = nn.LeakyReLU()
print(f'out_leaky_relu: {leaky_relu(data).shape}')

gelu = nn.GELU()
print(f'out_gelu: {gelu(data).shape}')

sigmoid = nn.Sigmoid()
print(f'out_sigmoid: {sigmoid(data).shape}')

tanh = nn.Tanh()
print(f'out_tanh: {tanh(data).shape}')

hardswish = nn.Hardswish()
print(f'out_hardswish: {hardswish(data).shape}')

out_relu: torch.Size([4, 3, 16, 16])
out_leaky_relu: torch.Size([4, 3, 16, 16])
out_gelu: torch.Size([4, 3, 16, 16])
out_sigmoid: torch.Size([4, 3, 16, 16])
out_tanh: torch.Size([4, 3, 16, 16])
out_hardswish: torch.Size([4, 3, 16, 16])


In [55]:
# 正则化层

# BN：以特征为单位进行归一化，根据样本的特征不同有1D、2D、3D可选
# BN1d：接受2D或3D数据
bn1 = nn.BatchNorm1d(num_features=10)
out_bn1 = bn1(torch.rand(size=(8, 10)))
print(f'bn1:{out_bn1.shape}; bn1.weight:{bn1.weight.shape}; bn1.bias:{bn1.bias.shape}')

# BN2d：只接受4D
bn2 = nn.BatchNorm2d(num_features=3)
out_bn2 = bn2(torch.rand(size=(4, 3, 8, 8)))
print(f'bn1:{out_bn2.shape}; bn2.weight:{bn2.weight.shape}; bn2.bias:{bn2.bias.shape}')

# BN3d：只接受5D
bn3 = nn.BatchNorm3d(num_features=4)
out_bn3 = bn3(torch.rand(size=(2, 4, 3, 8, 8)))
print(f'bn1:{out_bn3.shape}; bn3.weight:{bn3.weight.shape}; bn3.bias:{bn3.bias.shape}')

# LN以样本为单位进行归一化，normalized_shape可选，int or list
ln = nn.LayerNorm(normalized_shape=8)
out_ln = ln(torch.rand(size=(4, 6, 8)))
print(f'bn1:{out_ln.shape}; bn3.weight:{ln.weight.shape}; bn3.bias:{ln.bias.shape}')

# GroupNorm
# gn = nn.GroupNorm(num_channels=10, num_groups=10)

# RMSNorm


bn1:torch.Size([8, 10]); bn1.weight:torch.Size([10]); bn1.bias:torch.Size([10])
bn1:torch.Size([4, 3, 8, 8]); bn2.weight:torch.Size([3]); bn2.bias:torch.Size([3])
bn1:torch.Size([2, 4, 3, 8, 8]); bn3.weight:torch.Size([4]); bn3.bias:torch.Size([4])
bn1:torch.Size([4, 6, 8]); bn3.weight:torch.Size([8]); bn3.bias:torch.Size([8])


In [3]:
# Dropout层， p为消除的参数比例
drop = nn.Dropout(p=0.3)

drop(torch.rand(size=(3, 4, 8)))

In [52]:
class MyNet(nn.Module):
    def __init__(self):
        super(MyNet, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3)
        self.conv2 = nn.Conv2d(64, 64, 3)
        self.maxpool1 = nn.MaxPool2d(2, 2)

        self.features = nn.Sequential(
            nn.Conv2d(64, 128, 3),
            nn.Conv2d(128, 128, 3),
            nn.BatchNorm2d(num_features=128),
            nn.ReLU()
        )

        self.features_list = nn.ModuleList([nn.Linear(128, 256), nn.ReLU()])

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.maxpool1(x)
        x = self.features(x)

        return x


net = MyNet()

In [45]:
# 迭代返回每个网络模块，从大到小逐个遍历
# 第一个元素是网络整体
for item in net.modules():
    print(item)
    print('==================================================')

MyNet(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1))
  (maxpool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (features): Sequential(
    (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1))
    (1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1))
    (2): ReLU()
  )
  (features_list): ModuleList(
    (0): Linear(in_features=128, out_features=256, bias=True)
    (1): ReLU()
  )
)
Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1))
Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1))
MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
Sequential(
  (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1))
  (1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1))
  (2): ReLU()
)
Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1))
Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1))
ReLU()
ModuleList(
  (0): Linear(in_features=128, out_features=256, bias=True)


In [58]:
# 与modules()类似，只是也返回了名字
# 第一个元素，name为空 '', 为网络整体
# 如果中间出现序列，序列名字是 特征名.序列号    如 features.0
# ModuleList也一样，如 feature_list.0
for name, m in net.named_modules():
    print(f"name:{name}\nmodule:{m}")
    print('==================================================')

# 冻结权重
# 可以通过名字、模型类型锁定指定类别，注意如果有些模块没有可学习的参数，可能报错
for name, item in net.named_modules():
    # 通过名字搜索
    if name.startswith('conv'):
        item.weight.requires_grad = False
        item.bias.requires_grad = False
    # 通过类别搜索
    if isinstance(item, nn.Conv2d):
        item.weight.requires_grad = False
        item.bias.requires_grad = False

name:
module:MyNet(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1))
  (maxpool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (features): Sequential(
    (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1))
    (1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1))
    (2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): ReLU()
  )
  (features_list): ModuleList(
    (0): Linear(in_features=128, out_features=256, bias=True)
    (1): ReLU()
  )
)
name:conv1
module:Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1))
name:conv2
module:Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1))
name:maxpool1
module:MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
name:features
module:Sequential(
  (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1))
  (1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1))
  (2): BatchNorm2

In [56]:
# 优化器参数optimizer的参数
# 返回网络所有可学习参数
for p in net.parameters():
    print(p.shape)

torch.Size([64, 3, 3, 3])
torch.Size([64])
torch.Size([64, 64, 3, 3])
torch.Size([64])
torch.Size([128, 64, 3, 3])
torch.Size([128])
torch.Size([128, 128, 3, 3])
torch.Size([128])
torch.Size([128])
torch.Size([128])
torch.Size([256, 128])
torch.Size([256])


In [53]:
# 可以通过名字锁定指定模块
for name, p in net.named_parameters():
    print(f"name:{name}, parameter:{p.shape}")

name:conv1.weight, parameter:torch.Size([64, 3, 3, 3])
name:conv1.bias, parameter:torch.Size([64])
name:conv2.weight, parameter:torch.Size([64, 64, 3, 3])
name:conv2.bias, parameter:torch.Size([64])
name:features.0.weight, parameter:torch.Size([128, 64, 3, 3])
name:features.0.bias, parameter:torch.Size([128])
name:features.1.weight, parameter:torch.Size([128, 128, 3, 3])
name:features.1.bias, parameter:torch.Size([128])
name:features.2.weight, parameter:torch.Size([128])
name:features.2.bias, parameter:torch.Size([128])
name:features_list.0.weight, parameter:torch.Size([256, 128])
name:features_list.0.bias, parameter:torch.Size([256])


In [67]:
for item in net.children():
    print(item)
    print('---------------------------------------------------------')

Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1))
---------------------------------------------------------
Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1))
---------------------------------------------------------
MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
---------------------------------------------------------
Sequential(
  (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1))
  (1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1))
  (2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (3): ReLU()
)
---------------------------------------------------------
ModuleList(
  (0): Linear(in_features=128, out_features=256, bias=True)
  (1): ReLU()
)
---------------------------------------------------------


In [66]:
for name, item in net.named_children():
    print(name, item)
    print('--------------------------------------------------------')

conv1 Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1))
--------------------------------------------------------
conv2 Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1))
--------------------------------------------------------
maxpool1 MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
--------------------------------------------------------
features Sequential(
  (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1))
  (1): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1))
  (2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (3): ReLU()
)
--------------------------------------------------------
features_list ModuleList(
  (0): Linear(in_features=128, out_features=256, bias=True)
  (1): ReLU()
)
--------------------------------------------------------


In [71]:
# for data in net.state_dict():
#     print(data)
#     print()

AttributeError: 'str' object has no attribute 'shape'