In [21]:
import torch.nn as nn
import torch

# 5.1 pytorch模型定义

## 1. Sequential

### （1）md文件样例

In [1]:
import torch.nn as nn
class MySequential(nn.Module):
    from collections import OrderedDict
    def __init__(self, *args):
        super(MySequential, self).__init__()
        if len(args) == 1 and isinstance(args[0], OrderedDict): # 如果传入的是一个OrderedDict
            for key, module in args[0].items():
                self.add_module(key, module)  # add_module方法会将module添加进self._modules(一个OrderedDict)
        else:  # 传入的是一些Module
            for idx, module in enumerate(args):
                self.add_module(str(idx), module)
    def forward(self, input):
        # self._modules返回一个 OrderedDict，保证会按照成员添加时的顺序遍历成
        for module in self._modules.values():
            input = module(input)
        return input

# 直接排列
net = nn.Sequential(
        nn.Linear(784, 256),
        nn.ReLU(),
        nn.Linear(256, 10),
        )
print(net)
# 使用OrderedDict
import collections
import torch.nn as nn
net2 = nn.Sequential(collections.OrderedDict([
          ('fc1', nn.Linear(784, 256)),
          ('relu1', nn.ReLU()),
          ('fc2', nn.Linear(256, 10))
          ]))
print(net2)

Sequential(
  (0): Linear(in_features=784, out_features=256, bias=True)
  (1): ReLU()
  (2): Linear(in_features=256, out_features=10, bias=True)
)
Sequential(
  (fc1): Linear(in_features=784, out_features=256, bias=True)
  (relu1): ReLU()
  (fc2): Linear(in_features=256, out_features=10, bias=True)
)


### （2）test1

Sequential实现了内部的 forward 函数，而且模块必须是按照顺序进行排列的，所以必须确保前一个模块的输出大小和下一个模块的输入大小是一致的

In [12]:
class net1_1(nn.Module):
    def __init__(self):
        super(net1_1, self).__init__()
        self.block = nn.Sequential(nn.Conv2d(1,20,5),
                                    nn.ReLU(),
                                    nn.Conv2d(20,64,5),
                                    nn.ReLU())
    def forward(self, x):
        x = self.block(x)
        return x

net = net1_1()
print(net)

net1_1(
  (block): Sequential(
    (0): Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1))
    (1): ReLU()
    (2): Conv2d(20, 64, kernel_size=(5, 5), stride=(1, 1))
    (3): ReLU()
  )
)


## 2. ModuleList

### （1）md样例

In [2]:
class model(nn.Module):
  def __init__(self, *args):
        super(MySequential, self).__init__()
        if len(args) == 1 and isinstance(args[0], OrderedDict): # 如果传入的是一个OrderedDict
            for key, module in args[0].items():
                self.add_module(key, module)  # add_module方法会将module添加进self._modules(一个OrderedDict)
        else:  # 传入的是一些Module
            for idx, module in enumerate(args):
                self.add_module(str(idx), module)
  def forward(self, x):
    for layer in self.modulelist:
      x = layer(x)
    return x

net = nn.ModuleList([nn.Linear(784, 256), nn.ReLU()])
net.append(nn.Linear(256, 10)) # # 类似List的append操作
print(net[-1])  # 类似List的索引访问
print(net)

Linear(in_features=256, out_features=10, bias=True)
ModuleList(
  (0): Linear(in_features=784, out_features=256, bias=True)
  (1): ReLU()
  (2): Linear(in_features=256, out_features=10, bias=True)
)


### （2）test1

In [4]:
[nn.Linear(10,10) for i in range(2)]

[Linear(in_features=10, out_features=10, bias=True),
 Linear(in_features=10, out_features=10, bias=True)]

In [3]:
class net1(nn.Module):
    def __init__(self):
        super(net1, self).__init__()
        self.linears = nn.ModuleList([nn.Linear(10,10) for i in range(2)])
    def forward(self, x):
        for m in self.linears:
            x = m(x)
        return x

net = net1()
print(net)
for param in net.parameters():
    print(type(param.data), param.size())

net1(
  (linears): ModuleList(
    (0): Linear(in_features=10, out_features=10, bias=True)
    (1): Linear(in_features=10, out_features=10, bias=True)
  )
)
<class 'torch.Tensor'> torch.Size([10, 10])
<class 'torch.Tensor'> torch.Size([10])
<class 'torch.Tensor'> torch.Size([10, 10])
<class 'torch.Tensor'> torch.Size([10])


包含两个全连接层，他们的权重 (weithgs) 和偏置 (bias) 都在这个网络之内

### （3）test2

In [None]:
class net2(nn.Module):
    def __init__(self):
        super(net2, self).__init__()
        self.linears = [nn.Linear(10,10) for i in range(2)]
    def forward(self, x):
        for m in self.linears:
            x = m(x)
        return x

net = net2()
print(net)
print(list(net.parameters()))

该网络借助python自带的list来添加全连接层和 parameters ，但是并没有自动注册到网络中。但可以使用 forward 来计算输出结果。但进行实例化网络进行训练的时候，这些层的 parameters 不在整个网络之中，所以其网络参数也不会被更新，即无法训练。

nn.ModuleList 是一个储存不同 module，并自动将每个 module 的 parameters 添加到网络之中的容器。但是，其并未定义一个网络，它只是将不同的模块储存在一起，这些模块之间并没有什么先后顺序可言。

### （4）test3

In [6]:
[nn.Linear(10,20), nn.Linear(20,30), nn.Linear(5,10)]

[Linear(in_features=10, out_features=20, bias=True),
 Linear(in_features=20, out_features=30, bias=True),
 Linear(in_features=5, out_features=10, bias=True)]

In [9]:
class net3(nn.Module):
    def __init__(self):
        super(net3, self).__init__()
        self.linears = nn.ModuleList([nn.Linear(10,20), nn.Linear(20,30), nn.Linear(5,10)])
    def forward(self, x):
        x = self.linears[2](x)
        x = self.linears[0](x)
        x = self.linears[1](x) 
        return x

net = net3()
print(net)
input = torch.randn(32, 5)
print(net(input).shape)

net3(
  (linears): ModuleList(
    (0): Linear(in_features=10, out_features=20, bias=True)
    (1): Linear(in_features=20, out_features=30, bias=True)
    (2): Linear(in_features=5, out_features=10, bias=True)
  )
)
torch.Size([32, 30])


可以看出ModuleList 里面的顺序并不能决定什么，网络的执行顺序是根据 forward 函数来决定的。

In [10]:
input

tensor([[ 0.0583, -0.1632, -0.4806,  0.1888, -0.0617],
        [ 0.3558,  0.6158, -0.8599,  1.1136, -0.2691],
        [ 0.1366,  2.1078,  0.0322,  0.9173, -0.7663],
        [-0.2109, -1.1150,  0.3461,  0.5797, -0.0623],
        [ 0.4662, -0.2846,  1.5440, -0.7857,  0.7775],
        [-0.4085,  0.5001,  0.7159, -0.9582,  1.1342],
        [ 0.7358, -0.0676,  0.4093,  0.6470, -1.2525],
        [ 0.4209, -1.1493, -2.5390, -0.5123,  0.1896],
        [-0.1026,  0.6707, -1.5462, -1.3886,  0.8138],
        [-0.3662, -1.0210,  0.1277, -1.4965,  0.4556],
        [ 0.2223, -0.3035, -0.2468, -0.5720, -0.2104],
        [-0.8054,  1.6651,  1.6164,  0.9365, -0.2867],
        [ 1.0655, -0.8571, -0.3738,  0.4813,  1.0100],
        [-0.2176,  1.1231,  0.7701,  1.8725, -0.0440],
        [-0.6627, -1.3341,  0.2391, -0.1013, -2.0197],
        [ 2.3212,  0.5123,  0.0277,  0.4985,  0.9116],
        [ 0.3701, -1.0235, -0.9469, -0.2769,  1.3290],
        [-0.1712, -1.5453,  0.6391,  1.4576,  0.0420],
        [-

### （5）test4

一个模块也可以在 forward 函数中被调用多次。但是，被调用多次的模块，无论之后怎么更新，都使用同一组 parameters 的，即它们的参数是共享的，

In [11]:
class net4(nn.Module):
    def __init__(self):
        super(net4, self).__init__()
        self.linears = nn.ModuleList([nn.Linear(5, 10), nn.Linear(10, 10)])
    def forward(self, x):
        x = self.linears[0](x)
        x = self.linears[1](x)
        x = self.linears[1](x)
        return x

net = net4()
print(net)
for name, param in net.named_parameters():
    print(name, param.size())

net4(
  (linears): ModuleList(
    (0): Linear(in_features=5, out_features=10, bias=True)
    (1): Linear(in_features=10, out_features=10, bias=True)
  )
)
linears.0.weight torch.Size([10, 5])
linears.0.bias torch.Size([10])
linears.1.weight torch.Size([10, 10])
linears.1.bias torch.Size([10])


## 3. ModuleDict

In [22]:
net = nn.ModuleDict({
    'linear': nn.Linear(784, 256),
    'act': nn.ReLU(),
})
net['output'] = nn.Linear(256, 10) # 添加
print(net['linear']) # 访问
print(net.output)
print(net)

Linear(in_features=784, out_features=256, bias=True)
Linear(in_features=256, out_features=10, bias=True)
ModuleDict(
  (linear): Linear(in_features=784, out_features=256, bias=True)
  (act): ReLU()
  (output): Linear(in_features=256, out_features=10, bias=True)
)


## 4. 定义方式比较

### 场景一

网络中有很多相似或者重复的层,用 for 循环来创建它们。此时借助 ModuleList 定义方式

In [13]:
layers = [nn.Linear(10, 10) for i in range(5)]

In [15]:
class net5(nn.Module):
    def __init__(self):
        super(net5, self).__init__()
        self.linears = nn.ModuleList([nn.Linear(10, 10) for i in range(3)])

    def forward(self, x):
        for layer in self.linears:
            x = layer(x)
        return x

net = net5()
print(net)

net5(
  (linears): ModuleList(
    (0): Linear(in_features=10, out_features=10, bias=True)
    (1): Linear(in_features=10, out_features=10, bias=True)
    (2): Linear(in_features=10, out_features=10, bias=True)
  )
)


也可以用 Sequential 来实现，但要注意 * 这个操作符，它可以把一个 list 拆开成一个个独立的元素。但是，请注意这个 list 里面的模块必须是按照想要的顺序来进行排列的。

In [18]:
class net6(nn.Module):
    def __init__(self):
        super(net6, self).__init__()
        self.linear_list = [nn.Linear(10, 10) for i in range(3)]
        self.linears = nn.Sequential(*self.linears_list)

    def forward(self, x):
        self.x = self.linears(x)
        return x

net = net6()
print(net)

AttributeError: 'net6' object has no attribute 'linears_list'

### 场景二

当我们需要之前层的信息的时候，比如 ResNets 中的 shortcut 结构，或者是像 FCN 中用到的 skip architecture 之类的，当前层的结果需要和之前层中的结果进行融合，一般使用 ModuleList 比较方便，一个非常简单的例子如下：

In [8]:
import torch
import torch.nn as nn

In [17]:
class net7(nn.Module):
    def __init__(self):
        super(net7, self).__init__()
        self.linears = nn.ModuleList([nn.Linear(10, 20), nn.Linear(20, 30), nn.Linear(30, 50)])
        self.trace = []

    def forward(self, x):
        for layer in self.linears:
            x = layer(x)
            self.trace.append(x)
        return x

net = net7()
input  = torch.randn(32, 10) # input batch size: 32
output = net(input)
for each in net.trace:
    print(each.shape)

torch.Size([32, 20])
torch.Size([32, 30])
torch.Size([32, 50])


使用 trace 的列表来储存网络每层的输出结果，方便后面的层调用

# 5.2 快速搭建复杂网络

## 1. Unet模型块

In [19]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class DoubleConv(nn.Module):
    """(convolution => [BN] => ReLU) * 2"""

    def __init__(self, in_channels, out_channels, mid_channels=None):
        super().__init__()
        if not mid_channels:
            mid_channels = out_channels
        self.double_conv = nn.Sequential(
            nn.Conv2d(in_channels, mid_channels, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(mid_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(mid_channels, out_channels, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True)
        )

    def forward(self, x):
        return self.double_conv(x)

class Down(nn.Module):
    """Downscaling with maxpool then double conv"""

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.maxpool_conv = nn.Sequential(
            nn.MaxPool2d(2),
            DoubleConv(in_channels, out_channels)
        )

    def forward(self, x):
        return self.maxpool_conv(x)

class Up(nn.Module):
    """Upscaling then double conv"""

    def __init__(self, in_channels, out_channels, bilinear=True):
        super().__init__()

        if bilinear:
            self.up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
            self.conv = DoubleConv(in_channels, out_channels, in_channels // 2)
        else:
            self.up = nn.ConvTranspose2d(in_channels, in_channels // 2, kernel_size=2, stride=2)
            self.conv = DoubleConv(in_channels, out_channels)

    def forward(self, x1, x2):
        x1 = self.up(x1)
        diffY = x2.size()[2] - x1.size()[2]
        diffX = x2.size()[3] - x1.size()[3]

        x1 = F.pad(x1, [diffX // 2, diffX - diffX // 2,
                        diffY // 2, diffY - diffY // 2])
        x = torch.cat([x2, x1], dim=1)
        return self.conv(x)
class OutConv(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(OutConv, self).__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=1)

    def forward(self, x):
        return self.conv(x)

## 2. 利用模型块组装U-Net

In [20]:
class UNet(nn.Module):
    def __init__(self, n_channels, n_classes, bilinear=True):
        super(UNet, self).__init__()
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.bilinear = bilinear

        self.inc = DoubleConv(n_channels, 64)  #1
        self.down1 = Down(64, 128)  #2
        self.down2 = Down(128, 256)  #2
        self.down3 = Down(256, 512)  #2
        factor = 2 if bilinear else 1
        self.down4 = Down(512, 1024 // factor) #2
        self.up1 = Up(1024, 512 // factor, bilinear)
        self.up2 = Up(512, 256 // factor, bilinear)
        self.up3 = Up(256, 128 // factor, bilinear)
        self.up4 = Up(128, 64, bilinear)
        self.outc = OutConv(64, n_classes)

    def forward(self, x):
        x1 = self.inc(x)
        x2 = self.down1(x1)
        x3 = self.down2(x2)
        x4 = self.down3(x3)
        x5 = self.down4(x4)
        x = self.up1(x5, x4)
        x = self.up2(x, x3)
        x = self.up3(x, x2)
        x = self.up4(x, x1)
        logits = self.outc(x)
        return logits

# 5.3 修改模型

# 5.4 模型保存与读取