In [1]:
import torch
from torch.autograd import Variable
from torch import nn, optim

In [2]:
class SimpleCNN(nn.Module) :
    def __init__(self) :
        # b, 3, 32, 32
        super().__init__()
        layer1 = nn.Sequential()
        layer1.add_module('conv_1', nn.Conv2d(3, 32, 3, 1, padding=1))
        #b, 32, 32, 32
        layer1.add_module('relu_1', nn.ReLU(True))
        layer1.add_module('pool_1', nn.MaxPool2d(2, 2)) # b, 32, 16, 16
        self.layer1 = layer1
        
        layer2 = nn.Sequential()
        layer2.add_module('conv_2', nn.Conv2d(32, 64, 3, 1, padding=1))
        # b, 64, 16, 16
        layer2.add_module('relu_2', nn.ReLU(True))
        layer2.add_module('pool_2', nn.MaxPool2d(2, 2)) # b, 64, 8, 8
        self.layer2 = layer2
        
        layer3 = nn.Sequential()
        layer3.add_module('conv_3', nn.Conv2d(64, 128, 3, 1, padding=1))
        # b, 128, 8, 8
        layer3.add_module('relu_3', nn.ReLU(True))
        layer3.add_module('pool_3', nn.MaxPool2d(2, 2)) # b, 128, 4, 4
        self.layer3 = layer3
        
        layer4 = nn.Sequential()
        layer4.add_module('fc_1', nn.Linear(2048, 512))
        layer4.add_module('fc_relu1', nn.ReLU(True))
        layer4.add_module('fc_2', nn.Linear(512, 64))
        layer4.add_module('fc_relu2', nn.ReLU(True))
        layer4.add_module('fc_3', nn.Linear(64, 10))
        self.layer4 = layer4
    
    def forward(self, x) :
        conv1 = self.layer1(x)
        conv2 = self.layer2(conv1)
        conv3 = self.layer3(conv2)
        fc_input = conv3.view(conv3.size(0), -1)
        fc_out = self.layer4(fc_input)
        return fc_out

In [3]:
# 建立模型

model = SimpleCNN()
print(model)

SimpleCNN(
  (layer1): Sequential(
    (conv_1): Conv2d (3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (relu_1): ReLU(inplace)
    (pool_1): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), dilation=(1, 1))
  )
  (layer2): Sequential(
    (conv_2): Conv2d (32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (relu_2): ReLU(inplace)
    (pool_2): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), dilation=(1, 1))
  )
  (layer3): Sequential(
    (conv_3): Conv2d (64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (relu_3): ReLU(inplace)
    (pool_3): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), dilation=(1, 1))
  )
  (layer4): Sequential(
    (fc_1): Linear(in_features=2048, out_features=512)
    (fc_relu1): ReLU(inplace)
    (fc_2): Linear(in_features=512, out_features=64)
    (fc_relu2): ReLU(inplace)
    (fc_3): Linear(in_features=64, out_features=10)
  )
)


In [4]:
# 提取前两层

new_model = nn.Sequential(*list(model.children())[:2])
print(new_model)

Sequential(
  (0): Sequential(
    (conv_1): Conv2d (3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (relu_1): ReLU(inplace)
    (pool_1): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), dilation=(1, 1))
  )
  (1): Sequential(
    (conv_2): Conv2d (32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (relu_2): ReLU(inplace)
    (pool_2): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), dilation=(1, 1))
  )
)


In [5]:
# 提取所有的卷积层

conv_model = nn.Sequential()
for name, module in model.named_modules() :
    if isinstance(module, nn.Conv2d) :
        conv_model.add_module(name, module)

print(conv_model)

Sequential(
  (layer1.conv_1): Conv2d (3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (layer2.conv_2): Conv2d (32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (layer3.conv_3): Conv2d (64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)


In [6]:
# 提取模型中的参数

for name, param in model.named_parameters() :
    print('{} : {}'.format(name, param.shape))

layer1.conv_1.weight : torch.Size([32, 3, 3, 3])
layer1.conv_1.bias : torch.Size([32])
layer2.conv_2.weight : torch.Size([64, 32, 3, 3])
layer2.conv_2.bias : torch.Size([64])
layer3.conv_3.weight : torch.Size([128, 64, 3, 3])
layer3.conv_3.bias : torch.Size([128])
layer4.fc_1.weight : torch.Size([512, 2048])
layer4.fc_1.bias : torch.Size([512])
layer4.fc_2.weight : torch.Size([64, 512])
layer4.fc_2.bias : torch.Size([64])
layer4.fc_3.weight : torch.Size([10, 64])
layer4.fc_3.bias : torch.Size([10])


In [7]:
# 权重初始化
from torch.nn import init

for m in model.modules() :
    if isinstance(m, nn.Conv2d) :
        init.normal(m.weight.data)
        init.xavier_normal(m.weight.data)
        init.kaiming_normal(m.weight.data)
        m.bias.data.fill_(0)
    elif isinstance(m, nn.Linear) :
        m.weight.data.normal_()

关于参数初始化可参考[深度学习的weight initialization](https://zhuanlan.zhihu.com/p/25110150)

可以从torch的[文档](http://pytorch.org/docs/master/nn.html?highlight=init%20xavier_normal#torch.nn.init.xavier_normal)中得到

- `init.xvaier_uniform()`一般用于tanh的初始化，结果采样于均匀分布 $$U(-a, a) \sim [-\frac {\sqrt{6}} {\sqrt{fan\_in + fan\_out}}, \frac {\sqrt{6}} {\sqrt{fan\_in + fan\_out}}]$$
- `init.xvarier_normal()`，结果采样于正态分布 $$N(0, \sqrt{\frac 2 {fan\_in + fan\_out}})$$
- `init.kaiming_uniform()` 结果采样于均匀分布 $$U(-a, a) \sim [-\frac {\sqrt{6}} {\sqrt{(1+a^2) \times fan\_out}}, \frac {\sqrt{6}} {\sqrt{(1+a^2) \times fan\_out}}]$$
- `init.kaiming_normal()`一般用于ReLU的初始化，初始化方法为正态分布 $$N(0, \sqrt{\frac 2 {(1 + a^2) \times fan\_in}})$$