## 1. PyTorch构建模型

In [1]:
import torch
# Seed PyTorch's global RNG so that random parameter initialisation is repeatable.
torch.manual_seed(0)
# NOTE(review): deterministic=False together with benchmark=True lets cuDNN
# auto-tune and pick the fastest algorithms; on GPU this may make results differ
# between runs despite the seed above -- confirm speed is preferred here.
torch.backends.cudnn.deterministic= False
torch.backends.cudnn.benchmark = True
import torchvision.models as models
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data.dataset import Dataset

## pytorch常用网络

### Linear介绍 [全连接层]

In [None]:
# Fully connected (dense) layer.
# Signature: nn.Linear(in_features, out_features, bias=True)
in_features, out_features = 288, 128  # example sizes so the cell runs on a fresh kernel
linear = nn.Linear(in_features, out_features, bias=True)

### 卷积介绍 [2D卷积层]

In [None]:
# 2-D convolution layer.
# Signature: nn.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0,
#                      dilation=1, groups=1, bias=True, padding_mode='zeros')
in_channels, out_channels, kernel_size = 3, 32, 3  # example values so the cell runs
conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0,
                 dilation=1, groups=1, bias=True, padding_mode='zeros')
# kernel_size, stride and padding may each be given as an int or as a tuple.
# dilation is the spacing between kernel elements (1 means an ordinary dense kernel).

### 转置卷积介绍 [2D反卷积层]

In [None]:
# 2-D transposed convolution ("deconvolution") layer.
# Signature: nn.ConvTranspose2d(in_channels, out_channels, kernel_size, stride=1, padding=0,
#                               output_padding=0, groups=1, bias=True, dilation=1,
#                               padding_mode='zeros')
in_channels, out_channels, kernel_size = 32, 3, 3  # example values so the cell runs
convinv = nn.ConvTranspose2d(in_channels, out_channels, kernel_size, stride=1, padding=0,
                             output_padding=0, groups=1, bias=True, dilation=1,
                             padding_mode='zeros')
# padding controls the implicit zero-padding applied on the input side;
# output_padding adds extra size to one side of the output.

### 最大值池化层 [2D池化层]

In [None]:
# 2-D max-pooling layer.
# Signature: nn.MaxPool2d(kernel_size, stride=None, padding=0, dilation=1)
kernel_size = 2  # example value so the cell runs
# stride=None makes the stride default to kernel_size (non-overlapping windows).
maxpool = nn.MaxPool2d(kernel_size, stride=None, padding=0, dilation=1)

### 批量归一化层 [2D归一化层]

In [None]:
# 2-D batch-normalisation layer.
# Signature: nn.BatchNorm2d(num_features, eps=1e-05, momentum=0.1, affine=True,
#                           track_running_stats=True)
num_features = 32  # example channel count so the cell runs
ba = nn.BatchNorm2d(num_features, eps=1e-5, momentum=0.1, affine=True, track_running_stats=True)

# affine=True: the per-channel scale (gamma) and shift (beta) are learnable parameters.
# track_running_stats=True: running estimates of mean and variance are kept as buffers.

## 2. PyTorch 创建模型的四种方法

In [4]:
from collections import OrderedDict

### 定义在init，前向过程在forward

In [6]:
# Layers defined as plain attributes
class Net1(torch.nn.Module):
    """Small CNN whose layers are individual attributes of the module.

    Layers are created in ``__init__``; the activation and pooling are applied
    functionally in ``forward``. The first dense layer expects 32 * 3 * 3
    features, so the input must pool down to 3x3 (e.g. a 3-channel 6x6 image).
    """

    def __init__(self):
        super(Net1, self).__init__()
        self.conv1 = torch.nn.Conv2d(3, 32, 3, 1, 1)
        self.dense1 = torch.nn.Linear(32 * 3 * 3, 128)
        self.dense2 = torch.nn.Linear(128, 10)

    def forward(self, x):
        """Return the 10 output scores for a batch ``x`` of shape (N, 3, H, W)."""
        # The convolution is registered as ``conv1``; calling ``self.conv``
        # raised AttributeError.
        x = F.max_pool2d(F.relu(self.conv1(x)), 2)
        # Flatten everything except the batch dimension for the dense layers.
        x = x.view(x.size(0), -1)
        x = F.relu(self.dense1(x))
        x = self.dense2(x)

        return x

In [8]:
# Instantiate the attribute-style network.
model = Net1()

In [9]:
# Layers stored as attributes are accessed directly by attribute name.
model.conv1

Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))

### 序列集成型[利用nn.Sequential(顺序执行的层函数)]

- 访问各层只能通过数字索引

In [7]:
class Net2(torch.nn.Module):
    """Small CNN built from two ``Sequential`` containers of unnamed layers.

    Because the layers are passed positionally, they can only be reached by
    integer index (e.g. ``model.conv[0]``).
    """

    def __init__(self):
        super().__init__()
        # Convolutional feature extractor: conv -> ReLU -> 2x2 max-pool.
        feature_layers = [
            torch.nn.Conv2d(3, 32, 3, 1, 1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(2),
        ]
        # Classifier head: dense -> ReLU -> dense.
        classifier_layers = [
            torch.nn.Linear(32 * 3 * 3, 128),
            torch.nn.ReLU(),
            torch.nn.Linear(128, 10),
        ]
        self.conv = torch.nn.Sequential(*feature_layers)
        self.dense = torch.nn.Sequential(*classifier_layers)

    def forward(self, x):
        """Run the feature extractor, flatten per sample, then classify."""
        features = self.conv(x)
        batch_size = features.size(0)
        flat = features.view(batch_size, -1)
        return self.dense(flat)

In [12]:
# Layers passed positionally to Sequential are retrieved by integer index.
model = Net2()
model.conv[0]

Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))

### 序列添加型[利用Sequential类add_module顺序逐层添加]

- 给予各层的name属性

In [13]:
class Net3(torch.nn.Module):
    """Small CNN whose ``Sequential`` containers are filled via ``add_module``.

    Each layer is registered under an explicit name, so it can be reached as an
    attribute of its container (e.g. ``model.conv.conv1``).
    """

    def __init__(self):
        super().__init__()

        # Convolutional feature extractor, added layer by layer with a name.
        self.conv = torch.nn.Sequential()
        for layer_name, layer in (
            ("conv1", torch.nn.Conv2d(3, 32, 3, 1, 1)),
            ("relu1", torch.nn.ReLU()),
            ("pool1", torch.nn.MaxPool2d(2)),
        ):
            self.conv.add_module(layer_name, layer)

        # Classifier head, added the same way.
        self.dense = torch.nn.Sequential()
        for layer_name, layer in (
            ("dense1", torch.nn.Linear(32 * 3 * 3, 128)),
            ("relu2", torch.nn.ReLU()),
            ("dense2", torch.nn.Linear(128, 10)),
        ):
            self.dense.add_module(layer_name, layer)

    def forward(self, x):
        """Run the feature extractor, flatten per sample, then classify."""
        features = self.conv(x)
        flat = features.view(features.size(0), -1)
        return self.dense(flat)

In [17]:
# Layers registered with add_module are retrieved by the name they were given.
model = Net3()
model.conv.conv1

Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))

### 序列集成字典型[OrderedDict集成模型字典【‘name’:层函数】]

- name为key

In [20]:
class Net4(torch.nn.Module):
    """Small CNN whose ``Sequential`` containers are built from ``OrderedDict``s.

    The dictionary keys become the layer names, so layers can be reached as
    attributes of their container (e.g. ``model.conv.conv1``).
    """

    def __init__(self):
        super(Net4, self).__init__()
        self.conv = torch.nn.Sequential(
            OrderedDict([("conv1", torch.nn.Conv2d(3, 32, 3, 1, 1)),
                         ("relu1", torch.nn.ReLU()),
                         ("pool", torch.nn.MaxPool2d(2))]))

        self.dense = torch.nn.Sequential(
            OrderedDict([("dense1", torch.nn.Linear(32 * 3 * 3, 128)),
                         ("relu2", torch.nn.ReLU()),
                         ("dense2", torch.nn.Linear(128, 10))]))

    def forward(self, x):
        """Run the feature extractor, flatten per sample, then classify."""
        # ``conv1`` lives inside ``self.conv``; the module itself has no
        # ``conv1`` attribute, so the whole container must be called here.
        conv_out = self.conv(x)
        res = conv_out.view(conv_out.size(0), -1)
        out = self.dense(res)
        return out

In [25]:
# Layers named through the OrderedDict keys are retrieved by those names.
model = Net4()
model.conv.conv1

Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))

## 3. PyTorch 对模型参数的访问，初始化，共享

### 访问参数

- 如果采用序列集成型（直接向 nn.Sequential 传入各层），只能使用数字索引访问层，eg:net[1]；

- 如果采用序列添加型（add_module）或字典集成型（OrderedDict），除数字索引外还可以用层的name访问，eg:net.layer_name。

### 访问参数【权重参数名：层名.weight / 层名.bias】

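- layer.parameters() ----返回该层所有参数的迭代器（layer.named_parameters() 会同时给出参数名）；
- layer.weight , layer.bias-----访问该层权重和偏置；
- layer.weight.data / layer.weight.grad ------访问该层权重的具体数值/梯度（在 PyTorch 中是属性而不是方法）【bias也适用】；
- net.named_parameters() / net.state_dict() ----返回该网络的所有参数：前者是（参数名, 参数）的迭代器，后者是由参数名到张量的有序字典（对应 MXNet Gluon 中的 net.collect_params()）。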

In [29]:
# NOTE: without parentheses this displays the bound method object, not the
# parameters themselves; call .parameters() to obtain an iterator over them.
model.conv.conv1.parameters

<bound method Module.parameters of Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))>

In [31]:
# Weight of the first convolution layer (displayed as a Parameter).
model.conv.conv1.weight

Parameter containing:
tensor([[[[ 1.8588e-01,  1.1830e-01,  1.7910e-01],
          [-6.0694e-02,  1.2648e-01,  7.1135e-02],
          [ 7.3809e-02, -4.6837e-02, -1.2379e-01]],

         [[ 1.1960e-01, -6.5238e-02, -4.9806e-02],
          [ 1.8110e-01,  1.8658e-01, -1.1365e-01],
          [-6.7899e-02, -1.4177e-01, -9.2813e-02]],

         [[-5.5717e-02, -4.2587e-02,  1.2439e-01],
          [-1.0647e-01, -1.5759e-04,  3.6399e-02],
          [-1.4140e-01, -3.6170e-02,  4.5594e-02]]],


        [[[ 1.8899e-01,  3.7958e-02, -1.7555e-01],
          [-1.3407e-03, -1.1275e-01, -1.6665e-01],
          [ 6.3226e-02,  1.9218e-01, -1.7576e-01]],

         [[-1.8028e-01,  1.7894e-01,  1.9123e-01],
          [ 4.6231e-02,  1.2398e-02, -2.1908e-02],
          [ 1.6763e-01, -1.2918e-01,  7.9540e-02]],

         [[ 1.8170e-01, -3.5179e-02, -9.2398e-02],
          [-1.1363e-01, -1.6119e-01,  1.7509e-01],
          [-4.5471e-03, -1.9712e-02, -7.1500e-02]]],


        [[[-1.2019e-01, -2.4589e-02, -9.1015

In [32]:
# Bias of the first convolution layer (displayed as a Parameter).
model.conv.conv1.bias

Parameter containing:
tensor([ 0.0208,  0.1765,  0.0448,  0.1503,  0.0577,  0.1165, -0.0872,  0.0285,
         0.0732, -0.1383,  0.0878, -0.0737, -0.0869,  0.0606,  0.0911,  0.1191,
        -0.1159,  0.0227,  0.0961, -0.0410,  0.0785, -0.0181,  0.1075, -0.1841,
         0.0634, -0.0570, -0.1532, -0.0897, -0.0129, -0.1014, -0.0056, -0.1760],
       requires_grad=True)

In [36]:
# NOTE: without parentheses this displays the bound method (whose repr includes
# the module structure), not the parameters; call .named_parameters() to iterate
# over (name, parameter) pairs.
model.named_parameters

<bound method Module.named_parameters of Net4(
  (conv): Sequential(
    (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (relu1): ReLU()
    (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (dense): Sequential(
    (dense1): Linear(in_features=288, out_features=128, bias=True)
    (relu2): ReLU()
    (dense2): Linear(in_features=128, out_features=10, bias=True)
  )
)>

### 初始化[MXNet Gluon 中若非首次初始化需 force_reinit=True；PyTorch 的 torch.nn.init 函数为原地操作，可直接重复调用]

### init 利用各种分布初始化

In [None]:
# PyTorch modules have no Gluon-style `initialize()` method; the torch.nn.init
# functions overwrite parameters in place, so they can be re-applied at any time.
# Here every weight is drawn from N(0, 0.1^2) and every bias is reset to zero.
for name, param in model.named_parameters():
    if name.endswith("weight"):
        nn.init.normal_(param, mean=0.0, std=0.1)
    elif name.endswith("bias"):
        nn.init.zeros_(param)

### init 对网络参数进行常数初始化

In [None]:
# PyTorch equivalent of a constant initialiser: set every weight to 1 in place
# and reset every bias to zero.
for name, param in model.named_parameters():
    if name.endswith("weight"):
        nn.init.constant_(param, 1.0)
    elif name.endswith("bias"):
        nn.init.zeros_(param)

### 自定义初始化

### MXNet Gluon 中需继承 init 的 Initializer 类并实现函数\_init\_weight(self,name,data)；PyTorch 中直接编写函数并对参数原地赋值即可

In [None]:
def _init_weight(self, name, data):
    """Custom in-place initialiser for a single tensor.

    Args:
        self: Unused; kept so the signature matches the original method form.
        name: Parameter name, only used for the progress message.
        data: Tensor (or Parameter) to overwrite in place.
    """
    print('Init', name, data.shape)
    # In-place edits of parameters that require grad must not be recorded by autograd.
    with torch.no_grad():
        data.uniform_(-10, 10)
        # Roughly half of the values become 0; the rest stay uniformly
        # distributed over [-10, -5] U [5, 10].
        data.mul_((data.abs() >= 5).to(data.dtype))


# Apply the custom initialiser to every weight tensor of the model
# (PyTorch has no `initialize()` method that would dispatch to it).
for name, param in model.named_parameters():
    if name.endswith("weight"):
        _init_weight(None, name, param)

### 参数共享

- 多个层共享同一个参数对象时，参数和梯度都是共享的；反向传播时，该参数的梯度是所有共享层梯度之和

- 由于梯度累加在同一个参数上，优化器每一步只对该共享参数更新一次