# 模型构造

## 继承 Module 类来构造模型

In [1]:
import torch
from torch import nn

class MLP(nn.Module):
    """Multilayer perceptron with one hidden layer (784 -> 256 -> 10)."""

    def __init__(self, **kwargs):
        # Forward any keyword arguments to nn.Module's constructor.
        super().__init__(**kwargs)
        # Submodules are registered in assignment order; print(net) lists
        # them in that same order.
        self.hidden = nn.Linear(784, 256)  # hidden layer
        self.act = nn.ReLU()               # activation after the hidden layer
        self.output = nn.Linear(256, 10)   # output layer

    def forward(self, x):
        """Return ``output(act(hidden(x)))`` for the input ``x``."""
        hidden_out = self.hidden(x)
        activated = self.act(hidden_out)
        return self.output(activated)

In [2]:
# Create a random batch (2 rows, 784 columns), instantiate the MLP class and
# print the submodules it registered.
X = torch.rand(2,784)  # not passed to the network in this cell
net = MLP()
print(net)

MLP(
  (hidden): Linear(in_features=784, out_features=256, bias=True)
  (act): ReLU()
  (output): Linear(in_features=256, out_features=10, bias=True)
)


ModuleList可以像List一样进行append和extend操作

ModuleDict可以像字典一样进行添加和访问

In [3]:
# Start from an empty ModuleList and grow it like a Python list.
net = nn.ModuleList()
# extend() adds several modules at once ...
net.extend([nn.Linear(784, 256), nn.ReLU()])
# ... and append() adds a single one at the end.
net.append(nn.Linear(256, 10))
print(net)

ModuleList(
  (0): Linear(in_features=784, out_features=256, bias=True)
  (1): ReLU()
  (2): Linear(in_features=256, out_features=10, bias=True)
)


In [5]:
# Start from a dict of named submodules.
net = nn.ModuleDict({'linear': nn.Linear(784, 256), 'activation': nn.ReLU()})

# Add a new entry with dictionary-style assignment.
net['output'] = nn.Linear(256, 10)
# Entries can be read back by key.
print(net['output'], net['linear'])
print(net)
# Assigning to an existing key replaces the module stored under it.
net['activation'] = nn.ReLU6()
print(net)

Linear(in_features=256, out_features=10, bias=True) Linear(in_features=784, out_features=256, bias=True)
ModuleDict(
  (activation): ReLU()
  (linear): Linear(in_features=784, out_features=256, bias=True)
  (output): Linear(in_features=256, out_features=10, bias=True)
)
ModuleDict(
  (activation): ReLU6()
  (linear): Linear(in_features=784, out_features=256, bias=True)
  (output): Linear(in_features=256, out_features=10, bias=True)
)



创建一个 FancyMLP 类

In [6]:
class FancyMLP(nn.Module):
    """Module that reuses one linear layer and a constant random matrix.

    The forward pass applies the same ``nn.Linear(20, 20)`` twice (so its
    parameters are shared between both applications), multiplies by a fixed
    random 20x20 matrix in between, and uses Python control flow that depends
    on the norm of the intermediate result.
    """

    def __init__(self, **kwargs):
        super(FancyMLP, self).__init__(**kwargs)

        # Constant (non-trainable) random weight. It is registered as a buffer
        # rather than stored as a plain tensor attribute so that it follows
        # the module through .to()/.double()/.cuda(); with a plain attribute
        # the matrix product in forward() fails after such a conversion.
        # persistent=False keeps it out of state_dict, so the saved keys are
        # the same as with a plain attribute.
        self.register_buffer('rand_weight', torch.rand((20, 20)), persistent=False)
        self.linear = nn.Linear(20, 20)

    def forward(self, x):
        """Return a scalar tensor computed from a ``(batch, 20)`` input ``x``."""
        x = self.linear(x)

        # The buffer does not require grad, so it acts as a constant here.
        x = nn.functional.relu(torch.mm(x, self.rand_weight) + 1)
        # Reuse the same linear layer: both applications share parameters.
        x = self.linear(x)
        # Data-dependent control flow: halve until the norm is at most 1 ...
        while x.norm().item() > 1:
            x /= 2
        # ... then scale up once if the norm ended up below 0.8.
        if x.norm().item() < 0.8:
            x *= 10
        return x.sum()

In [8]:
# Run FancyMLP once on a random batch (2 rows, 20 columns): print the module
# structure first, then the value returned by the forward pass.
X = torch.rand(2, 20)
net = FancyMLP()
print(net)
print(net(X))

FancyMLP(
  (linear): Linear(in_features=20, out_features=20, bias=True)
)
tensor(-6.8781, grad_fn=<SumBackward0>)


嵌套调用

In [9]:
class NestMLP(nn.Module):
    """Module that wraps a small ``nn.Sequential`` (Linear 40 -> 30, then ReLU)."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # The inner Sequential is registered as the submodule ``net``.
        inner_layers = (nn.Linear(40, 30), nn.ReLU())
        self.net = nn.Sequential(*inner_layers)

    def forward(self, x):
        """Delegate the computation to the wrapped Sequential."""
        wrapped = self.net
        return wrapped(x)
    
# Modules nest freely: a custom module, a plain layer and another custom
# module are chained in one Sequential (40 -> 30 -> 20 -> FancyMLP).
net = nn.Sequential(NestMLP(), nn.Linear(30, 20), FancyMLP())
print(net)

Sequential(
  (0): NestMLP(
    (net): Sequential(
      (0): Linear(in_features=40, out_features=30, bias=True)
      (1): ReLU()
    )
  )
  (1): Linear(in_features=30, out_features=20, bias=True)
  (2): FancyMLP(
    (linear): Linear(in_features=20, out_features=20, bias=True)
  )
)


# 模型参数的访问、初始化和共享

In [12]:
import torch
from torch import nn
from torch.nn import init
# PyTorch has already applied its default initialization to these layers.
net = nn.Sequential(
    nn.Linear(4, 3),
    nn.ReLU(),
    nn.Linear(3, 1),
)
print(net)
X = torch.rand(2, 4)
# Reduce the network output to a scalar.
Y = torch.sum(net(X))

Sequential(
  (0): Linear(in_features=4, out_features=3, bias=True)
  (1): ReLU()
  (2): Linear(in_features=3, out_features=1, bias=True)
)


In [13]:
# Walk over every parameter of the network; names are prefixed with the
# index of the layer inside the Sequential.
for name, param in net.named_parameters():
    print(name, param.size())

0.weight torch.Size([3, 4])
0.bias torch.Size([3])
2.weight torch.Size([1, 3])
2.bias torch.Size([1])


查看单个层中的变量

In [15]:
# Restrict the walk to the first layer: the names carry no index prefix here.
for name, param in net[0].named_parameters():
    print(name, param.size(), type(param))

weight torch.Size([3, 4]) <class 'torch.nn.parameter.Parameter'>
bias torch.Size([3]) <class 'torch.nn.parameter.Parameter'>


nn.parameter.Parameter 是 Tensor 的子类

如果一个 tensor 是 Parameter，它会自动被添加到模型的参数列表里

In [34]:
# Fresh network; PyTorch has already applied its default initialization.
net = nn.Sequential(
    nn.Linear(4, 3),
    nn.ReLU(),
    nn.Linear(3, 1),
)
# Take the first parameter yielded by layer 0.
weight_0 = next(net[0].parameters())
print(weight_0.data)
print(weight_0.grad)  # no backward pass has run yet
Y = net(X).sum()
print(Y)
Y.backward()
print(weight_0.grad)  # filled in by backward()

tensor([[-0.2938, -0.4711,  0.2268, -0.4570],
        [-0.2695,  0.1622,  0.2473, -0.3648],
        [ 0.1985, -0.1450,  0.1226,  0.3184]])
None
tensor(-0.4604, grad_fn=<SumBackward0>)
tensor([[0.0000, 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000, 0.0000],
        [0.4134, 0.0821, 0.4492, 0.5819]])


### 参数初始化

#### 调库初始化

In [37]:
# Re-initialize with torch.nn.init: weights from a normal distribution
# (mean 0, std 0.01), biases set to the constant 0.
for name, param in net.named_parameters():
    if 'weight' in name:
        init.normal_(param, mean=0, std=0.01)
    if 'bias' in name:
        init.constant_(param, val=0)
    print(name, param)

0.weight Parameter containing:
tensor([[ 0.0026,  0.0045, -0.0048, -0.0095],
        [ 0.0094,  0.0189,  0.0045,  0.0167],
        [ 0.0104, -0.0103, -0.0008,  0.0169]], requires_grad=True)
0.bias Parameter containing:
tensor([0., 0., 0.], requires_grad=True)
2.weight Parameter containing:
tensor([[ 0.0133,  0.0133, -0.0100]], requires_grad=True)
2.bias Parameter containing:
tensor([0.], requires_grad=True)


#### 自定义初始化方法

In [40]:
def normal_(param, mean=0, std=1):
    """Fill ``param`` in place with samples from a normal distribution.

    Args:
        param: tensor (or parameter) to overwrite.
        mean: mean of the normal distribution.
        std: standard deviation of the normal distribution.

    Returns:
        The same ``param`` object, after the in-place update.
    """
    # no_grad() keeps the in-place write from being recorded by autograd.
    with torch.no_grad():
        # The body previously referred to an undefined name ``tensor``,
        # which raised NameError on every call.
        return param.normal_(mean, std)

def init_weight(tensor):
    """Custom in-place initializer.

    Draws every entry uniformly from [-10, 10] and then zeroes the entries
    whose absolute value is below 5. Returns nothing; ``tensor`` is modified
    in place without autograd tracking.
    """
    with torch.no_grad():
        tensor.uniform_(-10, 10)
        # 1.0 where |value| >= 5, 0.0 elsewhere.
        keep_mask = tensor.abs().ge(5).float()
        tensor.mul_(keep_mask)

In [41]:
# Apply the custom initializer to the weight tensors only.
for name, param in net.named_parameters():
    if 'weight' not in name:
        continue
    init_weight(param)
    print(name, param.data)

0.weight tensor([[ 0.0000,  0.0000, -6.2283,  8.8010],
        [-0.0000,  9.8243, -7.3588, -6.8571],
        [-5.9090, -0.0000,  7.2179,  8.3420]])
2.weight tensor([[-8.9241, -8.2970,  0.0000]])


直接修改 param.data 可以改变参数的值，而这一修改不会被 autograd 记录，因此不影响梯度

## 共享模型参数

1. 在forward中调用同一层
2. 在Sequential中应用同一个模块

In [47]:
# One Linear instance placed twice in a Sequential: both positions refer to
# the same module object and therefore to the same weight.
linear = nn.Linear(1, 1, bias=False)
net = nn.Sequential(linear, linear)
for name, param in net.named_parameters():
    init.constant_(param, 3)
    print(name, param.data)

X = torch.ones(1, 1)
y = net(X).sum()
y.backward()
# The gradient accumulates over both uses of the shared weight.
print(net[0].weight.grad)

0.weight tensor([[3.]])
tensor([[6.]])


# 读取和存储

## 读写tensor

In [50]:
import torch
from torch import nn

import os

# torch.save does not create missing parent directories, so make sure the
# target folder exists; otherwise this cell fails on a fresh checkout.
os.makedirs('./savings', exist_ok=True)

# Write a single tensor to disk.
x = torch.ones(3)
torch.save(x, './savings/x.pt')

In [53]:
# Read the tensor back from the file written by the previous cell.
# NOTE(review): torch.load unpickles the file, so only load files you trust.
x2 = torch.load('./savings/x.pt')
print(x2.data)

tensor([1., 1., 1.])


可以存储一个 tensor 列表，也可以存储一个从字符串映射到 tensor 的字典

In [58]:
y = torch.zeros(4)

# Round-trip a list of tensors.
torch.save([x, y], './savings/xy.pt')
xy_list = torch.load('./savings/xy.pt')
xy_list  # not the last expression of the cell, so it is not displayed

# Round-trip a dict that maps strings to tensors.
torch.save({'x': x, 'y': y}, './savings/xy_dict.pt')
xy = torch.load('./savings/xy_dict.pt')
xy

{'x': tensor([1., 1., 1.]), 'y': tensor([0., 0., 0., 0.])}

## 读写模型

In [83]:
# A small 3 -> 2 -> 1 network used for the save/load examples.
net = nn.Sequential(nn.Linear(3, 2), nn.ReLU(), nn.Linear(2, 1))
# The model's state_dict is evaluated here but not displayed, because it is
# not the last expression of the cell.
net.state_dict()

# SGD with momentum over all parameters of the network.
optimizer = torch.optim.SGD(params=net.parameters(), lr=0.01, momentum=0.9)
# The optimizer has a state_dict of its own; this one is the cell's output.
optimizer.state_dict()

{'state': {},
 'param_groups': [{'lr': 0.01,
   'momentum': 0.9,
   'dampening': 0,
   'weight_decay': 0,
   'nesterov': False,
   'params': [4793198704, 4791889856, 4791887552, 4791814112]}]}

### 1. 保存和加载state_dict

In [91]:
PATH = './savings/model.pt'

# Reference output of the original network on a random batch.
X = torch.randn(10, 3)
y = net(X)

# Persist only the parameters, not the module object itself.
torch.save(net.state_dict(), PATH)

# Rebuild the same architecture, then load the saved parameters into it.
net2 = nn.Sequential(nn.Linear(3, 2), nn.ReLU(), nn.Linear(2, 1))
net2.load_state_dict(torch.load(PATH))
y2 = net2(X)

# Element-wise comparison of the outputs of the restored and original model.
y2 == y

tensor([[True],
        [True],
        [True],
        [True],
        [True],
        [True],
        [True],
        [True],
        [True],
        [True]])