# 层的创建

In [2]:
import torch
from torch import nn
from torch.nn import  functional as F
# nn.Sequential is a special Module that runs its child modules in order.
net = nn.Sequential(
    nn.Linear(20, 256),
    nn.ReLU(),
    nn.Linear(256, 10),
)

# A batch of 2 random samples with 20 features each.
X = torch.rand(2, 20)
net(X)

tensor([[-0.0196, -0.0897,  0.0513, -0.1154, -0.1532, -0.2161,  0.2603,  0.2345,
          0.4629, -0.1953],
        [ 0.0281,  0.0341, -0.0390, -0.0731, -0.1994, -0.1188,  0.0779,  0.1747,
          0.2231, -0.1378]], grad_fn=<AddmmBackward>)

In [2]:
class MLP(nn.Module):
    """Multilayer perceptron: Linear(20, 256) -> ReLU -> Linear(256, 10)."""

    def __init__(self):
        super().__init__()
        self.hidden = nn.Linear(20, 256)  # hidden layer
        self.out = nn.Linear(256, 10)  # output layer

    def forward(self, X):
        """Apply the hidden layer, a ReLU, then the output layer to ``X``."""
        hidden_activation = F.relu(self.hidden(X))
        return self.out(hidden_activation)
# Instantiate the custom block and run it on X (defined in an earlier cell);
# the last expression is what the cell displays.
net = MLP()
net(X)

tensor([[-0.0670,  0.0316,  0.1461, -0.0815,  0.0936,  0.0773,  0.0243, -0.0898,
          0.0654, -0.2084],
        [-0.1193,  0.1378,  0.0961,  0.1110,  0.0284,  0.1244, -0.0353, -0.2349,
          0.0255, -0.2897]], grad_fn=<AddmmBackward>)

In [3]:
class MySquential(nn.Module):
    """Minimal re-implementation of ``nn.Sequential``.

    Args:
        *args: Modules to chain; they are applied in the order given.
    """

    def __init__(self, *args):
        super().__init__()
        for idx, block in enumerate(args):
            # Key each child by its position as a string. nn.Module builds
            # parameter names by concatenating these keys ("0.weight", ...),
            # so a non-string key breaks state_dict()/named_parameters()/repr().
            # Keying by position also keeps a layer that is passed twice as
            # two entries, so it is applied twice in forward().
            self._modules[str(idx)] = block

    def forward(self, X):
        """Feed ``X`` through every registered child in insertion order."""
        for block in self._modules.values():
            X = block(X)
        return X
# Build the 20 -> 256 -> 10 network with the hand-written container and run it
# on X (defined in an earlier cell).
net = MySquential(nn.Linear(20,256),nn.ReLU(),nn.Linear(256,10))
net(X)

tensor([[ 0.0364,  0.0065,  0.0669,  0.0297, -0.0176, -0.2170,  0.0841,  0.1714,
          0.0037,  0.1256],
        [ 0.2219,  0.0308,  0.0727, -0.1311, -0.0157, -0.2818,  0.1174,  0.2104,
          0.0446,  0.0442]], grad_fn=<AddmmBackward>)

In [4]:
# Run arbitrary code inside the forward pass
class FixedHiddenMLP(nn.Module):
    """MLP whose forward pass mixes one trainable layer with a fixed random weight.

    The same ``nn.Linear(20, 20)`` is applied twice (so both applications share
    parameters) with a constant random projection in between. The forward pass
    returns a scalar: the sum of the rescaled activations.
    """

    def __init__(self):
        super().__init__()
        # Constant random weights that are never trained. Registering them as a
        # non-persistent buffer keeps them out of state_dict() and parameters()
        # (as with a plain attribute), while .to()/.cuda()/.double() now move
        # them together with the trainable layer, so torch.mm in forward()
        # does not hit a device or dtype mismatch.
        self.register_buffer('rand_weight', torch.rand((20, 20)), persistent=False)
        self.linear = nn.Linear(20, 20)

    def forward(self, X):
        """Return a 0-dim tensor computed from a ``(batch, 20)`` input ``X``."""
        X = self.linear(X)
        X = F.relu(torch.mm(X, self.rand_weight) + 1)  # matrix multiply by the fixed weights
        X = self.linear(X)  # reuse the same layer a second time
        # Plain Python control flow: halve until the L1 norm is at most 1.
        while X.abs().sum() > 1:
            X /= 2
        return X.sum()
# Instantiate the block and run it on X (defined in an earlier cell); the
# forward pass ends in .sum(), so the displayed result is a scalar tensor.
net = FixedHiddenMLP()
net(X)

tensor(-0.0759, grad_fn=<SumBackward0>)

In [5]:
# Nest and mix different kinds of blocks
class NestMLP(nn.Module):
    """An ``nn.Sequential`` feature extractor (20 -> 64 -> 32) followed by Linear(32, 16)."""

    def __init__(self):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(20, 64), nn.ReLU(),
            nn.Linear(64, 32), nn.ReLU(),
        )
        self.linear = nn.Linear(32, 16)

    def forward(self, X):
        """Run ``X`` through the nested Sequential, then the final linear layer."""
        features = self.net(X)
        return self.linear(features)
# Chain three different kinds of block: NestMLP (20 -> 16 features), a plain
# Linear(16, 20), and FixedHiddenMLP (takes 20 features, returns a scalar sum).
chimera = nn.Sequential(NestMLP(),nn.Linear(16,20),FixedHiddenMLP())
chimera(X)

tensor(0.2178, grad_fn=<SumBackward0>)

# 参数管理

In [12]:
import torch
from torch import nn

# Small two-layer MLP used to explore how parameters are accessed.
net = nn.Sequential(nn.Linear(4,8),nn.ReLU(),nn.Linear(8,1))
X = torch.rand(size=(2,4))
net(X)  # forward pass; the result is discarded (not the cell's last expression)
print(net)
print(net.state_dict())  # every parameter of the container, keyed "<child index>.<name>"
print(net[2].state_dict())  # parameters of the last Linear layer only
print(type(net[2].bias))
print(net[2].bias)  # the Parameter object itself
print(net[2].bias.data)  # the underlying tensor value
print(net[2].weight.grad)# access the gradient; no backward() is called in this cell, so presumably None
print(*[(name,param.shape) for name,param in net[0].named_parameters()])  # first layer only
print(*[(name,param.shape) for name,param in net.named_parameters()])  # whole network
net.state_dict()['2.bias'].data  # the same bias, looked up by its state_dict key

Sequential(
  (0): Linear(in_features=4, out_features=8, bias=True)
  (1): ReLU()
  (2): Linear(in_features=8, out_features=1, bias=True)
)
OrderedDict([('0.weight', tensor([[ 0.1711,  0.4533, -0.4817,  0.2418],
        [-0.1085, -0.2881,  0.4541, -0.4623],
        [-0.1985,  0.1929, -0.2454, -0.0936],
        [ 0.2950, -0.0810,  0.1625,  0.2160],
        [ 0.3835,  0.2017, -0.2506,  0.1991],
        [ 0.3058, -0.0937,  0.3095,  0.1257],
        [ 0.0595, -0.0868,  0.3181, -0.3787],
        [ 0.4095, -0.0234, -0.3251, -0.2819]])), ('0.bias', tensor([-0.0442, -0.1934,  0.0892, -0.2354,  0.4709,  0.2289, -0.1532, -0.2953])), ('2.weight', tensor([[-0.1126, -0.1876, -0.1300,  0.1244, -0.3233, -0.0683,  0.1904,  0.1583]])), ('2.bias', tensor([0.0429]))])
OrderedDict([('weight', tensor([[-0.1126, -0.1876, -0.1300,  0.1244, -0.3233, -0.0683,  0.1904,  0.1583]])), ('bias', tensor([0.0429]))])
<class 'torch.nn.parameter.Parameter'>
Parameter containing:
tensor([0.0429], requires_grad=True)
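tensor([0.0429])
None
('weight', torch.Size([8, 4])) ('bias', torch.Size([8]))
('0.weight', torch.Size([8, 4])) ('0.bias', torch.Size([8])) ('2.weight', torch.Size([1, 8])) ('2.bias', torch.Size([1]))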

tensor([0.0429])

In [15]:
# Built-in initializers
def init_normal(m):
    """Initialize an ``nn.Linear`` with N(0, 0.01^2) weights and a zero bias.

    Intended for ``net.apply``; modules of any other type are left untouched.

    Args:
        m: The module being visited.
    """
    if type(m) == nn.Linear:
        nn.init.normal_(m.weight, mean=0, std=0.01)
        # nn.Linear(..., bias=False) stores bias as None; skip it instead of crashing.
        if m.bias is not None:
            nn.init.zeros_(m.bias)
# Run init_normal on net and on each of its submodules.
net.apply(init_normal)
# Evaluated but not displayed: this is not the cell's last expression.
net[0].weight.data[0],net[0].bias.data[0]
def init_constant(m):
    """Initialize an ``nn.Linear`` with all-ones weights and a zero bias.

    Intended for ``net.apply``; modules of any other type are left untouched.

    Args:
        m: The module being visited.
    """
    if type(m) == nn.Linear:
        nn.init.constant_(m.weight, 1)
        # nn.Linear(..., bias=False) stores bias as None; skip it instead of crashing.
        if m.bias is not None:
            nn.init.zeros_(m.bias)
# Re-initialize every Linear layer with constant weights, then show the first
# weight row and first bias entry of layer 0 as the cell output.
net.apply(init_constant)
net[0].weight.data[0], net[0].bias.data[0]

(tensor([1., 1., 1., 1.]), tensor(0.))

In [16]:
# Apply different initializers to different blocks
def xavier(m):
    """Xavier-uniform initialize the weight of an ``nn.Linear``; ignore other modules."""
    if type(m) != nn.Linear:
        return
    nn.init.xavier_uniform_(m.weight)
def init_43(m):
    """Fill the weight of an ``nn.Linear`` with the constant 42; ignore other modules.

    Note: despite the ``43`` in the name, the value written is 42.
    """
    if type(m) != nn.Linear:
        return
    nn.init.constant_(m.weight, 42)
# Initialize each layer with its own scheme: Xavier-uniform for the first
# Linear layer, the constant initializer for the last one.
net[0].apply(xavier)
net[2].apply(init_43)
print(net[0].weight.data[0])
print(net[2].weight.data)

tensor([ 0.0667,  0.1990,  0.2898, -0.3886])
tensor([[42., 42., 42., 42., 42., 42., 42., 42.]])


In [17]:
def my_init(m):
    """Custom initializer for ``nn.Linear`` layers; other modules are ignored.

    Prints the name and shape of the layer's first parameter, draws the weights
    from U(-10, 10), then zeroes every weight whose magnitude is below 5.
    """
    if type(m) != nn.Linear:
        return
    first_name, first_param = next(iter(m.named_parameters()))
    print('Init', first_name, first_param.shape)
    nn.init.uniform_(m.weight, -10, 10)
    # Boolean mask: True keeps the weight, False multiplies it by zero.
    keep_mask = m.weight.data.abs() >= 5
    m.weight.data *= keep_mask
# Apply the custom initializer to every Linear layer of net.
net.apply(my_init)
# Evaluated but not displayed: this is not the cell's last expression.
net[0].weight[:2]
# Parameters can also be overwritten directly through .data.
net[0].weight.data[:] = 42
net[0].weight

Init weight torch.Size([8, 4])
Init weight torch.Size([1, 8])


tensor([[ 0.0000,  0.0000,  8.3999, -9.0357],
        [ 5.4485,  6.3329,  0.0000, -0.0000]], grad_fn=<SliceBackward>)

In [None]:
# Application: parameter tying -- share weights between layers
shared = nn.Linear(8,8)
# The same module object is placed at positions 2 and 4 of the Sequential.
net = nn.Sequential(nn.Linear(4,8),nn.ReLU(),shared,nn.ReLU(),shared,
                    nn.ReLU(),nn.Linear(8,1))
net(X)
# Compare the first weight row of both positions before and after a write.
print(net[2].weight.data[0]==net[4].weight.data[0])
# net[2] and net[4] are one object, so a write through net[2] is also seen through net[4].
net[2].weight.data[0,0]=100
print(net[2].weight.data[0]==net[4].weight.data[0])

# 自定义层

In [18]:
import torch
import torch.nn.functional as F
from torch import nn
class CenteredLayer(nn.Module):
    """Parameter-free layer that subtracts the mean of all elements from its input."""

    def __init__(self):
        super().__init__()

    def forward(self, X):
        """Return ``X`` shifted so that its overall mean is zero."""
        overall_mean = X.mean()
        return X - overall_mean


layer = CenteredLayer()
layer(torch.FloatTensor([1, 2, 3, 4, 5]))

tensor([-2., -1.,  0.,  1.,  2.])

In [19]:
# Use the custom layer as a component of a larger model; after centering,
# the mean of the output should be zero up to floating-point error.
net = nn.Sequential(nn.Linear(8,128),CenteredLayer())
Y = net(torch.rand(4,8))
Y.mean()

tensor(4.1910e-09, grad_fn=<MeanBackward0>)

In [30]:
# Custom layer with parameters
class MyLinear(nn.Module):
    """Fully connected layer with a built-in ReLU: ``relu(X @ weight + bias)``.

    Args:
        in_units: Number of input features.
        units: Number of output features.
    """

    def __init__(self, in_units, units):
        super().__init__()
        # Wrapping the tensors in nn.Parameter registers them as trainable parameters.
        self.weight = nn.Parameter(torch.randn(in_units, units))
        self.bias = nn.Parameter(torch.randn(units,))

    def forward(self, X):
        """Return the ReLU-activated affine transform of ``X``."""
        # Use the Parameters themselves, not `.data`: `.data` detaches the
        # computation from autograd, so no gradient would reach weight or bias
        # and the layer could never be trained.
        linear = torch.matmul(X, self.weight) + self.bias
        return F.relu(linear)


dense = MyLinear(5, 3)
dense.weight

Parameter containing:
tensor([[ 0.2374, -1.1353, -0.9672],
        [-2.1625, -0.3622,  0.2769],
        [ 0.4523, -0.7682, -0.7660],
        [ 0.3113,  0.2946,  1.0087],
        [ 1.7695,  1.0901,  1.2863]], requires_grad=True)

# 读写文件

In [22]:
import torch
from torch import nn
from torch.nn import functional as F

# Round-trip a single tensor through a file named 'x-file' (relative path).
x = torch.arange(4)
torch.save(x,'x-file')

# Read the tensor back and display it.
x2 = torch.load('x-file')
x2

tensor([0, 1, 2, 3])

In [23]:
# Save a list of tensors and load it back
y = torch.zeros(4)
torch.save([x,y],'x-files')
x2,y2 = torch.load('x-files')

# Save and load a dict that maps strings to tensors
mydict = {'x':x,'y':y}
torch.save(mydict,'mydict')
mydict2 = torch.load('mydict')
mydict2

{'x': tensor([0, 1, 2, 3]), 'y': tensor([0., 0., 0., 0.])}

In [34]:
# Saving and loading model parameters
class MLP(nn.Module):
    """Two-layer perceptron (20 -> 256 -> 10) used to demonstrate saving parameters."""

    def __init__(self):
        super().__init__()
        self.hidden = nn.Linear(20, 256)
        self.output = nn.Linear(256, 10)

    def forward(self, X):
        """Hidden layer, ReLU, then output layer."""
        activated = F.relu(self.hidden(X))
        return self.output(activated)
# Build the model and record its output Y on a fixed input X for later comparison.
net = MLP()
X = torch.randn(size=(2,20))
Y = net(X)
Y  # not displayed: this is not the cell's last expression
# Store only the parameters (state_dict), not the model object itself.
torch.save(net.state_dict(),'mlp.params')

In [33]:
# Rebuild the architecture in code, then overwrite its freshly initialized
# parameters with the ones stored in 'mlp.params'.
clone = MLP()
clone.load_state_dict(torch.load('mlp.params'))
clone.eval()  # switch the module to evaluation mode

# With identical parameters and the same input X, the outputs should match Y element-wise.
Y_clone = clone(X)
Y_clone == Y

tensor([[True, True, True, True, True, True, True, True, True, True],
        [True, True, True, True, True, True, True, True, True, True]])