# 5.1 层和块


In [None]:
import torch
from torch import nn
from torch.nn import functional as F

# Two-layer perceptron assembled with nn.Sequential: Linear(20 -> 256), ReLU, Linear(256 -> 10).
net = nn.Sequential(nn.Linear(in_features=20, out_features=256),
                    nn.ReLU(),
                    nn.Linear(in_features=256, out_features=10))

# Random batch of 2 samples with 20 features each, fed through the network.
X = torch.rand(size=(2, 20))
net(X)

In [None]:
import torch
from torch import nn
from torch.nn import functional as F

# Custom block
class MLP(nn.Module):
    """Multilayer perceptron with a single hidden layer (20 -> 256 -> 10)."""

    def __init__(self):
        super().__init__()
        self.hidden = nn.Linear(20, 256)  # hidden layer
        self.out = nn.Linear(256, 10)  # output layer

    def forward(self, X):
        """Apply the hidden layer, ReLU, then the output layer to ``X``."""
        activated = F.relu(self.hidden(X))
        return self.out(activated)

# Run the custom MLP on a random batch of 2 samples with 20 features each.
X = torch.rand(size=(2, 20))
net = MLP()
net(X)

In [None]:
# Sequential block
class MySequential(nn.Module):
    """Minimal sequential container.

    Every positional argument is stored under its position (as a string
    key) and ``forward`` chains the stored blocks in that order.
    """

    def __init__(self, *args):
        super().__init__()
        for position, layer in enumerate(args):
            # ``layer`` is expected to be an nn.Module instance; it is stored
            # in the ``_modules`` mapping that nn.Module keeps for children.
            self._modules[str(position)] = layer

    def forward(self, X):
        """Feed ``X`` through every stored block, in insertion order."""
        result = X
        for layer in self._modules.values():
            result = layer(result)
        return result

# Same Linear(20 -> 256), ReLU, Linear(256 -> 10) stack, built with the hand-written container.
net = MySequential(nn.Linear(in_features=20, out_features=256),
                   nn.ReLU(),
                   nn.Linear(in_features=256, out_features=10))
X = torch.rand(size=(2, 20))
net(X)

In [None]:
import torch
from torch import nn
from torch.nn import functional as F

# Executing arbitrary code in the forward pass
class FixedHiddenMLP(nn.Module):
    """MLP that mixes a trainable layer with a constant random weight.

    ``forward`` applies the same linear layer twice, multiplies by a fixed
    random matrix in between, halves the result until the sum of its
    absolute values is at most 1, and returns the scalar sum.
    """

    def __init__(self):
        super(FixedHiddenMLP, self).__init__()
        # Constant, non-trainable weight. It is registered as a buffer so it
        # follows the module through .to()/.cuda()/.double(); a plain tensor
        # attribute would be left behind and break forward() afterwards.
        # persistent=False keeps it out of state_dict(), so checkpoints keep
        # exactly the same keys as before.
        self.register_buffer(
            'rand_weight',
            torch.rand(size=(20, 20), requires_grad=False),
            persistent=False)
        self.linear = nn.Linear(in_features=20, out_features=20)

    def forward(self, X: torch.Tensor):
        """Return a scalar tensor computed from a ``(batch, 20)`` input."""
        X = self.linear(X)
        # Fixed random projection plus a constant offset, then ReLU.
        X = F.relu(torch.mm(X, self.rand_weight) + 1)
        # Reuse the same linear layer (its parameters are shared by both calls).
        X = self.linear(X)
        # Data-dependent control flow: halve until the L1 norm is at most 1.
        while X.abs().sum() > 1:
            X /= 2
        return X.sum()

# Run FixedHiddenMLP on a random batch of 2 samples with 20 features each.
X = torch.rand(size=(2, 20))
net = FixedHiddenMLP()
net(X)

In [None]:
# quiz

class MySequentialQuiz(nn.Module):
    """Sequential container whose ``forward`` returns every intermediate output.

    Positional arguments are stored under their position (as a string key);
    ``forward`` chains them and collects the output of each block in a list.
    """

    def __init__(self, *args):
        super().__init__()
        for position, layer in enumerate(args):
            # ``layer`` is expected to be an nn.Module instance; it is stored
            # in the ``_modules`` mapping that nn.Module keeps for children.
            self._modules[str(position)] = layer

    def forward(self, X):
        """Return the list of outputs produced after each stored block."""
        collected = []
        current = X
        for layer in self._modules.values():
            current = layer(current)
            collected.append(current)
        return collected

# Build the quiz container and run it; the result is a list with one tensor per block.
net = MySequentialQuiz(nn.Linear(in_features=20, out_features=256),
                   nn.ReLU(),
                   nn.Linear(in_features=256, out_features=10))
X = torch.rand(size=(2, 20))
net(X)

In [None]:
# 5.2 参数管理

import torch
from torch import nn

# Small network used by the parameter-access cells: Linear(4 -> 8), ReLU, Linear(8 -> 1).
net = nn.Sequential(nn.Linear(in_features=4, out_features=8),
                    nn.ReLU(),
                    nn.Linear(in_features=8, out_features=1))
# Random batch of 2 samples with 4 features each.
X = torch.rand(size=(2, 4))
net(X)

In [None]:

# State dict (parameter name -> tensor) of the module at index 2 of `net`.
print(net[2].state_dict())

In [None]:
# Accessing specific parameters
print(net[2].weight)  # the Parameter object itself
print(net[2].bias)
print(net[2].bias.data)  # the underlying tensor
print(net[2].bias.data.cpu().numpy())  # converted to a NumPy array
print(type(net[2].bias.data.cpu().numpy()))
print(net[2].bias.data.cpu().numpy().tolist())  # converted to a plain Python list
print(type(net[2].bias.data.cpu().numpy().tolist()))
print(net[2].weight.grad)  # None unless a backward pass has populated it

In [None]:
# Access all parameters at once
# print([module for module in net.named_modules() ])
# modules = [module for module in net.named_modules() ]
# print(isinstance(modules[0], nn.Module))
# (name, parameter) pairs of the first layer only, then of the whole network.
print(*[(name, param) for name, param in net[0].named_parameters()])
print(*[(name, param) for name, param in net.named_parameters()])

In [None]:
# 从嵌套层中收集参数

def block1():
    """Build a block: Linear(4 -> 8), ReLU, Linear(8 -> 4), ReLU."""
    layers = [
        nn.Linear(in_features=4, out_features=8),
        nn.ReLU(),
        nn.Linear(in_features=8, out_features=4),
        nn.ReLU(),
    ]
    return nn.Sequential(*layers)

def block2():
    """Stack four ``block1()`` sub-blocks, registered as 'block-0' .. 'block-3'."""
    stacked = nn.Sequential()
    for idx in range(4):
        stacked.add_module(f'block-{idx}', block1())
    return stacked

# Nested network: the stacked blocks followed by a final Linear(4 -> 1); run on the earlier batch X.
rgnet = nn.Sequential(block2(), nn.Linear(in_features=4, out_features=1))
rgnet(X)

In [None]:
# Show the nested module structure.
print(rgnet)

In [None]:
# Index through the nesting: first top-level module -> its second sub-block -> that block's first layer.
print(rgnet[0][1][0].bias.data)



In [None]:
# Built-in initializers
def init_normal(m: nn.Module):
    """Initialize an ``nn.Linear`` with N(0, 0.01) weights and a zero bias.

    Meant to be passed to ``Module.apply``. Modules whose type is not
    exactly ``nn.Linear`` are left untouched.

    Args:
        m: the module to (possibly) initialize in place.
    """
    if type(m) == nn.Linear:
        nn.init.normal_(m.weight, mean=0, std=0.01)
        # ``bias`` is None for nn.Linear(..., bias=False); nothing to zero then.
        if m.bias is not None:
            nn.init.zeros_(m.bias)

# Apply the initializer to `net` and all of its submodules, then inspect the first layer.
net.apply(init_normal)
print(net[0].weight.data)
print(net[0].bias.data[0])

In [None]:
# Built-in initializers: Xavier and constant initialization
def xavier(m):
    """Xavier-uniform initialize the weight of ``m`` if it is exactly an ``nn.Linear``."""
    if type(m) != nn.Linear:
        return
    nn.init.xavier_uniform_(m.weight)

def init_42(m):
    """Fill the weight of ``m`` with the constant 42 if it is exactly an ``nn.Linear``."""
    is_linear = type(m) == nn.Linear
    if is_linear:
        nn.init.constant_(m.weight, 42)

# Use a different initializer per layer: Xavier for the first, constant 42 for the last.
net[0].apply(xavier)
net[2].apply(init_42)
print(net[0].weight.data)
print(net[2].weight.data)

In [None]:
# Custom initialization
def init_my(m: nn.Module):
    """Custom init for modules that are exactly ``nn.Linear``.

    Prints the name and shape of the module's first parameter, draws the
    weight from U(-10, 10), then zeroes every entry whose magnitude is
    below 5.
    """
    if type(m) != nn.Linear:
        return
    first_name, first_param = list(m.named_parameters())[0]
    print('Init', first_name, first_param.shape)
    nn.init.uniform_(m.weight, a=-10, b=10)
    # The boolean mask acts as 1 where |w| >= 5 and as 0 elsewhere.
    keep_mask = m.weight.data.abs() >= 5
    m.weight.data *= keep_mask

# Apply the custom initializer to the whole network and show the first two weight rows.
net.apply(init_my)
print(net[0].weight[:2])

In [45]:
# Set parameters directly
net[0].weight.data[:] += 1  # add 1 to every weight, in place
net[0].weight.data[0, 0] =42  # overwrite a single entry
print(net[0].weight.data[0])



tensor([42.0000, -5.1847,  8.0597,  6.4100])


In [46]:
# Parameter tying
# The same Linear instance is placed at two positions (index 2 and 4) of the network.
shared = nn.Linear(in_features=8, out_features=8)
net = nn.Sequential(nn.Linear(in_features=4, out_features=8),
                    nn.ReLU(),
                    shared,
                    nn.ReLU(),
                    shared,
                    nn.Linear(in_features=8, out_features=1))
net(X)
# Compare the first weight row at both positions, before and after modifying one of them.
print(net[2].weight.data[0] == net[4].weight.data[0])
net[2].weight.data[0, 0] = 100
print(net[2].weight.data[0] == net[4].weight.data[0])
# Expected output:
# tensor([True, True, True, True, True, True, True, True])
# tensor([True, True, True, True, True, True, True, True])
# net[2] and net[4] are tied: they are the same object and share the same parameters in memory, so changing one changes the other.

tensor([True, True, True, True, True, True, True, True])
tensor([True, True, True, True, True, True, True, True])


In [47]:
# Layer without parameters
class CenteredLayer(nn.Module):
    """Parameter-free layer that subtracts the mean taken along axis 0."""

    def __init__(self):
        super().__init__()

    def forward(self, X):
        """Return ``X`` minus its mean over axis 0."""
        mean_over_first_axis = X.mean(axis=0)
        return X - mean_over_first_axis


# Layer with parameters

class MyLinear(nn.Module):
    """Fully connected layer with a built-in ReLU: ``relu(X @ weight + bias)``.

    Args:
        in_units: number of input features.
        out_units: number of output features.
    """

    def __init__(self, in_units, out_units):
        super(MyLinear, self).__init__()
        # Both parameters are drawn from a standard normal distribution.
        self.weight = nn.Parameter(data=torch.randn(size=(in_units, out_units)))
        self.bias = nn.Parameter(data=torch.randn(size=(out_units, )))

    def forward(self, X):
        """Return ``relu(X @ weight + bias)``."""
        # Use the Parameters themselves rather than their ``.data``: going
        # through ``.data`` detaches them from autograd, so no gradient
        # would ever reach ``weight`` or ``bias``.
        linear = torch.matmul(X, self.weight) + self.bias
        return F.relu(linear)




In [50]:
# 读写文件
import torch
from torch import nn
from torch.nn import functional as F

# Save a single tensor to the file 'x-file' and read it back.
# NOTE(review): torch.load unpickles the file; only load files from a trusted source.
x = torch.arange(4)
torch.save(x, 'x-file')
x2 = torch.load('x-file')
print(x2)

# A list of tensors round-trips the same way.
y = torch.zeros(4)
torch.save([x, y], 'x-files')
x2, y2 = torch.load('x-files')
print((x2, y2))

# So does a dict that maps strings to tensors.
mydict = {'x': x, 'y': y}
torch.save(mydict, 'mydict')
mydict2 = torch.load('mydict')
print(mydict2)

class MLP(nn.Module):
    """Multilayer perceptron with a single hidden layer (20 -> 256 -> 10)."""

    def __init__(self):
        super().__init__()
        self.hidden = nn.Linear(20, 256)  # hidden layer
        self.out = nn.Linear(256, 10)  # output layer

    def forward(self, X):
        """Apply the hidden layer, ReLU, then the output layer to ``X``."""
        activated = F.relu(self.hidden(X))
        return self.out(activated)

# Save the model's parameters (its state dict) and restore them into a fresh instance.
net = MLP()
X = torch.randn(size=(2, 20))
y = net(X)
torch.save(net.state_dict(), 'mlp.params')
clone = MLP()
clone.load_state_dict(torch.load('mlp.params'))
clone.eval()  # switch to evaluation mode (changes layers such as dropout / batch norm); it does NOT by itself disable gradients or parameter updates

tensor([0, 1, 2, 3])
(tensor([0, 1, 2, 3]), tensor([0., 0., 0., 0.]))
{'x': tensor([0, 1, 2, 3]), 'y': tensor([0., 0., 0., 0.])}


MLP(
  (hidden): Linear(in_features=20, out_features=256, bias=True)
  (out): Linear(in_features=256, out_features=10, bias=True)
)

In [51]:
# Number of CUDA devices available to PyTorch.
torch.cuda.device_count()


1

In [52]:
import d2l
from d2l import torch as d2l
# Register the 'time_machine' entry in d2l's DATA_HUB as a (URL, hash) pair, then download it.
# NOTE(review): the 40-hex-digit string is presumably the SHA-1 checksum d2l verifies; confirm against d2l.download.
d2l.DATA_HUB['time_machine'] = (d2l.DATA_URL + 'timemachine.txt',
                                '090b5e7e70c295757f55df93cb0a180b9691891a')
d2l.download('time_machine')


Downloading ..\data\timemachine.txt from http://d2l-data.s3-accelerate.amazonaws.com/timemachine.txt...


'..\\data\\timemachine.txt'