## 一、层和块的个性化操作
### 1、自定义层和块

In [2]:
%matplotlib inline
import numpy as np
import pandas as pd
import torch
from torch import nn
from d2l import torch as d2l
from torch.nn import functional as F
class FixedHiddenMLP(nn.Module):
    """MLP that mixes one trainable linear layer with a constant random matrix.

    The same ``nn.Linear(20, 20)`` is applied twice in ``forward`` (so both
    applications share parameters), with a fixed, non-trainable 20x20 random
    matrix and a ReLU in between, followed by a data-dependent loop.
    """

    def __init__(self):
        super().__init__()
        # Random weights that never receive gradients, so they stay constant
        # during training. Registering them as a buffer makes them follow the
        # module through .to()/.cuda()/.double(); a plain tensor attribute would
        # be left behind and torch.mm would then fail on a device/dtype mismatch.
        # persistent=False keeps the buffer out of state_dict(), so the saved
        # parameter layout is the same as with a plain attribute.
        self.register_buffer('rand_weight', torch.rand((20, 20)), persistent=False)
        self.linear = nn.Linear(20, 20)

    def forward(self, X):
        """Run the forward pass.

        Args:
            X: 2-D tensor with 20 columns (required by ``nn.Linear(20, 20)``
                and ``torch.mm``).

        Returns:
            A 0-dim tensor: the sum of all elements after the halving loop.
        """
        X = self.linear(X)
        # Use the constant matrix together with the relu and mm functions.
        X = F.relu(torch.mm(X, self.rand_weight) + 1)
        # Reuse the fully connected layer: equivalent to two layers sharing parameters.
        X = self.linear(X)
        # Control flow: halve the activations until their L1 norm is at most 1.
        while X.abs().sum() > 1:
            X /= 2
        return X.sum()
# Writing the block by hand like this is not limited to nn.Sequential and allows finer control over the network.

### 2、嵌套多个相同的层

In [None]:
def block1():
    """Build a fresh 4 -> 8 -> 4 MLP block with a ReLU after each linear layer."""
    layers = [
        nn.Linear(4, 8),
        nn.ReLU(),
        nn.Linear(8, 4),
        nn.ReLU(),
    ]
    return nn.Sequential(*layers)

def block2():
    """Stack four separately constructed block1() modules in one nn.Sequential.

    The sub-modules are registered under the names 'block 0' .. 'block 3'.
    """
    stacked = nn.Sequential()
    for name in [f'block {idx}' for idx in range(4)]:
        # Nesting happens here: every call to block1() builds new layers.
        stacked.add_module(name, block1())
    return stacked

# Sample input: 2 rows of 4 features, matching the nn.Linear(4, 8) that opens
# block1. Without this definition the cell raises NameError on a fresh kernel.
X = torch.rand(size=(2, 4))
rgnet = nn.Sequential(block2(), nn.Linear(4, 1))
rgnet(X)

## 二、参数初始化技巧
### 1、默认情况下，PyTorch会根据一个范围均匀地初始化权重和偏置矩阵，这个范围是根据输入和输出维度计算出的。PyTorch的nn.init模块提供了多种预置初始化方法。

In [None]:
# A small MLP for the initialization demos in this cell. net[0] and net[2] are
# indexed further down, so the container has to actually hold layers (an empty
# nn.Sequential() raises IndexError on net[0]).
net = nn.Sequential(nn.Linear(4, 8), nn.ReLU(), nn.Linear(8, 1))
# The code below initializes every weight as a Gaussian random variable with
# standard deviation 0.01 and sets every bias to 0.
def init_normal(m):
    """Initialize an nn.Linear in place: weights ~ N(0, 0.01^2), bias = 0.

    Written for use with ``Module.apply``. Modules whose type is not exactly
    ``nn.Linear`` are left untouched.

    Args:
        m: the module handed over by ``Module.apply``.
    """
    if type(m) is not nn.Linear:
        return
    nn.init.normal_(m.weight, mean=0, std=0.01)
    # nn.Linear(..., bias=False) stores bias as None; zeros_ would fail on it.
    if m.bias is not None:
        nn.init.zeros_(m.bias)
net.apply(init_normal)
# Jupyter only echoes the final expression of a cell; this line sits in the
# middle of the cell, so the values are printed explicitly.
print(net[0].weight.data[0], net[0].bias.data[0])

# Initialize every weight to a given constant (1 here); biases are set to 0.
def init_constant(m):
    """Initialize an nn.Linear in place: all weights = 1, bias = 0.

    Written for use with ``Module.apply``. Modules whose type is not exactly
    ``nn.Linear`` are left untouched.

    Args:
        m: the module handed over by ``Module.apply``.
    """
    if type(m) is not nn.Linear:
        return
    nn.init.constant_(m.weight, 1)
    # nn.Linear(..., bias=False) stores bias as None; zeros_ would fail on it.
    if m.bias is not None:
        nn.init.zeros_(m.bias)
net.apply(init_constant)
# Jupyter only echoes the final expression of a cell; this line sits in the
# middle of the cell, so the values are printed explicitly.
print(net[0].weight.data[0], net[0].bias.data[0])

# Xavier initialization is used for the first layer of the network, and the
# third layer is then initialized to the constant value 42.
def init_xavier(m):
    """Xavier-uniform initialize the weight of an nn.Linear in place.

    Modules whose type is not exactly ``nn.Linear`` are ignored; the bias is
    not modified.
    """
    is_plain_linear = type(m) is nn.Linear
    if not is_plain_linear:
        return
    nn.init.xavier_uniform_(m.weight)
def init_42(m):
    """Fill the weight of an nn.Linear with the constant 42, in place.

    Modules whose type is not exactly ``nn.Linear`` are ignored; the bias is
    not modified.
    """
    if type(m) is nn.Linear:
        nn.init.constant_(m.weight, val=42)
# Different initializers for different layers: Xavier for net[0], constant 42
# for net[2]. Then show the first weight row of net[0] and all weights of net[2].
net[0].apply(init_xavier)
net[2].apply(init_42)
print(net[0].weight.data[0], net[2].weight.data, sep='\n')

### 2、共享参数

In [None]:
# A single layer object that is placed at two positions of the network below,
# so both positions use the very same parameters.
shared = nn.Linear(8, 8)
net = nn.Sequential(
    nn.Linear(4, 8), nn.ReLU(),
    shared, nn.ReLU(),
    shared, nn.ReLU(),
    nn.Linear(8, 1),
)

## 三、自定义层
### 1、不带参数的层

In [None]:
class CenteredLayer(nn.Module):
    """Parameter-free layer that subtracts the mean of its input.

    The mean is taken over all elements of the input, so the returned tensor
    has the same shape as the input.
    """

    def forward(self, X):
        mean_value = X.mean()
        return X - mean_value


# A standalone instance, plus a network that uses the layer like any built-in one.
layer = CenteredLayer()
net = nn.Sequential(nn.Linear(8, 4), CenteredLayer())

### 2、带参数的层

In [4]:
class MyLinear(nn.Module):
    """Hand-written fully connected layer followed by ReLU.

    Args:
        in_units: number of input features (rows of the weight matrix).
        units: number of output features (columns of the weight matrix).
    """

    def __init__(self, in_units, units):
        super().__init__()
        # torch.randn draws from the standard normal distribution (mean 0,
        # standard deviation 1); the values are not confined to [-1, 1].
        self.weight = nn.Parameter(torch.randn(in_units, units))
        self.bias = nn.Parameter(torch.randn(units,))

    def forward(self, X):
        """Return relu(X @ weight + bias)."""
        # Use the Parameters themselves, not `.data`: `.data` bypasses autograd,
        # so gradients would never reach weight or bias and the layer could
        # not be trained.
        linear = torch.matmul(X, self.weight) + self.bias
        return F.relu(linear)


layer = MyLinear(4, 8)
layer.weight

Parameter containing:
tensor([[-0.4999, -0.1561, -1.1656,  0.6547,  0.3784,  2.4940,  0.0639, -1.9158],
        [-0.7281, -0.4229, -1.7671,  1.2451, -2.3020,  1.2156, -0.6230,  0.0265],
        [ 0.1714, -0.0729, -0.1006, -1.0416, -0.7219, -0.9801,  0.0430, -0.6228],
        [ 0.5458,  0.7118, -0.6498, -0.4517,  0.1681,  0.8291,  0.3054,  1.0181]],
       requires_grad=True)

## 四、读写文件
### 1、加载和保存张量

In [None]:
# The two tensors used by every example in this cell.
x = torch.arange(4)
y = torch.zeros(4)

# Single tensor: write it to disk and read it back.
torch.save(x, 'x-file')
x2 = torch.load('x-file')

# List of tensors: written to the same path, replacing the single-tensor file.
torch.save([x, y], 'x-file')

# Dictionary from strings to tensors: save it, then load it again.
mydict = dict(x=x, y=y)
torch.save(mydict, 'mydict')
mydict2 = torch.load('mydict')

### 2、加载和保存模型参数

In [None]:
"""写一个MLP搞出参数"""
class MLP(nn.Module):
    """Two-layer perceptron: Linear(40, 20) -> ReLU -> Linear(20, 10)."""

    def __init__(self):
        super().__init__()
        self.hidden = nn.Linear(40, 20)
        self.output = nn.Linear(20, 10)

    def forward(self, x):
        """Map a tensor with 40 input features to 10 output features."""
        return self.output(F.relu(self.hidden(x)))


# Use the first GPU when one is available and fall back to the CPU otherwise,
# so the cell also runs on machines without CUDA.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
x = torch.randn(size=(1, 40), device=device)
net = nn.Sequential(MLP())
net.to(device=device)
# One forward pass so the parameters have been exercised before inspecting them.
net(x)
# net[0] is the MLP itself, which has no `weight` attribute of its own; the
# weights live on its `hidden` and `output` sub-layers.
net[0].hidden.weight

In [None]:
# Save the MLP's own parameters (net[0]). `net` is an nn.Sequential wrapper, so
# net.state_dict() prefixes every key with "0." and a bare MLP cannot load it.
torch.save(net[0].state_dict(), "MLP.param")
# A new MLP instance is required here: load_state_dict fills the saved
# parameters into an already constructed network.
clone = MLP()
# clone is created on the CPU, so map the stored tensors to the CPU while loading.
clone.load_state_dict(torch.load("MLP.param", map_location="cpu"))
clone.eval()