# 神经网络基础

## 自定义层和块（网络结构）
在初始化函数（`__init__`）中定义网络包含的各层及其神经元数量等，在 `forward` 函数中定义如何由输入计算出网络的输出

In [1]:
import torch
from torch import nn
from torch.nn import functional as F

# Random input batch of shape (2, 20), reused as the input of the networks below.
X = torch.rand(2,20)

### 重写MLP的网络

In [2]:
class MLP(nn.Module):
    """Two-layer perceptron: Linear(20 -> 256), ReLU, Linear(256 -> 10)."""

    def __init__(self):
        super().__init__()
        # Hidden layer is created first, then the output layer.
        self.hidden = nn.Linear(20, 256)
        self.out = nn.Linear(256, 10)

    def forward(self, X):
        """Forward pass: map the input ``X`` to the output of the last layer."""
        hidden_pre = self.hidden(X)
        hidden_act = F.relu(hidden_pre)
        return self.out(hidden_act)
# Instantiate the MLP and evaluate it on the input batch X.
net = MLP()
net(X)

tensor([[ 0.1453, -0.1370,  0.1046, -0.2033,  0.1468, -0.1944,  0.1545, -0.1770,
          0.0692,  0.1076],
        [ 0.1695, -0.1748, -0.1046, -0.1091,  0.2038, -0.2719,  0.1860,  0.0338,
          0.0705,  0.1264]], grad_fn=<AddmmBackward0>)

### 重写Sequential类

In [3]:
class MySequential(nn.Module):
    """Minimal re-implementation of ``nn.Sequential``.

    The blocks passed to the constructor are stored in order and applied one
    after another in ``forward``.
    """

    def __init__(self, *args):
        """Store every positional argument as a child block.

        Args:
            *args: The blocks to chain, in the order they should be applied.
        """
        super().__init__()
        for idx, block in enumerate(args):
            # Keys of ``_modules`` must be strings: they are used to build the
            # dotted names in ``state_dict()`` / ``named_parameters()``.
            # Using the position as key also keeps a block that is passed
            # several times as several entries instead of collapsing it.
            self._modules[str(idx)] = block

    def forward(self, X):
        """Feed ``X`` through every stored block in insertion order."""
        for block in self._modules.values():
            X = block(X)
        return X
# Build the same 20 -> 256 -> 10 network with MySequential and run it on X.
net = MySequential(nn.Linear(20,256),nn.ReLU(),nn.Linear(256,10))
net(X)

tensor([[ 0.1137,  0.1248,  0.3415,  0.0520,  0.3365, -0.3136, -0.1970, -0.0303,
          0.1339,  0.1484],
        [ 0.0056, -0.0905,  0.2684,  0.3070,  0.0762, -0.1726, -0.0422,  0.1170,
          0.1037,  0.1673]], grad_fn=<AddmmBackward0>)

### 前向传播的过程中执行代码
有时我们希望在前向传播中使用既不是上一层的结果、也不是可更新参数的项（即常数参数）：它们参与计算，但在训练中不会被更新

In [4]:
class FixedHiddenMLP(nn.Module):
    """MLP with a constant random weight and arbitrary code in ``forward``.

    The same linear layer is applied twice, with a multiplication by a
    constant (non-trainable) random matrix and a ReLU in between.
    """

    def __init__(self):
        super().__init__()
        # Constant random weight: it takes part in the computation but is not
        # a parameter. Registering it as a buffer makes it follow the module
        # through ``.to()`` / ``.double()`` / ``.cuda()``; ``persistent=False``
        # keeps it out of ``state_dict()``, as with the plain attribute.
        self.register_buffer(
            "rand_weight",
            torch.rand(size=(20, 20), requires_grad=False),
            persistent=False,
        )
        self.linear = nn.Linear(20, 20)

    def forward(self, X):
        """Return a scalar tensor computed from ``X``.

        Args:
            X: Input whose last dimension is 20 (required by ``self.linear``).

        Returns:
            The sum of the final activations after they have been halved
            until the sum of their absolute values is at most 1.
        """
        X = self.linear(X)
        X = F.relu(torch.matmul(X, self.rand_weight))
        # The same layer is reused, so both applications share parameters.
        X = self.linear(X)
        # Plain Python control flow inside the forward pass.
        while X.abs().sum() > 1:
            X /= 2
        return X.sum()
# Instantiate FixedHiddenMLP and evaluate it on X; the result is a scalar tensor.
net = FixedHiddenMLP()
net(X)

tensor(0.0405, grad_fn=<SumBackward0>)

### 拼接多个组合块

In [5]:
class NestMLP(nn.Module):
    """Block that nests an ``nn.Sequential`` and adds a final linear layer.

    Layout: Linear(20 -> 64), ReLU, Linear(64 -> 32), ReLU, then Linear(32 -> 16).
    """

    def __init__(self):
        super().__init__()
        inner_layers = [
            nn.Linear(20, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
        ]
        self.net = nn.Sequential(*inner_layers)
        self.linear = nn.Linear(32, 16)

    def forward(self, X):
        """Run ``X`` through the nested stack, then through the last layer."""
        features = self.net(X)
        return self.linear(features)

# Chain custom blocks and a built-in layer: NestMLP (20 -> 16), Linear (16 -> 20),
# then FixedHiddenMLP, which reduces the result to a scalar.
net = nn.Sequential(NestMLP(),nn.Linear(16,20),FixedHiddenMLP())
net(X)


tensor(-0.1530, grad_fn=<SumBackward0>)

## 参数管理（访问训练后的参数）

In [6]:
import torch
from torch import nn

# Small network used for the parameter-access examples:
# Linear(4 -> 8), ReLU, Linear(8 -> 1), evaluated on a random (2, 4) batch.
net = nn.Sequential(nn.Linear(4,8),nn.ReLU(),nn.Linear(8,1))
X = torch.rand(2,4)
net(X)

tensor([[0.3763],
        [0.1984]], grad_fn=<AddmmBackward0>)

In [7]:
# Indexing the Sequential returns the layer at that position.
print(net[0].state_dict()) # nn.Linear(4,8): weight and bias
print(net[1].state_dict()) # ReLU: prints an empty OrderedDict
print(net[2].state_dict()) # nn.Linear(8,1): weight and bias

# Inspect a single parameter of the last layer.
print(type(net[2].bias)) # the Parameter class
print(net[2].bias)       # the Parameter object itself
print(net[2].bias.data)  # the underlying tensor

OrderedDict([('weight', tensor([[ 0.3462,  0.1429,  0.0163,  0.2945],
        [ 0.2426,  0.3157,  0.2454, -0.0122],
        [-0.0865,  0.0528, -0.4299, -0.2637],
        [ 0.4305,  0.4042, -0.0057,  0.1168],
        [ 0.0675, -0.1138, -0.4166,  0.1081],
        [ 0.3368,  0.2728, -0.3628,  0.1518],
        [-0.3245,  0.3184, -0.1615, -0.3213],
        [-0.2776,  0.0995, -0.1669, -0.2560]])), ('bias', tensor([ 0.0641, -0.4824,  0.3883, -0.2839,  0.2964,  0.2484,  0.2923,  0.2991]))])
OrderedDict()
OrderedDict([('weight', tensor([[-0.2101, -0.2803,  0.2713, -0.1288,  0.0774,  0.2232,  0.2494, -0.0565]])), ('bias', tensor([0.2680]))])
<class 'torch.nn.parameter.Parameter'>
Parameter containing:
tensor([0.2680], requires_grad=True)
tensor([0.2680])


In [8]:
# Print a (name, shape) pair for every parameter of the network.
print(*[(name,param.shape) for name,param in net.named_parameters()])

('0.weight', torch.Size([8, 4])) ('0.bias', torch.Size([8])) ('2.weight', torch.Size([1, 8])) ('2.bias', torch.Size([1]))


In [9]:
# Parameters can also be looked up by name in the network's state_dict.
print(net.state_dict()['2.weight'])
print(net.state_dict()['2.bias'])

tensor([[-0.2101, -0.2803,  0.2713, -0.1288,  0.0774,  0.2232,  0.2494, -0.0565]])
tensor([0.2680])


In [20]:
def block1():
    """Return a new Sequential: Linear(4 -> 8), ReLU, Linear(8 -> 4)."""
    layers = (nn.Linear(4, 8), nn.ReLU(), nn.Linear(8, 4))
    return nn.Sequential(*layers)

def block2():
    """Return a Sequential holding four ``block1()`` blocks named block0..block3."""
    stacked = nn.Sequential()
    for idx in range(4):
        # add_module takes an explicit string name for the child block.
        child_name = f'block{idx}'
        stacked.add_module(child_name, block1())
    return stacked

# Nest the stacked blocks inside another Sequential, followed by Linear(4 -> 1),
# run it on X and print the resulting module hierarchy.
net = nn.Sequential(block2(),nn.Linear(4,1))
net(X)
print(net)

Sequential(
  (0): Sequential(
    (block0): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
    )
    (block1): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
    )
    (block2): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
    )
    (block3): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
    )
  )
  (1): Linear(in_features=4, out_features=1, bias=True)
)
OrderedDict([('0.block0.0.weight', tensor([[-0.3903,  0.4064, -0.1030, -0.4715],
        [-0.3094,  0.0719,  0.4342,  0.3138],
        [ 0.2928, -0.4540, -0.2056,  0.2434],
        [ 0.1601, -0.3681, -0.4981, -0.4812],
        

### 内置的参数初始化

In [23]:
# Fresh Linear(4 -> 8), ReLU, Linear(8 -> 1) network and a new random (2, 4)
# batch for the initialization examples.
net = nn.Sequential(nn.Linear(4,8),nn.ReLU(),nn.Linear(8,1))
X = torch.rand(2,4)
net(X)

def init_normal(m):
    """Initialize an ``nn.Linear`` with N(0, 0.01**2) weights and zero bias.

    Meant to be passed to ``Module.apply``. Modules whose type is not exactly
    ``nn.Linear`` are left untouched.

    Args:
        m: The module to initialize in place.
    """
    # Exact-type match, as in the original, so subclasses are not affected.
    if type(m) is nn.Linear:
        nn.init.normal_(m.weight, mean=0, std=0.01)
        # A layer built with bias=False has ``bias is None``.
        if m.bias is not None:
            nn.init.zeros_(m.bias)
# Apply init_normal to the network via Module.apply, then show the first
# layer's weight and bias.
net.apply(init_normal)
print(net[0].weight.data)
print(net[0].bias.data)

tensor([[-0.0048,  0.0107,  0.0003, -0.0148],
        [-0.0067, -0.0160,  0.0061, -0.0129],
        [ 0.0228,  0.0020,  0.0053,  0.0201],
        [-0.0188,  0.0016, -0.0184,  0.0054],
        [ 0.0060, -0.0003,  0.0122,  0.0121],
        [ 0.0085,  0.0021, -0.0073, -0.0088],
        [ 0.0161, -0.0089, -0.0069, -0.0026],
        [-0.0066,  0.0007, -0.0111,  0.0016]])
tensor([0., 0., 0., 0., 0., 0., 0., 0.])


In [25]:
def init_constant(m):
    """Initialize an ``nn.Linear`` with all-ones weights and zero bias.

    Meant to be passed to ``Module.apply``. Modules whose type is not exactly
    ``nn.Linear`` are left untouched.

    Args:
        m: The module to initialize in place.
    """
    # Exact-type match, as in the original, so subclasses are not affected.
    if type(m) is nn.Linear:
        nn.init.constant_(m.weight, 1)
        # A layer built with bias=False has ``bias is None``.
        if m.bias is not None:
            nn.init.zeros_(m.bias)
# Apply init_constant to the network via Module.apply, then show the first
# layer's weight and bias.
net.apply(init_constant)
print(net[0].weight.data)
print(net[0].bias.data)

tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])
tensor([0., 0., 0., 0., 0., 0., 0., 0.])


In [28]:
def xavier(m):
    """Apply Xavier-uniform initialization to the weight of an ``nn.Linear``.

    Modules whose type is not exactly ``nn.Linear`` are ignored; the bias is
    not modified.
    """
    if type(m) is not nn.Linear:
        return
    nn.init.xavier_uniform_(m.weight)

# Different layers can use different initializers: Xavier-uniform for the
# first layer, constant initialization for the last one.
net[0].apply(xavier)
net[2].apply(init_constant)
print(net[0].weight.data)
print(net[2].weight.data)

tensor([[ 0.2787,  0.2469,  0.5250, -0.5568],
        [-0.3217,  0.2586, -0.0198, -0.1410],
        [ 0.0334, -0.4799, -0.6248, -0.1607],
        [ 0.2892, -0.3281, -0.4636,  0.5669],
        [ 0.0501,  0.7030, -0.1168,  0.5477],
        [-0.6724, -0.6529,  0.2516, -0.2298],
        [ 0.6283, -0.1949,  0.5596,  0.2460],
        [ 0.0287,  0.6724,  0.4476, -0.4665]])
tensor([[1., 1., 1., 1., 1., 1., 1., 1.]])


### 参数绑定
让多个层共享同一组参数：把同一个层对象放在网络中的多个位置即可

In [31]:
# The same Linear(8 -> 8) object is placed at positions 2 and 4, so net[2] and
# net[4] are one module and share the same weight and bias.
shared = nn.Linear(8,8)
net = nn.Sequential(nn.Linear(4,8),nn.ReLU(),shared,nn.ReLU(),shared,nn.ReLU(),nn.Linear(8,1))
print(net[0].weight.data)
print(net[2].weight.data)
print(net[4].weight.data)

tensor([[-0.2724,  0.1877, -0.1764,  0.1656],
        [-0.2817,  0.0061, -0.2304, -0.1595],
        [ 0.0167,  0.2272, -0.1183, -0.0085],
        [-0.2418, -0.3935,  0.4363,  0.4493],
        [-0.2859,  0.2566,  0.1678,  0.1539],
        [-0.0493,  0.4679,  0.0570,  0.3276],
        [-0.2413,  0.1631, -0.3888,  0.1512],
        [ 0.0638, -0.3136,  0.3000, -0.3908]])
tensor([[ 2.4051e-01,  1.2590e-01, -2.7689e-01, -1.2919e-01,  2.8693e-02,
          6.4387e-02,  3.4751e-01,  8.9035e-03],
        [-2.6194e-01, -3.2073e-01, -1.1763e-01, -2.0224e-02,  3.3490e-01,
          3.0266e-01, -9.9295e-02, -1.2610e-02],
        [-2.0657e-01,  1.7945e-01,  2.7102e-01,  1.9759e-04, -3.9683e-03,
         -3.0819e-01,  2.7293e-01, -1.1431e-01],
        [-1.4660e-02, -4.4140e-02, -2.8468e-01,  2.7438e-01,  5.1808e-02,
         -2.0387e-01, -1.4990e-02, -2.5242e-01],
        [-2.7888e-01,  2.8187e-01, -1.4106e-01,  4.0534e-02,  7.0759e-02,
         -9.5099e-02,  2.0322e-02, -3.0415e-01],
        [-1.0752