# 神经网络基础

## 自定义层和块（网络结构）
在初始化函数 `__init__` 中定义网络包含的各层（例如每一层神经元的数量），在 `forward` 函数中定义如何由输入计算出神经网络的输出

In [1]:
import torch
from torch import nn
from torch.nn import functional as F

# Dummy input reused by the following cells: 2 samples with 20 features each.
X = torch.rand(2,20)

### 重写MLP的网络

In [2]:
class MLP(nn.Module):
    """Multilayer perceptron: Linear(20 -> 256), ReLU, Linear(256 -> 10)."""

    def __init__(self):
        super().__init__()
        # Layers assigned as attributes are registered as submodules, so
        # their parameters appear under the names ``hidden.*`` and ``out.*``.
        self.hidden = nn.Linear(20, 256)
        self.out = nn.Linear(256, 10)

    def forward(self, X):
        """Forward pass: map the input ``X`` to the network output.

        Applies the hidden layer, then ReLU, then the output layer.
        """
        activated = F.relu(self.hidden(X))
        return self.out(activated)
# Build the model and run one forward pass on the dummy batch; being the
# last expression of the cell, the resulting tensor is displayed.
net = MLP()
net(X)

tensor([[ 0.0468,  0.1350,  0.0570,  0.2616,  0.1164, -0.0231,  0.0667,  0.2544,
          0.0185, -0.1810],
        [ 0.0335,  0.0357,  0.0334,  0.1683,  0.0340,  0.0354,  0.0584,  0.1771,
          0.0813, -0.1401]], grad_fn=<AddmmBackward0>)

### 重写Sequential类

In [3]:
class MySequential(nn.Module):
    """Minimal re-implementation of ``nn.Sequential``.

    The blocks passed to the constructor are stored in order and applied
    one after another in ``forward``.
    """

    def __init__(self, *args):
        """Register every positional argument as a child block.

        Args:
            *args: The blocks to chain, in execution order.
        """
        super().__init__()
        for idx, block in enumerate(args):
            # ``_modules`` keys must be strings: nn.Module builds parameter
            # names such as "0.weight" by concatenating them, so using the
            # module object itself as the key breaks ``state_dict()``,
            # ``named_parameters()`` and ``repr()``. A positional index also
            # lets the same block appear more than once in the chain.
            self._modules[str(idx)] = block

    def forward(self, X):
        """Feed ``X`` through the registered blocks in insertion order."""
        for block in self._modules.values():
            X = block(X)
        return X
# Same architecture as the MLP above, assembled from individual layers;
# the forward result is displayed as the cell output.
net = MySequential(nn.Linear(20,256),nn.ReLU(),nn.Linear(256,10))
net(X)

tensor([[ 0.2362, -0.0195,  0.1334,  0.0960, -0.0244,  0.0111, -0.1068, -0.0065,
         -0.0518, -0.1880],
        [ 0.1135,  0.0211,  0.1115,  0.1864, -0.1322,  0.0163, -0.0921, -0.0505,
         -0.0308, -0.1032]], grad_fn=<AddmmBackward0>)

### 前向传播的过程中执行代码
有时我们希望在前向传播中使用既不是上一层的结果、也不是可更新参数的项（即常数参数）：它们参与计算，但在训练过程中不会被更新

In [4]:
class FixedHiddenMLP(nn.Module):
    """MLP that combines a trainable layer with a fixed random matrix.

    ``rand_weight`` is a constant: it is drawn once at construction time and
    does not require gradients. The single ``linear`` layer is applied twice
    in ``forward``, so both applications share the same parameters.
    """

    def __init__(self):
        super().__init__()
        # Registered as a non-persistent buffer instead of a plain tensor
        # attribute: the module's ``.to(device)`` / ``.double()`` now move or
        # cast it together with the parameters, while ``state_dict()`` keeps
        # exactly the keys it had before (``linear.weight``, ``linear.bias``).
        self.register_buffer(
            'rand_weight',
            torch.rand(size=(20, 20), requires_grad=False),
            persistent=False,
        )
        self.linear = nn.Linear(20, 20)

    def forward(self, X):
        """Return a scalar tensor: the sum of the rescaled activations."""
        X = self.linear(X)
        # Multiply by the constant matrix, then apply ReLU.
        X = F.relu(torch.matmul(X, self.rand_weight))
        # Reuse the same linear layer (shared parameters).
        X = self.linear(X)
        # Arbitrary Python control flow inside forward: halve the values
        # until the sum of absolute values is at most 1.
        while X.abs().sum() > 1:
            X /= 2
        return X.sum()
# Instantiate the block and evaluate it on X; the displayed result is a
# scalar tensor because forward returns X.sum().
net = FixedHiddenMLP()
net(X)

tensor(0.3212, grad_fn=<SumBackward0>)

### 拼接多个组合块

In [5]:
class NestMLP(nn.Module):
    """Block that nests an ``nn.Sequential`` stack in front of a linear layer.

    Feature sizes along the way: 20 -> 64 -> 32 (inner stack) -> 16.
    """

    def __init__(self):
        super().__init__()
        inner_layers = [
            nn.Linear(20, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
        ]
        self.net = nn.Sequential(*inner_layers)
        self.linear = nn.Linear(32, 16)

    def forward(self, X):
        """Run ``X`` through the nested stack, then the final linear layer."""
        features = self.net(X)
        return self.linear(features)

# Custom blocks compose freely with built-in layers: NestMLP outputs 16
# features, the Linear layer maps them to the 20 features FixedHiddenMLP takes.
net = nn.Sequential(NestMLP(),nn.Linear(16,20),FixedHiddenMLP())
net(X)


tensor(0.1704, grad_fn=<SumBackward0>)

## 参数管理（访问模型的参数）

In [6]:
import torch
from torch import nn

# Small network (4 -> 8 -> 1) and a matching 2-sample input, used by the
# parameter-access examples that follow.
net = nn.Sequential(nn.Linear(4,8),nn.ReLU(),nn.Linear(8,1))
X = torch.rand(2,4)
net(X)

tensor([[-0.0852],
        [-0.0385]], grad_fn=<AddmmBackward0>)

In [7]:
# Index into the Sequential to inspect each layer's state dict.
print(net[0].state_dict()) # nn.Linear(4,8): weight and bias
print(net[1].state_dict()) # ReLU: no parameters, prints an empty OrderedDict
print(net[2].state_dict()) # nn.Linear(8,1): weight and bias

print(type(net[2].bias)) # torch.nn.parameter.Parameter
print(net[2].bias)       # the Parameter object itself
print(net[2].bias.data)  # the underlying tensor of values

OrderedDict([('weight', tensor([[ 0.4666,  0.4631,  0.0953, -0.0285],
        [-0.0588,  0.4606, -0.3934,  0.4122],
        [-0.3251,  0.2309, -0.3357,  0.4679],
        [ 0.1092, -0.1846, -0.0482,  0.2321],
        [ 0.1629, -0.1277,  0.4303, -0.0567],
        [ 0.0293, -0.1303, -0.0661,  0.1289],
        [-0.3986,  0.0480, -0.1049,  0.1756],
        [ 0.1980, -0.3045,  0.4725,  0.3052]])), ('bias', tensor([ 0.1411,  0.0306, -0.0870, -0.0161, -0.4630, -0.3911, -0.2716,  0.3621]))])
OrderedDict()
OrderedDict([('weight', tensor([[ 0.2233,  0.0248, -0.1935, -0.1530,  0.1348, -0.0230, -0.0702,  0.0529]])), ('bias', tensor([-0.1751]))])
<class 'torch.nn.parameter.Parameter'>
Parameter containing:
tensor([-0.1751], requires_grad=True)
tensor([-0.1751])


In [8]:
# List (name, shape) for every parameter; names are prefixed with the
# layer's index inside the Sequential, e.g. '0.weight'.
print(*[(name,param.shape) for name,param in net.named_parameters()])

('0.weight', torch.Size([8, 4])) ('0.bias', torch.Size([8])) ('2.weight', torch.Size([1, 8])) ('2.bias', torch.Size([1]))


In [9]:
# Parameters can also be looked up by their full name in the state dict.
print(net.state_dict()['2.weight'])
print(net.state_dict()['2.bias'])

tensor([[ 0.2233,  0.0248, -0.1935, -0.1530,  0.1348, -0.0230, -0.0702,  0.0529]])
tensor([-0.1751])


In [10]:
def block1():
    """Build one basic block: Linear(4 -> 8), ReLU, Linear(8 -> 4)."""
    layers = (nn.Linear(4, 8), nn.ReLU(), nn.Linear(8, 4))
    return nn.Sequential(*layers)

def block2():
    """Stack four ``block1`` instances, named ``block0`` .. ``block3``."""
    stacked = nn.Sequential()
    for idx in range(4):
        # add_module takes an explicit string name for the child, unlike
        # passing modules positionally to the constructor.
        stacked.add_module(f'block{idx}', block1())
    return stacked

# Nest the stacked blocks inside another Sequential, run a forward pass,
# and print the module tree to show the hierarchical structure.
net = nn.Sequential(block2(),nn.Linear(4,1))
net(X)
print(net)

Sequential(
  (0): Sequential(
    (block0): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
    )
    (block1): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
    )
    (block2): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
    )
    (block3): Sequential(
      (0): Linear(in_features=4, out_features=8, bias=True)
      (1): ReLU()
      (2): Linear(in_features=8, out_features=4, bias=True)
    )
  )
  (1): Linear(in_features=4, out_features=1, bias=True)
)


### 内置的参数初始化

In [11]:
# Fresh network and input for the initialization examples.
net = nn.Sequential(nn.Linear(4,8),nn.ReLU(),nn.Linear(8,1))
X = torch.rand(2,4)
net(X)

def init_normal(m):
    """Initialise an ``nn.Linear`` layer with small Gaussian weights.

    Weights are drawn from a normal distribution with mean 0 and standard
    deviation 0.01, and the bias (if the layer has one) is set to zero.
    Modules of any other type are left unchanged. Intended for use with
    ``Module.apply``.

    Args:
        m: The module to initialise.
    """
    # Exact type match retained from the original check, so subclasses of
    # nn.Linear are still skipped exactly as before.
    if type(m) is not nn.Linear:
        return
    nn.init.normal_(m.weight, mean=0, std=0.01)
    # A layer created with ``bias=False`` has ``m.bias is None``; zeroing it
    # unconditionally would raise.
    if m.bias is not None:
        nn.init.zeros_(m.bias)
# apply() calls the function on every submodule of net (and on net itself);
# the printed weights are small and the bias is all zeros.
net.apply(init_normal)
print(net[0].weight.data)
print(net[0].bias.data)

tensor([[-0.0087,  0.0008,  0.0091,  0.0051],
        [ 0.0019, -0.0139, -0.0066, -0.0035],
        [ 0.0011,  0.0104, -0.0037,  0.0029],
        [-0.0032, -0.0047,  0.0210, -0.0259],
        [ 0.0061,  0.0096, -0.0032, -0.0061],
        [-0.0037, -0.0213,  0.0070, -0.0056],
        [ 0.0016, -0.0175, -0.0088,  0.0037],
        [ 0.0140,  0.0014, -0.0042, -0.0096]])
tensor([0., 0., 0., 0., 0., 0., 0., 0.])


In [12]:
def init_constant(m):
    """Initialise an ``nn.Linear`` layer with all weights set to 1.

    The bias (if the layer has one) is set to zero. Modules of any other
    type are left unchanged. Intended for use with ``Module.apply``.

    Args:
        m: The module to initialise.
    """
    # Exact type match retained from the original check, so subclasses of
    # nn.Linear are still skipped exactly as before.
    if type(m) is not nn.Linear:
        return
    nn.init.constant_(m.weight, 1)
    # A layer created with ``bias=False`` has ``m.bias is None``; zeroing it
    # unconditionally would raise.
    if m.bias is not None:
        nn.init.zeros_(m.bias)
# Re-initialise every Linear layer of net to constant weights and zero bias.
net.apply(init_constant)
print(net[0].weight.data)
print(net[0].bias.data)

tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])
tensor([0., 0., 0., 0., 0., 0., 0., 0.])


In [13]:
def xavier(m):
    """Apply Xavier-uniform initialisation to the weight of an ``nn.Linear``.

    Only the weight is touched; the bias keeps its current values. Modules
    whose type is not exactly ``nn.Linear`` are left unchanged.
    """
    if type(m) is not nn.Linear:
        return
    nn.init.xavier_uniform_(m.weight)

# Different initialisers can be applied to different layers by calling
# apply() on the individual submodules instead of the whole network.
net[0].apply(xavier)
net[2].apply(init_constant)
print(net[0].weight.data)
print(net[2].weight.data)

tensor([[ 0.4882,  0.1259, -0.5352, -0.1371],
        [-0.3418, -0.2946, -0.2709,  0.6576],
        [-0.3005,  0.4317,  0.1917,  0.3482],
        [ 0.2430, -0.1666, -0.0173,  0.5004],
        [-0.4137, -0.5417,  0.6387,  0.1514],
        [-0.6126,  0.5526, -0.3399, -0.4946],
        [-0.6226,  0.0560, -0.5855, -0.6663],
        [-0.3859, -0.5384,  0.2074,  0.2177]])
tensor([[1., 1., 1., 1., 1., 1., 1., 1.]])


### 参数绑定
让多个层共享同一组参数：在网络中多次使用同一个层对象

In [14]:
# The same Linear object is placed at positions 2 and 4, so net[2] and
# net[4] are one module and print the same weight values.
shared = nn.Linear(8,8)
net = nn.Sequential(nn.Linear(4,8),nn.ReLU(),shared,nn.ReLU(),shared,nn.ReLU(),nn.Linear(8,1))
print(net[0].weight.data)
print(net[2].weight.data)
print(net[4].weight.data)

tensor([[-1.1468e-01, -1.6295e-01,  4.9518e-01,  2.2858e-01],
        [ 2.3199e-01, -4.9075e-01,  4.6173e-01, -4.5443e-01],
        [ 4.8810e-01, -3.2350e-01, -1.3084e-01, -1.6645e-01],
        [ 4.9488e-01, -9.7150e-03,  1.6348e-01,  1.8681e-01],
        [-2.2781e-01, -4.7428e-01, -3.2466e-01, -4.7266e-04],
        [-3.7827e-01,  1.2379e-01, -2.1483e-01,  4.1587e-01],
        [-4.6888e-01, -4.4578e-01,  3.5723e-01, -2.0310e-01],
        [-4.6932e-02,  4.5746e-01,  3.8862e-02, -1.1697e-01]])
tensor([[-0.1737,  0.2253, -0.0538, -0.0835,  0.2945,  0.0411,  0.1312,  0.0864],
        [-0.1007, -0.2565, -0.3530, -0.3224,  0.2491,  0.2178, -0.1826,  0.1203],
        [ 0.0421,  0.1853, -0.1971,  0.1485, -0.3108,  0.0636,  0.1847,  0.1569],
        [ 0.3392, -0.2471, -0.0890,  0.3084, -0.2615, -0.2535, -0.1763,  0.0169],
        [-0.2217,  0.0794,  0.0474, -0.1461,  0.2627,  0.2565, -0.1689, -0.0934],
        [-0.3140,  0.1710, -0.0690, -0.1361,  0.1246,  0.3181, -0.2736,  0.2824],
        [ 0

## 参数的存储

In [15]:
class MLP(nn.Module):
    """MLP used for the save/load example: 20 -> 256 -> ReLU -> 10."""

    def __init__(self):
        super().__init__()
        # Attribute names double as the prefixes of the state-dict keys.
        self.hidden = nn.Linear(20, 256)
        self.out = nn.Linear(256, 10)

    def forward(self, X):
        """Compute the network output for the input batch ``X``."""
        hidden_out = self.hidden(X)
        return self.out(F.relu(hidden_out))

# Reference model and input; the displayed output is compared with the
# reloaded model's output further down.
net = MLP()
X = torch.rand(size=(2,20))
net(X)

tensor([[-0.2702, -0.0776,  0.1907, -0.2793, -0.1714, -0.0742, -0.2040, -0.1597,
          0.0545,  0.0096],
        [-0.3035, -0.0950,  0.1523, -0.2274, -0.2713, -0.1258, -0.0129, -0.1498,
         -0.0074,  0.2829]], grad_fn=<AddmmBackward0>)

In [16]:
# Persist only the parameters (the state dict), not the model object.
torch.save(net.state_dict(),'MLP_params')

# Restoring requires building the same architecture first, then loading
# the saved parameters into it.
clone = MLP()
# NOTE(review): torch.load unpickles the file, so only load checkpoints from
# a trusted source; newer PyTorch releases offer weights_only=True —
# confirm against the installed version.
clone.load_state_dict(torch.load('MLP_params'))
# eval() switches to evaluation mode and returns the module, which is why
# the cell displays the module structure.
clone.eval()

MLP(
  (hidden): Linear(in_features=20, out_features=256, bias=True)
  (out): Linear(in_features=256, out_features=10, bias=True)
)

In [17]:
# The reloaded model reproduces the original model's output for the same X.
clone1 = clone(X)
clone1

tensor([[-0.2702, -0.0776,  0.1907, -0.2793, -0.1714, -0.0742, -0.2040, -0.1597,
          0.0545,  0.0096],
        [-0.3035, -0.0950,  0.1523, -0.2274, -0.2713, -0.1258, -0.0129, -0.1498,
         -0.0074,  0.2829]], grad_fn=<AddmmBackward0>)

## GPU训练

In [18]:
import torch
import My_utils
from torch import nn
# Number of CUDA devices available to PyTorch.
torch.cuda.device_count()

1

In [19]:
# Create the tensor directly on the device returned by My_utils.try_gpu()
# (project helper; presumably a GPU when one is available — the output
# shows cuda:0).
x = torch.tensor([1,2,3],device=My_utils.try_gpu())
x

tensor([1, 2, 3], device='cuda:0')

In [20]:
# The input and the model are both placed on the device returned by
# My_utils.try_gpu() before the forward pass; the result stays on that
# device (the output shows cuda:0).
net = nn.Sequential(nn.Linear(4,1))
X = torch.rand(size=(2,4),device=My_utils.try_gpu())
net = net.to(device=My_utils.try_gpu())
net(X)

tensor([[ 0.0170],
        [-0.0352]], device='cuda:0', grad_fn=<AddmmBackward0>)