### 模型参数的访问、初始化和共享:
    定义单隐藏层的多层感知机，使用默认方式初始化它的参数，并做一次前向计算

In [31]:
import torch
import numpy as np
import sys

In [32]:
# Multilayer perceptron with a single hidden layer:
# 4 input features -> 3 hidden units (ReLU) -> 1 output.
# Children are registered under the names '0', '1', '2', in that order.
net = torch.nn.Sequential()
net.add_module('0', torch.nn.Linear(in_features=4, out_features=3))
net.add_module('1', torch.nn.ReLU())
net.add_module('2', torch.nn.Linear(in_features=3, out_features=1))
net

Sequential(
  (0): Linear(in_features=4, out_features=3, bias=True)
  (1): ReLU()
  (2): Linear(in_features=3, out_features=1, bias=True)
)

In [33]:
# One forward pass: x is a random batch of 2 samples with 4 features each,
# and y is the sum of all network outputs (later used as the backward() root).
x = torch.rand(2,4)
y = net(x).sum()

#### 访问模型参数:
    可以通过Module类的 parameters()或者 named_parameters() 方法来访问所有参数（以迭代器的形式返回）
        后者除了返回参数 Tensor 外,还返回其对应的参数名字

In [34]:
# Show what kind of object named_parameters() hands back.
print(type(net.named_parameters()))
# Each yielded name is automatically prefixed with the index of its layer.
for name, param in net.named_parameters():
    print('name:{} \t param size:{}'.format(name, param.shape))

<class 'generator'>
name:0.weight 	 param size:torch.Size([3, 4])
name:0.bias 	 param size:torch.Size([3])
name:2.weight 	 param size:torch.Size([1, 3])
name:2.bias 	 param size:torch.Size([1])


In [35]:
# Inspect the parameters of a single layer (index 0) of the network.
for name, param in net[0].named_parameters():
    print(name, param.shape)

weight torch.Size([3, 4])
bias torch.Size([3])


##### Parameter 类型:
    返回的 param 的类型为torch.nn.parameter.Parameter
        是Tensor的子类，与Tensor不同的是如果一个 Tensor 是 Parameter，那么它会自动被添加到模型的参数列表里

In [36]:
class Net(torch.nn.Module):
    """Toy module contrasting a Parameter with a plain Tensor attribute.

    Attributes:
        weight_1: 2x2 ``torch.nn.Parameter``; being a Parameter, it is added
            to the module's parameter list.
        weight_2: 2x2 plain ``torch.Tensor``; kept as an ordinary attribute,
            so it does not show up among the module's parameters.
    """

    def __init__(self):
        super().__init__()

        self.weight_1 = torch.nn.Parameter(torch.rand(2, 2))
        self.weight_2 = torch.rand(2, 2)

    def forward(self, x):
        """Placeholder forward pass: ignores ``x`` and returns ``None``."""
        return None

In [37]:
# Instantiate the demo module; the bare name on the last line displays its repr.
n = Net()
n

Net()

In [38]:
# weight_1 shows up in the parameter list, whereas weight_2 was never added to it.
for name, param in n.named_parameters():
    print(name, param.shape)

weight_1 torch.Size([2, 2])


In [39]:
# A Parameter is a Tensor, so it has every Tensor attribute (.data, .grad, ...).
weight = next(net[0].parameters())  # first parameter of layer 0
print(weight.data)
print(weight.grad)  # the gradient is None before back-propagation
y.backward()
print(weight.grad)  # gradient after the backward pass

tensor([[ 0.2292,  0.4193, -0.4085,  0.2712],
        [-0.3930,  0.4300,  0.1579,  0.1826],
        [ 0.3080, -0.3230, -0.0028,  0.1972]])
None
tensor([[-0.4358, -0.6243, -0.5315, -0.2189],
        [ 0.0000,  0.0000,  0.0000,  0.0000],
        [ 0.3779,  0.5413,  0.4608,  0.1898]])


#### 初始化模型参数
    虽然 nn.Module 的模块参数都采取了较为合理的初始化策略
        但通常需要使用其他方法来初始化权重，torch.nn.init 模块里提供了多种预设的初始化方法

In [40]:
# Re-initialise every weight tensor in place from a normal distribution
# (mean 0, std 0.01); parameters whose name lacks 'weight' are skipped.
for name, param in net.named_parameters():
    if 'weight' not in name:
        continue
    torch.nn.init.normal_(param, mean=0, std=0.01)
    print(name, param.data)

0.weight tensor([[-3.7858e-03,  1.0640e-02,  1.5413e-03,  1.4263e-03],
        [ 1.7665e-03,  1.0520e-05,  1.4230e-02, -1.8256e-03],
        [ 2.3419e-02, -4.2842e-03,  6.1279e-03,  1.6328e-02]])
2.weight tensor([[-0.0008,  0.0018, -0.0026]])


In [41]:
# Set every bias to the constant 0 in place, printing it before and after;
# parameters whose name lacks 'bias' are skipped.
for name, param in net.named_parameters():
    if 'bias' not in name:
        continue
    print('初始化前:', param.data)
    torch.nn.init.constant_(param, val=0)
    print('初始化后:', param.data)

初始化前: tensor([-0.0562, -0.4443,  0.1757])
初始化后: tensor([0., 0., 0.])
初始化前: tensor([-0.3594])
初始化后: tensor([0.])


#### 自定义初始化方法

In [42]:
def init_weight_(tensor, low=0, high=5):
    """Fill ``tensor`` in place with samples from a uniform distribution.

    A custom initialiser is simply a function that overwrites a tensor's
    values in place without the write being recorded for gradients.

    Args:
        tensor: tensor (e.g. a model parameter) to overwrite in place.
        low: lower bound of the sampling range. Defaults to 0.
        high: upper bound of the sampling range. Defaults to 5.

    Returns:
        None. ``tensor`` itself is modified.
    """
    # Do the in-place write under no_grad() so it is not tracked by autograd.
    with torch.no_grad():
        tensor.uniform_(low, high)

In [43]:
# Apply the custom initialiser to every weight, printing each tensor before
# and after; parameters whose name lacks 'weight' are skipped.
for name, param in net.named_parameters():
    if 'weight' not in name:
        continue
    print(name, param.data)
    init_weight_(param)
    print(name, param.data)

0.weight tensor([[-3.7858e-03,  1.0640e-02,  1.5413e-03,  1.4263e-03],
        [ 1.7665e-03,  1.0520e-05,  1.4230e-02, -1.8256e-03],
        [ 2.3419e-02, -4.2842e-03,  6.1279e-03,  1.6328e-02]])
0.weight tensor([[2.0285, 4.9196, 0.6261, 1.5578],
        [3.2110, 1.9888, 2.0549, 3.2557],
        [3.6277, 3.0509, 4.8495, 4.8336]])
2.weight tensor([[-0.0008,  0.0018, -0.0026]])
2.weight tensor([[4.6753, 0.9803, 1.4547]])


In [44]:
# Parameter values can also be rewritten through .data, which changes the
# values without affecting the gradients.
for name, param in net.named_parameters():
    if 'bias' not in name:
        continue
    print(name, param.data)
    param.data += 1
    print(name, param.data)

0.bias tensor([0., 0., 0.])
0.bias tensor([1., 1., 1.])
2.bias tensor([0.])
2.bias tensor([1.])


#### 共享模型参数
    希望在多个层之间共享模型参数
    可以通过在 Module 类的 forward 函数里多次调用同一个层来实现
    此外，传入 Sequential 的模块是同一个 Module 实例的话参数也是共享的

In [45]:
# Put one and the same Linear instance into both slots of the Sequential,
# so the two layers share their parameters.
linear = torch.nn.Linear(in_features=1, out_features=1, bias=True)
net = torch.nn.Sequential(linear, linear)
print(net)

Sequential(
  (0): Linear(in_features=1, out_features=1, bias=True)
  (1): Linear(in_features=1, out_features=1, bias=True)
)


In [46]:
# List the parameters of the Sequential that was built from the same Linear
# instance twice; only one weight/bias pair (prefix 0) is reported.
for name,param in net.named_parameters():
    print(name,param.data)

0.weight tensor([[0.6267]])
0.bias tensor([0.4458])


True


In [49]:
# The two linear layers are in fact one and the same object.
print(net[0] is net[1])
# Element-wise comparison of the two weight tensors, displayed as the cell output.
net[0].weight == net[1].weight

True


tensor([[True]])