# 初始化模型参数
仍然使用 MLP 这个例子来详细解释如何初始化模型参数。

In [2]:
from mxnet.gluon import nn
from mxnet import nd

def get_net():
    """Build a fresh, uninitialized one-layer network.

    Returns:
        An ``nn.Sequential`` container holding a single ``nn.Dense`` layer
        with 10 units and ReLU activation. The parameters are not
        initialized here.
    """
    model = nn.Sequential()
    with model.name_scope():
        # Construct the layer inside the name scope, as the original does.
        hidden = nn.Dense(units=10, activation='relu')
        model.add(hidden)
    return model

x = nd.random_uniform(shape=(3,5))


  from ._conv import register_converters as _register_converters


In [4]:
# Running the forward pass on a freshly built network without initializing it
# first raises an error, because its parameters have not been initialized.
import sys
try:
    net = get_net()
    net(x)  # forward pass on the uninitialized network; expected to raise
except RuntimeError as err:
    # Write the message to stderr instead of letting the exception propagate.
    sys.stderr.write(str(err))


Parameter sequential0_dense0_weight has not been initialized. Note that you should initialize parameters and create Trainer with Block.collect_params() instead of Block.params because the later does not include Parameters of nested child Blocks

In [6]:
# Correct usage: initialize the parameters first, then run the forward pass.
net.initialize()
net(x)

  "Set force_reinit=True to re-initialize."%self.name)



[[0.         0.05347166 0.10563602 0.         0.         0.03620155
  0.10653425 0.04017445 0.         0.        ]
 [0.         0.04869549 0.06332826 0.         0.         0.06620608
  0.06004141 0.01575539 0.         0.        ]
 [0.         0.04273751 0.0169073  0.02372442 0.         0.07875059
  0.02189449 0.01091669 0.         0.        ]]
<NDArray 3x10 @cpu(0)>

## 访问模型参数
之前我们提到过，可以通过 weight 和 bias 访问 Dense 的参数，它们都是 Parameter 类的实例：

In [9]:
# net[0] is the first layer added to the Sequential container. Its weight and
# bias attributes are Parameter objects; data() returns the stored NDArray.
w = net[0].weight
b = net[0].bias
print(w.data(),b.data())


[[ 0.01847461 -0.03004881 -0.02461551 -0.01465906 -0.05932271]
 [-0.0595007   0.0434817   0.04195441  0.05774786  0.00482907]
 [ 0.04922146  0.0243923  -0.06268584  0.04367422  0.03679534]
 [-0.06364554  0.03010933  0.05611894 -0.02152951  0.03825361]
 [-0.04667019  0.0062413   0.02105976 -0.00708959 -0.01553655]
 [ 0.01372761  0.0453613   0.06544485 -0.0313003   0.01013632]
 [ 0.03303149  0.0550149  -0.0692654   0.02017318  0.06154171]
 [-0.03021475  0.0246454  -0.00283234  0.02891331  0.04083297]
 [-0.04714675 -0.06862465  0.01053514 -0.02463352  0.05170998]
 [ 0.01649272  0.000875   -0.00304539 -0.06312138  0.00185619]]
<NDArray 10x5 @cpu(0)> 
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
<NDArray 10 @cpu(0)>


## 使用不同的初始函数来进行初始化
默认的初始化函数会把所有权重初始化为 [-0.07, 0.07] 之间均匀分布的随机数。我们同样可以使用别的初始化方法。

In [11]:
# Re-initialize with a normal distribution of mean 0 and standard deviation
# 0.02 (sigma is the standard deviation, not the variance).
from mxnet import init

params = net.collect_params()
# print(params)
# Two ways to look up a single parameter: indexing uses the full name, while
# get() is given the name without the 'sequential0_' prefix.
# NOTE(review): presumably the ParameterDict prepends its own prefix in get()
# -- confirm against the ParameterDict documentation.
print(params['sequential0_dense0_bias'].data())
print(params.get('dense0_weight').data())

# The parameters were already initialized earlier, so force_reinit=True is
# passed to overwrite the existing values.
params.initialize(init=init.Normal(sigma=0.02), 
                  force_reinit=True)
print(net[0].weight.data(), net[0].bias.data())

# Re-initialize every parameter with ones.
params.initialize(init=init.One(), 
                  force_reinit=True)
print(net[0].weight.data(), net[0].bias.data())
 


[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
<NDArray 10 @cpu(0)>

[[ 0.01847461 -0.03004881 -0.02461551 -0.01465906 -0.05932271]
 [-0.0595007   0.0434817   0.04195441  0.05774786  0.00482907]
 [ 0.04922146  0.0243923  -0.06268584  0.04367422  0.03679534]
 [-0.06364554  0.03010933  0.05611894 -0.02152951  0.03825361]
 [-0.04667019  0.0062413   0.02105976 -0.00708959 -0.01553655]
 [ 0.01372761  0.0453613   0.06544485 -0.0313003   0.01013632]
 [ 0.03303149  0.0550149  -0.0692654   0.02017318  0.06154171]
 [-0.03021475  0.0246454  -0.00283234  0.02891331  0.04083297]
 [-0.04714675 -0.06862465  0.01053514 -0.02463352  0.05170998]
 [ 0.01649272  0.000875   -0.00304539 -0.06312138  0.00185619]]
<NDArray 10x5 @cpu(0)>

[[ 0.0170481   0.01593742  0.00953097  0.01237212  0.01100659]
 [ 0.0149727  -0.04226414 -0.00451335  0.00651215 -0.01914961]
 [ 0.01555002 -0.01896955  0.01123714 -0.02291898 -0.00462218]
 [ 0.0240115   0.00868043 -0.00074824 -0.02312076 -0.00058032]
 [-0.00211779  0.00731891  0.0027211  

## 共享模型参数
有时候我们想在层之间共享同一份参数，这时可以在构造 Block 时通过 params 传入已有的参数来手动指定，而不是让系统自动生成。

In [25]:
# Build a network whose second and third Dense layers share one set of
# parameters.
net = nn.Sequential()
with net.name_scope():
    net.add(nn.Dense(4, activation="relu"))
    net.add(nn.Dense(4, activation="relu"))
    # net[-1] is the layer added on the previous line; passing its params
    # makes this new layer reuse them instead of creating its own.
    net.add(nn.Dense(4, activation="relu", 
                     params=net[-1].params))
    net.add(nn.Dense(2))
    
net.initialize()
net(x)
# Both prints are expected to show the same values, since the weights of
# layers 1 and 2 are shared.
print(net[1].weight.data())
print(net[2].weight.data())


[[ 0.02227607  0.06482667 -0.00704898 -0.05877154]
 [-0.01320309 -0.00964738  0.03057034  0.06518946]
 [ 0.04743372 -0.00585949  0.03721602 -0.00719421]
 [ 0.05950423  0.03667859 -0.0231606   0.06820413]]
<NDArray 4x4 @cpu(0)>

[[ 0.02227607  0.06482667 -0.00704898 -0.05877154]
 [-0.01320309 -0.00964738  0.03057034  0.06518946]
 [ 0.04743372 -0.00585949  0.03721602 -0.00719421]
 [ 0.05950423  0.03667859 -0.0231606   0.06820413]]
<NDArray 4x4 @cpu(0)>


## 自定义初始化方法
下面自定义一个初始化方法：通过重写（override）_init_weight 来实现不同的初始化方式。

In [12]:
class MyInit(init.Initializer):
    """Custom initializer that fills weights with uniform random values.

    Weights are drawn uniformly between 5 and 10. Each weight initialization
    also prints the shape of the array being filled.
    """

    def __init__(self):
        super(MyInit, self).__init__()
        # set_verbose is a method inherited from Initializer and must be
        # called. Assigning ``True`` to the attribute would only shadow the
        # method on this instance and leave verbose mode disabled.
        self.set_verbose(verbose=True)

    def _init_weight(self, _, arr):
        """Fill a weight array in place with uniform random values.

        Args:
            _: First positional argument supplied by the framework; unused.
            arr: The array to initialize. It is written in place.
        """
        # Writing through out=arr means the shape does not have to be given.
        print('init weight', arr.shape)
        nd.random_uniform(low=5, high=10, out=arr)
        
        
        
        
        