In [2]:
import mxnet as mx
from mxnet import init, nd
from mxnet.gluon import nn

# Two-layer perceptron: a 256-unit ReLU hidden layer followed by a 10-unit output layer.
net = nn.Sequential()
net.add(nn.Dense(256, activation='relu'),
        nn.Dense(10))
# No initializer is passed, so the default initialization scheme is used.
net.initialize()

# Run one forward pass on a random batch of 2 samples with 20 features each.
X = nd.random.uniform(shape=(2, 20))
Y = net(X)

In [7]:
# Square brackets index any layer of the Sequential; index 0 is the hidden
# layer, i.e. the first layer that was added. Shows that layer's parameters
# together with the type of the container holding them.
net[0].params, type(net[0].params)

(dense8_ (
   Parameter dense8_weight (shape=(256, 20), dtype=float32)
   Parameter dense8_bias (shape=(256,), dtype=float32)
 ), mxnet.gluon.parameter.ParameterDict)

In [9]:
# Look up the same parameter two ways: by name in the layer's parameter
# dictionary, and through the layer's `weight` attribute.
# The key is built from the layer's own prefix because the numeric suffix in
# names such as `dense8_weight` is assigned automatically and differs between
# kernel sessions; a hard-coded key raises KeyError after a restart.
net[0].params[net[0].prefix + 'weight'], net[0].weight

(Parameter dense8_weight (shape=(256, 20), dtype=float32),
 Parameter dense8_weight (shape=(256, 20), dtype=float32))

In [10]:
# data() returns the parameter's current values as an NDArray.
net[0].weight.data()


[[ 0.06700657 -0.00369488  0.0418822  ... -0.05517294 -0.01194733
  -0.00369594]
 [-0.03296221 -0.04391347  0.03839272 ...  0.05636378  0.02545484
  -0.007007  ]
 [-0.0196689   0.01582889 -0.00881553 ...  0.01509629 -0.01908049
  -0.02449339]
 ...
 [ 0.00010955  0.0439323  -0.04911506 ...  0.06975312  0.0449558
  -0.03283203]
 [ 0.04106557  0.05671307 -0.00066976 ...  0.06387014 -0.01292654
   0.00974177]
 [ 0.00297424 -0.0281784  -0.06881659 ... -0.04047417  0.00457048
   0.05696651]]
<NDArray 256x20 @cpu(0)>

In [11]:
# grad() returns the gradient stored for the parameter.
# No backward pass has been run yet, so every entry is still 0.
net[0].weight.grad()


[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]
<NDArray 256x20 @cpu(0)>

In [12]:
# Read the bias values of the output layer.
net[1].bias.data()


[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
<NDArray 10 @cpu(0)>

In [13]:
# Collect the parameters of every layer nested inside net (for example, layers
# nested via add()). The result is again a dictionary mapping parameter names
# to Parameter instances.
net.collect_params()

sequential4_ (
  Parameter dense8_weight (shape=(256, 20), dtype=float32)
  Parameter dense8_bias (shape=(256,), dtype=float32)
  Parameter dense9_weight (shape=(10, 256), dtype=float32)
  Parameter dense9_bias (shape=(10,), dtype=float32)
)

In [14]:
# Filter the collected parameters by matching their names against a regular expression.
net.collect_params('.*weight')

sequential4_ (
  Parameter dense8_weight (shape=(256, 20), dtype=float32)
  Parameter dense9_weight (shape=(10, 256), dtype=float32)
)

In [3]:
# Initializing a model that has already been initialized requires force_reinit=True.
# The parameters are re-initialized with init.Normal(sigma=0.01); the first
# row of the hidden layer's weight is displayed.
net.initialize(init=init.Normal(sigma=0.01), force_reinit=True)
net[0].weight.data()[0]


[ 0.00841876 -0.01005536  0.03132214 -0.00435899 -0.00492951  0.01437187
 -0.00318141 -0.00162825  0.0068361   0.01610669 -0.01264223  0.00138954
  0.00667116  0.00214447  0.00993847  0.00108742  0.00535343 -0.0186105
 -0.00468956 -0.0067683 ]
<NDArray 20 @cpu(0)>

In [4]:
# Initialize the weight parameters with a constant (here 1) and display the
# first row of the hidden layer's weight.
net.initialize(init=init.Constant(1), force_reinit=True)
net[0].weight.data()[0]


[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
<NDArray 20 @cpu(0)>

In [6]:
# Custom initialization method
class MyInit(init.Initializer):
    """Custom initializer that fills a weight with sparse, large-magnitude values."""

    def _init_weight(self, name, data):
        """Fill `data` in place with the custom weight initialization.

        Prints the parameter name and shape, overwrites `data` with uniform
        random values drawn between -10 and 10, then zeroes every entry
        whose absolute value is below 5.
        """
        target_shape = data.shape
        print('Init', name, target_shape)
        data[:] = nd.random.uniform(low=-10, high=10, shape=target_shape)
        # Entries with magnitude below 5 become 0; entries of magnitude 5 or
        # more are kept unchanged (multiplication by the comparison result).
        keep_mask = data.abs() >= 5
        data *= keep_mask

# Re-initialize the network with the custom initializer and display the first
# row of the hidden layer's weight.
net.initialize(MyInit(), force_reinit=True)
net[0].weight.data()[0]

Init dense2_weight (256, 20)
Init dense3_weight (10, 256)



[ 6.3104763 -8.299978  -6.8117104 -8.110842   0.         0.
 -0.         8.98595   -8.745741   8.73493   -0.        -0.
 -0.         0.         6.9807663  0.        -9.333907  -0.
  9.179653  -7.6045732]
<NDArray 20 @cpu(0)>

In [7]:
# Overwrite the weight directly with set_data(): the current values plus 1.
net[0].weight.set_data(net[0].weight.data() + 1)
net[0].weight.data()[0]


[ 7.3104763 -7.2999783 -5.8117104 -7.1108418  1.         1.
  1.         9.98595   -7.745741   9.73493    1.         1.
  1.         1.         7.9807663  1.        -8.333907   1.
 10.179653  -6.6045732]
<NDArray 20 @cpu(0)>

In [10]:
net = nn.Sequential()
shared = nn.Dense(8, activation='relu')
# The second hidden layer (`shared`) and the third hidden layer share their
# model parameters: the third layer is constructed from shared.params.
net.add(nn.Dense(8, activation='relu'))
net.add(shared)
net.add(nn.Dense(8, activation='relu', params=shared.params))
net.add(nn.Dense(10))
net.initialize()

X = nd.random.uniform(shape=(2, 20))
net(X)

# Element-wise comparison of the first weight row of layers 1 and 2.
# During backpropagation, the gradients of the second and third hidden layers
# are both accumulated on the shared parameters.
net[1].weight.data()[0] == net[2].weight.data()[0]


[1. 1. 1. 1. 1. 1. 1. 1.]
<NDArray 8 @cpu(0)>