我们先定义一个与上一节中相同的含单个隐藏层的多层感知机

In [2]:

from mxnet import init,nd
from mxnet.gluon import nn

model = nn.Sequential()

model.add(nn.Dense(256,activation='relu'))
model.add(nn.Dense(10))

model.initialize() ##默认方式初始化
X =nd.random.uniform(shape=(2,20))
y = model(X)




In [10]:
#访问模型参数
model[0].params,type(model[0].params)
##访问模型的具体参数
model[0].params['dense6_weight']

Parameter dense6_weight (shape=(256, 20), dtype=float32)

In [12]:
##查看参数的具体值
model[0].weight.data()
#查看参数的梯度
model[0].weight.grad()


[[ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 ..., 
 [ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]]
<NDArray 256x20 @cpu(0)>

In [15]:
#使用collect_params函数来获取net变量所有嵌套
model.collect_params()
model.collect_params('.*weight') ##使用正则表达式来获取想要的参数

sequential3_ (
  Parameter dense6_weight (shape=(256, 20), dtype=float32)
  Parameter dense7_weight (shape=(10, 256), dtype=float32)
)

In [16]:
model.initialize(init=init.Normal(sigma=0.01),force_reinit=True)
model[0].weight.data()[0]


[ -6.54585892e-03   5.46171237e-03  -3.14823980e-03   6.53286325e-03
   2.39426945e-03   1.43013904e-02   8.93270038e-03   1.07622147e-02
  -4.81205340e-03   4.11936874e-03   5.70393493e-03  -3.80902132e-03
  -5.08007919e-03  -1.39692018e-03  -9.47653371e-06   8.14091600e-03
  -1.76932309e-02  -1.07601862e-02   8.86166934e-03  -1.24831125e-02]
<NDArray 20 @cpu(0)>

In [17]:
##常数初始化
model.initialize(init=init.Constant(1),force_reinit=True)
model[0].weight.data()[0]


[ 1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.
  1.  1.]
<NDArray 20 @cpu(0)>

In [20]:
class MyInit(init.Initializer):
    def _init_weight(self,name,data):
        print('Init',name,data.shape)
        data[:] = nd.random.uniform(low=-10,high=10,shape=data.shape)
        data *= data.abs() >=5
model.initialize(MyInit(),force_reinit=True)
model[0].weight.data()[0]

Init dense6_weight (256, 20)
Init dense7_weight (10, 256)



[-0.         -8.59105587 -5.17162752  0.          0.         -0.
 -5.07873631  0.          0.          8.5375824   0.          7.07880211
 -0.         -0.          0.         -0.         -0.          6.45118904
  0.          0.        ]
<NDArray 20 @cpu(0)>

In [24]:
model = nn.Sequential()
shared = nn.Dense(8,activation='relu')
model.add(nn.Dense(8,activation='relu'),
          shared,
          nn.Dense(8,activation='relu',params=shared.params),
          nn.Dense(10))
model.initialize(init=init.Normal(sigma=0.01))
x = nd.random.uniform(shape=(2,20))
model(x)
model[1].weight.data()[0] == model[2].weight.data()[0]


[ 1.  1.  1.  1.  1.  1.  1.  1.]
<NDArray 8 @cpu(0)>

In [4]:
#方法一
#先构建网络，重新为每一层初始化(这种方法每次初始化有问题)
model = nn.Sequential()
with model.name_scope():
    model.add(nn.Dense(4,in_units=4,activation='relu'))
    model.add(nn.Dense(2,in_units=4))
print(model)

paramas = model.collect_params()
print('model paramas is ',paramas)

paramas['sequential1_dense0_weight'].initialize(init=init.One(),force_reinit=True)
paramas['sequential1_dense1_weight '].initialize(init=init.Normal(sigma=0.01),force_reinit=True)

Sequential(
  (0): Dense(4 -> 4, Activation(relu))
  (1): Dense(4 -> 2, linear)
)
model paramas is  sequential2_ (
  Parameter sequential2_dense0_weight (shape=(4, 4), dtype=float32)
  Parameter sequential2_dense0_bias (shape=(4,), dtype=float32)
  Parameter sequential2_dense1_weight (shape=(2, 4), dtype=float32)
  Parameter sequential2_dense1_bias (shape=(2,), dtype=float32)
)


KeyError: 'sequential1_dense0_weight'

In [9]:
##2.构建网络时，为每一层初始化
model = nn.Sequential()
with model.name_scope():
    model.add(nn.Dense(4,in_units=4,activation='relu',weight_initializer=init.One()))
    model.add(nn.Dense(2,in_units=4,weight_initializer=init.Normal(sigma=0.01)))
print(model)
model.initialize()
model[0].weight.data()[0]

Sequential(
  (0): Dense(4 -> 4, Activation(relu))
  (1): Dense(4 -> 2, linear)
)



[ 1.  1.  1.  1.]
<NDArray 4 @cpu(0)>

In [25]:
class MyInit(init.Initializer):
    def __init__(self):
        super(MyInit,self).__init__()
        self._verbose=True
    def _init_weight(self,_,data):
        print('init weight',data.shape)
        nd.random.uniform(low=5,high=10,out=data)

In [26]:
model = nn.Sequential()
with model.name_scope():
    model.add(nn.Dense(4,in_units=4,activation='relu'))
    model.add(nn.Dense(4,in_units=4,activation='relu',params=model[-1].params))
    model.add(nn.Dense(1,in_units=4))

In [27]:
model.initialize(MyInit())
print(model[0].weight.data())
print(model[1].weight.data())

init weight (4, 4)
init weight (1, 4)

[[ 5.61910009  8.89525509  9.24004173  9.32466698]
 [ 9.03659439  7.05698347  7.84550381  5.69986296]
 [ 7.03591633  5.16611195  5.34583473  9.91287422]
 [ 8.48714352  6.86645365  7.26771355  7.10037708]]
<NDArray 4x4 @cpu(0)>

[[ 5.61910009  8.89525509  9.24004173  9.32466698]
 [ 9.03659439  7.05698347  7.84550381  5.69986296]
 [ 7.03591633  5.16611195  5.34583473  9.91287422]
 [ 8.48714352  6.86645365  7.26771355  7.10037708]]
<NDArray 4x4 @cpu(0)>


In [28]:
##参数更新

from mxnet import gluon
from mxnet import autograd
trainer = gluon.Trainer(model.collect_params(),'sgd',{"learning_rate":0.1})
a = nd.random_normal(shape=(4,4))
with autograd.record():
    y = model(a)
y.backward()
trainer.step(1)

In [29]:
print(model[0].weight.grad())
print(model[1].weight.grad())


[[  710.06091309   283.78112793   601.00909424  1105.48657227]
 [  925.45739746    76.22109985   467.36206055   663.99645996]
 [ 1020.82269287   285.03424072   489.3583374   1093.58325195]
 [ 1135.44628906   114.33351135   586.68701172   821.95471191]]
<NDArray 4x4 @cpu(0)>

[[  710.06091309   283.78112793   601.00909424  1105.48657227]
 [  925.45739746    76.22109985   467.36206055   663.99645996]
 [ 1020.82269287   285.03424072   489.3583374   1093.58325195]
 [ 1135.44628906   114.33351135   586.68701172   821.95471191]]
<NDArray 4x4 @cpu(0)>


In [30]:
print(model[0].weight.data())
print(model[1].weight.data())


[[ -65.38699341  -19.48285675  -50.86087036 -101.22399139]
 [ -83.50914001   -0.56512642  -38.89070129  -60.69978714]
 [ -95.04634857  -23.33731079  -43.58999634  -99.44545746]
 [-105.05748749   -4.56689787  -51.40098572  -75.09509277]]
<NDArray 4x4 @cpu(0)>

[[ -65.38699341  -19.48285675  -50.86087036 -101.22399139]
 [ -83.50914001   -0.56512642  -38.89070129  -60.69978714]
 [ -95.04634857  -23.33731079  -43.58999634  -99.44545746]
 [-105.05748749   -4.56689787  -51.40098572  -75.09509277]]
<NDArray 4x4 @cpu(0)>
