# 5.2 Parameter Management

In [4]:
import random

from d2l import mxnet as d2l
from mxnet import autograd, init, nd, np, npx
from mxnet.gluon import nn
from tools import beep_end, show_subtitle, show_title

npx.set_np()

In [5]:
# Two-layer MLP: an 8-unit ReLU hidden layer followed by a single linear output.
net = nn.Sequential()
net.add(nn.Dense(8, activation='relu'),
        nn.Dense(1))
net.initialize()

# Two random samples with four features each; the forward pass is the cell's output.
X = np.random.uniform(size=(2, 4))
net(X)

array([[0.0054572 ],
       [0.00488594]])

In [6]:
# 5.2.1 Parameter Access
# Show the parameter dictionary held by the second (output) layer.
output_layer = net[1]
print(output_layer.params)

dense1_ (
  Parameter dense1_weight (shape=(1, 8), dtype=float32)
  Parameter dense1_bias (shape=(1,), dtype=float32)
)


In [7]:
# Targeted Parameters
# Inspect the output layer's bias: the Parameter's type, its repr, then its value.
bias = net[1].bias
print(type(bias))
print(bias)
print(bias.data())

<class 'mxnet.gluon.parameter.Parameter'>
Parameter dense1_bias (shape=(1,), dtype=float32)
[0.]


In [9]:
# The gradient is all zeros because no backward pass has been run yet.
output_weight = net[1].weight
output_weight.grad()

array([[0., 0., 0., 0., 0., 0., 0., 0.]])

In [15]:
# All Parameters at Once
all_params = net.collect_params()
print(all_params)
print(net[0].collect_params())

# NOTE: the key below relies on the auto-generated layer name 'dense1'.
bias_param = all_params['dense1_bias']
print(bias_param)
print(bias_param.data)    # not called: prints the bound method itself
print(bias_param.data())  # called: prints the stored values

sequential1_ (
  Parameter dense0_weight (shape=(8, 4), dtype=float32)
  Parameter dense0_bias (shape=(8,), dtype=float32)
  Parameter dense1_weight (shape=(1, 8), dtype=float32)
  Parameter dense1_bias (shape=(1,), dtype=float32)
)
dense0_ (
  Parameter dense0_weight (shape=(8, 4), dtype=float32)
  Parameter dense0_bias (shape=(8,), dtype=float32)
)
Parameter dense1_bias (shape=(1,), dtype=float32)
<bound method Parameter.data of Parameter dense1_bias (shape=(1,), dtype=float32)>
[0.]


In [16]:
# Collecting Parameters from Nested Blocks
def block1():
    """Return a Sequential of two ReLU Dense layers (32 units, then 16)."""
    layers = nn.Sequential()
    layers.add(nn.Dense(32, activation='relu'),
               nn.Dense(16, activation='relu'))
    return layers

def block2():
    """Return a Sequential that stacks four separately built block1() networks."""
    stacked = nn.Sequential()
    stacked.add(*[block1() for _ in range(4)])
    return stacked

# Wrap the nested stack and finish with a 10-unit Dense layer.
rgnet = nn.Sequential()
rgnet.add(block2(), nn.Dense(10))
rgnet.initialize()
rgnet(X)

array([[-6.3465850e-09, -1.1096741e-09,  6.4161783e-09,  6.6354131e-09,
        -1.1265494e-09,  1.3285140e-10,  9.3619370e-09,  3.2229102e-09,
         5.9429874e-09,  8.8181427e-09],
       [-8.6219414e-09, -7.5150730e-10,  8.3133251e-09,  8.9321119e-09,
        -1.6740000e-09,  3.2406028e-10,  1.2115975e-08,  4.4926440e-09,
         8.0741742e-09,  1.2075873e-08]])

In [17]:
# The bound method's repr embeds the network layout; calling it lists the parameters.
collect = rgnet.collect_params
print(collect)
print(collect())

<bound method Block.collect_params of Sequential(
  (0): Sequential(
    (0): Sequential(
      (0): Dense(4 -> 32, Activation(relu))
      (1): Dense(32 -> 16, Activation(relu))
    )
    (1): Sequential(
      (0): Dense(16 -> 32, Activation(relu))
      (1): Dense(32 -> 16, Activation(relu))
    )
    (2): Sequential(
      (0): Dense(16 -> 32, Activation(relu))
      (1): Dense(32 -> 16, Activation(relu))
    )
    (3): Sequential(
      (0): Dense(16 -> 32, Activation(relu))
      (1): Dense(32 -> 16, Activation(relu))
    )
  )
  (1): Dense(16 -> 10, linear)
)>
sequential2_ (
  Parameter dense2_weight (shape=(32, 4), dtype=float32)
  Parameter dense2_bias (shape=(32,), dtype=float32)
  Parameter dense3_weight (shape=(16, 32), dtype=float32)
  Parameter dense3_bias (shape=(16,), dtype=float32)
  Parameter dense4_weight (shape=(32, 16), dtype=float32)
  Parameter dense4_bias (shape=(32,), dtype=float32)
  Parameter dense5_weight (shape=(16, 32), dtype=float32)
  Parameter dense5_bi

In [22]:
# Index path: rgnet -> block2 stack -> second block1 -> its first Dense layer.
nested_dense = rgnet[0][1][0]
nested_dense.weight.data().shape

(32, 16)

# 5.2.2 Parameter Initialization

In [34]:
# Built-in Initialization
# Re-initialize the network with init.Normal(sigma=0.01), replacing existing values.
gaussian_init = init.Normal(sigma=0.01)
net.initialize(init=gaussian_init, force_reinit=True)
print(net.collect_params)
for layer_idx in (0, 1):
    print(net[layer_idx].weight.data().shape)
print(net[1].weight.data())

<bound method Block.collect_params of Sequential(
  (0): Dense(4 -> 8, Activation(relu))
  (1): Dense(8 -> 1, linear)
)>
(8, 4)
(1, 8)
[[ 7.1235225e-03 -4.0872735e-03 -2.2102301e-03 -7.9420395e-03
  -1.4507023e-05 -6.5906444e-03  9.6617080e-03 -5.1257055e-04]]


In [35]:
# Different initializers per target: Xavier for the first layer's weight only,
# and the constant 42 for the output layer.
hidden_weight = net[0].weight
hidden_weight.initialize(init=init.Xavier(), force_reinit=True)
net[1].initialize(init=init.Constant(42), force_reinit=True)
print(hidden_weight.data()[0])
print(net[1].weight.data())

[-0.10038978  0.07147181  0.24065346  0.1369614 ]
[[42. 42. 42. 42. 42. 42. 42. 42.]]


# Custom Initialization
$$
w\sim
\begin{cases}
    U(5,10)     &\text{with probability } \frac{1}{4}\\
    0           &\text{with probability } \frac{1}{2}\\
    U(-10,-5)   &\text{with probability } \frac{1}{4}
\end{cases}
$$


In [36]:
class MyInit(init.Initializer):
    """Custom initializer: uniform draws in [-10, 10] with small magnitudes zeroed."""

    def _init_weight(self, name, data):
        """Fill ``data`` in place and print ``name`` together with the array shape."""
        print("Init", name, data.shape)
        data[:] = np.random.uniform(low=-10, high=10, size=data.shape)
        # Keep only entries with |value| >= 5; multiplying by the mask zeroes the rest.
        keep_mask = np.abs(data) >= 5
        data *= keep_mask
# Re-initialize the whole network with the custom initializer, then show two rows.
net.initialize(init=MyInit(), force_reinit=True)
net[0].weight.data()[:2]

Init dense0_weight (8, 4)
Init dense1_weight (1, 8)


array([[-5.4289   , -0.       ,  0.       ,  0.       ],
       [-7.6447773, -0.       , -0.       , -7.3833103]])

In [37]:
# Edit the first layer's weight array through slice assignment, then show row 0.
first_weight = net[0].weight.data()
first_weight[:] += 1
first_weight[0, 0] = 42
first_weight[0]

array([42.,  1.,  1.,  1.])

In [46]:
# 5.2.3 Tied Parameters
# Shared parameters: layer 3 is constructed from shared.params, so it reuses
# the parameters of layer 1 (the `shared` layer).
net = nn.Sequential()
shared = nn.Dense(8, activation='relu')
layers = [
    nn.Dense(8, activation='relu'),
    shared,
    nn.Dense(8, activation='relu'),
    nn.Dense(8, activation='relu', params=shared.params),
    nn.Dense(10),
]
net.add(*layers)
net.initialize()

X = np.random.uniform(size=(2, 20))
net(X)

array([[ 4.5340348e-06,  2.7714882e-06, -5.8970040e-06, -1.9037822e-06,
        -3.3228816e-06, -2.0109283e-06, -1.0713403e-06, -3.0010981e-06,
        -6.3866952e-07, -3.3046599e-06],
       [ 4.9130149e-06,  2.7640087e-06, -6.5713130e-06, -1.5822588e-06,
        -3.2026455e-06, -1.7541328e-06, -1.1843973e-06, -2.9902185e-06,
        -1.1149889e-06, -3.5961193e-06]])

In [47]:
print(net.collect_params)
# Compare first weight rows: layer 1 against the separate layer 2, then against
# layer 3, which was built from the shared layer's params.
shared_row = net[1].weight.data()[0]
print(shared_row == net[2].weight.data()[0])
print(shared_row == net[3].weight.data()[0])

<bound method Block.collect_params of Sequential(
  (0): Dense(20 -> 8, Activation(relu))
  (1): Dense(8 -> 8, Activation(relu))
  (2): Dense(8 -> 8, Activation(relu))
  (3): Dense(8 -> 8, Activation(relu))
  (4): Dense(8 -> 10, linear)
)>
[False False False False False False False False]
[ True  True  True  True  True  True  True  True]
