# 创建神经网络

之前使用的`nn.Sequential`，它是`nn.Block`的简单形式

In [1]:
from mxnet import nd
from mxnet.gluon import nn

# Build a two-layer network with the Sequential container: a 256-unit
# ReLU layer followed by a 10-unit output layer.
net = nn.Sequential()
with net.name_scope():
    # Both layers are created inside the container's name scope and then
    # appended in order.
    for layer in (nn.Dense(256, activation='relu'), nn.Dense(10)):
        net.add(layer)

print(net)

  from ._conv import register_converters as _register_converters


Sequential(
  (0): Dense(None -> 256, Activation(relu))
  (1): Dense(None -> 10, linear)
)


## 使用`nn.Block`定义

`nn.Sequential` 是`nn.Block`的简单形式

下面使用`nn.Block`定义相同的神经网络

In [2]:
class MLP(nn.Block):
    """Two-layer perceptron written as an explicit ``nn.Block`` subclass.

    ``dense0`` is a 256-unit hidden layer and ``dense1`` is a 10-unit
    output layer; the ReLU on the hidden layer is applied in ``forward``.
    """

    def __init__(self, **kwargs):
        # Call the constructor of the parent class Block to perform the
        # required initialization. Passing **kwargs through means other
        # constructor arguments (e.g. the model parameters ``params``) can
        # still be given when an instance is created.
        super(MLP, self).__init__(**kwargs)
        with self.name_scope():
            self.dense0 = nn.Dense(256)  # hidden layer
            self.dense1 = nn.Dense(10)  # output layer

    def forward(self, x):
        """Return ``dense1`` applied to the ReLU of ``dense0(x)``."""
        hidden = nd.relu(self.dense0(x))
        return self.dense1(hidden)

可以看到，使用`nn.Block`定义网络时需要创建它的一个子类，并且该子类至少包含两个函数：

* `__init__`:创建参数，这里即两个dense层
* `forward()`:定义网络计算

In [3]:
# Instantiate the MLP block and print its structure.
net2 = MLP()
print(net2)
# Initialize the block's parameters before feeding data through it.
net2.initialize()
# Input created with nd.random_uniform, shape (4, 20).
x = nd.random_uniform(shape=(4,20))
# Calling the block on x produces the output y.
y = net2(x)
# Bare expression: as the last line of a notebook cell this displays y.
y

MLP(
  (dense1): Dense(None -> 10, linear)
  (dense0): Dense(None -> 256, linear)
)



[[ 0.03126615  0.04562764  0.00039858 -0.08772386 -0.05355632  0.02904574
   0.08102557 -0.01433946 -0.0422415   0.06047882]
 [ 0.02871901  0.03652266  0.0063005  -0.05650971 -0.07189323  0.08615957
   0.05951559 -0.06045963 -0.02990259  0.05651   ]
 [ 0.02147349  0.04818897  0.05321142 -0.1261686  -0.06850231  0.09096343
   0.04064303 -0.05064792 -0.02200241  0.04859561]
 [ 0.03780477  0.0751239   0.03290457 -0.11641112 -0.03254965  0.0586529
   0.02542158 -0.01697343 -0.00049651  0.05892839]]
<NDArray 4x10 @cpu(0)>

MLP的其他指令

* `super(MLP, self).__init__(**kwargs)`:调用`nn.Block`的init函数，它提供了`prefix(指定名字)`和`params(指定模型参数)`两个参数
* `self.name_scope`:调用`nn.Block`的name_scope()函数。nn.Dense的定义在`scope`里面。作用是在参数的名字前面加上前缀(prefix)使得在系统中独一无二。默认也会自动生成名字，我们也可以手动指定。

In [4]:
# Without an explicit prefix the network is named automatically:
# mlp0 first, then mlp1, mlp2, ...
default_name = net2.dense0.name
print('default prefix:', default_name)

# The network's name can instead be chosen by hand through ``prefix``.
net3 = MLP(prefix='another_mlp_')
custom_name = net3.dense0.name
print('customized prefix', custom_name)

('default prefix:', 'mlp0_dense0')
('customized prefix', 'another_mlp_dense0')


## `nn.Block`到底是什么东西？

在`gluon`中,`nn.Block`是一个一般化的部件。整个神经网络可以是一个`Block`，每一层也可以是一个`Block`。我们可以无限嵌套`Block`构建新的`Block`

`nn.Block`主要提供:
* 存储参数
* 描述`forward`如何执行
* 自动求导

## `nn.Sequential`是什么？

`nn.Sequential`是一个`nn.Block`的容器，通过`add`添加`nn.Block`。自动生成`forward()`函数。


使用`nn.Block`可以更灵活地定义网络模型：

In [13]:
class FancyMLP(nn.Block):
    """Block combining one Dense layer with a hand-made weight matrix.

    The single ``dense`` layer is applied twice in ``forward``, and
    ``weight`` is an NDArray created directly in ``__init__`` instead of
    through a layer.
    """

    def __init__(self, **kwargs):
        super(FancyMLP, self).__init__(**kwargs)
        with self.name_scope():
            self.dense = nn.Dense(256)
            # Weight created and filled by hand, shape (256, 20).
            self.weight = nd.random_uniform(shape=(256, 20))

    def forward(self, x):
        """Apply dense+ReLU, then dot-with-weight+1+ReLU, then dense+ReLU.

        The two intermediate results are printed as they are computed.
        """
        first = nd.relu(self.dense(x))
        print('layer 1:', first)
        second = nd.relu(nd.dot(first, self.weight) + 1)
        print('layer 2:', second)
        # The same dense layer is reused for the final step.
        return nd.relu(self.dense(second))

这里手动创建并初始化了权重`weight`，并重复使用了`dense`层

In [14]:
# Instantiate FancyMLP and initialize it before use.
fancy_mlp = FancyMLP()
fancy_mlp.initialize()
# Reuses the input x created in an earlier cell; forward() prints the
# two intermediate layers as a side effect.
y = fancy_mlp(x)
print(y.shape)

('layer 1:', 
[[0.         0.         0.         ... 0.         0.09402385 0.        ]
 [0.         0.01917659 0.         ... 0.         0.10604069 0.0302888 ]
 [0.         0.00497293 0.         ... 0.         0.09607705 0.07647321]
 [0.         0.         0.         ... 0.         0.02536895 0.00385839]]
<NDArray 4x256 @cpu(0)>)
('layer 2:', 
[[6.2171364 6.167362  5.785584  5.6673203 6.154541  6.0474033 5.8763294
  6.005458  6.3182764 6.521799  6.4108615 6.086564  6.044205  6.610616
  6.5525775 6.420291  6.028851  6.4767056 6.013627  6.506603 ]
 [6.5929112 6.473681  5.97452   5.880336  6.349225  6.1609235 6.050352
  6.344292  6.5112767 6.889649  6.4470773 6.063143  6.0365057 7.1993375
  6.663166  6.9170494 6.1159873 6.4819307 6.259358  6.631396 ]
 [6.3576856 6.3509774 5.896789  5.50926   6.204206  5.8810306 5.8243394
  6.135947  6.00959   6.530128  6.0811534 5.9790435 6.0153184 6.7161665
  6.493421  6.7051153 6.0544477 5.821261  6.178963  6.1601143]
 [5.7539215 5.502466  4.952092  4.8

## `nn.Block`和`nn.Sequential`的嵌套使用

In [30]:
class RecMLP(nn.Block):
    """Block that nests an ``nn.Sequential`` next to a Dense layer of its own.

    ``net`` holds a 256-unit and a 128-unit ReLU layer; ``dense`` is a
    64-unit layer whose output gets a ReLU in ``forward``.
    """

    def __init__(self, **kwargs):
        super(RecMLP, self).__init__(**kwargs)
        self.net = nn.Sequential()
        with self.name_scope():
            # Fill the inner container first, then create the extra layer.
            for units in (256, 128):
                self.net.add(nn.Dense(units, activation='relu'))
            self.dense = nn.Dense(64)

    def forward(self, x):
        """Run ``x`` through ``net``, then ``dense``, then ReLU."""
        features = self.net(x)
        return nd.relu(self.dense(features))

# Nest in the other direction too: a Sequential whose first child is a
# RecMLP block, followed by a 10-unit Dense layer.
rec_mlp = nn.Sequential()
for block in (RecMLP(), nn.Dense(10)):
    rec_mlp.add(block)
print(rec_mlp)

Sequential(
  (0): RecMLP(
    (dense): Dense(None -> 64, linear)
    (net): Sequential(
      (0): Dense(None -> 256, Activation(relu))
      (1): Dense(None -> 128, Activation(relu))
    )
  )
  (1): Dense(None -> 10, linear)
)


In [40]:
class RecMLP(nn.Block):
    """Block that chains three Dense layers with a ReLU after each one."""

    def __init__(self, **kwargs):
        super(RecMLP, self).__init__(**kwargs)
        with self.name_scope():
            # Original author's note: each layer must be instantiated as an
            # attribute of its own; writing
            #   self.denses = [nn.Dense(256), nn.Dense(128), nn.Dense(64)]
            # directly does not work.
            # NOTE(review): presumably because Block only registers layers
            # that are assigned as attributes -- confirm in the Gluon docs.
            self.dense0 = nn.Dense(256)
            self.dense1 = nn.Dense(128)
            self.dense2 = nn.Dense(64)
            # Convenience list used by forward() to walk the layers in order.
            self.denses = [self.dense0, self.dense1, self.dense2]

    def forward(self, x):
        """Apply every layer in ``denses`` in turn, each followed by ReLU."""
        out = x
        for layer in self.denses:
            out = nd.relu(layer(out))
        return out

# Instantiate the list-based RecMLP, initialize it, and print its
# structure.
rec_mlp = RecMLP()
rec_mlp.initialize()
print(rec_mlp)

<type 'list'>
RecMLP(
  (dense1): Dense(None -> 128, linear)
  (dense0): Dense(None -> 256, linear)
  (dense2): Dense(None -> 64, linear)
)
