In [2]:
!pip install mxnet

Collecting mxnet
[?25l  Downloading https://files.pythonhosted.org/packages/29/bb/54cbabe428351c06d10903c658878d29ee7026efbe45133fd133598d6eb6/mxnet-1.7.0.post1-py2.py3-none-manylinux2014_x86_64.whl (55.0MB)
[K     |████████████████████████████████| 55.0MB 74kB/s 
Collecting graphviz<0.9.0,>=0.8.1
  Downloading https://files.pythonhosted.org/packages/53/39/4ab213673844e0c004bed8a0781a0721a3f6bb23eb8854ee75c236428892/graphviz-0.8.4-py2.py3-none-any.whl
Installing collected packages: graphviz, mxnet
  Found existing installation: graphviz 0.10.1
    Uninstalling graphviz-0.10.1:
      Successfully uninstalled graphviz-0.10.1
Successfully installed graphviz-0.8.4 mxnet-1.7.0.post1


In [3]:
from mxnet import nd
from mxnet.gluon import nn

In [4]:
# A fully connected layer with 2 output units. The input size is not given
# here, which is why the repr below shows `None -> 2`.
layer = nn.Dense(units=2)
layer

Dense(None -> 2, linear)

In [5]:
# Initialize the layer's parameters (no initializer is passed, so the library
# default is used).
layer.initialize()

In [6]:
# Shapes: a (3, 4) input multiplied by the (4, 2) transpose of the layer's
# weight yields a (3, 2) output.
x = nd.random.uniform(low=-1, high=1, shape=(3, 4))
layer(x)


[[-0.02524132 -0.00874885]
 [-0.06026538 -0.01308061]
 [ 0.02468396 -0.02181557]]
<NDArray 3x2 @cpu(0)>

In [7]:
# Inspect the layer's weight values; the (2, 4) shape shown below is
# (output units, input features).
layer.weight.data()


[[-0.00873779 -0.02834515  0.05484822 -0.06206018]
 [ 0.06491279 -0.03182812 -0.01631819 -0.00312688]]
<NDArray 2x4 @cpu(0)>

In [9]:
net = nn.Sequential()

# Convolutional part. As with Dense, the number of input channels
# (`in_channels`) does not have to be specified: it is inferred automatically
# in the first forward pass. Each convolution applies a relu activation to its
# output. A tuple such as `kernel_size=(2, 4)` would give a non-square kernel,
# and tuples can likewise specify non-symmetric pool and stride sizes.
net.add(nn.Conv2D(channels=6, kernel_size=5, activation='relu'))
net.add(nn.MaxPool2D(pool_size=2, strides=2))
net.add(nn.Conv2D(channels=16, kernel_size=3, activation='relu'))
net.add(nn.MaxPool2D(pool_size=2, strides=2))

# Dense part. The first dense layer automatically reshapes the 4-D output of
# the last max pooling layer into the 2-D shape
# (x.shape[0], x.size/x.shape[0]).
net.add(nn.Dense(120, activation='relu'))
net.add(nn.Dense(84, activation='relu'))
net.add(nn.Dense(10))
net

Sequential(
  (0): Conv2D(None -> 6, kernel_size=(5, 5), stride=(1, 1), Activation(relu))
  (1): MaxPool2D(size=(2, 2), stride=(2, 2), padding=(0, 0), ceil_mode=False, global_pool=False, pool_type=max, layout=NCHW)
  (2): Conv2D(None -> 16, kernel_size=(3, 3), stride=(1, 1), Activation(relu))
  (3): MaxPool2D(size=(2, 2), stride=(2, 2), padding=(0, 0), ceil_mode=False, global_pool=False, pool_type=max, layout=NCHW)
  (4): Dense(None -> 120, Activation(relu))
  (5): Dense(None -> 84, Activation(relu))
  (6): Dense(None -> 10, linear)
)

In [10]:
# Initialize the parameters, then push one random batch through the network.
net.initialize()
# Input shape is (batch_size, color_channels, height, width)
x = nd.random.uniform(shape=(4, 1, 28, 28))
y = net(x)
# One row of 10 outputs per input sample, matching the final Dense(10) layer.
y.shape

(4, 10)

In [11]:
# Shapes of the first convolution's weight and of the second dense layer's bias.
conv_weight_shape = net[0].weight.data().shape
dense_bias_shape = net[5].bias.data().shape
(conv_weight_shape, dense_bias_shape)

((6, 1, 5, 5), (84,))

In [12]:
class MixMLP(nn.Block):
    """Custom block mixing a predefined Sequential stack with a hand-written forward.

    Children:
        blk:   Sequential of Dense(3, relu) followed by Dense(4, relu).
        dense: final Dense(5) layer with no activation.
    """

    def __init__(self, **kwargs):
        """Build the child layers; `kwargs` are forwarded to `nn.Block`."""
        # Run `nn.Block`'s init method before registering any children.
        super(MixMLP, self).__init__(**kwargs)
        hidden_stack = nn.Sequential()
        hidden_stack.add(nn.Dense(3, activation='relu'))
        hidden_stack.add(nn.Dense(4, activation='relu'))
        self.blk = hidden_stack
        self.dense = nn.Dense(5)

    def forward(self, x):
        """Run `x` through `blk`, apply relu, print the result, and return `dense` of it."""
        hidden = self.blk(x)
        # The last layer of `blk` already ends in relu, so this extra relu does
        # not change the values.
        activated = nd.relu(hidden)
        # Show the intermediate activation each time the block is called.
        print(activated)
        return self.dense(activated)

# Instantiate the custom block. Note that this rebinds `net`, replacing the
# Sequential model built earlier in the notebook.
net = MixMLP()
net

MixMLP(
  (blk): Sequential(
    (0): Dense(None -> 3, Activation(relu))
    (1): Dense(None -> 4, Activation(relu))
  )
  (dense): Dense(None -> 5, linear)
)

In [13]:
# Initialize the parameters and run a forward pass on a random (2, 2) input.
# `forward` prints the intermediate activation, so two arrays appear below:
# the printed (2, 4) hidden output, then the returned (2, 5) result.
net.initialize()
x = nd.random.uniform(shape=(2, 2))
net(x)


[[0.0000000e+00 0.0000000e+00 6.2900386e-04 7.6445540e-05]
 [0.0000000e+00 0.0000000e+00 1.1989386e-03 1.2375204e-03]]
<NDArray 2x4 @cpu(0)>



[[-3.8061840e-05  1.5568350e-05  4.3668215e-06  4.2853058e-05
   1.8710394e-05]
 [-1.8345519e-05  2.6403079e-05  2.4685731e-05  7.7019373e-05
   9.7785989e-05]]
<NDArray 2x5 @cpu(0)>

In [14]:
# Weights of the second Dense layer inside `blk`; the (4, 3) shape shown below
# is (output units, input units).
net.blk[1].weight.data()


[[-0.0343901  -0.05805862 -0.06187592]
 [-0.06210143 -0.00918167 -0.00170272]
 [-0.02634858  0.05334064  0.02748809]
 [ 0.06669661 -0.01711474  0.01647211]]
<NDArray 4x3 @cpu(0)>