In [2]:
from mxnet import autograd, nd

num_inputs = 2
num_examples = 1000
true_w = [2, -3,4]
true_b = 4.2
features = nd.random.normal(scale=1, shape=(num_examples, num_inputs))
labels = true_w[0] * features[:, 0] + true_w[1] * features[:, 1] + true_b
labels += nd.random.normal(scale=0.01, shape = labels.shape)

In [5]:
from mxnet.gluon import data as gdata

batch_size = 10
# 组合训练数据的特征和标签
dataset = gdata.ArrayDataset(features, labels)
# 随机读取一批数据
data_iter = gdata.DataLoader(dataset, batch_size, shuffle = True)
for X,y in data_iter:
    print(X, y)
    break


[[ 0.25081477 -0.30159083]
 [-0.7787229  -1.5016645 ]
 [-0.5158308  -1.1073749 ]
 [-0.72658265  1.1108662 ]
 [-0.8431894  -2.5688748 ]
 [-1.3606751   1.7915472 ]
 [ 0.31563997 -1.3605943 ]
 [-0.10506796 -1.3348366 ]
 [-0.9277095   0.6714998 ]
 [-0.08212578 -0.05052492]]
<NDArray 10x2 @cpu(0)> 
[ 5.624185    7.166808    6.4837646  -0.5959145  10.206541   -3.8822627
  8.915258    7.9938436   0.31090662  4.1973696 ]
<NDArray 10 @cpu(0)>


In [6]:
from mxnet.gluon import nn

# Sequential实例可以看作是一个串联各个层的容器。
net = nn.Sequential()
# 在Gluon中，全连接层是一个Dense实例。我们定义该层输出个数为1。
net.add(nn.Dense(1))

In [7]:
from mxnet import init

# 指定权重参数每个元素将在初始化时随机采样于均值为0、标准差为0.01的正态分布。
# 偏差参数b默认会初始化为零。
net.initialize(init.Normal(sigma=0.001))

In [8]:
from mxnet.gluon import loss as gloss

loss = gloss.L2Loss()  # 平方损失又称L2范数损失

In [9]:
from mxnet import gluon

# 自动收集参数，sgd梯度下降优化算法，超参数学习率
trainer = gluon.Trainer(net.collect_params(),'sgd',{'learning_rate':0.03})

In [10]:
num_epochs = 3
for epoch in range(1, num_epochs +1 ):
    for X , y in data_iter:
        with autograd.record():
            l = loss(net(X), y)
        l.backward()
        trainer.step(batch_size)
    l = loss(net(features),labels)
    print('epoch %d, loss: %f' % (epoch, l.mean().asnumpy()))

epoch 1, loss: 0.032371
epoch 2, loss: 0.000120
epoch 3, loss: 0.000048


In [11]:
dense = net[0]
true_w, dense.weight.data()

([2, -3, 4],
 
 [[ 2.0000746 -2.9998338]]
 <NDArray 1x2 @cpu(0)>)

In [12]:
true_b, dense.bias.data()

(4.2,
 
 [4.2000227]
 <NDArray 1 @cpu(0)>)

In [14]:
dense.weight.grad()



[[ 0.04321001 -0.02178555]]
<NDArray 1x2 @cpu(0)>

In [16]:
help(dense.weight)

Help on Parameter in module mxnet.gluon.parameter object:

class Parameter(builtins.object)
 |  A Container holding parameters (weights) of Blocks.
 |  
 |  :py:class:`Parameter` holds a copy of the parameter on each :py:class:`Context` after
 |  it is initialized with ``Parameter.initialize(...)``. If :py:attr:`grad_req` is
 |  not ``'null'``, it will also hold a gradient array on each :py:class:`Context`::
 |  
 |      ctx = mx.gpu(0)
 |      x = mx.nd.zeros((16, 100), ctx=ctx)
 |      w = mx.gluon.Parameter('fc_weight', shape=(64, 100), init=mx.init.Xavier())
 |      b = mx.gluon.Parameter('fc_bias', shape=(64,), init=mx.init.Zero())
 |      w.initialize(ctx=ctx)
 |      b.initialize(ctx=ctx)
 |      out = mx.nd.FullyConnected(x, w.data(ctx), b.data(ctx), num_hidden=64)
 |  
 |  Parameters
 |  ----------
 |  name : str
 |      Name of this parameter.
 |  grad_req : {'write', 'add', 'null'}, default 'write'
 |      Specifies how to update gradient to grad arrays.
 |  
 |      - ``'wr


[[ 1.1630785   0.4838046 ]
 [ 0.29956347  0.15302546]
 [-1.1688148   1.5580711 ]
 ...
 [-1.348146    1.541968  ]
 [-2.2382517  -0.34891927]
 [ 0.02030805  1.09498   ]]
<NDArray 1000x2 @cpu(0)>