In [13]:
from mxnet import autograd, gluon, nd
from mxnet.gluon import loss as gloss, nn
import os
import subprocess
import time

In [14]:
# Create two 1x2 NDArrays of ones, combine them element-wise, and display
# the result as the cell output.
a, b = nd.ones((1, 2)), nd.ones((1, 2))
c = a * b + 2
c


[[3. 3.]]
<NDArray 1x2 @cpu(0)>

In [15]:
class Benchmark():  # This class is saved in the d2lzh package for later reuse
    """Context manager that prints how long the body of a ``with`` block took.

    Args:
        prefix: Optional label printed in front of the timing. A single space
            is appended to it; ``None`` or an empty string prints no label.

    Attributes:
        prefix: The label (with trailing space) or an empty string.
        start: ``time.perf_counter()`` reading taken on entry.
        elapsed: Seconds measured for the most recent ``with`` body; ``None``
            until the block has exited.
    """

    def __init__(self, prefix=None):
        self.prefix = prefix + ' ' if prefix else ''
        self.start = None
        self.elapsed = None

    def __enter__(self):
        # perf_counter is monotonic and high-resolution, which makes it the
        # right clock for measuring short intervals (time.time() is neither).
        self.start = time.perf_counter()
        # Return the instance so `with Benchmark() as b:` exposes b.elapsed.
        return self

    def __exit__(self, *args):
        self.elapsed = time.perf_counter() - self.start
        print('%stime: %.4f sec' % (self.prefix, self.elapsed))
        # Implicitly returns None, so exceptions raised in the body propagate.

In [16]:
# Time only the statements that create x and request the dot-product sum.
# In the recorded run this block reports ~0 sec, i.e. it finishes before the
# result has actually been computed.
with Benchmark('Workloads are queued.'):
    x = nd.random.uniform(shape=(2000, 2000))
    y = nd.dot(x, x).sum()

# Printing is comparatively slow: the value of y must be available before it
# can be printed, so the computation time shows up here.
with Benchmark('Workloads are finished.'):
    print('sum =', y)

# As long as the data is stored in NDArrays and the operators provided by
# MXNet are used, MXNet uses asynchronous computation by default to achieve
# high performance.

Workloads are queued. time: 0.0000 sec
sum = 
[2.0003645e+09]
<NDArray 1 @cpu(0)>
Workloads are finished. time: 0.6315 sec


In [17]:
# Time a dot product including the wait for its result.
with Benchmark():
    y = nd.dot(x, x)
    y.wait_to_read()  # wait until y has been computed

time: 0.1646 sec


In [18]:
# Time two dot products and wait for both of them with a single call.
with Benchmark():
    y = nd.dot(x, x)
    z = nd.dot(x, x)
    nd.waitall()  # wait until all computations have finished

time: 0.2947 sec


In [21]:
# Time a dot product followed by a conversion of the result to NumPy.
with Benchmark():
    y = nd.dot(x, x)
    y.asnumpy()  # makes the frontend wait for the backend's result, i.e. synchronizes

time: 0.1641 sec


In [22]:
# Time a dot product followed by reading its norm as a Python scalar.
with Benchmark():
    y = nd.dot(x, x)
    y.norm().asscalar()  # triggers synchronization

time: 0.2155 sec


In [23]:
# Variant 1: wait for the result after every single addition.
with Benchmark('synchronous.'):
    for _ in range(1000):
        y = x + 1
        y.wait_to_read()

# Variant 2: issue all 1000 additions first and wait only once at the end.
# NOTE(review): in the recorded run this variant was not faster than the
# synchronous one (3.7470 sec vs 3.3104 sec) -- worth re-measuring.
with Benchmark('asynchronous.'):
    for _ in range(1000):
        y = x + 1
    nd.waitall()

synchronous. time: 3.3104 sec
asynchronous. time: 3.7470 sec


In [24]:
# When training a model, it is recommended to call a synchronization function
# for every mini-batch, e.g. asscalar or asnumpy, to evaluate the model.
# When predicting with a model, it is likewise recommended to call a
# synchronization function for every mini-batch in order to reduce memory
# usage, e.g. by directly printing the predictions of the current mini-batch.
def data_iter(num_batches=100, batch_size=1024, num_features=512, log_every=50):
    """Yield synthetic ``(X, y)`` mini-batches and periodically log elapsed time.

    Args:
        num_batches: Number of mini-batches to generate.
        batch_size: Number of rows in each mini-batch.
        num_features: Number of columns in each feature batch ``X``.
        log_every: Print a progress line after every ``log_every`` batches;
            a falsy value (``0`` or ``None``) disables logging.

    Yields:
        A tuple ``(X, y)`` where ``X`` comes from
        ``nd.random.normal(shape=(batch_size, num_features))`` and ``y`` from
        ``nd.ones((batch_size,))``.
    """
    # Runs on the first next() call, so the clock starts when iteration begins.
    start = time.time()
    for i in range(num_batches):
        X = nd.random.normal(shape=(batch_size, num_features))
        y = nd.ones((batch_size,))
        yield X, y
        # Reached only when the consumer requests the next batch, so the
        # logged time includes whatever the consumer did with this batch.
        if log_every and (i + 1) % log_every == 0:
            print('batch %d, time %f sec' % (i + 1, time.time() - start))

In [25]:
# Three-layer perceptron: two hidden ReLU layers followed by a single output.
net = nn.Sequential()
net.add(nn.Dense(2048, activation='relu'))
net.add(nn.Dense(512, activation='relu'))
net.add(nn.Dense(1))
net.initialize()

# Squared loss, optimised with plain SGD.
loss = gloss.L2Loss()
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.005})

In [27]:
# Uses `ps` on Linux/macOS and `tasklist` on Windows.
def get_mem():
    """Return the memory used by the current process, in MB (KB / 1e3).

    On POSIX systems the resident set size is read from ``ps -o rss=``; on
    Windows, where ``ps`` does not exist, the "Mem Usage" column of
    ``tasklist`` is used instead. Both tools report the value in KB.

    Returns:
        float: Memory usage in KB divided by 1e3.

    Raises:
        FileNotFoundError: If the platform's process-listing tool is missing.
        subprocess.CalledProcessError: If the tool exits with an error.
        ValueError: If the tool's output contains no memory figure.
    """
    pid = str(os.getpid())
    if os.name == 'nt':
        # /FO CSV /NH yields one line such as
        #   "python.exe","4242","Console","1","25,840 K"
        out = subprocess.check_output(
            ['tasklist', '/FI', 'PID eq ' + pid, '/FO', 'CSV', '/NH'])
        mem_field = out.decode(errors='replace').strip().rsplit('","', 1)[-1]
        # Keep only ASCII digits: the thousands separator is locale dependent.
        digits = ''.join(ch for ch in mem_field if ch in '0123456789')
        if not digits:
            raise ValueError('unexpected tasklist output: %r' % out)
        return int(digits) / 1e3
    # `rss=` selects only the RSS column and suppresses the header line, so
    # the output is just the number (in KB) padded with whitespace.
    out = subprocess.check_output(['ps', '-o', 'rss=', '-p', pid])
    return int(out.decode().strip()) / 1e3

In [28]:
# Take just the first mini-batch from the generator, run it through the
# network and the loss once, and wait for the result.
# NOTE(review): presumably a warm-up pass so that later measurements exclude
# one-time setup cost -- confirm against the following cells.
X, y = next(data_iter())
loss(y, net(X)).wait_to_read()

FileNotFoundError: [WinError 2] 系统找不到指定的文件。