In [1]:
import d2lzh as d2l
import mxnet as mx
from mxnet import autograd, gluon, init, nd
from mxnet.gluon import loss as gloss, nn, utils as gutils
import time

In [2]:
def resnet18(num_classes):  # This function is saved in the d2lzh package for later use
    """Assemble a ResNet-18 style network ending in ``num_classes`` outputs.

    Layout, in order: a 3x3 convolution with 64 channels followed by batch
    normalization and ReLU, four stages of two ``d2l.Residual`` units each
    (64, 128, 256 and 512 channels), global average pooling, and a dense
    layer with ``num_classes`` units.

    Args:
        num_classes: Number of units in the final dense layer.

    Returns:
        An ``nn.Sequential`` holding the layers; it is not initialized here.
    """
    def resnet_block(num_channels, num_residuals, first_block=False):
        """Stack ``num_residuals`` residual units into one ``nn.Sequential``.

        The first unit is built with ``use_1x1conv=True, strides=2`` unless
        ``first_block`` is set; all other units use the default arguments.
        """
        stage = nn.Sequential()
        for idx in range(num_residuals):
            strided = idx == 0 and not first_block
            extra_kwargs = {'use_1x1conv': True, 'strides': 2} if strided else {}
            stage.add(d2l.Residual(num_channels, **extra_kwargs))
        return stage

    net = nn.Sequential()
    # A smaller convolution kernel, stride and padding are used here, and the
    # max-pooling layer has been removed.
    net.add(nn.Conv2D(64, kernel_size=3, strides=1, padding=1),
            nn.BatchNorm(), nn.Activation('relu'))
    # Stages are created in the same order as their channel widths so that the
    # automatically generated layer names stay the same.
    net.add(resnet_block(64, 2, first_block=True))
    for width in (128, 256, 512):
        net.add(resnet_block(width, 2))
    net.add(nn.GlobalAvgPool2D(), nn.Dense(num_classes))
    return net

# Build the network with 10 output units; the cells below (including train())
# operate on this module-level `net`.
net = resnet18(10)

In [9]:
# Devices the model is replicated on: the first GPU together with the CPU.
ctx = [mx.gpu(0), mx.cpu(0)]
# Draw the weights from Normal(sigma=0.01) on every device in `ctx`;
# force_reinit=True makes the call take effect even if `net` was initialized before.
net.initialize(init=init.Normal(sigma=0.01), ctx=ctx, force_reinit=True)

In [11]:
# Random input of shape (4, 1, 28, 28), split along the first axis across the
# devices in `ctx` (the recorded output shows a 2x10 result per device).
x = nd.random.uniform(shape=(4, 1, 28, 28))
cpu_gpu_x = gutils.split_and_load(x, ctx)
# Run the forward pass on each shard; the tuple is the cell's displayed output.
(net(cpu_gpu_x[0]), net(cpu_gpu_x[1]))

(
 [[ 1.0006834e-06  6.7762932e-07  4.8034249e-06 -2.0505538e-06
    3.6689387e-06  1.2511016e-06 -2.1064225e-06 -1.3070419e-06
    2.8637310e-06  3.2923047e-06]
  [ 7.4181497e-07  2.6879442e-07  4.6454452e-06 -1.8912955e-06
    3.9271749e-06  1.4061950e-06 -2.5943013e-06 -1.3177248e-06
    2.4489993e-06  3.5721191e-06]]
 <NDArray 2x10 @gpu(0)>,
 
 [[ 5.4366717e-07  6.8477402e-07  4.1176017e-06 -1.5228464e-06
    3.2764810e-06  1.4188942e-06 -1.6311110e-06 -1.1858672e-06
    2.1414864e-06  3.7148905e-06]
  [ 7.2043127e-07  8.3965318e-07  3.6648519e-06 -1.5985751e-06
    3.2497187e-06  7.2354050e-07 -1.9599513e-06 -1.0965462e-06
    2.6601374e-06  3.6148099e-06]]
 <NDArray 2x10 @cpu(0)>)

In [5]:
# Fetch the 'weight' parameter of net[0], the first layer added in resnet18
# (the Conv2D).
weight = net[0].params.get('weight')

# By default, weight.data() returns the parameter values held in main (CPU) memory.
# A RuntimeError raised by that call is reported as "not initialized" on the CPU.
try:
    weight.data()
except RuntimeError:
    print('not initialized on', mx.cpu())
# Display the first slice (index 0 along the first axis) of the copy on ctx[0].
weight.data(ctx[0])[0]


[[[-0.00227942  0.00201315  0.00350055]
  [ 0.00536052  0.01519444  0.01904088]
  [-0.01573443 -0.00140079  0.00296701]]]
<NDArray 1x3x3 @gpu(0)>

In [12]:
def train(ctx, batch_size, lr, num_epochs=4):
    """Train the module-level ``net`` on Fashion-MNIST across several devices.

    Every mini-batch is split across the devices in ``ctx``; each device runs
    the forward and backward pass on its own shard, after which a single
    ``trainer.step`` call applies the SGD update.

    Args:
        ctx: List of MXNet contexts to train on, e.g. ``[mx.gpu(0), mx.cpu(0)]``.
        batch_size: Mini-batch size requested from the data loader (the total
            over all devices, before splitting).
        lr: SGD learning rate.
        num_epochs: Number of passes over the training data. Defaults to 4,
            the value that was previously hard-coded.

    Returns:
        None. Prints the device list once, then the training time and the
        test accuracy (evaluated on ``ctx[0]``) after every epoch.
    """
    train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
    print('running on:', ctx)
    # force_reinit=True: every call restarts from freshly drawn weights on `ctx`,
    # even if `net` was already initialized elsewhere.
    net.initialize(init=init.Normal(sigma=0.01), ctx=ctx, force_reinit=True)
    trainer = gluon.Trainer(
        net.collect_params(), 'sgd', {'learning_rate': lr})
    loss = gloss.SoftmaxCrossEntropyLoss()
    for epoch in range(num_epochs):
        start = time.time()
        for X, y in train_iter:
            gpu_Xs = gutils.split_and_load(X, ctx)
            gpu_ys = gutils.split_and_load(y, ctx)
            with autograd.record():
                ls = [loss(net(gpu_X), gpu_y)
                      for gpu_X, gpu_y in zip(gpu_Xs, gpu_ys)]
            for device_loss in ls:
                device_loss.backward()
            # Normalize by the number of samples actually in this batch: the
            # last batch of an epoch can be smaller than `batch_size`, and
            # dividing by the nominal size would shrink its gradient.
            trainer.step(X.shape[0])
        # Wait for all queued asynchronous work so the timing covers the epoch.
        nd.waitall()
        train_time = time.time() - start
        test_acc = d2l.evaluate_accuracy(test_iter, net, ctx[0])
        print('epoch %d, time %.1f sec, test acc %.2f' % (
            epoch + 1, train_time, test_acc))

In [None]:
# Train on the GPU + CPU device list with mini-batches of 256 samples
# (split across the devices inside train()) and a learning rate of 0.1.
train(ctx, batch_size=256, lr=0.1)

running on: [gpu(0), cpu(0)]
