In [1]:
# Import from the public IPython.display module; importing `display` from
# IPython.core.display is deprecated in newer IPython releases.
from IPython.display import display, HTML
# Inject a CSS rule so the notebook container spans the full browser width.
display(HTML("<style>.container { width:100% !important; }</style>"))

In [2]:
from ndll.pipeline import Pipeline
import ndll.ops as ops
import ndll.types as types
import ndll.tfrecord as tfrec
import numpy as np
from timeit import default_timer as timer
import numpy as np
import matplotlib.pyplot as plt

# Location of the training database; passed as `path` to ops.Caffe2Reader in
# HybridPipe.__init__. Hard-coded absolute path -- adjust for the local machine.
db_folder = "/data/imagenet/train-c2lmdb-480"

class HybridPipe(Pipeline):
    """NDLL pipeline that reads records from `db_folder` and emits images and labels.

    Operators created here and wired together in `define_graph`:
    Caffe2Reader -> HuffmanDecoder -> DCTQuantInv (gpu) -> Resize (gpu)
    -> CropMirrorNormalizePermute (gpu); the labels go through Cast.
    """

    def __init__(self, batch_size, num_threads, device_id, num_gpus,
                 pipelined = True, exec_async = True, **kwargs):
        """Create the operators used by the pipeline.

        Args:
            batch_size: forwarded to Pipeline.__init__.
            num_threads: forwarded to Pipeline.__init__.
            device_id: forwarded to Pipeline.__init__ and used as the
                reader's `shard_id`.
            num_gpus: used as the reader's `num_shards`.
            pipelined: forwarded to Pipeline.__init__.
            exec_async: forwarded to Pipeline.__init__ in the position that
                was previously named `async`.
            **kwargs: only the legacy keyword ``async`` is accepted; it
                overrides `exec_async` when given.

        Raises:
            TypeError: if any keyword other than ``async`` is left in kwargs.
        """
        # `async` is a reserved word since Python 3.7 and can no longer be a
        # parameter name. The legacy spelling is still honoured through
        # **kwargs so older callers (or `**{"async": ...}`) keep working.
        if "async" in kwargs:
            exec_async = kwargs.pop("async")
        if kwargs:
            raise TypeError("__init__() got an unexpected keyword argument %r"
                            % sorted(kwargs)[0])
        super(HybridPipe, self).__init__(batch_size, num_threads, device_id, pipelined, exec_async)
        # One shard per pipeline: shard index is the device id, shard count is num_gpus.
        self.input = ops.Caffe2Reader(path = db_folder, shard_id = device_id, num_shards = num_gpus)
        self.huffman = ops.HuffmanDecoder()
        self.idct = ops.DCTQuantInv(device = "gpu", output_type = types.RGB)
        # NOTE(review): presumably resizes to a random size between resize_a
        # and resize_b -- confirm against the ops.Resize documentation.
        self.resize = ops.Resize(device = "gpu", random_resize = True,
                                 resize_a = 256, resize_b = 480,
                                 image_type = types.RGB,
                                 interp_type = types.INTERP_LINEAR)
        # Random 224x224 crop with per-channel mean 128 and std 1, FLOAT output.
        self.cmnp = ops.CropMirrorNormalizePermute(device = "gpu",
                                                   output_type = types.FLOAT,
                                                   random_crop = True,
                                                   crop_h = 224,
                                                   crop_w = 224,
                                                   image_type = types.RGB,
                                                   mean = [128., 128., 128.],
                                                   std = [1., 1., 1.])
        self.cast = ops.Cast(dtype = types.FLOAT)
        # Not read anywhere in the visible code.
        self.iter = 0

    def define_graph(self):
        """Connect the operators and return the pipeline outputs.

        Returns:
            A tuple ``(output, labels)`` where `output` is the result of the
            crop/mirror/normalize/permute stage and `labels` is the reader's
            label output passed through the Cast operator.
        """
        # The reader is named "Reader" so it can be looked up by that name
        # later (e.g. via epoch_size("Reader")).
        inputs, labels = self.input(name="Reader")
        dct_coeff, jpeg_meta = self.huffman(inputs)
        # Only the coefficients get .gpu() applied; jpeg_meta is passed as returned.
        images = self.idct(dct_coeff.gpu(), jpeg_meta)
        images = self.resize(images)
        output = self.cmnp(images)
        return (output, self.cast(labels))

    def iter_setup(self):
        """Intentionally a no-op for this pipeline."""
        pass

In [3]:
# Number of pipelines to create; pipeline i gets device_id=i and num_gpus=N
# (8 pipelines x batch 128 = 1024, the '--batch-size' used for training below).
N = 8
# `pipelined` and `async` are left at HybridPipe's defaults (both True, the
# values previously passed explicitly). Spelling out `async = True` as a
# keyword argument is a SyntaxError on Python 3.7+, where `async` is reserved.
pipes = [HybridPipe(batch_size=128, num_threads=1, device_id=i, num_gpus=N) for i in range(N)]

In [4]:
# Only the first pipeline is built explicitly here; pipes[0].epoch_size("Reader")
# is queried in the next cell.
# NOTE(review): presumably the remaining pipelines are built by NDLLIterator -- confirm.
pipes[0].build()

In [5]:
from ndll.plugin.mxnet import NDLLIterator

# Epoch size reported by the first pipeline for the operator named "Reader"
# (the name assigned in HybridPipe.define_graph).
reader_epoch_size = pipes[0].epoch_size("Reader")
# Wrap all pipelines in a single iterator for the MXNet training loop.
ndll_iter = NDLLIterator(pipes, reader_epoch_size)

In [6]:
import os
import argparse
import logging
# Configure the root logger at DEBUG level; the INFO lines in the cell output
# below come from this logger.
logging.basicConfig(level=logging.DEBUG)
# NOTE(review): find_mxnet is not referenced in the visible cells; presumably
# imported for its side effects -- confirm before removing.
from demo.common import find_mxnet, data, fit
import mxnet as mx

# Command-line style options, parsed below in place of sys.argv.
# NOTE(review): '--gpu' shows up as `gpus=...` in the logged Namespace,
# presumably through argparse prefix matching of a '--gpus' option -- confirm.
s = [
    '--gpu', '0,1,2,3,4,5,6,7',
    '--batch-size', '1024',
    '--num-epochs', '1',
    '--data-train', '/data/imagenet/train-480-val-256-recordio/train.rec',
    '--data-val', '/data/imagenet/train-480-val-256-recordio/val.rec',
    '--disp-batches', '100',
    '--network', 'resnet-v1',
    '--num-layers', '50',
    '--data-nthreads', '2',
    '--min-random-scale', '0.533',
    '--max-random-shear-ratio', '0',
    '--max-random-rotate-angle', '0',
    '--max-random-h', '0',
    '--max-random-l', '0',
    '--max-random-s', '0',
    '--dtype', 'float16',
]

# Build the parser: the fit/data helper modules register their own options.
parser = argparse.ArgumentParser(
    description="train imagenet-1k",
    formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)
fit.add_fit_args(parser)
data.add_data_args(parser)
data.add_data_aug_args(parser)
# Use a large augmentation level.
data.set_data_aug_level(parser, 3)

# Defaults for this notebook; any option present in `s` overrides them.
parser.set_defaults(
    # network
    network='resnet',
    num_layers=50,
    # data
    num_classes=1000,
    num_examples=1281167,
    image_shape='3,224,224',
    # If the input images have minimum side k, the suggested value is
    # 256.0/k, e.g. 0.533 for k = 480.
    min_random_scale=1,
    # train
    num_epochs=80,
    lr_step_epochs='30,60',
    dtype='float32',
)
args = parser.parse_args(s)


# Load the network definition module selected by --network.
from importlib import import_module
net = import_module('demo.symbols.' + args.network)
# Take the symbol parameters from the parsed arguments instead of repeating
# them as literals, so the network always matches what fit.fit is given.
# With the options above these are 1000, 50, '3,224,224' and 'float16'.
sym = net.get_symbol(args.num_classes, args.num_layers, args.image_shape,
                     dtype=args.dtype)

def get_ndll_iter(args, kv=None):
    """Data-loader callback for fit.fit that serves the NDLL iterator.

    Both `args` and `kv` are accepted for signature compatibility and ignored.

    Returns:
        A 2-tuple of the module-level `ndll_iter` and None.
        NOTE(review): presumably (train_iter, val_iter) as fit.fit expects --
        confirm against demo.common.fit.
    """
    first_iter = ndll_iter
    second_iter = None
    return (first_iter, second_iter)

# Train using the NDLL-backed iterator.
# Alternative kept for reference: the RecordIO iterator from demo.common.data.
#fit.fit(args, sym, data.get_rec_iter)
fit.fit(args, sym, get_ndll_iter)

INFO:root:start with arguments Namespace(batch_size=1024, benchmark=0, data_nthreads=2, data_train='/data/imagenet/train-480-val-256-recordio/train.rec', data_train_idx='', data_val='/data/imagenet/train-480-val-256-recordio/val.rec', data_val_idx='', disp_batches=100, dtype='float16', gc_threshold=0.5, gc_type='none', gpus='0,1,2,3,4,5,6,7', image_shape='3,224,224', kv_store='device', load_epoch=None, lr=0.1, lr_factor=0.1, lr_step_epochs='30,60', max_random_aspect_ratio=0.25, max_random_h=0, max_random_l=0, max_random_rotate_angle=0, max_random_s=0, max_random_scale=1, max_random_shear_ratio=0.0, min_random_scale=0.533, model_prefix=None, mom=0.9, monitor=0, network='resnet-v1', num_classes=1000, num_epochs=1, num_examples=1281167, num_layers=50, optimizer='sgd', pad_size=0, random_crop=1, random_mirror=1, rgb_mean='123.68,116.779,103.939', test_io=0, top_k=0, wd=0.0001)
INFO:root:Epoch[0] Batch [100]	Speed: 1648.95 samples/sec	accuracy=0.001499
INFO:root:Epoch[0] Batch [200]	Speed: 