# MXNet/Gluon Inference

In [13]:
import os
import sys
import time
import numpy as np
import mxnet as mx
from mxnet import gluon, nd
from collections import namedtuple
from common.params_inf import *
from common.utils import *

In [2]:
# Restrict this process to a single GPU (device index 0).
# NOTE(review): this cell runs after `import mxnet`; confirm the variable is
# still honoured when it is set after the framework has been imported.
os.environ.update({"CUDA_VISIBLE_DEVICES": "0"})

In [3]:
# Interpreter and library versions: plain attribute lookups, printed as
# "<label> <value>" exactly as separate print() calls would.
for _label, _value in (
    ("OS: ", sys.platform),
    ("Python: ", sys.version),
    ("Numpy: ", np.__version__),
    ("MXNet: ", mx.__version__),
):
    print(_label, _value)

# Hardware / driver details are provided by the project helpers; each helper
# is called immediately before its own line is printed.
gpu_names = get_gpu_name()
print("GPU: ", gpu_names)
cuda_version = get_cuda_version()
print(cuda_version)
cudnn_version = get_cudnn_version()
print("CuDNN Version ", cudnn_version)

OS:  linux
Python:  3.6.3 |Anaconda custom (64-bit)| (default, Oct 13 2017, 12:02:49) 
[GCC 7.2.0]
Numpy:  1.13.3
MXNet:  1.3.0
GPU:  ['Tesla V100-SXM2-16GB', 'Tesla V100-SXM2-16GB', 'Tesla V100-SXM2-16GB', 'Tesla V100-SXM2-16GB']
CUDA Version 9.1.85
CuDNN Version  7.1.3


## Get pre-trained model

In [4]:
# Device that holds the model parameters and every batch during inference.
ctx = mx.gpu()

# Load ResNet-50 v1 with pretrained weights from the Gluon model zoo and keep
# only its `.features` sub-block, which is what gets called during inference.
resnet50_v1 = gluon.model_zoo.vision.resnet50_v1
net = resnet50_v1(pretrained=True, ctx=ctx).features

# Hybridize the feature extractor, requesting static allocation and shapes.
net.hybridize(static_alloc=True, static_shape=True)

## Get data

In [5]:
# Generate synthetic inputs: BATCHES_GPU minibatches of BATCH_SIZE images.
# give_fake_data returns two arrays; the printed shapes show the same images
# in channels-last (`_cl`) and channels-first (`_cf`) layout.
n_fake_images = BATCH_SIZE * BATCHES_GPU
fake_input_data_cl, fake_input_data_cf = give_fake_data(n_fake_images)
print(fake_input_data_cl.shape, fake_input_data_cf.shape)

(1280, 224, 224, 3) (1280, 3, 224, 224)


## Run inference

In [7]:
def predict_fn(classifier, data, batchsize):
    """Return the features `classifier` produces for every sample in `data`.

    Minibatches are taken from `yield_mb_X(data, batchsize)`. Each one is
    copied to the module-level `ctx`, passed through `classifier`, and its
    output is written into a preallocated float32 buffer on that device.

    Args:
        classifier: Callable applied to each minibatch NDArray.
        data: Input samples; `len(data)` determines the number of output rows.
        batchsize: Minibatch size handed to `yield_mb_X`.

    Returns:
        numpy.ndarray of shape (len(data), RESNET_FEATURES), dtype float32.
    """
    out = nd.zeros((len(data), RESNET_FEATURES), dtype=np.float32, ctx=ctx)
    # Assumes yield_mb_X yields (batch_index, batch) pairs in order and that
    # only the final batch may be shorter than `batchsize` -- TODO confirm.
    for idx, dta in yield_mb_X(data, batchsize):
        outputs = classifier(mx.nd.array(dta, ctx=ctx))
        n_rows = outputs.shape[0]
        start = idx * batchsize
        # Collapse only the trailing axes. squeeze() removes *every* size-1
        # axis, so a one-sample minibatch would also lose its batch axis.
        # Sizing the slice from n_rows keeps source and destination shapes
        # equal without relying on an out-of-range slice stop.
        out[start:start + n_rows] = outputs.reshape((n_rows, -1))
    # asnumpy() copies the buffer to host memory as a numpy array; per the
    # MXNet documentation this call blocks until the queued work is done.
    return out.asnumpy()

In [8]:
# Untimed first pass over the data, run before the timed cell.
# NOTE(review): presumably this absorbs one-off start-up cost (first call of
# the hybridized network) so it does not skew the measurement -- confirm.
cold_start = predict_fn(classifier=net, data=fake_input_data_cf, batchsize=BATCH_SIZE)

In [14]:
%%time
tick = time.time()
features = predict_fn(net, fake_input_data_cf, BATCH_SIZE)
total = time.time()-tick

CPU times: user 1.37 s, sys: 328 ms, total: 1.7 s
Wall time: 1.25 s


In [15]:
# Throughput = number of images pushed through the network / elapsed seconds.
images_processed = BATCH_SIZE * BATCHES_GPU
print("Images per second {}".format(images_processed / total))

Images per second 1024.1136844948533
