In [1]:
from random import randint
from PIL import Image
import numpy as np
import pickle
import pycuda.driver as cuda
import pycuda.autoinit
import uff
import tensorrt as trt
import ctypes

from matplotlib import pyplot as plt
from six.moves import cPickle
import time
import tensorflow as tf
import logging

import caffe
import skimage.transform



In [2]:
def prep_image(im, mean_val):
    """Resize, centre-crop and normalise an image for a 224x224 network input.

    The shorter side is resized to 224 pixels (the longer side is scaled by
    the same ratio, truncated to an integer), the central 224x224 window is
    cut out, the axes are reordered from (height, width, channel) to
    (channel, height, width), the channel order is reversed, the values are
    multiplied by 255 and ``mean_val`` is subtracted.

    Parameters
    ----------
    im : numpy.ndarray
        Three-dimensional image indexed as (height, width, channel).
        The ``* 255`` step assumes values in 0..1, as the original comment
        states for images loaded by caffe -- TODO confirm for other loaders.
    mean_val : array-like
        Value subtracted after scaling. It must broadcast against a
        (channel, 224, 224) array, e.g. shape (3, 1, 1) in reversed (BGR)
        channel order.

    Returns
    -------
    numpy.ndarray
        Array of shape (channel, 224, 224).
    """
    side = 224
    half = side // 2

    h, w, _ = im.shape
    # Floor division keeps the target shape integral on both Python 2 and
    # Python 3 (true division would hand skimage a float dimension), and is
    # consistent with the `//` already used for the crop below.
    if h < w:
        out_shape = (side, w * side // h)
    else:
        out_shape = (h * side // w, side)
    im = skimage.transform.resize(im, out_shape, preserve_range=True)

    # Central crop to 224x224
    h, w, _ = im.shape
    top = h // 2 - half
    left = w // 2 - half
    im = im[top:top + side, left:left + side]

    # (height, width, channel) -> (channel, height, width)
    im = np.transpose(im, (2, 0, 1))

    # Reverse the channel axis (RGB -> BGR)
    im = im[::-1, :, :]

    # Scale back to 0 ... 255 (caffe loads images as 0 ... 1), then centre
    im = im * 255.0
    return im - mean_val

In [3]:
def mAP(y_true, y_pred):
    """Evaluate TensorFlow's ``average_precision_at_k`` metric with k=1.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels; converted to ``int64`` before evaluation.
    y_pred : array-like
        Prediction scores; converted to ``float32`` before evaluation.
        Presumably one row of per-class scores per label -- TODO confirm
        against the caller.

    Returns
    -------
    The value produced by running the metric's update op once over the
    whole input.
    """
    labels = np.asarray(y_true).astype(np.int64)
    scores = np.asarray(y_pred).astype(np.float32)

    # Build the metric in a private graph so repeated calls do not keep
    # adding nodes and local variables to the global default graph.
    with tf.Graph().as_default():
        labels_t = tf.convert_to_tensor(labels)
        scores_t = tf.convert_to_tensor(scores)
        _, update_op = tf.metrics.average_precision_at_k(labels_t, scores_t, 1)

        # The context manager closes the session, which was previously
        # left open after every call.
        with tf.Session() as sess:
            # The streaming metric keeps its accumulators in local variables.
            sess.run(tf.local_variables_initializer())
            return sess.run(update_op)

In [4]:
def loadEngine(trt_engine_file_path):
    """Deserialize a TensorRT engine from a serialized engine file.

    Parameters
    ----------
    trt_engine_file_path : str
        Path of the file holding the serialized engine; read in binary mode.

    Returns
    -------
    The object returned by ``Runtime.deserialize_cuda_engine`` for the
    file's contents.
    """
    TRT_LOGGER = trt.Logger(trt.Logger.ERROR)
    # One runtime is enough; the original opened a second, shadowing one
    # inside the first.
    with open(trt_engine_file_path, "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
        return runtime.deserialize_cuda_engine(f.read())

In [15]:
def inference(engine, imgs, labels, batch_size=150, num_classes=1000):
    """Run batched inference on ``imgs`` and print timings and the mAP score.

    Images are sent to the engine in chunks of ``batch_size`` (the last chunk
    may be smaller). The per-batch outputs are collected, reshaped to
    ``(-1, num_classes)`` and scored against ``labels`` with ``mAP``.

    Parameters
    ----------
    engine :
        Object providing ``create_execution_context()``, e.g. the result of
        ``loadEngine``. The binding list assumes exactly one input followed
        by one output -- TODO confirm for other engines.
    imgs : array-like
        Images, one per index along the first axis; converted to a
        contiguous ``float32`` array before being copied to the device.
    labels : array-like
        Ground-truth labels passed unchanged to ``mAP``.
    batch_size : int, optional
        Maximum number of images per execution (default 150, the value that
        was previously hard-coded).
    num_classes : int, optional
        Number of output values per image (default 1000, the value that was
        previously hard-coded).

    Returns
    -------
    None
        The batch size, per-batch time, total time and the precision are
        printed.

    Raises
    ------
    ValueError
        If ``imgs`` is empty or ``batch_size`` is not positive.
    """
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer")

    # float32 + contiguous so the raw bytes copied to the device match the
    # float32 output buffers used below.
    imgs = np.ascontiguousarray(imgs, dtype=np.float32)
    num_images = len(imgs)
    if num_images == 0:
        raise ValueError("inference() needs at least one image")

    with engine.create_execution_context() as context:
        print(batch_size)
        stream = cuda.Stream()
        # Derived from the `imgs` argument (not from a notebook global), so
        # the function works for any input it is handed.
        num_batches = (num_images + batch_size - 1) // batch_size
        largest_batch = min(batch_size, num_images)
        batch_outputs = []

        t0_with_transfer = time.time()

        # Allocate device buffers once, sized for the largest batch, and
        # release them explicitly instead of allocating anew per batch.
        d_input = cuda.mem_alloc(imgs[:largest_batch].nbytes)
        d_output = cuda.mem_alloc(
            num_classes * largest_batch * np.dtype(np.float32).itemsize)
        bindings = [int(d_input), int(d_output)]
        try:
            for i in range(num_batches):
                start = i * batch_size
                stop = min(start + batch_size, num_images)
                current_batch_size = stop - start

                host_input = imgs[start:stop].ravel()
                host_output = np.empty(num_classes * current_batch_size,
                                       dtype=np.float32)

                # NOTE: despite the printed label, this interval covers the
                # host<->device copies as well as the execution itself.
                t0_batch = time.time()
                cuda.memcpy_htod_async(d_input, host_input, stream)
                context.execute_async(batch_size=current_batch_size,
                                      bindings=bindings,
                                      stream_handle=stream.handle)
                cuda.memcpy_dtoh_async(host_output, d_output, stream)
                stream.synchronize()
                print("Time without transfer: " + str(time.time() - t0_batch))

                batch_outputs.append(host_output)
        finally:
            d_input.free()
            d_output.free()

        print("Time with transfer: " + str(time.time() - t0_with_transfer))

        # Single concatenation instead of growing an array with np.append.
        results_reshaped = np.concatenate(batch_outputs).reshape(-1, num_classes)
        precision = mAP(labels, results_reshaped)
        print(precision)
            
            



In [6]:
# Load the first `img_load_count` validation images and their labels.
img_load_count = 2000
img_path = "/home/vtpc/Documents/Alvils/tensorrt/data/ilsvrc12/imgs/"
img_names_and_labels_path = '/home/vtpc/Documents/Alvils/tensorrt/data/ilsvrc12/val.txt'

# Each row of the listing is indexed below as [file name, label].
imgs_file_names_and_labels = np.loadtxt(img_names_and_labels_path, dtype=str)

selected_rows = [imgs_file_names_and_labels[i] for i in range(img_load_count)]
imgs = [caffe.io.load_image(img_path + row[0]) for row in selected_rows]
labels = [row[1] for row in selected_rows]



  warn('`as_grey` has been deprecated in favor of `as_gray`')


In [9]:
# Preprocess every loaded image, then pack the flattened results row by row
# into one float32 matrix with 224 * 224 * 3 values per image.
mean_values = np.array([104, 117, 123]).reshape((3, 1, 1))
imgs_transformed = [prep_image(img, mean_values) for img in imgs]

imgs_raveled = np.zeros((img_load_count, 224 * 224 * 3), dtype=np.float32)
for row, transformed in enumerate(imgs_transformed):
    imgs_raveled[row] = transformed.ravel()
    



In [14]:
# Deserialize the engine stored in "int8.engine" and evaluate it on the
# preprocessed images; timings and the precision are printed by inference().
trt_engine_path = "int8.engine"
engine = loadEngine(trt_engine_file_path=trt_engine_path)
inference(engine=engine, imgs=imgs_raveled, labels=labels)


150
Time without transfer: 0.0322749614716
Time without transfer: 0.031848192215
Time without transfer: 0.0318028926849
Time without transfer: 0.0305640697479
Time without transfer: 0.0290699005127
Time without transfer: 0.0291080474854
Time without transfer: 0.0290269851685
Time without transfer: 0.0285038948059
Time without transfer: 0.0281829833984
Time without transfer: 0.0282139778137
Time without transfer: 0.0289361476898
Time without transfer: 0.0278789997101
Time without transfer: 0.0277390480042
Time without transfer: 0.00987792015076
Time with transfer: 0.601088047028
0.6755
