In [1]:

from random import randint
from PIL import Image
import numpy as np
import pickle
import pycuda.driver as cuda
import pycuda.autoinit
import uff
import tensorrt as trt
import ctypes

from matplotlib import pyplot as plt
from six.moves import cPickle
import time
import tensorflow as tf
import logging


In [2]:
# Configure the root logger (getLogger() with no name) to drop everything
# below ERROR severity.
logger = logging.getLogger()
logger.setLevel(logging.ERROR)
# NOTE(review): looks like a leftover smoke test -- a DEBUG record is below the
# ERROR threshold set above, so presumably nothing is printed. Confirm whether
# this call can be removed.
logging.debug("test")

In [3]:
def mAP(y_true, y_pred):
    """Compute TensorFlow's average precision at k=1 for a set of predictions.

    Args:
        y_true: Sequence of integer class labels, one per example.
        y_pred: Per-example class scores (anything ``np.array`` accepts);
            converted to float32 before being handed to TensorFlow.

    Returns:
        The value produced by running the update op of
        ``tf.metrics.average_precision_at_k`` once over the whole input.
    """
    # Use the labels that were passed in. The previous version read the
    # notebook-global `labels` here and silently ignored `y_true`.
    labels_arr = np.array(y_true).astype(np.int64)
    scores_arr = np.array(y_pred).astype(np.float32)

    # Build the metric in a private graph so repeated calls do not keep adding
    # ops and metric variables to the process-wide default graph.
    graph = tf.Graph()
    with graph.as_default():
        labels_t = tf.identity(labels_arr)  # np to tensor
        scores_t = tf.identity(scores_arr)  # np to tensor
        # The second return value is the update op: running it accumulates
        # this input into the metric's local variables and yields the result.
        _, update_op = tf.metrics.average_precision_at_k(labels_t, scores_t, 1)
        init_op = tf.local_variables_initializer()

    # The context manager closes the session; it was previously leaked.
    with tf.Session(graph=graph) as sess:
        sess.run(init_op)
        return sess.run(update_op)

In [4]:
def load_CIFAR10_dataset(file_name):
    """Read one pickled CIFAR-10 batch file.

    Args:
        file_name: Path of the pickled batch file to open in binary mode.

    Returns:
        A tuple ``(images, labels)``: ``images`` is the batch's ``'data'``
        entry converted to a float array and divided by 255.0, and ``labels``
        is the ``'labels'`` entry returned unchanged.
    """
    # NOTE(review): unpickling executes whatever the file encodes; only use
    # this on batch files from a trusted source.
    with open(file_name, 'rb') as batch_file:
        raw_batch = cPickle.load(batch_file)

    # Keys are decoded from UTF-8 so entries can be looked up by text name.
    batch = {key.decode('utf8'): value for key, value in raw_batch.items()}

    pixels = np.array(batch['data'], dtype=float)
    return pixels / 255.0, batch['labels']


In [5]:
def loadEngine(trt_engine_file_path):
    """Deserialize a TensorRT engine from a serialized engine file.

    Args:
        trt_engine_file_path: Path of the serialized engine file, read in
            binary mode.

    Returns:
        Whatever ``runtime.deserialize_cuda_engine`` returns for the file's
        bytes.
    """
    TRT_LOGGER = trt.Logger(trt.Logger.ERROR)
    # A single runtime is enough: the previous version opened an outer
    # trt.Runtime and then a second one that shadowed it.
    with open(trt_engine_file_path, "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
        return runtime.deserialize_cuda_engine(f.read())

In [1]:
def inference(cifar_file_name, trt_engine_path, num_classes=10):
    """Run a serialized TensorRT engine over every image in a CIFAR-10 batch file.

    Images are reshaped to ``(-1, 3, 32, 32)`` float32 and fed to the engine in
    chunks of ``engine.max_batch_size``; the final chunk may be smaller.
    Prints the batch size and the wall-clock time spent in the batch loop.

    Args:
        cifar_file_name: Path passed to ``load_CIFAR10_dataset``.
        trt_engine_path: Path passed to ``loadEngine``.
        num_classes: Number of float32 outputs the engine writes per image
            (default 10, the value that was previously hard-coded).

    Returns:
        Array of shape ``(num_images, num_classes)`` with the engine outputs,
        in input order.
    """
    engine = loadEngine(trt_engine_path)
    with engine.create_execution_context() as context:
        batch_size = engine.max_batch_size
        print("batch size: " + str(batch_size))
        stream = cuda.Stream()

        # Labels are not needed for inference; only the pixel data is used.
        imgs, _ = load_CIFAR10_dataset(cifar_file_name)
        processed_imgs = np.ascontiguousarray(
            imgs.reshape([-1, 3, 32, 32]).astype(trt.nptype(trt.float32)))
        num_imgs = len(processed_imgs)
        if num_imgs == 0:
            # Nothing to run; previously this fell through to `.reshape` on a
            # plain list and raised AttributeError.
            return np.empty((0, num_classes), dtype=np.float64)

        # Allocate device buffers once, sized for a full batch, instead of
        # re-allocating (and never explicitly freeing) them on every iteration.
        image_nbytes = processed_imgs[0].nbytes
        output_itemsize = np.dtype(np.float32).itemsize
        d_input = cuda.mem_alloc(batch_size * image_nbytes)
        d_output = cuda.mem_alloc(batch_size * num_classes * output_itemsize)
        bindings = [int(d_input), int(d_output)]

        batch_outputs = []
        try:
            t0 = time.time()
            for start in range(0, num_imgs, batch_size):
                # The last chunk is clipped to the number of remaining images.
                stop = min(start + batch_size, num_imgs)
                current_batch_size = stop - start

                host_input = processed_imgs[start:stop].ravel()
                # A fresh host buffer per batch, so results collected earlier
                # are never overwritten.
                host_output = np.empty(num_classes * current_batch_size,
                                       dtype=np.float32)

                cuda.memcpy_htod_async(d_input, host_input, stream)
                context.execute_async(bindings=bindings,
                                      stream_handle=stream.handle,
                                      batch_size=current_batch_size)
                cuda.memcpy_dtoh_async(host_output, d_output, stream)
                # Wait for the copy-back before touching host_output.
                stream.synchronize()
                batch_outputs.append(host_output)
            t1 = time.time()
        finally:
            d_input.free()
            d_output.free()

        total_t = t1 - t0
        print("total time: " + str(total_t))
        # One concatenate instead of np.append per batch (which re-copied all
        # earlier results each time). The earlier np.append onto an empty list
        # produced float64, so that dtype is kept for existing callers.
        results = np.concatenate(batch_outputs).astype(np.float64)
        return results.reshape(-1, num_classes)
            
            
            
            



In [7]:
# INT 8
# Evaluate the engine stored in "int8.engine" on the CIFAR-10 test batch:
# run inference over the file, then score the outputs against the labels.

# NOTE(review): absolute, machine-specific path -- consider a configurable
# data directory so the notebook runs elsewhere.
cifar_file = "/home/vtpc/Documents/Alvils/tensorrt/cifar-10-batches-py/test_batch"
engine_file = "int8.engine"
# Loaded here for `labels`; `inference` reads the same file again on its own.
imgs, labels = load_CIFAR10_dataset(cifar_file)

results = inference(cifar_file, engine_file)
pred = mAP(labels, results)
print(pred)

batch size: 450
total time: 0.112390995026
0.8512


In [8]:
# FP32
# Evaluate the engine stored in "fp32.engine" on the CIFAR-10 test batch:
# run inference over the file, then score the outputs against the labels.
# NOTE(review): absolute, machine-specific path -- consider a configurable
# data directory so the notebook runs elsewhere.
cifar_file = "/home/vtpc/Documents/Alvils/tensorrt/cifar-10-batches-py/test_batch"
engine_file = "fp32.engine"
# Loaded here for `labels`; `inference` reads the same file again on its own.
imgs, labels = load_CIFAR10_dataset(cifar_file)

results1 = inference(cifar_file, engine_file)
pred = mAP(labels, results1)
print(pred)

batch size: 450
total time: 0.212811946869
0.852


In [9]:
# NOTE(review): `a` is not referenced by any cell shown above -- possibly
# scratch work; confirm whether this cell is still needed.
a = np.array([1, 2, 3, 4, 5])