In [1]:
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import caffe
import logging
import numpy as np
import pickle
import pycuda.driver as cuda
import pycuda.autoinit
import uff
import tensorrt as trt
import ctypes
import skimage.transform
from matplotlib import pyplot as plt
from six.moves import cPickle
import time
import tensorflow as tf
import logging
import os, os.path

In [2]:
def prep_image(im, mean_val):
    """Resize, centre-crop and re-order one image for the network input.

    The shorter side is resized to 224 pixels with the aspect ratio kept,
    the central 224x224 window is cut out, the axes are re-ordered from
    (height, width, channel) to (channel, height, width), the channel order
    is reversed, the values are multiplied by 255 and ``mean_val`` is
    subtracted.

    Parameters
    ----------
    im : numpy.ndarray
        Image with three axes: (height, width, channels).
        NOTE(review): the * 255 step assumes pixel values in 0..1 — confirm
        against the loader in use.
    mean_val : array-like
        Value(s) subtracted after scaling; must broadcast against the
        (channels, 224, 224) result.

    Returns
    -------
    numpy.ndarray
        The pre-processed image in (channels, 224, 224) layout.
    """
    h, w, _ = im.shape
    # Floor division keeps the target size integral on Python 3 too; with
    # true division a fractional size would be handed to skimage.  Because the
    # longer side is scaled, the result is never smaller than 224.
    if h < w:
        im = skimage.transform.resize(im, (224, w * 224 // h), preserve_range=True)
    else:
        im = skimage.transform.resize(im, (h * 224 // w, 224), preserve_range=True)

    # Central crop to 224x224
    h, w, _ = im.shape
    top = h // 2 - 112
    left = w // 2 - 112
    im = im[top:top + 224, left:left + 224]

    # shuffle axes to c01: (h, w, c) -> (h, c, w) -> (c, h, w)
    im = np.swapaxes(np.swapaxes(im, 1, 2), 0, 1)

    # convert to BGR by reversing the channel axis
    im = im[::-1, :, :]
    # scaled back to 0 ... 255 (caffe loads images as 0 ... 1)
    im = im * 255.0
    im = im - mean_val
    return im

In [3]:
class ImageBatchStream():
    """Hands out a data set in fixed-size float32 batches.

    Every batch is written into one reusable buffer, ``calibration_data``,
    of shape (batch_size, values_per_sample).  The number of values per
    sample is taken from the first element of ``data``; when ``data`` is
    empty it falls back to 224 * 224 * 3.

    Parameters
    ----------
    batch_size : int
        Number of samples per batch.
    data : sequence of array-like
        Samples to serve.  Each sample is flattened before it is copied into
        a row of the batch buffer.
    """

    def __init__(self, batch_size, data):
        self.batch_size = batch_size
        # Ceiling division: a trailing partial batch counts as one more batch.
        self.max_batches = -(-len(data) // batch_size)
        self.data = data
        # Size the buffer rows from the data instead of a fixed image size.
        values_per_sample = int(np.size(data[0])) if len(data) else 224 * 224 * 3
        self.calibration_data = np.zeros((batch_size, values_per_sample), dtype=np.float32)
        self.batch = 0

    def reset(self):
        """Rewind so that the next call to ``next_batch`` serves the first batch."""
        self.batch = 0

    def next_batch(self):
        """Return the next batch, or an empty array once every batch was served.

        The returned array is the shared ``calibration_data`` buffer, so its
        contents change on the following call.  When the last batch is
        shorter than ``batch_size``, the rows past its end still hold the
        samples written by the previous batch.
        """
        if self.batch >= self.max_batches:
            return np.array([])

        start = self.batch_size * self.batch
        batch_data = self.data[start:start + self.batch_size]
        for i in range(len(batch_data)):
            # ravel lets samples that are not already flat fit a buffer row
            self.calibration_data[i] = np.ravel(batch_data[i])
        self.batch += 1
        return np.ascontiguousarray(self.calibration_data, dtype=np.float32)
        
class PythonEntropyCalibrator(trt.IInt8EntropyCalibrator):
    """INT8 entropy calibrator that feeds TensorRT from a batch stream.

    Parameters
    ----------
    input_layers : list of str
        Names of the network input layers.
    stream : object
        Batch source providing ``batch_size``, ``calibration_data``,
        ``reset()`` and ``next_batch()``.
    """

    def __init__(self, input_layers, stream):
        trt.IInt8EntropyCalibrator.__init__(self)
        self.input_layers = input_layers
        self.stream = stream
        # get_batch copies exactly one batch buffer per call, so the device
        # allocation must match one batch.  Sizing it from the whole data set
        # wastes GPU memory and is too small when the data set holds fewer
        # samples than one batch.
        self.d_input = cuda.mem_alloc(self.stream.calibration_data.nbytes)

        stream.reset()

    def get_batch_size(self):
        """Return the number of samples in each calibration batch."""
        return self.stream.batch_size

    def get_batch(self, bindings, names):
        """Upload the next batch and point the first binding at it.

        Returns ``bindings`` with entry 0 set to the device buffer address,
        or ``None`` once the stream has no more batches.
        """
        batch = self.stream.next_batch()
        if not batch.size:
            return None

        cuda.memcpy_htod(self.d_input, batch)
        # NOTE(review): when input_layers holds strings this loop walks the
        # characters of the first layer name, so it does not actually compare
        # layer names.  Kept unchanged; confirm the intended check.
        for i in self.input_layers[0]:
            assert names[0] != i

        bindings[0] = int(self.d_input)
        return bindings

    def read_calibration_cache(self, length):
        """Report that no calibration cache is available (always ``None``)."""
        return None

    def write_calibration_cache(self, ptr, size):
        """Write ``size`` bytes of calibration cache found at ``ptr`` to disk.

        NOTE(review): assumes ``ptr`` converts to an integer memory address,
        as in NVIDIA's TensorRT 5 INT8 samples — confirm for the installed
        TensorRT version.
        """
        # ptr is an address, not text: wrapping str(ptr) would store the
        # digits of the address instead of the cache.  Reading exactly `size`
        # bytes also avoids stopping early at a NUL byte.
        cache = ctypes.string_at(int(ptr), size)
        with open('calibration_cache.bin', 'wb') as f:
            f.write(cache)
        return None
    


In [4]:
# logger: root logger at INFO level
logger = logging.getLogger()
logger.setLevel(logging.INFO)
# Install the default handler when the root logger has none yet, so INFO
# records are shown without relying on a throw-away logging call to do it.
logging.basicConfig()

In [5]:
## information about data needed to parse model and build engine
class ModelData(object):
    """Model file locations and tensor names used to parse the Caffe model."""
    # Directory holding the model files.  Set GOOGLENET_MODEL_DIR to run the
    # notebook on another machine; the default is the original location.
    MODEL_DIR = os.environ.get(
        "GOOGLENET_MODEL_DIR",
        "/home/vtpc/Documents/Alvils/tensorrt/libs/caffe/models/bvlc_googlenet")
    MODEL_FILE = os.path.join(MODEL_DIR, "weights.caffemodel")
    DEPLOY_FILE = os.path.join(MODEL_DIR, "deploy.prototxt")
    INPUT_SHAPE = (3, 224, 224)  # always nchw (without the batch axis)
    INPUT_NAME = "data"
    OUTPUT_NAME = "prob"
    DTYPE = trt.float32

In [6]:
# img loading
img_load_count = 1000
img_path = "/home/vtpc/Documents/Alvils/tensorrt/data/ilsvrc12/imgs/"
img_names_and_labels_path = '/home/vtpc/Documents/Alvils/tensorrt/data/ilsvrc12/val.txt'
# Each row read from the list file is used as (file name, label).
imgs_file_names_and_labels = np.loadtxt(img_names_and_labels_path,  dtype=str)

# Load at most img_load_count images together with their labels; slicing
# stops at the end of the list when it holds fewer entries.
imgs = []
labels = []
for entry in imgs_file_names_and_labels[:img_load_count]:
    imgs.append(caffe.io.load_image(img_path + entry[0]))
    labels.append(entry[1])

# preprocess: one mean value per channel, shaped to broadcast over (3, h, w)
mean_values = np.array([104, 117, 123]).reshape((3,1,1))
imgs_transformed = []
for img in imgs:
    imgs_transformed.append(prep_image(img, mean_values))

# Flatten every pre-processed image into one float32 row.  The row count
# follows the number of images actually loaded instead of a fixed 1000.
imgs_raveled = np.zeros((len(imgs_transformed), 224 * 224 * 3), dtype= np.float32)
for i in range(0, len(imgs_transformed)):
    imgs_raveled[i] = imgs_transformed[i].ravel()
    


  warn('`as_grey` has been deprecated in favor of `as_gray`')
  warn("The default mode, 'constant', will be changed to 'reflect' in "
  warn("Anti-aliasing will be enabled by default in skimage 0.15 to "


In [7]:
# Calibrate on the first 500 pre-processed images, served 5 at a time.
Int8_calibrator = PythonEntropyCalibrator([ModelData.INPUT_NAME], ImageBatchStream(5, imgs_raveled[:500,:]))
TRT_LOGGER = trt.Logger(trt.Logger.INFO)
# parses model to trt
with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.CaffeParser() as parser:
    # builder arguments for optimization
    builder.int8_mode = True
    builder.int8_calibrator = Int8_calibrator
    builder.max_batch_size = 500
    builder.max_workspace_size = 1 << 30

    # Load the Caffe model and parse it in order to populate the TensorRT network.
    # This function returns an object that we can query to find tensors by name.
    model_tensors = parser.parse(deploy=ModelData.DEPLOY_FILE, model=ModelData.MODEL_FILE, network=network, dtype=ModelData.DTYPE)
    # Stop with a clear message if the parser returned nothing, instead of
    # an AttributeError on the .find call below.
    if model_tensors is None:
        raise RuntimeError("Failed to parse Caffe model: %s / %s"
                           % (ModelData.DEPLOY_FILE, ModelData.MODEL_FILE))
    # For Caffe, we need to manually mark the output of the network.
    # Since we know the name of the output tensor, we can find it in model_tensors.
    print(model_tensors)
    network.mark_output(model_tensors.find(ModelData.OUTPUT_NAME))

    # builds engine
    engine = builder.build_cuda_engine(network)
    # Stop with a clear message if no engine was produced, instead of
    # failing when the missing engine is used as a context manager.
    if engine is None:
        raise RuntimeError("TensorRT failed to build the INT8 engine")
    with engine:
        with open("int8.engine", "wb") as f:
            f.write(engine.serialize())


<tensorrt.tensorrt.IBlobNameToTensor object at 0x7f29e95326c0>
