In [1]:

from random import randint
from PIL import Image
import numpy as np
import pickle
import pycuda.driver as cuda
import pycuda.autoinit
import uff
import tensorrt as trt
import ctypes

from matplotlib import pyplot as plt
from six.moves import cPickle

In [2]:
with open("/home/vtpc/Documents/Alvils/tensorrt/cifar-10-batches-py/test_batch", 'rb') as f:
                
                
    d = cPickle.load(f)
    # decode utf8
    d_decoded = {}
    for k, v in d.items():
        d_decoded[k.decode('utf8')] = v
    d = d_decoded
    data = d['data']
    labels = d['labels']
    raw_float = np.array(data, dtype=float) / 255.0
    # Reshape the array to 4-dimensions.
    images = raw_float.reshape([-1, 3, 32, 32])
    
    images_flatted = np.zeros((images.shape[0], images[0].ravel().shape[0]), dtype=np.float32)

    
    for i in range(0, len(images)):
        images_flatted[i] = images[i].ravel()
print(images_flatted.shape)

(10000, 3072)


In [3]:
print(images.shape)
print((images[:500, :, :, :]).shape)


(10000, 3, 32, 32)
(500, 3, 32, 32)


In [8]:
class ImageBatchStream():
    def __init__(self, batch_size, data):
        self.batch_size = batch_size
        self.max_batches = (len(data) // batch_size) + (1 if (len(data) % batch_size) else 0)
        self.data = data
        self.calibration_data = np.zeros((batch_size, 3072), dtype=np.float32)
        self.batch = 0

         
    def reset(self):
        self.batch = 0
     
    def next_batch(self):
        if self.batch < self.max_batches:
            batch_data = self.data[self.batch_size * self.batch : self.batch_size * (self.batch + 1)]
            for i in range(len(batch_data)):
                self.calibration_data[i] = batch_data[i]
            self.batch += 1
            return np.ascontiguousarray(self.calibration_data, dtype=np.float32)
        else:
            return np.array([])
        
class PythonEntropyCalibrator(trt.IInt8EntropyCalibrator):
    def __init__(self, input_layers, stream):
        trt.IInt8EntropyCalibrator.__init__(self)       
        self.input_layers = input_layers
        self.stream = stream
        self.d_input = cuda.mem_alloc(self.stream.data.nbytes)
        
        stream.reset()

    def get_batch_size(self):
        return self.stream.batch_size

    def get_batch(self, bindings, names):
        batch = self.stream.next_batch()
        if not batch.size:   
            return None
      
        cuda.memcpy_htod(self.d_input, batch)
        for i in self.input_layers[0]:
            assert names[0] != i

        bindings[0] = int(self.d_input)
        return bindings

    def read_calibration_cache(self, length):
        return None

    def write_calibration_cache(self, ptr, size):
        cache = ctypes.c_char_p(str(ptr))
        with open('calibration_cache.bin', 'wb') as f:
            f.write(cache.value)
        return None
    


In [5]:
## information about data needed to parse model and build engine
class ModelData(object):
    MODEL_FILE = "/home/vtpc/Documents/Alvils/tensorrt/pretrained-models/cifar10_resnet20v1_model/model.pb"
    INPUT_NAME ="input_1"
    INPUT_SHAPE = (3, 32, 32) # always nchw
    OUTPUT_NAME = "dense_1/Softmax"
    DTYPE = trt.float32

In [9]:
# tensorflow to uff
uff_model = uff.from_tensorflow_frozen_model(ModelData.MODEL_FILE, [ModelData.OUTPUT_NAME])
TRT_LOGGER = trt.Logger(trt.Logger.ERROR)

=== Automatically deduced input nodes ===
[name: "input_1"
op: "Placeholder"
attr {
  key: "dtype"
  value {
    type: DT_FLOAT
  }
}
attr {
  key: "shape"
  value {
    shape {
      dim {
        size: -1
      }
      dim {
        size: 32
      }
      dim {
        size: 32
      }
      dim {
        size: 3
      }
    }
  }
}
]

Using output node dense_1/Softmax
Converting to UFF graph
DEBUG: convert reshape to flatten node
No. nodes: 216


In [10]:
NUM_IMAGES_PER_BATCH = 5
Int8_calibrator = PythonEntropyCalibrator([ModelData.INPUT_NAME], ImageBatchStream(5, images_flatted[:500,:]))
# parses model to trt
with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.UffParser() as parser:
    # builder argums for optimization
    builder.int8_mode = True
    builder.int8_calibrator = Int8_calibrator
    builder.max_batch_size = 1
    builder.max_workspace_size = 1 << 30    
    # Parse the Uff Network
    parser.register_input(ModelData.INPUT_NAME, ModelData.INPUT_SHAPE)#, trt.UffInputOrder.NHWC)
    parser.register_output(ModelData.OUTPUT_NAME)
    parsed = parser.parse_buffer(uff_model, network)
    print(parsed)
    # builds engine
    with builder.build_cuda_engine(network) as engine:
        with open("int8.engine", "wb") as f:
            f.write(engine.serialize())


True
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3072)
(5, 3

TypeError: string or integer address expected instead of PyCapsule instance