In [1]:
# Keep a previously defined EXECUTE_PIP; otherwise default the install switch to True.
EXECUTE_PIP = globals().get('EXECUTE_PIP', True)

In [2]:
# Solving a locale problem
if False:
  !pip install turicreate
  import turicreate as tc
  import os
  try:
    del os.environ['LC_ALL']
  except:
    pass

In [3]:
# Installing TensorRT with all its dependencies
# Runs only while EXECUTE_PIP is truthy. The package is fetched (with --upgrade)
# from the NVIDIA index given by --index-url.
if EXECUTE_PIP:
  !pip install --upgrade --index-url https://pypi.ngc.nvidia.com nvidia-tensorrt

Looking in indexes: https://pypi.ngc.nvidia.com, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting nvidia-tensorrt
  Downloading https://developer.download.nvidia.com/compute/redist/nvidia-tensorrt/nvidia_tensorrt-8.4.1.5-cp37-none-linux_x86_64.whl (774.4 MB)
[K     |████████████████████████████████| 774.4 MB 17 kB/s 
[?25hCollecting nvidia-cudnn-cu11
  Downloading https://developer.download.nvidia.com/compute/redist/nvidia-cudnn-cu11/nvidia-cudnn-cu11-2022.5.19.tar.gz (16 kB)
Collecting nvidia-cublas-cu11
  Downloading https://developer.download.nvidia.com/compute/redist/nvidia-cublas-cu11/nvidia-cublas-cu11-2022.4.8.tar.gz (16 kB)
Collecting nvidia-cuda-runtime-cu11
  Downloading https://developer.download.nvidia.com/compute/redist/nvidia-cuda-runtime-cu11/nvidia-cuda-runtime-cu11-2022.4.25.tar.gz (16 kB)
Collecting nvidia-cublas-cu117
  Downloading https://developer.download.nvidia.com/compute/redist/nvidia-cublas-cu117/nvidia_cublas_cu117-11.10.1.25-py3-none-manyli

# **TensorRTUtils:**

In [16]:
# TensorRTUtils
if EXECUTE_PIP:
  !pip install pycuda
  !pip install tensorrt
import tensorrt as trt
import pycuda.autoinit
import pycuda.driver as cuda
import numpy as np
import os

class MatrixIterator:
    """Iterator over the leading axis of a 3-D matrix.

    Each step yields ``matrix[i, :, :].squeeze()``.

    Args:
        matrix: Object supporting ``matrix[i, :, :]`` indexing and ``.shape``.
        n: Index of the first slice to yield.
        max: Exclusive upper bound on the slice index. Values ``<= 0`` mean
            "use ``matrix.shape[0]``".
    """

    def __init__(self, matrix, n=0, max=0):
        self.matrix = matrix
        if max > 0:
            self.max = max
        else:
            self.max = matrix.shape[0]
        self.n = n

    def __iter__(self):
        return self

    def __next__(self):
        # Strict comparison: index ``self.max`` itself is out of range. The
        # previous ``<=`` test indexed one slice past the end.
        if self.n < self.max:
            result = self.matrix[self.n, :, :].squeeze()
            self.n += 1
            return result
        raise StopIteration

    def first(self):
        """Return the first slice without advancing the iterator."""
        return self.matrix[0, :, :].squeeze()

class HostDeviceMem(object):
    """Pair of buffers: one stored as ``host``, the other as ``device``."""

    def __init__(self, host_mem, device_mem):
        self.host, self.device = host_mem, device_mem

    def __str__(self):
        # Four lines: a label followed by the string form of each buffer.
        return "\n".join(("Host:", str(self.host), "Device:", str(self.device)))

    def __repr__(self):
        return str(self)

class ErrorRecorder(trt.IErrorRecorder):
    """Error recorder that stores every reported error in a Python list.

    ``errorsStack`` holds ``(code, description)`` tuples in the order they
    were reported.
    """

    def __init__(self):
        # The parent constructor is invoked explicitly, as in the other TensorRT subclasses here.
        trt.IErrorRecorder.__init__(self)
        self.errorsStack = list()

    def clear(self):
        """Drop all recorded errors."""
        del self.errorsStack[:]

    def get_error_code(self, arg0):
        """Return the code (first tuple element) of the error at index ``arg0``."""
        code, _ = self.errorsStack[arg0]
        return code

    def get_error_desc(self, arg0):
        """Return the description (second tuple element) of the error at index ``arg0``."""
        _, description = self.errorsStack[arg0]
        return description

    def has_overflowed(self):
        """Always False: the list is never truncated by this class."""
        return False

    def num_errors(self):
        """Return how many errors are currently stored."""
        return len(self.errorsStack)

    def report_error(self, arg0, arg1):
        """Store a new error as the tuple ``(arg0, arg1)``."""
        self.errorsStack.append((arg0, arg1))

class Logger(trt.ILogger):
    """TensorRT logger that prints a severity label followed by the message."""

    def __init__(self):
        trt.ILogger.__init__(self)

    def log(self, severity, msg):
        """Print the label matching ``severity`` (or a fallback), then ``msg``."""
        # Checked in this order with ``==``, mirroring an if/elif chain.
        labelBySeverity = (
            (trt.ILogger.INTERNAL_ERROR, 'INTERNAL_ERROR'),
            (trt.ILogger.ERROR, 'TRT - ERROR'),
            (trt.ILogger.WARNING, 'TRT - WARNING'),
            (trt.ILogger.INFO, 'TRT - INFO'),
            (trt.ILogger.VERBOSE, 'TRT - VERBOSE'),
        )
        for level, label in labelBySeverity:
            if severity == level:
                print(label)
                break
        else:
            print('TRT - Wrong severity')

        print(msg)

class Int8EntropyCalibrator(trt.IInt8EntropyCalibrator2):
    """INT8 entropy calibrator that feeds one pre-processed sample per batch.

    On construction every sample of ``calibSet`` is written as a float32
    ``<index>.bin`` file under ``<calibrationSetPath>/PreProcessedSet``. This is
    skipped when a calibration cache file or a complete sample set already
    exists. ``get_batch`` later reads those files back one at a time and copies
    them to a single device buffer.

    Args:
        calibrationSetPath: Directory holding ``CacheFile.bin`` and the
            ``PreProcessedSet`` sub-directory.
        calibSet: Iterator exposing ``max`` (number of samples), ``first()``
            (a sample with a ``size`` attribute) and ``next()`` support, as
            ``MatrixIterator`` does.

    Raises:
        ValueError: if either argument is missing.
    """

    def __init__(self, calibrationSetPath = None, calibSet = None):
        # Whenever you specify a custom constructor for a TensorRT class,
        # you MUST call the constructor of the parent explicitly.
        trt.IInt8EntropyCalibrator2.__init__(self)

        if calibrationSetPath is None or calibSet is None:
            raise ValueError('Int8EntropyCalibrator requires both calibrationSetPath and calibSet')

        # os.path.join keeps an empty calibrationSetPath relative to the working
        # directory instead of producing paths in the filesystem root.
        self.cacheFile = os.path.join(calibrationSetPath, 'CacheFile.bin')
        self.batchSize = 1
        self.currentIndex = 0
        self.PreProcessedSetPath = os.path.join(calibrationSetPath, 'PreProcessedSet')
        self.PreProcessedSetCount = calibSet.max
        # Samples are stored and uploaded as float32: 4 bytes per element.
        self.PreProcessedSize = calibSet.first().size * 4

        # Allocate enough memory for a whole batch.
        self.deviceInput = cuda.mem_alloc(self.PreProcessedSize)

        if os.path.exists(self.cacheFile):
            print('Calibration cache file already exists - ', self.cacheFile)
            return

        if os.path.isdir(self.PreProcessedSetPath):
            existingFiles = os.listdir(self.PreProcessedSetPath)

            if len(existingFiles) == self.PreProcessedSetCount:
                print('ERROR - Pre processed file set exists!!!')
                return
        else:
            # makedirs also creates missing parent directories.
            os.makedirs(self.PreProcessedSetPath)

        if self.PreProcessedSetCount == 0:
            print('ERROR - Calibration set is empty!!!')
            return

        print('Start calibration batches build')

        print(f"Nir: PreProcessedSetCount = {self.PreProcessedSetCount}") # Debug printing
        for idx in range(self.PreProcessedSetCount):
            preProcImg = next(calibSet)
            if idx % 100 == 0:
              print(f"Nir: {idx} preProcImg shape: {preProcImg.shape}") # Debug printing
            samplePath = os.path.join(self.PreProcessedSetPath, str(idx) + '.bin')
            # Write float32 explicitly: get_batch reads the file back as
            # np.single and the device buffer is sized for 4-byte elements.
            with open(samplePath, mode='wb') as preProcessedFile:
                np.asarray(preProcImg, dtype=np.float32).tofile(preProcessedFile)

        print('End calibration batches build')

    def get_algorithm(self):
        return trt.CalibrationAlgoType.ENTROPY_CALIBRATION_2

    def get_batch_size(self):
        return self.batchSize

    # TensorRT passes along the names of the engine bindings to the get_batch function.
    # You don't necessarily have to use them, but they can be useful to understand the order of
    # the inputs. The bindings list is expected to have the same ordering as 'names'.
    def get_batch(self, names):
        """Upload the next sample and return its device buffer, or None when exhausted."""
        if self.currentIndex >= self.PreProcessedSetCount:
            return None

        print('Get pre processed file index - ', self.currentIndex)

        samplePath = os.path.join(self.PreProcessedSetPath, str(self.currentIndex) + '.bin')
        batchData = np.fromfile(samplePath, dtype=np.single)
        cuda.memcpy_htod(self.deviceInput, batchData)
        self.currentIndex += 1

        return [self.deviceInput]

    def read_calibration_cache(self):
        """Return the cached calibration bytes, or None when no cache file exists."""
        if os.path.exists(self.cacheFile):
            with open(self.cacheFile, "rb") as f:
                return f.read()
        return None

    def write_calibration_cache(self, cache):
        """Persist the calibration cache to ``self.cacheFile``."""
        with open(self.cacheFile, "wb") as f:
            f.write(cache)

# Module-level TensorRT state. The functions defined below read and rebind
# these names through `global` declarations.
logger = Logger()
errorRecorder = ErrorRecorder()

builder = trt.Builder(logger)
builder.max_batch_size = 1

# INT8 calibrator; stays None until TrtModelOptimizeAndSerialize creates one.
calib = None
config = builder.create_builder_config()
# 1073741824 bytes == 1 GiB
config.max_workspace_size = 1073741824

optimizationProfiler = builder.create_optimization_profile()

# Bit mask with only the EXPLICIT_BATCH creation flag set.
networkFlags = 1 << (int)(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
network = builder.create_network(networkFlags)
parser = trt.OnnxParser(network, logger)
runtime = trt.Runtime(logger)

# Set by TrtModelOptimizeAndSerialize (engine) and ModelInferSetup (context).
engine = None
context = None

# Set by TrtModelParse.
modelName = None

# Filled by ModelInferSetup: HostDeviceMem entries for inputs/outputs, integer
# device addresses for bindings, and the CUDA stream.
inputs = []
outputs = []
bindings = []
stream = None

def TrtModelParse(modelPath):
    """Parse an ONNX file into the module-level network and describe it.

    Sets the global ``modelName`` to ``modelPath`` without its extension, then
    prints either the parser errors or the network inputs and outputs.

    Args:
        modelPath: Path of the ONNX model file.

    Returns:
        True when parsing succeeded, False otherwise.
    """
    global modelName
    global parser
    global network

    # splitext strips only the extension. Splitting on the first '.' returned
    # an empty or truncated name for paths such as './models/FCNN.onnx'.
    modelName = os.path.splitext(modelPath)[0]
    parseResult = parser.parse_from_file(modelPath)

    if not parseResult:
        for errorIdx in range(parser.num_errors):
            print(str(parser.get_error(errorIdx)))
        return False

    print("Model parsing OK!")

    print("Network Description")

    for i in range(network.num_inputs):
        tensor = network.get_input(i)
        print("Input '{}' with shape {} and dtype {}".format(tensor.name, tensor.shape, tensor.dtype))
    for i in range(network.num_outputs):
        tensor = network.get_output(i)
        print("Output '{}' with shape {} and dtype {}".format(tensor.name, tensor.shape, tensor.dtype))

    return True

def TrtModelOptimizeAndSerialize(precision = 'fp32',calibPath="", calibSet=None):
    """Build (or load from disk) the TensorRT engine for the parsed network.

    The engine file is named ``<modelName><precision>.trt.engine``. When that
    file exists it is deserialized. Otherwise the engine is built from the
    module-level network/config and written to that file.

    Args:
        precision: 'fp32', 'fp16' or 'int8'.
        calibPath: Directory passed to ``Int8EntropyCalibrator`` (int8 only).
        calibSet: Calibration sample iterator passed to ``Int8EntropyCalibrator``
            (int8 only).

    Raises:
        RuntimeError: if no model was parsed yet, or the engine could not be
            built or deserialized.
    """
    global modelName
    global builder
    global optimizationProfiler
    global calib
    global config
    global network
    global engine
    global runtime

    global g_DEBUG_network
    global g_DEBUG_config

    if modelName is None:
        raise RuntimeError('No model has been parsed - call TrtModelParse first')

    modelOptName = modelName + precision + '.trt.engine'

    if os.path.exists(modelOptName):
        with open(modelOptName, 'rb') as f:
            engine = runtime.deserialize_cuda_engine(f.read())
        if engine is None:
            raise RuntimeError('Failed to deserialize TRT engine from ' + modelOptName)
    else:
        input = network.get_input(0)

        # Batch dimension fixed to 1; every other dimension is taken from the
        # network input, whatever its rank.
        inputShape = [1] + [input.shape[i] for i in range(1, len(input.shape))]

        optimizationProfiler.set_shape(input.name, inputShape, inputShape, inputShape)

        config.add_optimization_profile(optimizationProfiler)

        if precision == 'fp16':
            if builder.platform_has_fast_fp16:
                config.set_flag(trt.BuilderFlag.FP16)
            else:
                print('WARNING - platform has no fast fp16 support, building without the FP16 flag')
        elif precision == 'int8':
            if builder.platform_has_fast_int8:
                if builder.platform_has_fast_fp16:
                    # Also enable fp16, as some layers may be even more efficient in fp16 than int8
                    config.set_flag(trt.BuilderFlag.FP16)

                config.set_flag(trt.BuilderFlag.INT8)

                calib = Int8EntropyCalibrator(calibPath, calibSet)
                config.int8_calibrator = calib
            else:
                print('WARNING - platform has no fast int8 support, building without the INT8 flag')

        # Kept for the debugging cell that inspects the build inputs.
        g_DEBUG_network = network
        g_DEBUG_config  = config
        engine = builder.build_engine(network, config)

        # Fail here with a clear message instead of an AttributeError on
        # engine.serialize() when the build did not produce an engine.
        if engine is None:
            raise RuntimeError('TRT engine build failed for ' + modelOptName)

        with open(modelOptName, 'wb') as engineFD:
            engineFD.write(engine.serialize())

    print('TRT engine - ', engine.device_memory_size, ' Bytes')
    print('TRT engine number of layers - ', engine.num_layers)
    print('TRT engine number of bindings - ', engine.num_bindings)
    print('TRT engine number of profils - ', engine.num_optimization_profiles)

    print('Completion optimized model')

def ModelInferSetup():
    """Allocate host/device buffers for every engine binding and create the context.

    Rebinds the module-level ``inputs``, ``outputs``, ``bindings``, ``stream``
    and ``context``.

    Raises:
        RuntimeError: if the module-level ``engine`` is still None.
    """
    global context
    global engine
    global inputs
    global outputs
    global bindings
    global stream

    if engine is None:
        raise RuntimeError('TRT engine is not available - call TrtModelOptimizeAndSerialize first')

    # Start from fresh lists so a repeated call does not append a second set
    # of buffers to the ones created earlier.
    inputs = []
    outputs = []
    bindings = []

    stream = cuda.Stream()

    # Walk over every binding (input and output tensor) of the engine.
    for binding in engine:
        # Volume of the bound tensor, in elements.
        size = trt.volume(engine.get_binding_shape(binding))
        # NumPy dtype of the bound tensor's elements.
        dtype = trt.nptype(engine.get_binding_dtype(binding))
        # Allocate the page-locked host buffer.
        host_mem = cuda.pagelocked_empty(size, dtype)
        # Allocate a device buffer of the same byte size.
        device_mem = cuda.mem_alloc(host_mem.nbytes)
        # Append the device buffer address to the device bindings.
        bindings.append(int(device_mem))
        # Append to the appropriate list.
        if engine.binding_is_input(binding):
            inputs.append(HostDeviceMem(host_mem, device_mem))
        else:
            outputs.append(HostDeviceMem(host_mem, device_mem))

    # Contexts are used to perform inference.
    context = engine.create_execution_context()
    context.error_recorder = errorRecorder

def Inference(externalnputs = None):
    """Run one inference pass through the module-level TensorRT context.

    Args:
        externalnputs: Sequence of arrays, one per model input. Each array's
            ``size`` must equal the length of the matching host buffer.

    Returns:
        A list with the host buffer of every model output, or None when the
        context is missing, the inputs do not match the model, or an exception
        occurred (a message is printed in each case).
    """
    global context
    global stream
    global inputs
    global outputs
    global bindings

    try:
        # Verify that the TRT context was generated successfully.
        if context is None:
            print('TRT context is None ERROR')
            return None

        # Verify that inputs were supplied.
        if externalnputs is None:
            print('External inputs list is None ERROR')
            return None

        if len(externalnputs) != len(inputs):
            print('External inputs list size - ', len(externalnputs), ' is not equal to model inputs list size - ', len(inputs))
            return None

        # Copy every input from user memory into the page-locked host buffer.
        for hostDevice, external in zip(inputs, externalnputs):
            if len(hostDevice.host) != external.size:
                print('TRT external input size - ', external.size,
                      ' is not equal to model inputs size - ', len(hostDevice.host))
                return None
            np.copyto(hostDevice.host, external.ravel())

        # Transfer input data from the host buffers to the GPU.
        for inp in inputs:
            cuda.memcpy_htod_async(inp.device, inp.host, stream)
        # Run inference asynchronously on the stream.
        context.execute_async_v2(bindings=bindings, stream_handle=stream.handle)
        # Transfer predictions back from the GPU.
        for out in outputs:
            cuda.memcpy_dtoh_async(out.host, out.device, stream)

        stream.synchronize()
        # Return only the host side of every output.
        return [out.host for out in outputs]
    except Exception as e:
        # Exception (not BaseException) so KeyboardInterrupt/SystemExit still propagate.
        print('TRT inference exception ERROR - ', e)
        return None

TRT - INFO
The logger passed into createInferBuilder differs from one already provided for an existing builder, runtime, or refitter. Uses of the global logger, returned by nvinfer1::getLogger(), will return the existing value.

TRT - INFO
[MemUsageChange] Init CUDA: CPU +0, GPU +0, now: CPU 0, GPU 1243 (MiB)
TRT - INFO
[MemUsageChange] Init builder kernel library: CPU +0, GPU +68, now: CPU 0, GPU 1311 (MiB)
TRT - INFO
The logger passed into createInferRuntime differs from one already provided for an existing builder, runtime, or refitter. Uses of the global logger, returned by nvinfer1::getLogger(), will return the existing value.

TRT - INFO
[MemUsageChange] Init CUDA: CPU +0, GPU +0, now: CPU 0, GPU 1311 (MiB)




# **onnxUtils:**

In [5]:
# onnxUtils
if EXECUTE_PIP:
  !pip install tf2onnx onnx onnxsim
import json
import time
import tf2onnx
import onnx
#import onnxsim
import os.path


# Save model into h5 and ONNX formats
def convertKerasToONNX(name, model, overwrite_existing = False):
    """Convert a Keras model to ONNX and write it to ``<name>.onnx``.

    The conversion is skipped when the file already exists and
    ``overwrite_existing`` is False.

    Args:
        name: Output path without the ``.onnx`` extension.
        model: Keras model handed to ``tf2onnx.convert.from_keras``.
        overwrite_existing: Convert and overwrite even if the file exists.

    Returns:
        ``(modelFile, onnx_model_proto, storage)``. The last two are None when
        the conversion was skipped.
    """
    modelFile = name + '.onnx'
    # Defined up front so the return below works when the conversion is skipped.
    onnx_model_proto = None
    storage = None

    if overwrite_existing or not os.path.isfile(modelFile):
        # Save model with ONNX format
        (onnx_model_proto, storage) = tf2onnx.convert.from_keras(model)
        with open(modelFile, "wb") as f:
            f.write(onnx_model_proto.SerializeToString())

    return modelFile, onnx_model_proto, storage

def ModelOnnxCheck(name):
    """Run the ONNX checker on ``<name>.onnx`` and print a short report.

    Returns:
        True when the checker raised nothing, False otherwise.
    """
    print("===============================================================")
    print("Onnx model check report:")

    try:
        # Perform basic check on the model input
        onnx.checker.check_model(name + '.onnx')
    except BaseException as failure:
        # Covers checker validation errors as well as anything else raised.
        msg, isCheckOk = failure, False
    else:
        msg, isCheckOk = 'OK', True

    print('Model check completed Successfully' if isCheckOk else 'ERROR - Model check failure')
    print('Model onnx checker, check model - ', msg)

    return isCheckOk

def RemoveInitializerFromInput(model, modelPath):
    """Remove graph inputs that share a name with an initializer.

    When at least one input was removed, the model is saved back to
    ``modelPath`` with ``onnx.save``.
    """
    graphInputs = model.graph.input
    countBefore = len(graphInputs)

    # Look-up table from input name to the input entry itself.
    inputByName = {graphInput.name: graphInput for graphInput in graphInputs}

    for init in model.graph.initializer:
        if init.name in inputByName:
            graphInputs.remove(inputByName[init.name])

    countAfter = len(graphInputs)

    if countBefore != countAfter:
        print('Model includes several Initializers which considered as inputs to the graph - ', countBefore - countAfter)
        print('All Initializers were removed from graph inputs')
        print('Replace the model *.onx file with the updated one')
        onnx.save(model, modelPath)

def ProcessModelInputs(model, modelPath):
    """Collect the shapes of graph inputs that have a dimension with value 0.

    Initializers are first removed from the graph inputs. For each remaining
    input, every dimension whose ``dim_value`` is 0 is replaced by 1.

    Returns:
        Dict mapping input name to the adjusted shape list, containing only
        the inputs that had at least one zero-valued dimension.
    """
    RemoveInitializerFromInput(model, modelPath)
    graphInputs = model.graph.input
    print(str(graphInputs))

    dynamicShapes = {}
    for tensorInput in graphInputs:
        shape = []
        hasDynamicDim = False
        for dim in tensorInput.type.tensor_type.shape.dim:
            if dim.dim_value != 0:
                shape.append(dim.dim_value)
                continue
            # Zero-valued dimension: report it and substitute 1.
            hasDynamicDim = True
            print('CAUTION!!! - Tensor input name' + ' - ', tensorInput.name, ', dimension - ' , dim.dim_param, ', set its value to 1 for Onnx simplify operation')
            shape.append(1)

        if hasDynamicDim:
            dynamicShapes[tensorInput.name] = shape

    return dynamicShapes

def ModelSimplify(name):
    """Simplify ``<name>.onnx`` with onnxsim and save it as ``<name>Simp.onnx``.

    When the simplified file already exists it is only loaded and nothing is
    rewritten.

    Args:
        name: Model path without the ``.onnx`` extension.

    Returns:
        True when a simplified model exists or was generated and validated,
        False when simplification failed or raised.
    """
    nameSimp = name + 'Simp'
    simplifiedPath = nameSimp + '.onnx'

    if os.path.exists(simplifiedPath):
        print('Model Onnx simplify is already exist, No model check and\\or simplify operations is required')
        # Loaded as before, so an unreadable file still raises here.
        onnx.load(simplifiedPath)
        return True

    print("===============================================================")
    print("Onnx model simplifier report:")
    model = onnx.load(name + '.onnx')

    modelDynamicInputsDict = ProcessModelInputs(model, name + '.onnx')

    isSimplifiedOK = False
    try:
        # Imported here because the notebook-level import is commented out.
        # A missing package is reported by the handler below.
        import onnxsim

        print('Start model onnx simplify...')
        # Perform simplification on the model input
        model, check = onnxsim.simplify(model,input_shapes=modelDynamicInputsDict,
                                              dynamic_input_shape=(len(modelDynamicInputsDict) > 0))
        print('Completion model onnx simplify')
        if (check):
            isSimplifiedOK = True
            print('Onnx simplification success!')
            print('Save Onnx simplified model to - ', simplifiedPath)
            onnx.save(model, simplifiedPath)
        else:
            print('Onnx simplification failure!')
            print('Simplified Onnx model could not be generated and validated')
    except Exception as e:
        isSimplifiedOK = False
        print('Onnx simplification exception - ', e)

    return isSimplifiedOK

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting tf2onnx
  Downloading tf2onnx-1.12.0-py3-none-any.whl (442 kB)
[K     |████████████████████████████████| 442 kB 32.2 MB/s 
[?25hCollecting onnx
  Downloading onnx-1.12.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (13.1 MB)
[K     |████████████████████████████████| 13.1 MB 57.8 MB/s 
[?25hCollecting onnxsim
  Downloading onnxsim-0.4.7-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.0 MB)
[K     |████████████████████████████████| 2.0 MB 56.8 MB/s 
Collecting rich
  Downloading rich-12.5.1-py3-none-any.whl (235 kB)
[K     |████████████████████████████████| 235 kB 72.3 MB/s 
Collecting commonmark<0.10.0,>=0.9.0
  Downloading commonmark-0.9.1-py2.py3-none-any.whl (51 kB)
[K     |████████████████████████████████| 51 kB 8.4 MB/s 
[?25hInstalling collected packages: commonmark, rich, onnx, tf2onnx, onnxsim
Successfully installed commonmark-0.9.1 onnx

# **wandb_helpers:**

In [6]:
# wandb_helpers
if EXECUTE_PIP:
  !pip install wandb
  EXECUTE_PIP = False
from datetime import datetime
import wandb
from collections import namedtuple
import numpy as np
import os
import tensorflow as tf

Dataset = namedtuple("Dataset", ["images", "labels"])
dataset_names = ["training", "validation", "test"]

def start_wandb_run(model_name, config):
    """Start a W&B run in project/entity ``ml-p2``, named ``<model_name>-<HHMMSS>``."""
    timestamp = datetime.now().strftime("%H%M%S")
    init_kwargs = {
        "project": "ml-p2",
        "entity": "ml-p2",
        "name": f"{model_name}-{timestamp}",
        "notes": f"Training FCNN model @{timestamp}",
        "config": config,
    }
    return wandb.init(**init_kwargs)

def read_datasets(wandb_run, dataset_tag = "latest"):
    '''
    Download the fashion-mnist dataset artifact and load every split.
    The result is a list ordered like dataset_names.
    Usage example: train_set, validation_set, test_set = wbh.read_datasets(run)
    '''
    artifact_name = f'ml-p2/ml-p2/fashion-mnist:{dataset_tag}'
    data_dir = wandb_run.use_artifact(artifact_name, type='dataset').download()

    loaded = []
    for ds_name in dataset_names:
        loaded.append(read_dataset(data_dir, ds_name))
    return loaded

def read_dataset(data_dir, ds_name):
    """Load ``<data_dir>/<ds_name>.npz`` into a ``Dataset``.

    The archive's ``"x"`` entry becomes ``images`` and ``"y"`` becomes ``labels``.
    """
    filename = ds_name + ".npz"
    # The context manager closes the archive's file handle. Both arrays are
    # read inside the block, before the archive is closed.
    with np.load(os.path.join(data_dir, filename)) as data:
        return Dataset(images = data["x"], labels = data["y"])

def read_model(wandb_run, model_name, model_tag = "latest") -> tf.keras.models.Model:
    """Download the model artifact ``<model_name>:<model_tag>`` and load it with Keras."""
    artifact_name = f'ml-p2/ml-p2/{model_name}:{model_tag}'
    artifact_dir = wandb_run.use_artifact(artifact_name, type='model').download()
    return tf.keras.models.load_model(artifact_dir)

def save_model(wandb_run, model, config, model_name, model_description):
    """Save the model under ./saved-models and log that directory as a W&B model artifact."""
    model_file = f'./saved-models/{model_name}.tf'
    tf.keras.models.save_model(model, model_file)

    artifact_kwargs = {
        "type": "model",
        "description": model_description,
        "metadata": dict(config),
    }
    model_artifact = wandb.Artifact(model_name, **artifact_kwargs)
    model_artifact.add_dir(model_file)
    wandb_run.log_artifact(model_artifact)

def load_best_model(sweep_id):
    """Download ``model-best.h5`` from the sweep run with the highest ``val_accuracy``."""

    def val_accuracy(run):
        # Runs without the metric count as 0.
        return run.summary.get("val_accuracy", 0)

    sweep = wandb.Api().sweep(f"ml-p2/ml-p2/{sweep_id}")
    ranked_runs = sorted(sweep.runs, key=val_accuracy, reverse=True)
    best_run = ranked_runs[0]
    val_acc = val_accuracy(best_run)
    print(f"Best run {best_run.name} with {val_acc} validation accuracy")

    downloaded = best_run.file("model-best.h5").download(replace=True)
    downloaded.close()

#if (__name__ == "__main__"):
#    load_best_model("6zmewzd0")

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting wandb
  Downloading wandb-0.13.1-py2.py3-none-any.whl (1.8 MB)
[K     |████████████████████████████████| 1.8 MB 18.6 MB/s 
Collecting shortuuid>=0.5.0
  Downloading shortuuid-1.0.9-py3-none-any.whl (9.4 kB)
Collecting pathtools
  Downloading pathtools-0.1.2.tar.gz (11 kB)
Collecting sentry-sdk>=1.0.0
  Downloading sentry_sdk-1.9.5-py2.py3-none-any.whl (157 kB)
[K     |████████████████████████████████| 157 kB 63.3 MB/s 
Collecting docker-pycreds>=0.4.0
  Downloading docker_pycreds-0.4.0-py2.py3-none-any.whl (9.0 kB)
Collecting GitPython>=1.0.0
  Downloading GitPython-3.1.27-py3-none-any.whl (181 kB)
[K     |████████████████████████████████| 181 kB 66.1 MB/s 
Collecting setproctitle
  Downloading setproctitle-1.3.2-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (30 kB)
Collecting gitdb<5,>=4.0.1
  Downloading gitdb-4.0.9-py3-no

# **trt-inference:**

In [3]:
# trt-inference
#!pip install sklearn -qqq

#from TensorRTUtils import *
#from onnxUtils import convertKerasToONNX
#import wandb_helpers as wbh

import time
from sklearn.metrics import classification_report, confusion_matrix
import tensorflow as tf
import tensorrt as trt
import onnx
import tf2onnx
import numpy as np
from PIL import Image as im
import os

import seaborn as sns
import matplotlib.pyplot as plt     

modelName = "FCNN"

# Stage 1: Load an existing model
# ===============================
# In this part we load the model we created in the previous project,
# which is built to infer from FASHION-MNIST images.
# It is not a sophisticated model, but the idea is to use something we
# know.
dataset_path = '.\\artifacts\\fashion-mnist-v2'

if not os.path.exists(dataset_path):
    with start_wandb_run("FCNN-metrics", None) as run:
        train_set, validation_set, test_set = read_datasets(run)
        model = read_model(run, "FCNN", "latest")
else:
    # The validation split is needed below for INT8 calibration, so it is
    # loaded here as well.
    validation_set = read_dataset(dataset_path, 'validation')
    test_set = read_dataset(dataset_path, 'test')
    model = tf.keras.models.load_model('.\\artifacts\\FCNN-v3')

# Stage 2: Convert to ONNX
# ========================
# Convert the model to ONNX and save it to a file. This will allow
# us to load the model into a tensor-rt engine.
modelFile, _, _ = convertKerasToONNX(modelName, model, True)

# Stage 3: Create the tensor-rt engine
# ====================================
# Now that we have a model file, we can load it into a
# tensor-rt engine.
# The active call below builds with INT8 precision, calibrated on the
# validation images; the fp32/fp16 alternatives are left commented out.
TrtModelParse(modelFile)
print("===================================")
print("Before TrtModelOptimizeAndSerialize")
print("===================================")
#TrtModelOptimizeAndSerialize(precision='fp32')
#TrtModelOptimizeAndSerialize(precision='fp16')
calibSet=MatrixIterator(validation_set.images)
TrtModelOptimizeAndSerialize(precision='int8', calibPath="/content", calibSet=calibSet)
print("===================================")
print("After TrtModelOptimizeAndSerialize")
print("===================================")
ModelInferSetup()

# Stage 4: Inference
# ==================
# Now the model is ready for inference. The model is executed once per
# image of the test set we loaded in Stage 1.

# test_set is a (images, labels) namedtuple, so len(test_set) is 2; the
# number of samples is the length of the images array.
numTestImages = len(test_set.images)

startTimeCpu = time.time()
for i in range(numTestImages):
    # A fresh local list is passed on every call. The notebook-level name
    # `inputs` holds the buffers created by ModelInferSetup and is read by
    # Inference, so it must not be rebound here.
    outputsTrt = Inference(externalnputs=[test_set.images[i]])
    #print(' topClassIdx - ', np.argmax(outputsTrt[0]))


endTimeCpu = time.time()

# total time taken
averageTime = (endTimeCpu - startTimeCpu) / 1e-3 / numTestImages
print(f"TRT Keras inference average time is: {averageTime} milliseconds")
print(f"TRT Keras inference average FPS is: {1000 / averageTime}")

# Perform the DlewareAnalyzer inference with TRT & ORT

#np.testing.assert_allclose(kerasPredictions, onnxPredictions[0], rtol=0, atol=1e-05, err_msg='Keras Vs. Onnx Failure!!!')


#y_test = np.argmax(test_set.labels)
# predictions = model.predict(test_set.images)
# y_test = np.argmax(predictions, axis = 1)
# print (classification_report(test_set.labels, y_test))
# cm = confusion_matrix(test_set.labels, y_test)

# class_names = ["T-shirt/top","Trouser","Pullover","Dress","Coat","Sandal","Shirt","Sneaker","Bag","Ankle boot"]

# ax = plt.subplot()
# h = sns.heatmap(cm, annot=True, fmt='g', ax=ax);  #annot=True to annotate cells, ftm='g' to disable scientific notation

# # labels, title and ticks
# ax.set_xlabel('Predicted labels')
# ax.set_ylabel('True labels')
# ax.set_title('Confusion Matrix')
# ax.xaxis.set_ticklabels(class_names)
# ax.yaxis.set_ticklabels(class_names)

# plt.show()

ModuleNotFoundError: ignored

In [46]:
# Debugging builder.build_engin
# Show the network/config captured by TrtModelOptimizeAndSerialize.
print(g_DEBUG_network)
print(g_DEBUG_config)

# Dump the calibrator attributes, labels left-aligned to 20 characters.
for attrName in ("PreProcessedSetCount", "PreProcessedSetPath", "PreProcessedSize",
                 "batchSize", "cacheFile", "currentIndex", "deviceInput"):
    print(f"{attrName:<20} = {getattr(g_DEBUG_config.int8_calibrator, attrName)}")

# Rebuild the engine directly to observe the builder's log output.
engine = builder.build_engine(network, config)
print(f"engine               = {engine}")

<tensorrt.tensorrt.INetworkDefinition object at 0x7fb582c4c4b0>
<tensorrt.tensorrt.IBuilderConfig object at 0x7fb582c2dcb0>
PreProcessedSetCount = 5000
PreProcessedSetPath  = /content/PreProcessedSet
PreProcessedSize     = 3136
batchSize            = 1
cacheFile            = /content/CacheFile.bin
currentIndex         = 0
deviceInput          = <pycuda._driver.DeviceAllocation object at 0x7fb5727cb670>
TRT - VERBOSE
Original: 26 layers
TRT - VERBOSE
After dead-layer removal: 26 layers
TRT - VERBOSE
Running: ConstShuffleFusion on sequential/dense/BiasAdd/ReadVariableOp:0
TRT - VERBOSE
ConstShuffleFusion: Fusing sequential/dense/BiasAdd/ReadVariableOp:0 with (Unnamed Layer* 4) [Shuffle]
TRT - VERBOSE
Running: ConstShuffleFusion on sequential/dense_1/BiasAdd/ReadVariableOp:0
TRT - VERBOSE
ConstShuffleFusion: Fusing sequential/dense_1/BiasAdd/ReadVariableOp:0 with (Unnamed Layer* 10) [Shuffle]
TRT - VERBOSE
Running: ConstShuffleFusion on sequential/dense_2/BiasAdd/ReadVariableOp:0
TRT - VE

  app.launch_new_instance()


TRT - VERBOSE
--------------- Timing Runner: reshape_after_sequential/dense_3/MatMul (Shuffle)
TRT - VERBOSE
Tactic: 0x0000000000000000 Time: 0.02048
TRT - VERBOSE
Tactic: 0x0000000000000000 A valid tactic is found. Rest of the tactics are skipped.
TRT - VERBOSE
>>>>>>>>>>>>>>> Chose Runner Type: Shuffle Tactic: 0x0000000000000000
TRT - VERBOSE
TRT - VERBOSE
*************** Autotuning format combination: Float(10,1) -> Float(10,1) ***************
TRT - VERBOSE
--------------- Timing Runner: sequential/dense_3/Softmax (CudaSoftMax)
TRT - VERBOSE
Tactic: 0x00000000000003ea Time: 0.034496
TRT - VERBOSE
Tactic: 0x00000000000003ea A valid tactic is found. Rest of the tactics are skipped.
TRT - VERBOSE
>>>>>>>>>>>>>>> Chose Runner Type: CudaSoftMax Tactic: 0x00000000000003ea
TRT - VERBOSE
Formats and tactics selection completed in 0.160029 seconds.
TRT - VERBOSE
After reformat layers: 16 layers
TRT - VERBOSE
Pre-optimized block assignment.
TRT - VERBOSE
Block size 1024
TRT - VERBOSE
Block si

In [None]:
# Debugging
#if False:
# Lists the entries of 'PreProcessedSet', resolved relative to the current working directory.
os.listdir('PreProcessedSet')

In [None]:
# Debugging
# Disabled: prints the shape of the images array of each dataset split.
if False:
  print(f"train_set_shape: {train_set.images.shape}")
  print(f"val_set_shape: {validation_set.images.shape}")
  print(f"test_set_shape: {test_set.images.shape}")

In [None]:
# Debugging
# Disabled experiment with iter() on the validation set.
# NOTE(review): if validation_set is the Dataset namedtuple, iter() walks its
# fields, so the first next() yields the whole images array - confirm.
if False:
  my_iter = iter(validation_set)
  curr_val = next(my_iter)
  curr_val.shape

In [None]:
# Debugging
# Disabled: pulls the first item out of a MatrixIterator over the validation
# images and prints its shape and element count.
if False:
  #print(validation_set.images)
  my_iter = MatrixIterator(validation_set.images)
  curr_val = next(my_iter)
  type(curr_val)
  print(np.asarray(curr_val).shape)
  print(np.asarray(curr_val).size)
  #print(curr_val)

In [None]:
# Debugging
# Disabled: minimal demonstration of iter()/next() on a plain list.
if False:
  my_list = [0,1,2,3,4]
  my_iter = iter(my_list)
  print(next(my_iter))
  print(next(my_iter))
  print(next(my_iter))

In [None]:
# Debugging
if False:
  from subprocess import run
  from shlex import split
  #string = "pwd".encode()
  #run(split("cd ~"))
  #run(split(string))
  #print("pwd".encode())
  # capture_output/text are required for stdout and stderr to hold the
  # command's output; without them both attributes are None.
  completed_process = run(split('ls'), capture_output=True, text=True)
  print(completed_process.args)
  print(completed_process.returncode)
  print(completed_process.stdout)
  print(completed_process.stderr)
  #CompletedProcess(args=['python', '--version'], returncode=0)

In [None]:
# Debugging
if False:
  import textwrap
  # Imported here so the cell does not depend on another (disabled) cell.
  from subprocess import run
  args = 'pwd'
  # Capture the output; otherwise cp.stdout is None.
  cp = run(args, capture_output=True, text=True)
  print(cp.stdout)

In [None]:
# Debugging
if False:
  import subprocess
  # Keep the result: the prints below previously read a `completed_process`
  # left over from a different cell. Output is captured so stdout/stderr are
  # not None.
  completed_process = subprocess.run("pwd", shell=True, check=True,
                                     capture_output=True, text=True)
  print(completed_process.args)
  print(completed_process.returncode)
  print(completed_process.stdout)
  print(completed_process.stderr)

In [None]:
# Debugging
# Disabled: lists the current working directory.
if False:
  #os.getcwd()
  #os.mkdir('/content/PreProcessedSet')
  os.listdir()

In [None]:
# Deleting all the global variables:
if False:
  # Names are deleted left to right, in the same order as before.
  del parser, modelName, builder, optimizationProfiler
  del calib, config, network, engine, runtime
  del context, inputs, outputs, bindings, stream