In [37]:
!pip install turicreate
import turicreate as tc
import os
# Drop LC_ALL from this process' environment if it is set.
# NOTE(review): presumably a workaround for a locale problem when importing
# turicreate on Colab - confirm it is still required.
# pop() with a default only tolerates the "variable not set" case instead of
# silencing every possible exception like the previous bare `except:` did.
os.environ.pop('LC_ALL', None)

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [38]:
!pip install --upgrade --index-url https://pypi.ngc.nvidia.com nvidia-tensorrt

Looking in indexes: https://pypi.ngc.nvidia.com, https://us-python.pkg.dev/colab-wheels/public/simple/


# **TensorRTUtils:**

In [39]:
# TensorRTUtils
!pip install pycuda
!pip install tensorrt
import tensorrt as trt
import pycuda.autoinit
import pycuda.driver as cuda
import numpy as np
import os

class HostDeviceMem(object):
    """Pairs a host-side buffer with its device-side counterpart.

    Attributes:
        host: the host buffer object handed to the constructor.
        device: the device buffer object handed to the constructor.
    """

    def __init__(self, host_mem, device_mem):
        self.host, self.device = host_mem, device_mem

    def __str__(self):
        # Same text layout as before: a "Host:" section followed by a "Device:" section.
        pieces = ("Host:\n", str(self.host), "\nDevice:\n", str(self.device))
        return "".join(pieces)

    def __repr__(self):
        # repr intentionally mirrors the human-readable form.
        return self.__str__()

class ErrorRecorder(trt.IErrorRecorder):
    """In-memory TensorRT error recorder.

    Every reported error is kept as a ``(code, description)`` tuple in
    ``errorsStack``, in the order it was reported.
    """

    def __init__(self):
        # The TensorRT base class has to be initialised explicitly.
        trt.IErrorRecorder.__init__(self)
        self.errorsStack = []

    def clear(self):
        """Forget every recorded error."""
        self.errorsStack.clear()

    def get_error_code(self, arg0):
        """Return the code (tuple position 0) of the error stored at index ``arg0``."""
        entry = self.errorsStack[arg0]
        return entry[0]

    def get_error_desc(self, arg0):
        """Return the description (tuple position 1) of the error stored at index ``arg0``."""
        entry = self.errorsStack[arg0]
        return entry[1]

    def has_overflowed(self):
        """Always False: the backing list is never truncated by this class."""
        return False

    def num_errors(self):
        """Return how many errors are currently stored."""
        return len(self.errorsStack)

    def report_error(self, arg0, arg1):
        """Store a new error; ``arg0`` is kept as the code and ``arg1`` as the description."""
        self.errorsStack.append((arg0, arg1))

class Logger(trt.ILogger):
    """TensorRT logger that prints a severity label followed by the message."""

    def __init__(self):
        # The TensorRT base class has to be initialised explicitly.
        trt.ILogger.__init__(self)

    def log(self, severity, msg):
        """Print the label matching ``severity`` and then ``msg`` on its own line."""
        # Checked in the same order as the severities are listed here;
        # anything that matches none of them gets the fallback label.
        known_levels = (
            (trt.ILogger.INTERNAL_ERROR, 'INTERNAL_ERROR'),
            (trt.ILogger.ERROR, 'TRT - ERROR'),
            (trt.ILogger.WARNING, 'TRT - WARNING'),
            (trt.ILogger.INFO, 'TRT - INFO'),
            (trt.ILogger.VERBOSE, 'TRT - VERBOSE'),
        )

        label = 'TRT - Wrong severity'
        for level, text in known_levels:
            if severity == level:
                label = text
                break

        print(label)
        print(msg)

class Int8EntropyCalibrator(trt.IInt8EntropyCalibrator2):
    """INT8 entropy calibrator that feeds TensorRT one pre-processed sample per batch.

    On construction every sample of ``calibSet`` is dumped as a raw ``.bin``
    file under ``<calibrationSetPath>/PreProcessedSet`` (unless a calibration
    cache or a complete dump already exists). ``get_batch`` then streams
    those files to a single device buffer, one file per call.

    Args:
        calibrationSetPath: directory that holds ``CacheFile.bin`` and the
            ``PreProcessedSet`` sub-directory.
        calibSet: iterator-like object exposing ``.n`` (number of samples),
            indexing (``calibSet[0][0]`` is used to size the device buffer)
            and ``.next()`` returning ``(sample, label)``.
            NOTE(review): this looks like a Keras data iterator with
            batch_size 1 - confirm against the caller.

    Raises:
        ValueError: if ``calibrationSetPath`` or ``calibSet`` is None.
    """

    def __init__(self, calibrationSetPath = None, calibSet = None):
        # Whenever you specify a custom constructor for a TensorRT class,
        # you MUST call the constructor of the parent explicitly.
        trt.IInt8EntropyCalibrator2.__init__(self)

        # Fail with a clear message instead of an obscure TypeError/AttributeError.
        if calibrationSetPath is None:
            raise ValueError('Int8EntropyCalibrator requires calibrationSetPath')
        if calibSet is None:
            raise ValueError('Int8EntropyCalibrator requires calibSet')

        self.cacheFile = calibrationSetPath + '/CacheFile.bin'
        self.batchSize = 1
        self.currentIndex = 0
        self.PreProcessedSetPath = calibrationSetPath + '/PreProcessedSet'
        self.PreProcessedSetCount = calibSet.n
        # Size in bytes of one sample, assuming 4-byte (float32) elements.
        self.PreProcessedSize = calibSet[0][0].size * 4

        # Allocate enough device memory for a whole batch (one sample).
        self.deviceInput = cuda.mem_alloc(self.PreProcessedSize)

        if os.path.exists(self.cacheFile):
            print('Calibration cache file is already exist - ', self.cacheFile)
            return

        if self.PreProcessedSetCount == 0:
            # Nothing to dump; get_batch() will immediately report "no more data".
            print('ERROR - Calibration set is empty!!!')
            return

        # Make sure the dump directory exists before listing / writing into it.
        os.makedirs(self.PreProcessedSetPath, exist_ok=True)
        filesCnt = os.listdir(self.PreProcessedSetPath)

        if len(filesCnt) == self.PreProcessedSetCount:
            # A complete dump from a previous run is reused as-is.
            print('ERROR - Pre processed file set is exist!!!')
            return

        print('Start calibration batches build')

        for idx in range(self.PreProcessedSetCount):
            preProcImg, _label = calibSet.next()
            samplePath = self.PreProcessedSetPath + '/' + str(idx) + '.bin'
            # `with` guarantees the file is closed even if tofile() raises.
            with open(samplePath, mode='wb') as preProcessedFile:
                preProcImg.tofile(preProcessedFile)

        print('End calibration batches build')

    def get_algorithm(self):
        """Return the calibration algorithm type (entropy calibration 2)."""
        return trt.CalibrationAlgoType.ENTROPY_CALIBRATION_2

    def get_batch_size(self):
        """Return the calibration batch size (always 1 here)."""
        return self.batchSize

    # TensorRT passes along the names of the engine bindings to the get_batch function.
    # You don't necessarily have to use them, but they can be useful to understand the order of
    # the inputs. The bindings list is expected to have the same ordering as 'names'.
    def get_batch(self, names):
        """Copy the next dumped sample to the device and return its binding list.

        Returns None once every sample has been served, which tells TensorRT
        that calibration data is exhausted.
        """
        if not self.currentIndex < self.PreProcessedSetCount:
            return None

        # Log the index itself (the previous code printed `not index`, i.e. a boolean).
        print('Get pre processed file index - ', self.currentIndex)

        # Samples are read back as float32, matching the 4-byte sizing above.
        batchData = np.fromfile(self.PreProcessedSetPath + '/' + str(self.currentIndex) + '.bin', dtype=np.single)
        cuda.memcpy_htod(self.deviceInput, batchData)
        self.currentIndex += 1

        return [self.deviceInput]

    def read_calibration_cache(self):
        """Return the cached calibration bytes, or None (implicitly) when no cache exists."""
        # If there is a cache, use it instead of calibrating again.
        if os.path.exists(self.cacheFile):
            with open(self.cacheFile, "rb") as f:
                return f.read()

    def write_calibration_cache(self, cache):
        """Persist the calibration cache produced by TensorRT."""
        with open(self.cacheFile, "wb") as f:
            f.write(cache)

# ---------------------------------------------------------------------------
# Shared TensorRT state used by the helper functions below.
# The TensorRT objects are created in dependency order:
# logger -> builder -> config / profile / network -> parser / runtime.
# ---------------------------------------------------------------------------
logger = Logger()
errorRecorder = ErrorRecorder()

builder = trt.Builder(logger)
builder.max_batch_size = 1

config = builder.create_builder_config()
config.max_workspace_size = 1 << 30  # 1 GiB, expressed in bytes

optimizationProfiler = builder.create_optimization_profile()

# Bit mask with only the EXPLICIT_BATCH network-creation flag set.
networkFlags = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
network = builder.create_network(networkFlags)
parser = trt.OnnxParser(network, logger)
runtime = trt.Runtime(logger)

# Filled in later by TrtModelParse / TrtModelOptimizeAndSerialize / ModelInferSetup.
calib = engine = context = modelName = stream = None

# Buffer bookkeeping populated by ModelInferSetup().
inputs, outputs, bindings = [], [], []

def TrtModelParse(modelPath):
    """Parse an ONNX file into the module-level TensorRT network.

    Sets the global ``modelName`` to ``modelPath`` without its final
    extension, then asks the global ``parser`` to parse the file. On failure
    every parser error is printed; on success the network inputs and outputs
    are printed.

    Args:
        modelPath: path of the ONNX model file.
    """
    global modelName
    global parser
    global network

    # splitext only strips the last extension, so paths such as
    # './models/net.v2.onnx' keep their directory and inner dots
    # (split('.')[0] returned '' for any path starting with './').
    modelName = os.path.splitext(modelPath)[0]
    parseResult = parser.parse_from_file(modelPath)

    if not parseResult:
        for errorIdx in range(parser.num_errors):
            print(str(parser.get_error(errorIdx)))
        return

    print("Model parsing OK!")
    print("Network Description")

    # Local names deliberately differ from the module-level `inputs`/`outputs`
    # buffer lists and from the builtin `input`.
    netInputs = [network.get_input(i) for i in range(network.num_inputs)]
    netOutputs = [network.get_output(i) for i in range(network.num_outputs)]

    for tensor in netInputs:
        print("Input '{}' with shape {} and dtype {}".format(tensor.name, tensor.shape, tensor.dtype))
    for tensor in netOutputs:
        print("Output '{}' with shape {} and dtype {}".format(tensor.name, tensor.shape, tensor.dtype))

def TrtModelOptimizeAndSerialize(precision = 'fp32',calibPath="", calibSet=None):
    """Build (or load from disk) the TensorRT engine for the parsed network.

    The engine file name is ``modelName + precision + '.trt.engine'``. When
    that file exists it is deserialized; otherwise an engine is built from
    the global ``network`` with a batch-1 optimization profile on the first
    network input, serialized and written to that file. The result is stored
    in the global ``engine``.

    Args:
        precision: 'fp32', 'fp16' or 'int8'. 'fp16'/'int8' are only enabled
            when the builder reports fast support for them.
        calibPath: calibration directory, used only for 'int8'.
        calibSet: calibration data set, used only for 'int8'.

    Raises:
        RuntimeError: if no model was parsed yet, or if building or
            deserializing the engine fails.
    """
    global modelName
    global builder
    global optimizationProfiler
    global calib
    global config
    global network
    global engine
    global runtime

    if modelName is None:
        raise RuntimeError('No model was parsed - call TrtModelParse() first')

    modelOptName = modelName + precision + '.trt.engine'

    if os.path.exists(modelOptName):
        with open(modelOptName, 'rb') as f:
            engine = runtime.deserialize_cuda_engine(f.read())
        if engine is None:
            raise RuntimeError('Failed to deserialize TRT engine from ' + modelOptName)
    else:
        # Only the first network input gets a profile; batch is pinned to 1 and
        # every other dimension is taken from the network, whatever its rank.
        firstInput = network.get_input(0)
        inputShape = [1] + list(firstInput.shape)[1:]

        optimizationProfiler.set_shape(firstInput.name, inputShape, inputShape, inputShape)

        config.add_optimization_profile(optimizationProfiler)

        if precision == 'fp16':
            if builder.platform_has_fast_fp16:
                config.set_flag(trt.BuilderFlag.FP16)
            else:
                print('WARNING - platform has no fast fp16, building with default precision')
        elif precision == 'int8':
            if builder.platform_has_fast_int8:
                if builder.platform_has_fast_fp16:
                    # Also enable fp16, as some layers may be even more efficient in fp16 than int8
                    config.set_flag(trt.BuilderFlag.FP16)

                config.set_flag(trt.BuilderFlag.INT8)

                calib = Int8EntropyCalibrator(calibPath, calibSet)
                config.int8_calibrator = calib
            else:
                print('WARNING - platform has no fast int8, building with default precision')

        engine = builder.build_engine(network, config)
        if engine is None:
            # build_engine signals failure by returning None; stop here with a
            # clear message instead of crashing on engine.serialize().
            raise RuntimeError('Failed to build TRT engine for ' + modelOptName)

        with open(modelOptName, 'wb') as engineFD:
            engineFD.write(engine.serialize())

    print('TRT engine - ', engine.device_memory_size, ' Bytes')
    print('TRT engine number of layers - ', engine.num_layers)
    print('TRT engine number of bindings - ', engine.num_bindings)
    print('TRT engine number of profils - ', engine.num_optimization_profiles)

    print('Completion optimized model')

def ModelInferSetup():
    """Allocate I/O buffers for every engine binding and create the execution context.

    Populates the module-level ``inputs``, ``outputs`` and ``bindings`` lists,
    creates the CUDA ``stream`` and the execution ``context``, and attaches
    the shared ``errorRecorder`` to the context. The lists are emptied first
    so calling this function again (e.g. re-running the cell) does not stack
    duplicate bindings on top of the previous ones.

    Raises:
        RuntimeError: if no engine has been built or loaded yet.
    """
    global context
    global engine
    global inputs
    global outputs
    global bindings
    global stream

    if engine is None:
        raise RuntimeError('TRT engine is not available - call TrtModelOptimizeAndSerialize() first')

    # Cleared in place so any other reference to these lists stays valid.
    inputs.clear()
    outputs.clear()
    bindings.clear()

    stream = cuda.Stream()

    # Walk over every input and output tensor binding of the TRT engine.
    # NOTE(review): if the engine keeps a dynamic dimension, get_binding_shape
    # may report -1 for it and the computed volume would be wrong - confirm.
    for binding in engine:
        # Volume of the bound tensor, in elements.
        size = trt.volume(engine.get_binding_shape(binding))
        # Element type of the bound tensor as a numpy dtype.
        dtype = trt.nptype(engine.get_binding_dtype(binding))
        # Allocate the page-locked host buffer.
        host_mem = cuda.pagelocked_empty(size, dtype)
        # Allocate a device buffer of the same byte size.
        device_mem = cuda.mem_alloc(host_mem.nbytes)
        # Append the device buffer address to the device bindings.
        bindings.append(int(device_mem))
        # Append to the appropriate list.
        if engine.binding_is_input(binding):
            inputs.append(HostDeviceMem(host_mem, device_mem))
        else:
            outputs.append(HostDeviceMem(host_mem, device_mem))

    # Contexts are used to perform inference.
    context = engine.create_execution_context()
    context.error_recorder = errorRecorder

def Inference(externalnputs = None):
    """Run one inference through the TensorRT execution context.

    Each array in ``externalnputs`` is flattened and copied into the matching
    page-locked host input buffer, transferred to the device, the engine is
    executed on the shared CUDA stream, and the outputs are copied back.

    Args:
        externalnputs: list of numpy arrays, one per engine input, whose
            element counts match the engine input buffers.

    Returns:
        A list with the host output buffers, or None when the context is
        missing, the inputs are missing or mismatched, or an error occurs.
        The returned buffers are the shared page-locked buffers themselves,
        so a later call overwrites their content.
    """
    global context
    global stream
    global inputs
    global outputs
    global bindings

    # Verify that the TRT context was generated successfully.
    if context is None:
        print('TRT context is None ERROR - call ModelInferSetup() first')
        return None

    # Verify that inputs for the inference were supplied.
    if externalnputs is None:
        print('External inputs list is None ERROR')
        return None

    # Only ordinary errors are reported and swallowed; KeyboardInterrupt and
    # SystemExit propagate so a long inference loop can still be interrupted.
    try:
        if len(externalnputs) != len(inputs):
            print('External inputs list size - ', len(externalnputs), ' is not equal to model inputs list size - ', len(inputs))
            return None

        # Copy the user data into the page-locked host buffers before loading it to the device.
        for index in range(len(externalnputs)):
            if len(inputs[index].host) != externalnputs[index].size:
                print('TRT external input size - ', externalnputs[index].size,
                      ' is not equal to model inputs size - ', len(inputs[index].host))
                return None
            np.copyto(inputs[index].host, externalnputs[index].ravel())

        # Transfer input data to the GPU from the host page-locked memory.
        for inp in inputs:
            cuda.memcpy_htod_async(inp.device, inp.host, stream)

        # Run the inference asynchronously on the shared stream.
        enqueued = context.execute_async_v2(bindings=bindings, stream_handle=stream.handle)
        if not enqueued:
            # Let the already queued copies finish before reporting the failure.
            stream.synchronize()
            print('TRT inference enqueue ERROR')
            return None

        # Transfer predictions back from the GPU.
        for out in outputs:
            cuda.memcpy_dtoh_async(out.host, out.device, stream)

        stream.synchronize()
        # Return only the host side of every output tensor.
        return [out.host for out in outputs]
    except Exception as e:
        print('TRT inference exception ERROR - ', e)
        return None

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
[31mERROR: Operation cancelled by user[0m
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/pip/_vendor/pkg_resources/__init__.py", line 3021, in _dep_map
    return self.__dep_map
  File "/usr/local/lib/python3.7/dist-packages/pip/_vendor/pkg_resources/__init__.py", line 2815, in __getattr__
    raise AttributeError(attr)
AttributeError: _DistInfoDistribution__dep_map

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/pip/_internal/cli/base_command.py", line 180, in _main
    status = self.run(options, args)
  File "/usr/local/lib/python3.7/dist-packages/pip/_internal/cli/req_command.py", line 199, in wrapper
    return func(self, options, args)
  File "/usr



# **onnxUtils:**

In [40]:
# onnxUtils
!pip install tf2onnx onnx onnxsim
import json
import time
import tf2onnx
import onnx
#import onnxsim
import os.path


# Convert a Keras model to ONNX and save it next to the notebook
def convertKerasToONNX(name, model, overwrite_existing = False):
    """Convert ``model`` to ONNX and write it to ``name + '.onnx'``.

    The conversion only runs when the file is missing or
    ``overwrite_existing`` is True. Otherwise the existing file is loaded
    with ``onnx.load`` so the function still returns a model proto (the old
    code raised UnboundLocalError on this path).

    Args:
        name: output path without the ``.onnx`` extension.
        model: Keras model handed to ``tf2onnx.convert.from_keras``.
        overwrite_existing: force a fresh conversion even if the file exists.

    Returns:
        ``(modelFile, onnx_model_proto, storage)``. ``storage`` is the second
        value returned by tf2onnx, or None when the existing file was reused.
    """
    modelFile = name + '.onnx'

    if os.path.isfile(modelFile) and not overwrite_existing:
        # Reuse the file produced by an earlier run.
        return modelFile, onnx.load(modelFile), None

    onnx_model_proto, storage = tf2onnx.convert.from_keras(model)
    # The `with` block closes the file; no explicit close() is needed.
    with open(modelFile, "wb") as f:
        f.write(onnx_model_proto.SerializeToString())

    return modelFile, onnx_model_proto, storage

def ModelOnnxCheck(name):
    """Run the ONNX checker on ``name + '.onnx'`` and print a short report.

    Any exception raised by the checker (validation errors included) is
    treated as a failed check and shown in the report.

    Returns:
        True when the checker raised nothing, False otherwise.
    """
    print("===============================================================")
    print("Onnx model check report:")

    report = 'OK'
    passed = True
    try:
        # Perform basic check on the model input
        onnx.checker.check_model(name + '.onnx')
    except BaseException as failure:
        # Validation errors and every other failure are reported the same way.
        report = failure
        passed = False

    verdict = 'Model check completed Successfully' if passed else 'ERROR - Model check failure'
    print(verdict)

    print('Model onnx checker, check model - ', report)

    return passed

def RemoveInitializerFromInput(model, modelPath):
    """Drop graph inputs that are really initializers and re-save the model if any were dropped.

    Args:
        model: loaded ONNX model; ``model.graph.input`` is modified in place.
        modelPath: file the model is saved back to when something was removed.
    """
    graphInputs = model.graph.input
    countBefore = len(graphInputs)

    # Index the graph inputs by name so initializers can be matched against them.
    inputsByName = {entry.name: entry for entry in graphInputs}

    # Collect the inputs shadowed by an initializer, then remove them one by one.
    shadowed = [inputsByName[init.name]
                for init in model.graph.initializer
                if init.name in inputsByName]
    for entry in shadowed:
        graphInputs.remove(entry)

    removedCount = countBefore - len(graphInputs)
    if removedCount == 0:
        return

    print('Model includes several Initializers which considered as inputs to the graph - ', removedCount)
    print('All Initializers were removed from graph inputs')
    print('Replace the model *.onx file with the updated one')
    onnx.save(model, modelPath)

def ProcessModelInputs(model, modelPath):
    """Clean the graph inputs and collect concrete shapes for the dynamic ones.

    Initializers are first removed from the graph inputs. Then, for every
    input with at least one dimension whose ``dim_value`` is 0, a shape is
    built in which those dimensions are replaced by 1.

    Returns:
        dict mapping the name of each such input to its resolved shape list;
        inputs without a 0-valued dimension are not included.
    """
    RemoveInitializerFromInput(model, modelPath)

    graphInputs = model.graph.input
    print(str(graphInputs))

    dynamicShapes = {}
    for tensorInput in graphInputs:
        resolvedShape = []
        hasDynamicDim = False

        for dim in tensorInput.type.tensor_type.shape.dim:
            if dim.dim_value != 0:
                resolvedShape.append(dim.dim_value)
                continue

            # A 0 dim_value is treated as a dynamic dimension and pinned to 1.
            hasDynamicDim = True
            print('CAUTION!!! - Tensor input name' + ' - ', tensorInput.name, ', dimension - ' , dim.dim_param, ', set its value to 1 for Onnx simplify operation')
            resolvedShape.append(1)

        if hasDynamicDim:
            dynamicShapes[tensorInput.name] = resolvedShape

    return dynamicShapes

def ModelSimplify(name):
    """Simplify ``name + '.onnx'`` with onnx-simplifier and save it as ``name + 'Simp.onnx'``.

    When the simplified file already exists it is only loaded. Otherwise the
    model inputs are processed (dynamic dimensions pinned to 1) and
    ``onnxsim.simplify`` is run; the result is saved only if the simplifier
    reports that its own check passed.

    Returns:
        True when a simplified model is available (pre-existing or freshly
        validated), False when simplification failed or raised.
    """
    nameSimp = name + 'Simp'

    if os.path.exists(nameSimp + '.onnx'):
        print('Model Onnx simplify is already exist, No model check and\\or simplify operations is required')
        model = onnx.load(nameSimp + '.onnx')
        return True

    print("===============================================================")
    print("Onnx model simplifier report:")
    model = onnx.load(name + '.onnx')

    modelDynamicInputsDict = ProcessModelInputs(model, name + '.onnx')

    isSimplifiedOK = False
    try:
        # Imported here because the notebook-level import is commented out;
        # a missing package is then reported like any other simplify failure.
        import onnxsim

        print('Start model onnx simplify...')
        # Perform simplification on the model input
        # NOTE(review): `input_shapes` / `dynamic_input_shape` are the keyword
        # names used by the original call - confirm they match the installed
        # onnxsim version.
        model, check = onnxsim.simplify(model,input_shapes=modelDynamicInputsDict,
                                              dynamic_input_shape=(len(modelDynamicInputsDict) > 0))
        print('Completion model onnx simplify')
        if check:
            isSimplifiedOK = True
            print('Onnx simplification success!')
            print('Save Onnx simplified model to - ', nameSimp + '.onnx')
            onnx.save(model, nameSimp + '.onnx')
        else:
            print('Onnx simplification failure!')
            print('Simplified Onnx model could not be generated and validated')
    except Exception as e:
        # An exception means no simplified model was produced.
        isSimplifiedOK = False
        print('Onnx simplification exception - ', e)

    return isSimplifiedOK

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


# **wandb_helpers:**

In [41]:
# wandb_helpers
!pip install wandb
from datetime import datetime
import wandb
from collections import namedtuple
import numpy as np
import os
import tensorflow as tf

# One data split: the image array together with its label array.
Dataset = namedtuple("Dataset", ("images", "labels"))

# Split names; each one maps to a "<name>.npz" file inside the dataset artifact.
dataset_names = [
    "training",
    "validation",
    "test",
]

def start_wandb_run(model_name, config):
    """Start a W&B run in the ``ml-p2`` project and return the object from ``wandb.init``.

    The run name is ``<model_name>-<HHMMSS>`` using the current local time;
    the same timestamp is embedded in the run notes.
    """
    timestamp = datetime.now().strftime("%H%M%S")
    run_settings = dict(
        project="ml-p2",
        entity="ml-p2",
        name=f"{model_name}-{timestamp}",
        notes=f"Training FCNN model @{timestamp}",
        config=config,
    )
    return wandb.init(**run_settings)

def read_datasets(wandb_run, dataset_tag = "latest"):
    '''
    Download the fashion-mnist dataset artifact through the given W&B run and
    load every split listed in dataset_names, in that order.
    Usage example: train_set, validation_set, test_set = wbh.read_datasets(run)
    '''
    artifact = wandb_run.use_artifact(f'ml-p2/ml-p2/fashion-mnist:{dataset_tag}', type='dataset')
    data_dir = artifact.download()

    loaded = []
    for ds_name in dataset_names:
        loaded.append(read_dataset(data_dir, ds_name))
    return loaded

def read_dataset(data_dir, ds_name):
    """Load one split from ``<data_dir>/<ds_name>.npz``.

    The archive is expected to hold the images under key ``"x"`` and the
    labels under key ``"y"``.

    Returns:
        Dataset(images, labels) with both arrays fully read into memory.
    """
    filename = ds_name + ".npz"
    # np.load keeps the .npz archive open; the context manager closes it once
    # both arrays have been read, instead of leaking the file handle.
    with np.load(os.path.join(data_dir, filename)) as data:
        return Dataset(images = data["x"], labels = data["y"])

def read_model(wandb_run, model_name, model_tag = "latest") -> tf.keras.models.Model:
    """Download the model artifact ``model_name:model_tag`` via the run and load it with Keras."""
    model_artifact = wandb_run.use_artifact(f'ml-p2/ml-p2/{model_name}:{model_tag}', type='model')
    # download() yields the local directory holding the saved model.
    local_dir = model_artifact.download()
    loaded_model = tf.keras.models.load_model(local_dir)
    return loaded_model

def save_model(wandb_run, model, config, model_name, model_description):
    """Save ``model`` under ./saved-models and log that directory as a W&B model artifact.

    ``config`` is copied into a plain dict and attached as the artifact metadata.
    """
    export_dir = f'./saved-models/{model_name}.tf'
    tf.keras.models.save_model(model, export_dir)

    artifact = wandb.Artifact(
        model_name,
        type = "model",
        description = model_description,
        metadata = dict(config),
    )
    artifact.add_dir(export_dir)
    wandb_run.log_artifact(artifact)

def load_best_model(sweep_id):
    """Download ``model-best.h5`` from the run with the highest ``val_accuracy`` in a sweep.

    Runs without a ``val_accuracy`` summary entry are ranked as 0. The
    downloaded file handle is closed right away; nothing is returned.
    """
    def _val_accuracy(run):
        return run.summary.get("val_accuracy", 0)

    sweep = wandb.Api().sweep(f"ml-p2/ml-p2/{sweep_id}")

    # Highest validation accuracy first.
    ranked = sorted(sweep.runs, key=_val_accuracy, reverse=True)
    best_run = ranked[0]
    val_acc = _val_accuracy(best_run)
    print(f"Best run {best_run.name} with {val_acc} validation accuracy")

    downloaded = best_run.file("model-best.h5").download(replace=True)
    downloaded.close()

#if (__name__ == "__main__"):
#    load_best_model("6zmewzd0")

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


# **trt-inference:**

In [42]:
# trt-inference
#!pip install sklearn -qqq

#from TensorRTUtils import *
#from onnxUtils import convertKerasToONNX
#import wandb_helpers as wbh

import time
from sklearn.metrics import classification_report, confusion_matrix
import tensorflow as tf
import tensorrt as trt
import onnx
import tf2onnx
import numpy as np
from PIL import Image as im
import os

import seaborn as sns
import matplotlib.pyplot as plt     

# NOTE: in notebook form this shares the `modelName` global with the
# TensorRT helpers; TrtModelParse() overwrites it from the model file path.
modelName = "FCNN"

# Stage 1: Load an existing model
# ===============================
# In this part we load the model we created in the previous project,
# which is built to infer from FASHION-MNIST images.
# It is not a sophisticated model, but the idea is to use something we
# already know.
dataset_path = '.\\artifacts\\fashion-mnist-v2'

if not os.path.exists(dataset_path):
    # No local copy: fetch the datasets and the model from W&B.
    with start_wandb_run("FCNN-metrics", None) as run:
        train_set, validation_set, test_set = read_datasets(run)
        model = read_model(run, "FCNN", "latest")
else:
    test_set = read_dataset(dataset_path, 'test')
    model = tf.keras.models.load_model('.\\artifacts\\FCNN-v3')

# Stage 2: Convert to ONNX
# ========================
# Convert the model to ONNX and save it to a file. This will allow
# us to load the model into a TensorRT engine.
modelFile = convertKerasToONNX(modelName, model, True)[0]

# Stage 3: Create the TensorRT engine
# ===================================
# Now that we have a model file, we can load it into a TensorRT engine.
# The engine is built with FP16 precision (switch to the commented call
# below for FP32).
TrtModelParse(modelFile)
#TrtModelOptimizeAndSerialize(precision='fp32')
TrtModelOptimizeAndSerialize(precision='fp16')
ModelInferSetup()

# Stage 4: Inference
# ==================
# Now the model is ready for inference. The model is executed once for
# every image of the test set we've loaded in Stage 1.
#
# The per-call input list is passed inline on purpose: a notebook-level
# variable called `inputs` would rebind the global `inputs` buffer list that
# Inference() relies on and make every call fail.
# test_set is a (images, labels) namedtuple, so len(test_set) is always 2;
# the number of samples is the length of the image array.
sampleCount = len(test_set.images)
failedCount = 0

startTimeCpu = time.time()
for i in range(sampleCount):
    outputsTrt = Inference(externalnputs=[test_set.images[i]])
    #print(' topClassIdx - ', np.argmax(outputsTrt[0]))
    if outputsTrt is None:
        failedCount += 1

endTimeCpu = time.time()

if failedCount:
    print(f"WARNING - {failedCount} of {sampleCount} inferences failed; the timings below include the failed calls")

if sampleCount > 0:
    # Average wall-clock time per inference call, in milliseconds.
    averageTime = (endTimeCpu - startTimeCpu) / 1e-3 / sampleCount
    print(f"TRT Keras inference average time is: {averageTime} milliseconds")
    print(f"TRT Keras inference average FPS is: {1000 / averageTime}")
else:
    print("Test set is empty - nothing to time")

# Perform the DlewareAnalyzer inference with TRT & ORT

#np.testing.assert_allclose(kerasPredictions, onnxPredictions[0], rtol=0, atol=1e-05, err_msg='Keras Vs. Onnx Failure!!!')


#y_test = np.argmax(test_set.labels)
# predictions = model.predict(test_set.images)
# y_test = np.argmax(predictions, axis = 1)
# print (classification_report(test_set.labels, y_test))
# cm = confusion_matrix(test_set.labels, y_test)

# class_names = ["T-shirt/top","Trouser","Pullover","Dress","Coat","Sandal","Shirt","Sneaker","Bag","Ankle boot"]

# ax = plt.subplot()
# h = sns.heatmap(cm, annot=True, fmt='g', ax=ax);  #annot=True to annotate cells, ftm='g' to disable scientific notation

# # labels, title and ticks
# ax.set_xlabel('Predicted labels')
# ax.set_ylabel('True labels')
# ax.set_title('Confusion Matrix')
# ax.xaxis.set_ticklabels(class_names)
# ax.yaxis.set_ticklabels(class_names)

# plt.show()

[34m[1mwandb[0m: Downloading large artifact fashion-mnist:latest, 418.77MB. 3 files... Done. 0:0:0.1


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

[34m[1mwandb[0m: [32m[41mERROR[0m Control-C detected -- Run data was not synced


JSONDecodeError: ignored

In [None]:
# `run` only exists when the W&B branch of Stage 1 was executed; ignore the
# case where the local-artifacts branch ran and the name was never bound.
try:
    del run
except NameError:
    pass