# Imports

### Libraries

In [22]:
import time
import h5py

import onnx
import pycuda.driver as cuda
import pycuda.autoinit
import tensorrt as trt

from torch import nn
import torch.nn.functional as F

import numpy as np

### Path

In [23]:
path_converted_model = "../models/full_model.onnx"
path_dataset = "../datasets/clean_dataset/dataset.h5py"

### Dataset 

In [45]:
h5f = h5py.File(path_dataset, 'r')

X_train = np.array(h5f['X_train'])
X_test = np.array(h5f['X_test'])
y_train = np.array(h5f['y_train'])
y_test = np.array(h5f['y_test'])

# Convert to TensorRT

### Tensor RT engine

In [25]:
TRT_LOGGER = trt.Logger(trt.Logger.VERBOSE)
flags = 1 << (int)(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)

def build_engine(model_path):
    """Build and return the TensorRT engine and context."""
    
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network(flags) as network, trt.OnnxParser(network, TRT_LOGGER) as parser: 
        
        # Set builder 1GB Vram, batch size 1 & fp16 if possible
        builder.max_workspace_size = 1 << 30
        builder.max_batch_size = 1
        
        if builder.platform_has_fast_fp16:
            builder.fp16_mode = True
        
        # Load ONNX model
        with open(model_path, "rb") as f:
            parser.parse(f.read())
        
        # Generate an engine optimized for the target platform
        engine = builder.build_cuda_engine(network)
        context = engine.create_execution_context()
        
        print(engine)
        print(context)

        return engine, context

### Inference pipeline

This function allocate memory on the CPU and GPU. It has to be called before any inference. Once the memory has been allocated, the data can be passed and an inference can be done.

In [26]:
def allocate_buffer(engine):
    
    # Host CPU memory
    h_in_size = trt.volume(engine.get_binding_shape(0))
    h_out_size = trt.volume(engine.get_binding_shape(1))
    h_in_dtype = trt.nptype(engine.get_binding_dtype(0))
    h_out_dtype = trt.nptype(engine.get_binding_dtype(1))
    
    in_cpu = cuda.pagelocked_empty(h_in_size, h_in_dtype)
    out_cpu = cuda.pagelocked_empty(h_out_size, h_out_dtype)
    
    # Allocate GPU memory
    in_gpu = cuda.mem_alloc(in_cpu.nbytes)
    out_gpu = cuda.mem_alloc(out_cpu.nbytes)
    
    stream = cuda.Stream()
    return in_cpu, out_cpu, in_gpu, out_gpu, stream

This function take allocated space, an input matrix, and run the inference.

In [27]:
def inference(engine, context, inputs, out_cpu, in_gpu, out_gpu, stream):
    
    cuda.memcpy_htod(in_gpu, inputs)
    context.execute(1, [int(in_gpu), int(out_gpu)])
    cuda.memcpy_dtoh(out_cpu, out_gpu)
    
    return out_cpu

# Inference

In [28]:
inputs = np.random.random((1, 3, 120, 120)).astype(np.float32)
engine, context = build_engine(path_converted_model)

<tensorrt.tensorrt.ICudaEngine object at 0x7f363fe34dc0>
<tensorrt.tensorrt.IExecutionContext object at 0x7f363fe34b58>


In [46]:
X_test =  X_test / 255
X_test = X_test.astype(np.float32)

results = []
t1 = time.time()

for image in X_test:
    
    in_cpu, out_cpu, in_gpu, out_gpu, stream = allocate_buffer(engine)
    results.append(inference(engine, context, image.reshape(-1), out_cpu, in_gpu, out_gpu, stream))

print("cost time: ", time.time() - t1)

cost time:  0.3489267826080322


# Evaluation

Inference time: **0.3608** sec, for **755** images.

In [47]:
len(results)

755

In [48]:
test_preds = np.array(results)
prediction = test_preds.argmax(axis=1)

In [49]:
prediction

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,