In [32]:
import numpy as np
import tensorflow
import tensorflow as tf
from tensorflow.keras.datasets import mnist
from sklearn.model_selection import train_test_split
import tensorrt as trt
import pycuda.driver as cuda
import pycuda.autoinit
import time

import time
import onnxruntime as ort
import numpy as np
import pycuda.driver as cuda
import pycuda.autoinit

In [5]:
print("Num GPUs Available: ", len(tensorflow.config.experimental.list_physical_devices('GPU')))

Num GPUs Available:  1


In [6]:
fashion_mnist = tf.keras.datasets.fashion_mnist

(train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()

In [7]:
class_names = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat', 'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']

In [8]:
train_images = train_images / 255.0

test_images = test_images / 255.0

In [9]:
Train_images, Val_images, Train_labels, Val_labels = train_test_split(train_images, train_labels, test_size=0.2, random_state=42)

In [39]:
# Reshape the data to include the channel dimension (for grayscale images)
Train_images = Train_images.reshape(-1, 28, 28, 1)
Val_images = Val_images.reshape(-1, 28, 28, 1)
test_images = test_images.reshape(-1, 28, 28, 1)

# Ensure the data is of type float32 and normalized
Train_images = Train_images.astype('float32')
Val_images = Val_images.astype('float32')
test_images = test_images.astype('float32')

In [23]:
test_images.shape

(10000, 28, 28)

In [13]:
augmented_h5_model = tf.keras.models.load_model(r'D:\Pravin\MCW\Assignments\resnerArchi\models\resnet_hypertuned_model.h5')

In [43]:
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
with open("models/resnet_abi_model.trt", "rb") as f:
    engine_data = f.read()

In [44]:
runtime = trt.Runtime(TRT_LOGGER)
engine = runtime.deserialize_cuda_engine(engine_data)

In [16]:
context = engine.create_execution_context()

In [45]:
onnx_model_path = "models/resnet_augmented_model.onnx"  # Replace with the path to your ONNX model
onnx_session = ort.InferenceSession(onnx_model_path)

In [21]:
test_images[1].shape

(28, 28)

### Infrencing .h5 vs .trt vs .onnx

In [46]:
trt_total_latency = 0.0
trt_correct_predictions = 0

onnx_total_latency = 0.0
onnx_correct_predictions = 0

h5_total_latency = 0.0
h5_correct_predictions = 0

input_shape = (28, 28)
output_shape = (1, 10)

for i in range(1000):
    true_label = test_labels[i]
    input_data = test_images[i].reshape(input_shape)

    # ----------------------
    # TRT Inference
    # ----------------------

    d_input = cuda.mem_alloc(input_data.nbytes)
    d_output = cuda.mem_alloc(np.empty(output_shape, dtype=np.float32).nbytes)
    bindings = [int(d_input), int(d_output)]

    cuda.memcpy_htod(d_input, input_data)

    trt_start_time = time.time()
    context.execute_v2(bindings)
    trt_end_time = time.time()

    trt_latency = trt_end_time - trt_start_time
    trt_total_latency += trt_latency

    output_data = np.empty(output_shape, dtype=np.float32)
    cuda.memcpy_dtoh(output_data, d_output)
    trt_predicted_label = np.argmax(output_data)
    if true_label == trt_predicted_label:
        trt_correct_predictions += 1

    # ----------------------
    # ONNX Inference
    # ----------------------
    # Ensure input_data has the shape [batch_size, height, width, channels] for ONNX model
    input_data_onnx = input_data.reshape((1, 28, 28, 1))  # Shape: (1, 28, 28, 1)

    onnx_start_time = time.time()
    onnx_output = onnx_session.run(None, {onnx_session.get_inputs()[0].name: input_data_onnx.astype(np.float32)})[0]
    onnx_end_time = time.time()

    onnx_latency = onnx_end_time - onnx_start_time
    onnx_total_latency += onnx_latency

    onnx_predicted_label = np.argmax(onnx_output)
    if true_label == onnx_predicted_label:
        onnx_correct_predictions += 1

    # ----------------------
    # H5 Inference
    # ----------------------
    # Add the channels dimension for the .h5 model
    # input_data_h5 = input_data.reshape((1, 28, 28, 1))  # Shape: (1, 28, 28, 1)

    h5_start_time = time.time()
    h5_output = augmented_h5_model.predict(input_data_h5, verbose=0)  # Assuming augmented_h5_model is loaded
    h5_end_time = time.time()

    h5_latency = h5_end_time - h5_start_time
    h5_total_latency += h5_latency

    h5_predicted_label = np.argmax(h5_output)
    if true_label == h5_predicted_label:
        h5_correct_predictions += 1

# ----------------------
# Calculate Metrics
# ----------------------
trt_average_latency = trt_total_latency / len(test_images)
trt_accuracy = trt_correct_predictions / len(test_images)

onnx_average_latency = onnx_total_latency / len(test_images)
onnx_accuracy = onnx_correct_predictions / len(test_images)

h5_average_latency = h5_total_latency / len(test_images)
h5_accuracy = h5_correct_predictions / len(test_images)

# Print results
print(f"TRT Model - Average Latency: {trt_average_latency:.5f}s, Accuracy: {trt_accuracy}%")
print(f"ONNX Model - Average Latency: {onnx_average_latency:.5f}s, Accuracy: {onnx_accuracy}%")
print(f"H5 Model - Average Latency: {h5_average_latency:.5f}s, Accuracy: {h5_accuracy}%")


TRT Model - Average Latency: 0.00183s, Accuracy: 0.0942%
ONNX Model - Average Latency: 0.00011s, Accuracy: 0.0942%
H5 Model - Average Latency: 0.00611s, Accuracy: 0.0095%
