In [135]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.datasets import mnist
from sklearn.model_selection import train_test_split
import tensorrt as trt
import pycuda.driver as cuda
import pycuda.autoinit
import time

In [136]:
# Load MNIST and carve a validation split out of the training portion
# (20% held out, fixed random_state so the split is repeatable).
(x_data, y_data), (x_test, y_test) = mnist.load_data()
x_train, x_val, y_train, y_val = train_test_split(
    x_data,
    y_data,
    test_size=0.2,
    random_state=42,
)

# Cast every image split to float32 and divide by 255.
x_train, x_val, x_test = (
    images.astype('float32') / 255.0 for images in (x_train, x_val, x_test)
)

In [137]:
def preprocess(image):
    """Turn one grayscale image into a 224x224 three-channel tensor.

    Steps, in order: append a trailing channel axis, resize to 224x224 with
    ``tf.image.resize`` (default method), then replicate the single channel
    to RGB with ``tf.image.grayscale_to_rgb``.

    Args:
        image: Image without a channel axis — assumed to be a 2-D (H, W)
            array; TODO confirm against callers.

    Returns:
        The tensor produced by ``tf.image.grayscale_to_rgb``.
    """
    with_channel = tf.expand_dims(image, axis=-1)
    resized = tf.image.resize(with_channel, (224, 224))
    return tf.image.grayscale_to_rgb(resized)

In [143]:
# Load the saved Keras (.h5) model; it is the baseline the TensorRT engine is compared against.
model = tf.keras.models.load_model("mnist_data_aug_resnet.h5")



In [138]:
# Logger handed to the TensorRT runtime; WARNING severity threshold.
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)

# Read the serialized engine into memory as raw bytes.
with open("mnist_resnet_final.trt", mode="rb") as engine_file:
    engine_data = engine_file.read()

In [139]:
# Rebuild the CUDA engine from the serialized bytes read earlier.
runtime = trt.Runtime(TRT_LOGGER)
engine = runtime.deserialize_cuda_engine(engine_data)

# deserialize_cuda_engine signals failure by returning None (e.g. the engine
# was built with a different TensorRT version or for a different GPU). Fail
# here with a clear message instead of an AttributeError in a later cell.
if engine is None:
    raise RuntimeError(
        "Failed to deserialize the TensorRT engine; see the TensorRT log output above."
    )

In [140]:
# Execution context used to run inference with the deserialized engine.
context = engine.create_execution_context()

# Context creation can fail and hand back None; surface that here rather than
# as an AttributeError on the first execute call.
if context is None:
    raise RuntimeError("Failed to create a TensorRT execution context.")

In [147]:
# Benchmark the TensorRT engine against the Keras .h5 model on the test set,
# one image at a time, accumulating per-call latency and correct predictions.
trt_total_latency = 0.0
trt_correct_predictions = 0

h5_total_latency = 0.0
h5_correct_predictions = 0

input_shape = (1, 224, 224, 3)
output_shape = (1, 10)

# Buffer sizes are identical for every sample, so the device buffers and the
# host output array are allocated once and reused across iterations.
output_data = np.empty(output_shape, dtype=np.float32)
d_input = cuda.mem_alloc(np.dtype(np.float32).itemsize * int(np.prod(input_shape)))
d_output = cuda.mem_alloc(output_data.nbytes)
# assumes binding 0 is the engine input and binding 1 the output — TODO confirm against the engine
bindings = [int(d_input), int(d_output)]

for i in range(len(x_test)):
    true_label = y_test[i]

    # Add the batch axis and guarantee a C-contiguous float32 host buffer so
    # its byte count matches the preallocated device input buffer.
    input_data = np.ascontiguousarray(
        np.expand_dims(preprocess(x_test[i]).numpy(), axis=0),
        dtype=np.float32,
    )
    if input_data.shape != input_shape:
        raise ValueError(
            f"Sample {i}: expected input shape {input_shape}, got {input_data.shape}"
        )

    #TRT Inference

    cuda.memcpy_htod(d_input, input_data)

    # perf_counter is monotonic and high-resolution, unlike time.time().
    # NOTE(review): only execute_v2 is timed here (host<->device copies are
    # excluded), whereas the Keras timing below wraps the entire predict call —
    # confirm this is the intended comparison.
    trt_start_time = time.perf_counter()
    context.execute_v2(bindings)
    trt_end_time = time.perf_counter()

    trt_latency = trt_end_time - trt_start_time
    trt_total_latency += trt_latency

    cuda.memcpy_dtoh(output_data, d_output)
    trt_predicted_label = np.argmax(output_data)
    if true_label == trt_predicted_label:
        trt_correct_predictions += 1

    #H5 Inference

    h5_start_time = time.perf_counter()
    h5_output = model.predict(input_data, verbose=0)
    h5_end_time = time.perf_counter()

    h5_latency = h5_end_time - h5_start_time
    h5_total_latency += h5_latency

    h5_predicted_label = np.argmax(h5_output)
    if true_label == h5_predicted_label:
        h5_correct_predictions += 1

# Per-sample averages over the whole test set.
trt_average_latency = trt_total_latency / len(x_test)
trt_accuracy = trt_correct_predictions / len(x_test)

h5_average_latency = h5_total_latency / len(x_test)
h5_accuracy = h5_correct_predictions / len(x_test)

Average Prediction Latency: 0.004294 seconds
Accuracy: 98.83%
.h5 Model - Average Prediction Latency: 0.132996 seconds
.h5 Model - Accuracy: 98.82%


In [149]:
# Report latency (seconds, 6 decimals) and accuracy (percentage) for both
# backends, one metric per line.
print(
    f"TRT - Average Prediction Latency: {trt_average_latency:.6f} seconds",
    f"TRT - Accuracy: {trt_accuracy:.2%}",
    f"H5 Model - Average Prediction Latency: {h5_average_latency:.6f} seconds",
    f"H5 Model - Accuracy: {h5_accuracy:.2%}",
    sep="\n",
)

TRT - Average Prediction Latency: 0.004294 seconds
TRT - Accuracy: 98.83%
H5 Model - Average Prediction Latency: 0.132996 seconds
H5 Model - Accuracy: 98.82%
