# TensorRT

In [1]:
import pathlib
import os
import time
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
# from tensorflow.keras.models import load_model

### Benchmarking

In [3]:
# load model
model = tf.keras.models.load_model("../best_model/mobilenetv2_best_f1score_fold_1.h5")



In [4]:
data_root = pathlib.Path("../data/Original_Images")    # points to the folder containing the images that will be used for training

# hyperparameters
batch_size = 32         # size of the batch that will be fed to model
img_height = 224        # input image height
img_width = 224         # input image width
test_size = 0.2

# Load dataset without splitting
dataset = tf.keras.utils.image_dataset_from_directory(
    data_root,                                  # loads images from the data_root directory
    image_size=(img_height, img_width),         # resizes all images to (224, 224) pixels
    batch_size=batch_size,                      # set the batch size
    shuffle=True                                # shufle data when loaded
)

# Preprocess dataset (we only need the images for inference)
image_batches = []
for image_batch, _ in dataset:
    image_batches.append(image_batch)

image_batches = np.concatenate(image_batches)  # Flatten batches to get all images
print(f"Total images: {image_batches.shape[0]}")

# Split the data into test subset for benchmarking
_, X_test = train_test_split(image_batches, test_size=test_size, random_state=42)

Found 228 files belonging to 2 classes.
Total images: 228


In [6]:
# run inference
trials = 1000
inference_times = []

print(f"Running {trials} inference trials on {len(X_test)} test images...")
for i in range(trials):
    start_time = time.perf_counter()

    # Run inference for a batch of images
    model.predict(X_test, batch_size=batch_size, verbose=0)

    end_time = time.perf_counter()
    inference_time = end_time - start_time
    inference_times.append(inference_time)

    if i % 50 == 0:
        avg_inference = np.mean(inference_times)  # Average inference time per trial
        print(f"Step {i}: average inference time = {avg_inference:.6f} seconds")
        
    tf.keras.backend.clear_session()
        
# Compute throughput (images per second)
total_time = np.sum(inference_times)
throughput = (trials * len(X_test)) / total_time
print(f"Throughput: {throughput:.2f} images/second")

Running 1000 inference trials on 46 test images...
Step 0: average inference time = 0.197487 seconds
Step 50: average inference time = 0.530172 seconds
Step 100: average inference time = 0.532068 seconds
Step 150: average inference time = 0.534038 seconds
Step 200: average inference time = 0.538387 seconds
Step 250: average inference time = 0.542415 seconds
Step 300: average inference time = 0.544953 seconds
Step 350: average inference time = 0.546347 seconds
Step 400: average inference time = 0.548982 seconds
Step 450: average inference time = 0.551061 seconds
Step 500: average inference time = 0.551467 seconds
Step 550: average inference time = 0.552484 seconds
Step 600: average inference time = 0.553712 seconds
Step 650: average inference time = 0.554324 seconds
Step 700: average inference time = 0.555828 seconds
Step 750: average inference time = 0.555979 seconds
Step 800: average inference time = 0.555808 seconds
Step 850: average inference time = 0.555597 seconds
Step 900: averag

### TensorRT Optimization

In [7]:
from tensorflow.python.compiler.tensorrt import trt_convert as trt

# Define the conversion parameters
conversion_params = trt.DEFAULT_TRT_CONVERSION_PARAMS._replace(
    precision_mode=trt.TrtPrecisionMode.FP16,                   # You can use FP32 or INT8 if supported
    max_workspace_size_bytes=8000000000                         # 8GB, adjust as per your GPU memory
)

converter = trt.TrtGraphConverterV2(
    input_saved_model_dir='../best_model/mobilenetv2_best_f1score_fold_1.h5',
    conversion_params=conversion_params
)

# Convert the model
converter.convert()
converter.summary()

# Save the optimized model
converter.save('../tensorRT_model/test')

ERROR:tensorflow:Tensorflow needs to be built with TensorRT support enabled to allow TF-TRT to operate.


ERROR:tensorflow:Tensorflow needs to be built with TensorRT support enabled to allow TF-TRT to operate.


RuntimeError: Tensorflow has not been built with TensorRT support.

### Run Inference on Optimized Model

In [None]:
# # Load the optimized TensorRT model
# saved_model_loaded = tf.saved_model.load('path/to/save/tensorrt_model')
# infer = saved_model_loaded.signatures['serving_default']

# # Example input data (adjust as per your model's input requirements)
# input_tensor = tf.convert_to_tensor(your_input_data)

# # Run inference
# output = infer(input_tensor)

# # Process output as needed