In [2]:
# Ensure CUDA and cuDNN are installed
!nvcc --version
!nvidia-smi
# Install the required dependencies for building TensorFlow with TensorRT support
!sudo apt-get update
!sudo apt-get install -y libnvinfer8 libnvinfer-dev libnvinfer-plugin8
# (Install other necessary packages as mentioned in TensorFlow documentation)
# Clone the TensorFlow repository and checkout the desired branch
!git clone https://github.com/tensorflow/tensorflow.git
%cd tensorflow
!git checkout r2.10 # Check the TensorFlow-TensorRT compatibility matrix for the correct branch.
# Configure TensorFlow build with TensorRT enabled
# ./configure
# (During configuration, enable TensorRT support when prompted)
# If you are using a virtual environment, activate it before building TensorFlow.
# Build and install TensorFlow
!bazel build --config=cuda --config=monolithic ... (Specify the build target with TensorRT support)
!bazel install ... (Install the built TensorFlow package)
# After successful installation, restart the runtime to ensure the new TensorFlow installation is used.

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2023 NVIDIA Corporation
Built on Tue_Aug_15_22:02:13_PDT_2023
Cuda compilation tools, release 12.2, V12.2.140
Build cuda_12.2.r12.2/compiler.33191640_0
Tue Jan 21 13:35:43 2025       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Tesla T4                       Off | 00000000:00:04.0 Off |                    0 |
| N/A   41C    P8               9W /  70W |      0MiB / 15360MiB |      0%      Default |
|                                      

In [3]:
# Re-check GPU status and memory usage (same command as in the setup cell above).
!nvidia-smi

Tue Jan 21 13:45:59 2025       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Tesla T4                       Off | 00000000:00:04.0 Off |                    0 |
| N/A   34C    P8               9W /  70W |      0MiB / 15360MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [4]:
!pip install tf2onnx

Collecting tf2onnx
  Downloading tf2onnx-1.16.1-py3-none-any.whl.metadata (1.3 kB)
Collecting onnx>=1.4.1 (from tf2onnx)
  Downloading onnx-1.17.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (16 kB)
Collecting protobuf~=3.20 (from tf2onnx)
  Downloading protobuf-3.20.3-py2.py3-none-any.whl.metadata (720 bytes)
Downloading tf2onnx-1.16.1-py3-none-any.whl (455 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m455.8/455.8 kB[0m [31m13.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading onnx-1.17.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (16.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m16.0/16.0 MB[0m [31m70.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading protobuf-3.20.3-py2.py3-none-any.whl (162 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m162.1/162.1 kB[0m [31m15.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: protobuf, onnx, tf2onnx
  Attempting uninstall: pr

In [5]:
# Return to the home directory: the setup cell above changed into the cloned tensorflow/ checkout.
%cd ~

/root


In [6]:
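# Mount Google Drive first when running on Colab: the model paths configured
# further down live under /content/drive/MyDrive. Uncomment the two lines below to mount it.
#from google.colab import drive
#drive.mount('/content/drive')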

In [7]:
import os
import tempfile

import numpy as np
import tensorflow as tf
from tensorflow.python.compiler.tensorrt import trt_convert as trt

In [8]:
def convert_keras_to_tensorrt(keras_model_path, trt_model_dir):
    """
    Converts a Keras model to a TensorRT-optimized SavedModel.

    The model is loaded with ``tf.keras.models.load_model``, exported to a
    throwaway SavedModel directory, converted with ``TrtGraphConverterV2``
    (FP16 precision, 1 GiB workspace) and finally written to ``trt_model_dir``.
    The intermediate SavedModel is removed automatically once the converted
    model has been saved.

    Args:
        keras_model_path (str): Path to the saved Keras model (e.g. a .keras or .h5 file).
        trt_model_dir (str): Directory where the TensorRT-optimized model will be saved.

    Raises:
        FileNotFoundError: If ``keras_model_path`` does not exist.
    """
    # Fail fast with a clear message (e.g. when Google Drive is not mounted)
    # instead of surfacing a loader-specific error from deep inside Keras.
    if not os.path.exists(keras_model_path):
        raise FileNotFoundError(f"Keras model not found: {keras_model_path}")

    print("Loading Keras model...")
    # Load the Keras model
    keras_model = tf.keras.models.load_model(keras_model_path)

    # The converter consumes a SavedModel directory, so the Keras model is first
    # exported to one. A unique temporary directory avoids depending on the
    # current working directory, avoids reusing stale files from an earlier run,
    # and is cleaned up even if the conversion fails.
    with tempfile.TemporaryDirectory(prefix="keras_saved_model_") as temp_saved_model_dir:
        print("Saving Keras model as SavedModel...")
        tf.saved_model.save(keras_model, temp_saved_model_dir)

        print("Converting Keras model to TensorRT...")
        # Initialize the TensorRT converter
        params = trt.DEFAULT_TRT_CONVERSION_PARAMS._replace(
            precision_mode=trt.TrtPrecisionMode.FP16,  # Use FP16 for faster inference (if supported)
            max_workspace_size_bytes=1 << 30          # 1GB workspace size
        )
        converter = trt.TrtGraphConverterV2(
            input_saved_model_dir=temp_saved_model_dir,
            conversion_params=params,
        )

        # Convert the Keras model
        converter.convert()

        # Save while the intermediate SavedModel still exists on disk, in case
        # the converter still reads from it when writing the output.
        print(f"Saving TensorRT-optimized model to {trt_model_dir}...")
        converter.save(trt_model_dir)

    print(f"TensorRT-optimized model saved at {trt_model_dir}")

In [9]:
def load_tensorrt_model_and_infer(trt_model_dir, input_data):
    """
    Loads a TensorRT-optimized model and performs inference.

    The SavedModel in ``trt_model_dir`` is loaded and its ``serving_default``
    signature is called with ``input_data``. When the signature declares exactly
    one input, ``input_data`` is cast to that input's dtype first, so arrays with
    NumPy's default float64 (or integer image data) do not fail on a dtype mismatch.

    Args:
        trt_model_dir (str): Directory of the TensorRT-optimized model.
        input_data (numpy.ndarray): Input data for inference.

    Returns:
        The raw result of the ``serving_default`` signature call. For SavedModel
        signatures this is normally a dict mapping output names to ``tf.Tensor``
        values (call ``.numpy()`` on an entry to obtain a ``numpy.ndarray``).
    """
    print("Loading TensorRT-optimized model...")
    trt_model = tf.saved_model.load(trt_model_dir)
    infer = trt_model.signatures["serving_default"]

    input_tensor = tf.convert_to_tensor(input_data)

    # structured_input_signature is (positional_specs, keyword_specs); signature
    # functions describe their inputs in the keyword part.
    _, keyword_specs = infer.structured_input_signature
    if len(keyword_specs) == 1:
        (input_spec,) = keyword_specs.values()
        if input_tensor.dtype != input_spec.dtype:
            input_tensor = tf.cast(input_tensor, input_spec.dtype)

    print("Running inference...")
    # Perform inference
    predictions = infer(input_tensor)
    return predictions

In [10]:
# Input: the trained Keras model stored on Google Drive (an HDF5 `.h5` file).
keras_model_path = (
    "/content/drive/MyDrive/MNIST_H5_model/lightweight_resnet_mnist.h5"
)
# Output: directory that receives the TensorRT-optimized SavedModel.
trt_model_dir = (
    "/content/drive/MyDrive/MNIST_H5_model/TRT_Model"
)

In [11]:
# Run the Keras -> TensorRT conversion with the paths configured above.
convert_keras_to_tensorrt(
    keras_model_path=keras_model_path,
    trt_model_dir=trt_model_dir,
)

Loading Keras model...




Saving Keras model as SavedModel...
Converting Keras model to TensorRT...
Saving TensorRT-optimized model to /content/drive/MyDrive/MNIST_H5_model/TRT_Model...
TensorRT-optimized model saved at /content/drive/MyDrive/MNIST_H5_model/TRT_Model
