In [1]:
# Ensure CUDA and cuDNN are installed
!nvcc --version
!nvidia-smi
# Install the required dependencies for building TensorFlow with TensorRT support
!sudo apt-get update
!sudo apt-get install -y libnvinfer8 libnvinfer-dev libnvinfer-plugin8
# (Install other necessary packages as mentioned in TensorFlow documentation)
# Clone the TensorFlow repository and checkout the desired branch
!git clone https://github.com/tensorflow/tensorflow.git
%cd tensorflow
!git checkout r2.10 # Check the TensorFlow-TensorRT compatibility matrix for the correct branch.
# Configure TensorFlow build with TensorRT enabled
# ./configure
# (During configuration, enable TensorRT support when prompted)
# If you are using a virtual environment, activate it before building TensorFlow.
# Build and install TensorFlow
!bazel build --config=cuda --config=monolithic ... (Specify the build target with TensorRT support)
!bazel install ... (Install the built TensorFlow package)
# After successful installation, restart the runtime to ensure the new TensorFlow installation is used.

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2023 NVIDIA Corporation
Built on Tue_Aug_15_22:02:13_PDT_2023
Cuda compilation tools, release 12.2, V12.2.140
Build cuda_12.2.r12.2/compiler.33191640_0
Tue Dec 24 16:13:14 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Tesla T4                       Off | 00000000:00:04.0 Off |                    0 |
| N/A   54C    P8              10W /  70W |      0MiB / 15360MiB |      0%      Default |
|                                      

In [2]:
%cd ~

/root


In [57]:
import tensorflow as tf
import time
import numpy as np
import os
import subprocess

In [41]:
(x_test, y_test), _ = tf.keras.datasets.cifar10.load_data()
x_test = x_test.astype('float32') / 255.0
y_test = tf.keras.utils.to_categorical(y_test, 10)

In [42]:
model_path = '/content/drive/MyDrive/optimized_model'
model = tf.saved_model.load(model_path)
infer = model.signatures["serving_default"]

In [44]:
batch_size = 32

In [46]:
num_batches = len(x_test) // batch_size
latencies = []
total_samples = 0

In [61]:
start_total_time = time.time()

for i in range(num_batches):
    batch_x = x_test[i * batch_size : (i + 1) * batch_size]
    start_time = time.time()
    _ = infer(tf.constant(batch_x))
    latencies.append(time.time() - start_time)
    total_samples += len(batch_x)

end_total_time = time.time()

In [62]:
print(end_total_time - start_total_time)

9.326685667037964


In [63]:
avg_latency = np.mean(latencies)
throughput = total_samples / (end_total_time - start_total_time)

In [64]:
predictions = []
for i in range(num_batches):
    batch_x = x_test[i * batch_size : (i + 1) * batch_size]
    outputs = infer(tf.constant(batch_x))
    predictions.extend(np.argmax(outputs['output_0'].numpy(), axis=1))

In [65]:
accuracy = np.mean(np.argmax(y_test[:num_batches * batch_size], axis=1) == predictions)

In [66]:
total_size = 0
for dirpath, dirnames, filenames in os.walk(model_path):
    for f in filenames:
        fp = os.path.join(dirpath, f)
        total_size += os.path.getsize(fp)
total_size = total_size / (1024 * 1024)

In [67]:
result = subprocess.run(
            ["nvidia-smi", "--query-gpu=memory.used", "--format=csv,nounits,noheader"],
            stdout=subprocess.PIPE,
            text=True
        )
memory_used = int(result.stdout.splitlines()[0])

In [68]:
print(f"TensorRT Model - Latency: {avg_latency:.4f}s, Throughput: {throughput:.2f} samples/s, "
      f"Accuracy: {accuracy:.4%}, Size: {total_size:.2f} MB, GPU Memory: {memory_used} MB")

TensorRT Model - Latency: 0.0122s, Throughput: 10718.49 samples/s, Accuracy: 91.3792%, Size: 10.74 MB, GPU Memory: 257 MB
