In [None]:
# Ensure CUDA and cuDNN are installed
!nvcc --version
!nvidia-smi

# Install the required dependencies for building TensorFlow with TensorRT support
!sudo apt-get update
!sudo apt-get install -y libnvinfer8 libnvinfer-dev libnvinfer-plugin8
# (Install other necessary packages as mentioned in TensorFlow documentation)

# Clone the TensorFlow repository and checkout the desired branch
!git clone https://github.com/tensorflow/tensorflow.git
%cd tensorflow
!git checkout r2.10 # Check the TensorFlow-TensorRT compatibility matrix for the correct branch.

# Configure TensorFlow build with TensorRT enabled
# ./configure
# (During configuration, enable TensorRT support when prompted)
# If you are using a virtual environment, activate it before building TensorFlow.

# Build and install TensorFlow
!bazel build --config=cuda --config=monolithic ... (Specify the build target with TensorRT support)
!bazel install ... (Install the built TensorFlow package)

# After successful installation, restart the runtime to ensure the new TensorFlow installation is used.

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2023 NVIDIA Corporation
Built on Tue_Aug_15_22:02:13_PDT_2023
Cuda compilation tools, release 12.2, V12.2.140
Build cuda_12.2.r12.2/compiler.33191640_0
Mon Dec 23 06:44:25 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Tesla T4                       Off | 00000000:00:04.0 Off |                    0 |
| N/A   45C    P8               9W /  70W |      0MiB / 15360MiB |      0%      Default |
|                                      

In [None]:
# Print the GPU, driver version, and memory usage reported by nvidia-smi.
!nvidia-smi

Thu Jan  2 10:02:16 2025       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Tesla T4                       Off | 00000000:00:04.0 Off |                    0 |
| N/A   40C    P8               9W /  70W |      0MiB / 15360MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [None]:
# Probe for the TF-TRT converter module; on success `trt` stays bound for later cells.
try:
    from tensorflow.python.compiler.tensorrt import trt_convert as trt
except ImportError:
    # The converter module could not be imported from this TensorFlow install.
    print("TensorFlow is not linked with TensorRT.")
else:
    print("TensorFlow TensorRT (TF-TRT) is available.")


TensorFlow TensorRT (TF-TRT) is available.


In [None]:
import tensorflow as tf
from tensorflow import keras
from keras import datasets, layers, models, Input
from sklearn.metrics import classification_report
import numpy as np
import time

# Load and preprocess CIFAR-10 dataset
(x_train, y_train), (x_test, y_test) = datasets.cifar10.load_data()
# Cast to float32 before scaling: dividing the raw integer pixels by 255.0
# directly would produce float64 arrays (twice the memory) that every
# downstream inference call has to cast back to float32.
x_train = x_train.astype(np.float32) / 255.0
x_test = x_test.astype(np.float32) / 255.0
# Labels are flattened to 1-D so they compare element-wise with argmax predictions.
y_train, y_test = y_train.flatten(), y_test.flatten()

# Define the CNN model with Dropout
def create_model(dropout_rate=0.2):
    """Assemble the uncompiled CNN classifier for 32x32x3 inputs.

    The network is three 128-filter 3x3 ReLU convolutions (the first two each
    followed by 2x2 max pooling), then Flatten, a 128-unit ReLU dense layer,
    dropout, and a 10-way softmax output.

    Args:
        dropout_rate: Rate passed to the Dropout layer before the output layer.

    Returns:
        A `Sequential` model; the caller is responsible for compiling it.
    """
    conv_settings = dict(filters=128, kernel_size=(3, 3), activation='relu')

    stack = [Input(shape=(32, 32, 3))]
    for block_index in range(3):
        stack.append(layers.Conv2D(**conv_settings))
        # Only the first two convolutions are followed by pooling.
        if block_index < 2:
            stack.append(layers.MaxPooling2D(pool_size=(2, 2)))

    stack.append(layers.Flatten())
    stack.append(layers.Dense(128, activation='relu'))
    stack.append(layers.Dropout(dropout_rate))
    stack.append(layers.Dense(10, activation='softmax'))

    return models.Sequential(stack)

# Create and compile the model
INITIAL_LR = 0.001  # shared by the optimizer and the decay schedule below

model = create_model(dropout_rate=0.2)
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=INITIAL_LR),
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Stop when validation accuracy stalls for 5 epochs and roll back to the best weights.
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=5, restore_best_weights=True)
# Exponential decay: multiply the learning rate by 0.95 each epoch.
lr_schedule = tf.keras.callbacks.LearningRateScheduler(lambda epoch: INITIAL_LR * 0.95 ** epoch)

# Validate on a held-out slice of the training data rather than on the test
# set: early stopping selects weights by val_accuracy, so validating on
# (x_test, y_test) would leak the test set into model selection and bias the
# accuracy reported afterwards.
history = model.fit(x_train, y_train, batch_size=64,
                    epochs=15,
                    validation_split=0.1,
                    callbacks=[early_stopping, lr_schedule])


# Evaluate the model
def evaluate_model(model, x_test, y_test):
    """Score a Keras classifier on a labelled set.

    Args:
        model: Object whose `predict(x_test)` returns per-class scores, one row per sample.
        x_test: Inputs passed to `model.predict`.
        y_test: Integer class labels compared against the argmax predictions.

    Returns:
        Tuple `(report, accuracy)`: the sklearn classification report text
        (classes labelled "0".."9") and the fraction of correct predictions.
    """
    class_scores = model.predict(x_test)
    predicted_labels = np.argmax(class_scores, axis=1)

    class_names = list(map(str, range(10)))
    accuracy = np.mean(predicted_labels == y_test)
    report = classification_report(y_test, predicted_labels, target_names=class_names)
    return report, accuracy

# Score the trained model on the test split; note evaluate_model returns (report, accuracy) in that order.
report, accuracy = evaluate_model(model, x_test, y_test)
print(f"Model Accuracy: {accuracy:.4f}")
print(f"Classification Report:\n{report}")

# Step 3: Measure latency of the best model
def measure_latency(model, x_sample, device, warmup=1, repeats=10):
    """Measure the mean single-sample `model.predict` latency on a device.

    The first `predict` call pays one-off costs (function tracing, device
    initialisation), so `warmup` untimed calls run first and the reported value
    is the mean over `repeats` timed calls.

    Note: a later cell in this notebook defines another `measure_latency` with a
    different signature, which replaces this one once that cell has run.

    Args:
        model: Keras model exposing `predict`.
        x_sample: One input sample without a batch axis; a batch axis is added here.
        device: Device string passed to `tf.device`, e.g. '/CPU:0' or '/GPU:0'.
        warmup: Number of untimed calls made before timing starts.
        repeats: Number of timed calls to average over (must be >= 1).

    Returns:
        Mean latency of one `predict` call, in seconds.

    Raises:
        ValueError: If `repeats` is less than 1.
    """
    if repeats < 1:
        raise ValueError("repeats must be at least 1")

    # Build the one-element batch once so array handling is not part of the timing.
    batch = np.expand_dims(x_sample, axis=0)

    with tf.device(device):
        for _ in range(warmup):
            model.predict(batch, verbose=0)

        # perf_counter is monotonic and higher resolution than time.time.
        start_time = time.perf_counter()
        for _ in range(repeats):
            model.predict(batch, verbose=0)
        elapsed = time.perf_counter() - start_time

    return elapsed / repeats

# Measure latency on CPU and GPU
print("\n=== Measuring Latency for Best Model ===")
sample_image = x_test[1]

# Catch Exception (not a bare `except:`) so KeyboardInterrupt still propagates,
# and print the real error instead of silently labelling it "not available".
try:
    cpu_latency = measure_latency(model, sample_image, '/CPU:0')
except Exception as exc:
    print(f"CPU latency measurement failed: {exc!r}")
    cpu_latency = "CPU not available"

# Decide GPU availability explicitly rather than relying on an exception being raised.
if tf.config.list_physical_devices('GPU'):
    try:
        gpu_latency = measure_latency(model, sample_image, '/GPU:0')
    except Exception as exc:
        print(f"GPU latency measurement failed: {exc!r}")
        gpu_latency = "GPU not available"
else:
    gpu_latency = "GPU not available"

print(f"Latency on CPU: {cpu_latency}")
print(f"Latency on GPU: {gpu_latency}")


Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
[1m170498071/170498071[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 0us/step
Epoch 1/15
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 13ms/step - accuracy: 0.3289 - loss: 1.8121 - val_accuracy: 0.5430 - val_loss: 1.2861 - learning_rate: 0.0010
Epoch 2/15
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 9ms/step - accuracy: 0.5558 - loss: 1.2447 - val_accuracy: 0.6044 - val_loss: 1.1046 - learning_rate: 9.5000e-04
Epoch 3/15
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 9ms/step - accuracy: 0.6287 - loss: 1.0522 - val_accuracy: 0.6599 - val_loss: 0.9751 - learning_rate: 9.0250e-04
Epoch 4/15
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 9ms/step - accuracy: 0.6738 - loss: 0.9268 - val_accuracy: 0.6831 - val_loss: 0.9156 - learning_rate: 8.5737e-04
Epoch 5/15
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0

In [None]:
import tensorflow as tf
from tensorflow.python.compiler.tensorrt import trt_convert as trt

# Mount Google Drive before writing anything under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

# Export a TensorFlow SavedModel. The TF-TRT conversion cell reads this
# directory through the `saved_model` variable, so it must be defined here.
saved_model = '/content/drive/MyDrive/cifar_model_2'
model.export(saved_model)

# Also keep an HDF5 copy of the Keras model (reloaded later for layer-by-layer inspection).
model.save('cifar10_model1.h5')



In [None]:
# Define TensorRT conversion
# `saved_model` is not defined in this cell: it must already hold the path of an
# exported SavedModel directory, and `trt` must be the imported trt_convert module.
trt_model_dir = '/content/drive/MyDrive/trt_model_2'
converter = trt.TrtGraphConverterV2(input_saved_model_dir=saved_model)

# Convert to TensorRT
converter.convert()

# Save the converted model
# The output directory lives under /content/drive — presumably Google Drive must be mounted first; confirm.
converter.save(trt_model_dir)

print(f"TensorRT model saved to {trt_model_dir}")

TensorRT model saved to /content/drive/MyDrive/trt_model_2


In [None]:
import tensorflow as tf
import numpy as np
from sklearn.metrics import classification_report
import time

# Path to the saved TensorRT-optimized model (the same directory the conversion cell writes to)
saved_model_dir = '/content/drive/MyDrive/trt_model_2'

# Load the TensorRT-optimized model
trt_model = tf.saved_model.load(saved_model_dir)
# `infer` is the "serving_default" signature; later cells call it with a float32 batch tensor
# and read the prediction from its 'output_0' entry.
infer = trt_model.signatures["serving_default"]


In [None]:
# Mount Google Drive at /content/drive.
# NOTE(review): other cells read and write under /content/drive/MyDrive, so this
# presumably has to run before them — confirm the intended cell order.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Convert the input data to float32 before inference
def evaluate_trt_model(infer, x_test, y_test, batch_size=1, output_key='output_0'):
    """Evaluate a loaded SavedModel signature on a labelled set.

    Inputs are sent to `infer` in chunks of `batch_size` as float32 tensors. The
    defaults (one sample per call, result read from 'output_0') reproduce the
    original per-image behaviour; a larger `batch_size` reduces the number of
    calls needed to cover the whole set.

    Args:
        infer: Callable signature taking one float32 batch tensor and returning
            a mapping of output name to tensor.
        x_test: Sliceable collection of input samples (sample axis first).
        y_test: Integer class labels aligned with `x_test`.
        batch_size: Number of samples per `infer` call (must be >= 1).
        output_key: Key of the class-score tensor in the mapping returned by `infer`.

    Returns:
        Tuple `(accuracy, report)`: the fraction of correct predictions and the
        sklearn classification report text (classes labelled "0".."9").

    Raises:
        ValueError: If `batch_size` is less than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    y_pred_classes = []
    for start in range(0, len(x_test), batch_size):
        # Slicing keeps the leading batch axis, so a chunk of one sample has the
        # same shape as np.expand_dims(sample, axis=0).
        batch = tf.convert_to_tensor(x_test[start:start + batch_size], dtype=tf.float32)
        predictions = infer(batch)
        y_pred_classes.extend(tf.argmax(predictions[output_key], axis=1).numpy())

    # Calculate accuracy
    y_pred_classes = np.array(y_pred_classes)
    accuracy = np.mean(y_pred_classes == y_test)
    report = classification_report(y_test, y_pred_classes, target_names=[str(i) for i in range(10)])
    return accuracy, report

# Evaluate the TensorRT signature on the test split.
# evaluate_trt_model converts inputs to float32 itself and returns (accuracy, report) in that order.
accuracy, report = evaluate_trt_model(infer, x_test, y_test)
print(f"TensorRT Model Accuracy: {accuracy * 100:.2f}%")
print(f"Classification Report:\n{report}")


TensorRT Model Accuracy: 75.95%
Classification Report:
              precision    recall  f1-score   support

           0       0.78      0.81      0.79      1000
           1       0.83      0.88      0.86      1000
           2       0.73      0.58      0.65      1000
           3       0.56      0.67      0.61      1000
           4       0.70      0.74      0.72      1000
           5       0.63      0.72      0.67      1000
           6       0.86      0.78      0.82      1000
           7       0.84      0.77      0.80      1000
           8       0.88      0.82      0.85      1000
           9       0.87      0.81      0.84      1000

    accuracy                           0.76     10000
   macro avg       0.77      0.76      0.76     10000
weighted avg       0.77      0.76      0.76     10000



In [None]:
import time

def _mean_call_latency(infer, tensor, warmup, repeats):
    """Return the mean seconds per `infer(tensor)` call after `warmup` untimed calls."""
    for _ in range(warmup):
        infer(tensor)

    start_time = time.perf_counter()
    for _ in range(repeats):
        outputs = infer(tensor)
        # Materialise every output on the host so the timing covers the full
        # computation, not just dispatching the call.
        for value in outputs.values():
            value.numpy()
    return (time.perf_counter() - start_time) / repeats


def measure_latency(infer, inputs, batch_size=1, warmup=2, repeats=10):
    """
    Measures steady-state latency for single and batch inference.

    The first call with a given input shape can include one-off setup work, so
    timing a single cold call reports that setup cost instead of inference
    latency. Each shape is therefore warmed up with untimed calls and the
    result is averaged over several timed calls.

    Parameters:
    - infer: TensorRT-optimized inference function returning a mapping of output name to tensor
    - inputs: Input data for inference (e.g., test dataset)
    - batch_size: Number of images in a batch for batch inference
    - warmup: Number of untimed calls made per input shape before timing
    - repeats: Number of timed calls averaged per measurement (must be >= 1)

    Returns:
    - single_latency: Mean latency for a single-image inference (seconds)
    - batch_latency: Mean latency for one batch inference (seconds)

    Raises:
    - ValueError: if repeats is less than 1
    """
    if repeats < 1:
        raise ValueError("repeats must be at least 1")

    # Single input
    single_input = tf.convert_to_tensor(np.expand_dims(inputs[0], axis=0), dtype=tf.float32)

    # Batch input
    batch_input = tf.convert_to_tensor(inputs[:batch_size], dtype=tf.float32)

    single_latency = _mean_call_latency(infer, single_input, warmup, repeats)
    batch_latency = _mean_call_latency(infer, batch_input, warmup, repeats)

    return single_latency, batch_latency

# Compare one-image latency against the latency of a 64-image batch on the TensorRT signature.
batch_size = 64
single_latency, batch_latency = measure_latency(infer, x_test, batch_size=batch_size)

# Both values are in seconds; the batch figure is for the whole batch, not per image.
print(f"Single Inference Latency: {single_latency:.6f} seconds")
print(f"Batch Inference Latency (batch size={batch_size}): {batch_latency:.6f} seconds")


Single Inference Latency: 0.002719 seconds
Batch Inference Latency (batch size=64): 12.533403 seconds


In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import load_model

# Load the saved model
# Use the same relative path the model was saved with ('cifar10_model1.h5') so
# the file is found regardless of the current working directory; a hard-coded
# '/content/...' path only works when the notebook's cwd is /content.
MODEL_PATH = 'cifar10_model1.h5'  # Replace with your model path
model = load_model(MODEL_PATH)

# Create dummy input (the shape should match your model's expected input)
# Set all input values to 1
dummy_input = np.full((1, 32, 32, 3), 1.0, dtype=np.float32)  # Ensure the dtype is float32

# Convert the dummy input to a TensorFlow tensor
dummy_input = tf.convert_to_tensor(dummy_input)

# Print the initial dummy input
print("Initial Dummy Input:")
print(dummy_input.shape)
print(dummy_input.numpy())

# Iterate through each layer of the model to get the output at that layer
output = dummy_input

# Process each layer one by one and print the output at each layer
for layer in model.layers:
    output = layer(output)  # Pass the output from the previous layer to the next layer
    print(f"\nOutput of layer {layer.name}:")
    print(output.shape)
    print(output.numpy())  # Use .numpy() to convert tensor to NumPy array for easy printing

# Get the final output prediction
final_prediction = model(dummy_input)

# Print the final prediction
print("\nFinal Prediction (class probabilities):")
print(final_prediction.numpy())

# Get the predicted class (max probability)
predicted_class = tf.argmax(final_prediction, axis=-1).numpy()[0]
print(f"\nPredicted Class: {predicted_class}")




Initial Dummy Input:
(1, 32, 32, 3)
[[[[1. 1. 1.]
   [1. 1. 1.]
   [1. 1. 1.]
   ...
   [1. 1. 1.]
   [1. 1. 1.]
   [1. 1. 1.]]

  [[1. 1. 1.]
   [1. 1. 1.]
   [1. 1. 1.]
   ...
   [1. 1. 1.]
   [1. 1. 1.]
   [1. 1. 1.]]

  [[1. 1. 1.]
   [1. 1. 1.]
   [1. 1. 1.]
   ...
   [1. 1. 1.]
   [1. 1. 1.]
   [1. 1. 1.]]

  ...

  [[1. 1. 1.]
   [1. 1. 1.]
   [1. 1. 1.]
   ...
   [1. 1. 1.]
   [1. 1. 1.]
   [1. 1. 1.]]

  [[1. 1. 1.]
   [1. 1. 1.]
   [1. 1. 1.]
   ...
   [1. 1. 1.]
   [1. 1. 1.]
   [1. 1. 1.]]

  [[1. 1. 1.]
   [1. 1. 1.]
   [1. 1. 1.]
   ...
   [1. 1. 1.]
   [1. 1. 1.]
   [1. 1. 1.]]]]

Output of layer conv2d:
(1, 30, 30, 128)
[[[[0. 0. 0. ... 0. 0. 0.]
   [0. 0. 0. ... 0. 0. 0.]
   [0. 0. 0. ... 0. 0. 0.]
   ...
   [0. 0. 0. ... 0. 0. 0.]
   [0. 0. 0. ... 0. 0. 0.]
   [0. 0. 0. ... 0. 0. 0.]]

  [[0. 0. 0. ... 0. 0. 0.]
   [0. 0. 0. ... 0. 0. 0.]
   [0. 0. 0. ... 0. 0. 0.]
   ...
   [0. 0. 0. ... 0. 0. 0.]
   [0. 0. 0. ... 0. 0. 0.]
   [0. 0. 0. ... 0. 0. 0.]]

  [[0. 0. 0. .