In [21]:
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras import datasets, layers, models, losses
from sklearn.metrics import classification_report, f1_score
import numpy as np

In [2]:
(x_train,y_train),(x_test,y_test) = datasets.mnist.load_data()
x_train.shape

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


(60000, 28, 28)

In [3]:
x_train = tf.pad(x_train, [[0, 0], [2,2], [2,2]])/255
x_test = tf.pad(x_test, [[0, 0], [2,2], [2,2]])/255
x_train.shape

TensorShape([60000, 32, 32])

In [4]:
x_train = tf.expand_dims(x_train, axis=3, name=None)
x_test = tf.expand_dims(x_test, axis=3, name=None)
x_train.shape

TensorShape([60000, 32, 32, 1])

In [5]:
x_val = x_train[-2000:,:,:,:]
y_val = y_train[-2000:]
x_train = x_train[:-2000,:,:,:]
y_train = y_train[:-2000]

In [6]:
model = models.Sequential()
model.add(layers.Conv2D(6, 5, activation='elu', input_shape=x_train.shape[1:]))
model.add(layers.AveragePooling2D(2))
model.add(layers.Activation('sigmoid'))
model.add(layers.Conv2D(16, 5, activation='elu'))
model.add(layers.AveragePooling2D(2))
model.add(layers.Activation('sigmoid'))
model.add(layers.Conv2D(120, 5, activation='elu'))
model.add(layers.Flatten())
model.add(layers.Dense(84, activation='elu'))
model.add(layers.Dense(10, activation='softmax'))
model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [7]:
model.compile(optimizer='sgd', loss=losses.sparse_categorical_crossentropy, metrics=['accuracy'])
history = model.fit(x_train, y_train, batch_size=64, epochs=5, validation_data=(x_val, y_val))

Epoch 1/5
[1m907/907[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 35ms/step - accuracy: 0.1051 - loss: 2.3069 - val_accuracy: 0.1070 - val_loss: 2.3074
Epoch 2/5
[1m907/907[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 36ms/step - accuracy: 0.1161 - loss: 2.3002 - val_accuracy: 0.1550 - val_loss: 2.2920
Epoch 3/5
[1m907/907[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 34ms/step - accuracy: 0.1688 - loss: 2.2726 - val_accuracy: 0.2970 - val_loss: 2.1153
Epoch 4/5
[1m907/907[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 34ms/step - accuracy: 0.4781 - loss: 1.8619 - val_accuracy: 0.7900 - val_loss: 0.8601
Epoch 5/5
[1m907/907[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 34ms/step - accuracy: 0.7630 - loss: 0.8230 - val_accuracy: 0.7395 - val_loss: 0.7474


In [8]:
model.evaluate(x_test, y_test)
print(f"Accuracy on test set: {model.evaluate(x_test, y_test)[1]:.2f}")

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - accuracy: 0.6598 - loss: 0.9794
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - accuracy: 0.6598 - loss: 0.9794
Accuracy on test set: 0.68


In [9]:
y_pred_probs = model.predict(x_test)
y_pred_classes = np.argmax(y_pred_probs, axis=1)


[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 9ms/step


In [10]:
f1 = f1_score(y_test, y_pred_classes, average='macro')
print(f"F1 Score (Weighted): {f1:.2f}")


F1 Score (Weighted): 0.69


In [12]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [13]:
model.save('/content/drive/MyDrive/DL_Model/model.keras')

In [14]:
from tensorflow.keras.models import load_model
import tensorflow as tf

# Load the Keras model
model = load_model('/content/drive/MyDrive/DL_Model/model.keras')

# Define the directory where the TensorFlow SavedModel will be exported
export_path = '/content/drive/MyDrive/DL_Model/saved_model'

# Save the model using the TensorFlow SavedModel format
tf.saved_model.save(model, export_path)

print(f"Model successfully saved as a TensorFlow SavedModel at {export_path}")

Model successfully saved as a TensorFlow SavedModel at /content/drive/MyDrive/DL_Model/saved_model


In [15]:
!pip install -U tf2onnx

Collecting tf2onnx
  Downloading tf2onnx-1.16.1-py3-none-any.whl.metadata (1.3 kB)
Collecting onnx>=1.4.1 (from tf2onnx)
  Downloading onnx-1.17.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (16 kB)
Collecting protobuf~=3.20 (from tf2onnx)
  Downloading protobuf-3.20.3-py2.py3-none-any.whl.metadata (720 bytes)
Downloading tf2onnx-1.16.1-py3-none-any.whl (455 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m455.8/455.8 kB[0m [31m12.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading onnx-1.17.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (16.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m16.0/16.0 MB[0m [31m77.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading protobuf-3.20.3-py2.py3-none-any.whl (162 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m162.1/162.1 kB[0m [31m13.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: protobuf, onnx, tf2onnx
  Attempting uninstall: pr

In [16]:
!python -m tf2onnx.convert --saved-model "/content/drive/MyDrive/DL_Model/saved_model" --output "/content/drive/MyDrive/DL_Model/saved_model.onnx" --opset 16

2025-01-24 11:04:22.402221: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-01-24 11:04:22.430298: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-01-24 11:04:22.438455: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-01-24 11:04:29,110 - INFO - Signatures found in model: [serving_default].
2025-01-24 11:04:29,111 - INFO - Output names: ['output_0']
I0000 00:00:1737716669.142850    4926 devices.cc:67] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 0
I0000 00:00:1737716669.370369    4926 devices.cc:67] Number of eligible GPUs (core count >= 8, comput

In [17]:
!wget https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.0.0/tars/TensorRT-10.0.0.6.Linux.x86_64-gnu.cuda-12.4.tar.gz -O tensorrt.tar.gz

--2025-01-24 11:05:22--  https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.0.0/tars/TensorRT-10.0.0.6.Linux.x86_64-gnu.cuda-12.4.tar.gz
Resolving developer.nvidia.com (developer.nvidia.com)... 23.54.19.192, 23.54.19.131
Connecting to developer.nvidia.com (developer.nvidia.com)|23.54.19.192|:443... connected.
HTTP request sent, awaiting response... 302 Moved Temporarily
Location: https://developer.download.nvidia.com/compute/machine-learning/tensorrt/10.0.0/tars/TensorRT-10.0.0.6.Linux.x86_64-gnu.cuda-12.4.tar.gz [following]
--2025-01-24 11:05:23--  https://developer.download.nvidia.com/compute/machine-learning/tensorrt/10.0.0/tars/TensorRT-10.0.0.6.Linux.x86_64-gnu.cuda-12.4.tar.gz
Resolving developer.download.nvidia.com (developer.download.nvidia.com)... 23.54.19.131, 23.54.19.192
Connecting to developer.download.nvidia.com (developer.download.nvidia.com)|23.54.19.131|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 2348693005 (2.2G) 

In [None]:
!/content/TensorRT-10.0.0.6/bin/trtexec --onnx=/content/drive/MyDrive/DL_Model/saved_model.onnx --saveEngine=/content/drive/MyDrive/ResNet20/saved_model.trt

In [20]:
!tar -xvzf tensorrt.tar.gz

TensorRT-10.0.0.6/
TensorRT-10.0.0.6/targets/
TensorRT-10.0.0.6/targets/x86_64-linux-gnu/
TensorRT-10.0.0.6/targets/x86_64-linux-gnu/samples
TensorRT-10.0.0.6/targets/x86_64-linux-gnu/lib/
TensorRT-10.0.0.6/targets/x86_64-linux-gnu/lib/libnvinfer_lean.so.10
TensorRT-10.0.0.6/targets/x86_64-linux-gnu/lib/libnvinfer.so.10.0.0
TensorRT-10.0.0.6/targets/x86_64-linux-gnu/lib/libnvonnxparser_static.a
TensorRT-10.0.0.6/targets/x86_64-linux-gnu/lib/libnvinfer_vc_plugin.so
TensorRT-10.0.0.6/targets/x86_64-linux-gnu/lib/libnvinfer_lean_static.a
TensorRT-10.0.0.6/targets/x86_64-linux-gnu/lib/libonnx_proto.a
TensorRT-10.0.0.6/targets/x86_64-linux-gnu/lib/libnvinfer.so
TensorRT-10.0.0.6/targets/x86_64-linux-gnu/lib/libnvinfer_builder_resource.so.10.0.0
TensorRT-10.0.0.6/targets/x86_64-linux-gnu/lib/libnvinfer_plugin.so.10
TensorRT-10.0.0.6/targets/x86_64-linux-gnu/lib/stubs/
TensorRT-10.0.0.6/targets/x86_64-linux-gnu/lib/stubs/libnvinfer_vc_plugin.so
TensorRT-10.0.0.6/targets/x86_64-linux-gnu/lib/s

In [22]:
!/content/TensorRT-10.0.0.6/bin/trtexec --onnx=/content/drive/MyDrive/DL_Model/saved_model.onnx --saveEngine=/content/drive/MyDrive/DL_Model/saved_model.trt

&&&& RUNNING TensorRT.trtexec [TensorRT v100000] # /content/TensorRT-10.0.0.6/bin/trtexec --onnx=/content/drive/MyDrive/DL_Model/saved_model.onnx --saveEngine=/content/drive/MyDrive/DL_Model/saved_model.trt
[01/24/2025-11:24:31] [I] === Model Options ===
[01/24/2025-11:24:31] [I] Format: ONNX
[01/24/2025-11:24:31] [I] Model: /content/drive/MyDrive/DL_Model/saved_model.onnx
[01/24/2025-11:24:31] [I] Output:
[01/24/2025-11:24:31] [I] === Build Options ===
[01/24/2025-11:24:31] [I] Memory Pools: workspace: default, dlaSRAM: default, dlaLocalDRAM: default, dlaGlobalDRAM: default, tacticSharedMem: default
[01/24/2025-11:24:31] [I] avgTiming: 8
[01/24/2025-11:24:31] [I] Precision: FP32
[01/24/2025-11:24:31] [I] LayerPrecisions: 
[01/24/2025-11:24:31] [I] Layer Device Types: 
[01/24/2025-11:24:31] [I] Calibration: 
[01/24/2025-11:24:31] [I] Refit: Disabled
[01/24/2025-11:24:31] [I] Strip weights: Disabled
[01/24/2025-11:24:31] [I] Version Compatible: Disabled
[01/24/2025-11:24:31] [I] ONNX Pl

In [25]:
!pip install /content/TensorRT-10.0.0.6/python/tensorrt-10.0.0b6-cp311-none-linux_x86_64.whl

Processing ./TensorRT-10.0.0.6/python/tensorrt-10.0.0b6-cp311-none-linux_x86_64.whl
Installing collected packages: tensorrt
Successfully installed tensorrt-10.0.0b6


In [None]:
import tensorflow as tf
import tensorrt as trt
import numpy as np
import time
import pycuda.driver as cuda
import pycuda.autoinit
from tensorflow.keras.datasets import cifar10

BATCH_SIZE = 1
INPUT_SHAPE = (32, 32, 3)  
NUM_ITERATIONS = 1000
NUM_WARMUP = 50

(_, _), (x_test, _) = cifar10.load_data()
x_test = x_test.astype(np.float32) / 255.0  
x_test = x_test[:NUM_ITERATIONS]  

class HostDeviceMem(object):
    def __init__(self, host_mem, device_mem):
        self.host = host_mem
        self.device = device_mem

def allocate_buffers(engine):
    inputs = []
    outputs = []
    bindings = []
    stream = cuda.Stream()

    for binding in engine:
        size = trt.volume(engine.get_binding_shape(binding)) * BATCH_SIZE
        dtype = trt.nptype(engine.get_binding_dtype(binding))
        host_mem = cuda.pagelocked_empty(size, dtype)
        device_mem = cuda.mem_alloc(host_mem.nbytes)
        bindings.append(int(device_mem))

        if engine.binding_is_input(binding):
            inputs.append(HostDeviceMem(host_mem, device_mem))
        else:
            outputs.append(HostDeviceMem(host_mem, device_mem))

    return inputs, outputs, bindings, stream

def do_inference(context, bindings, inputs, outputs, stream, batch_size=BATCH_SIZE):
    [cuda.memcpy_htod_async(inp.device, inp.host, stream) for inp in inputs]
    context.execute_async(batch_size=batch_size, bindings=bindings, stream_handle=stream.handle)
    [cuda.memcpy_dtoh_async(out.host, out.device, stream) for out in outputs]
    stream.synchronize()
    return [out.host for out in outputs]

def benchmark_tensorrt(model_path):
    with open(model_path, 'rb') as f, trt.Runtime(trt.Logger(trt.Logger.WARNING)) as runtime:
        engine = runtime.deserialize_cuda_engine(f.read())

    context = engine.create_execution_context()
    inputs, outputs, bindings, stream = allocate_buffers(engine)

    for i in range(NUM_WARMUP):
        img = x_test[i % len(x_test)].ravel()
        np.copyto(inputs[0].host, img)
        do_inference(context, bindings, inputs, outputs, stream)

    times = []
    for i in range(NUM_ITERATIONS):
        img = x_test[i % len(x_test)].ravel()
        np.copyto(inputs[0].host, img)

        start_time = time.time()
        do_inference(context, bindings, inputs, outputs, stream)
        times.append(time.time() - start_time)

    avg_latency = np.mean(times) * 1000  
    throughput = 1000 / avg_latency * BATCH_SIZE  
    return avg_latency, throughput

def benchmark_tensorflow(model_path):
    model = tf.saved_model.load(model_path)
    infer = model.signatures['serving_default']

    for i in range(NUM_WARMUP):
        img = x_test[i % len(x_test)][np.newaxis, ...]  
        infer(tf.constant(img))

    times = []
    for i in range(NUM_ITERATIONS):
        img = x_test[i % len(x_test)][np.newaxis, ...]

        start_time = time.time()
        infer(tf.constant(img))
        times.append(time.time() - start_time)

    avg_latency = np.mean(times) * 1000  
    throughput = 1000 / avg_latency * BATCH_SIZE  
    return avg_latency, throughput

if __name__ == '__main__':
    print("Benchmarking TensorRT model...")
    trt_latency, trt_throughput = benchmark_tensorrt('/content/drive/MyDrive/DL_Model/saved_model.trt')

    print("\nBenchmarking TensorFlow model...")
    tf_latency, tf_throughput = benchmark_tensorflow('/content/drive/MyDrive/DL_Model/saved_model')

    print("\nResults:")
    print(f"TensorRT Average Latency: {trt_latency:.2f}ms")
    print(f"TensorRT Throughput: {trt_throughput:.2f} requests/sec")
    print(f"TensorFlow Average Latency: {tf_latency:.2f}ms")
    print(f"TensorFlow Throughput: {tf_throughput:.2f} requests/sec")