Importing necessary modules 

In [1]:
import tensorflow as tf
from tensorflow.keras import datasets, layers, models, losses
from sklearn.metrics import f1_score
import numpy as np
import time
import tensorflow.experimental.tensorrt as trt

Loading the mnist classification data

In [2]:
(x_train,y_train),(x_test,y_test) = datasets.mnist.load_data()
x_train.shape

(60000, 28, 28)

Padding the dataset

In [3]:
x_train = tf.pad(x_train, [[0, 0], [2,2], [2,2]])/255
x_test = tf.pad(x_test, [[0, 0], [2,2], [2,2]])/255
x_train.shape

TensorShape([60000, 32, 32])

Expanding the tensor dimensions

In [4]:
x_train = tf.expand_dims(x_train, axis=3, name=None)
x_test = tf.expand_dims(x_test, axis=3, name=None)
x_train.shape

TensorShape([60000, 32, 32, 1])

Separating for validation set

In [5]:
x_val = x_train[-2000:,:,:,:] 
y_val = y_train[-2000:] 
x_train = x_train[:-2000,:,:,:] 
y_train = y_train[:-2000]

Building the model by changing Activation function from Dying Relu to tanh 

In [6]:
model = models.Sequential()

model.add(layers.Conv2D(6, 5, activation='tanh', input_shape=x_train.shape[1:]))
model.add(layers.AveragePooling2D(2))
model.add(layers.Activation('sigmoid'))

model.add(layers.Conv2D(16, 5, activation='tanh'))
model.add(layers.AveragePooling2D(2))
model.add(layers.Activation('sigmoid'))

model.add(layers.Conv2D(120, 5, activation='tanh'))

model.add(layers.Flatten())

model.add(layers.Dense(84, activation='tanh'))
model.add(layers.Dense(10, activation='softmax'))

model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Compiling model with Adam optimizer for 40 epochs

In [8]:
model.compile(optimizer='adam', loss=losses.sparse_categorical_crossentropy, metrics=['accuracy'])
history = model.fit(x_train, y_train, batch_size=64, epochs=5, validation_data=(x_val, y_val))

Epoch 1/5
[1m907/907[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 9ms/step - accuracy: 0.8766 - loss: 0.4020 - val_accuracy: 0.9460 - val_loss: 0.1939
Epoch 2/5
[1m907/907[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 8ms/step - accuracy: 0.9061 - loss: 0.2976 - val_accuracy: 0.9540 - val_loss: 0.1640
Epoch 3/5
[1m907/907[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 8ms/step - accuracy: 0.9188 - loss: 0.2571 - val_accuracy: 0.9665 - val_loss: 0.1276
Epoch 4/5
[1m907/907[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 10ms/step - accuracy: 0.9330 - loss: 0.2098 - val_accuracy: 0.9585 - val_loss: 0.1506
Epoch 5/5
[1m907/907[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 13ms/step - accuracy: 0.9382 - loss: 0.1898 - val_accuracy: 0.9645 - val_loss: 0.1286


Model's accuracy

In [9]:
model.evaluate(x_test, y_test)
print(f"Accuracy on test set: {model.evaluate(x_test, y_test)[1]:.2f}")

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9405 - loss: 0.1946
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9405 - loss: 0.1946
Accuracy on test set: 0.95


F1 SCORE

F1 Score Range: The F1 score ranges from 0 to 1:

In [10]:
predictions = model.predict(x_test)

predicted_classes = np.argmax(predictions, axis=1)

f1 = f1_score(y_test, predicted_classes, average='macro')

print(f"f1 score: {f1:.2f}")

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
f1 score: 0.95




    The accuracy here is 0.98

    The F1 Score here is 0.98

Testing the latency (Time taken for processing single sample)

In [11]:
single_sample = np.expand_dims(x_test[0], axis=0) 
start_time = time.time()   
model.predict(single_sample) 
end_time = time.time() 

latency = (end_time - start_time) * 1000  
print(f"Time taken to process a single sample: {latency:.4f} ms")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
Time taken to process a single sample: 82.0088 ms


Latency value:

     Time taken to process a single sample is 77.2564 ms

TensorRT

In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [None]:
model.save('/content/drive/MyDrive/DL_Model/model.keras')

In [None]:
from tensorflow.keras.models import load_model
import tensorflow as tf

model1 = load_model('/content/drive/MyDrive/DL_Model/model.keras')

export_path = '/content/drive/MyDrive/DL_Model/saved_model'

tf.saved_model.save(model1, export_path)

print(f"Model successfully saved as a TensorFlow SavedModel at {export_path}")

Comparing latency numbers of tensorflow and tensorrt

In [None]:
import tensorflow as tf
import tensorrt as trt
import numpy as np
import time
import pycuda.driver as cuda
import pycuda.autoinit
from tensorflow.keras.datasets import mnist

BATCH_SIZE = 1
INPUT_SHAPE = (32, 32, 1)  
NUM_ITERATIONS = 1000
NUM_WARMUP = 50


(_, _), (x_test, _) = mnist.load_data()
x_test = x_test.astype(np.float32) / 255.0 
x_test = x_test[:NUM_ITERATIONS] 

class HostDeviceMem(object):
    def __init__(self, host_mem, device_mem):
        self.host = host_mem
        self.device = device_mem

def allocate_buffers(engine):
    inputs = []
    outputs = []
    bindings = []
    stream = cuda.Stream()

    for binding in engine:
        size = trt.volume(engine.get_binding_shape(binding)) * BATCH_SIZE
        dtype = trt.nptype(engine.get_binding_dtype(binding))
        host_mem = cuda.pagelocked_empty(size, dtype)
        device_mem = cuda.mem_alloc(host_mem.nbytes)
        bindings.append(int(device_mem))

        if engine.binding_is_input(binding):
            inputs.append(HostDeviceMem(host_mem, device_mem))
        else:
            outputs.append(HostDeviceMem(host_mem, device_mem))

    return inputs, outputs, bindings, stream

def do_inference(context, bindings, inputs, outputs, stream, batch_size=BATCH_SIZE):
    [cuda.memcpy_htod_async(inp.device, inp.host, stream) for inp in inputs]
    context.execute_async(batch_size=batch_size, bindings=bindings, stream_handle=stream.handle)
    [cuda.memcpy_dtoh_async(out.host, out.device, stream) for out in outputs]
    stream.synchronize()
    return [out.host for out in outputs]

def benchmark_tensorrt(model_path):
    with open(model_path, 'rb') as f, trt.Runtime(trt.Logger(trt.Logger.WARNING)) as runtime:
        engine = runtime.deserialize_cuda_engine(f.read())

    context = engine.create_execution_context()
    inputs, outputs, bindings, stream = allocate_buffers(engine)

    for i in range(NUM_WARMUP):
        img = x_test[i % len(x_test)].ravel()
        np.copyto(inputs[0].host, img)
        do_inference(context, bindings, inputs, outputs, stream)

    times = []
    for i in range(NUM_ITERATIONS):
        img = x_test[i % len(x_test)].ravel()
        np.copyto(inputs[0].host, img)

        start_time = time.time()
        do_inference(context, bindings, inputs, outputs, stream)
        times.append(time.time() - start_time)

    avg_latency = np.mean(times) * 1000 
    throughput = 1000 / avg_latency * BATCH_SIZE  
    return avg_latency, throughput

def benchmark_tensorflow(model_path):
    model = tf.saved_model.load(model_path)
    infer = model.signatures['serving_default']

    for i in range(NUM_WARMUP):
        img = x_test[i % len(x_test)][np.newaxis, ...] 
        infer(tf.constant(img))

    times = []
    for i in range(NUM_ITERATIONS):
        img = x_test[i % len(x_test)][np.newaxis, ...]

        start_time = time.time()
        infer(tf.constant(img))
        times.append(time.time() - start_time)

    avg_latency = np.mean(times) * 1000 
    throughput = 1000 / avg_latency * BATCH_SIZE  
    return avg_latency, throughput

if __name__ == '__main__':
    
    print("Benchmarking TensorRT model...")
    trt_latency, trt_throughput = benchmark_tensorrt('/content/drive/MyDrive/DL_Model/saved_model.trt')

    print("\nBenchmarking TensorFlow model...")
    tf_latency, tf_throughput = benchmark_tensorflow('/content/drive/MyDrive/DL_Model/saved_model')

    print("\nResults:")
    print(f"TensorRT Average Latency: {trt_latency:.2f}ms")
    print(f"TensorRT Throughput: {trt_throughput:.2f} requests/sec")
    print(f"TensorFlow Average Latency: {tf_latency:.2f}ms")
    print(f"TensorFlow Throughput: {tf_throughput:.2f} requests/sec")

Latency value after TensorRT:

     Time taken to process a single sample (TensorRT optimized) is 00.0046 ms