# TensorRT

In [1]:
import pathlib
import os
import time
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers

# How to create the docker run directly in Lambda2
# docker run --gpus all -it --mount type=bind,source=/home/jacob-delgado/Documents/CAPSTONE,target=/workspace/CAPSTONE nvcr.io/nvidia/tensorflow:24.10-tf2-py3

2024-11-12 23:54:47.056173: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-11-12 23:54:47.320984: I tensorflow/core/platform/cpu_feature_guard.cc:211] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE3 SSE4.1 SSE4.2 AVX, in other operations, rebuild TensorFlow with the appropriate compiler flags.


### Benchmarking

In [None]:
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())

In [None]:
from tensorflow.python.compiler.tensorrt import trt_convert as trt
print(trt.trt_utils._pywrap_py_utils.get_linked_tensorrt_version())

(10, 5, 0)


### Benchmarking

In [10]:
data_root = pathlib.Path("../data/Monkeypox_Data/Original_Images")    # points to the folder containing the images that will be used for training
saved_model_dir = '../best_model/model1/best_f1score_fold'
optimized_model_dir = '../tensorRT_model/test'
# optimized_model_dir = '../tensorRT_model/test_INT8'

# hyperparameters
batch_size = 32         # size of the batch that will be fed to model
img_height = 224        # input image height
img_width = 224         # input image width
test_size = 0.2

# Load dataset without splitting
dataset = tf.keras.utils.image_dataset_from_directory(
    data_root,                                  # loads images from the data_root directory
    image_size=(img_height, img_width),         # resizes all images to (224, 224) pixels
    batch_size=batch_size,                      # set the batch size
    shuffle=True                                # shufle data when loaded
)

# normalization_layer = layers.Rescaling(1./255)
# dataset = dataset.map(lambda x, y: (normalization_layer(x), y))

Found 228 files belonging to 2 classes.


### Inference and Metrics Setup

In [3]:
image_batches, labels = [], []
for image_batch, label_batch in dataset:
    image_batches.append(image_batch)
    labels.append(label_batch)

image_batches = np.concatenate(image_batches) # Flatten batches to get all images
labels = np.concatenate(labels)               # Flatten batches to get all labels  
print(f"Total Images: {image_batches.shape[0]} \nTotal Labels: {labels.shape[0]}")

# Split the data into test subset for benchmarking
_, X_test, _, Y_test = train_test_split(image_batches, labels, test_size=test_size, random_state=42)


X_test = X_test / 255.0


2024-11-12 23:45:54.492110: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


Total Images: 228 
Total Labels: 228


### Model Size Difference

In [4]:
def get_model_size(model_dir):
    total_size = 0
    for dirpath, _, filenames in os.walk(model_dir):
        for f in filenames:
            fp = os.path.join(dirpath, f)
            total_size += os.path.getsize(fp)
    return total_size / (1024 * 1024)  # Convert to MB

# original_size = get_model_size(saved_model_dir)
# optimized_size = get_model_size(optimized_model_dir)
# print(f"Original Model Size: {original_size:.2f} MB")
# print(f"Optimized Model Size: {optimized_size:.2f} MB")
# print(f"Compression Ratio: {original_size / optimized_size:.2f}")


### Run inference on original model


In [10]:
# run inference
trials = 500
inference_times = []

# Load the SavedModel directly
model = tf.saved_model.load('../best_model/model1/best_f1score_fold')

# # Load the SavedModel using TFSMLayer, treating it as a Keras layer
# model_layer = tf.keras.layers.TFSMLayer('../best_model/model1/best_f1score_fold', call_endpoint='serving_default')

# # Wrap the TFSMLayer in a Sequential model for inference
# model = tf.keras.Sequential([model_layer])

# # Wrap the SavedModel for inference
# def model_call(inputs):
#     return model.signatures["serving_default"](inputs)["output_0"]

# # Create a Keras Sequential model
# model = tf.keras.Sequential([
#     tf.keras.layers.InputLayer(input_shape=(224, 224, 3)),  # Replace with your model's input shape
#     tf.keras.layers.Lambda(model_call)
# ])

print(f"Running {trials} inference trials on {len(X_test)} test images...")
for i in range(trials):
    start_time = time.perf_counter()

    # Run inference for a batch of images
    # saved_model.predict(X_test, batch_size=batch_size, verbose=0)
    model.signatures["serving_default"](X_test)  # Perform inference directly

    end_time = time.perf_counter()
    inference_time = end_time - start_time
    inference_times.append(inference_time)

    if i % 50 == 0:
        avg_inference = np.mean(inference_times)  # Average inference time per trial
        print(f"Step {i}: average inference time = {avg_inference:.6f} seconds")
        
    tf.keras.backend.clear_session()
        
# Compute throughput (images per second)
# total_time = np.sum(inference_times)
# throughput = (trials * len(X_test)) / total_time
# print(f"Throughput: {throughput:.2f} images/second")

Running 500 inference trials on 46 test images...


TypeError: Binding inputs to tf.function failed due to `too many positional arguments`. Received args: (array([[[[0.68235296, 0.53333336, 0.49019608],
         [0.6862745 , 0.5372549 , 0.49411765],
         [0.6901961 , 0.5411765 , 0.49803922],
         ...,
         [0.85490197, 0.72156864, 0.68235296],
         [0.85882354, 0.7254902 , 0.6862745 ],
         [0.85882354, 0.7254902 , 0.6862745 ]],

        [[0.68235296, 0.53333336, 0.49019608],
         [0.6862745 , 0.5372549 , 0.49411765],
         [0.6901961 , 0.5411765 , 0.49803922],
         ...,
         [0.85882354, 0.7254902 , 0.6862745 ],
         [0.85882354, 0.7254902 , 0.6862745 ],
         [0.85882354, 0.7254902 , 0.6862745 ]],

        [[0.6862745 , 0.5372549 , 0.49411765],
         [0.6862745 , 0.5372549 , 0.49411765],
         [0.6901961 , 0.5411765 , 0.49803922],
         ...,
         [0.8627451 , 0.7294118 , 0.6901961 ],
         [0.8627451 , 0.7294118 , 0.6901961 ],
         [0.8627451 , 0.7294118 , 0.6901961 ]],

        ...,

        [[0.6666667 , 0.5019608 , 0.41568628],
         [0.67058825, 0.5058824 , 0.41960785],
         [0.67058825, 0.5058824 , 0.41960785],
         ...,
         [0.8156863 , 0.627451  , 0.54901963],
         [0.827451  , 0.627451  , 0.5529412 ],
         [0.827451  , 0.627451  , 0.5529412 ]],

        [[0.6745098 , 0.50980395, 0.42352942],
         [0.6745098 , 0.50980395, 0.42352942],
         [0.6745098 , 0.50980395, 0.42352942],
         ...,
         [0.8117647 , 0.62352943, 0.54509807],
         [0.81960785, 0.61960787, 0.54509807],
         [0.8235294 , 0.62352943, 0.54901963]],

        [[0.6784314 , 0.5137255 , 0.42745098],
         [0.6784314 , 0.5137255 , 0.42745098],
         [0.6745098 , 0.50980395, 0.42352942],
         ...,
         [0.80784315, 0.61960787, 0.5411765 ],
         [0.81960785, 0.61960787, 0.54509807],
         [0.81960785, 0.61960787, 0.54509807]]],


       [[[0.7176471 , 0.4509804 , 0.3764706 ],
         [0.7176471 , 0.4509804 , 0.3764706 ],
         [0.72156864, 0.45490196, 0.38039216],
         ...,
         [0.9764706 , 0.77254903, 0.7294118 ],
         [0.9647059 , 0.7607843 , 0.7176471 ],
         [0.9490196 , 0.74509805, 0.7019608 ]],

        [[0.7137255 , 0.44705883, 0.37254903],
         [0.7137255 , 0.44705883, 0.37254903],
         [0.7137255 , 0.44705883, 0.37254903],
         ...,
         [0.972549  , 0.76862746, 0.7254902 ],
         [0.9607843 , 0.75686276, 0.7137255 ],
         [0.9490196 , 0.74509805, 0.7019608 ]],

        [[0.7137255 , 0.44705883, 0.3647059 ],
         [0.7137255 , 0.44705883, 0.3647059 ],
         [0.70980394, 0.44313726, 0.36078432],
         ...,
         [0.9607843 , 0.75686276, 0.7137255 ],
         [0.9529412 , 0.7490196 , 0.7058824 ],
         [0.94509804, 0.7411765 , 0.69803923]],

        ...,

        [[0.8235294 , 0.5411765 , 0.44705883],
         [0.827451  , 0.54509807, 0.4509804 ],
         [0.8352941 , 0.5529412 , 0.45882353],
         ...,
         [0.98039216, 0.87058824, 0.85882354],
         [0.98039216, 0.87058824, 0.85882354],
         [0.9764706 , 0.8666667 , 0.85490197]],

        [[0.8392157 , 0.5568628 , 0.4627451 ],
         [0.8352941 , 0.5529412 , 0.45882353],
         [0.8352941 , 0.5529412 , 0.45882353],
         ...,
         [0.99607843, 0.8862745 , 0.8745098 ],
         [0.99607843, 0.8862745 , 0.8745098 ],
         [0.9882353 , 0.8784314 , 0.8666667 ]],

        [[0.85490197, 0.57254905, 0.47843137],
         [0.84313726, 0.56078434, 0.46666667],
         [0.8352941 , 0.5529412 , 0.45882353],
         ...,
         [1.        , 0.8980392 , 0.8862745 ],
         [1.        , 0.89411765, 0.88235295],
         [0.99607843, 0.8862745 , 0.8745098 ]]],


       [[[0.77254903, 0.6509804 , 0.6313726 ],
         [0.78039217, 0.65882355, 0.6392157 ],
         [0.8039216 , 0.6745098 , 0.654902  ],
         ...,
         [0.8666667 , 0.74509805, 0.6313726 ],
         [0.8745098 , 0.7529412 , 0.6392157 ],
         [0.8784314 , 0.75686276, 0.6431373 ]],

        [[0.77254903, 0.6431373 , 0.62352943],
         [0.7921569 , 0.6627451 , 0.6431373 ],
         [0.8117647 , 0.68235296, 0.6627451 ],
         ...,
         [0.8666667 , 0.74509805, 0.6313726 ],
         [0.8745098 , 0.7529412 , 0.6392157 ],
         [0.8784314 , 0.75686276, 0.6431373 ]],

        [[0.8039216 , 0.6666667 , 0.65882355],
         [0.827451  , 0.6901961 , 0.68235296],
         [0.85882354, 0.70980394, 0.7058824 ],
         ...,
         [0.8627451 , 0.7411765 , 0.627451  ],
         [0.87058824, 0.7490196 , 0.63529414],
         [0.8784314 , 0.75686276, 0.6431373 ]],

        ...,

        [[0.9529412 , 0.88235295, 0.84313726],
         [0.95686275, 0.8862745 , 0.84705883],
         [0.94509804, 0.8745098 , 0.8352941 ],
         ...,
         [0.6901961 , 0.50980395, 0.3764706 ],
         [0.70980394, 0.5294118 , 0.3882353 ],
         [0.7254902 , 0.54509807, 0.40392157]],

        [[0.95686275, 0.8784314 , 0.84313726],
         [0.95686275, 0.8784314 , 0.84313726],
         [0.94509804, 0.8666667 , 0.83137256],
         ...,
         [0.69411767, 0.5137255 , 0.38039216],
         [0.70980394, 0.5294118 , 0.3882353 ],
         [0.7176471 , 0.5372549 , 0.39607844]],

        [[0.95686275, 0.8784314 , 0.84313726],
         [0.95686275, 0.8784314 , 0.84313726],
         [0.94509804, 0.8666667 , 0.83137256],
         ...,
         [0.69411767, 0.5137255 , 0.38039216],
         [0.7058824 , 0.5254902 , 0.38431373],
         [0.7137255 , 0.53333336, 0.39215687]]],


       ...,


       [[[0.5882353 , 0.5372549 , 0.5137255 ],
         [0.5882353 , 0.5372549 , 0.5137255 ],
         [0.5882353 , 0.5372549 , 0.5137255 ],
         ...,
         [0.654902  , 0.67058825, 0.68235296],
         [0.654902  , 0.67058825, 0.68235296],
         [0.654902  , 0.67058825, 0.68235296]],

        [[0.58431375, 0.53333336, 0.50980395],
         [0.58431375, 0.53333336, 0.50980395],
         [0.58431375, 0.53333336, 0.50980395],
         ...,
         [0.654902  , 0.67058825, 0.68235296],
         [0.654902  , 0.67058825, 0.68235296],
         [0.654902  , 0.67058825, 0.68235296]],

        [[0.5803922 , 0.52156866, 0.5019608 ],
         [0.58431375, 0.5254902 , 0.5058824 ],
         [0.5803922 , 0.5294118 , 0.5058824 ],
         ...,
         [0.654902  , 0.67058825, 0.68235296],
         [0.654902  , 0.67058825, 0.68235296],
         [0.654902  , 0.67058825, 0.68235296]],

        ...,

        [[0.47058824, 0.40392157, 0.3647059 ],
         [0.47058824, 0.40784314, 0.35686275],
         [0.47058824, 0.40392157, 0.3647059 ],
         ...,
         [0.5411765 , 0.50980395, 0.46666667],
         [0.5411765 , 0.50980395, 0.46666667],
         [0.5411765 , 0.50980395, 0.46666667]],

        [[0.4627451 , 0.4       , 0.34901962],
         [0.46666667, 0.40392157, 0.34509805],
         [0.47058824, 0.40784314, 0.35686275],
         ...,
         [0.5411765 , 0.50980395, 0.46666667],
         [0.5411765 , 0.50980395, 0.46666667],
         [0.5411765 , 0.50980395, 0.46666667]],

        [[0.45882353, 0.39607844, 0.3372549 ],
         [0.4627451 , 0.4       , 0.34117648],
         [0.47058824, 0.40784314, 0.34901962],
         ...,
         [0.54509807, 0.5137255 , 0.47058824],
         [0.54509807, 0.5137255 , 0.47058824],
         [0.54509807, 0.5137255 , 0.47058824]]],


       [[[0.49411765, 0.36078432, 0.22352941],
         [0.5019608 , 0.36862746, 0.23137255],
         [0.5058824 , 0.37254903, 0.23529412],
         ...,
         [0.47058824, 0.35686275, 0.22352941],
         [0.46666667, 0.3529412 , 0.22745098],
         [0.5647059 , 0.4627451 , 0.33333334]],

        [[0.49019608, 0.35686275, 0.21960784],
         [0.49803922, 0.3647059 , 0.22745098],
         [0.5019608 , 0.36862746, 0.23137255],
         ...,
         [0.46666667, 0.3529412 , 0.21960784],
         [0.4627451 , 0.34901962, 0.22352941],
         [0.56078434, 0.45882353, 0.32941177]],

        [[0.4862745 , 0.3529412 , 0.21568628],
         [0.49019608, 0.35686275, 0.21960784],
         [0.49803922, 0.3647059 , 0.22745098],
         ...,
         [0.4627451 , 0.34901962, 0.21568628],
         [0.45882353, 0.34509805, 0.21960784],
         [0.5568628 , 0.45490196, 0.3254902 ]],

        ...,

        [[0.6117647 , 0.4627451 , 0.30980393],
         [0.6117647 , 0.4627451 , 0.30980393],
         [0.6156863 , 0.46666667, 0.3137255 ],
         ...,
         [0.5921569 , 0.4745098 , 0.33333334],
         [0.60784316, 0.49411765, 0.36862746],
         [0.68235296, 0.5686275 , 0.44313726]],

        [[0.6039216 , 0.45490196, 0.3019608 ],
         [0.6039216 , 0.45490196, 0.3019608 ],
         [0.60784316, 0.45882353, 0.30588236],
         ...,
         [0.5882353 , 0.47058824, 0.32941177],
         [0.6039216 , 0.49019608, 0.3647059 ],
         [0.6784314 , 0.5647059 , 0.4392157 ]],

        [[0.5921569 , 0.44313726, 0.2901961 ],
         [0.59607846, 0.44705883, 0.29411766],
         [0.6039216 , 0.45490196, 0.3019608 ],
         ...,
         [0.5882353 , 0.47058824, 0.32941177],
         [0.6       , 0.4862745 , 0.36078432],
         [0.6745098 , 0.56078434, 0.43529412]]],


       [[[0.8117647 , 0.5294118 , 0.5764706 ],
         [0.827451  , 0.54509807, 0.5921569 ],
         [0.8509804 , 0.5686275 , 0.6156863 ],
         ...,
         [0.28235295, 0.35686275, 0.38431373],
         [0.27450982, 0.3647059 , 0.39607844],
         [0.26666668, 0.3647059 , 0.39215687]],

        [[0.84313726, 0.56078434, 0.60784316],
         [0.84705883, 0.5647059 , 0.6117647 ],
         [0.85490197, 0.57254905, 0.61960787],
         ...,
         [0.27058825, 0.3372549 , 0.36862746],
         [0.2627451 , 0.34117648, 0.3764706 ],
         [0.24705882, 0.3372549 , 0.36862746]],

        [[0.8666667 , 0.5921569 , 0.63529414],
         [0.8627451 , 0.5882353 , 0.6313726 ],
         [0.8627451 , 0.5803922 , 0.627451  ],
         ...,
         [0.3019608 , 0.34117648, 0.3764706 ],
         [0.2784314 , 0.33333334, 0.36862746],
         [0.2627451 , 0.32941177, 0.36078432]],

        ...,

        [[0.87058824, 0.654902  , 0.6431373 ],
         [0.87058824, 0.654902  , 0.6431373 ],
         [0.8745098 , 0.65882355, 0.64705884],
         ...,
         [0.90588236, 0.627451  , 0.61960787],
         [0.90588236, 0.6156863 , 0.6117647 ],
         [0.9098039 , 0.6117647 , 0.6117647 ]],

        [[0.8745098 , 0.64705884, 0.6392157 ],
         [0.88235295, 0.654902  , 0.64705884],
         [0.8901961 , 0.6627451 , 0.654902  ],
         ...,
         [0.9098039 , 0.6313726 , 0.62352943],
         [0.8980392 , 0.60784316, 0.6039216 ],
         [0.89411765, 0.6039216 , 0.6       ]],

        [[0.8745098 , 0.64705884, 0.6392157 ],
         [0.8862745 , 0.65882355, 0.6509804 ],
         [0.8980392 , 0.67058825, 0.6627451 ],
         ...,
         [0.90588236, 0.6313726 , 0.62352943],
         [0.89411765, 0.6039216 , 0.6       ],
         [0.8862745 , 0.59607846, 0.5921569 ]]]], dtype=float32),) and kwargs: {} for signature: (*, keras_tensor_312: TensorSpec(shape=(None, 224, 224, 3), dtype=tf.float32, name='keras_tensor_312')) -> Dict[['output_0', TensorSpec(shape=(None, 1), dtype=tf.float32, name='output_0')]].
Fallback to flat signature also failed due to: signature_wrapper___call__(keras_tensor_312): expected argument #0(zero-based) to be a Tensor; got ndarray ([[[[0.68235296 0.53333336 0.49019608]
   [0.6862745  0.5372549  0.49411765]
   [0.6901961  0.5411765  0.49803922]
   ...
   [0.85490197 0.72156864 0.68235296]
   [0.85882354 0.7254902  0.6862745 ]
   [0.85882354 0.7254902  0.6862745 ]]

  [[0.68235296 0.53333336 0.49019608]
   [0.6862745  0.5372549  0.49411765]
   [0.6901961  0.5411765  0.49803922]
   ...
   [0.85882354 0.7254902  0.6862745 ]
   [0.85882354 0.7254902  0.6862745 ]
   [0.85882354 0.7254902  0.6862745 ]]

  [[0.6862745  0.5372549  0.49411765]
   [0.6862745  0.5372549  0.49411765]
   [0.6901961  0.5411765  0.49803922]
   ...
   [0.8627451  0.7294118  0.6901961 ]
   [0.8627451  0.7294118  0.6901961 ]
   [0.8627451  0.7294118  0.6901961 ]]

  ...

  [[0.6666667  0.5019608  0.41568628]
   [0.67058825 0.5058824  0.41960785]
   [0.67058825 0.5058824  0.41960785]
   ...
   [0.8156863  0.627451   0.54901963]
   [0.827451   0.627451   0.5529412 ]
   [0.827451   0.627451   0.5529412 ]]

  [[0.6745098  0.50980395 0.42352942]
   [0.6745098  0.50980395 0.42352942]
   [0.6745098  0.50980395 0.42352942]
   ...
   [0.8117647  0.62352943 0.54509807]
   [0.81960785 0.61960787 0.54509807]
   [0.8235294  0.62352943 0.54901963]]

  [[0.6784314  0.5137255  0.42745098]
   [0.6784314  0.5137255  0.42745098]
   [0.6745098  0.50980395 0.42352942]
   ...
   [0.80784315 0.61960787 0.5411765 ]
   [0.81960785 0.61960787 0.54509807]
   [0.81960785 0.61960787 0.54509807]]]


 [[[0.7176471  0.4509804  0.3764706 ]
   [0.7176471  0.4509804  0.3764706 ]
   [0.72156864 0.45490196 0.38039216]
   ...
   [0.9764706  0.77254903 0.7294118 ]
   [0.9647059  0.7607843  0.7176471 ]
   [0.9490196  0.74509805 0.7019608 ]]

  [[0.7137255  0.44705883 0.37254903]
   [0.7137255  0.44705883 0.37254903]
   [0.7137255  0.44705883 0.37254903]
   ...
   [0.972549   0.76862746 0.7254902 ]
   [0.9607843  0.75686276 0.7137255 ]
   [0.9490196  0.74509805 0.7019608 ]]

  [[0.7137255  0.44705883 0.3647059 ]
   [0.7137255  0.44705883 0.3647059 ]
   [0.70980394 0.44313726 0.36078432]
   ...
   [0.9607843  0.75686276 0.7137255 ]
   [0.9529412  0.7490196  0.7058824 ]
   [0.94509804 0.7411765  0.69803923]]

  ...

  [[0.8235294  0.5411765  0.44705883]
   [0.827451   0.54509807 0.4509804 ]
   [0.8352941  0.5529412  0.45882353]
   ...
   [0.98039216 0.87058824 0.85882354]
   [0.98039216 0.87058824 0.85882354]
   [0.9764706  0.8666667  0.85490197]]

  [[0.8392157  0.5568628  0.4627451 ]
   [0.8352941  0.5529412  0.45882353]
   [0.8352941  0.5529412  0.45882353]
   ...
   [0.99607843 0.8862745  0.8745098 ]
   [0.99607843 0.8862745  0.8745098 ]
   [0.9882353  0.8784314  0.8666667 ]]

  [[0.85490197 0.57254905 0.47843137]
   [0.84313726 0.56078434 0.46666667]
   [0.8352941  0.5529412  0.45882353]
   ...
   [1.         0.8980392  0.8862745 ]
   [1.         0.89411765 0.88235295]
   [0.99607843 0.8862745  0.8745098 ]]]


 [[[0.77254903 0.6509804  0.6313726 ]
   [0.78039217 0.65882355 0.6392157 ]
   [0.8039216  0.6745098  0.654902  ]
   ...
   [0.8666667  0.74509805 0.6313726 ]
   [0.8745098  0.7529412  0.6392157 ]
   [0.8784314  0.75686276 0.6431373 ]]

  [[0.77254903 0.6431373  0.62352943]
   [0.7921569  0.6627451  0.6431373 ]
   [0.8117647  0.68235296 0.6627451 ]
   ...
   [0.8666667  0.74509805 0.6313726 ]
   [0.8745098  0.7529412  0.6392157 ]
   [0.8784314  0.75686276 0.6431373 ]]

  [[0.8039216  0.6666667  0.65882355]
   [0.827451   0.6901961  0.68235296]
   [0.85882354 0.70980394 0.7058824 ]
   ...
   [0.8627451  0.7411765  0.627451  ]
   [0.87058824 0.7490196  0.63529414]
   [0.8784314  0.75686276 0.6431373 ]]

  ...

  [[0.9529412  0.88235295 0.84313726]
   [0.95686275 0.8862745  0.84705883]
   [0.94509804 0.8745098  0.8352941 ]
   ...
   [0.6901961  0.50980395 0.3764706 ]
   [0.70980394 0.5294118  0.3882353 ]
   [0.7254902  0.54509807 0.40392157]]

  [[0.95686275 0.8784314  0.84313726]
   [0.95686275 0.8784314  0.84313726]
   [0.94509804 0.8666667  0.83137256]
   ...
   [0.69411767 0.5137255  0.38039216]
   [0.70980394 0.5294118  0.3882353 ]
   [0.7176471  0.5372549  0.39607844]]

  [[0.95686275 0.8784314  0.84313726]
   [0.95686275 0.8784314  0.84313726]
   [0.94509804 0.8666667  0.83137256]
   ...
   [0.69411767 0.5137255  0.38039216]
   [0.7058824  0.5254902  0.38431373]
   [0.7137255  0.53333336 0.39215687]]]


 ...


 [[[0.5882353  0.5372549  0.5137255 ]
   [0.5882353  0.5372549  0.5137255 ]
   [0.5882353  0.5372549  0.5137255 ]
   ...
   [0.654902   0.67058825 0.68235296]
   [0.654902   0.67058825 0.68235296]
   [0.654902   0.67058825 0.68235296]]

  [[0.58431375 0.53333336 0.50980395]
   [0.58431375 0.53333336 0.50980395]
   [0.58431375 0.53333336 0.50980395]
   ...
   [0.654902   0.67058825 0.68235296]
   [0.654902   0.67058825 0.68235296]
   [0.654902   0.67058825 0.68235296]]

  [[0.5803922  0.52156866 0.5019608 ]
   [0.58431375 0.5254902  0.5058824 ]
   [0.5803922  0.5294118  0.5058824 ]
   ...
   [0.654902   0.67058825 0.68235296]
   [0.654902   0.67058825 0.68235296]
   [0.654902   0.67058825 0.68235296]]

  ...

  [[0.47058824 0.40392157 0.3647059 ]
   [0.47058824 0.40784314 0.35686275]
   [0.47058824 0.40392157 0.3647059 ]
   ...
   [0.5411765  0.50980395 0.46666667]
   [0.5411765  0.50980395 0.46666667]
   [0.5411765  0.50980395 0.46666667]]

  [[0.4627451  0.4        0.34901962]
   [0.46666667 0.40392157 0.34509805]
   [0.47058824 0.40784314 0.35686275]
   ...
   [0.5411765  0.50980395 0.46666667]
   [0.5411765  0.50980395 0.46666667]
   [0.5411765  0.50980395 0.46666667]]

  [[0.45882353 0.39607844 0.3372549 ]
   [0.4627451  0.4        0.34117648]
   [0.47058824 0.40784314 0.34901962]
   ...
   [0.54509807 0.5137255  0.47058824]
   [0.54509807 0.5137255  0.47058824]
   [0.54509807 0.5137255  0.47058824]]]


 [[[0.49411765 0.36078432 0.22352941]
   [0.5019608  0.36862746 0.23137255]
   [0.5058824  0.37254903 0.23529412]
   ...
   [0.47058824 0.35686275 0.22352941]
   [0.46666667 0.3529412  0.22745098]
   [0.5647059  0.4627451  0.33333334]]

  [[0.49019608 0.35686275 0.21960784]
   [0.49803922 0.3647059  0.22745098]
   [0.5019608  0.36862746 0.23137255]
   ...
   [0.46666667 0.3529412  0.21960784]
   [0.4627451  0.34901962 0.22352941]
   [0.56078434 0.45882353 0.32941177]]

  [[0.4862745  0.3529412  0.21568628]
   [0.49019608 0.35686275 0.21960784]
   [0.49803922 0.3647059  0.22745098]
   ...
   [0.4627451  0.34901962 0.21568628]
   [0.45882353 0.34509805 0.21960784]
   [0.5568628  0.45490196 0.3254902 ]]

  ...

  [[0.6117647  0.4627451  0.30980393]
   [0.6117647  0.4627451  0.30980393]
   [0.6156863  0.46666667 0.3137255 ]
   ...
   [0.5921569  0.4745098  0.33333334]
   [0.60784316 0.49411765 0.36862746]
   [0.68235296 0.5686275  0.44313726]]

  [[0.6039216  0.45490196 0.3019608 ]
   [0.6039216  0.45490196 0.3019608 ]
   [0.60784316 0.45882353 0.30588236]
   ...
   [0.5882353  0.47058824 0.32941177]
   [0.6039216  0.49019608 0.3647059 ]
   [0.6784314  0.5647059  0.4392157 ]]

  [[0.5921569  0.44313726 0.2901961 ]
   [0.59607846 0.44705883 0.29411766]
   [0.6039216  0.45490196 0.3019608 ]
   ...
   [0.5882353  0.47058824 0.32941177]
   [0.6        0.4862745  0.36078432]
   [0.6745098  0.56078434 0.43529412]]]


 [[[0.8117647  0.5294118  0.5764706 ]
   [0.827451   0.54509807 0.5921569 ]
   [0.8509804  0.5686275  0.6156863 ]
   ...
   [0.28235295 0.35686275 0.38431373]
   [0.27450982 0.3647059  0.39607844]
   [0.26666668 0.3647059  0.39215687]]

  [[0.84313726 0.56078434 0.60784316]
   [0.84705883 0.5647059  0.6117647 ]
   [0.85490197 0.57254905 0.61960787]
   ...
   [0.27058825 0.3372549  0.36862746]
   [0.2627451  0.34117648 0.3764706 ]
   [0.24705882 0.3372549  0.36862746]]

  [[0.8666667  0.5921569  0.63529414]
   [0.8627451  0.5882353  0.6313726 ]
   [0.8627451  0.5803922  0.627451  ]
   ...
   [0.3019608  0.34117648 0.3764706 ]
   [0.2784314  0.33333334 0.36862746]
   [0.2627451  0.32941177 0.36078432]]

  ...

  [[0.87058824 0.654902   0.6431373 ]
   [0.87058824 0.654902   0.6431373 ]
   [0.8745098  0.65882355 0.64705884]
   ...
   [0.90588236 0.627451   0.61960787]
   [0.90588236 0.6156863  0.6117647 ]
   [0.9098039  0.6117647  0.6117647 ]]

  [[0.8745098  0.64705884 0.6392157 ]
   [0.88235295 0.654902   0.64705884]
   [0.8901961  0.6627451  0.654902  ]
   ...
   [0.9098039  0.6313726  0.62352943]
   [0.8980392  0.60784316 0.6039216 ]
   [0.89411765 0.6039216  0.6       ]]

  [[0.8745098  0.64705884 0.6392157 ]
   [0.8862745  0.65882355 0.6509804 ]
   [0.8980392  0.67058825 0.6627451 ]
   ...
   [0.90588236 0.6313726  0.62352943]
   [0.89411765 0.6039216  0.6       ]
   [0.8862745  0.59607846 0.5921569 ]]]]).

### Run Inference on Models

In [7]:
def measure_inference_time(model, X_test, batch_size, trials=500):
    inference_times = []

    print(f"Running {trials} inference trials on {len(X_test)} test images...")
    num_batches = len(X_test) // batch_size

    for i in range(trials):
        start_time = time.perf_counter()

        # Loop over the batches of X_test
        for j in range(num_batches):
            batch_start = j * batch_size
            batch_end = (j + 1) * batch_size
            batch_images = X_test[batch_start:batch_end]  # Get a batch of images
            inputs = tf.convert_to_tensor(batch_images) # Ensure the batch is in tensor format

            # Run inference for the batch
            model.signatures["serving_default"](inputs)  # Perform inference directly
            tf.keras.backend.clear_session()

        end_time = time.perf_counter()
        inference_time = end_time - start_time
        inference_times.append(inference_time)

        if i % 100 == 0:
            avg_inference = np.mean(inference_times)  # Average inference time per trial
            print(f"Step {i}: average inference time = {avg_inference:.6f} seconds")

        tf.keras.backend.clear_session()

    # Compute throughput (images per second)
    total_time = np.sum(inference_times)
    throughput = (trials * len(X_test)) / total_time
    return np.mean(inference_times), throughput

### Run Metrics on Models

In [8]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Load models
original_model = tf.saved_model.load('../best_model/model1/best_f1score_fold')
# optimized_model = tf.saved_model.load('../tensorRT_model/test_INT8')

def measure_metrics(model, X_test, Y_test, batch_size):
    print(f"Evaluating metrics on {len(X_test)} test images...")
    
    num_batches = len(X_test) // batch_size
    all_predicted_classes = []

    # Loop over the test dataset in batches
    for j in range(num_batches):
        batch_start = j * batch_size
        batch_end = (j + 1) * batch_size
        batch_images = X_test[batch_start:batch_end]
        inputs = tf.convert_to_tensor(batch_images)  # Convert batch to tensor

        # Run predictions
        result = model.signatures["serving_default"](inputs)  # Inference
        prediction_logits = result["output_0"].numpy()  # Extract logits
        probabilities = tf.nn.sigmoid(prediction_logits).numpy()  # Apply sigmoid
        predicted_classes = (probabilities > 0.5).astype(int)  # Threshold for binary classification

        # Collect predictions
        all_predicted_classes.extend(predicted_classes)

    # Handle any remaining images that don't fit evenly in batches
    remaining_samples = len(X_test) % batch_size
    if remaining_samples > 0:
        batch_images = X_test[-remaining_samples:]
        inputs = tf.convert_to_tensor(batch_images)
        result = model.signatures["serving_default"](inputs)
        prediction_logits = result["output_0"].numpy()
        probabilities = tf.nn.sigmoid(prediction_logits).numpy()
        predicted_classes = (probabilities > 0.5).astype(int)
        all_predicted_classes.extend(predicted_classes)

    # Flatten predictions and labels to ensure they are 1D arrays
    all_predicted_classes = np.array(all_predicted_classes).flatten()
    Y_test = np.array(Y_test).flatten()

    # Ensure the number of predictions matches the number of ground truth labels
    if len(all_predicted_classes) != len(Y_test):
        raise ValueError(f"Number of predicted classes ({len(all_predicted_classes)}) "
                         f"does not match the number of ground truth labels ({len(Y_test)}).")

    # Calculate metrics
    accuracy = accuracy_score(Y_test, all_predicted_classes)
    precision = precision_score(Y_test, all_predicted_classes, average="binary")
    recall = recall_score(Y_test, all_predicted_classes, average="binary")
    f1 = f1_score(Y_test, all_predicted_classes, average="binary")

    # Output metrics
    metrics_dict = {
        "accuracy": accuracy,
        "precision": precision,
        "recall": recall,
        "f1_score": f1,
    }

    print(f"Metrics: {metrics_dict}")
    return metrics_dict

In [None]:
print(tf.__version__)

2.16.1


### Perform Benchmarking

In [11]:
original_size = get_model_size(saved_model_dir)
optimized_size = get_model_size(optimized_model_dir)
print(f"Original Model Size: {original_size:.2f} MB")
print(f"Optimized Model Size: {optimized_size:.2f} MB")
print(f"Compression Ratio: {original_size / optimized_size:.2f}")

# Int8 compression ratio: 1.50, speedup factor: 2.50
# Load models
original_model = tf.saved_model.load(saved_model_dir)
optimized_model = tf.saved_model.load(optimized_model_dir)

# # Measure inference time for both models
# original_avg_time, original_throughput = measure_inference_time(
#     original_model, X_test, batch_size=batch_size
# )
# optimized_avg_time, optimized_throughput = measure_inference_time(
#     optimized_model, X_test, batch_size=batch_size
# )

# # Print results
# print("\nResults:")
# print(f"Original Model - Average Inference Time: {original_avg_time:.6f} seconds")
# print(f"Original Model - Throughput: {original_throughput:.2f} images/second")
# print(f"Optimized Model - Average Inference Time: {optimized_avg_time:.6f} seconds")
# print(f"Optimized Model - Throughput: {optimized_throughput:.2f} images/second")

# # Compute speedup
# speedup_factor = original_avg_time / optimized_avg_time
# print(f"\nSpeedup Factor: {speedup_factor:.2f}")

# Measure metrics for both models
original_metrics = measure_metrics(
    model=original_model, 
    X_test=X_test, 
    Y_test=Y_test, 
    batch_size=batch_size
)

optimized_metrics = measure_metrics(
    model=optimized_model, 
    X_test=X_test, 
    Y_test=Y_test, 
    batch_size=batch_size
)

print("\nComparison of Metrics:")
print(f"{'Metric':<12} {'Original Model':<15} {'Optimized Model':<15}")
print("-" * 42)
for metric in original_metrics.keys():
    print(f"{metric:<12} {original_metrics[metric]:<15.4f} {optimized_metrics[metric]:<15.4f}")

# Highlighting model differences
print("\nSummary of Changes:")
for metric in original_metrics.keys():
    change = optimized_metrics[metric] - original_metrics[metric]
    print(f"{metric:<12} Change: {change:.4f}")

Original Model Size: 18.90 MB
Optimized Model Size: 9.01 MB
Compression Ratio: 2.10


: 

In [None]:
# # Load the optimized TensorRT model
# saved_model_loaded = tf.saved_model.load('path/to/save/tensorrt_model')
# infer = saved_model_loaded.signatures['serving_default']

# # Example input data (adjust as per your model's input requirements)
# input_tensor = tf.convert_to_tensor(your_input_data)

# # Run inference
# output = infer(input_tensor)

# # Process output as needed