This notebook evaluates the inference performance of TensorFlow-TensorRT (TF-TRT). The first half benchmarks a model that uses the official ImageNet weights, and the second half benchmarks a model that I trained myself.

In [None]:
# Restart the VM when the RAM is full.
# Sending signal 9 to the kernel's own process ends it immediately, so run this
# cell by hand only; nothing after it executes in the same session.
import os

kernel_pid = os.getpid()
os.kill(kernel_pid, 9)

In [None]:
# Install the image/plotting helpers and the TensorFlow GPU build (pinned to 2.0.0)
# used by the rest of the notebook.
!pip install pillow matplotlib
!pip install tensorflow-gpu==2.0.0

In [None]:
import tensorflow as tf

# Show which TensorFlow build the kernel actually imported.
tf_version = tf.version.VERSION
print("Tensorflow version: ", tf_version)

In [None]:
!wget https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/nvidia-machine-learning-repo-ubuntu1804_1.0.0-1_amd64.deb

!dpkg -i nvidia-machine-learning-repo-*.deb
!apt-get update

!sudo apt-get install libnvinfer5

In [None]:
# Check the TensorRT version: list the installed Debian packages whose name
# contains "nvinfer" (the TensorRT libraries installed above).
print("TensorRT version: ")
!dpkg -l | grep nvinfer

In [None]:
from tensorflow.python.client import device_lib

def check_tensor_core_gpu_present():
    """Report whether any local device has compute capability 7.0 or higher.

    Walks the devices returned by ``device_lib.list_local_devices()`` and parses
    the number that follows "compute capability: " in each device's
    ``physical_device_desc``. Devices whose description lacks that marker are
    skipped.

    Returns:
        bool: True if at least one device reports compute capability >= 7.0,
        otherwise False.
    """
    marker = "compute capability: "
    for device in device_lib.list_local_devices():
        description = device.physical_device_desc
        # Test the same field that is parsed below, so a device without the
        # marker is skipped instead of breaking the float() conversion.
        if marker not in description:
            continue
        compute_capability = float(description.split(marker)[-1])
        if compute_capability >= 7.0:
            return True
    # Always return a real boolean rather than falling through with None.
    return False
    
# Enumerate the devices once and reuse the answer for both the variable and the message.
tensor_core_gpu = check_tensor_core_gpu_present()
print("Tensor Core GPU Present:", tensor_core_gpu)

In [1]:
from __future__ import absolute_import, division, print_function, unicode_literals
import os
import time

import numpy as np
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow import keras
from tensorflow.python.compiler.tensorrt import trt_convert as trt
from tensorflow.python.saved_model import tag_constants
from tensorflow.keras.applications import densenet, inception_v3, mobilenet_v2, resnet50, vgg16, vgg19
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.resnet50 import preprocess_input, decode_predictions

In [None]:
%cd /content/
!mkdir ./data
# Download eight sample images into ./data as img0.JPG ... img7.JPG.
# Later cells build paths from this naming pattern ('./data/img%d.JPG'), so keep the names.
!wget -O ./data/img0.JPG "https://thumbs-prod.si-cdn.com/ej9KRK9frB5AXD6W9LXKFnuRc-0=/fit-in/1600x0/https://public-media.si-cdn.com/filer/ad/7b/ad7b3860-ad5f-43dc-800e-af57830cd1d3/labrador.jpg"
!wget -O ./data/img1.JPG "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcRHjtOYuK2n_CZoxQs9zxK96N1_qMiv3ZWSYQ&usqp=CAUg"
!wget -O ./data/img2.JPG "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcRoEAt7d8PuZPBxWsjzvgQ_Y8Zfhgn1MvvA3Q&usqp=CAU"
!wget -O ./data/img3.JPG "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcQ9BZGaN2WhgsJJfLmEcEiwMRmgpSzJPjnacg&usqp=CAU"
!wget -O ./data/img4.JPG "https://media.nature.com/lw800/magazine-assets/d41586-020-01430-5/d41586-020-01430-5_17977552.jpg"
!wget -O ./data/img5.JPG "https://hips.hearstapps.com/hmg-prod.s3.amazonaws.com/images/golden-retriever-royalty-free-image-506756303-1560962726.jpg?crop=1.00xw:0.756xh;0,0.0756xh&resize=980:*"
!wget -O ./data/img6.JPG "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcRH7_Z_Frxo_RbvJ6StY2TzQ0zFCgv6podjzw&usqp=CAU"
!wget -O ./data/img7.JPG "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcR4X0fwAtbfiSwRvN3-Fk1pE1rKMsAgWjcpbA&usqp=CAU"

In [None]:
from tensorflow.keras.preprocessing import image
import os

# Grid layout used for every image overview in the first half of the notebook.
plot_row = 2
plot_col = 4

# Reference model: ResNet50 with the official ImageNet weights, fed size x size inputs.
model_name = 'resnet'
model = resnet50.ResNet50(weights='imagenet')
size = 224

# Sorted so the images are displayed (and later indexed) in a stable order.
img_names = ['./data/' + name for name in sorted(os.listdir('./data/'))]

# Create the grid once, with the same shape that is used to place the images,
# so every image gets its own axes. squeeze=False keeps `axes` a 2-D array
# even for a single row or column.
fig, axes = plt.subplots(nrows=plot_row, ncols=plot_col, squeeze=False)
for ax in axes.flat:
  ax.axis('off');  # also hides grid cells that receive no image

# zip stops at the grid size, so extra files in ./data/ are simply not drawn.
for ax, img_path in zip(axes.flat, img_names):
  img = image.load_img(img_path, target_size=(size, size))
  ax.imshow(img);

In [None]:
# Classify every downloaded image with the Keras model and show each one
# titled with its top-1 label.
for idx, img_path in enumerate(img_names):
  img = image.load_img(img_path, target_size=(size, size))
  # Add the batch axis, then apply the ResNet50 input preprocessing.
  x = preprocess_input(np.expand_dims(image.img_to_array(img), axis=0))

  preds = model.predict(x)
  # decode_predictions yields one list per sample of
  # (class, description, probability) tuples; keep the list for our single sample.
  top3 = decode_predictions(preds, top=3)[0]
  print('{} - Predicted: {}'.format(img_path, top3))

  plt.subplot(plot_row, plot_col, idx + 1)
  plt.imshow(img);
  plt.axis('off');
  plt.title(top3[0][1])

In [None]:
# Export the whole Keras model to the directory named by `model_name`;
# the TF-TRT converter cells read it back from there.
model.save(filepath=model_name)
# To inspect the exported signatures:
# !saved_model_cli show --all --dir {model_name}

In [None]:
# Reload the exported model and classify the first downloaded image with it.
model = tf.keras.models.load_model(model_name)
img_path = './data/img0.JPG'
img = image.load_img(img_path, target_size=(size, size))
# Add the batch axis, then apply the ResNet50 input preprocessing.
x = preprocess_input(np.expand_dims(image.img_to_array(img), axis=0))

preds = model.predict(x)
# One list of (class, description, probability) tuples per sample; take ours.
top3 = decode_predictions(preds, top=3)[0]
print('{} - Predicted: {}'.format(img_path, top3))
plt.subplot(2, 2, 1)
plt.imshow(img);
plt.axis('off');
plt.title(top3[0][1])

In [13]:
# Assemble one fixed batch of preprocessed images; every benchmark below reuses it.
batch_size = 8
batched_input = np.zeros((batch_size, size, size, 3), dtype=np.float32)

for slot in range(batch_size):
  # Wrap around the available images when there are fewer than batch_size.
  img_path = './data/img%d.JPG' % (slot % len(img_names))
  img = image.load_img(img_path, target_size=(size, size))
  x = preprocess_input(np.expand_dims(image.img_to_array(img), axis=0))
  batched_input[slot, :] = x

# Freeze the batch as a TensorFlow constant.
batched_input = tf.constant(batched_input)
print('batched_input shape: ', batched_input.shape)

batched_input shape:  (8, 224, 224, 3)


In [None]:
# Baseline: time the native Keras model on the shared batch.
model = tf.keras.models.load_model(model_name)

# Benchmarking throughput
N_warmup_run = 50
N_run = 1000
elapsed_time = []  # per-batch durations in seconds

# Warm-up iterations are executed but not timed.
for _ in range(N_warmup_run):
  preds = model.predict(batched_input)

for i in range(N_run):
  # perf_counter is the clock intended for measuring short intervals.
  start_time = time.perf_counter()
  preds = model.predict(batched_input)
  end_time = time.perf_counter()
  # A list append is O(1); np.append would copy the whole array every iteration.
  elapsed_time.append(end_time - start_time)
  if i % 50 == 0:
    # Mean over the most recent (up to) 50 timed batches, in milliseconds.
    print('Step {}: {:.3f}ms'.format(i, np.mean(elapsed_time[-50:]) * 1000))

# Convert once at the end so `elapsed_time` is still a NumPy array afterwards.
elapsed_time = np.array(elapsed_time)
# Average time per image in milliseconds.
print('Throughput: {:.3f} ms/image'.format(elapsed_time.sum() * 1000 / (N_run * batch_size)))

In [None]:
def predict_tftrt(input_saved_model):
    """Runs prediction on a single image and shows the result.

    Loads the SavedModel, prints its signature keys and the structured outputs
    of the 'serving_default' signature, classifies './data/img0.JPG', prints
    the top-3 predictions and plots the image titled with the top-1 label.

    Args:
        input_saved_model (string): Name of the input model stored in the current dir
    """
    img_path = './data/img0.JPG'
    img = image.load_img(img_path, target_size=(224, 224))
    # Add the batch axis, apply the ResNet50 preprocessing, wrap as a tensor.
    x = tf.constant(preprocess_input(np.expand_dims(image.img_to_array(img), axis=0)))

    saved_model_loaded = tf.saved_model.load(input_saved_model, tags=[tag_constants.SERVING])
    signature_keys = list(saved_model_loaded.signatures.keys())
    print(signature_keys)

    infer = saved_model_loaded.signatures['serving_default']
    print(infer.structured_outputs)

    labeling = infer(x)
    # Prefer the 'probs' output; if the exported model names its output
    # differently, fall back to the first output instead of raising KeyError.
    output_key = 'probs' if 'probs' in labeling else next(iter(labeling))
    preds = labeling[output_key].numpy()
    # Decode once and reuse for both the printout and the plot title.
    top3 = decode_predictions(preds, top=3)[0]
    print('{} - Predicted: {}'.format(img_path, top3))
    plt.subplot(2, 2, 1)
    plt.imshow(img);
    plt.axis('off');
    plt.title(top3[0][1])

In [7]:
def benchmark_tftrt(input_saved_model, N_warmup_run=50, N_run=1000):
    """Time the 'serving_default' signature of a SavedModel on `batched_input`.

    Prints a running mean every 50 timed steps and, at the end, the average
    time per image in milliseconds. Reads the notebook-level `batched_input`
    and `batch_size`.

    Args:
        input_saved_model (string): Directory of the SavedModel to benchmark.
        N_warmup_run (int): Untimed iterations executed before measuring.
        N_run (int): Timed iterations; must be at least 1.

    Raises:
        ValueError: If N_run is smaller than 1.
    """
    if N_run < 1:
        raise ValueError('N_run must be at least 1')

    saved_model_loaded = tf.saved_model.load(input_saved_model, tags=[tag_constants.SERVING])
    infer = saved_model_loaded.signatures['serving_default']

    def run_batch():
        # Read every output back with .numpy() so the timed call waits for the
        # result; TensorFlow may return from a call before GPU work has finished.
        for output in infer(batched_input).values():
            output.numpy()

    for _ in range(N_warmup_run):
        run_batch()

    elapsed_time = []  # per-batch durations in seconds
    for i in range(N_run):
        # perf_counter is the clock intended for measuring short intervals.
        start_time = time.perf_counter()
        run_batch()
        # A list append is O(1); np.append would copy the array every iteration.
        elapsed_time.append(time.perf_counter() - start_time)
        if i % 50 == 0:
            # Mean over the most recent (up to) 50 timed batches, in milliseconds.
            print('Step {}: {:.3f}ms'.format(i, np.mean(elapsed_time[-50:]) * 1000))

    # Average time per image in milliseconds.
    print('Throughput: {:.3f} ms/image'.format(np.sum(elapsed_time) * 1000 / (N_run * batch_size)))

In [None]:
# Convert the exported model with TF-TRT at each floating-point precision,
# save the result next to the original, and benchmark it. One loop replaces
# two copy-pasted conversion sequences; order and printed messages are unchanged.
for precision_name, precision_mode in (('FP32', trt.TrtPrecisionMode.FP32),
                                       ('FP16', trt.TrtPrecisionMode.FP16)):
    print('Converting to TF-TRT {}...'.format(precision_name))
    conversion_params = trt.DEFAULT_TRT_CONVERSION_PARAMS._replace(
        precision_mode=precision_mode,
        max_workspace_size_bytes=8000000000)
    converter = trt.TrtGraphConverterV2(
        input_saved_model_dir=model_name,
        conversion_params=conversion_params)
    converter.convert()

    trt_saved_model_dir = model_name + '_TFTRT_' + precision_name
    converter.save(output_saved_model_dir=trt_saved_model_dir)
    print('Done Converting to TF-TRT {}'.format(precision_name))
    benchmark_tftrt(trt_saved_model_dir)

In [None]:
int8_saved_model_dir = model_name + '_TFTRT_INT8'


def calibration_input_fn():
    """Yield the calibration inputs: the shared batch, once, as a 1-tuple."""
    yield (batched_input, )


# INT8 conversion runs with calibration enabled, fed by the generator above.
print('Converting to TF-TRT INT8...')
conversion_params = trt.DEFAULT_TRT_CONVERSION_PARAMS._replace(
    precision_mode=trt.TrtPrecisionMode.INT8,
    max_workspace_size_bytes=8000000000,
    use_calibration=True)
converter = trt.TrtGraphConverterV2(input_saved_model_dir=model_name,
                                    conversion_params=conversion_params)
converter.convert(calibration_input_fn=calibration_input_fn)
converter.save(output_saved_model_dir=int8_saved_model_dir)
print('Done Converting to TF-TRT INT8')
benchmark_tftrt(int8_saved_model_dir)

Evaluate performance using a model that I trained myself (ResNet50 trained on Fashion-MNIST).

In [2]:
# Second experiment: an untrained ResNet50 with a 10-class head, to be trained
# on Fashion-MNIST and saved under its own name.
model_name = 'resnet_mnist'
size = 224
model = resnet50.ResNet50(weights=None, classes=10)

# load_data() returns the (images, labels) pairs for the train and test splits.
train_split, test_split = tf.keras.datasets.fashion_mnist.load_data()
train_image, train_label = train_split
test_image, test_label = test_split

In [None]:
import cv2 as cv

# Take the first 500 training images, resize each to size x size and expand
# the single gray channel to three channels for the ResNet input.
train_data = np.array([
    cv.cvtColor(cv.resize(img, (size, size)), cv.COLOR_GRAY2BGR)
    for img in train_image[:500]
])
print(train_data.shape)

# Divide the pixel values by 255.
train_data = train_data / 255

model.compile(
    optimizer="Adam",
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Labels are sliced to the same 500 samples as the images.
model.fit(train_data, train_label[0:500], epochs=50, batch_size=6)
model.save(model_name)

In [4]:
import cv2 as cv

# Reload the trained model and build the fixed benchmark batch from the first
# `batch_size` test images, preprocessed the same way as the training data.
model = tf.keras.models.load_model(model_name)
batch_size = 8

test_data = []
for img in test_image[:batch_size]:
  resized_img = cv.resize(img, (size, size))
  # Expand the single gray channel to three channels for the ResNet input.
  resized_img = cv.cvtColor(resized_img, cv.COLOR_GRAY2BGR)
  test_data.append(resized_img)

test_data = np.array(test_data)
test_data = test_data / 255.0

# The batch is created directly from test_data; no placeholder array is needed.
batched_input = tf.constant(test_data, dtype=tf.float32)
print('batched_input shape: ', batched_input.shape)

batched_input shape:  (8, 224, 224, 3)


In [None]:
# Benchmarking throughput
# Baseline for the self-trained model: time native Keras on the shared batch.
N_warmup_run = 50
N_run = 1000
elapsed_time = []  # per-batch durations in seconds

# Warm-up iterations are executed but not timed.
for _ in range(N_warmup_run):
  preds = model.predict(batched_input)

for i in range(N_run):
  # perf_counter is the clock intended for measuring short intervals.
  start_time = time.perf_counter()
  preds = model.predict(batched_input)
  end_time = time.perf_counter()
  # A list append is O(1); np.append would copy the whole array every iteration.
  elapsed_time.append(end_time - start_time)
  if i % 50 == 0:
    # Mean over the most recent (up to) 50 timed batches, in milliseconds.
    print('Step {}: {:.3f}ms'.format(i, np.mean(elapsed_time[-50:]) * 1000))

# Convert once at the end so `elapsed_time` is still a NumPy array afterwards.
elapsed_time = np.array(elapsed_time)
# Average time per image in milliseconds.
print('Throughput: {:.3f} ms/image'.format(elapsed_time.sum() * 1000 / (N_run * batch_size)))

In [None]:
def benchmark_tftrt(input_saved_model, N_warmup_run=50, N_run=1000):
    """Time the 'serving_default' signature of a SavedModel on `batched_input`.

    Prints the loaded signature, a running mean every 50 timed steps and, at
    the end, the average time per image in milliseconds. Reads the
    notebook-level `batched_input` and `batch_size`.

    Args:
        input_saved_model (string): Directory of the SavedModel to benchmark.
        N_warmup_run (int): Untimed iterations executed before measuring.
        N_run (int): Timed iterations; must be at least 1.

    Raises:
        ValueError: If N_run is smaller than 1.
    """
    if N_run < 1:
        raise ValueError('N_run must be at least 1')

    saved_model_loaded = tf.saved_model.load(input_saved_model, tags=[tag_constants.SERVING])
    infer = saved_model_loaded.signatures['serving_default']
    print('infer:', infer)

    def run_batch():
        # Read every output back with .numpy() so the timed call waits for the
        # result; TensorFlow may return from a call before GPU work has finished.
        for output in infer(batched_input).values():
            output.numpy()

    for _ in range(N_warmup_run):
        run_batch()

    elapsed_time = []  # per-batch durations in seconds
    for i in range(N_run):
        # perf_counter is the clock intended for measuring short intervals.
        start_time = time.perf_counter()
        run_batch()
        # A list append is O(1); np.append would copy the array every iteration.
        elapsed_time.append(time.perf_counter() - start_time)
        if i % 50 == 0:
            # Mean over the most recent (up to) 50 timed batches, in milliseconds.
            print('Step {}: {:.3f}ms'.format(i, np.mean(elapsed_time[-50:]) * 1000))

    # Average time per image in milliseconds.
    print('Throughput: {:.3f} ms/image'.format(np.sum(elapsed_time) * 1000 / (N_run * batch_size)))

# Convert the self-trained model with TF-TRT at each floating-point precision,
# save the result next to the original, and benchmark it. One loop replaces
# two copy-pasted conversion sequences; order and printed messages are unchanged.
for precision_name, precision_mode in (('FP32', trt.TrtPrecisionMode.FP32),
                                       ('FP16', trt.TrtPrecisionMode.FP16)):
    print('Converting to TF-TRT {}...'.format(precision_name))
    conversion_params = trt.DEFAULT_TRT_CONVERSION_PARAMS._replace(
        precision_mode=precision_mode,
        max_workspace_size_bytes=8000000000)
    converter = trt.TrtGraphConverterV2(
        input_saved_model_dir=model_name,
        conversion_params=conversion_params)
    converter.convert()

    trt_saved_model_dir = model_name + '_TFTRT_' + precision_name
    converter.save(output_saved_model_dir=trt_saved_model_dir)
    print('Done Converting to TF-TRT {}'.format(precision_name))
    benchmark_tftrt(trt_saved_model_dir)

In [None]:
def benchmark_tftrt(input_saved_model, N_warmup_run=50, N_run=1000):
    """Time the 'serving_default' signature of a SavedModel on `batched_input`.

    Prints the loaded signature, a running mean every 50 timed steps and, at
    the end, the average time per image in milliseconds. Reads the
    notebook-level `batched_input` and `batch_size`.

    Args:
        input_saved_model (string): Directory of the SavedModel to benchmark.
        N_warmup_run (int): Untimed iterations executed before measuring.
        N_run (int): Timed iterations; must be at least 1.

    Raises:
        ValueError: If N_run is smaller than 1.
    """
    if N_run < 1:
        raise ValueError('N_run must be at least 1')

    saved_model_loaded = tf.saved_model.load(input_saved_model, tags=[tag_constants.SERVING])
    infer = saved_model_loaded.signatures['serving_default']
    print('infer:', infer)

    def run_batch():
        # Read every output back with .numpy() so the timed call waits for the
        # result; TensorFlow may return from a call before GPU work has finished.
        for output in infer(batched_input).values():
            output.numpy()

    for _ in range(N_warmup_run):
        run_batch()

    elapsed_time = []  # per-batch durations in seconds
    for i in range(N_run):
        # perf_counter is the clock intended for measuring short intervals.
        start_time = time.perf_counter()
        run_batch()
        # A list append is O(1); np.append would copy the array every iteration.
        elapsed_time.append(time.perf_counter() - start_time)
        if i % 50 == 0:
            # Mean over the most recent (up to) 50 timed batches, in milliseconds.
            print('Step {}: {:.3f}ms'.format(i, np.mean(elapsed_time[-50:]) * 1000))

    # Average time per image in milliseconds.
    print('Throughput: {:.3f} ms/image'.format(np.sum(elapsed_time) * 1000 / (N_run * batch_size)))

int8_saved_model_dir = model_name + '_TFTRT_INT8'


def calibration_input_fn():
    """Yield the calibration inputs: the shared batch, once, as a 1-tuple."""
    yield (batched_input, )


# INT8 conversion runs with calibration enabled, fed by the generator above.
print('Converting to TF-TRT INT8...')
conversion_params = trt.DEFAULT_TRT_CONVERSION_PARAMS._replace(
    precision_mode=trt.TrtPrecisionMode.INT8,
    max_workspace_size_bytes=8000000000,
    use_calibration=True)
converter = trt.TrtGraphConverterV2(input_saved_model_dir=model_name,
                                    conversion_params=conversion_params)
converter.convert(calibration_input_fn=calibration_input_fn)
converter.save(output_saved_model_dir=int8_saved_model_dir)
print('Done Converting to TF-TRT INT8')
benchmark_tftrt(int8_saved_model_dir)