# Using TensorRT 

DenseNet201로 학습된 모델의 Inference 성능 향상을 위해 TensroRT를 사용해보자

In [1]:
import tensorflow as tf
import numpy as np
import time
import os

from tqdm import tqdm
from tensorflow.python.saved_model import tag_constants

In [2]:
# !pip install tensorflow==2.0.0

In [3]:
import tensorflow as tf
tf.__version__

'2.0.0'

In [4]:
# !nvidia-smi

In [5]:
# check TensorRT version
# print("TensorRT version: ")
# !dpkg -l | grep nvinfer

In [6]:
# from tensorflow.python.client import device_lib

# def check_tensor_core_gpu_present():
#     local_device_protos = device_lib.list_local_devices()
#     for line in local_device_protos:
#         if "compute capability" in str(line):
#             compute_capability = float(line.physical_device_desc.split("compute capability: ")[-1])
#             if compute_capability>=7.0:
#                 return True
    
# print("Tensor Core GPU Present:", check_tensor_core_gpu_present())
# tensor_core_gpu = check_tensor_core_gpu_present()

## PREDICT

In [7]:
img_size = (299, 299)
label_list = ['COVID','non-COVID']

In [8]:
# LOAD model
model = tf.keras.models.load_model('./model/saved_model')

In [9]:
# !saved_model_cli show --all --dir model/saved_model

In [16]:
# image path

# COVID
# image_path="data/TRAIN/COVID/0.jpeg"

# NON-COVID
image_path = "data/TRAIN/non-COVID/00293de0-a530-41dc-9621-0b3def01d06d.jpg"

In [17]:
# LOAD image and data preprocessing
start_time = time.time()

img = tf.keras.preprocessing.image.load_img(image_path, target_size=img_size)
img = tf.keras.preprocessing.image.img_to_array(img)
img /= 255.0
img = np.expand_dims(img, axis=0)

prediction = model.predict(img)

end_time = time.time()

In [18]:
elapsed_time = end_time - start_time

print("경과 시간: ", elapsed_time)

경과 시간:  0.6367158889770508


In [19]:
print("prediction: ", prediction)

val = prediction.item(0)

print("val: ", val)

prediction:  [[0.9992717]]
val:  0.9992716908454895


In [20]:
idx = int(np.round(val))

print("idx: ", idx)

idx:  1


In [21]:
label = label_list[idx]

print("label: ", label)

label:  non-COVID


# TensorRT

## TF-TRT FP32 model

In [22]:
import tensorflow as tf
import numpy as np
import time
import os

from tqdm import tqdm
from tensorflow.python.saved_model import tag_constants
from tensorflow.python.compiler.tensorrt import trt_convert as trt

In [23]:
print('Converting to TF-TRT FP32...')
conversion_params = trt.DEFAULT_TRT_CONVERSION_PARAMS._replace(precision_mode=trt.TrtPrecisionMode.FP32,
                                                               max_workspace_size_bytes=8000000000)

converter = trt.TrtGraphConverterV2(input_saved_model_dir='./model/saved_model',
                                    conversion_params=conversion_params)

converter.convert()
converter.save(output_saved_model_dir='./model/FP32_saved_model')

Converting to TF-TRT FP32...
INFO:tensorflow:Linked TensorRT version: (0, 0, 0)
INFO:tensorflow:Loaded TensorRT version: (0, 0, 0)
INFO:tensorflow:Running against TensorRT version 0.0.0
Instructions for updating:
If using Keras pass *_constraint arguments to layers.
INFO:tensorflow:Assets written to: ./model/FP32_saved_model/assets


In [24]:
!saved_model_cli show --all --dir model/FP32_saved_model


MetaGraphDef with tag-set: 'serve' contains the following SignatureDefs:

signature_def['__saved_model_init_op']:
  The given SavedModel SignatureDef contains the following input(s):
  The given SavedModel SignatureDef contains the following output(s):
    outputs['__saved_model_init_op'] tensor_info:
        dtype: DT_INVALID
        shape: unknown_rank
        name: NoOp
  Method name is: 

signature_def['serving_default']:
  The given SavedModel SignatureDef contains the following input(s):
    inputs['input_3'] tensor_info:
        dtype: DT_FLOAT
        shape: (-1, 299, 299, 3)
        name: serving_default_input_3:0
  The given SavedModel SignatureDef contains the following output(s):
    outputs['model_3'] tensor_info:
        dtype: DT_FLOAT
        shape: (-1, 1)
        name: PartitionedCall:0
  Method name is: tensorflow/serving/predict


In [25]:
# LOAD FP32 saved model
input_saved_model = "./model/FP32_saved_model"

saved_model_loaded = tf.saved_model.load(input_saved_model, tags=[tag_constants.SERVING])

In [26]:
img_size = (299, 299)
label_list = ['COVID','non-COVID']

# image path - COVID
# image_path="data/TRAIN/COVID/0.jpeg"

# image path - NON-COVID
image_path = "data/TRAIN/non-COVID/00293de0-a530-41dc-9621-0b3def01d06d.jpg"

# LOAD image and data preprocessing
img = tf.keras.preprocessing.image.load_img(image_path, target_size=img_size)
img = tf.keras.preprocessing.image.img_to_array(img)
img /= 255.0
img = np.expand_dims(img, axis=0)
img = tf.constant(img)

print(img.shape)

(1, 299, 299, 3)


In [27]:
signature_keys = list(saved_model_loaded.signatures.keys())
print(signature_keys)

infer = saved_model_loaded.signatures['serving_default']
print(infer.structured_outputs)

pred = infer(img)
print(pred)

key = list(pred.keys())[0]
print(key)

val = pred['model_3'].numpy().item(0)
print(val)

idx = int(np.round(val))
label = label_list[idx]

print("label: ", label)

['serving_default']
{'model_3': TensorSpec(shape=(None, 1), dtype=tf.float32, name='model_3')}
{'model_3': <tf.Tensor: id=442135, shape=(1, 1), dtype=float32, numpy=array([[0.9992717]], dtype=float32)>}
model_3
0.9992716908454895
label:  non-COVID


### Predict Function

In [28]:
# MAKE inference Function
def predict_cxr(image_path):
    img_size = (299, 299)
    label_list = ['COVID','non-COVID']
    img = tf.keras.preprocessing.image.load_img(image_path, target_size=img_size)
    img = tf.keras.preprocessing.image.img_to_array(img)
    img /= 255.0
    img = np.expand_dims(img, axis=0)
    img = tf.constant(img)
    
    signature_keys = list(saved_model_loaded.signatures.keys())
    
    infer = saved_model_loaded.signatures['serving_default']
    
    pred = infer(img)
    
    key = list(pred.keys())[0]
    val = pred[key].numpy().item(0)
    
    idx = int(np.round(val))
    label = label_list[idx]

    return label

In [29]:
# image path - COVID
image_path="data/TRAIN/COVID/0.jpeg"

# image path - NON-COVID
# image_path = "data/TRAIN/non-COVID/00293de0-a530-41dc-9621-0b3def01d06d.jpg"

In [30]:
# PREDICT CXR image
start_time = time.time()
label = predict_cxr(image_path)
end_time = time.time()

elapsed_time = end_time - start_time
print("경과 시간: ", elapsed_time)

print("label: ", label)

경과 시간:  0.36749696731567383
label:  COVID


## TF-TRT FP16 model

In [31]:
print('Converting to TF-TRT FP16...')

conversion_params = trt.DEFAULT_TRT_CONVERSION_PARAMS._replace(
    precision_mode=trt.TrtPrecisionMode.FP16,
    max_workspace_size_bytes=8000000000)

converter = trt.TrtGraphConverterV2(
   input_saved_model_dir='./model/saved_model', conversion_params=conversion_params)

converter.convert()

converter.save(output_saved_model_dir='./model/FP16_saved_model')

print('Done Converting to TF-TRT FP16')

Converting to TF-TRT FP16...
INFO:tensorflow:Linked TensorRT version: (0, 0, 0)
INFO:tensorflow:Loaded TensorRT version: (0, 0, 0)
INFO:tensorflow:Running against TensorRT version 0.0.0
INFO:tensorflow:Assets written to: ./model/FP16_saved_model/assets
Done Converting to TF-TRT FP16


In [32]:
# LOAD FP16 saved model
input_saved_model = "./model/FP16_saved_model"

saved_model_loaded = tf.saved_model.load(input_saved_model, tags=[tag_constants.SERVING])

In [33]:
# image path - COVID
image_path="data/TRAIN/COVID/0.jpeg"

# image path - NON-COVID
# image_path = "data/TRAIN/non-COVID/00293de0-a530-41dc-9621-0b3def01d06d.jpg"

In [37]:
# PREDICT CXR image
start_time = time.time()
label = predict_cxr(image_path)
end_time = time.time()

elapsed_time = end_time - start_time
print("경과 시간: ", elapsed_time)

print("label: ", label)

경과 시간:  0.49012207984924316
label:  COVID


# TOTAL

### 속도 체크하기 - 방법1

In [62]:
covid_lst = os.listdir('./data/TRAIN/COVID')
covid_lst.pop(covid_lst.index(".ipynb_checkpoints"))
print(len(covid_lst))

covid_lst = covid_lst[:100]
print(len(covid_lst))

non_covid_lst = os.listdir('./data/TRAIN/non-COVID')
non_covid_lst.pop(non_covid_lst.index(".ipynb_checkpoints"))
print(len(non_covid_lst))

non_covid_lst = non_covid_lst[:100]
print(len(non_covid_lst))

1069
100
6092
100


In [64]:
# batch_size = len(covid_lst)
# batched_input = np.zeros((batch_size, 299, 299, 3), dtype=np.float32)

# for idx in tqdm(range(batch_size)):
#     image_path = "./data/TRAIN/COVID/" + covid_lst[idx]
#     img_size = (299, 299)
    
#     img = tf.keras.preprocessing.image.load_img(image_path, target_size=img_size)
#     img = tf.keras.preprocessing.image.img_to_array(img)
#     img /= 255.0
#     img = np.expand_dims(img, axis=0)
#     batched_input[idx, :] = img
    
# batched_input = tf.constant(batched_input)

# print('batched_input shape: ', batched_input.shape)

In [65]:
# def benchmark_tftrt(input_saved_model):
#     saved_model_loaded = tf.saved_model.load(input_saved_model, tags=[tag_constants.SERVING])
#     infer = saved_model_loaded.signatures['serving_default']

#     N_warmup_run = 50
#     N_run = 1000
#     elapsed_time = []
    
# #     for i in tqdm(range(N_warmup_run)):
# #       labeling = infer(batched_input)

#     for i in tqdm(range(N_run)):
#       start_time = time.time()
#       labeling = infer(batched_input)
#       #prob = labeling['probs'].numpy()
#       end_time = time.time()
#       elapsed_time = np.append(elapsed_time, end_time - start_time)
#       if i % 50 == 0:
#         print('Step {}: {:4.1f}ms'.format(i, (elapsed_time[-50:].mean()) * 1000))

#     print('Throughput: {:.0f} images/s'.format(N_run * batch_size / elapsed_time.sum()))

In [66]:
# benchmark_tftrt("./model/FP32_saved_model")

### 속도 체크하기 - 방법2

In [38]:
def predict_cxr(image_path, saved_model):
    img_size = (299, 299)
    label_list = ['COVID','non-COVID']
    img = tf.keras.preprocessing.image.load_img(image_path, target_size=img_size)
    img = tf.keras.preprocessing.image.img_to_array(img)
    img /= 255.0
    img = np.expand_dims(img, axis=0)
    img = tf.constant(img)
    signature_keys = list(saved_model.signatures.keys())
    infer = saved_model.signatures['serving_default']
    pred = infer(img)
    key = list(pred.keys())[0]
    val = pred[key].numpy().item(0)
    idx = int(np.round(val))
    label = label_list[idx]
    return label

In [39]:
# image path - COVID
image_path="data/TRAIN/COVID/0.jpeg"

# image path - NON-COVID
# image_path = "data/TRAIN/non-COVID/00293de0-a530-41dc-9621-0b3def01d06d.jpg"

### LOAD MODEL

In [47]:
native_model = tf.saved_model.load("./model/saved_model", tags=[tag_constants.SERVING])

In [43]:
FP32_model = tf.saved_model.load("./model/FP32_saved_model", tags=[tag_constants.SERVING])

In [45]:
FP16_model = tf.saved_model.load("./model/FP16_saved_model", tags=[tag_constants.SERVING])

In [58]:
start_time = time.time()
label = predict_cxr(image_path, native_model)
end_time = time.time()
print(native_model)
print(f"label: {label}, 경과 시간: {end_time-start_time}")

<tensorflow.python.saved_model.load.Loader._recreate_base_user_object.<locals>._UserObject object at 0x7f602c6e7198>
label: COVID, 경과 시간: 0.4170095920562744


In [64]:
start_time = time.time()
label = predict_cxr(image_path, FP32_model)
end_time = time.time()
print(FP32_model)
print(f"label: {label}, 경과 시간: {end_time-start_time}")

<tensorflow.python.saved_model.load.Loader._recreate_base_user_object.<locals>._UserObject object at 0x7f6080de5b00>
label: COVID, 경과 시간: 0.3546781539916992


In [65]:
start_time = time.time()
label = predict_cxr(image_path, FP16_model)
end_time = time.time()
print(FP16_model)
print(f"label: {label}, 경과 시간: {end_time-start_time}")

<tensorflow.python.saved_model.load.Loader._recreate_base_user_object.<locals>._UserObject object at 0x7f6057264390>
label: COVID, 경과 시간: 0.37476181983947754
