# Inference test using TF-TRT

#### DenseNet201로 학습된 모델의 Inference 성능 향상을 위해 TensorRT를 사용해보자.

In [1]:
import tensorflow as tf
import numpy as np
import time
import os

from tqdm import tqdm
from tensorflow.python.saved_model import tag_constants

Check TensorFlow version

In [2]:
# Show the installed TensorFlow version as this cell's output.
tf.__version__

'2.3.1'

Check GPU

In [4]:
# !nvidia-smi

Check TensorRT version

In [7]:
# print("TensorRT version: ")
# !dpkg -l | grep nvinfer

## PREDICT CODE

In [15]:
# Class names indexed by the rounded model score (0 -> COVID, 1 -> non-COVID).
label_list = ['COVID', 'non-COVID']
# Target size handed to load_img when a test image is read.
img_size = (224, 224)

In [10]:
# Restore the DenseNet201 Keras model from its saved-model directory.
model = tf.keras.models.load_model(
    filepath='../models/image_models/DenseNet201',
)



In [21]:
# !saved_model_cli show --all --dir ../models/image_models/DenseNet201

In [16]:
# Test image to classify. Swap in the commented path below to try the
# non-COVID sample instead.

# COVID sample
image_path = "./test_data/covid-1.png"

# non-COVID sample
# image_path = "./test_data/normal-1.jpg"

In [19]:
# Time one end-to-end prediction with the native Keras model: read the image,
# divide its pixel values by 255, add a batch axis and run model.predict.
# perf_counter is a monotonic high-resolution clock, so the measured interval
# cannot be distorted by system clock adjustments the way time.time() can.
start_time = time.perf_counter()

img = tf.keras.preprocessing.image.load_img(image_path, target_size=img_size)
img = tf.keras.preprocessing.image.img_to_array(img)
img /= 255.0
img = np.expand_dims(img, axis=0)

prediction = model.predict(img)

end_time = time.perf_counter()

elapsed_time = end_time - start_time
print("경과 시간: ", elapsed_time)

# print("prediction: ", prediction)

# First element of the prediction array is the score for this single image.
val = prediction.item(0)
print("val: ", val)

# Round the score to a class index, then clamp it into label_list's range so
# a score outside [0, 1] can neither raise IndexError nor wrap around through
# a negative index.
idx = int(np.clip(np.round(val), 0, len(label_list) - 1))
# print("idx: ", idx)

label = label_list[idx]
print("label: ", label)

경과 시간:  0.08185672760009766
val:  0.0004235157393850386
label:  COVID


# TensorRT

## TF-TRT FP32 model

In [20]:
import tensorflow as tf
import numpy as np
import time
import os

from tqdm import tqdm
from tensorflow.python.saved_model import tag_constants
from tensorflow.python.compiler.tensorrt import trt_convert as trt

In [22]:
# Convert the DenseNet201 SavedModel with TF-TRT in FP32 precision mode and
# save the result to ./model/FP32_saved_model.
print('Converting to TF-TRT FP32...')

conversion_params = trt.DEFAULT_TRT_CONVERSION_PARAMS._replace(
    max_workspace_size_bytes=8_000_000_000,
    precision_mode=trt.TrtPrecisionMode.FP32,
)

converter = trt.TrtGraphConverterV2(
    conversion_params=conversion_params,
    input_saved_model_dir='../models/image_models/DenseNet201',
)

converter.convert()
converter.save(output_saved_model_dir='./model/FP32_saved_model')

Converting to TF-TRT FP32...
INFO:tensorflow:Linked TensorRT version: (7, 2, 1)
INFO:tensorflow:Loaded TensorRT version: (7, 2, 1)
INFO:tensorflow:Could not find TRTEngineOp_0_0 in TF-TRT cache. This can happen if build() is not called, which means TensorRT engines will be built and cached at runtime.
INFO:tensorflow:Assets written to: ./model/FP32_saved_model/assets


In [24]:
# !saved_model_cli show --all --dir model/FP32_saved_model

In [25]:
# Load the converted FP32 SavedModel using the serving tag.
input_saved_model = "./model/FP32_saved_model"

saved_model_loaded = tf.saved_model.load(
    export_dir=input_saved_model,
    tags=[tag_constants.SERVING],
)

In [26]:
label_list = ['COVID', 'non-COVID']
img_size = (224, 224)

# Test image (COVID sample); the commented path is the non-COVID sample.
image_path = "./test_data/covid-1.png"
# image_path = "./test_data/normal-1.jpg"

# Read the image at img_size, divide pixel values by 255, prepend a batch
# axis and wrap the array in a constant tensor for the loaded signature.
img = tf.keras.preprocessing.image.load_img(image_path, target_size=img_size)
img = tf.keras.preprocessing.image.img_to_array(img) / 255.0
img = tf.constant(img[np.newaxis, ...])

print(img.shape)

(1, 224, 224, 3)


In [28]:
# List the signatures exposed by the loaded SavedModel.
signature_keys = list(saved_model_loaded.signatures.keys())
print(signature_keys)

# Callable behind the default serving signature, and its declared outputs.
infer = saved_model_loaded.signatures['serving_default']
print(infer.structured_outputs)

pred = infer(img)
# print(pred)

# The signature returns a dict of output tensors; use its first entry.
key = list(pred.keys())[0]
print(key)

# Scalar score for the single image in the batch.
val = pred[key].numpy().item(0)
print(val)

# Round the score to a class index, then clamp it into label_list's range so
# a score outside [0, 1] can neither raise IndexError nor wrap around through
# a negative index.
idx = int(np.clip(np.round(val), 0, len(label_list) - 1))
label = label_list[idx]

print("label: ", label)

['serving_default']
{'relu': TensorSpec(shape=<unknown>, dtype=tf.float32, name='relu')}
relu
0.0004235160013195127
label:  COVID


### Predict Function

In [29]:
# MAKE inference Function
def predict_cxr(image_path, saved_model=None):
    """Classify one image file as 'COVID' or 'non-COVID'.

    Args:
        image_path: Path of the image file to classify.
        saved_model: Loaded SavedModel exposing a 'serving_default'
            signature. When omitted, the notebook-level
            ``saved_model_loaded`` is used, which keeps existing
            single-argument calls working.

    Returns:
        The label string picked from ['COVID', 'non-COVID'].
    """
    img_size = (224, 224)
    label_list = ['COVID','non-COVID']

    if saved_model is None:
        # Fall back to whichever model the notebook currently has bound to
        # saved_model_loaded.
        saved_model = saved_model_loaded

    # Read the image, divide pixel values by 255, add a batch axis and wrap
    # the array in a constant tensor for the signature call.
    img = tf.keras.preprocessing.image.load_img(image_path, target_size=img_size)
    img = tf.keras.preprocessing.image.img_to_array(img)
    img /= 255.0
    img = np.expand_dims(img, axis=0)
    img = tf.constant(img)

    infer = saved_model.signatures['serving_default']
    pred = infer(img)

    # The signature returns a dict of output tensors; use its first entry.
    key = list(pred.keys())[0]
    val = pred[key].numpy().item(0)

    # Round to a class index and clamp it into label_list's range so a score
    # outside [0, 1] can neither raise IndexError nor wrap around through a
    # negative index.
    idx = int(np.clip(np.round(val), 0, len(label_list) - 1))
    label = label_list[idx]

    return label

In [30]:
# Test image to classify. Swap in the commented path below to try the
# non-COVID sample instead.

# COVID sample
image_path = "./test_data/covid-1.png"

# non-COVID sample
# image_path = "./test_data/normal-1.jpg"

In [32]:
# PREDICT CXR image
# Untimed warm-up call: the conversion log reports that TensorRT engines not
# built ahead of time are built and cached at runtime, so running once first
# keeps any such one-time cost out of the measurement.
predict_cxr(image_path)

# perf_counter is a monotonic high-resolution clock meant for short intervals.
start_time = time.perf_counter()
label = predict_cxr(image_path)
end_time = time.perf_counter()

elapsed_time = end_time - start_time
print("경과 시간: ", elapsed_time)

print("label: ", label)

경과 시간:  0.020002126693725586
label:  COVID


## TF-TRT FP16 model

In [34]:
# Convert the DenseNet201 SavedModel with TF-TRT in FP16 precision mode and
# save the result to ./model/FP16_saved_model.
print('Converting to TF-TRT FP16...')

conversion_params = trt.DEFAULT_TRT_CONVERSION_PARAMS._replace(
    max_workspace_size_bytes=8_000_000_000,
    precision_mode=trt.TrtPrecisionMode.FP16,
)
converter = trt.TrtGraphConverterV2(
    conversion_params=conversion_params,
    input_saved_model_dir='../models/image_models/DenseNet201',
)
converter.convert()
converter.save(output_saved_model_dir='./model/FP16_saved_model')

print('Done Converting to TF-TRT FP16')

Converting to TF-TRT FP16...
INFO:tensorflow:Linked TensorRT version: (7, 2, 1)
INFO:tensorflow:Loaded TensorRT version: (7, 2, 1)
INFO:tensorflow:Could not find TRTEngineOp_1_0 in TF-TRT cache. This can happen if build() is not called, which means TensorRT engines will be built and cached at runtime.
INFO:tensorflow:Assets written to: ./model/FP16_saved_model/assets
Done Converting to TF-TRT FP16


In [35]:
# Load the converted FP16 SavedModel using the serving tag. This rebinds
# saved_model_loaded, which the single-argument predict_cxr reads.
input_saved_model = "./model/FP16_saved_model"

saved_model_loaded = tf.saved_model.load(
    export_dir=input_saved_model,
    tags=[tag_constants.SERVING],
)

In [38]:
# Test image to classify. Swap in the commented path below to try the
# non-COVID sample instead.

# COVID sample
image_path = "./test_data/covid-1.png"

# non-COVID sample
# image_path = "./test_data/normal-1.jpg"

In [41]:
# PREDICT CXR image
# Untimed warm-up call: the model was just reloaded and the conversion log
# reports that TensorRT engines not built ahead of time are built and cached
# at runtime, so running once first keeps that one-time cost out of the
# measurement.
predict_cxr(image_path)

# perf_counter is a monotonic high-resolution clock meant for short intervals.
start_time = time.perf_counter()
label = predict_cxr(image_path)
end_time = time.perf_counter()

elapsed_time = end_time - start_time
print("경과 시간: ", elapsed_time)

print("label: ", label)

경과 시간:  0.01824784278869629
label:  COVID


# 실행 시간 비교하기

In [44]:
def predict_cxr(image_path, saved_model):
    """Classify one image file as 'COVID' or 'non-COVID' with a given model.

    Args:
        image_path: Path of the image file to classify.
        saved_model: Loaded SavedModel exposing a 'serving_default'
            signature.

    Returns:
        The label string picked from ['COVID', 'non-COVID'].
    """
    img_size = (224, 224)
    label_list = ['COVID','non-COVID']

    # Read the image, divide pixel values by 255, add a batch axis and wrap
    # the array in a constant tensor for the signature call.
    img = tf.keras.preprocessing.image.load_img(image_path, target_size=img_size)
    img = tf.keras.preprocessing.image.img_to_array(img)
    img /= 255.0
    img = np.expand_dims(img, axis=0)
    img = tf.constant(img)

    infer = saved_model.signatures['serving_default']
    pred = infer(img)

    # The signature returns a dict of output tensors; use its first entry.
    key = list(pred.keys())[0]
    val = pred[key].numpy().item(0)

    # Round to a class index and clamp it into label_list's range so a score
    # outside [0, 1] can neither raise IndexError nor wrap around through a
    # negative index.
    idx = int(np.clip(np.round(val), 0, len(label_list) - 1))
    label = label_list[idx]

    return label

In [45]:
# Test image to classify. Swap in the commented path below to try the
# non-COVID sample instead.

# COVID sample
image_path = "./test_data/covid-1.png"

# non-COVID sample
# image_path = "./test_data/normal-1.jpg"

### Load model

In [46]:
# DenseNet201 SavedModel that was used as the TF-TRT conversion input, loaded
# with the serving tag.
native_model = tf.saved_model.load(
    export_dir="../models/image_models/DenseNet201",
    tags=[tag_constants.SERVING],
)

In [47]:
# TF-TRT FP32 SavedModel, loaded with the serving tag.
FP32_model = tf.saved_model.load(
    export_dir="./model/FP32_saved_model",
    tags=[tag_constants.SERVING],
)

In [48]:
# TF-TRT FP16 SavedModel, loaded with the serving tag.
FP16_model = tf.saved_model.load(
    export_dir="./model/FP16_saved_model",
    tags=[tag_constants.SERVING],
)

In [60]:
# Untimed warm-up call so one-time first-call work on the freshly loaded model
# is excluded and the timing reflects a repeat call.
predict_cxr(image_path, native_model)

# perf_counter is a monotonic high-resolution clock meant for short intervals.
start_time = time.perf_counter()
label = predict_cxr(image_path, native_model)
end_time = time.perf_counter()
print(native_model)
print(f"label: {label}, 경과 시간: {end_time-start_time}")

<tensorflow.python.saved_model.load.Loader._recreate_base_user_object.<locals>._UserObject object at 0x7ef727a28208>
label: COVID, 경과 시간: 0.03979849815368652


In [62]:
# Untimed warm-up call: the conversion log reports that TensorRT engines not
# built ahead of time are built and cached at runtime, so running once first
# keeps that one-time cost out of the measurement.
predict_cxr(image_path, FP32_model)

# perf_counter is a monotonic high-resolution clock meant for short intervals.
start_time = time.perf_counter()
label = predict_cxr(image_path, FP32_model)
end_time = time.perf_counter()
print(FP32_model)
print(f"label: {label}, 경과 시간: {end_time-start_time}")

<tensorflow.python.saved_model.load.Loader._recreate_base_user_object.<locals>._UserObject object at 0x7ef6e8f5b7f0>
label: COVID, 경과 시간: 0.019219636917114258


In [63]:
# Untimed warm-up call: the conversion log reports that TensorRT engines not
# built ahead of time are built and cached at runtime, so running once first
# keeps that one-time cost out of the measurement.
predict_cxr(image_path, FP16_model)

# perf_counter is a monotonic high-resolution clock meant for short intervals.
start_time = time.perf_counter()
label = predict_cxr(image_path, FP16_model)
end_time = time.perf_counter()
print(FP16_model)
print(f"label: {label}, 경과 시간: {end_time-start_time}")

<tensorflow.python.saved_model.load.Loader._recreate_base_user_object.<locals>._UserObject object at 0x7ef6767d3b70>
label: COVID, 경과 시간: 0.016704559326171875


### 여러개를 예측할때 비교