In [1]:
import os
from tensorflow.keras.applications.mobilenet_v2 import MobileNetV2 as Net

# Make sure the output directory exists before anything is written into it.
os.makedirs('./model', exist_ok=True)

# Instantiate the network (MobileNetV2, imported as Net) with its ImageNet
# weights and store it as an h5 file at the given path.
model = Net(weights='imagenet')
model.save("./model/model.h5")

Instructions for updating:
If using Keras pass *_constraint arguments to layers.


In [2]:
import tensorflow as tf
from tensorflow.python.framework import graph_io
from tensorflow.keras.models import load_model


# Locations used by the rest of this cell: the saved Keras model to load and
# the directory that receives the frozen graph.
model_fname = './model/model.h5'
save_pb_dir = './model'

# Reset Keras' global session state left over from earlier work.
tf.keras.backend.clear_session()
def freeze_graph(graph, session, output, save_pb_dir='.', save_pb_name='frozen_model.pb', save_pb_as_text=False):
    """Freeze ``graph`` into a GraphDef with constants and write it to disk.

    Training-only nodes are removed from the graph definition, the variables
    are converted to constants using ``session``, and the result is written
    to ``save_pb_dir/save_pb_name``.

    Args:
        graph: Graph to freeze; it is made the default graph while freezing.
        session: Session handed to ``convert_variables_to_constants``
            (presumably the one holding the variable values - confirm
            against the caller).
        output: List of output node names passed to
            ``convert_variables_to_constants``.
        save_pb_dir: Directory the graph file is written to.
        save_pb_name: File name of the written graph.
        save_pb_as_text: Forwarded as ``as_text`` to ``graph_io.write_graph``.

    Returns:
        The frozen GraphDef.
    """
    # tf.graph_util.* is deprecated; the tf.compat.v1 aliases are the
    # replacements named by TensorFlow's own deprecation notices.
    graph_util = tf.compat.v1.graph_util
    with graph.as_default():
        graphdef_inf = graph_util.remove_training_nodes(graph.as_graph_def())
        graphdef_frozen = graph_util.convert_variables_to_constants(session, graphdef_inf, output)
        graph_io.write_graph(graphdef_frozen, save_pb_dir, save_pb_name, as_text=save_pb_as_text)
        return graphdef_frozen

# The learning phase is set to 0 first; this has to happen before the Keras
# model is loaded.
tf.keras.backend.set_learning_phase(0)
model = load_model(model_fname)
session = tf.keras.backend.get_session()

# Operation names behind the model's input and output tensors. They are
# printed so they can be noted down for later use.
input_names = [tensor.op.name for tensor in model.inputs]
output_names = [tensor.op.name for tensor in model.outputs]
print(input_names, output_names)

# Freeze the session's graph, keeping the model's output nodes.
frozen_graph = freeze_graph(
    session.graph,
    session,
    output_names,
    save_pb_dir=save_pb_dir,
)

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
['input_1'] ['Logits/Softmax']
Instructions for updating:
Use `tf.compat.v1.graph_util.remove_training_nodes`
Instructions for updating:
Use `tf.compat.v1.graph_util.convert_variables_to_constants`
Instructions for updating:
Use `tf.compat.v1.graph_util.extract_sub_graph`
INFO:tensorflow:Froze 262 variables.
INFO:tensorflow:Converted 262 variables to const ops.


In [3]:
# Display the input node names collected from the Keras model.
input_names

['input_1']

In [4]:
# Display the output node names collected from the Keras model.
output_names

['Logits/Softmax']

In [5]:
import tensorflow.contrib.tensorrt as trt

# Build the TF-TRT graph from the frozen model: FP32 precision, batch size 1
# and a 32 MiB (1 << 25 bytes) workspace. minimum_segment_size=50 is passed
# through unchanged (per the TF-TRT docs it is the smallest subgraph, in
# nodes, that gets replaced by a TensorRT engine).
trt_graph = trt.create_inference_graph(
    input_graph_def=frozen_graph,
    outputs=output_names,
    precision_mode='FP32',
    max_batch_size=1,
    max_workspace_size_bytes=32 * 1024 * 1024,
    minimum_segment_size=50,
)

The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.

INFO:tensorflow:Linked TensorRT version: (5, 1, 5)
INFO:tensorflow:Loaded TensorRT version: (5, 1, 5)
INFO:tensorflow:Running against TensorRT version 5.1.5


In [6]:
from tensorflow.python.framework import graph_io
# Write the TF-TRT graph as a binary (non-text) file; the call is left as the
# last expression so the cell shows the written path.
graph_io.write_graph(trt_graph, logdir="./model/", name="trt_graph.pb", as_text=False)

'./model/trt_graph.pb'

In [1]:
# Node names of the graph's input and output; ":0" is appended further down
# to address the corresponding tensors.
input_names = ['input_1']
output_names = ['Logits/Softmax']

import tensorflow as tf


def get_frozen_graph(graph_file):
    """Read a serialized graph definition from disk.

    Args:
        graph_file: Path of the binary graph file to read.

    Returns:
        A ``tf.GraphDef`` parsed from the bytes of ``graph_file``.
    """
    # tf.gfile.FastGFile is deprecated; TensorFlow's deprecation notice names
    # tf.gfile.GFile as the replacement.
    with tf.gfile.GFile(graph_file, "rb") as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
    return graph_def


# Load the TF-TRT graph from disk.
trt_graph = get_frozen_graph('./model/trt_graph.pb')

# Create session and load graph. allow_growth is enabled so GPU memory is
# allocated on demand (per the TensorFlow GPUOptions documentation).
tf_config = tf.ConfigProto()
tf_config.gpu_options.allow_growth = True
tf_sess = tf.Session(config=tf_config)
tf.import_graph_def(trt_graph, name='')


# Get graph input size from the input node's 'shape' attribute. Dimension 0
# is skipped (presumably the batch dimension - confirm). The node is looked
# up by its exact name, the same name the input tensor is addressed by below,
# instead of by the substring 'input_', which could match an unrelated node.
for node in trt_graph.node:
    if node.name == input_names[0]:
        size = node.attr['shape'].shape
        image_size = [size.dim[i].size for i in range(1, 4)]
        break
else:
    # Fail with a clear message instead of a NameError on image_size below.
    raise ValueError(
        "input node {!r} not found in the loaded graph".format(input_names[0]))
print("image_size: {}".format(image_size))


# input and output tensor names.
input_tensor_name = input_names[0] + ":0"
output_tensor_name = output_names[0] + ":0"

print("input_tensor_name: {}\noutput_tensor_name: {}".format(
    input_tensor_name, output_tensor_name))

# Handle on the output tensor; later cells fetch it with tf_sess.run.
output_tensor = tf_sess.graph.get_tensor_by_name(output_tensor_name)

Instructions for updating:
Use tf.gfile.GFile.
image_size: [224, 224, 3]
input_tensor_name: input_1:0
output_tensor_name: Logits/Softmax:0


In [2]:
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input, decode_predictions
import numpy as np

# Optional image to test model prediction.
img_path = './data/elephant.jpg'

# Load the image resized to the first two entries of image_size, convert it
# to an array, add a leading axis so it forms a batch of one, and apply the
# MobileNetV2 preprocessing.
img = image.load_img(img_path, target_size=image_size[:2])
x = preprocess_input(np.expand_dims(image.img_to_array(img), axis=0))

# Feed the batch through the input tensor and fetch the output tensor.
feed_dict = {input_tensor_name: x}
preds = tf_sess.run(output_tensor, feed_dict=feed_dict)

# decode_predictions yields one list of (class, description, probability)
# tuples per sample in the batch; print the top 3 for the single sample.
print('Predicted:', decode_predictions(preds, top=3)[0])

Predicted: [('n02504458', 'African_elephant', 0.5241106), ('n01871265', 'tusker', 0.17158976), ('n02504013', 'Indian_elephant', 0.15654081)]


In [3]:
# test FP16 TF-TRT
import time
times = []

# Untimed first run (presumably a warm-up so one-time setup cost stays out of
# the measurements).
one_prediction = tf_sess.run(output_tensor, feed_dict)

for _ in range(200):
    # perf_counter is the high-resolution monotonic clock intended for timing
    # short intervals; time.time() follows the wall clock and can jump.
    start_time = time.perf_counter()
    one_prediction = tf_sess.run(output_tensor, feed_dict)
    delta = time.perf_counter() - start_time
    times.append(delta)
mean_delta = np.mean(times)
fps = 1 / mean_delta
# Millisecond-scale latencies print as 0.00 with two decimals, so the mean is
# reported with four.
print('average(sec):{:.4f},fps:{:.2f}'.format(mean_delta, fps))

average(sec):0.00,fps:434.85


In [3]:
# test FP32
# NOTE(review): presumably run against the plain frozen graph without TF-TRT
# conversion - confirm which graph was loaded for this measurement.
import time
times = []

# Untimed first run (presumably a warm-up so one-time setup cost stays out of
# the measurements).
one_prediction = tf_sess.run(output_tensor, feed_dict)

for _ in range(200):
    # perf_counter is the high-resolution monotonic clock intended for timing
    # short intervals; time.time() follows the wall clock and can jump.
    start_time = time.perf_counter()
    one_prediction = tf_sess.run(output_tensor, feed_dict)
    delta = time.perf_counter() - start_time
    times.append(delta)
mean_delta = np.mean(times)
fps = 1 / mean_delta
# Millisecond-scale latencies print as 0.00 with two decimals, so the mean is
# reported with four.
print('average(sec):{:.4f},fps:{:.2f}'.format(mean_delta, fps))

average(sec):0.00,fps:272.42


In [3]:
# test FP32 TF-TRT
import time
times = []

# Untimed first run (presumably a warm-up so one-time setup cost stays out of
# the measurements); its raw scores are printed for inspection.
one_prediction = tf_sess.run(output_tensor, feed_dict)
print(one_prediction)

for _ in range(200):
    # perf_counter is the high-resolution monotonic clock intended for timing
    # short intervals; time.time() follows the wall clock and can jump.
    start_time = time.perf_counter()
    one_prediction = tf_sess.run(output_tensor, feed_dict)
    delta = time.perf_counter() - start_time
    times.append(delta)
mean_delta = np.mean(times)
fps = 1 / mean_delta
# Millisecond-scale latencies print as 0.00 with two decimals, so the mean is
# reported with four.
print('average(sec):{:.4f},fps:{:.2f}'.format(mean_delta, fps))

[[2.48120079e-04 1.44475765e-04 4.26392326e-05 9.49431778e-05
  1.64853045e-04 1.43113764e-04 1.52562177e-04 2.97887073e-05
  1.32943742e-05 6.77644275e-05 3.37749625e-05 4.47046696e-05
  5.84257796e-05 2.92381574e-05 6.70682057e-05 8.50238430e-05
  1.14919276e-04 6.61836166e-05 8.38351698e-05 8.47236297e-05
  5.60509980e-05 9.54317438e-05 1.31356253e-04 4.61354139e-05
  2.08447949e-04 9.06247878e-05 8.87341594e-05 1.30184097e-04
  5.93808982e-05 7.68957616e-05 6.63713436e-05 6.33821255e-05
  7.27094230e-05 1.79737835e-04 2.31868835e-04 4.35032009e-04
  2.59438442e-04 1.27681807e-04 8.77550046e-05 9.89781183e-05
  3.02576391e-05 4.63254728e-05 6.35501492e-05 2.87439063e-04
  1.13533526e-04 9.19483136e-05 9.10659510e-05 1.25720268e-04
  2.35271742e-04 5.39930297e-05 9.97695388e-05 2.82283267e-03
  2.31692335e-04 9.99316107e-05 7.94608786e-05 1.41412980e-04
  9.61551777e-05 8.59367792e-05 2.80167187e-05 2.21998373e-04
  3.69808040e-05 2.10869075e-05 2.24530177e-05 1.17465803e-04
  1.4153

average(sec):0.00,fps:411.08


In [4]:
# Index of the largest score in the last prediction (taken over the
# flattened array).
one_prediction.argmax()

386