# Predictions with TensorRT
In this notebook we show how to run predictions with the TensorRT engine that we built after training our model and then serialized to disk.

### Imports
We import here the packages that are required to run this notebook

In [None]:
import tensorflow as tf

In [None]:
from tensorrt.lite import Engine
from tensorrt.infer import LogSeverity
import tensorrt

In [None]:
from PIL import Image
import numpy as np
import os
import functools
import time

In [None]:
# Number of images grouped into each inference batch.
BATCH_SIZE = 1

# Directory containing the images to run inference on.
IMAGE_DIR = '/home/data/val/roses'

# Serialized TensorRT engine file (built for batch size 1).
PLAN_single = '/home/data/model/keras_vgg19_b1_fp32.engine'

# Frozen TensorFlow model file, used for the comparison run.
PLAN_tensorflow = '/home/data/model/keras_vgg19_frozen_model.pb'

### Utility functions
We define here a few utility functions. These functions are used to:
* Analyze the predictions
* Convert an image to a format identical to the one used during training
* Organize the images into batched NumPy arrays
* Measure the run time of a function

In [None]:
def analyze(output_data):
    """Pick the best-scoring flower label for every sample.

    Args:
        output_data: NumPy array of class scores. It is reshaped into rows
            of five values, one value per label.

    Returns:
        A tuple ``(top_classes, top_classes_prob)``: a list holding the label
        with the highest score in each row, and a NumPy array holding that
        highest score for each row.
    """
    labels = ("daisy", "dandelion", "roses", "sunflowers", "tulips")

    # One row per sample, one column per label.
    scores = output_data.reshape(-1, len(labels))

    winners = scores.argmax(axis=1)
    winning_scores = scores.max(axis=1)

    return [labels[i] for i in winners], winning_scores


def image_to_np_CHW(image):
    """Resize a PIL image to the engine's input size and return it channels-first.

    Args:
        image: PIL image. It must be a multi-band image (for example RGB),
            because the result is transposed as a three-dimensional array.

    Returns:
        NumPy array with the channel axis moved to the front, cast to the
        input data type reported by ``engine_single``.
    """
    input_dims = engine_single.input_dim[0]

    # PIL's resize() takes (width, height), not (height, width). Passing the
    # dimensions in the wrong order only goes unnoticed for square inputs.
    target_size = (input_dims.W(), input_dims.H())
    resized = image.resize(target_size, Image.ANTIALIAS)

    # For a multi-band image np.asarray yields height x width x channels;
    # move the channel axis to the front.
    return np.asarray(resized).transpose([2, 0, 1]).astype(
        engine_single.data_type.input_type())


def load_and_preprocess_images():
    """Load every image in ``IMAGE_DIR`` and arrange it into inference batches.

    Each file is decoded and resized once with ``image_to_np_CHW``. The
    channels-last copy is derived from that same array.

    Returns:
        A tuple ``(images_trt, images_tf)`` of NumPy arrays with shape
        ``(num_batches, BATCH_SIZE, ...)``. ``images_trt`` holds
        channels-first images and ``images_tf`` holds the same images
        channels-last. Images that do not fill a complete batch are dropped.

    Raises:
        ValueError: if ``IMAGE_DIR`` contains no files.
    """
    # os.listdir returns entries in arbitrary order; sort so that the image
    # order (and therefore the result order) is reproducible between runs.
    file_list = sorted(
        f for f in os.listdir(IMAGE_DIR)
        if os.path.isfile(os.path.join(IMAGE_DIR, f)))
    if not file_list:
        raise ValueError('no image files found in {}'.format(IMAGE_DIR))

    images_trt = []
    images_tf = []
    for f in file_list:
        # Open each file once and release the handle as soon as it is decoded.
        with Image.open(os.path.join(IMAGE_DIR, f)) as image:
            chw = image_to_np_CHW(image)
        images_trt.append(chw)
        images_tf.append(chw.transpose([1, 2, 0]))

    images_trt = np.stack(images_trt)
    images_tf = np.stack(images_tf)

    # Floor division: only complete batches are kept.
    num_batches = len(images_trt) // BATCH_SIZE

    def _to_batches(images):
        # Drop the incomplete tail, then add a leading batch-index axis.
        kept = images[0:num_batches * BATCH_SIZE]
        return np.reshape(
            kept, [num_batches, BATCH_SIZE] + list(images.shape[1:]))

    return _to_batches(images_trt), _to_batches(images_tf)


def timeit(func):
    """Decorator that reports how long each call to ``func`` takes.

    After the wrapped function returns, a line with the function name and
    the elapsed wall-clock time in whole milliseconds is printed. The
    wrapped function's return value is passed through unchanged.
    """
    def _timed_call(*args, **kwargs):
        began = time.time()
        outcome = func(*args, **kwargs)
        elapsed_ms = int((time.time() - began) * 1000)
        print('function [{}] finished in {} ms'.format(
            func.__name__, elapsed_ms))
        return outcome

    # Copy the name and docstring of ``func`` onto the wrapper.
    return functools.wraps(func)(_timed_call)

### Prepare TensorRT Engine
Here we load the TensorRT engine so that we can run inference. We can also attach a postprocessing function (a utility function) to an output so that it is applied before the results are returned; in this case we use the function `analyze`.

In [None]:
def load_TRT_engine(plan):
    """Create a TensorRT lite ``Engine`` from a serialized plan file.

    ``analyze`` is registered as the postprocessor for the
    "dense_2/Softmax" output.

    Args:
        plan: path of the serialized engine file.

    Returns:
        The constructed ``Engine``.
    """
    return Engine(
        PLAN=plan,
        postprocessors={"dense_2/Softmax": analyze},
    )


# Engine used by the preprocessing and TensorRT inference helpers.
engine_single = load_TRT_engine(PLAN_single)

### Prepare Tensorflow inference
For comparison purposes we also instantiate a TensorFlow session and load the frozen model into it. We will process the data with TensorFlow in exactly the same way as with TensorRT and then compare the timings.

In [None]:
def load_TF_engine():
    """Load the frozen TensorFlow model and open a session on it.

    The graph is imported into its own ``tf.Graph`` instead of the global
    default graph, so running this cell again does not add a second copy of
    every node to a shared graph.

    Returns:
        A tuple ``(input_tensor, output_tensor, sess)``: the 'input_1:0'
        tensor, the 'dense_2/Softmax:0' tensor, and the ``tf.Session`` bound
        to the imported graph.
    """
    graph = tf.Graph()
    with graph.as_default():
        # read the frozen graph
        with tf.gfile.GFile(PLAN_tensorflow, 'rb') as f:
            graph_def = tf.GraphDef()
            graph_def.ParseFromString(f.read())
        # name='' keeps the original node names (no import prefix).
        tf.import_graph_def(graph_def, name='')

    # Limit TensorFlow to half of the GPU memory.
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.5)
    sess = tf.Session(
        graph=graph, config=tf.ConfigProto(gpu_options=gpu_options))

    # define input and output tensors (by name)
    output_tensor = sess.graph.get_tensor_by_name('dense_2/Softmax:0')
    input_tensor = sess.graph.get_tensor_by_name('input_1:0')

    return input_tensor, output_tensor, sess


tf_input_tensor, tf_output_tensor, tf_sess = load_TF_engine()

### Load all data
Here we load all the test data from the directory specified above in "IMAGE_DIR"

In [None]:
# Load and preprocess all images up front so that file I/O is not part of the
# timed inference loops. images_trt is channels-first, images_tf channels-last.
images_trt, images_tf = load_and_preprocess_images()

### Prepare function to do inference with TensorRT

In [None]:
@timeit
def infer_all_images_trt():
    """Run every batch in ``images_trt`` through the TensorRT engine.

    Returns:
        A list with one ``engine_single.infer`` result per batch, in the
        order of the batches.
    """
    return [engine_single.infer(batch) for batch in images_trt]

### Prepare function to do inference with TensorFlow

In [None]:
@timeit
def infer_all_images_tensorflow():
    """Run every batch in ``images_tf`` through the TensorFlow session.

    The raw output of each batch is passed through ``analyze``.

    Returns:
        A list with one ``analyze`` result per batch, in the order of the
        batches.
    """
    def _classify(batch):
        scores = tf_sess.run(tf_output_tensor, feed_dict={tf_input_tensor: batch})
        return analyze(scores)

    return [_classify(batch) for batch in images_tf]
        

In [None]:
# Run inference with TensorRT; the @timeit decorator prints the elapsed time.
results_trt = infer_all_images_trt()

In [None]:
# Run inference with TensorFlow; the @timeit decorator prints the elapsed time.
results_tf = infer_all_images_tensorflow()

In [None]:
# Check that both back ends predict the same top class for every batch.
for i in range(len(results_trt)):
    # NOTE(review): the extra [0][0] on the TensorRT side presumably unwraps
    # the engine's per-output / per-batch nesting around the analyze() result;
    # confirm against the tensorrt.lite Engine.infer return format.
    if results_trt[i][0][0][0] != results_tf[i][0]:
        # Call form of print works on both Python 2 and Python 3.
        print('ERROR!')