# Latent representation inference

This notebook uses TensorRT to infer the latent representation of the S2 tensor set, based on the CAE.

The NVIDIA Docker container TensorFlow 21.02-tf2 was used to provide the computational environment.

In [None]:
import tensorflow as tf
from tensorflow.python.compiler.tensorrt import trt_convert as trt
import numpy as np
from tensorflow import keras
import datetime as dt

In [None]:
# Today's date formatted as YYYYMMDD; used later to stamp the output filename.
datestring = dt.datetime.now().strftime("%Y%m%d")
print(datestring)
# Identifier spliced into the input data, model and output paths used in this notebook.
suffix = '3'

In [None]:
%%time
# Read the 'tensors' array from the input archive for this suffix.
# np.load on an .npz returns a lazily-read NpzFile that holds an open file
# handle; the context manager closes it once the array has been read into memory.
with np.load('sentinel_gb_995_'+suffix+'_20210317.npz') as archive:
    x_tensors = archive['tensors']

In [None]:
# Display the loaded array as the cell output for a quick visual check.
x_tensors

In [None]:
# Start from the TF-TRT default conversion parameters and override only
# the precision mode and the engine cache size.
conversion_params = trt.DEFAULT_TRT_CONVERSION_PARAMS._replace(
                    precision_mode='FP32',
                    # Set this to a large enough number so it can cache all the engines.
                    maximum_cached_engines=16)

In [None]:
# Remove any previous contents of tensorrt/ (the converted model is saved there later in this notebook).
!rm -rf tensorrt/*

In [None]:
# Source SavedModel directory of the encoder for this suffix.
input_saved_model_dir="models/gb_postcode_encoder_" + suffix
# Destination directory for the TensorRT-converted SavedModel.
output_saved_model_dir="tensorrt/gb_postcode_encoder_inter_" + suffix

In [None]:
# Create the TF-TRT converter for the encoder SavedModel with the parameters chosen above.
converter = trt.TrtGraphConverterV2(
            input_saved_model_dir=input_saved_model_dir,
            conversion_params=conversion_params)

In [None]:
# Run the TF-TRT conversion on the loaded SavedModel.
converter.convert()

In [None]:
def my_input_fn(num_runs=10, batch_shape=(1000, 16, 16, 4)):
    """Yield random float32 batches for pre-building the TensorRT engines.

    This function is passed to ``converter.build``. Called with no
    arguments it yields ten batches of shape (1000, 16, 16, 4).

    Args:
        num_runs: Number of batches to yield.
        batch_shape: Shape of each generated batch.

    Yields:
        A one-element tuple holding a float32 array of standard-normal
        samples with shape ``batch_shape``.
    """
    for _ in range(num_runs):
        batch = np.random.normal(size=batch_shape).astype(np.float32)
        # Yield a 1-tuple rather than the bare array.
        yield (batch,)

In [None]:
%%time
# Build the engines up front, feeding the converter the sample batches produced by my_input_fn.
converter.build(my_input_fn)

In [None]:
# Write the converted model to the TensorRT output directory.
converter.save(output_saved_model_dir)

In [None]:
%%time
# Convert the whole NumPy dataset to a TensorFlow tensor once, so the inference loop can slice batches from it.
input_data = tf.convert_to_tensor(x_tensors)

In [None]:
%%time
# Reload the converted SavedModel from disk using the SERVING tag.
saved_model_loaded = tf.saved_model.load(output_saved_model_dir, tags=[trt.tag_constants.SERVING])

In [None]:
# Pick the default serving signature of the loaded model as the callable to run.
graph_func = saved_model_loaded.signatures[trt.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY]

In [None]:
# Freeze the signature function by converting its variables to constants; this is the function used for inference below.
frozen_func = trt.convert_to_constants.convert_variables_to_constants_v2(graph_func)

In [None]:
# Show the current GPU status before running inference.
!nvidia-smi

In [None]:
%%time
# Initialise the TensorRT engine with one random float32 batch of shape
# (1000, 16, 16, 4); the result is discarded, so this call is kept out of
# the timed inference loop that follows.
_ = frozen_func(tf.convert_to_tensor(np.random.normal(size=(1000, 16, 16, 4)).astype(np.float32)))

In [None]:
# Collects one NumPy array per inference batch; filled by the loop in the next cell.
output = []

In [None]:
%%time
import time

batch_size = 1000

# Run the frozen TensorRT function over the dataset in batches of
# `batch_size`, appending the first output of each call (as a NumPy array)
# to the existing `output` list. Progress is printed whenever the running
# record count is a multiple of 100000.
start = time.time()
end = 0  # records processed so far; stays 0 when there is nothing to process

for i in range(0, len(x_tensors), batch_size):
    batch = input_data[i:i+batch_size]  # the last batch may be shorter
    end = i + len(batch)
    output.append(frozen_func(batch)[0].numpy())
    if end % 100000 == 0:
        elapsed = time.time()-start
        print("Records:",end,round(elapsed,3),"secs",round(end/elapsed,3),"recs/sec")

# Final summary. Take a fresh timestamp here: previously `elapsed` was only
# set inside the progress branch, so it was stale (or undefined for runs of
# fewer than 100000 records) by the time this line executed.
elapsed = time.time()-start
rate = round(end/elapsed,3) if elapsed > 0 else 0.0
print("Records:",end,round(elapsed,3),"secs",rate,"recs/sec")

In [None]:
%%time
# Join the per-batch arrays into a single array along the first axis.
trt_output = np.concatenate(output)

In [None]:
%%time
# Reshape to rows of 64 values (row count inferred).
trt_results = trt_output.reshape(-1,64)

In [None]:
# Rebind `model` to None before loading — presumably to drop any previously loaded model on cell re-run; confirm.
model = None
# Load the original Keras encoder from the same path that was given to the TensorRT converter.
model = keras.models.load_model("models/gb_postcode_encoder_" + suffix)
model.compile()

In [None]:
%%time
# Run the Keras model on the first 1000 records as a reference for the TensorRT results.
keras_output = model(x_tensors[:1000])
# Only element 0 of the output is printed here, reshaped to rows of 64 values.
print(keras_output[0].numpy().reshape(-1,64))

In [None]:
# Reshape the Keras output for the first 1000 records to the same (-1, 64) layout as trt_results.
keras_results = keras_output.numpy().reshape(-1,64)

In [None]:
# Inference with TensorRT: first 1000 rows, the same slice the Keras model was run on
trt_results[:1000]

In [None]:
# Inference with TensorFlow/Keras on the first 1000 records
keras_results

In [None]:
# Fail if TensorRT and Keras disagree on the first 1000 rows beyond an absolute tolerance of 1e-5 (rtol left at its default).
np.testing.assert_allclose(trt_results[:1000],keras_results,atol=1e-5)

In [None]:
# Element-wise absolute difference between the TensorRT and Keras results for the first 1000 rows.
trt_error = np.abs(trt_results[:1000] - keras_results)

In [None]:
# Summarise the absolute error: mean, standard deviation, minimum and maximum.
print("TensorRT Absolute Error mean:",trt_error.mean(),"std dev:",trt_error.std(),"min:",trt_error.min(),"max:",trt_error.max())

In [None]:
# Display the full absolute-error array as the cell output.
trt_error

In [None]:
%%time
# Output name combines the suffix and the date string computed at the top of the notebook.
filename = "sentinel_flattened_gb_postcode_trt_" + suffix + "_" + datestring
# Save the TensorRT results compressed, stored under the key 'trt'.
np.savez_compressed(filename,trt=trt_results)