# Running DeepConsensus in TensorRT

## Imports

In [1]:
import tensorflow as tf
import numpy as np
import random
import PIL
from IPython.display import display
from deepconsensus.models import model_utils
from deepconsensus.models import data_providers

2023-02-06 22:27:28.323235: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE3 SSE4.1 SSE4.2 AVX
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


## Load the examples

In [2]:
%%script false --no-raise-error
%%bash
# NOTE: the `%%script false --no-raise-error` line above hands this cell to
# `false`, so none of the commands below are executed. Remove that line to
# actually perform the downloads (requires gsutil).
# Download Model: copy the v1.1 checkpoint files into ./deepconsensus_model
mkdir -p deepconsensus_model
gsutil cp -r gs://brain-genomics-public/research/deepconsensus/models/v1.1/model_checkpoint/* deepconsensus_model/
# Download test data: fetch one test shard and store it as ./tf-test.tfrecord.gz
gsutil cp gs://brain-genomics-public/research/deepconsensus/training-tutorial/v1.1/test/tf-test-00000-of-00500.tfrecord.gz ./tf-test.tfrecord.gz

In [3]:
# Checkpoint prefix; the model hyper-parameters are read from the JSON stored
# alongside it.
checkpoint_path = 'deepconsensus_model/checkpoint'
params = model_utils.read_params_from_json(checkpoint_path=checkpoint_path)

# Input shard and batch size used for the quick inspection below.
tfrecord_path = 'tf-test.tfrecord.gz'
batch_size = 1024

ds = data_providers.get_dataset(
    tfrecord_path,
    num_epochs=None,
    batch_size=batch_size,
    params=params,
    inference=False,
)

# Just get one batch to inspect:
for batch in ds.take(1):
  break

# Report the shape of every field of interest in that batch.
keys = ['name', 'label', 'rows', 'num_passes', 'window_pos']
for key in keys:
  print(f'{key}.shape: {batch[key].shape}')

2023-02-06 22:27:31.041007: I tensorflow/core/platform/cpu_feature_guard.cc:194] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE3 SSE4.1 SSE4.2 AVX
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-02-06 22:27:31.399962: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1637] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 31119 MB memory:  -> device: 0, name: Tesla V100-DGXS-32GB, pci bus id: 0000:08:00.0, compute capability: 7.0
2023-02-06 22:27:31.400630: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1637] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 31119 MB memory:  -> device: 1, name: Tesla V100-DGXS-32GB, pci bus id: 0000:0e:00.0, compute capability: 7.0


name.shape: (1024, 1)
label.shape: (1024, 100)
rows.shape: (1024, 85, 100, 1)
num_passes.shape: (1024, 1)
window_pos.shape: (1024, 1)


## Load the model

In [4]:
# Loss object built from the model params; later cells call loss.eval(...) to
# score predictions against labels.
loss = model_utils.get_deepconsensus_loss(params)

# Instantiate the network and wrap it in a Checkpoint so its weights can be
# restored from `checkpoint_path`.
model = model_utils.get_model(params)
checkpoint = tf.train.Checkpoint(model=model)

# Shape handed to the summary helper: a leading 1 (presumably the batch
# dimension), then total rows, max_length and num_channels taken from params.
row_size = data_providers.get_total_rows(params.max_passes)
input_shape = (1, row_size, params.max_length, params.num_channels)
model_utils.print_model_summary(model, input_shape)
# Restore the weights. expect_partial() tolerates checkpoint values that are
# not used here; assert_existing_objects_matched() checks that the objects
# already created have matching values in the checkpoint.
checkpoint.restore(
    checkpoint_path).expect_partial().assert_existing_objects_matched()

# NOTE(review): params are modified only after the model has been built and
# restored -- confirm whether these settings are meant to affect `model` or
# only the later data_providers.get_dataset(...) calls that receive `params`.
model_utils.modify_params(
  params=params,
  speedy=True,
  max_length=100,
  is_training=False)

Model: "encoder_only_learned_values_transformer"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 relative_position_embedding  multiple                 0         
  (RelativePositionEmbedding                                     
 )                                                               
                                                                 
 encoder_stack (EncoderStack  multiple                 7320200   
 )                                                               
                                                                 
 dense (Dense)               multiple                  1405      
                                                                 
 softmax (Softmax)           multiple                  0         
                                                                 
 bases_embedding (ModifiedOn  multiple                 40        
 DeviceEmbedding)          

In [5]:
from keras.layers import Input

# Symbolic input with the same per-example shape as `input_shape` (the leading
# dimension is dropped).
inputs = Input(shape=input_shape[1:])
# Trace the model once on the symbolic input in inference mode. `outputs` is
# not reused in the visible cells; the call appears to exist so that
# model.summary() below reports concrete output shapes instead of "multiple".
outputs = model.call(inputs, training=False)
model.summary()

Model: "encoder_only_learned_values_transformer"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 relative_position_embedding  (100, 280)               0         
  (RelativePositionEmbedding                                     
 )                                                               
                                                                 
 encoder_stack (EncoderStack  {'self_attention_layer_0  7320200  
 )                           ': (None, 100, 280),                
                              'attention_scores_0': (            
                             None, 2, 100, 100),                 
                              'ffn_layer_0': (None, 1            
                             00, 280),                           
                              'self_attention_layer_1            
                             ': (None, 100, 280),                
                           

In [6]:
# Run the restored model on the 'rows' tensor of the single batch fetched earlier.
softmax_output = model.predict(batch['rows'])

In [7]:
# Index of the highest-scoring class along the last axis of the model output.
tf_preds = np.argmax(softmax_output, axis=-1)

In [8]:
# Export the model as a SavedModel; this directory is what the TF-TRT
# converter is later pointed at via input_saved_model_dir.
tf.saved_model.save(model, 'deepconsensus_model/SavedModel')



INFO:tensorflow:Assets written to: deepconsensus_model/SavedModel/assets


INFO:tensorflow:Assets written to: deepconsensus_model/SavedModel/assets


## Assessing DeepConsensus accuracy

In [9]:
# Shard and batch size used to score the unconverted TensorFlow model.
dataset_path = './train/tf-train-00001-of-00500.tfrecord.gz'
inference_batch_size = 4096

dsi = data_providers.get_dataset(
    dataset_path,
    num_epochs=None,
    batch_size=inference_batch_size,
    params=params,
    inference=False,
)

In [10]:
# Pair each batch's labels with the TensorFlow model's predictions for it.
# take(count=-1) iterates over every batch the dataset yields.
predictions_tf = [
    (batch["label"], model.predict(batch["rows"]))
    for batch in dsi.take(count=-1)
]

2023-02-06 22:28:09.769882: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:390] Filling up shuffle buffer (this may take a while): 75998 of 1000000
2023-02-06 22:28:19.769274: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:390] Filling up shuffle buffer (this may take a while): 151821 of 1000000
2023-02-06 22:28:29.769689: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:390] Filling up shuffle buffer (this may take a while): 228286 of 1000000
2023-02-06 22:28:39.770146: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:390] Filling up shuffle buffer (this may take a while): 304818 of 1000000
2023-02-06 22:28:49.770101: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:390] Filling up shuffle buffer (this may take a while): 381296 of 1000000
2023-02-06 22:28:59.769564: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:390] Filling up shuffle buffer (this may take a while): 457637 of 1000000
2023-02-06 22:29:09.769417: I tensorflow/core/kernels/data/shuffle_data

In [11]:
# Score every (label, prediction) pair with the DeepConsensus loss object and
# keep the mean value of each batch.
batch_loss_tf = [loss.eval(label, pred).numpy().mean() for label, pred in predictions_tf]

# The numbers above come from `loss.eval`, i.e. they are loss values (lower is
# better), not an accuracy in [0, 1]; label the report accordingly.
print("Dataset average loss: {:.3f}".format(np.array(batch_loss_tf).mean()))

Dataset average accuracy: 0.956


## Testing TF-TRT INT8 Quantization

Based on the post-training quantization section of the NVIDIA TF-TRT user guide: https://docs.nvidia.com/deeplearning/frameworks/tf-trt-user-guide/index.html#post-train

In [14]:
# Shard and batch size that feed INT8 calibration.
calibration_batch_size = 1024
calibration_dataset_path = './train/tf-train-00001-of-00500.tfrecord.gz'

ds = data_providers.get_dataset(
    calibration_dataset_path,
    num_epochs=None,
    batch_size=calibration_batch_size,
    params=params,
    inference=False,
)



In [15]:
from tensorflow.python.compiler.tensorrt import trt_convert as trt


# Configure a TF-TRT converter for the exported SavedModel: INT8 precision
# with calibration enabled (the calibration data is supplied later, when
# converter.convert(...) is called).
converter = trt.TrtGraphConverterV2(
   input_saved_model_dir='deepconsensus_model/SavedModel',
   precision_mode=trt.TrtPrecisionMode.INT8,
   use_calibration=True
)

INFO:tensorflow:Linked TensorRT version: (8, 5, 1)


INFO:tensorflow:Linked TensorRT version: (8, 5, 1)


INFO:tensorflow:Loaded TensorRT version: (8, 5, 1)


INFO:tensorflow:Loaded TensorRT version: (8, 5, 1)


In [16]:
# Number of batches drawn from `ds` for INT8 calibration.
n_batches_calibration=25

def row_generator():
    """Yield calibration inputs for the TF-TRT converter.

    Draws `n_batches_calibration` batches from the notebook-level dataset `ds`
    (looked up when the generator runs) and yields each batch's 'rows' tensor
    wrapped in a one-element list. The converter's input functions are
    documented to yield a list, tuple or dict of inputs; wrapping the tensor
    also keeps this generator consistent with `sample`.
    """
    for batch in ds.take(count=n_batches_calibration):
        yield [batch['rows']]

In [17]:
# Convert the graph, using the batches yielded by row_generator as INT8
# calibration input; the returned callable is used later to run inference.
do_inference = converter.convert(calibration_input_fn=row_generator)

2023-02-06 21:51:51.211903: I tensorflow/core/grappler/devices.cc:66] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 2
2023-02-06 21:51:51.212013: I tensorflow/core/grappler/clusters/single_machine.cc:358] Starting new session
2023-02-06 21:51:51.214995: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1637] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 31119 MB memory:  -> device: 0, name: Tesla V100-DGXS-32GB, pci bus id: 0000:08:00.0, compute capability: 7.0
2023-02-06 21:51:51.215316: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1637] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 31119 MB memory:  -> device: 1, name: Tesla V100-DGXS-32GB, pci bus id: 0000:0e:00.0, compute capability: 7.0


INFO:tensorflow:Clearing prior device assignments in loaded saved model


INFO:tensorflow:Clearing prior device assignments in loaded saved model


INFO:tensorflow:Automatic mixed precision will be used on the whole TensorFlow Graph. This behavior can be deactivated using the environment variable: TF_TRT_EXPERIMENTAL_FEATURES=deactivate_mixed_precision.
More information can be found on: https://www.tensorflow.org/guide/mixed_precision.


INFO:tensorflow:Automatic mixed precision will be used on the whole TensorFlow Graph. This behavior can be deactivated using the environment variable: TF_TRT_EXPERIMENTAL_FEATURES=deactivate_mixed_precision.
More information can be found on: https://www.tensorflow.org/guide/mixed_precision.
2023-02-06 21:51:54.894523: I tensorflow/core/grappler/devices.cc:66] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 2
2023-02-06 21:51:54.894628: I tensorflow/core/grappler/clusters/single_machine.cc:358] Starting new session
2023-02-06 21:51:54.897473: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1637] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 31119 MB memory:  -> device: 0, name: Tesla V100-DGXS-32GB, pci bus id: 0000:08:00.0, compute capability: 7.0
2023-02-06 21:51:54.897792: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1637] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 31119 MB memory:  -> device: 1, name: Tesla V100

2023-02-06 21:51:55.862953: I tensorflow/core/grappler/optimizers/auto_mixed_precision.cc:1533] No allowlist ops found, nothing to do
2023-02-06 21:51:55.870709: I tensorflow/core/grappler/optimizers/auto_mixed_precision.cc:1533] No allowlist ops found, nothing to do
2023-02-06 21:51:55.874485: I tensorflow/core/grappler/optimizers/auto_mixed_precision.cc:1533] No allowlist ops found, nothing to do
2023-02-06 21:51:55.878244: I tensorflow/core/grappler/optimizers/auto_mixed_precision.cc:1533] No allowlist ops found, nothing to do
2023-02-06 21:51:55.881460: I tensorflow/core/grappler/optimizers/auto_mixed_precision.cc:1533] No allowlist ops found, nothing to do
2023-02-06 21:52:09.847922: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:390] Filling up shuffle buffer (this may take a while): 74977 of 1000000
2023-02-06 21:52:19.847282: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:390] Filling up shuffle buffer (this may take a while): 151238 of 1000000
2023-02-06 21:52:29.8

In [18]:
# Batch size and shard used to build the TensorRT engines and to score the
# converted model.
inference_batch_size = 8192

inference_dataset_path = './train/tf-train-00001-of-00500.tfrecord.gz'

# Read from the path declared in this cell. The original passed `dataset_path`,
# a variable left over from an earlier cell, which left
# `inference_dataset_path` unused and made this cell depend on hidden state.
ds = data_providers.get_dataset(inference_dataset_path,
                                num_epochs=None,
                                batch_size=inference_batch_size,
                                params=params,
                                inference=False)

In [19]:
def sample():
    """Yield one input list, `[rows]`, taken from the first batch of `ds`.

    `ds` is the notebook-level dataset and is looked up when the generator
    runs, not when this function is defined.
    """
    first_batch_only = ds.take(count=1)
    for element in first_batch_only:
        yield [element['rows']]

In [20]:
# Build the TensorRT engines up front, using the input yielded by sample().
converter.build(input_fn=sample)

2023-02-06 22:06:07.670464: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:390] Filling up shuffle buffer (this may take a while): 75833 of 1000000
2023-02-06 22:06:17.670868: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:390] Filling up shuffle buffer (this may take a while): 151997 of 1000000
2023-02-06 22:06:27.670187: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:390] Filling up shuffle buffer (this may take a while): 228274 of 1000000
2023-02-06 22:06:37.670723: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:390] Filling up shuffle buffer (this may take a while): 303751 of 1000000
2023-02-06 22:06:47.670689: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:390] Filling up shuffle buffer (this may take a while): 379050 of 1000000
2023-02-06 22:06:57.670281: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:390] Filling up shuffle buffer (this may take a while): 454351 of 1000000
2023-02-06 22:07:07.670575: I tensorflow/core/kernels/data/shuffle_data

In [21]:
converter.save(output_saved_model_dir="deepconsensus_model/SavedModel-TMP") # To get around onnx bugs
model_tmp = tf.saved_model.load("deepconsensus_model/SavedModel-TMP/")
tf.saved_model.save(model, "deepconsensus_model/SavedModel-TRT")

!rm -rf deepconsensus_model/SavedModel-TMP



INFO:tensorflow:Assets written to: deepconsensus_model/SavedModel-TRT/assets


INFO:tensorflow:Assets written to: deepconsensus_model/SavedModel-TRT/assets


In [22]:
# Print a per-engine report listing the ops folded into each TRTEngineOp.
converter.summary()

TRTEngineOP Name                 Device        # Nodes # Inputs      # Outputs     Input DTypes       Output Dtypes      Input Shapes       Output Shapes     

----------------------------------------

TRTEngineOp_000_000              device:GPU:0  6       2             2             ['float32', 'f ... ['float32', 'f ... [[-1, 100, 85] ... [[-1, 1, 1, 10 ...

	- AddV2: 1x
	- Const: 2x
	- ExpandDims: 2x
	- Sum: 1x

----------------------------------------

TRTEngineOp_000_001              device:GPU:0  344     170           2             ['float32', 'f ... ['float16', 'f ... [[-1, 100], [- ... [[-1, 100, 560 ...

	- Cast: 85x
	- ConcatV2: 1x
	- Const: 3x
	- ExpandDims: 85x
	- Mul: 170x

----------------------------------------

TRTEngineOp_000_002              device:GPU:0  6       4             4             ['int32', 'int ... ['float32', 'f ... [[-1], [-1], [ ... [[-1, 8], [-1, ...

	- Const: 2x
	- GatherV2: 4x

----------------------------------------

TRTEngineOp_000_003            

In [23]:
%%script false --no-raise-error
%%bash
# NOTE: the `%%script false --no-raise-error` line above hands this cell to
# `false`, so the command below is not executed. Remove that line to run it
# (requires the tf2onnx package).
# Convert the re-saved TF-TRT SavedModel to ONNX at deepconsensus_model/pbdc_i8.onnx

python -m tf2onnx.convert --saved-model deepconsensus_model/SavedModel-TRT/ --output deepconsensus_model/pbdc_i8.onnx

In [24]:
# Pair each batch's labels with the output of the TF-TRT converted function.
# take(count=-1) iterates over every batch the dataset yields.
predictions_trt = [
    (batch["label"], do_inference(batch["rows"]))
    for batch in ds.take(count=-1)
]

2023-02-06 22:11:08.107125: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:390] Filling up shuffle buffer (this may take a while): 75990 of 1000000
2023-02-06 22:11:18.107396: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:390] Filling up shuffle buffer (this may take a while): 151741 of 1000000
2023-02-06 22:11:28.106815: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:390] Filling up shuffle buffer (this may take a while): 228145 of 1000000
2023-02-06 22:11:38.107700: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:390] Filling up shuffle buffer (this may take a while): 304582 of 1000000
2023-02-06 22:11:48.107305: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:390] Filling up shuffle buffer (this may take a while): 381036 of 1000000
2023-02-06 22:11:58.106823: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:390] Filling up shuffle buffer (this may take a while): 456266 of 1000000
2023-02-06 22:12:08.106912: I tensorflow/core/kernels/data/shuffle_data

In [26]:
# The converted function returns a dict; 'output_1' holds the predictions that
# are scored against the labels. Keep the mean loss value of each batch.
batch_loss = [loss.eval(label, pred['output_1']).numpy().mean() for label, pred in predictions_trt]

# These are values of `loss.eval`, i.e. a loss (lower is better) that can
# exceed 1, so the report must not call them an accuracy.
print("Dataset average loss: {:.3f}".format(np.array(batch_loss).mean()))

Dataset average accuracy: 1.275
