<a href="https://colab.research.google.com/github/rakesh4real/role-models/blob/main/optimize.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

- **Tool:** [TF Graph Transforms Python API](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/tools/graph_transforms)
- **Input:** `SavedModel` [format](https://www.tensorflow.org/guide/saved_model) combines a `GraphDef` with checkpoint files that store weights, **all collected in a folder**.

# Steps

1. Freeze the `SavedModel` model by converting it to `GraphDef` format
2. Optimize the frozen `GraphDef` model
3. Unfreeze to `SavedModel` format

# Setting up environment

In [1]:
# Install the `tree` CLI (used further down to list the generated `models` folder)
# and pin TensorFlow to the 1.x release this notebook is written against.
!apt install tree
!pip install tensorflow==1.15.0 # currently v2 is not supported https://github.com/tensorflow/tensorflow/issues/30746. Temp fix - use 1.15.0

# Print a banner with the TensorFlow version that actually got imported.
import tensorflow as tf
print(f"{'='*60}\nCurrently using {tf.__version__}")

Reading package lists... Done
Building dependency tree       
Reading state information... Done
The following NEW packages will be installed:
  tree
0 upgraded, 1 newly installed, 0 to remove and 6 not upgraded.
Need to get 40.7 kB of archives.
After this operation, 105 kB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu bionic/universe amd64 tree amd64 1.7.0-5 [40.7 kB]
Fetched 40.7 kB in 0s (145 kB/s)
Selecting previously unselected package tree.
(Reading database ... 144617 files and directories currently installed.)
Preparing to unpack .../tree_1.7.0-5_amd64.deb ...
Unpacking tree (1.7.0-5) ...
Setting up tree (1.7.0-5) ...
Processing triggers for man-db (2.8.3-2ubuntu0.1) ...
Collecting tensorflow==1.15.0
[?25l  Downloading https://files.pythonhosted.org/packages/3f/98/5a99af92fb911d7a88a0005ad55005f35b4c1ba8d75fba02df726cd936e6/tensorflow-1.15.0-cp36-cp36m-manylinux2010_x86_64.whl (412.3MB)
[K     |████████████████████████████████| 412.3MB 37kB/s 
C

In [2]:
from __future__ import print_function
import os
import numpy as np
from datetime import datetime
import sys
import warnings
warnings.filterwarnings("ignore")

import tensorflow as tf
from tensorflow import data
from tensorflow.python.saved_model import tag_constants
from tensorflow.python.tools import freeze_graph
from tensorflow.python import ops

from tensorflow.tools.graph_transforms import TransformGraph # currently, not avl. in v2

In [3]:
# Number of output classes; sizes the final `logits` Dense layer in keras_model_fn.
NUM_CLASSES = 10
# Root folder for artifacts and the sub-folder for this model; the two are
# joined into the estimator's model_dir by train_and_export_model.
MODELS_LOCATION = 'models/mnist'
MODEL_NAME = 'keras_classifier'

def load_mnist_keras():
  """Load MNIST through Keras and flatten the nested result into a 4-tuple.

  Returns:
    (train_data, train_labels, eval_data, eval_labels), in that order, exactly
    as produced by tf.keras.datasets.mnist.load_data().
  """
  (train_data, train_labels), (eval_data, eval_labels) = (
      tf.keras.datasets.mnist.load_data())
  mnist_arrays = (train_data, train_labels, eval_data, eval_labels)
  return mnist_arrays

def keras_model_fn(params):
  """Assemble the CNN classifier as a functional Keras model.

  Architecture, in order:
    * `input_image` of shape (28, 28), reshaped to (28, 28, 1);
    * `params.num_conv_layers` blocks of 3x3 conv (ReLU) -> 2x2 max-pool ->
      batch-norm, with the filter count doubling per block starting from
      `params.init_filters`;
    * flatten;
    * one Dense(ReLU) + Dropout(`params.dropout`) pair per entry of
      `params.hidden_units`;
    * a `NUM_CLASSES`-unit Dense layer named `logits`, then `softmax`.

  Args:
    params: object exposing num_conv_layers, init_filters, hidden_units
      and dropout.

  Returns:
    An uncompiled tf.keras Model mapping `input_image` to `softmax`.
  """
  layers = tf.keras.layers

  image = layers.Input(shape=(28, 28), name='input_image')
  net = layers.Reshape(target_shape=(28, 28, 1), name='reshape')(image)

  # Convolutional feature extractor: each block doubles the filter count.
  for block_index in range(params.num_conv_layers):
    n_filters = params.init_filters * (2 ** block_index)
    net = layers.Conv2D(kernel_size=3, filters=n_filters, strides=1, padding='SAME', activation='relu')(net)
    net = layers.MaxPool2D(pool_size=2, strides=2, padding='SAME')(net)
    net = layers.BatchNormalization()(net)

  net = layers.Flatten(name='flatten')(net)

  # Fully-connected head with dropout after every hidden layer.
  for n_units in params.hidden_units:
    net = layers.Dense(units=n_units, activation='relu')(net)
    net = layers.Dropout(params.dropout)(net)

  # Classifier: raw scores first, probabilities second.
  class_scores = layers.Dense(units=NUM_CLASSES, name='logits')(net)
  class_probs = layers.Activation('softmax', name='softmax')(class_scores)

  return tf.keras.models.Model(image, class_probs)


def create_estimator_keras(params, run_config):
  """Build the Keras model, compile it and wrap it as a tf.estimator.

  Args:
    params: hyper-parameter object; forwarded to keras_model_fn and read for
      `learning_rate`.
    run_config: estimator run configuration, passed as `config` to
      tf.keras.estimator.model_to_estimator.

  Returns:
    The estimator created from the compiled Keras model.
  """
  keras_model = keras_model_fn(params)
  # summary() prints the table itself; wrapping it in print() only added a
  # trailing "None" line.
  keras_model.summary()

  # Pass the configured optimizer instance to compile(). Previously the string
  # 'adam' was passed instead, so params.learning_rate was silently ignored.
  optimizer = tf.keras.optimizers.Adam(lr=params.learning_rate)
  keras_model.compile(loss='sparse_categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])
  mnist_classifier = tf.keras.estimator.model_to_estimator(
      keras_model=keras_model,
      config=run_config
  )

  return mnist_classifier

In [4]:
def run_experiment(hparams, train_data, train_labels, run_config, create_estimator_fn=create_estimator_keras,
                   eval_data=None, eval_labels=None):
  """Train and evaluate an estimator via tf.estimator.train_and_evaluate.

  Args:
    hparams: object exposing batch_size, max_training_steps and
      eval_throttle_secs; also forwarded to create_estimator_fn.
    train_data: array fed as the 'input_image' feature while training.
    train_labels: labels matching train_data.
    run_config: forwarded to create_estimator_fn.
    create_estimator_fn: callable `(hparams, run_config) -> estimator`.
    eval_data: optional held-out features for the evaluation pass.
    eval_labels: optional labels matching eval_data.

  Returns:
    The estimator after train_and_evaluate has finished.

  Raises:
    ValueError: if only one of eval_data / eval_labels is supplied.
  """
  if (eval_data is None) != (eval_labels is None):
    raise ValueError('eval_data and eval_labels must be provided together')
  if eval_data is None:
    # Backward-compatible fallback: the evaluation pass reuses the training
    # arrays, so its metrics are NOT a held-out estimate. Pass eval_data /
    # eval_labels to evaluate on unseen examples.
    eval_data, eval_labels = train_data, train_labels

  train_spec = tf.estimator.TrainSpec(
      input_fn = tf.estimator.inputs.numpy_input_fn(
          x={'input_image': train_data},
          y=train_labels,
          batch_size=hparams.batch_size,
          num_epochs=None,  # repeat indefinitely; max_steps bounds training
          shuffle=True),
      max_steps=hparams.max_training_steps
  )
  eval_spec = tf.estimator.EvalSpec(
      input_fn = tf.estimator.inputs.numpy_input_fn(
          x={'input_image': eval_data},
          y=eval_labels,
          batch_size=hparams.batch_size,
          num_epochs=1,  # a single pass over the evaluation arrays
          shuffle=False),
      steps=None,
      throttle_secs=hparams.eval_throttle_secs
  )

  tf.logging.set_verbosity(tf.logging.INFO)

  time_start = datetime.utcnow()
  print('Experiment started at {}'.format(time_start.strftime('%H:%M:%S')))
  print('.......................................')

  estimator = create_estimator_fn(hparams, run_config)

  tf.estimator.train_and_evaluate(
      estimator=estimator,
      train_spec=train_spec,
      eval_spec=eval_spec
  )

  time_end = datetime.utcnow()
  print('.......................................')
  print('Experiment finished at {}'.format(time_end.strftime('%H:%M:%S')))
  print('')
  time_elapsed = time_end - time_start
  print('Experiment elapsed time: {} seconds'.format(time_elapsed.total_seconds()))

  return estimator


def train_and_export_model(train_data, train_labels):
  """Train the Keras-based estimator from scratch and export a SavedModel.

  Any existing folder at MODELS_LOCATION/MODEL_NAME is wiped first, so every
  call starts from a clean model directory.

  Args:
    train_data: training features, forwarded to run_experiment.
    train_labels: training labels, forwarded to run_experiment.

  Returns:
    The export base directory (`<model_dir>/export`) handed to
    export_savedmodel as `export_dir_base`.
  """
  model_dir = os.path.join(MODELS_LOCATION, MODEL_NAME)

  # NOTE(review): `dropout` is handed straight to keras Dropout in
  # keras_model_fn; 0.85 looks high if it is meant as a drop rate -- confirm.
  hparams = tf.contrib.training.HParams(
      batch_size=100,
      hidden_units=[1024],
      num_conv_layers=2,
      init_filters=64,
      dropout=0.85,
      max_training_steps=50,
      eval_throttle_secs=10,
      learning_rate=1e-3,
      debug=True,
  )

  run_config = tf.estimator.RunConfig(
      tf_random_seed=19830610,
      save_checkpoints_steps=1000,
      keep_checkpoint_max=3,
      model_dir=model_dir,
  )

  # Start from an empty model directory.
  if tf.gfile.Exists(model_dir):
    print('Removing previous artifacts...')
    tf.gfile.DeleteRecursively(model_dir)
  os.makedirs(model_dir)

  trained_estimator = run_experiment(
      hparams, train_data, train_labels, run_config, create_estimator_keras)

  export_dir = os.path.join(model_dir, 'export')
  if tf.gfile.Exists(export_dir):
    tf.gfile.DeleteRecursively(export_dir)

  # Raw serving input: one float32 tensor of shape [None, 28, 28] fed under the
  # 'input_image' key.
  serving_features = {
      'input_image': tf.placeholder(
          shape=[None,28,28], dtype=tf.float32, name='serving_input_image'),
  }
  serving_input_receiver_fn = (
      tf.estimator.export.build_raw_serving_input_receiver_fn(serving_features))

  trained_estimator.export_savedmodel(
      export_dir_base=export_dir,
      serving_input_receiver_fn=serving_input_receiver_fn,
  )

  return export_dir

**Train and generate `SavedModel` in `models` folder**

In [5]:
# Keep all four MNIST arrays as notebook globals: inference_test reads eval_data later on.
train_data, train_labels, eval_data, eval_labels = load_mnist_keras()
# Features and labels are cast to float32 before being handed to the training routine.
export_dir = train_and_export_model(train_data.astype('float32'), train_labels.astype('float32'))

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.

Experiment started at 06:17:29
.......................................
Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_image (InputLayer)     [(None, 28, 28)]          0         
_________________________________________________________________
reshape (Reshape)            (None, 28, 28, 1)         0         
_______________________________________________

In [6]:
# List everything training and export wrote under `models`.
!tree models 

models
└── mnist
    └── keras_classifier
        ├── checkpoint
        ├── eval
        │   └── events.out.tfevents.1602137935.7114768df0c4
        ├── events.out.tfevents.1602137851.7114768df0c4
        ├── export
        │   └── 1602137935
        │       ├── saved_model.pb
        │       └── variables
        │           ├── variables.data-00000-of-00001
        │           └── variables.index
        ├── graph.pbtxt
        ├── keras
        │   ├── checkpoint
        │   ├── keras_model.ckpt.data-00000-of-00001
        │   ├── keras_model.ckpt.index
        │   └── keras_model.ckpt.meta
        ├── model.ckpt-0.data-00000-of-00001
        ├── model.ckpt-0.index
        ├── model.ckpt-0.meta
        ├── model.ckpt-50.data-00000-of-00001
        ├── model.ckpt-50.index
        └── model.ckpt-50.meta

7 directories, 17 files


In [7]:
# Resolve the export folder returned by train_and_export_model instead of
# hard-coding an absolute '/content/...' path, so the cell also works when the
# notebook is run from a different working directory.
EXPORT_DIR = os.path.abspath(export_dir)

# Exports live in numerically named sub-folders; take the highest one rather
# than whichever entry os.listdir happens to return first.
SAVED_MODEL_DIR = os.path.join(
    EXPORT_DIR,
    max((f for f in os.listdir(EXPORT_DIR) if f.isdigit()), key=int)
)
print(SAVED_MODEL_DIR)

/content/models/mnist/keras_classifier/export/1602137935


# Visualize in Tensorboard

In [8]:
# %load_ext tensorboard
# %tensorboard --logdir models/mnist/keras_classifier
# click on "GRAPHS" on top section

# TF Graph Transforms

In [9]:
# Hand the SavedModel path to the shell through an environment variable, then
# let saved_model_cli print the model's tag-sets and signatures.
os.environ['DIR'] = SAVED_MODEL_DIR
!saved_model_cli show --dir ${DIR} --all


MetaGraphDef with tag-set: 'serve' contains the following SignatureDefs:

signature_def['serving_default']:
  The given SavedModel SignatureDef contains the following input(s):
    inputs['input_image'] tensor_info:
        dtype: DT_FLOAT
        shape: (-1, 28, 28)
        name: serving_input_image:0
  The given SavedModel SignatureDef contains the following output(s):
    outputs['softmax'] tensor_info:
        dtype: DT_FLOAT
        shape: (-1, 10)
        name: softmax/Softmax:0
  Method name is: tensorflow/serving/predict


In [10]:
def get_graph_def_from_saved_model(saved_model_dir):
  """Load a SavedModel tagged for serving and return its GraphDef.

  Args:
    saved_model_dir: folder containing the SavedModel.

  Returns:
    The `graph_def` of the MetaGraphDef returned by the loader.
  """
  # Load into a private, throw-away graph. With a bare tf.Session() the model
  # was imported into the process-wide default graph, so every call kept
  # adding the model's nodes to it.
  with tf.Session(graph=tf.Graph()) as session:
    meta_graph_def = tf.saved_model.loader.load(
        session,
        tags=[tag_constants.SERVING],
        export_dir=saved_model_dir
    )
  return meta_graph_def.graph_def

In [11]:
def describe_graph(graph_def, show_nodes=False):
  """Print a short inventory of the nodes in a GraphDef.

  Reports, each followed by a blank line: placeholder node names, node names
  containing 'unused', node names containing 'predictions' or 'softmax',
  node names containing 'quant', and the number of Const, Variable* and
  Identity ops. Finishes with the total node count.

  Args:
    graph_def: object whose `node` attribute is an iterable of nodes carrying
      `op` and `name` string attributes.
    show_nodes: when True, additionally print one "Op/Name" line per node.
  """
  # (output template, node filter, print a count instead of the names?)
  report_sections = (
      ('Input Feature Nodes: {}', lambda n: n.op == 'Placeholder', False),
      ('Unused Nodes: {}', lambda n: 'unused' in n.name, False),
      ('Output Nodes: {}',
       lambda n: 'predictions' in n.name or 'softmax' in n.name, False),
      ('Quantization Nodes: {}', lambda n: 'quant' in n.name, False),
      ('Constant Count: {}', lambda n: n.op == 'Const', True),
      ('Variable Count: {}', lambda n: 'Variable' in n.op, True),
      ('Identity Count: {}', lambda n: n.op == 'Identity', True),
  )

  for template, keep, as_count in report_sections:
    matching = [n for n in graph_def.node if keep(n)]
    value = len(matching) if as_count else [n.name for n in matching]
    print(template.format(value))
    print('')

  # The extra '' argument leaves a trailing space after the number.
  print('Total nodes: {}'.format(len(graph_def.node)), '')

  if show_nodes == True:
    for n in graph_def.node:
      print('Op:{} - Name: {}'.format(n.op, n.name))

**Graph Before Optimisation**

In [12]:
# Baseline node inventory, taken straight from the exported SavedModel.
describe_graph(get_graph_def_from_saved_model(SAVED_MODEL_DIR))

Instructions for updating:
This function will only be available through the v1 compatibility library as tf.compat.v1.saved_model.loader.load or tf.compat.v1.saved_model.load. There will be a new function for importing SavedModels in Tensorflow 2.0.
INFO:tensorflow:Restoring parameters from /content/models/mnist/keras_classifier/export/1602137935/variables/variables
Input Feature Nodes: ['serving_input_image']

Unused Nodes: []

Output Nodes: ['softmax/Softmax']

Quantization Nodes: []

Constant Count: 48

Variable Count: 65

Identity Count: 20

Total nodes: 223 


In [13]:
# show nodes (as in tensorboard)
# describe_graph(get_graph_def_from_saved_model('/content/models/mnist/keras_classifier/export/1602114568'), True)

**Size of model**

In [14]:
def get_size(model_dir, model_file='saved_model.pb'):
  """Print the on-disk size of a model file and its SavedModel variables.

  SavedModel size can be roughly summed as the size of the GraphDef plus the
  size of the variables (i.e. the weights of the model).

  Args:
    model_dir: folder holding `model_file` and, optionally, a `variables`
      sub-folder.
    model_file: name of the graph file inside `model_dir`.

  Returns:
    None; the three sizes are printed in KB (bytes / 1024, rounded to 3
    decimal places).

  Raises:
    OSError: if `model_file` does not exist in `model_dir`.
  """
  model_file_path = os.path.join(model_dir, model_file)
  print(model_file_path, '')
  pb_size = os.path.getsize(model_file_path)

  # Sum every `variables.*` file (the index plus however many data shards
  # exist) instead of assuming exactly one shard named `data-00000-of-00001`.
  variables_dir = os.path.join(model_dir, 'variables')
  variables_size = 0
  if os.path.isdir(variables_dir):
    for entry in os.listdir(variables_dir):
      entry_path = os.path.join(variables_dir, entry)
      if entry.startswith('variables.') and os.path.isfile(entry_path):
        variables_size += os.path.getsize(entry_path)

  print('Model size\t: {} KB'.format(round(pb_size/(1024.0),3)))
  print('Variables size\t: {} KB'.format(round( variables_size/(1024.0),3)))
  print('Total Size\t: {} KB'.format(round((pb_size + variables_size)/(1024.0),3)))

In [15]:
# Size of the SavedModel as exported, before any optimisation.
get_size(SAVED_MODEL_DIR)

/content/models/mnist/keras_classifier/export/1602137935/saved_model.pb 
Model size	: 40.51 KB
Variables size	: 25426.71 KB
Total Size	: 25467.22 KB


# Inference

In [16]:
def inference_test(saved_model_dir, signature="serving_default", input_name='input_image', batch=300, repeat=100,
                   data=None, output_name='softmax'):
    """Time model loading and repeated batch prediction for a SavedModel.

    Args:
        saved_model_dir: folder of the SavedModel to load.
        signature: signature_def key handed to the predictor.
        input_name: key under which the batch is fed to the predictor.
        batch: number of leading rows of `data` used as the batch.
        repeat: how many times the same batch is predicted; must be >= 1.
        data: array to take the batch from. Defaults to the notebook-level
            `eval_data`, which keeps the original behaviour.
        output_name: key of the predictor output that is arg-maxed.

    Returns:
        None; timings are printed.

    Raises:
        ValueError: if `repeat` is smaller than 1 (previously this surfaced
            later as a TypeError on `len(None)`).
    """
    if repeat < 1:
        raise ValueError("repeat must be >= 1, got {}".format(repeat))
    if data is None:
        # Fall back to the global defined by the data-loading cell.
        data = eval_data

    tf.logging.set_verbosity(tf.logging.ERROR)

    time_start = datetime.utcnow()
    predictor = tf.contrib.predictor.from_saved_model(
        export_dir        = saved_model_dir,
        signature_def_key = signature
    )
    time_end = datetime.utcnow()
    time_elapsed = time_end - time_start

    print(f"Model loading time : {time_elapsed.total_seconds()} seconds")

    # The batch never changes between iterations, so slice it once.
    batch_input = data[:batch]

    time_start = datetime.utcnow()
    output = None
    for _ in range(repeat):
        predictions = predictor({
                input_name: batch_input
        })
        # One predicted class index per row of the output.
        output=[np.argmax(prediction) for prediction in predictions[output_name]]
    time_end = datetime.utcnow()
    time_elapsed_sec = (time_end - time_start).total_seconds()

    print(f"\nPrediction produced for {len(output)} instances batch, repeated {repeat} times\n")
    print(f"Inference elapsed time\t\t: {time_elapsed_sec} seconds")
    print(f"Average latency per batch\t: {time_elapsed_sec/repeat} seconds")

In [17]:
# Baseline latency of the SavedModel as exported.
inference_test(SAVED_MODEL_DIR)

Model loading time : 0.083352 seconds

Prediction produced for 300 instances batch, repeated 100 times

Inference elapsed time		: 29.558926 seconds
Average latency per batch	: 0.29558926 seconds


# Freeze Graph

In [18]:
def freeze_graph(saved_model_dir):
    """Freeze the SavedModel in `saved_model_dir` into a single GraphDef file.

    The frozen graph is written next to the SavedModel as `freezed_model.pb`,
    keeping only what is needed to compute the `softmax/Softmax` node.

    Args:
        saved_model_dir: folder of the SavedModel; also receives the output.
    """
    # These imports must stay local: this function is itself called
    # `freeze_graph`, so at notebook level that name no longer refers to the
    # TensorFlow module imported in the imports cell.
    from tensorflow.python.saved_model import tag_constants
    from tensorflow.python.tools import freeze_graph

    frozen_graph_path = os.path.join(saved_model_dir, "freezed_model.pb")

    freeze_graph.freeze_graph(
        # Inputs that only apply to the GraphDef/checkpoint route are unset.
        input_graph=None,
        input_saver=False,
        input_binary=False,
        input_checkpoint=None,
        restore_op_name=None,
        filename_tensor_name=None,
        clear_devices=False,
        input_meta_graph=False,
        # What is actually used: the SavedModel and the node to keep.
        input_saved_model_dir=saved_model_dir,
        saved_model_tags=tag_constants.SERVING,
        output_node_names="softmax/Softmax",
        initializer_nodes="",
        output_graph=frozen_graph_path,
    )

    print("SUCCESS:SavedModel graph freezed!")

In [19]:
# Writes `freezed_model.pb` inside SAVED_MODEL_DIR.
freeze_graph(SAVED_MODEL_DIR)

SUCCESS:SavedModel graph freezed!


# Optimize Graph

In [20]:
def get_graph_def_from_file(graph_filepath):
    """Read a binary-serialized GraphDef from `graph_filepath`.

    Args:
        graph_filepath: path of the `.pb` file, opened through tf.gfile.

    Returns:
        The parsed tf.GraphDef.
    """
    # Parsing a protobuf does not touch any tf.Graph, so the throw-away
    # `ops.Graph().as_default()` context (and its local import) was dropped.
    graph_def = tf.GraphDef()
    with tf.gfile.GFile(graph_filepath, "rb") as f:
        graph_def.ParseFromString(f.read())
    return graph_def

In [21]:
def optimize_graph(model_dir, graph_filename, transforms):
    """Run Graph Transform Tool passes over a frozen graph and save the result.

    Args:
        model_dir: folder that holds the frozen graph and receives the output.
        graph_filename: file name of the frozen GraphDef inside `model_dir`.
        transforms: list of transform specs handed to TransformGraph.

    The transformed graph is written in binary form to
    `<model_dir>/optimised_model.pb`.
    """
    from tensorflow.tools.graph_transforms import TransformGraph

    frozen_graph_path = os.path.join(model_dir, graph_filename)
    frozen_graph_def = get_graph_def_from_file(frozen_graph_path)

    # No input nodes are named explicitly; only the softmax output is pinned.
    transformed_graph_def = TransformGraph(
        frozen_graph_def,
        [],
        ['softmax/Softmax'],
        transforms,
    )

    tf.train.write_graph(
        transformed_graph_def,
        logdir=model_dir,
        name='optimised_model.pb',
        as_text=False,
    )

    print("SUCCESS:Freezed graph optimised! saved as `optimised_model.pb`")

In [22]:
# Graph Transform Tool passes handed to TransformGraph by optimize_graph.
# The commented-out entries (resize/pad fusing and quantization) are disabled.
transforms = [
    'remove_nodes(op=Identity)', 
    'fold_constants(ignore_errors=true)',
    'fold_batch_norms',
    #'fuse_resize_pad_and_conv',
    #'quantize_weights',
    #'quantize_nodes',
    'merge_duplicate_nodes',
    'strip_unused_nodes', 
    'sort_by_execution_order'
]

# Reads the frozen graph written by freeze_graph; output is `optimised_model.pb`.
optimize_graph(SAVED_MODEL_DIR, 'freezed_model.pb', transforms)

SUCCESS:Freezed graph optimised! saved as `optimised_model.pb`


**Describe optimized graph**

In [23]:
# Same inventory as before, now for the transformed graph written by optimize_graph.
describe_graph(get_graph_def_from_file(SAVED_MODEL_DIR + "/optimised_model.pb"))

Input Feature Nodes: ['serving_input_image']

Unused Nodes: []

Output Nodes: ['softmax/Softmax']

Quantization Nodes: []

Constant Count: 21

Variable Count: 0

Identity Count: 0

Total nodes: 46 


In [24]:
# Note: total nodes dropped from 223 (original graph) to 46 (optimised graph)

# Unfreeze GraphDef model to SavedModel format

In [25]:
def convert_graph_def_to_saved_model(graph_filepath, export_dir=None):
    """Wrap a (frozen/optimised) GraphDef file back into a SavedModel.

    Args:
        graph_filepath: path of the binary GraphDef to convert.
        export_dir: destination folder for the SavedModel. Defaults to
            `<SAVED_MODEL_DIR>/optimised`, which is the original behaviour.
            An existing folder at that location is deleted first.

    Note:
        The signature's input keys are the placeholder *node names* found in
        the graph, so they may differ from the input keys of the model the
        graph was frozen from. The only output key is "softmax".
    """
    if export_dir is None:
        # Backward-compatible default based on the notebook-level global.
        export_dir = os.path.join(SAVED_MODEL_DIR, 'optimised')

    if tf.gfile.Exists(export_dir):
        tf.gfile.DeleteRecursively(export_dir)

    graph_def = get_graph_def_from_file(graph_filepath)

    # Import into a fresh graph so tensor names match the file exactly.
    with tf.Session(graph=tf.Graph()) as session:
        tf.import_graph_def(graph_def, name="")
        tf.saved_model.simple_save(session,
                export_dir,
                inputs={
                    node.name: session.graph.get_tensor_by_name("{}:0".format(node.name))
                    for node in graph_def.node if node.op=='Placeholder'},
                outputs={
                    "softmax": session.graph.get_tensor_by_name("softmax/Softmax:0"),
                }
            )

        print("SUCCESS:Optimised graph converted to SavedModel!")

In [26]:
# The optimised GraphDef written by optimize_graph sits inside SAVED_MODEL_DIR.
OPTIMIZED_MODEL_PATH = SAVED_MODEL_DIR + '/optimised_model.pb'

# Produces a SavedModel under `<SAVED_MODEL_DIR>/optimised`.
convert_graph_def_to_saved_model(OPTIMIZED_MODEL_PATH)

SUCCESS:Optimised graph converted to SavedModel!


**Optimized model size**

In [30]:
# Derive the location from SAVED_MODEL_DIR rather than hard-coding one
# particular export id, so the cell keeps working after a fresh export.
get_size(os.path.join(SAVED_MODEL_DIR, 'optimised'))

/content/models/mnist/keras_classifier/export/1602137935/optimised/saved_model.pb 
Model size	: 25434.507 KB
Variables size	: 0.0 KB
Total Size	: 25434.507 KB


In [31]:
# Same measurement for the original export, for side-by-side comparison.
get_size(SAVED_MODEL_DIR) # unoptimized

/content/models/mnist/keras_classifier/export/1602137935/saved_model.pb 
Model size	: 40.51 KB
Variables size	: 25426.71 KB
Total Size	: 25467.22 KB


In [28]:
# test inference

# References

- [Original notebook (Outdated)](https://github.com/GoogleCloudPlatform/tf-estimator-tutorials/blob/master/00_Miscellaneous/model_optimisation/Tutorial%20-%20TensorFlow%20Model%20Optimisation%20for%20Serving%20-%20MNIST%20with%20Keras.ipynb)
- [Google Community Blog](https://medium.com/google-cloud/optimizing-tensorflow-models-for-serving-959080e9ddbf)