# [Graph Transform Tool](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/tools/graph_transforms)
Great [Blog Post](https://petewarden.com/2016/12/30/rewriting-tensorflow-graphs-with-the-gtt/) by [Pete Warden](https://www.linkedin.com/in/petewarden) from Google

# Optimize Trained Models for Inference
## Types of Optimizations
* Remove training-only operations (checkpoint saving, dropout)
* Strip out unused nodes
* Remove debug operations
* Fold batch normalization ops into weights (super cool)
* Round weights
* Quantize weights

# Compare Types of Optimizations

## Original Model (CPU)

### File Size

In [None]:
%%bash

ls -l /root/models/optimize_me/linear/cpu/

### Graph

In [None]:
%%bash

summarize_graph --in_graph=/root/models/optimize_me/linear/cpu/unoptimized_model_cpu.pb

In [None]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import re
from google.protobuf import text_format
from tensorflow.core.framework import graph_pb2

def convert_graph_to_dot(input_graph, output_dot, is_input_graph_binary):
    """Write a Graphviz ``dot`` description of a serialized GraphDef.

    Every node becomes a vertex labelled with its op type, and every entry
    in a node's input list becomes an edge pointing at that node.

    Args:
        input_graph: Path of the serialized GraphDef file to read.
        output_dot: Path of the dot file to create (overwritten if present).
        is_input_graph_binary: True to parse ``input_graph`` as a binary
            protobuf, False to parse it as protobuf text format.
    """
    graph_def = graph_pb2.GraphDef()
    with open(input_graph, "rb") as graph_file:
        serialized = graph_file.read()
        if is_input_graph_binary:
            graph_def.ParseFromString(serialized)
        else:
            text_format.Merge(serialized, graph_def)

    with open(output_dot, "wt") as dot_file:
        print("digraph graphname {", file=dot_file)
        for node in graph_def.node:
            target = node.name
            print('  "%s" [label="%s"];' % (target, node.op), file=dot_file)
            for source in node.input:
                # Keep only the producer's node name: drop anything after
                # the first ":" and a single leading "^" marker.
                producer = re.sub(r"^\^", "", source.split(":")[0])
                print('  "%s" -> "%s";' % (producer, target), file=dot_file)
        print("}", file=dot_file)
        print("Created dot file '%s' for graph '%s'." % (output_dot, input_graph))
        

In [None]:
input_graph='/root/models/optimize_me/linear/cpu/unoptimized_model_cpu.pb'
output_dot='/root/notebooks/unoptimized_cpu.dot'
convert_graph_to_dot(input_graph=input_graph, output_dot=output_dot, is_input_graph_binary=True)

In [None]:
%%bash

dot -T png /root/notebooks/unoptimized_cpu.dot \
    -o /root/notebooks/unoptimized_cpu.png > /tmp/a.out

In [None]:
from IPython.display import Image

Image('/root/notebooks/unoptimized_cpu.png', width=1024, height=768)

### Benchmarks

In [None]:
%%bash

benchmark_model --graph=/root/models/optimize_me/linear/cpu/unoptimized_model_cpu.pb \
    --input_layer=weights,bias,x_observed \
    --input_layer_type=float,float,float \
    --input_layer_shape=:: \
    --output_layer=add

## Strip Unused Nodes

In [None]:
%%bash

transform_graph \
    --in_graph=/root/models/optimize_me/linear/cpu/unoptimized_model_cpu.pb \
    --out_graph=/root/models/optimize_me/linear/cpu/strip_unused_optimized_cpu.pb \
    --inputs='x_observed,weights,bias' \
    --outputs='add' \
    --transforms='
strip_unused_nodes'

In [None]:
%%bash

ls -l /root/models/optimize_me/linear/cpu

### Graph

In [None]:
%%bash

summarize_graph --in_graph=/root/models/optimize_me/linear/cpu/strip_unused_optimized_cpu.pb

In [None]:
input_graph='/root/models/optimize_me/linear/cpu/strip_unused_optimized_cpu.pb'
output_dot='/root/notebooks/strip_unused_optimized_cpu.dot'
convert_graph_to_dot(input_graph=input_graph, output_dot=output_dot, is_input_graph_binary=True)

In [None]:
%%bash

dot -T png /root/notebooks/strip_unused_optimized_cpu.dot \
    -o /root/notebooks/strip_unused_optimized_cpu.png > /tmp/a.out

In [None]:
from IPython.display import Image

Image('/root/notebooks/strip_unused_optimized_cpu.png')

### Benchmarks

In [None]:
%%bash

benchmark_model --graph=/root/models/optimize_me/linear/cpu/strip_unused_optimized_cpu.pb \
    --input_layer=weights,bias,x_observed \
    --input_layer_type=float,float,float \
    --input_layer_shape=:: \
    --output_layer=add

## Remove Nodes
Remove pesky `Identity` and `CheckNumerics`

In [None]:
%%bash

transform_graph \
    --in_graph=/root/models/optimize_me/linear/cpu/strip_unused_optimized_cpu.pb \
    --out_graph=/root/models/optimize_me/linear/cpu/remove_nodes_optimized_cpu.pb \
    --inputs='x_observed,weights,bias' \
    --outputs='add' \
    --transforms='
strip_unused_nodes
remove_nodes(op=Identity, op=CheckNumerics)'

In [None]:
%%bash

ls -l /root/models/optimize_me/linear/cpu

### Graph

In [None]:
%%bash

summarize_graph --in_graph=/root/models/optimize_me/linear/cpu/remove_nodes_optimized_cpu.pb

In [None]:
input_graph='/root/models/optimize_me/linear/cpu/remove_nodes_optimized_cpu.pb'
output_dot='/root/notebooks/remove_nodes_optimized_cpu.dot'
convert_graph_to_dot(input_graph=input_graph, output_dot=output_dot, is_input_graph_binary=True)

In [None]:
%%bash

dot -T png /root/notebooks/remove_nodes_optimized_cpu.dot \
    -o /root/notebooks/remove_nodes_optimized_cpu.png > /tmp/a.out

In [None]:
from IPython.display import Image

Image('/root/notebooks/remove_nodes_optimized_cpu.png')

### Benchmarks

In [None]:
%%bash

# Benchmark the graph written by the remove_nodes transform in this section
# (remove_nodes_optimized_cpu.pb), not the earlier strip_unused output.
benchmark_model --graph=/root/models/optimize_me/linear/cpu/remove_nodes_optimized_cpu.pb \
    --input_layer=weights,bias,x_observed \
    --input_layer_type=float,float,float \
    --input_layer_shape=:: \
    --output_layer=add

## Fold Constants

In [None]:
%%bash

transform_graph \
--in_graph=/root/models/optimize_me/linear/cpu/unoptimized_model_cpu.pb \
--out_graph=/root/models/optimize_me/linear/cpu/fold_constants_optimized_cpu.pb \
--inputs='x_observed' \
--outputs='add' \
--transforms='
strip_unused_nodes
remove_nodes(op=Identity, op=CheckNumerics)
fold_constants(ignore_errors=true)'

### File Size

In [None]:
%%bash

ls -l /root/models/optimize_me/linear/cpu

### Graph

In [None]:
%%bash

summarize_graph --in_graph=/root/models/optimize_me/linear/cpu/fold_constants_optimized_cpu.pb

In [None]:
input_graph='/root/models/optimize_me/linear/cpu/fold_constants_optimized_cpu.pb'
output_dot='/root/notebooks/fold_constants_optimized_cpu.dot'
convert_graph_to_dot(input_graph=input_graph, output_dot=output_dot, is_input_graph_binary=True)

In [None]:
%%bash

dot -T png /root/notebooks/fold_constants_optimized_cpu.dot \
    -o /root/notebooks/fold_constants_optimized_cpu.png > /tmp/a.out

In [None]:
from IPython.display import Image

Image('/root/notebooks/fold_constants_optimized_cpu.png')

### Benchmarks

In [None]:
%%bash

benchmark_model --graph=/root/models/optimize_me/linear/cpu/fold_constants_optimized_cpu.pb \
                --input_layer=x_observed,bias,weights \
                --input_layer_type=float,float,float \
                --input_layer_shape=:: \
                --output_layer=add

## Fold Batch Normalizations
Prereq: `fold_constants`

In [None]:
%%bash

transform_graph \
--in_graph=/root/models/optimize_me/linear/cpu/unoptimized_model_cpu.pb \
--out_graph=/root/models/optimize_me/linear/cpu/fold_batch_norms_optimized_cpu.pb \
--inputs='x_observed' \
--outputs='add' \
--transforms='
strip_unused_nodes
remove_nodes(op=Identity, op=CheckNumerics)
fold_constants(ignore_errors=true)
fold_batch_norms
fold_old_batch_norms'

### File Size

In [None]:
%%bash

ls -l /root/models/optimize_me/linear/cpu

### Graph

In [None]:
%%bash

summarize_graph --in_graph=/root/models/optimize_me/linear/cpu/fold_batch_norms_optimized_cpu.pb

In [None]:
input_graph='/root/models/optimize_me/linear/cpu/fold_batch_norms_optimized_cpu.pb'
output_dot='/root/notebooks/fold_batch_norms_optimized_cpu.dot'
convert_graph_to_dot(input_graph=input_graph, output_dot=output_dot, is_input_graph_binary=True)

In [None]:
%%bash

dot -T png /root/notebooks/fold_batch_norms_optimized_cpu.dot \
    -o /root/notebooks/fold_batch_norms_optimized_cpu.png > /tmp/a.out

In [None]:
from IPython.display import Image

Image('/root/notebooks/fold_batch_norms_optimized_cpu.png')

### Benchmarks

In [None]:
%%bash

benchmark_model --graph=/root/models/optimize_me/linear/cpu/fold_batch_norms_optimized_cpu.pb \
                --input_layer=x_observed,bias,weights \
                --input_layer_type=float,float,float \
                --input_layer_shape=:: \
                --output_layer=add

## Quantize Weights
Prereq: `fold_batch_norms`

In [None]:
%%bash

transform_graph \
--in_graph=/root/models/optimize_me/linear/cpu/unoptimized_model_cpu.pb \
--out_graph=/root/models/optimize_me/linear/cpu/quantize_weights_optimized_cpu.pb \
--inputs='x_observed' \
--outputs='add' \
--transforms='
strip_unused_nodes
remove_nodes(op=Identity, op=CheckNumerics)
fold_constants(ignore_errors=true)
fold_batch_norms
fold_old_batch_norms
quantize_weights'

### File Size

In [None]:
%%bash

ls -l /root/models/optimize_me/linear/cpu/

### Graph

In [None]:
%%bash

summarize_graph --in_graph=/root/models/optimize_me/linear/cpu/quantize_weights_optimized_cpu.pb

In [None]:
input_graph='/root/models/optimize_me/linear/cpu/quantize_weights_optimized_cpu.pb'
output_dot='/root/notebooks/quantize_weights_optimized_cpu.dot'
convert_graph_to_dot(input_graph=input_graph, output_dot=output_dot, is_input_graph_binary=True)

In [None]:
%%bash

dot -T png /root/notebooks/quantize_weights_optimized_cpu.dot \
    -o /root/notebooks/quantize_weights_optimized_cpu.png > /tmp/a.out

In [None]:
from IPython.display import Image

Image('/root/notebooks/quantize_weights_optimized_cpu.png')

### Benchmarks

In [None]:
%%bash

benchmark_model --graph=/root/models/optimize_me/linear/cpu/quantize_weights_optimized_cpu.pb --input_layer=x_observed,bias,weights --input_layer_type=float,float,float --input_layer_shape=:: --output_layer=add

## Quantize Activations
Prereq: `quantize_weights`

In [None]:
%%bash

transform_graph \
--in_graph=/root/models/optimize_me/linear/cpu/fold_batch_norms_optimized_cpu.pb \
--out_graph=/root/models/optimize_me/linear/cpu/quantize_nodes_optimized_cpu.pb \
--inputs='x_observed' \
--outputs='add' \
--transforms='
strip_unused_nodes
remove_nodes(op=Identity, op=CheckNumerics)
fold_constants(ignore_errors=true)
fold_batch_norms
fold_old_batch_norms
quantize_weights
quantize_nodes'

In [None]:
%%bash

ls -l /root/models/optimize_me/linear/cpu/

In [None]:
%%bash

summarize_graph --in_graph=/root/models/optimize_me/linear/cpu/quantize_nodes_optimized_cpu.pb

In [None]:
input_graph='/root/models/optimize_me/linear/cpu/quantize_nodes_optimized_cpu.pb'
output_dot='/root/notebooks/quantize_nodes_optimized_cpu.dot'
convert_graph_to_dot(input_graph=input_graph, output_dot=output_dot, is_input_graph_binary=True)

In [None]:
%%bash

dot -T png /root/notebooks/quantize_nodes_optimized_cpu.dot \
    -o /root/notebooks/quantize_nodes_optimized_cpu.png > /tmp/a.out

In [None]:
from IPython.display import Image

Image('/root/notebooks/quantize_nodes_optimized_cpu.png')

### Benchmarks

In [None]:
%%bash

benchmark_model --graph=/root/models/optimize_me/linear/cpu/quantize_nodes_optimized_cpu.pb \
                --input_layer=x_observed,bias,weights \
                --input_layer_type=float,float,float \
                --input_layer_shape=:: \
                --output_layer=add

## Sort by Execution Order
* aka. Topological Order Sort
* Minimize inference overhead 
* Inputs for each node are guaranteed to be available on the forward path

In [None]:
%%bash

transform_graph \
--in_graph=/root/models/optimize_me/linear/cpu/unoptimized_model_cpu.pb \
--out_graph=/root/models/optimize_me/linear/cpu/sort_by_execution_order_optimized_cpu.pb \
--inputs='x_observed' \
--outputs='add' \
--transforms='
strip_unused_nodes
remove_nodes(op=Identity, op=CheckNumerics)
fold_constants(ignore_errors=true)
fold_batch_norms
fold_old_batch_norms
quantize_weights
quantize_nodes
sort_by_execution_order'

### File Size

In [None]:
%%bash

ls -l /root/models/optimize_me/linear/cpu/

### Graph

In [None]:
%%bash

summarize_graph --in_graph=/root/models/optimize_me/linear/cpu/sort_by_execution_order_optimized_cpu.pb

In [None]:
input_graph='/root/models/optimize_me/linear/cpu/sort_by_execution_order_optimized_cpu.pb'
output_dot='/root/notebooks/sort_by_execution_order_optimized_cpu.dot'
convert_graph_to_dot(input_graph=input_graph, output_dot=output_dot, is_input_graph_binary=True)

In [None]:
%%bash

dot -T png /root/notebooks/sort_by_execution_order_optimized_cpu.dot \
    -o /root/notebooks/sort_by_execution_order_optimized_cpu.png > /tmp/a.out

In [None]:
from IPython.display import Image

Image('/root/notebooks/sort_by_execution_order_optimized_cpu.png')

### Benchmarks

In [None]:
%%bash

benchmark_model --graph=/root/models/optimize_me/linear/cpu/sort_by_execution_order_optimized_cpu.pb \
    --input_layer=x_observed,bias,weights \
    --input_layer_type=float,float,float \
    --input_layer_shape=:: \
    --output_layer=add

## Combine All Optimizations

In [None]:
%%bash

transform_graph \
--in_graph=/root/models/optimize_me/linear/cpu/unoptimized_model_cpu.pb \
--out_graph=/root/models/optimize_me/linear/cpu/fully_optimized_cpu.pb \
--inputs='x_observed' \
--outputs='add' \
--transforms='
add_default_attributes
remove_nodes(op=Identity, op=CheckNumerics)
fold_constants(ignore_errors=true)
fold_batch_norms
fold_old_batch_norms
quantize_weights
strip_unused_nodes
sort_by_execution_order'
#quantize_nodes

### File Size

In [None]:
%%bash

ls -l /root/models/optimize_me/linear/cpu/

### Graph

In [None]:
%%bash

summarize_graph --in_graph=/root/models/optimize_me/linear/cpu/fully_optimized_cpu.pb

In [None]:
input_graph='/root/models/optimize_me/linear/cpu/fully_optimized_cpu.pb'
output_dot='/root/notebooks/fully_optimized_cpu.dot'
convert_graph_to_dot(input_graph=input_graph, output_dot=output_dot, is_input_graph_binary=True)

In [None]:
%%bash

dot -T png /root/notebooks/fully_optimized_cpu.dot \
    -o /root/notebooks/fully_optimized_cpu.png > /tmp/a.out

In [None]:
from IPython.display import Image

Image('/root/notebooks/fully_optimized_cpu.png')

### Benchmarks

In [None]:
%%bash

benchmark_model --graph=/root/models/optimize_me/linear/cpu/fully_optimized_cpu.pb \
    --input_layer=weights,x_observed,bias \
    --input_layer_type=float,float,float \
    --input_layer_shape=:: \
    --output_layer=add

## Obfuscate Names
Shorten and mangle internal graph node names

In [None]:
%%bash

transform_graph \
--in_graph=/root/models/optimize_me/linear/cpu/fully_optimized_cpu.pb \
--out_graph=/root/models/optimize_me/linear/cpu/obfuscate_names_optimized_cpu.pb \
--inputs='x_observed' \
--outputs='add' \
--transforms='
obfuscate_names'

### File Size

In [None]:
%%bash

ls -l /root/models/optimize_me/linear/cpu/

### Graph

In [None]:
%%bash

summarize_graph --in_graph=/root/models/optimize_me/linear/cpu/obfuscate_names_optimized_cpu.pb

## Freeze Fully Optimized Graph

In [None]:
from tensorflow.python.tools import freeze_graph

# Every artifact for this model lives under one directory, so all paths
# below are derived from it rather than repeated as literals.
optimize_me_parent_path = '/root/models/optimize_me/linear/cpu'

# Input: the graph written by the "Combine All Optimizations" step.
fully_optimized_model_graph_path = '%s/fully_optimized_cpu.pb' % optimize_me_parent_path
# Output: where the frozen graph is written.
fully_optimized_frozen_model_graph_path = '%s/fully_optimized_frozen_cpu.pb' % optimize_me_parent_path

# Checkpoint supplying the variable values to freeze into the graph.
model_checkpoint_path = '%s/model.ckpt' % optimize_me_parent_path

freeze_graph.freeze_graph(input_graph=fully_optimized_model_graph_path,
                          input_saver="",
                          input_binary=True,  # the .pb input is a binary protobuf
                          input_checkpoint=model_checkpoint_path,
                          output_node_names="add",
                          restore_op_name="save/restore_all",
                          filename_tensor_name="save/Const:0",
                          output_graph=fully_optimized_frozen_model_graph_path,
                          clear_devices=True,
                          initializer_nodes="")
print(fully_optimized_frozen_model_graph_path)

## Reset Default Graph

In [None]:
import tensorflow as tf

# tf.reset_default_graph() returns None, so binding its result would leave
# `graph` set to None. Reset first, then fetch the fresh default graph.
tf.reset_default_graph()
graph = tf.get_default_graph()

## Create New Session

In [None]:
sess = tf.Session()

In [None]:
import time
from datetime import datetime

# Model version = current Unix time in whole seconds.
# strftime("%s") is a platform-specific extension (not available on every
# OS), so convert the local time to epoch seconds with time.mktime instead.
version = int(time.mktime(datetime.now().timetuple()))

In [None]:
fully_optimized_saved_model_path = '/root/models/linear_fully_optimized/cpu/%s' % version

print(fully_optimized_saved_model_path)

In [None]:
import tensorflow as tf
from tensorflow.python.saved_model import builder as saved_model_builder
from tensorflow.python.saved_model import signature_constants
from tensorflow.python.saved_model import signature_def_utils
from tensorflow.python.saved_model import tag_constants
from tensorflow.python.saved_model import utils

print(fully_optimized_frozen_model_graph_path)

# Load GraphDef created above
with tf.gfile.GFile(fully_optimized_frozen_model_graph_path, 'rb') as f:
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(f.read())

# Import GraphDef from above into current graph
tf.import_graph_def(
    graph_def, 
    input_map=None, 
    return_elements=None, 
    name="", 
    op_dict=None, 
    producer_op_list=None
)

In [None]:
# Export the graph imported into the default graph as a SavedModel with a
# single predict signature (x_observed -> y_pred).
graph = tf.get_default_graph()

# List every operation name so the tensor names used below can be checked.
for op in graph.get_operations():
    print(op.name)

# Input tensor of the model, looked up by name.
x_observed = graph.get_tensor_by_name('x_observed:0')
print(x_observed)

# Output tensor of the model (first output of the 'add' op).
y_pred = graph.get_tensor_by_name('add:0')
print(y_pred)

# Wrap both tensors in the tensor-info form that build_signature_def takes.
tensor_info_x_observed = utils.build_tensor_info(x_observed)
print(tensor_info_x_observed)

tensor_info_y_pred = utils.build_tensor_info(y_pred)
print(tensor_info_y_pred)

# The builder writes to the versioned export directory computed earlier.
builder = saved_model_builder.SavedModelBuilder(fully_optimized_saved_model_path)

prediction_signature = signature_def_utils.build_signature_def(inputs = 
                {'x_observed': tensor_info_x_observed}, 
                outputs = {'y_pred': tensor_info_y_pred}, 
                method_name = signature_constants.PREDICT_METHOD_NAME)

# The same signature is registered twice: under 'predict' and under the
# default serving key. `sess` is the session created in an earlier cell.
builder.add_meta_graph_and_variables(sess, [tag_constants.SERVING],
                             signature_def_map={'predict':prediction_signature,                                     
                                                signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:prediction_signature}, 
                              clear_devices=True,
)

# as_text=False requests the non-text (binary) serialization.
builder.save(as_text=False)

In [None]:
import os

os.listdir(fully_optimized_saved_model_path)

In [None]:
sess.close()