# Quantizing Pretrained DL Models in TensorFlow

In [1]:
#importing required libraries
import tensorflow as tf
from tensorflow import keras
import numpy as np
from PIL import Image
import os
from pathlib import Path

In [2]:
tf.__version__

'2.9.2'

In [3]:
import logging
tf.get_logger().setLevel(logging.ERROR)

In [4]:
def dynamic_range_quantization(model_path, optimization=None):
    """Convert a saved Keras model to a dynamic-range-quantized TFLite file.

    The model is loaded from ``model_path``, converted with the selected
    converter optimization, and written next to the source file as
    ``<model_path without extension>_dynamic_quant.tflite``.

    Args:
        model_path: Path of a model loadable by ``tf.keras.models.load_model``.
        optimization: ``"size"`` selects ``Optimize.OPTIMIZE_FOR_SIZE``,
            ``"latency"`` selects ``Optimize.OPTIMIZE_FOR_LATENCY``; any other
            value (including the default ``None``) selects ``Optimize.DEFAULT``.

    Returns:
        0 once the ``.tflite`` file has been written.
    """
    model = tf.keras.models.load_model(model_path)
    # Strip the actual extension rather than assuming it is exactly three
    # characters long, so '.keras' or extension-less paths also work.
    save_path = os.path.splitext(model_path)[0] + '_dynamic_quant.tflite'

    converter = tf.lite.TFLiteConverter.from_keras_model(model)

    # NOTE(review): the tf.lite.Optimize docs list OPTIMIZE_FOR_SIZE and
    # OPTIMIZE_FOR_LATENCY as deprecated aliases of DEFAULT - confirm before
    # relying on the three branches producing different models.
    if optimization == "size":
        converter.optimizations = [tf.lite.Optimize.OPTIMIZE_FOR_SIZE]
    elif optimization == "latency":
        converter.optimizations = [tf.lite.Optimize.OPTIMIZE_FOR_LATENCY]
    else:
        converter.optimizations = [tf.lite.Optimize.DEFAULT]

    tflite_model = converter.convert()

    # Save the model.
    with open(save_path, 'wb') as f:
        f.write(tflite_model)
    print("Successfuly created and saved Dynamic Range Quantized {} model".format(model_path))
    return 0

In [5]:
def float16_quantization(model_path, optimization=None):
    """Convert a saved Keras model to a float16-quantized TFLite file.

    The model is loaded from ``model_path``, converted with float16 set as
    the supported target type, and written next to the source file as
    ``<model_path without extension>_f16_quant.tflite``.

    Args:
        model_path: Path of a model loadable by ``tf.keras.models.load_model``.
        optimization: ``"size"`` selects ``Optimize.OPTIMIZE_FOR_SIZE``,
            ``"latency"`` selects ``Optimize.OPTIMIZE_FOR_LATENCY``; any other
            value (including the default ``None``) selects ``Optimize.DEFAULT``.

    Returns:
        0 once the ``.tflite`` file has been written.
    """
    model = tf.keras.models.load_model(model_path)
    # Strip the actual extension rather than assuming it is exactly three
    # characters long, so '.keras' or extension-less paths also work.
    save_path = os.path.splitext(model_path)[0] + '_f16_quant.tflite'

    converter = tf.lite.TFLiteConverter.from_keras_model(model)

    # NOTE(review): the tf.lite.Optimize docs list OPTIMIZE_FOR_SIZE and
    # OPTIMIZE_FOR_LATENCY as deprecated aliases of DEFAULT - confirm before
    # relying on the three branches producing different models.
    if optimization == "size":
        converter.optimizations = [tf.lite.Optimize.OPTIMIZE_FOR_SIZE]
    elif optimization == "latency":
        converter.optimizations = [tf.lite.Optimize.OPTIMIZE_FOR_LATENCY]
    else:
        converter.optimizations = [tf.lite.Optimize.DEFAULT]

    # Declaring float16 as the supported type is what requests float16 output.
    converter.target_spec.supported_types = [tf.float16]
    tflite_model = converter.convert()

    # Save the model.
    with open(save_path, 'wb') as f:
        f.write(tflite_model)

    print("Successfuly created and saved Float16 Quantized {} model".format(model_path))

    return 0

In [6]:
def representative_data_gen():
    """Yield up to 100 calibration samples for post-training quantization.

    Files are read from ``./val/n01440764/`` (relative to the working
    directory), loaded at 224x224, and yielded one at a time as a
    single-element list holding a batch-of-one array.

    NOTE(review): pixel values are yielded as loaded (the ``/255`` scaling
    stays disabled, as before) - confirm this matches the preprocessing each
    model expects at inference time.
    """
    calib_dir = './val/n01440764/'
    max_samples = 100
    # os.listdir() returns entries in arbitrary order; sorting makes the
    # calibration set reproducible, and slicing stops the scan once enough
    # samples have been selected.
    for name in sorted(os.listdir(calib_dir))[:max_samples]:
        img = tf.keras.preprocessing.image.load_img(
            os.path.join(calib_dir, name), target_size=(224, 224))
        img = tf.keras.preprocessing.image.img_to_array(img)
        # Add the leading batch dimension before handing it to the converter.
        yield [np.expand_dims(img, axis=0)]
    
def int8_quantization(model_path, optimization=None, integer_only=False):
    """Convert a saved Keras model to an integer-quantized TFLite file.

    The converter is calibrated with ``representative_data_gen``. With
    ``integer_only=False`` the result is written as
    ``<model_path without extension>_int8_quant.tflite``; with
    ``integer_only=True`` the converter is restricted to INT8 builtin ops,
    uint8 input/output types are requested, and the result is written as
    ``<model_path without extension>_fullInt_quant.tflite``.

    Args:
        model_path: Path of a model loadable by ``tf.keras.models.load_model``.
        optimization: ``"size"`` selects ``Optimize.OPTIMIZE_FOR_SIZE``,
            ``"latency"`` selects ``Optimize.OPTIMIZE_FOR_LATENCY``; any other
            value (including the default ``None``) selects ``Optimize.DEFAULT``.
        integer_only: Whether to request the integer-only variant.

    Returns:
        0 once the ``.tflite`` file has been written.
    """
    model = tf.keras.models.load_model(model_path)
    # Strip the actual extension rather than assuming it is exactly three
    # characters long, so '.keras' or extension-less paths also work.
    base_path = os.path.splitext(model_path)[0]
    save_path = base_path + '_int8_quant.tflite'

    converter = tf.lite.TFLiteConverter.from_keras_model(model)

    # NOTE(review): the tf.lite.Optimize docs list OPTIMIZE_FOR_SIZE and
    # OPTIMIZE_FOR_LATENCY as deprecated aliases of DEFAULT - confirm before
    # relying on the three branches producing different models.
    if optimization == "size":
        converter.optimizations = [tf.lite.Optimize.OPTIMIZE_FOR_SIZE]
    elif optimization == "latency":
        converter.optimizations = [tf.lite.Optimize.OPTIMIZE_FOR_LATENCY]
    else:
        converter.optimizations = [tf.lite.Optimize.DEFAULT]

    converter.representative_dataset = representative_data_gen

    if integer_only:
        # Ensure that if any ops can't be quantized, the converter throws an error
        converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
        # Set the input and output tensors to uint8 (APIs added in r2.3)
        converter.inference_input_type = tf.uint8
        converter.inference_output_type = tf.uint8
        save_path = base_path + '_fullInt_quant.tflite'

    tflite_model_quant = converter.convert()

    # Save the model.
    with open(save_path, 'wb') as f:
        f.write(tflite_model_quant)

    print("Successfuly created and saved Int8 Quantized {} model".format(model_path))

    return 0

In [7]:
# Build ResNet50 with ImageNet weights (classifier head included) and store it as HDF5.
resnet_50 = keras.applications.resnet50.ResNet50(
    weights='imagenet',
    include_top=True,
    input_shape=(224, 224, 3),
)
# NOTE(review): presumably cleared so no compiled-metrics state goes into the saved file - confirm.
resnet_50.compiled_metrics = None
resnet_50.save('Models/resnet_50.h5')

In [8]:
# Build VGG16 with ImageNet weights (classifier head included) and store it as HDF5.
vgg16 = keras.applications.vgg16.VGG16(
    weights='imagenet',
    include_top=True,
    input_shape=(224, 224, 3),
)
# NOTE(review): presumably cleared so no compiled-metrics state goes into the saved file - confirm.
vgg16.compiled_metrics = None
vgg16.save("Models/vgg16.h5")

In [9]:
# Build MobileNetV2 with ImageNet weights (classifier head included) and store it as HDF5.
mobilenet_v2 = keras.applications.mobilenet_v2.MobileNetV2(
    weights='imagenet',
    include_top=True,
    input_shape=(224, 224, 3),
)
# NOTE(review): presumably cleared so no compiled-metrics state goes into the saved file - confirm.
mobilenet_v2.compiled_metrics = None
mobilenet_v2.save('Models/mobilenet_v2.h5')

# Dynamic Range Quantization

In [10]:
# Any value other than 'size' or 'latency' selects Optimize.DEFAULT.
dynamic_range_quantization('Models/resnet_50.h5', 'default')



Successfuly created and saved Dynamic Range Quantized Models/resnet_50.h5 model


2022-10-01 16:34:24.379178: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:362] Ignored output_format.
2022-10-01 16:34:24.379196: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:365] Ignored drop_control_dependency.
2022-10-01 16:34:24.379638: I tensorflow/cc/saved_model/reader.cc:43] Reading SavedModel from: /var/folders/hx/8ktl0wt56q7dx073nt1x6dsm0000gp/T/tmp3iw26z26
2022-10-01 16:34:24.396577: I tensorflow/cc/saved_model/reader.cc:81] Reading meta graph with tags { serve }
2022-10-01 16:34:24.396589: I tensorflow/cc/saved_model/reader.cc:122] Reading SavedModel debug info (if present) from: /var/folders/hx/8ktl0wt56q7dx073nt1x6dsm0000gp/T/tmp3iw26z26
2022-10-01 16:34:24.446765: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:354] MLIR V1 optimization pass is not enabled
2022-10-01 16:34:24.463212: I tensorflow/cc/saved_model/loader.cc:228] Restoring SavedModel bundle.
2022-10-01 16:34:24.473285: W tensorflow/core/platform/profile_

0

In [11]:
# Any value other than 'size' or 'latency' selects Optimize.DEFAULT.
dynamic_range_quantization('Models/vgg16.h5', 'default')

2022-10-01 16:34:34.517906: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:362] Ignored output_format.
2022-10-01 16:34:34.517922: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:365] Ignored drop_control_dependency.
2022-10-01 16:34:34.518008: I tensorflow/cc/saved_model/reader.cc:43] Reading SavedModel from: /var/folders/hx/8ktl0wt56q7dx073nt1x6dsm0000gp/T/tmpeo23mavn
2022-10-01 16:34:34.520241: I tensorflow/cc/saved_model/reader.cc:81] Reading meta graph with tags { serve }
2022-10-01 16:34:34.520247: I tensorflow/cc/saved_model/reader.cc:122] Reading SavedModel debug info (if present) from: /var/folders/hx/8ktl0wt56q7dx073nt1x6dsm0000gp/T/tmpeo23mavn
2022-10-01 16:34:34.528118: I tensorflow/cc/saved_model/loader.cc:228] Restoring SavedModel bundle.
2022-10-01 16:34:34.957319: I tensorflow/cc/saved_model/loader.cc:212] Running initialization op on SavedModel bundle at path: /var/folders/hx/8ktl0wt56q7dx073nt1x6dsm0000gp/T/tmpeo23mavn
2022-10-

Successfuly created and saved Dynamic Range Quantized Models/vgg16.h5 model


0

In [12]:
# Any value other than 'size' or 'latency' selects Optimize.DEFAULT.
dynamic_range_quantization('Models/mobilenet_v2.h5', 'default')



Successfuly created and saved Dynamic Range Quantized Models/mobilenet_v2.h5 model


2022-10-01 16:37:10.886066: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:362] Ignored output_format.
2022-10-01 16:37:10.886079: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:365] Ignored drop_control_dependency.
2022-10-01 16:37:10.886176: I tensorflow/cc/saved_model/reader.cc:43] Reading SavedModel from: /var/folders/hx/8ktl0wt56q7dx073nt1x6dsm0000gp/T/tmpeisv2qie
2022-10-01 16:37:10.902274: I tensorflow/cc/saved_model/reader.cc:81] Reading meta graph with tags { serve }
2022-10-01 16:37:10.902287: I tensorflow/cc/saved_model/reader.cc:122] Reading SavedModel debug info (if present) from: /var/folders/hx/8ktl0wt56q7dx073nt1x6dsm0000gp/T/tmpeisv2qie
2022-10-01 16:37:10.961604: I tensorflow/cc/saved_model/loader.cc:228] Restoring SavedModel bundle.
2022-10-01 16:37:11.264689: I tensorflow/cc/saved_model/loader.cc:212] Running initialization op on SavedModel bundle at path: /var/folders/hx/8ktl0wt56q7dx073nt1x6dsm0000gp/T/tmpeisv2qie
2022-10-

0

# Float16 Quantization

In [13]:
# Any value other than 'size' or 'latency' selects Optimize.DEFAULT.
float16_quantization('Models/resnet_50.h5', 'default')

2022-10-01 16:37:50.877909: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:362] Ignored output_format.
2022-10-01 16:37:50.877926: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:365] Ignored drop_control_dependency.
2022-10-01 16:37:50.878011: I tensorflow/cc/saved_model/reader.cc:43] Reading SavedModel from: /var/folders/hx/8ktl0wt56q7dx073nt1x6dsm0000gp/T/tmpsm_oe1py
2022-10-01 16:37:50.895946: I tensorflow/cc/saved_model/reader.cc:81] Reading meta graph with tags { serve }
2022-10-01 16:37:50.895962: I tensorflow/cc/saved_model/reader.cc:122] Reading SavedModel debug info (if present) from: /var/folders/hx/8ktl0wt56q7dx073nt1x6dsm0000gp/T/tmpsm_oe1py
2022-10-01 16:37:50.963239: I tensorflow/cc/saved_model/loader.cc:228] Restoring SavedModel bundle.
2022-10-01 16:37:51.402692: I tensorflow/cc/saved_model/loader.cc:212] Running initialization op on SavedModel bundle at path: /var/folders/hx/8ktl0wt56q7dx073nt1x6dsm0000gp/T/tmpsm_oe1py
2022-10-

Successfuly created and saved Float16 Quantized Models/resnet_50.h5 model


0

In [14]:
# Any value other than 'size' or 'latency' selects Optimize.DEFAULT.
float16_quantization('Models/vgg16.h5', 'default')

2022-10-01 16:37:59.194393: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:362] Ignored output_format.
2022-10-01 16:37:59.194406: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:365] Ignored drop_control_dependency.
2022-10-01 16:37:59.194486: I tensorflow/cc/saved_model/reader.cc:43] Reading SavedModel from: /var/folders/hx/8ktl0wt56q7dx073nt1x6dsm0000gp/T/tmpxfn8kj_0
2022-10-01 16:37:59.196646: I tensorflow/cc/saved_model/reader.cc:81] Reading meta graph with tags { serve }
2022-10-01 16:37:59.196653: I tensorflow/cc/saved_model/reader.cc:122] Reading SavedModel debug info (if present) from: /var/folders/hx/8ktl0wt56q7dx073nt1x6dsm0000gp/T/tmpxfn8kj_0
2022-10-01 16:37:59.204633: I tensorflow/cc/saved_model/loader.cc:228] Restoring SavedModel bundle.
2022-10-01 16:37:59.634396: I tensorflow/cc/saved_model/loader.cc:212] Running initialization op on SavedModel bundle at path: /var/folders/hx/8ktl0wt56q7dx073nt1x6dsm0000gp/T/tmpxfn8kj_0
2022-10-

Successfuly created and saved Float16 Quantized Models/vgg16.h5 model


0

In [15]:
# Any value other than 'size' or 'latency' selects Optimize.DEFAULT.
float16_quantization('Models/mobilenet_v2.h5', 'default')



Successfuly created and saved Float16 Quantized Models/mobilenet_v2.h5 model


2022-10-01 16:40:23.021858: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:362] Ignored output_format.
2022-10-01 16:40:23.021871: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:365] Ignored drop_control_dependency.
2022-10-01 16:40:23.021950: I tensorflow/cc/saved_model/reader.cc:43] Reading SavedModel from: /var/folders/hx/8ktl0wt56q7dx073nt1x6dsm0000gp/T/tmpabc6aa3j
2022-10-01 16:40:23.038099: I tensorflow/cc/saved_model/reader.cc:81] Reading meta graph with tags { serve }
2022-10-01 16:40:23.038115: I tensorflow/cc/saved_model/reader.cc:122] Reading SavedModel debug info (if present) from: /var/folders/hx/8ktl0wt56q7dx073nt1x6dsm0000gp/T/tmpabc6aa3j
2022-10-01 16:40:23.098586: I tensorflow/cc/saved_model/loader.cc:228] Restoring SavedModel bundle.
2022-10-01 16:40:23.399164: I tensorflow/cc/saved_model/loader.cc:212] Running initialization op on SavedModel bundle at path: /var/folders/hx/8ktl0wt56q7dx073nt1x6dsm0000gp/T/tmpabc6aa3j
2022-10-

0

# Full Integer Quantization

Float Fallback

In [16]:
# Float-fallback conversion: integer_only keeps its default of False.
int8_quantization(model_path='Models/resnet_50.h5')

2022-10-01 16:41:02.729106: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:362] Ignored output_format.
2022-10-01 16:41:02.729120: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:365] Ignored drop_control_dependency.
2022-10-01 16:41:02.729203: I tensorflow/cc/saved_model/reader.cc:43] Reading SavedModel from: /var/folders/hx/8ktl0wt56q7dx073nt1x6dsm0000gp/T/tmpiqnhhdqh
2022-10-01 16:41:02.747308: I tensorflow/cc/saved_model/reader.cc:81] Reading meta graph with tags { serve }
2022-10-01 16:41:02.747324: I tensorflow/cc/saved_model/reader.cc:122] Reading SavedModel debug info (if present) from: /var/folders/hx/8ktl0wt56q7dx073nt1x6dsm0000gp/T/tmpiqnhhdqh
2022-10-01 16:41:02.816614: I tensorflow/cc/saved_model/loader.cc:228] Restoring SavedModel bundle.
2022-10-01 16:41:03.257956: I tensorflow/cc/saved_model/loader.cc:212] Running initialization op on SavedModel bundle at path: /var/folders/hx/8ktl0wt56q7dx073nt1x6dsm0000gp/T/tmpiqnhhdqh
2022-10-

Successfuly created and saved Int8 Quantized Models/resnet_50.h5 model


fully_quantize: 0, inference_type: 6, input_inference_type: 0, output_inference_type: 0


0

In [17]:
# Float-fallback conversion: integer_only keeps its default of False.
int8_quantization(model_path='Models/vgg16.h5')

2022-10-01 16:41:33.231885: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:362] Ignored output_format.
2022-10-01 16:41:33.231898: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:365] Ignored drop_control_dependency.
2022-10-01 16:41:33.231996: I tensorflow/cc/saved_model/reader.cc:43] Reading SavedModel from: /var/folders/hx/8ktl0wt56q7dx073nt1x6dsm0000gp/T/tmpweqxqwdg
2022-10-01 16:41:33.234412: I tensorflow/cc/saved_model/reader.cc:81] Reading meta graph with tags { serve }
2022-10-01 16:41:33.234422: I tensorflow/cc/saved_model/reader.cc:122] Reading SavedModel debug info (if present) from: /var/folders/hx/8ktl0wt56q7dx073nt1x6dsm0000gp/T/tmpweqxqwdg
2022-10-01 16:41:33.242838: I tensorflow/cc/saved_model/loader.cc:228] Restoring SavedModel bundle.
2022-10-01 16:41:33.687503: I tensorflow/cc/saved_model/loader.cc:212] Running initialization op on SavedModel bundle at path: /var/folders/hx/8ktl0wt56q7dx073nt1x6dsm0000gp/T/tmpweqxqwdg
2022-10-

Successfuly created and saved Int8 Quantized Models/vgg16.h5 model


0

In [18]:
# Float-fallback conversion: integer_only keeps its default of False.
int8_quantization(model_path='Models/mobilenet_v2.h5')

2022-10-01 16:44:33.069914: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:362] Ignored output_format.
2022-10-01 16:44:33.069928: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:365] Ignored drop_control_dependency.
2022-10-01 16:44:33.070008: I tensorflow/cc/saved_model/reader.cc:43] Reading SavedModel from: /var/folders/hx/8ktl0wt56q7dx073nt1x6dsm0000gp/T/tmpqzipqiko
2022-10-01 16:44:33.086691: I tensorflow/cc/saved_model/reader.cc:81] Reading meta graph with tags { serve }
2022-10-01 16:44:33.086705: I tensorflow/cc/saved_model/reader.cc:122] Reading SavedModel debug info (if present) from: /var/folders/hx/8ktl0wt56q7dx073nt1x6dsm0000gp/T/tmpqzipqiko
2022-10-01 16:44:33.155075: I tensorflow/cc/saved_model/loader.cc:228] Restoring SavedModel bundle.
2022-10-01 16:44:33.471347: I tensorflow/cc/saved_model/loader.cc:212] Running initialization op on SavedModel bundle at path: /var/folders/hx/8ktl0wt56q7dx073nt1x6dsm0000gp/T/tmpqzipqiko
2022-10-

Successfuly created and saved Int8 Quantized Models/mobilenet_v2.h5 model


fully_quantize: 0, inference_type: 6, input_inference_type: 0, output_inference_type: 0


0

Full integer quantization (integer-only, uint8 input/output)

In [19]:
# Integer-only conversion: requests uint8 model input and output.
int8_quantization(model_path='Models/resnet_50.h5', integer_only=True)

2022-10-01 16:45:20.963767: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:362] Ignored output_format.
2022-10-01 16:45:20.963780: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:365] Ignored drop_control_dependency.
2022-10-01 16:45:20.963860: I tensorflow/cc/saved_model/reader.cc:43] Reading SavedModel from: /var/folders/hx/8ktl0wt56q7dx073nt1x6dsm0000gp/T/tmpfs4dszb2
2022-10-01 16:45:20.981876: I tensorflow/cc/saved_model/reader.cc:81] Reading meta graph with tags { serve }
2022-10-01 16:45:20.981889: I tensorflow/cc/saved_model/reader.cc:122] Reading SavedModel debug info (if present) from: /var/folders/hx/8ktl0wt56q7dx073nt1x6dsm0000gp/T/tmpfs4dszb2
2022-10-01 16:45:21.053676: I tensorflow/cc/saved_model/loader.cc:228] Restoring SavedModel bundle.
2022-10-01 16:45:21.495120: I tensorflow/cc/saved_model/loader.cc:212] Running initialization op on SavedModel bundle at path: /var/folders/hx/8ktl0wt56q7dx073nt1x6dsm0000gp/T/tmpfs4dszb2
2022-10-

Successfuly created and saved Int8 Quantized Models/resnet_50.h5 model


fully_quantize: 0, inference_type: 6, input_inference_type: 3, output_inference_type: 3


0

In [20]:
# Integer-only conversion: requests uint8 model input and output.
int8_quantization(model_path='Models/vgg16.h5', integer_only=True)

2022-10-01 16:45:50.463378: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:362] Ignored output_format.
2022-10-01 16:45:50.463395: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:365] Ignored drop_control_dependency.
2022-10-01 16:45:50.463477: I tensorflow/cc/saved_model/reader.cc:43] Reading SavedModel from: /var/folders/hx/8ktl0wt56q7dx073nt1x6dsm0000gp/T/tmp51vmb0yl
2022-10-01 16:45:50.465677: I tensorflow/cc/saved_model/reader.cc:81] Reading meta graph with tags { serve }
2022-10-01 16:45:50.465684: I tensorflow/cc/saved_model/reader.cc:122] Reading SavedModel debug info (if present) from: /var/folders/hx/8ktl0wt56q7dx073nt1x6dsm0000gp/T/tmp51vmb0yl
2022-10-01 16:45:50.473589: I tensorflow/cc/saved_model/loader.cc:228] Restoring SavedModel bundle.
2022-10-01 16:45:50.903454: I tensorflow/cc/saved_model/loader.cc:212] Running initialization op on SavedModel bundle at path: /var/folders/hx/8ktl0wt56q7dx073nt1x6dsm0000gp/T/tmp51vmb0yl
2022-10-

Successfuly created and saved Int8 Quantized Models/vgg16.h5 model


0

In [21]:
# Integer-only conversion: requests uint8 model input and output.
int8_quantization(model_path='Models/mobilenet_v2.h5', integer_only=True)

2022-10-01 17:44:05.547334: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:362] Ignored output_format.
2022-10-01 17:44:05.547353: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:365] Ignored drop_control_dependency.
2022-10-01 17:44:05.547448: I tensorflow/cc/saved_model/reader.cc:43] Reading SavedModel from: /var/folders/hx/8ktl0wt56q7dx073nt1x6dsm0000gp/T/tmpy0e52dy_
2022-10-01 17:44:05.563656: I tensorflow/cc/saved_model/reader.cc:81] Reading meta graph with tags { serve }
2022-10-01 17:44:05.563670: I tensorflow/cc/saved_model/reader.cc:122] Reading SavedModel debug info (if present) from: /var/folders/hx/8ktl0wt56q7dx073nt1x6dsm0000gp/T/tmpy0e52dy_
2022-10-01 17:44:05.626533: I tensorflow/cc/saved_model/loader.cc:228] Restoring SavedModel bundle.
2022-10-01 17:44:05.930963: I tensorflow/cc/saved_model/loader.cc:212] Running initialization op on SavedModel bundle at path: /var/folders/hx/8ktl0wt56q7dx073nt1x6dsm0000gp/T/tmpy0e52dy_
2022-10-

Successfuly created and saved Int8 Quantized Models/mobilenet_v2.h5 model


fully_quantize: 0, inference_type: 6, input_inference_type: 3, output_inference_type: 3


0

# 

# Graveyard

Below is the code to quantize the model using a checkpoint file or a frozen TensorFlow graph.

In [None]:
# Dataset for testing has been taken from https://github.com/fastai/imagenette,
# which has images of 10 most common classes from the ImageNet dataset.

In [None]:
import tensorflow as tf
import numpy as np
import cv2

In [None]:
# Download and unpack the official TensorFlow (TF-Slim) ResNet-v1-50 checkpoint archive.
# Pretrained models : https://github.com/tensorflow/models/tree/master/research/slim
# We will be using Resnet50 in this example.
# A later cell restores 'resnet_v1_50.ckpt', presumably extracted from this archive - confirm.
!wget http://download.tensorflow.org/models/resnet_v1_50_2016_08_28.tar.gz
!tar -xf resnet_v1_50_2016_08_28.tar.gz

# Creating Frozen graph from ckpt file

In [None]:
from tensorflow.python.tools import freeze_graph
from tf_slim.nets import resnet_v1
import tf_slim as slim

# Switch to TF1-style graph behaviour; the placeholder / Saver / Session code
# below is written against the tf.compat.v1 API.
tf.compat.v1.disable_eager_execution()
tf.compat.v1.disable_v2_behavior()

# Create graph: a fixed-shape float32 input feeding the slim ResNet-v1-50
# definition, built with is_training=False.
inputs = tf.compat.v1.placeholder(tf.float32, shape=[1, 224, 224, 3])
with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        net, end_points = resnet_v1.resnet_v1_50(inputs, num_classes=1000,is_training=False)

# Saver used below to restore the checkpoint weights into this graph.
saver = tf.compat.v1.train.Saver()

# A plain string (the parentheses do not make a tuple); it is split on ","
# further down to obtain the list of output node names.
output_node_names = ("resnet_v1_50/pool5")

with tf.compat.v1.Session() as sess:
        saver.restore(sess, 'resnet_v1_50.ckpt')
        # Looked up here but not used again in this cell.
        representation_tensor = sess.graph.get_tensor_by_name('resnet_v1_50/pool5:0') 
        # Dump the graph definition to ./resnet_v1_50.pbtxt.
        tf.compat.v1.train.write_graph(sess.graph_def,'./','resnet_v1_50.pbtxt')

        # Names of every node in the default graph; only referenced by the
        # commented-out debug print below.
        tensor_name_list = [tensor.name for tensor in tf.compat.v1.get_default_graph().as_graph_def().node]
        #print([tensor_name for tensor_name in tensor_name_list])
        # Convert the graph's variables to constants for the requested output nodes.
        output_graph_def = tf.compat.v1.graph_util.convert_variables_to_constants(
                    sess, # The session is used to retrieve the weights
                    tf.compat.v1.get_default_graph().as_graph_def(), 
                    output_node_names.split(",") 
                )
        # Serialize the resulting GraphDef to resnet_v1_50.pb.
        with tf.compat.v1.gfile.GFile("resnet_v1_50.pb", "wb") as f:
            f.write(output_graph_def.SerializeToString())


# Dynamic Range Quantization

The simplest form of post-training quantization statically quantizes only the weights, converting them from floating point to 8-bit integers.

In [None]:
# Frozen GraphDef file to convert.
frozen_graph_pth = "resnet_v1_50.pb"

# Build a TF1-style converter from the frozen graph, naming its input
# placeholder (with a fixed shape) and its output node.
converter = tf.compat.v1.lite.TFLiteConverter.from_frozen_graph(
    frozen_graph_pth,
    input_arrays=['Placeholder'],
    output_arrays=["resnet_v1_50/pool5"],
    input_shapes={'Placeholder': [1, 224, 224, 3]},
)
converter.optimizations = [tf.lite.Optimize.DEFAULT]

tflite_model = converter.convert()

# Write the converted flatbuffer to disk.
with open('resnet_v1_50.tflite', 'wb') as f:
    f.write(tflite_model)

# Int8 Quantization

In [None]:
def representative_dataset():
    """Yield 100 random float32 samples of shape (1, 224, 224, 3).

    Each item is a single-element list, with values drawn by
    ``np.random.rand`` (uniform in [0, 1)).
    """
    remaining = 100
    while remaining > 0:
        sample = np.random.rand(1, 224, 224, 3).astype(np.float32)
        yield [sample]
        remaining -= 1

In [None]:
# Integer-only conversion of the frozen graph, calibrated with representative_dataset.
converter = tf.compat.v1.lite.TFLiteConverter.from_frozen_graph(
    frozen_graph_pth,
    input_arrays=['Placeholder'],
    output_arrays=["resnet_v1_50/pool5"],
    input_shapes={'Placeholder': [1, 224, 224, 3]},
)

converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_dataset
# Restrict the converter to INT8 builtin ops and request int8 tensors at the model boundary.
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
converter.inference_input_type = converter.inference_output_type = tf.int8  # or tf.uint8
tflite_int8_model = converter.convert()

# Write the converted flatbuffer to disk.
with open('resnet_v1_50_int8.tflite', 'wb') as f:
    f.write(tflite_int8_model)

# Integer with Float Fallback

In [None]:
# Integer quantization with float fallback: a representative dataset is given
# for calibration, but no supported-ops or input/output type restriction is set.
converter = tf.compat.v1.lite.TFLiteConverter.from_frozen_graph(frozen_graph_pth,
                                                                input_arrays=['Placeholder'],
                                                                input_shapes={'Placeholder' : [1, 224, 224,3]},
                                                                output_arrays=["resnet_v1_50/pool5"])

converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_dataset
tflite_quant_model = converter.convert()

# Save the model produced by THIS conversion. The previous code wrote
# tflite_int8_model here, so the fallback file received the wrong model.
with open('resnet_v1_50_fallback.tflite', 'wb') as f:
    f.write(tflite_quant_model)

# Float16 Quantization

In [None]:
# Float16 conversion of the frozen graph.
converter = tf.compat.v1.lite.TFLiteConverter.from_frozen_graph(
    frozen_graph_pth,
    input_arrays=['Placeholder'],
    output_arrays=["resnet_v1_50/pool5"],
    input_shapes={'Placeholder': [1, 224, 224, 3]},
)

converter.optimizations = [tf.lite.Optimize.DEFAULT]
# Declaring float16 as the supported type is what requests float16 output.
converter.target_spec.supported_types = [tf.float16]

tflite_float_model = converter.convert()

# Write the converted flatbuffer to disk.
with open('resnet_v1_50_float16.tflite', 'wb') as f:
    f.write(tflite_float_model)