# Quantizing Pretrained DL Models in TensorFlow

In [1]:
#importing required libraries
import tensorflow as tf
from tensorflow import keras
import numpy as np
from PIL import Image
import os
from pathlib import Path

In [2]:
# Display the TensorFlow version this notebook is executed with.
tf.__version__

'2.6.0'

In [3]:
import logging
# Raise the TensorFlow Python logger threshold to ERROR so lower-severity
# Python-side messages are hidden in the cells below.
# NOTE(review): the native "I ..." / "W ..." lines still visible in later cell
# outputs are not affected by this call — presumably they are controlled by
# TF_CPP_MIN_LOG_LEVEL; confirm if they should be silenced as well.
tf.get_logger().setLevel(logging.ERROR)

In [4]:
def dynamic_range_quantization(model_path, optimization=None):
    """Convert a saved Keras model into a dynamic-range-quantized TFLite file.

    The model is loaded from ``model_path``, converted with the TFLite
    converter and written next to the source file as
    ``<model_path without its extension>_dynamic_quant.tflite``.

    Args:
        model_path: Location of the saved Keras model (``str`` or
            ``os.PathLike``), e.g. ``'Models/resnet_50.h5'``.
        optimization: ``"size"`` or ``"latency"`` select the matching
            ``tf.lite.Optimize`` flag; any other value (including the default
            ``None``) selects ``tf.lite.Optimize.DEFAULT``.

    Returns:
        ``0`` once the quantized model has been written to disk.
    """
    model = tf.keras.models.load_model(model_path)

    # Strip the real extension instead of blindly cutting three characters, so
    # '.keras' files, extension-less paths and Path objects get a sane name.
    stem, _ext = os.path.splitext(os.fspath(model_path))
    save_path = stem + '_dynamic_quant.tflite'

    converter = tf.lite.TFLiteConverter.from_keras_model(model)

    # NOTE(review): the TensorFlow docs list OPTIMIZE_FOR_SIZE and
    # OPTIMIZE_FOR_LATENCY as deprecated aliases of DEFAULT — confirm for the
    # installed version before relying on a difference between the modes.
    if optimization == "size":
        converter.optimizations = [tf.lite.Optimize.OPTIMIZE_FOR_SIZE]
    elif optimization == "latency":
        converter.optimizations = [tf.lite.Optimize.OPTIMIZE_FOR_LATENCY]
    else:
        converter.optimizations = [tf.lite.Optimize.DEFAULT]

    tflite_model = converter.convert()

    # Save the model.
    with open(save_path, 'wb') as f:
        f.write(tflite_model)
    print("Successfuly created and saved Dynamic Range Quantized {} model".format(model_path))
    return 0

In [5]:
def float16_quantization(model_path, optimization=None):
    """Convert a saved Keras model into a float16-quantized TFLite file.

    The model is loaded from ``model_path``, converted with
    ``target_spec.supported_types`` set to ``[tf.float16]`` and written next to
    the source file as ``<model_path without its extension>_f16_quant.tflite``.

    Args:
        model_path: Location of the saved Keras model (``str`` or
            ``os.PathLike``), e.g. ``'Models/vgg16.h5'``.
        optimization: ``"size"`` or ``"latency"`` select the matching
            ``tf.lite.Optimize`` flag; any other value (including the default
            ``None``) selects ``tf.lite.Optimize.DEFAULT``.

    Returns:
        ``0`` once the quantized model has been written to disk.
    """
    model = tf.keras.models.load_model(model_path)

    # Strip the real extension instead of blindly cutting three characters, so
    # '.keras' files, extension-less paths and Path objects get a sane name.
    stem, _ext = os.path.splitext(os.fspath(model_path))
    save_path = stem + '_f16_quant.tflite'

    converter = tf.lite.TFLiteConverter.from_keras_model(model)

    # NOTE(review): the TensorFlow docs list OPTIMIZE_FOR_SIZE and
    # OPTIMIZE_FOR_LATENCY as deprecated aliases of DEFAULT — confirm for the
    # installed version before relying on a difference between the modes.
    if optimization == "size":
        converter.optimizations = [tf.lite.Optimize.OPTIMIZE_FOR_SIZE]
    elif optimization == "latency":
        converter.optimizations = [tf.lite.Optimize.OPTIMIZE_FOR_LATENCY]
    else:
        converter.optimizations = [tf.lite.Optimize.DEFAULT]

    # Restricting the supported types to float16 is what makes this a float16
    # conversion rather than the plain dynamic-range one.
    converter.target_spec.supported_types = [tf.float16]
    tflite_model = converter.convert()

    # Save the model.
    with open(save_path, 'wb') as f:
        f.write(tflite_model)

    print("Successfuly created and saved Float16 Quantized {} model".format(model_path))

    return 0

In [6]:
def representative_data_gen(num_samples=100, input_shape=(1, 224, 224, 3)):
    """Yield random float32 batches to use as a TFLite representative dataset.

    Called without arguments it behaves as before: 100 single-element lists,
    each holding a ``(1, 224, 224, 3)`` float32 array drawn uniformly from
    ``[0, 1)``.

    NOTE(review): the samples are random noise, not real images; calibration
    ranges derived from them may not reflect real inputs — consider feeding
    preprocessed samples from the target dataset instead.

    Args:
        num_samples: Number of batches to yield.
        input_shape: Shape of every generated batch.

    Yields:
        A one-element list containing a float32 ``numpy.ndarray`` of shape
        ``input_shape``.
    """
    for _ in range(num_samples):
        data = np.random.rand(*input_shape)
        yield [data.astype(np.float32)]
    
def int8_quantization(model_path, optimization=None, integer_only=False):
    """Convert a saved Keras model into an integer-quantized TFLite file.

    The converter is calibrated with ``representative_data_gen``. With
    ``integer_only=False`` the result is written as
    ``<model_path without its extension>_int8_quant.tflite``; with
    ``integer_only=True`` the converter is restricted to
    ``TFLITE_BUILTINS_INT8`` ops with uint8 input/output and the result is
    written as ``<model_path without its extension>_fullInt_quant.tflite``.

    Args:
        model_path: Location of the saved Keras model (``str`` or
            ``os.PathLike``), e.g. ``'Models/mobilenet_v2.h5'``.
        optimization: ``"size"`` or ``"latency"`` select the matching
            ``tf.lite.Optimize`` flag; any other value (including the default
            ``None``) selects ``tf.lite.Optimize.DEFAULT``.
        integer_only: When true, request integer-only ops and uint8
            input/output tensors.

    Returns:
        ``0`` once the quantized model has been written to disk.
    """
    model = tf.keras.models.load_model(model_path)

    # Strip the real extension instead of blindly cutting three characters, so
    # '.keras' files, extension-less paths and Path objects get a sane name.
    stem, _ext = os.path.splitext(os.fspath(model_path))
    suffix = '_fullInt_quant.tflite' if integer_only else '_int8_quant.tflite'
    save_path = stem + suffix

    converter = tf.lite.TFLiteConverter.from_keras_model(model)

    # NOTE(review): the TensorFlow docs list OPTIMIZE_FOR_SIZE and
    # OPTIMIZE_FOR_LATENCY as deprecated aliases of DEFAULT — confirm for the
    # installed version before relying on a difference between the modes.
    if optimization == "size":
        converter.optimizations = [tf.lite.Optimize.OPTIMIZE_FOR_SIZE]
    elif optimization == "latency":
        converter.optimizations = [tf.lite.Optimize.OPTIMIZE_FOR_LATENCY]
    else:
        converter.optimizations = [tf.lite.Optimize.DEFAULT]

    converter.representative_dataset = representative_data_gen

    if integer_only:
        # Ensure that if any ops can't be quantized, the converter throws an error
        converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
        # Set the input and output tensors to uint8 (APIs added in r2.3)
        converter.inference_input_type = tf.uint8
        converter.inference_output_type = tf.uint8

    tflite_model_quant = converter.convert()

    # Save the model.
    with open(save_path, 'wb') as f:
        f.write(tflite_model_quant)

    print("Successfuly created and saved Int8 Quantized {} model".format(model_path))

    return 0

In [7]:
# Fetch the ImageNet-pretrained ResNet50 classifier (classification head
# included, 224x224 RGB input) and store it as an HDF5 file.
resnet_50 = keras.applications.resnet50.ResNet50(
    weights='imagenet',
    include_top=True,
    input_shape=(224, 224, 3),
)
# NOTE(review): clearing compiled_metrics before saving looks like a workaround
# for serialising a model that was never compiled — confirm it is still needed.
resnet_50.compiled_metrics = None
resnet_50.save('Models/resnet_50.h5')

2021-09-16 16:48:40.270804: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [8]:
# Fetch the ImageNet-pretrained VGG16 classifier (classification head
# included, 224x224 RGB input) and store it as an HDF5 file.
vgg16 = keras.applications.vgg16.VGG16(
    weights='imagenet',
    include_top=True,
    input_shape=(224, 224, 3),
)
# NOTE(review): clearing compiled_metrics before saving looks like a workaround
# for serialising a model that was never compiled — confirm it is still needed.
vgg16.compiled_metrics = None
vgg16.save("Models/vgg16.h5")

In [9]:
# Fetch the ImageNet-pretrained MobileNetV2 classifier (classification head
# included, 224x224 RGB input) and store it as an HDF5 file.
mobilenet_v2 = keras.applications.mobilenet_v2.MobileNetV2(
    weights='imagenet',
    include_top=True,
    input_shape=(224, 224, 3),
)
# NOTE(review): clearing compiled_metrics before saving looks like a workaround
# for serialising a model that was never compiled — confirm it is still needed.
mobilenet_v2.compiled_metrics = None
mobilenet_v2.save('Models/mobilenet_v2.h5')

# Dynamic Range Quantization

In [10]:
# Any mode other than "size"/"latency" selects Optimize.DEFAULT; spell the
# intended mode correctly so the call does not rely on a typo falling through.
dynamic_range_quantization('Models/resnet_50.h5', 'default')

2021-09-16 16:49:00.259581: W tensorflow/python/util/util.cc:348] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.
2021-09-16 16:49:11.088206: I tensorflow/core/grappler/devices.cc:75] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 0 (Note: TensorFlow was not compiled with CUDA or ROCm support)
2021-09-16 16:49:11.088276: I tensorflow/core/grappler/clusters/single_machine.cc:357] Starting new session
2021-09-16 16:49:11.100412: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:1137] Optimization results for grappler item: graph_to_optimize
  function_optimizer: function_optimizer did nothing. time = 0.003ms.
  function_optimizer: function_optimizer did nothing. time = 0ms.



Successfuly created and saved Dynamic Range Quantized Models/resnet_50.h5 model


2021-09-16 16:49:14.209939: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:351] Ignored output_format.
2021-09-16 16:49:14.209959: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:354] Ignored drop_control_dependency.
2021-09-16 16:49:14.448001: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:210] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.


0

In [11]:
# Any mode other than "size"/"latency" selects Optimize.DEFAULT; spell the
# intended mode correctly so the call does not rely on a typo falling through.
dynamic_range_quantization('Models/vgg16.h5', 'default')

2021-09-16 16:49:19.406017: I tensorflow/core/grappler/devices.cc:75] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 0 (Note: TensorFlow was not compiled with CUDA or ROCm support)
2021-09-16 16:49:19.406128: I tensorflow/core/grappler/clusters/single_machine.cc:357] Starting new session
2021-09-16 16:49:19.408694: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:1137] Optimization results for grappler item: graph_to_optimize
  function_optimizer: function_optimizer did nothing. time = 0.002ms.
  function_optimizer: function_optimizer did nothing. time = 0.001ms.

2021-09-16 16:49:40.594454: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:351] Ignored output_format.
2021-09-16 16:49:40.594579: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:354] Ignored drop_control_dependency.


Successfuly created and saved Dynamic Range Quantized Models/vgg16.h5 model


0

In [12]:
# Any mode other than "size"/"latency" selects Optimize.DEFAULT; spell the
# intended mode correctly so the call does not rely on a typo falling through.
dynamic_range_quantization('Models/mobilenet_v2.h5', 'default')

2021-09-16 16:50:11.859569: I tensorflow/core/grappler/devices.cc:75] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 0 (Note: TensorFlow was not compiled with CUDA or ROCm support)
2021-09-16 16:50:11.859799: I tensorflow/core/grappler/clusters/single_machine.cc:357] Starting new session
2021-09-16 16:50:11.869816: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:1137] Optimization results for grappler item: graph_to_optimize
  function_optimizer: function_optimizer did nothing. time = 0.003ms.
  function_optimizer: function_optimizer did nothing. time = 0.001ms.



Successfuly created and saved Dynamic Range Quantized Models/mobilenet_v2.h5 model


2021-09-16 16:50:13.146122: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:351] Ignored output_format.
2021-09-16 16:50:13.146138: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:354] Ignored drop_control_dependency.
2021-09-16 16:50:13.346968: I tensorflow/lite/tools/optimize/quantize_weights.cc:225] Skipping quantization of tensor mobilenetv2_1.00_224/Conv1/Conv2D because it has fewer than 1024 elements (864).
2021-09-16 16:50:13.346987: I tensorflow/lite/tools/optimize/quantize_weights.cc:225] Skipping quantization of tensor mobilenetv2_1.00_224/expanded_conv_depthwise_BN/FusedBatchNormV3;mobilenetv2_1.00_224/expanded_conv_depthwise/depthwise;mobilenetv2_1.00_224/block_5_project/Conv2D because it has fewer than 1024 elements (288).
2021-09-16 16:50:13.346995: I tensorflow/lite/tools/optimize/quantize_weights.cc:225] Skipping quantization of tensor mobilenetv2_1.00_224/expanded_conv_project/Conv2D because it has fewer than 1024 elements (512).

0

# Float16 Quantization

In [14]:
# Any mode other than "size"/"latency" selects Optimize.DEFAULT; spell the
# intended mode correctly so the call does not rely on a typo falling through.
float16_quantization('Models/resnet_50.h5', 'default')

2021-09-16 16:50:36.695296: I tensorflow/core/grappler/devices.cc:75] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 0 (Note: TensorFlow was not compiled with CUDA or ROCm support)
2021-09-16 16:50:36.695370: I tensorflow/core/grappler/clusters/single_machine.cc:357] Starting new session
2021-09-16 16:50:36.705608: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:1137] Optimization results for grappler item: graph_to_optimize
  function_optimizer: function_optimizer did nothing. time = 0.003ms.
  function_optimizer: function_optimizer did nothing. time = 0ms.



Successfuly created and saved Float16 Quantized Models/resnet_50.h5 model


2021-09-16 16:50:39.874919: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:351] Ignored output_format.
2021-09-16 16:50:39.874937: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:354] Ignored drop_control_dependency.


0

In [15]:
# Any mode other than "size"/"latency" selects Optimize.DEFAULT; spell the
# intended mode correctly so the call does not rely on a typo falling through.
float16_quantization('Models/vgg16.h5', 'default')

2021-09-16 16:50:46.060798: I tensorflow/core/grappler/devices.cc:75] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 0 (Note: TensorFlow was not compiled with CUDA or ROCm support)
2021-09-16 16:50:46.060896: I tensorflow/core/grappler/clusters/single_machine.cc:357] Starting new session
2021-09-16 16:50:46.063795: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:1137] Optimization results for grappler item: graph_to_optimize
  function_optimizer: function_optimizer did nothing. time = 0.005ms.
  function_optimizer: function_optimizer did nothing. time = 0ms.

2021-09-16 16:51:01.470801: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:351] Ignored output_format.
2021-09-16 16:51:01.470823: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:354] Ignored drop_control_dependency.


Successfuly created and saved Float16 Quantized Models/vgg16.h5 model


0

In [16]:
# Any mode other than "size"/"latency" selects Optimize.DEFAULT; spell the
# intended mode correctly so the call does not rely on a typo falling through.
float16_quantization('Models/mobilenet_v2.h5', 'default')

2021-09-16 16:51:37.960597: I tensorflow/core/grappler/devices.cc:75] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 0 (Note: TensorFlow was not compiled with CUDA or ROCm support)
2021-09-16 16:51:37.960683: I tensorflow/core/grappler/clusters/single_machine.cc:357] Starting new session
2021-09-16 16:51:37.969550: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:1137] Optimization results for grappler item: graph_to_optimize
  function_optimizer: function_optimizer did nothing. time = 0.002ms.
  function_optimizer: function_optimizer did nothing. time = 0.001ms.



Successfuly created and saved Float16 Quantized Models/mobilenet_v2.h5 model


2021-09-16 16:51:39.217091: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:351] Ignored output_format.
2021-09-16 16:51:39.217109: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:354] Ignored drop_control_dependency.


0

# Full Integer Quantization

Float Fallback

In [17]:
# ResNet50: integer quantization with integer_only left at its default (False),
# so the output is written with the '_int8_quant.tflite' suffix.
int8_quantization('Models/resnet_50.h5')

2021-09-16 16:51:59.934415: I tensorflow/core/grappler/devices.cc:75] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 0 (Note: TensorFlow was not compiled with CUDA or ROCm support)
2021-09-16 16:51:59.934494: I tensorflow/core/grappler/clusters/single_machine.cc:357] Starting new session
2021-09-16 16:51:59.945956: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:1137] Optimization results for grappler item: graph_to_optimize
  function_optimizer: function_optimizer did nothing. time = 0.003ms.
  function_optimizer: function_optimizer did nothing. time = 0.001ms.

2021-09-16 16:52:03.807037: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:351] Ignored output_format.
2021-09-16 16:52:03.807056: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:354] Ignored drop_control_dependency.


Successfuly created and saved Int8 Quantized Models/resnet_50.h5 model


fully_quantize: 0, inference_type: 6, input_inference_type: 0, output_inference_type: 0


0

In [18]:
# VGG16: integer quantization with integer_only left at its default (False),
# so the output is written with the '_int8_quant.tflite' suffix.
int8_quantization('Models/vgg16.h5')

2021-09-16 16:52:29.849736: I tensorflow/core/grappler/devices.cc:75] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 0 (Note: TensorFlow was not compiled with CUDA or ROCm support)
2021-09-16 16:52:29.849799: I tensorflow/core/grappler/clusters/single_machine.cc:357] Starting new session
2021-09-16 16:52:29.851946: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:1137] Optimization results for grappler item: graph_to_optimize
  function_optimizer: function_optimizer did nothing. time = 0.002ms.
  function_optimizer: function_optimizer did nothing. time = 0ms.

2021-09-16 16:52:45.768623: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:351] Ignored output_format.
2021-09-16 16:52:45.768719: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:354] Ignored drop_control_dependency.


Successfuly created and saved Int8 Quantized Models/vgg16.h5 model


fully_quantize: 0, inference_type: 6, input_inference_type: 0, output_inference_type: 0


0

In [19]:
# MobileNetV2: integer quantization with integer_only left at its default
# (False), so the output is written with the '_int8_quant.tflite' suffix.
int8_quantization('Models/mobilenet_v2.h5')

2021-09-16 16:54:15.729887: I tensorflow/core/grappler/devices.cc:75] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 0 (Note: TensorFlow was not compiled with CUDA or ROCm support)
2021-09-16 16:54:15.729968: I tensorflow/core/grappler/clusters/single_machine.cc:357] Starting new session
2021-09-16 16:54:15.738763: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:1137] Optimization results for grappler item: graph_to_optimize
  function_optimizer: function_optimizer did nothing. time = 0.002ms.
  function_optimizer: function_optimizer did nothing. time = 0.001ms.

2021-09-16 16:54:16.897326: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:351] Ignored output_format.
2021-09-16 16:54:16.897345: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:354] Ignored drop_control_dependency.


Successfuly created and saved Int8 Quantized Models/mobilenet_v2.h5 model


fully_quantize: 0, inference_type: 6, input_inference_type: 0, output_inference_type: 0


0

Full integer quantization

In [20]:
# ResNet50: integer-only conversion (TFLITE_BUILTINS_INT8 ops, uint8 input and
# output), written with the '_fullInt_quant.tflite' suffix.
int8_quantization('Models/resnet_50.h5', integer_only=True)

2021-09-16 16:54:48.099508: I tensorflow/core/grappler/devices.cc:75] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 0 (Note: TensorFlow was not compiled with CUDA or ROCm support)
2021-09-16 16:54:48.099591: I tensorflow/core/grappler/clusters/single_machine.cc:357] Starting new session
2021-09-16 16:54:48.110494: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:1137] Optimization results for grappler item: graph_to_optimize
  function_optimizer: function_optimizer did nothing. time = 0.002ms.
  function_optimizer: function_optimizer did nothing. time = 0.001ms.

2021-09-16 16:54:51.097774: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:351] Ignored output_format.
2021-09-16 16:54:51.097790: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:354] Ignored drop_control_dependency.
fully_quantize: 0, inference_type: 6, input_inference_type: 3, output_inference_type: 3


Successfuly created and saved Int8 Quantized Models/resnet_50.h5 model


0

In [21]:
# VGG16: integer-only conversion (TFLITE_BUILTINS_INT8 ops, uint8 input and
# output), written with the '_fullInt_quant.tflite' suffix.
int8_quantization('Models/vgg16.h5', integer_only=True)

2021-09-16 16:55:19.330679: I tensorflow/core/grappler/devices.cc:75] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 0 (Note: TensorFlow was not compiled with CUDA or ROCm support)
2021-09-16 16:55:19.330738: I tensorflow/core/grappler/clusters/single_machine.cc:357] Starting new session
2021-09-16 16:55:19.333050: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:1137] Optimization results for grappler item: graph_to_optimize
  function_optimizer: function_optimizer did nothing. time = 0.002ms.
  function_optimizer: function_optimizer did nothing. time = 0.001ms.

2021-09-16 16:55:35.059126: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:351] Ignored output_format.
2021-09-16 16:55:35.059146: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:354] Ignored drop_control_dependency.
fully_quantize: 0, inference_type: 6, input_inference_type: 3, output_inference_type: 3


Successfuly created and saved Int8 Quantized Models/vgg16.h5 model


0

In [22]:
# MobileNetV2: integer-only conversion (TFLITE_BUILTINS_INT8 ops, uint8 input
# and output), written with the '_fullInt_quant.tflite' suffix.
int8_quantization('Models/mobilenet_v2.h5', integer_only=True)

2021-09-16 16:57:16.418806: I tensorflow/core/grappler/devices.cc:75] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 0 (Note: TensorFlow was not compiled with CUDA or ROCm support)
2021-09-16 16:57:16.418919: I tensorflow/core/grappler/clusters/single_machine.cc:357] Starting new session
2021-09-16 16:57:16.431898: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:1137] Optimization results for grappler item: graph_to_optimize
  function_optimizer: function_optimizer did nothing. time = 0.003ms.
  function_optimizer: function_optimizer did nothing. time = 0ms.

2021-09-16 16:57:18.064288: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:351] Ignored output_format.
2021-09-16 16:57:18.064316: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:354] Ignored drop_control_dependency.
fully_quantize: 0, inference_type: 6, input_inference_type: 3, output_inference_type: 3


Successfuly created and saved Int8 Quantized Models/mobilenet_v2.h5 model


0

# 

# Graveyard

Below is the code to quantize the model using a checkpoint file or a frozen TensorFlow graph.

In [None]:
# The dataset for testing has been taken from https://github.com/fastai/imagenette,
# which has images of 10 most common classes from the ImageNet dataset.

In [None]:
import tensorflow as tf
import numpy as np
import cv2

In [None]:
# Download and unpack the official TensorFlow models.
# Pretrained models: https://github.com/tensorflow/models/tree/master/research/slim
# ResNet50 is used in this example.
!wget http://download.tensorflow.org/models/resnet_v1_50_2016_08_28.tar.gz
!tar -xf resnet_v1_50_2016_08_28.tar.gz

# Creating Frozen graph from ckpt file

In [None]:
from tensorflow.python.tools import freeze_graph
from tf_slim.nets import resnet_v1
import tf_slim as slim

# Build the TF-Slim ResNet-v1-50 graph in TF1 compatibility mode, restore its
# weights from the checkpoint and export a frozen GraphDef ('resnet_v1_50.pb').
tf.compat.v1.disable_eager_execution()
tf.compat.v1.disable_v2_behavior()

# Create graph
# The placeholder gets no explicit name; the converter cells further down refer
# to this input as 'Placeholder'.
inputs = tf.compat.v1.placeholder(tf.float32, shape=[1, 224, 224, 3])
with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        net, end_points = resnet_v1.resnet_v1_50(inputs, num_classes=1000,is_training=False)

# Created after the network so it covers the variables defined above.
saver = tf.compat.v1.train.Saver()

# Parentheses without a trailing comma: this is a plain str, not a tuple, which
# is why `.split(",")` below works. Add further outputs comma-separated.
output_node_names = ("resnet_v1_50/pool5")

with tf.compat.v1.Session() as sess:
        # Load the pretrained weights into the graph's variables.
        saver.restore(sess, 'resnet_v1_50.ckpt')
        # Looked up but not used again in this cell.
        representation_tensor = sess.graph.get_tensor_by_name('resnet_v1_50/pool5:0') 
        # Write the (unfrozen) graph definition to ./resnet_v1_50.pbtxt.
        tf.compat.v1.train.write_graph(sess.graph_def,'./','resnet_v1_50.pbtxt')

        # Node names of the default graph; only consumed by the commented-out
        # debug print below.
        tensor_name_list = [tensor.name for tensor in tf.compat.v1.get_default_graph().as_graph_def().node]
        #print([tensor_name for tensor_name in tensor_name_list])
        # Replace variables by constants holding their current values, keeping
        # the listed output nodes.
        output_graph_def = tf.compat.v1.graph_util.convert_variables_to_constants(
                    sess, # The session is used to retrieve the weights
                    tf.compat.v1.get_default_graph().as_graph_def(), 
                    output_node_names.split(",") 
                )
        # Serialize the frozen graph to disk in binary form.
        with tf.compat.v1.gfile.GFile("resnet_v1_50.pb", "wb") as f:
            f.write(output_graph_def.SerializeToString())


# Dynamic Range Quantization

The simplest form of post-training quantization statically quantizes only the weights, converting them from floating point to integers with 8 bits of precision.

In [None]:
# Frozen GraphDef to convert; the converter cells that follow reuse this name.
frozen_graph_pth = "resnet_v1_50.pb"

# Build a converter for the frozen graph, naming its input and output nodes.
converter = tf.compat.v1.lite.TFLiteConverter.from_frozen_graph(
    frozen_graph_pth,
    input_arrays=['Placeholder'],
    input_shapes={'Placeholder': [1, 224, 224, 3]},
    output_arrays=["resnet_v1_50/pool5"],
)
# With no representative dataset supplied, only the default optimization is set.
converter.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_model = converter.convert()

# Persist the converted flatbuffer.
with open('resnet_v1_50.tflite', 'wb') as f:
    f.write(tflite_model)

# Int8 Quantization

In [None]:
def representative_dataset():
    """Yield 100 random calibration batches for the TFLite converter.

    Each item is a one-element list holding a float32 array of shape
    (1, 224, 224, 3) with values drawn uniformly from [0, 1).
    """
    batch_shape = (1, 224, 224, 3)
    remaining = 100
    while remaining > 0:
        sample = np.random.rand(*batch_shape).astype(np.float32)
        yield [sample]
        remaining -= 1

In [None]:
# Integer-only conversion of the frozen graph: calibrate with the
# representative dataset, restrict ops to the int8 builtin set and use int8
# input/output tensors.
converter = tf.compat.v1.lite.TFLiteConverter.from_frozen_graph(
    frozen_graph_pth,
    input_arrays=['Placeholder'],
    input_shapes={'Placeholder': [1, 224, 224, 3]},
    output_arrays=["resnet_v1_50/pool5"],
)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_dataset
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
# tf.uint8 may be used for both of these instead.
converter.inference_input_type = tf.int8
converter.inference_output_type = tf.int8

tflite_int8_model = converter.convert()

# Persist the converted flatbuffer.
with open('resnet_v1_50_int8.tflite', 'wb') as f:
    f.write(tflite_int8_model)

# Integer with Float Fallback

In [None]:
# Integer quantization with float fallback: calibrate with the representative
# dataset but do not restrict the op set or the input/output types.
converter = tf.compat.v1.lite.TFLiteConverter.from_frozen_graph(frozen_graph_pth,
                                                                input_arrays=['Placeholder'],
                                                                input_shapes={'Placeholder' : [1, 224, 224,3]},
                                                                output_arrays=["resnet_v1_50/pool5"])

converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_dataset
tflite_quant_model = converter.convert()

# Save the model produced by THIS cell. The original wrote `tflite_int8_model`,
# i.e. the integer-only model from the previous cell (or a NameError on a fresh
# kernel), under the fallback file name.
with open('resnet_v1_50_fallback.tflite', 'wb') as f:
    f.write(tflite_quant_model)

# Float16 Quantization

In [None]:
# Float16 conversion of the frozen graph: default optimization with the
# supported types restricted to float16.
converter = tf.compat.v1.lite.TFLiteConverter.from_frozen_graph(
    frozen_graph_pth,
    input_arrays=['Placeholder'],
    input_shapes={'Placeholder': [1, 224, 224, 3]},
    output_arrays=["resnet_v1_50/pool5"],
)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.target_spec.supported_types = [tf.float16]
tflite_float_model = converter.convert()

# Persist the converted flatbuffer.
with open('resnet_v1_50_float16.tflite', 'wb') as f:
    f.write(tflite_float_model)