# TFLite Converter with TensorFlow 2.x 
![simple_nn](media/miscellaneous/tf_logo.png "TF Logo")

In [None]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path

In [None]:
# Restrict TensorFlow to the first GPU and let its memory allocation grow on
# demand. Guarded so the notebook also runs on CPU-only machines, where the
# device list is empty and indexing gpus[0] would raise IndexError.
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    tf.config.experimental.set_visible_devices(gpus[0], 'GPU')
    tf.config.experimental.set_memory_growth(gpus[0], True)

In [None]:
# Locations used throughout the notebook, all rooted at the working directory.
PATH_DIR = Path.cwd()
dataset_dir = PATH_DIR / 'bin/tf_tutorial_2'
# SavedModel export (a directory) and HDF5 export (a single file).
saved_model_dir = dataset_dir / 'original_model'
saved_h5_dir = dataset_dir / 'model_original.h5'

![simple_nn](media/tf_tutorial_3/tensorflow_lite_framework.png "TF-Lite Summary")

# 1.0 Train a simple CNN on MNIST

In [None]:
# import the dataset: load MNIST as arrays, then wrap each split in a tf.data.Dataset
mnist_splits = tf.keras.datasets.mnist.load_data(path='mnist.npz')
(X_train, y_train), (X_test, y_test) = mnist_splits
ds_train, ds_test = (
    tf.data.Dataset.from_tensor_slices(split)
    for split in ((X_train, y_train), (X_test, y_test))
)

In [None]:
# normalize dataset
def normalize(x, y):
    """Divide ``x`` by 255 and return it paired with ``y`` unchanged."""
    scaled = x / 255
    return scaled, y

In [None]:
# prepare the data
# Training pipeline: normalize, cache the normalized samples, batch by 32,
# and prefetch upcoming batches.
ds_train = ds_train.map(normalize)
ds_train = ds_train.cache()
ds_train = ds_train.batch(32)
ds_train = ds_train.prefetch(tf.data.experimental.AUTOTUNE)
# Test pipeline: normalize and batch only.
ds_test = ds_test.map(normalize)
ds_test = ds_test.batch(32)

In [None]:
# create a simple cnn model
# The 28x28 input gets a channel axis, then passes through six 3x3
# convolutions (every second one uses stride 2), global average pooling
# and a 10-way softmax classifier.
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Reshape((28, 28, 1)))
model.add(tf.keras.layers.Conv2D(32, 3, padding='same', activation='relu'))
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.Conv2D(32, 3, strides=2, padding='same', activation='relu'))
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.Conv2D(64, 3, padding='same', activation='relu'))
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.Conv2D(64, 3, strides=2, padding='same', activation='relu'))
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.Conv2D(128, 3, padding='same', activation='relu'))
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.Conv2D(128, 3, strides=2, padding='same', activation='relu'))
model.add(tf.keras.layers.GlobalAveragePooling2D())
model.add(tf.keras.layers.Dense(10, activation='softmax'))

In [None]:
# compile: Adam optimizer, integer-label cross-entropy, accuracy metric
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# train
# steps_per_epoch is left unset: ds_train is finite and already batched, so
# Keras runs one full pass over it per epoch. Passing len(X_train)/32 would
# hand a float to a parameter documented as an integer step count.
history = model.fit(ds_train, epochs=15)

In [None]:
# evaluate on the held-out test pipeline
model.evaluate(ds_test)

In [None]:
# save model, once as a SavedModel directory and once as a single .h5 file
# Paths are passed as plain strings, matching the converter cells below;
# NOTE(review): older TF 2.x releases may not accept pathlib.Path here — confirm.
model.save(saved_model_dir.as_posix())
model.save(saved_h5_dir.as_posix())

# 2.0 TF-Lite simple conversion

Converting a model to TF-Lite aims to reduce:
- Energy
- Size
- Latency
- Costs

In [None]:
# The three ways of creating a converter. Each assignment replaces the
# previous one; they are listed together for reference.

# from keras model
converter = tf.lite.TFLiteConverter.from_keras_model(model)
# or from tf saved model (path passed as a string)
converter = tf.lite.TFLiteConverter.from_saved_model(saved_model_dir.as_posix())
# last from concrete functions: trace the model for a batch of 28x28 float
# images and hand the resulting concrete function to the converter
run_model = tf.function(lambda x: model(x))
concrete_func = run_model.get_concrete_function(
    tf.TensorSpec([None, 28, 28], tf.float32))
converter = tf.lite.TFLiteConverter.from_concrete_functions([concrete_func])

In [None]:
converter = tf.lite.TFLiteConverter.from_saved_model(saved_model_dir.as_posix())

In [None]:
# start conversion
tflite_model = converter.convert()

In [None]:
# save model
tflite_model_file = dataset_dir.joinpath('model_fp32.tflite')
tflite_model_file.write_bytes(tflite_model)

# 3.0 Float16 quantization

- **Size reduction:** up to 50 %
- **Latency reduction:** 2x
- **Accuracy:** Insignificant accuracy loss

In [None]:
converter = tf.lite.TFLiteConverter.from_saved_model(saved_model_dir.as_posix())

In [None]:
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.target_spec.supported_types = [tf.float16]

In [None]:
tflite_model = converter.convert()

In [None]:
# save model
tflite_model_file = dataset_dir.joinpath('model_fp16.tflite')
tflite_model_file.write_bytes(tflite_model)

# 4.0 Dynamic int8 range quantization

- **Size reduction:** up to 75 %
- **Latency reduction:** 2x/3x
- **Accuracy:** Accuracy loss

In [None]:
converter = tf.lite.TFLiteConverter.from_saved_model(saved_model_dir.as_posix())

In [None]:
converter.optimizations = [tf.lite.Optimize.DEFAULT]

In [None]:
tflite_model = converter.convert()

In [None]:
# save model
tflite_model_file = dataset_dir.joinpath('model_int8_dynamic.tflite')
tflite_model_file.write_bytes(tflite_model)

# 5.0 Integer quantization with float fallback

- **Size reduction:** up to 75 %
- **Latency reduction:** 3x/4x
- **Accuracy:** Smallest accuracy loss

In [None]:
# Number of samples fed to the converter for calibration; use at least 100.
num_calibration_steps = 100


def representative_dataset_gen():
    """Yield calibration inputs for post-training quantization.

    Each step yields a one-element list holding one training image as a
    float32 array divided by 255, the same scaling ``normalize`` applies
    to the training data.
    """
    for i in range(num_calibration_steps):
        # Remember to pre-process your dataset as your training
        # Slicing (instead of indexing) keeps a leading batch axis of size 1.
        imgs = X_train[i:i+1]
        imgs = imgs / 255
        yield [imgs.astype('float32')]

In [None]:
converter = tf.lite.TFLiteConverter.from_saved_model(saved_model_dir.as_posix())

In [None]:
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_dataset_gen

In [None]:
tflite_model = converter.convert()

In [None]:
# save model
tflite_model_file = dataset_dir.joinpath('model_int8_fb.tflite')
tflite_model_file.write_bytes(tflite_model)

# 6.0 Full integer quantization (integer only)

- **Size reduction:** up to 75 %
- **Latency reduction:** 3x/4x
- **Accuracy:** Smallest accuracy loss

## 6.1 With TF >= 2.3

In [None]:
# Number of samples fed to the converter for calibration; use at least 100.
num_calibration_steps = 100


def representative_dataset_gen():
    """Yield calibration inputs for post-training quantization.

    Each step yields a one-element list holding one training image as a
    float32 array divided by 255, the same scaling ``normalize`` applies
    to the training data.
    """
    for i in range(num_calibration_steps):
        # Remember to pre-process your dataset as your training
        # Slicing (instead of indexing) keeps a leading batch axis of size 1.
        imgs = X_train[i:i+1]
        imgs = imgs / 255
        yield [imgs.astype('float32')]

In [None]:
converter = tf.lite.TFLiteConverter.from_saved_model(saved_model_dir.as_posix())

In [None]:
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_dataset_gen
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
converter.inference_input_type = tf.int8  # or tf.uint8
converter.inference_output_type = tf.int8  # or tf.uint8

In [None]:
tflite_model = converter.convert()

In [None]:
# save model
tflite_model_file = dataset_dir.joinpath('model_int8.tflite')
tflite_model_file.write_bytes(tflite_model)

## 6.2 With TF < 2.3

In [None]:
# Number of samples fed to the converter for calibration; use at least 100.
num_calibration_steps = 100


def representative_dataset_gen():
    """Yield calibration inputs for post-training quantization.

    Each step yields a one-element list holding one training image as a
    float32 array divided by 255, the same scaling ``normalize`` applies
    to the training data.
    """
    for i in range(num_calibration_steps):
        # Remember to pre-process your dataset as your training
        # Slicing (instead of indexing) keeps a leading batch axis of size 1.
        imgs = X_train[i:i+1]
        imgs = imgs / 255
        yield [imgs.astype('float32')]

In [None]:
# Build the converter from the HDF5 export via the TF1-compatible API.
# The path is passed as a plain string, like in the other converter cells,
# and reuses saved_h5_dir instead of rebuilding the same path.
converter = tf.compat.v1.lite.TFLiteConverter.from_keras_model_file(saved_h5_dir.as_posix())

In [None]:
converter.optimizations = [tf.lite.Optimize.OPTIMIZE_FOR_SIZE]
converter.representative_dataset = representative_dataset_gen
converter.experimental_new_converter = True
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
converter.inference_input_type = tf.uint8
converter.inference_output_type = tf.uint8

In [None]:
tflite_model = converter.convert()

In [None]:
# save model
tflite_model_file = dataset_dir.joinpath('model_int8.tflite')
tflite_model_file.write_bytes(tflite_model)