In [1]:
"""
Code to convert the keras h5 model to a quantized TFLite model
"""
import tensorflow as tf
import os

In [2]:
# Restore the trained Keras model from its HDF5 checkpoint on disk.
KERAS_MODEL_PATH = 'best_model.h5'
model = tf.keras.models.load_model(KERAS_MODEL_PATH)

In [3]:
# Build the TFLite converter directly from the in-memory Keras model;
# conversion settings are applied to this object in the following cells.
converter = tf.lite.TFLiteConverter.from_keras_model(model=model)

In [4]:
# Turn on the converter's default optimization set.
# NOTE(review): no representative dataset is configured anywhere in this
# notebook, so this presumably yields dynamic range quantization rather than
# full integer quantization -- confirm against the TFLite documentation.
default_optimization = tf.lite.Optimize.DEFAULT
converter.optimizations = [default_optimization]

In [5]:
# Convert the model to a quantized TFLite model.
# The converted model is only held in memory here (`tflite_quant_model`);
# it is written to 'quant_model.tflite' by a later cell. The INFO/W/I lines
# printed below this cell are log output emitted during the conversion.
tflite_quant_model = converter.convert()

INFO:tensorflow:Assets written to: /var/folders/pg/k661lbqs4sl4jplc4nftcthr0000gn/T/tmp3nkqhgm_/assets


INFO:tensorflow:Assets written to: /var/folders/pg/k661lbqs4sl4jplc4nftcthr0000gn/T/tmp3nkqhgm_/assets
2024-02-08 16:27:36.977904: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:378] Ignored output_format.
2024-02-08 16:27:36.977919: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:381] Ignored drop_control_dependency.
2024-02-08 16:27:36.978525: I tensorflow/cc/saved_model/reader.cc:83] Reading SavedModel from: /var/folders/pg/k661lbqs4sl4jplc4nftcthr0000gn/T/tmp3nkqhgm_
2024-02-08 16:27:36.979743: I tensorflow/cc/saved_model/reader.cc:51] Reading meta graph with tags { serve }
2024-02-08 16:27:36.979749: I tensorflow/cc/saved_model/reader.cc:146] Reading SavedModel debug info (if present) from: /var/folders/pg/k661lbqs4sl4jplc4nftcthr0000gn/T/tmp3nkqhgm_
2024-02-08 16:27:36.981767: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:388] MLIR V1 optimization pass is not enabled
2024-02-08 16:27:36.982731: I tensorflow/cc/saved_model/load

In [6]:
# Write the converted model to disk; binary mode because the converter
# output is written as-is, without any text encoding.
QUANT_MODEL_PATH = 'quant_model.tflite'
with open(QUANT_MODEL_PATH, mode='wb') as tflite_file:
    tflite_file.write(tflite_quant_model)

In [7]:
# Files whose on-disk sizes are compared in this cell.
KERAS_MODEL_PATH = 'best_model.h5'
FLOAT_TFLITE_PATH = 'model.tflite'
QUANT_TFLITE_PATH = 'quant_model.tflite'

# No earlier cell in this notebook writes the un-quantized TFLite model, so a
# fresh "Restart Kernel -> Run All" would raise FileNotFoundError on the size
# lookups below. Build the float baseline from the already-loaded Keras model
# when it is missing; an existing file is left untouched.
if not os.path.exists(FLOAT_TFLITE_PATH):
    float_converter = tf.lite.TFLiteConverter.from_keras_model(model)
    with open(FLOAT_TFLITE_PATH, 'wb') as float_file:
        float_file.write(float_converter.convert())


def size_in_mib(path):
    """Return the size of the file at `path` in mebibytes (bytes / 2**20)."""
    return os.path.getsize(path) / float(2**20)


# The converter above only sets `optimizations = [tf.lite.Optimize.DEFAULT]`
# and no representative dataset, which is dynamic range quantization -- not
# full integer quantization -- so the message says so.
print("Dynamic range quantization model saved!")

# Print model sizes for comparison (values are bytes / 2**20, i.e. MiB).
print("Initial model in MiB:", size_in_mib(KERAS_MODEL_PATH))
print("Float model in MiB:", size_in_mib(FLOAT_TFLITE_PATH))
print("Quantized model in MiB:", size_in_mib(QUANT_TFLITE_PATH))

# Print compression ratio between float and quantized model
print(
    "Compression ratio:",
    os.path.getsize(FLOAT_TFLITE_PATH) / os.path.getsize(QUANT_TFLITE_PATH),
)

Full Integer quantization model saved!
Initial model in Mb: 42.47442626953125
Float model in Mb: 14.140071868896484
Quantized model in Mb: 3.5412826538085938
Compression ratio: 3.99292382201942
