In [55]:
import os
os.environ['TF_USE_LEGACY_KERAS'] = "1"
import tensorflow as tf
from tensorflow.keras import layers
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tensorflow_model_optimization.python.core.quantization.keras.quantizers import Quantizer
from tensorflow_model_optimization.python.core.quantization.keras.quantize_config import QuantizeConfig
from tensorflow_model_optimization.python.core.quantization.keras.quantizers import LastValueQuantizer, MovingAverageQuantizer
from tensorflow_model_optimization.python.core.quantization.keras.quantize import quantize_annotate_layer, quantize_apply
from tensorflow_model_optimization.quantization.keras import quantize_apply, quantize_scope
import tensorflow_model_optimization as tfmot
from tensorflow.python.profiler import model_analyzer
from tensorflow.python.profiler import option_builder


In [56]:
#Generate test data:
X=np.random.rand(10000,5)
y = np.sum(X, axis=1)
print(np.shape(X))
print(np.shape(y))

(10000, 5)
(10000,)


In [57]:
class FixedRangeQuantizer(Quantizer):
    """Quantizer that clips its input to the interval [0, range_var].

    ``range_var`` is a non-trainable weight registered on the wrapped layer and
    initialised to the constant 6.0.
    """

    def build(self, tensor_shape, name, layer):
        """Create the clipping bound on ``layer`` and return it keyed by 'range_var'.

        ``tensor_shape`` is accepted for interface compatibility but not used.
        """
        upper_bound = layer.add_weight(
            name=name + '_range',
            initializer=tf.keras.initializers.Constant(6.0),
            trainable=False,
        )
        return dict(range_var=upper_bound)

    def __call__(self, inputs, training, weights, **kwargs):
        """Clip ``inputs`` between 0.0 and the stored bound; ``training`` is ignored."""
        upper_bound = weights['range_var']
        return tf.keras.backend.clip(inputs, 0.0, upper_bound)

    def get_config(self):
        """Return an empty config: the quantizer has no constructor arguments."""
        return {}

In [58]:


class CustomLayerQuantizeConfig(QuantizeConfig):
    """Quantization-aware-training config used for the Dense layers in this notebook.

    * kernel: 8-bit symmetric ``LastValueQuantizer``
    * activation: 8-bit asymmetric ``MovingAverageQuantizer``
    * layer output: 8-bit symmetric ``MovingAverageQuantizer``

    The bias is not quantized. An earlier revision also listed ``layer.bias``
    in ``get_weights_and_quantizers``, but ``set_quantize_weights`` only ever
    wrote back the kernel, so the bias quantizer was built and its result
    discarded. Listing only the kernel keeps the forward pass identical while
    removing that dead quantizer.
    """

    def get_weights_and_quantizers(self, layer):
        """Return the ``(weight, quantizer)`` pairs to quantize: the kernel only."""
        return [
            (layer.kernel,
             LastValueQuantizer(num_bits=8, symmetric=True, narrow_range=False, per_axis=False)),
        ]

    def get_activations_and_quantizers(self, layer):
        """Return the ``(activation, quantizer)`` pair for the layer's activation."""
        return [
            (layer.activation,
             MovingAverageQuantizer(num_bits=8, symmetric=False, narrow_range=False, per_axis=False)),
        ]

    def set_quantize_weights(self, layer, quantize_weights):
        """Replace the layer's kernel with its quantized counterpart.

        Raises:
            ValueError: if the number of tensors received does not match the
                single kernel entry returned by ``get_weights_and_quantizers``.
        """
        # Fail loudly on a getter/setter mismatch instead of silently dropping tensors.
        if len(quantize_weights) != 1:
            raise ValueError(
                'CustomLayerQuantizeConfig expects exactly 1 quantized weight '
                '(the kernel), got {}.'.format(len(quantize_weights)))
        layer.kernel = quantize_weights[0]

    def set_quantize_activations(self, layer, quantize_activations):
        """Replace the layer's activation with its quantized counterpart.

        Raises:
            ValueError: if the number of activations received is not exactly one.
        """
        if len(quantize_activations) != 1:
            raise ValueError(
                'CustomLayerQuantizeConfig expects exactly 1 quantized '
                'activation, got {}.'.format(len(quantize_activations)))
        layer.activation = quantize_activations[0]

    def get_output_quantizers(self, layer):
        """Return the quantizer applied to the layer's output tensor."""
        # NOTE(review): the activation above is already quantized; confirm that
        # quantizing the output a second time (symmetric) is intended.
        return [MovingAverageQuantizer(num_bits=8, symmetric=True, narrow_range=False, per_axis=False)]

    def get_config(self):
        """Return an empty config: the class takes no constructor arguments."""
        return {}


In [59]:
# Baseline network without quantization annotations: five ReLU hidden layers
# (1024 -> 512 -> 256 -> 128 -> 64) followed by a single ReLU output unit.
model2 = tf.keras.Sequential()
model2.add(layers.Dense(1024, activation='relu', input_shape=(5,)))
for _units in (512, 256, 128, 64, 1):
    model2.add(layers.Dense(_units, activation='relu'))

In [60]:
# Quantization-annotated network: every Dense layer is annotated with its own
# CustomLayerQuantizeConfig instance. Only the first layer declares the input shape.
annotated_layers = [
    quantize_annotate_layer(
        layers.Dense(1024, activation='relu', input_shape=(5,)),
        quantize_config=CustomLayerQuantizeConfig(),
    )
]
annotated_layers += [
    quantize_annotate_layer(
        layers.Dense(_units, activation='relu'),
        quantize_config=CustomLayerQuantizeConfig(),
    )
    for _units in (512, 256, 128, 64, 1)
]
model = tf.keras.Sequential(annotated_layers)

# The custom config class is registered by name for the duration of quantize_apply.
custom_objects = {'CustomLayerQuantizeConfig': CustomLayerQuantizeConfig}
with quantize_scope(custom_objects):
    quant_aware_model = quantize_apply(model)

In [61]:
# Train the quantization-aware model and the baseline for one epoch each on the
# same data. The target is a continuous sum, so mean absolute error is tracked
# instead of 'accuracy', which is not meaningful for a regression target.
quant_aware_model.compile(optimizer='adam', loss='mse', metrics=['mae'])
quant_aware_model.fit(X, y, epochs=1)
model2.compile(optimizer='adam', loss='mse', metrics=['mae'])
model2.fit(X, y, epochs=1)



<tf_keras.src.callbacks.History at 0x7f7414303a60>

In [62]:
# Spot check: draw one random 5-feature sample and print its true sum next to
# the quantization-aware model's prediction for it.
vals = np.random.rand(1, 5)
result = vals.sum()
prediction = quant_aware_model.predict(vals)
print(result)
print(prediction)





1.8890255552311528
[[1.858444]]


In [63]:
# Pick 100 distinct rows of X (sampling without replacement) to calibrate the converters.
index = np.random.choice(X.shape[0], 100, replace=False)
x_random = X[index]


def representative_data_gen():
    """Yield calibration samples one at a time from ``x_random``.

    Each item is a one-element list holding a float32 array made of a single
    row of ``x_random`` with a leading batch dimension of 1, i.e. shape (1, 5)
    for the 5-feature data used here.
    """
    # Derive the loop length from the sampled subset rather than repeating the
    # sample count, so resizing the subset can never yield empty slices.
    for i in range(len(x_random)):
        yield [x_random[i:i + 1].astype(np.float32)]

In [64]:
# One TFLite converter per trained model.
quantconverter = tf.lite.TFLiteConverter.from_keras_model(quant_aware_model)
converter2 = tf.lite.TFLiteConverter.from_keras_model(model2)

# Both converters receive the same settings: default optimizations, int8
# built-in ops only, and the shared calibration generator.
for _converter in (converter2, quantconverter):
    _converter.optimizations = [tf.lite.Optimize.DEFAULT]
    _converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
    _converter.representative_dataset = representative_data_gen

# Convert the quantization-aware model first, then the baseline.
quantlite = quantconverter.convert()
lite2 = converter2.convert()

INFO:tensorflow:Assets written to: /tmp/tmpsdvggxie/assets


INFO:tensorflow:Assets written to: /tmp/tmpsdvggxie/assets
W0000 00:00:1739553500.319659   27572 tf_tfl_flatbuffer_helpers.cc:365] Ignored output_format.
W0000 00:00:1739553500.319670   27572 tf_tfl_flatbuffer_helpers.cc:368] Ignored drop_control_dependency.
2025-02-14 18:18:20.319754: I tensorflow/cc/saved_model/reader.cc:83] Reading SavedModel from: /tmp/tmpsdvggxie
2025-02-14 18:18:20.321804: I tensorflow/cc/saved_model/reader.cc:52] Reading meta graph with tags { serve }
2025-02-14 18:18:20.321812: I tensorflow/cc/saved_model/reader.cc:147] Reading SavedModel debug info (if present) from: /tmp/tmpsdvggxie
2025-02-14 18:18:20.337533: I tensorflow/cc/saved_model/loader.cc:236] Restoring SavedModel bundle.
2025-02-14 18:18:20.400236: I tensorflow/cc/saved_model/loader.cc:220] Running initialization op on SavedModel bundle at path: /tmp/tmpsdvggxie
2025-02-14 18:18:20.415983: I tensorflow/cc/saved_model/loader.cc:466] SavedModel load for tags { serve }; Status: success: OK. Took 96229 

INFO:tensorflow:Assets written to: /tmp/tmpxo32gul4/assets


INFO:tensorflow:Assets written to: /tmp/tmpxo32gul4/assets
W0000 00:00:1739553500.895669   27572 tf_tfl_flatbuffer_helpers.cc:365] Ignored output_format.
W0000 00:00:1739553500.895680   27572 tf_tfl_flatbuffer_helpers.cc:368] Ignored drop_control_dependency.
2025-02-14 18:18:20.895787: I tensorflow/cc/saved_model/reader.cc:83] Reading SavedModel from: /tmp/tmpxo32gul4
2025-02-14 18:18:20.896483: I tensorflow/cc/saved_model/reader.cc:52] Reading meta graph with tags { serve }
2025-02-14 18:18:20.896491: I tensorflow/cc/saved_model/reader.cc:147] Reading SavedModel debug info (if present) from: /tmp/tmpxo32gul4
2025-02-14 18:18:20.901250: I tensorflow/cc/saved_model/loader.cc:236] Restoring SavedModel bundle.
2025-02-14 18:18:20.931064: I tensorflow/cc/saved_model/loader.cc:220] Running initialization op on SavedModel bundle at path: /tmp/tmpxo32gul4
2025-02-14 18:18:20.937526: I tensorflow/cc/saved_model/loader.cc:466] SavedModel load for tags { serve }; Status: success: OK. Took 41740 

In [65]:
# Write both converted flatbuffers to disk and print each file's size in bytes.
# NOTE(review): lite2 is produced by converter2, which is configured with
# Optimize.DEFAULT, TFLITE_BUILTINS_INT8 and a representative dataset, so the
# "32bit" file name may not describe its contents - confirm the intended baseline.

# Create the output folder up front; open(..., "wb") fails if it is missing.
os.makedirs("testfolder", exist_ok=True)

with open("testfolder/8bit.tflite", "wb") as f:
    f.write(quantlite)

with open("testfolder/32bit.tflite", "wb") as f:
    f.write(lite2)

# Size of the converted quantization-aware model.
model_size = os.path.getsize('testfolder/8bit.tflite')
print(model_size)

# Size of the converted baseline model.
model_size = os.path.getsize('testfolder/32bit.tflite')
print(model_size)

714344
761032


In [None]:
import lzma

# Read the saved quantized flatbuffer, LZMA-compress it in memory, and store
# the compressed bytes next to the original with an .xz suffix.
with open("testfolder/8bit.tflite", "rb") as src:
    model_data = src.read()

compressed_data = lzma.compress(model_data)

with open("testfolder/8bit.tflite.xz", "wb") as dst:
    dst.write(compressed_data)
