In [43]:
import os
os.environ['TF_USE_LEGACY_KERAS'] = "1"
import tensorflow as tf
from tensorflow.keras import layers
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tensorflow_model_optimization.python.core.quantization.keras.quantizers import Quantizer
from tensorflow_model_optimization.python.core.quantization.keras.quantize_config import QuantizeConfig
from tensorflow_model_optimization.python.core.quantization.keras.quantizers import LastValueQuantizer, MovingAverageQuantizer
from tensorflow_model_optimization.python.core.quantization.keras.quantize import quantize_annotate_layer, quantize_apply
from tensorflow_model_optimization.quantization.keras import quantize_apply, quantize_scope
import tensorflow_model_optimization as tfmot
from tensorflow.python.profiler import model_analyzer
from tensorflow.python.profiler import option_builder


In [44]:
# Generate test data: 10000 samples of 5 uniform values in [0, 1); the
# regression target is the sum of each row.
# Seed NumPy's global generator so the data set (and every later np.random
# draw in this notebook) is identical on each Restart & Run All.
SEED = 42
np.random.seed(SEED)
X = np.random.rand(10000, 5)
y = X.sum(axis=1)
print(X.shape)
print(y.shape)

(10000, 5)
(10000,)


In [45]:
class FixedRangeQuantizer(Quantizer):
    """Quantizer that only clamps its input to ``[0, range_var]``.

    No rounding or bit-width handling is performed here: the tensor is
    clipped between 0.0 and a non-trainable weight initialised to 6.0.
    """

    def build(self, tensor_shape, name, layer):
        """Create the clip ceiling as a weight owned by ``layer``.

        ``tensor_shape`` is accepted for interface compatibility but is not
        used. Returns the dict that is later handed to ``__call__`` as
        ``weights``.
        """
        clip_ceiling = layer.add_weight(
            name=name + '_range',
            initializer=tf.keras.initializers.Constant(6.0),
            trainable=False,
        )
        return {'range_var': clip_ceiling}

    def __call__(self, inputs, training, weights, **kwargs):
        """Clip ``inputs`` to ``[0, weights['range_var']]``.

        ``training`` and ``kwargs`` are ignored: the same clipping is applied
        in every mode.
        """
        upper_bound = weights['range_var']
        return tf.keras.backend.clip(inputs, 0.0, upper_bound)

    def get_config(self):
        """Return the (empty) serialisation config; there are no constructor args."""
        return {}

In [46]:


class CustomLayerQuantizeConfig(QuantizeConfig):
    """Quantization-aware-training config for the Dense layers in this notebook.

    * kernel: 8-bit, symmetric, per-tensor ``LastValueQuantizer``
    * activation: 8-bit, asymmetric, per-tensor ``MovingAverageQuantizer``
    * layer output: 8-bit, symmetric, per-tensor ``MovingAverageQuantizer``

    ``set_quantize_weights`` / ``set_quantize_activations`` must assign one
    attribute for every entry returned by the matching ``get_*`` method, in
    the same order. The previous version also returned ``layer.bias`` from
    ``get_weights_and_quantizers`` but never assigned the quantized bias back,
    so a bias quantizer was created and evaluated while the layer kept using
    the unquantized bias. The bias entry is removed here so the two methods
    agree; the values the layer actually computes with are unchanged.
    """

    def get_weights_and_quantizers(self, layer):
        """Return ``(weight, quantizer)`` pairs for the weights to quantize.

        Only the kernel is listed; keep this list in sync with
        ``set_quantize_weights``.
        """
        return [
            (layer.kernel, LastValueQuantizer(num_bits=8, symmetric=True, narrow_range=False, per_axis=False)),
        ]

    def get_activations_and_quantizers(self, layer):
        """Return ``(activation, quantizer)`` pairs for the activations to quantize."""
        return [(layer.activation, MovingAverageQuantizer(num_bits=8, symmetric=False, narrow_range=False, per_axis=False))]

    def set_quantize_weights(self, layer, quantize_weights):
        """Install the quantized kernel on ``layer``.

        ``quantize_weights`` follows the order of ``get_weights_and_quantizers``,
        so index 0 is the kernel.
        """
        layer.kernel = quantize_weights[0]

    def set_quantize_activations(self, layer, quantize_activations):
        """Install the quantized activation on ``layer``.

        ``quantize_activations`` follows the order of
        ``get_activations_and_quantizers``, so index 0 is the activation.
        """
        layer.activation = quantize_activations[0]

    def get_output_quantizers(self, layer):
        """Return the quantizers applied to the layer's output tensor.

        NOTE(review): the activation already has its own quantizer above, so
        this looks like a second quantization of the same values - confirm it
        is intended (returning ``[]`` disables it).
        """
        return [MovingAverageQuantizer(num_bits=8, symmetric=True, narrow_range=False, per_axis=False)]

    def get_config(self):
        """Return the (empty) serialisation config; there are no constructor args."""
        return {}


In [47]:
# Plain (un-annotated) Keras MLP: 5 inputs -> 1024 -> 512 -> 256 -> 128 -> 64 -> 1,
# ReLU on every layer including the output. Only the first layer carries the
# input shape.
model2 = tf.keras.Sequential(
    [layers.Dense(1024, activation='relu', input_shape=(5,))]
    + [layers.Dense(width, activation='relu') for width in (512, 256, 128, 64, 1)]
)

In [48]:
# Same layer stack as model2, but every Dense layer is annotated with its own
# CustomLayerQuantizeConfig instance so quantize_apply can rewrite it.
model = tf.keras.Sequential(
    [
        quantize_annotate_layer(
            layers.Dense(1024, activation='relu', input_shape=(5,)),
            quantize_config=CustomLayerQuantizeConfig(),
        )
    ]
    + [
        quantize_annotate_layer(
            layers.Dense(width, activation='relu'),
            quantize_config=CustomLayerQuantizeConfig(),
        )
        for width in (512, 256, 128, 64, 1)
    ]
)

# quantize_apply is run inside a quantize_scope that registers the custom
# config class under its name.
custom_quantize_objects = {'CustomLayerQuantizeConfig': CustomLayerQuantizeConfig}
with quantize_scope(custom_quantize_objects):
    quant_aware_model = quantize_apply(model)

In [49]:
import quantize_model

# Training hyper-parameters shared by both models.
epochs = 10
batch_size = 64
num_training_samples = len(X)
# Rough step count: samples / batch_size * epochs, truncated to an int.
print(int(num_training_samples/batch_size*epochs))

# Wrap model2 with the project's pruning helper (0.5 is presumably the target
# sparsity - confirm in quantize_model). NOTE: this rebinds model2, so the
# cell is not idempotent; re-create model2 before running it again.
model2, callbacks = quantize_model.get_pruning_wrapper(model2, 0.5, epochs, batch_size, num_training_samples)

# This is a regression task with an MSE loss, so classification accuracy is
# not meaningful; report mean absolute error instead.
model2.compile(optimizer='adam', loss='mse', metrics=['mae'])
print(np.shape(X))

model2.fit(X, y, callbacks=callbacks, epochs=epochs, batch_size=batch_size)

# Train the quantization-aware model with the same epochs and batch size so
# the two models are compared under identical training settings.
quant_aware_model.compile(optimizer='adam', loss='mse', metrics=['mae'])
quant_aware_model.fit(X, y, epochs=epochs, batch_size=batch_size)

1562
(10000, 5)
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tf_keras.src.callbacks.History at 0x7ef3d6b27340>

In [50]:
# Replace model2 with the result of the project helper strip_prune
# (presumably removes the pruning wrappers added during training - confirm in
# quantize_model). model2 is rebound in place, so this cell depends on the
# training cell having run exactly once before it.
model2 = quantize_model.strip_prune(model2)

In [51]:
# Spot-check both trained models on a single fresh random input: print the
# true sum first, then the quantization-aware model's prediction, then
# model2's prediction.
vals = np.random.rand(1, 5)
result = vals.sum()
prediction = quant_aware_model.predict(vals)
prediction2 = model2.predict(vals)
print(result, prediction, prediction2, sep="\n")

2.315572615813018
[[2.3326743]]
[[2.3185055]]


In [52]:
# Number of rows of X used for calibration. Defined once so the sampling
# below and the generator can never disagree.
NUM_CALIBRATION_SAMPLES = 100

# Pick distinct rows of X; clamp to the number of rows available so that
# sampling without replacement cannot fail on a small data set.
index = np.random.choice(
    X.shape[0], min(NUM_CALIBRATION_SAMPLES, X.shape[0]), replace=False
)
x_random = X[index]


def representative_data_gen():
    """Yield the calibration rows one at a time for the TFLite converter.

    Each item is a one-element list holding a float32 array of shape (1, 5):
    a single row of ``x_random`` with a leading batch dimension of size 1.
    The loop is bounded by ``len(x_random)`` so every yielded batch is
    non-empty regardless of how many rows were sampled.
    """
    for i in range(len(x_random)):
        yield [x_random[i:i + 1].astype(np.float32)]

In [53]:
# Export model2 through the project helper, using the calibration rows and
# the output prefix "testfolder/32bit" (presumably this writes
# testfolder/32bit.tflite, which a later cell reads - confirm in quantize_model).
quantize_model.quantize_8_bit(model2, x_random, "testfolder/32bit")

# Convert the quantization-aware model to a TFLite flatbuffer restricted to
# the int8 builtin op set, with the representative data generator attached.
quantconverter = tf.lite.TFLiteConverter.from_keras_model(quant_aware_model)
quantconverter.optimizations = [tf.lite.Optimize.DEFAULT]
quantconverter.representative_dataset = representative_data_gen
quantconverter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
quantlite = quantconverter.convert()

INFO:tensorflow:Assets written to: /tmp/tmp9smdjlsg/assets


INFO:tensorflow:Assets written to: /tmp/tmp9smdjlsg/assets
W0000 00:00:1739730060.506600   45954 tf_tfl_flatbuffer_helpers.cc:365] Ignored output_format.
W0000 00:00:1739730060.506611   45954 tf_tfl_flatbuffer_helpers.cc:368] Ignored drop_control_dependency.
2025-02-16 19:21:00.506698: I tensorflow/cc/saved_model/reader.cc:83] Reading SavedModel from: /tmp/tmp9smdjlsg
2025-02-16 19:21:00.507052: I tensorflow/cc/saved_model/reader.cc:52] Reading meta graph with tags { serve }
2025-02-16 19:21:00.507060: I tensorflow/cc/saved_model/reader.cc:147] Reading SavedModel debug info (if present) from: /tmp/tmp9smdjlsg
2025-02-16 19:21:00.509150: I tensorflow/cc/saved_model/loader.cc:236] Restoring SavedModel bundle.
2025-02-16 19:21:00.521249: I tensorflow/cc/saved_model/loader.cc:220] Running initialization op on SavedModel bundle at path: /tmp/tmp9smdjlsg
2025-02-16 19:21:00.524801: I tensorflow/cc/saved_model/loader.cc:466] SavedModel load for tags { serve }; Status: success: OK. Took 18103 

INFO:tensorflow:Assets written to: /tmp/tmpjfo6m8fm/assets


INFO:tensorflow:Assets written to: /tmp/tmpjfo6m8fm/assets
W0000 00:00:1739730062.158329   45954 tf_tfl_flatbuffer_helpers.cc:365] Ignored output_format.
W0000 00:00:1739730062.158339   45954 tf_tfl_flatbuffer_helpers.cc:368] Ignored drop_control_dependency.
2025-02-16 19:21:02.158430: I tensorflow/cc/saved_model/reader.cc:83] Reading SavedModel from: /tmp/tmpjfo6m8fm
2025-02-16 19:21:02.160814: I tensorflow/cc/saved_model/reader.cc:52] Reading meta graph with tags { serve }
2025-02-16 19:21:02.160822: I tensorflow/cc/saved_model/reader.cc:147] Reading SavedModel debug info (if present) from: /tmp/tmpjfo6m8fm
2025-02-16 19:21:02.176247: I tensorflow/cc/saved_model/loader.cc:236] Restoring SavedModel bundle.
2025-02-16 19:21:02.240096: I tensorflow/cc/saved_model/loader.cc:220] Running initialization op on SavedModel bundle at path: /tmp/tmpjfo6m8fm
2025-02-16 19:21:02.256697: I tensorflow/cc/saved_model/loader.cc:466] SavedModel load for tags { serve }; Status: success: OK. Took 98268 

In [54]:
# Write the converted quantization-aware model to disk as a TFLite file.
with open("testfolder/8bit.tflite", "wb") as tflite_file:
    tflite_file.write(quantlite)

# Print the on-disk size in bytes of each model file: 8bit.tflite first,
# then 32bit.tflite.
for tflite_path in ('testfolder/8bit.tflite', 'testfolder/32bit.tflite'):
    model_size = os.path.getsize(tflite_path)
    print(model_size)

714344
761032


Before (file sizes in bytes: 8bit.tflite, then 32bit.tflite):
714344
761032

In [55]:
import lzma

# Load the whole 8bit.tflite file into memory.
with open("testfolder/8bit.tflite", "rb") as source_file:
    model_data = source_file.read()

# One-shot compression with lzma's default settings, written next to the
# original with an added .xz suffix.
compressed_data = lzma.compress(model_data)
with open("testfolder/8bit.tflite.xz", "wb") as target_file:
    target_file.write(compressed_data)


In [56]:
# Load the whole 32bit.tflite file into memory.
with open("testfolder/32bit.tflite", "rb") as source_file:
    model_data = source_file.read()

# One-shot compression with lzma's default settings, written next to the
# original with an added .xz suffix.
compressed_data = lzma.compress(model_data)
with open("testfolder/32bit.tflite.xz", "wb") as target_file:
    target_file.write(compressed_data)