In [None]:
pip install tensorflow tensorflow-model-optimization

Collecting tensorflow-model-optimization
  Downloading tensorflow_model_optimization-0.8.0-py2.py3-none-any.whl.metadata (904 bytes)
Collecting numpy<2.2.0,>=1.26.0 (from tensorflow)
  Downloading numpy-1.26.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.0/61.0 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
Downloading tensorflow_model_optimization-0.8.0-py2.py3-none-any.whl (242 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m242.5/242.5 kB[0m [31m13.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading numpy-1.26.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (18.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m18.0/18.0 MB[0m [31m52.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: numpy, tensorflow-model-optimization
  Attempting uninstall: numpy
    Found existing installation: numpy 2.0.2
    Uninstalling numpy-2.0.

In [None]:
!pip uninstall -y numpy tensorflow tensorflow_model_optimization
!pip install numpy tensorflow tensorflow_model_optimization --upgrade --force-reinstall

In [None]:
import tensorflow as tf
import tensorflow_model_optimization as tfmot
import numpy as np
import os
import tempfile

# ----------------------------------------
# 1. Load and preprocess MNIST
# ----------------------------------------

# Load MNIST, append a trailing channel axis and scale the pixels to [0, 1].
# Dividing an integer array by a Python float silently produces float64, which
# doubles the memory footprint of the dataset; casting to float32 first keeps the
# arrays in the precision the network consumes anyway.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
x_train = x_train[..., tf.newaxis].astype(np.float32) / 255.0
x_test = x_test[..., tf.newaxis].astype(np.float32) / 255.0

# ----------------------------------------
# 2. Define LeNet-5
# ----------------------------------------

def create_lenet():
    """Build an uncompiled LeNet-5 style CNN.

    The network takes 28x28x1 inputs and ends in a 10-way softmax layer.

    Returns:
        A `tf.keras.Sequential` model; the caller is responsible for compiling it.
    """
    layers = tf.keras.layers
    stack = [
        # Layer 1: 6 feature maps, 'same' padding keeps the spatial size unchanged.
        layers.Conv2D(6, kernel_size=5, activation='relu', input_shape=(28,28,1), padding='same'),
        # Layer 2: 2x2 average pooling.
        layers.AveragePooling2D(pool_size=(2, 2)),
        # Layer 3: 16 feature maps, default ('valid') padding.
        layers.Conv2D(filters=16, kernel_size=(5, 5), activation='relu'),
        # Layer 4: 2x2 average pooling.
        layers.AveragePooling2D(pool_size=(2, 2)),
        # Layer 5: 120 feature maps.
        layers.Conv2D(filters=120, kernel_size=(5, 5), activation='relu'),
        layers.Flatten(),
        # Layer 6: fully connected, 84 units.
        layers.Dense(units=84, activation='relu'),
        # Output: one softmax unit per class.
        layers.Dense(units=10, activation='softmax'),
    ]
    return tf.keras.Sequential(stack)

In [None]:
# ----------------------------------------
# 3. Train Baseline Model
# ----------------------------------------

# Train the float baseline and record its metrics on the held-out test set.
model = create_lenet()
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.fit(x_train, y_train, epochs=3, validation_split=0.1, verbose=2)
baseline_loss, baseline_acc = model.evaluate(x_test, y_test, verbose=0)

# Save and check baseline model size.
# mkstemp() hands back an already-open OS-level file descriptor together with the
# path; close it right away so it is not leaked (the model is written by path).
baseline_fd, baseline_model_path = tempfile.mkstemp('.h5')
os.close(baseline_fd)
model.save(baseline_model_path)
baseline_size = os.path.getsize(baseline_model_path) / 1e6  # in MB

print(f"Baseline accuracy: {baseline_acc:.4f}, loss: {baseline_loss:.4f}, size: {baseline_size:.2f} MB")

NameError: name 'create_lenet' is not defined

In [None]:
# Keep a copy of the baseline model in the working directory (HDF5 file).
path = "baseline_lenet5.h5"
model.save(filepath=path)

In [None]:
# ----------------------------------------
# 4. Apply Structured Pruning (filter pruning)
# ----------------------------------------

# Shorthand for the tfmot wrapper that adds magnitude pruning to a layer.
prune_low_magnitude = tfmot.sparsity.keras.prune_low_magnitude

# Constant 50% target sparsity, active from step 0, with frequency=100.
# 'block_size' and 'block_pooling_type' are intentionally not set here, so the
# library defaults apply.
# NOTE(review): without a block or m-by-n setting this looks like element-wise
# magnitude pruning rather than filter (structured) pruning - confirm.
pruning_params = dict(
    pruning_schedule=tfmot.sparsity.keras.ConstantSparsity(
        target_sparsity=0.5,
        begin_step=0,
        frequency=100,
    ),
)

def apply_structured_pruning(model):
    """Return a copy of `model` whose Conv2D and Dense layers are wrapped for pruning.

    The wrapping is done on a detached copy of `model` (same architecture, same
    weight values), so fine-tuning the returned model cannot alter `model`.
    NOTE(review): a `clone_function` that returns or wraps the *original* layer
    objects appears to make the clone share its variables with the source model -
    confirm against the Keras `clone_model` documentation.

    Args:
        model: A built Keras model to derive the pruned model from.

    Returns:
        A new, uncompiled Keras model; Conv2D/Dense layers are wrapped with
        `prune_low_magnitude(**pruning_params)`, all other layers are left as they are.
    """
    prunable_types = (tf.keras.layers.Conv2D, tf.keras.layers.Dense)

    def prune_layer(layer):
        # Only layers that carry kernels are wrapped; pooling/flatten pass through.
        if isinstance(layer, prunable_types):
            return prune_low_magnitude(layer, **pruning_params)
        return layer

    # Fresh layers + copied weights: start from the trained values without
    # tying the pruned model to the baseline's variables.
    detached_model = tf.keras.models.clone_model(model)
    detached_model.set_weights(model.get_weights())

    pruned_model = tf.keras.models.clone_model(
        detached_model,
        clone_function=prune_layer,
    )
    return pruned_model

pruned_model = apply_structured_pruning(model)

# Compile and retrain the pruned model
pruned_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Callback to update pruning during training
callbacks = [tfmot.sparsity.keras.UpdatePruningStep()]

pruned_model.fit(x_train, y_train, epochs=3, validation_split=0.1, callbacks=callbacks, verbose=2)
pruned_loss, pruned_acc = pruned_model.evaluate(x_test, y_test, verbose=0)

# ----------------------------------------
# 5. Strip pruning wrappers & check size
# ----------------------------------------

model_for_export = tfmot.sparsity.keras.strip_pruning(pruned_model)

# mkstemp() returns an open file descriptor plus the path; close the descriptor
# immediately so it is not leaked (the model is written by path).
pruned_fd, pruned_model_path = tempfile.mkstemp('.h5')
os.close(pruned_fd)
model_for_export.save(pruned_model_path)
# NOTE(review): this is the raw on-disk size; zeroed weights presumably only
# shrink the file once it is compressed - confirm before comparing sizes.
pruned_size = os.path.getsize(pruned_model_path) / 1e6  # in MB



In [None]:
# Keep a copy of the stripped, pruned model in the working directory (HDF5 file).
path_pruned = "pruned-lenet5.h5"
model_for_export.save(filepath=path_pruned)

In [None]:
# ----------------------------------------
# 6. Print Comparison
# ----------------------------------------

# One call, one line per entry: `sep` supplies the newlines between the rows.
print(
    "\n Results Comparison:",
    f"Baseline accuracy: {baseline_acc:.4f}, loss: {baseline_loss:.4f}, size: {baseline_size:.2f} MB",
    f"Pruned   accuracy: {pruned_acc:.4f}, loss: {pruned_loss:.4f}, size: {pruned_size:.2f} MB",
    sep="\n",
)


In [None]:
# --------------------------
# Post Training Quantization
# --------------------------

import tensorflow as tf
import numpy as np
import tempfile
import os

# Evaluate float model
float_loss, float_acc = model.evaluate(x_test, y_test, verbose=0)

# Save float model.
# mkstemp() returns an open file descriptor plus the path; close the descriptor
# immediately so it is not leaked (the model is written by path).
float_model_fd, float_model_file = tempfile.mkstemp('.h5')
os.close(float_model_fd)
model.save(float_model_file)
float_model_size = os.path.getsize(float_model_file) / 1e6  # MB

In [None]:

# ----------------------------
# 4. Convert to TFLite (PTQ)
# ----------------------------

# Provide a representative dataset for calibration
def representative_data_gen():
    """Yield the first 100 training images one at a time as calibration input.

    Each item is a single-element list holding one batch-of-one slice of
    `x_train`, cast to float32.
    """
    for start in range(100):
        sample = x_train[start:start + 1]
        yield [sample.astype(np.float32)]

# Configure the converter for integer-only kernels with uint8 input and output
# tensors; representative_data_gen supplies the calibration samples.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.inference_input_type = tf.uint8
converter.inference_output_type = tf.uint8
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
converter.representative_dataset = representative_data_gen
converter.optimizations = [tf.lite.Optimize.DEFAULT]

quantized_tflite_model = converter.convert()

# Write the converted model to the system temp directory and record its size.
quant_model_path = os.path.join(tempfile.gettempdir(), "lenet_int8.tflite")
with open(quant_model_path, mode='wb') as f:
    f.write(quantized_tflite_model)

quant_model_size = os.path.getsize(quant_model_path) / 1e6  # MB

# ----------------------------
# 5. Evaluate TFLite Model
# ----------------------------

# Load interpreter
interpreter = tf.lite.Interpreter(model_path=quant_model_path)
interpreter.allocate_tensors()
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# The (scale, zero_point) pairs do not change between invocations, so read them
# once instead of on every loop iteration.
input_scale, input_zero_point = input_details[0]['quantization']
output_scale, output_zero_point = output_details[0]['quantization']
input_dtype = input_details[0]['dtype']
input_limits = np.iinfo(input_dtype)

# Run inference
correct = 0
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy()

preds = []
for i in range(len(x_test)):
    x = x_test[i:i+1]
    label = y_test[i]

    # Quantize input: round to the nearest integer (a bare astype() truncates, so
    # e.g. 254.9999 would become 254) and clip to the range of the input dtype so
    # out-of-range values cannot wrap around.
    x_q = np.clip(
        np.round(x / input_scale + input_zero_point),
        input_limits.min,
        input_limits.max,
    ).astype(input_dtype)

    interpreter.set_tensor(input_details[0]['index'], x_q)
    interpreter.invoke()
    output = interpreter.get_tensor(output_details[0]['index'])

    # The predicted class can be read from the raw integers directly.
    pred = np.argmax(output)

    # Dequantize output: the loss below expects real-valued scores, not the raw
    # integer codes emitted by the quantized model.
    preds.append((output.astype(np.float32) - output_zero_point) * output_scale)

    if pred == label:
        correct += 1

quant_acc = correct / len(x_test)

# Compute quantized loss on the dequantized outputs
preds = tf.constant(np.vstack(preds), dtype=tf.float32)
quant_loss = loss_fn(y_test, preds).numpy()

# ----------------------------
# 6. Print Results
# ----------------------------
# One call, one line per entry: `sep` supplies the newlines between the rows.
print(
    "\nComparison:",
    f"Float   - Accuracy: {float_acc:.4f}, Loss: {float_loss:.4f}, Size: {float_model_size:.2f} MB",
    f"Quantized - Accuracy: {quant_acc:.4f}, Loss: {quant_loss:.4f}, Size: {quant_model_size:.2f} MB",
    sep="\n",
)


In [None]:
# ---------------------------------------
# Quantization Aware Training (QAT)
# ---------------------------------------
import tensorflow as tf
import numpy as np
import tensorflow_model_optimization as tfmot
import tempfile, os

# The float reference for this section is the baseline model trained earlier.
float_model = model
# Evaluate float model
float_loss, float_acc = float_model.evaluate(x_test, y_test, verbose=0)

# Save float model and measure size.
# mkstemp() returns an open file descriptor plus the path; close the descriptor
# immediately so it is not leaked (the model is written by path).
float_fd, float_fp = tempfile.mkstemp('.h5')
os.close(float_fd)
float_model.save(float_fp)
float_size = os.path.getsize(float_fp) / 1e6  # MB

# ----------------------------
# 4. Prepare QAT model
# ----------------------------
quantize_model = tfmot.quantization.keras.quantize_model
qat_model = quantize_model(float_model)  # apply QAT wrappers

qat_model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

# Fine-tune with QAT for a few epochs
qat_model.fit(x_train, y_train, epochs=3, batch_size=128,
              validation_split=0.1, verbose=2)

# Evaluate QAT (still float inference with fake-quant)
qat_loss, qat_acc = qat_model.evaluate(x_test, y_test, verbose=0)


In [None]:
# ----------------------------
# 5. Convert both to TFLite
# ----------------------------
def convert_to_tflite(keras_model, tflite_path, num_calibration_samples=100):
    """Convert a Keras model to TFLite with uint8 input/output and write it to disk.

    The converter is restricted to the TFLITE_BUILTINS_INT8 op set and calibrated
    on leading samples of `x_train`.

    Args:
        keras_model: The Keras model to convert.
        tflite_path: Destination path of the `.tflite` file (overwritten if present).
        num_calibration_samples: How many leading `x_train` samples to feed as the
            representative dataset. Defaults to 100 (the previous fixed value) and
            is capped at the number of available training samples.

    Returns:
        The converted model as returned by `converter.convert()` (the same content
        that is written to `tflite_path`).
    """
    converter = tf.lite.TFLiteConverter.from_keras_model(keras_model)
    converter.optimizations = [tf.lite.Optimize.DEFAULT]
    converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
    converter.inference_input_type = tf.uint8
    converter.inference_output_type = tf.uint8

    # Never index past the end of the training set: an out-of-range slice would
    # yield an empty batch instead of a calibration sample.
    sample_count = min(num_calibration_samples, len(x_train))

    def representative_data_gen():
        # One batch-of-one float32 sample per step.
        for i in range(sample_count):
            yield [x_train[i:i+1].astype(np.float32)]

    # Calibration data used by the converter for integer quantization.
    converter.representative_dataset = representative_data_gen

    tflite_model = converter.convert()
    with open(tflite_path, 'wb') as f:
        f.write(tflite_model)
    return tflite_model

# Both exports go through convert_to_tflite, i.e. the "float" file is the float
# model after post-training quantization, not an FP32 TFLite model.
float_tflite_path = os.path.join(tempfile.gettempdir(), "lenet_float.tflite")
qat_tflite_path = os.path.join(tempfile.gettempdir(), "lenet_qat.tflite")

# float -> quantized TFLite
_ = convert_to_tflite(float_model, float_tflite_path)
float_tflite_size = os.path.getsize(float_tflite_path) / 1e6

# QAT -> quantized TFLite
_ = convert_to_tflite(qat_model, qat_tflite_path)
qat_tflite_size = os.path.getsize(qat_tflite_path) / 1e6


In [None]:
# ----------------------------
# 6. Evaluate TFLite Models
# ----------------------------
def evaluate_tflite(model_path):
    """Run a quantized TFLite model over `x_test` and score it against `y_test`.

    Inputs are quantized with the input tensor's (scale, zero_point), using
    round-to-nearest and clipping to the input dtype's range. Outputs are
    dequantized with the output tensor's (scale, zero_point) before the loss is
    computed, so the loss is evaluated on real-valued scores rather than on raw
    integer codes.

    Args:
        model_path: Path of the `.tflite` file to evaluate.

    Returns:
        A tuple `(accuracy, loss)`: the fraction of correctly classified test
        samples and the mean sparse categorical cross-entropy as a Python float.
    """
    interpreter = tf.lite.Interpreter(model_path=model_path)
    interpreter.allocate_tensors()
    inp_det = interpreter.get_input_details()[0]
    out_det = interpreter.get_output_details()[0]

    # Quantization parameters are fixed per tensor; read them once, outside the loop.
    in_scale, in_zero_point = inp_det['quantization']
    out_scale, out_zero_point = out_det['quantization']
    in_dtype = inp_det['dtype']
    in_limits = np.iinfo(in_dtype)

    correct = 0
    preds = []
    for i in range(len(x_test)):
        img = x_test[i:i+1]
        # Round before casting (astype alone truncates) and clip so values just
        # outside the representable range cannot wrap around.
        img_q = np.clip(
            np.round(img / in_scale + in_zero_point),
            in_limits.min,
            in_limits.max,
        ).astype(in_dtype)
        interpreter.set_tensor(inp_det['index'], img_q)
        interpreter.invoke()
        out_q = interpreter.get_tensor(out_det['index'])
        # The predicted class can be read from the raw integer output directly.
        if np.argmax(out_q[0]) == y_test[i]:
            correct += 1
        # Dequantize for the loss computation below.
        preds.append((out_q.astype(np.float32) - out_zero_point) * out_scale)

    acc = correct / len(x_test)
    preds = tf.constant(np.vstack(preds), dtype=tf.float32)
    loss = tf.keras.losses.sparse_categorical_crossentropy(y_test, preds)
    return acc, float(tf.reduce_mean(loss).numpy())

# Score both exported models with the same routine, in the same order as before.
(float_tflite_acc, float_tflite_loss), (qat_tflite_acc, qat_tflite_loss) = [
    evaluate_tflite(tflite_path)
    for tflite_path in (float_tflite_path, qat_tflite_path)
]


In [None]:
# ----------------------------
# 7. Print Comparison
# ----------------------------
print("\nResults Comparison:")
print(f"FP32 Keras    - Acc: {float_acc:.4f}, Loss: {float_loss:.4f}, Model size: {float_size:.2f} MB")
# The QAT Keras row reuses float_size; no separate .h5 is written for qat_model.
print(f"QAT Keras     - Acc: {qat_acc:.4f}, Loss: {qat_loss:.4f}, Model size: {float_size:.2f} MB  (same .h5 size)")
# float_tflite_* comes from convert_to_tflite(float_model, ...), which always
# applies integer quantization - so this row is the post-training-quantized (PTQ)
# model, not an FP32 TFLite model. Label it accordingly.
print(f"PTQ  TFLite   - Acc: {float_tflite_acc:.4f}, Loss: {float_tflite_loss:.4f}, Model size: {float_tflite_size:.2f} MB")
print(f"QAT  TFLite   - Acc: {qat_tflite_acc:.4f}, Loss: {qat_tflite_loss:.4f}, Model size: {qat_tflite_size:.2f} MB")
