In [1]:
import numpy as np
import tensorflow as tf
import tensorflow_model_optimization as tfmot
from tensorflow.keras.models import load_model
from tensorflow.keras.datasets import fashion_mnist
from tensorflow.keras.utils import to_categorical

# 1. Load Fashion MNIST dataset
(X_train, y_train), (X_test, y_test) = fashion_mnist.load_data()

# Normalize and reshape to (batch, height, width, channels)
X_train = X_train.astype('float32') / 255.0
X_test = X_test.astype('float32') / 255.0
X_train = np.expand_dims(X_train, axis=-1)  # shape: (60000, 28, 28, 1)
X_test = np.expand_dims(X_test, axis=-1)

# One-hot encode labels
y_train = to_categorical(y_train, num_classes=10)
y_test = to_categorical(y_test, num_classes=10)

# 2. Load baseline model
baseline_model = load_model('baseline_model3.h5')
print("✅ Baseline model loaded")


✅ Baseline model loaded


In [2]:
import tensorflow_model_optimization as tfmot
prune_low_magnitude = tfmot.sparsity.keras.prune_low_magnitude

pruning_params = {'pruning_schedule': tfmot.sparsity.keras.ConstantSparsity(0.5,
                begin_step=0, frequency=100)
        }
callbacks = [tfmot.sparsity.keras.UpdatePruningStep()]
pruned_model = prune_low_magnitude(baseline_model, **pruning_params)
pruned_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 prune_low_magnitude_conv2d  (None, 26, 26, 32)        610       
  (PruneLowMagnitude)                                            
                                                                 
 prune_low_magnitude_max_po  (None, 13, 13, 32)        1         
 oling2d (PruneLowMagnitude                                      
 )                                                               
                                                                 
 prune_low_magnitude_conv2d  (None, 11, 11, 64)        36930     
 _1 (PruneLowMagnitude)                                          
                                                                 
 prune_low_magnitude_max_po  (None, 5, 5, 64)          1         
 oling2d_1 (PruneLowMagnitu                                      
 de)                                                    

In [3]:
opt = tf.keras.optimizers.Adam(learning_rate=1e-5)
pruned_model.compile(
loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
optimizer=opt,metrics=['accuracy'])
pruned_model.summary()

pruned_model.fit(X_train, y_train, batch_size=128, epochs=3,validation_split=0.1, callbacks=callbacks)



Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 prune_low_magnitude_conv2d  (None, 26, 26, 32)        610       
  (PruneLowMagnitude)                                            
                                                                 
 prune_low_magnitude_max_po  (None, 13, 13, 32)        1         
 oling2d (PruneLowMagnitude                                      
 )                                                               
                                                                 
 prune_low_magnitude_conv2d  (None, 11, 11, 64)        36930     
 _1 (PruneLowMagnitude)                                          
                                                                 
 prune_low_magnitude_max_po  (None, 5, 5, 64)          1         
 oling2d_1 (PruneLowMagnitu                                      
 de)                                                    

  output, from_logits = _get_logits(


Epoch 2/3
Epoch 3/3


<keras.src.callbacks.History at 0x14f7a9a20>

In [4]:
stripped_pruned_model = tfmot.sparsity.keras.strip_pruning(pruned_model)
# make a cloning of the model

stripped_pruned_model_copy = tf.keras.models.clone_model(stripped_pruned_model)

stripped_pruned_model_copy.set_weights(stripped_pruned_model.get_weights())

In [5]:
from tensorflow_model_optimization.python.core.clustering.keras.experimental import (cluster,)

cluster_weights = tfmot.clustering.keras.cluster_weights

CentroidInitialization = tfmot.clustering.keras.CentroidInitialization.KMEANS_PLUS_PLUS

cluster_weights = cluster.cluster_weights

clustering_params = {
'number_of_clusters': 8,
'cluster_centroids_init': CentroidInitialization.KMEANS_PLUS_PLUS,
'preserve_sparsity': True
}

sparsity_clustered_model = cluster_weights(stripped_pruned_model_copy,**clustering_params)

In [7]:
sparsity_clustered_model.compile(optimizer='adam',loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),metrics=['accuracy'])


sparsity_clustered_model.fit(X_train, y_train, batch_size=128,epochs=3, validation_split=0.1)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.src.callbacks.History at 0x15fa31e40>

In [8]:
stripped_sparsity_clustered_model = tfmot.clustering.keras.strip_clustering(sparsity_clustered_model)

In [13]:
from pathlib import Path
import tensorflow as tf

# Convert the model
converter = tf.lite.TFLiteConverter.from_keras_model(stripped_sparsity_clustered_model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
sparsity_clustered_quant_model = converter.convert()

# 📂 Save the model to your local directory
tflite_models_dir = Path('/Users/oscarpatrikminj/Documents/IITR/FMNIST/tflite_models')
tflite_models_dir.mkdir(exist_ok=True, parents=True)

# 📄 Save the TFLite file
tflite_model_file = tflite_models_dir / 'model_sparsity_clustered_qunat.tflite'
tflite_model_file.write_bytes(sparsity_clustered_quant_model)

print(f"✅ Model saved at: {tflite_model_file}")


INFO:tensorflow:Assets written to: /var/folders/bs/x0lj933d1hv0py0d4w2ypdp40000gn/T/tmpjz8umzix/assets


INFO:tensorflow:Assets written to: /var/folders/bs/x0lj933d1hv0py0d4w2ypdp40000gn/T/tmpjz8umzix/assets


✅ Model saved at: /Users/oscarpatrikminj/Documents/IITR/FMNIST/tflite_models/model_sparsity_clustered_qunat.tflite


2025-06-05 19:07:43.286638: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:364] Ignored output_format.
2025-06-05 19:07:43.286659: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:367] Ignored drop_control_dependency.
2025-06-05 19:07:43.286856: I tensorflow/cc/saved_model/reader.cc:45] Reading SavedModel from: /var/folders/bs/x0lj933d1hv0py0d4w2ypdp40000gn/T/tmpjz8umzix
2025-06-05 19:07:43.287710: I tensorflow/cc/saved_model/reader.cc:91] Reading meta graph with tags { serve }
2025-06-05 19:07:43.287717: I tensorflow/cc/saved_model/reader.cc:132] Reading SavedModel debug info (if present) from: /var/folders/bs/x0lj933d1hv0py0d4w2ypdp40000gn/T/tmpjz8umzix
2025-06-05 19:07:43.290222: I tensorflow/cc/saved_model/loader.cc:231] Restoring SavedModel bundle.
2025-06-05 19:07:43.311144: I tensorflow/cc/saved_model/loader.cc:215] Running initialization op on SavedModel bundle at path: /var/folders/bs/x0lj933d1hv0py0d4w2ypdp40000gn/T/tmpjz8umzix
2025-06-

In [15]:
import numpy as np
import tensorflow as tf
import time
import os
import zipfile
from pathlib import Path

# Load model
tflite_model_file = 'tflite_models/model_sparsity_clustered_qunat.tflite'
interpreter = tf.lite.Interpreter(model_path=tflite_model_file)
interpreter.allocate_tensors()

input_index = interpreter.get_input_details()[0]['index']
output_index = interpreter.get_output_details()[0]['index']

# Prepare test data
from tensorflow.keras.datasets import fashion_mnist
from tensorflow.keras.utils import to_categorical

(_, _), (X_test, y_test) = fashion_mnist.load_data()
X_test = X_test.astype('float32') / 255.0
X_test = np.expand_dims(X_test, -1)
y_test = to_categorical(y_test, num_classes=10)

# Inference and accuracy calculation
pred_list = []
start_time = time.time()
for images in X_test:
    input_data = np.array(images, dtype=np.float32).reshape(1, 28, 28, 1)
    interpreter.set_tensor(input_index, input_data)
    interpreter.invoke()
    prediction = interpreter.get_tensor(output_index)
    prediction = np.argmax(prediction)
    pred_list.append(prediction)
end_time = time.time()

# Accuracy
accurate_count = sum([pred_list[i] == np.argmax(y_test[i]) for i in range(len(pred_list))])
accuracy = accurate_count * 1.0 / len(pred_list)
avg_time_per_sample = (end_time - start_time) / len(pred_list)

print(f" Accuracy: {accuracy * 100:.2f}%")
print(f" Avg inference time per image: {avg_time_per_sample * 1000:.2f} ms")

# File size
model_size_kb = os.path.getsize(tflite_model_file) / 1024
print(f" Uncompressed model size: {model_size_kb:.2f} KB")

# Compressed (ZIP) size
def get_gzipped_model(path):
    zip_path = Path(path).with_suffix('.zip')
    with zipfile.ZipFile(zip_path, 'w', compression=zipfile.ZIP_DEFLATED) as f:
        f.write(path, arcname=Path(path).name)
    return os.path.getsize(zip_path)

compressed_size_kb = get_gzipped_model(tflite_model_file) / 1024
print(f" Zipped model size: {compressed_size_kb:.2f} KB")


 Accuracy: 90.20%
 Avg inference time per image: 0.08 ms
 Uncompressed model size: 184.25 KB
 Zipped model size: 58.66 KB
