## Post Training Quantization
Classification model using tensorflow

- Weight Clustering
- Weight Pruning
- Integer Quantization

In [1]:
import tensorflow_model_optimization as tfmot
import tensorflow as tf
import os
import shutil
import pandas as pd
import numpy as np
import tempfile

Provided models

In [2]:
# Load the mobilenet and the second model from classification
model = tf.keras.models.load_model('model.h5')
mobilenet = tf.keras.models.load_model('mobilenet.h5')

In [2]:
# Data path
path = os.path.join(os.getcwd(), 'dsa2022-Arusha\camera-trap\porini-machine-learning\data\porinicroppedimages\dataset')
test_path = os.path.join(path, 'test')
train_path = os.path.join(path, 'train')

# load the test and train metadata
test_df = pd.read_csv(os.path.join(path, 'test.csv'))
train_df = pd.read_csv(os.path.join(path, 'train.csv'))

In [3]:
# rearrange the format of the data
def split_data(image_dir, label_df, value):
    '''
    value - the type either test or train
    label_df - the dataframe containing the labels of the classes
    image_dir - the directory containing the images
    '''
    for ind, row in label_df.iterrows():
        print(row['Species'])
        if os.path.exists(f'{value}') is False:
            os.mkdir(f'{value}')
        else:
            if os.path.exists(f'{value}/{row["Species"]}') is False:
                os.mkdir(f'{value}/{row["Species"]}')
                shutil.copy(f'{image_dir}/{row["filename"]}', f'{value}/{row["Species"]}')
            else:
                shutil.copy(f'{image_dir}/{row["filename"]}', f'{value}/{row["Species"]}')

In [4]:
# # train and test data
split_data(train_path, train_df, 'train')
split_data(test_path, test_df, 'test')

WARTHOG
IMPALA
WARTHOG
WARTHOG
IMPALA
WATERBUCK
WATERBUCK
WATERBUCK
WARTHOG
ZEBRA
ZEBRA
ZEBRA
WARTHOG
WARTHOG
IMPALA
IMPALA
IMPALA
WARTHOG
WARTHOG
WARTHOG
IMPALA
MONKEY
MONKEY
MONKEY
IMPALA
IMPALA
IMPALA
MONKEY
MONKEY
BUSHBUCK
BUSHBUCK
BUSHBUCK
BUSHBUCK
BUSHBUCK
BUSHBUCK
BUSHBUCK
BUSHBUCK
BUSHBUCK
BUSHBUCK
IMPALA
IMPALA
IMPALA
IMPALA
IMPALA
IMPALA
IMPALA
IMPALA
IMPALA
WATERBUCK
BUSHBUCK
BUSHBUCK
BUSHBUCK
IMPALA
IMPALA
IMPALA
IMPALA
IMPALA
IMPALA
WATERBUCK
IMPALA
MONKEY
MONKEY
IMPALA
IMPALA
WARTHOG
MONKEY
WARTHOG
BUSHBUCK
MONKEY
MONKEY
MONKEY
MONKEY
MONKEY
MONKEY
MONKEY
MONKEY
MONKEY
MONKEY
MONKEY
MONKEY
BUSHBUCK
BUSHBUCK
BUSHBUCK
BUSHBUCK
BUSHBUCK
BUSHBUCK
BUSHBUCK
BUSHBUCK
BUSHBUCK
BUSHBUCK
BUSHBUCK
BUSHBUCK
BUSHBUCK
BUSHBUCK
BUSHBUCK
BUSHBUCK
WARTHOG
WARTHOG
WARTHOG
BUSHBUCK
BUSHBUCK
BUSHBUCK
MONKEY
MONKEY
WATERBUCK
WATERBUCK
BUSHBUCK
IMPALA
IMPALA
IMPALA
IMPALA
IMPALA
IMPALA
IMPALA
IMPALA
IMPALA
IMPALA
IMPALA
IMPALA
IMPALA
IMPALA
IMPALA
IMPALA
IMPALA
IMPALA
IMPALA
IMPALA
WATERBUCK
W

In [6]:
n_train_dir = 'train'
n_test_dir = 'test'
seed = 2023

train_dataset = tf.keras.utils.image_dataset_from_directory(
    n_train_dir,
    batch_size=32,
    image_size=(128, 128),
    shuffle=True,
    validation_split=0.2,
    subset="training", 
    seed = seed
)

# Extract the train images and labels
train_images = []
train_labels = []

for images, labels in train_dataset:
    train_images.append(images.numpy()[0])
    train_labels.append(labels.numpy()[0])

train_images = np.array(train_images)
train_labels = np.array(train_labels)

# Load the dataset again, this time for testing/validation
test_dataset = tf.keras.utils.image_dataset_from_directory(
    n_test_dir,
    batch_size=32,
    image_size=(128, 128),
    seed=2022
)

# Extract the test images and labels
test_images = []
test_labels = []

for images, labels in test_dataset:
    test_images.append(images.numpy()[0])
    test_labels.append(labels.numpy()[0])

test_images = np.array(test_images)
test_labels = np.array(test_labels)

# normalizing the values
train_images = train_images.astype(np.float32) / 255.0
test_images = test_images.astype(np.float32) / 255.0


Found 2085 files belonging to 6 classes.
Using 1668 files for training.
Found 235 files belonging to 6 classes.


### Weight Pruning

In [7]:
prune_low_magnitude = tfmot.sparsity.keras.prune_low_magnitude

# Compute end step to finish pruning after 2 epochs.
batch_size = 32
epochs = 2
validation_split = 0.1 # 10% of training set will be used for validation set.

num_images = train_images.shape[0] * (1 - validation_split)
end_step = np.ceil(num_images / batch_size).astype(np.int32) * epochs

# Define model for pruning.
pruning_params = {
        'pruning_schedule': tfmot.sparsity.keras.PolynomialDecay(initial_sparsity=0.50,
                                                                    final_sparsity=0.80,
                                                                    begin_step=0,
                                                                    end_step=end_step)
}

model_for_pruning = prune_low_magnitude(model, **pruning_params)

# `prune_low_magnitude` requires a recompile.
model_for_pruning.compile(optimizer='adam',
                loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                metrics=['accuracy'])

model_for_pruning.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 prune_low_magnitude_conv2d   (None, 126, 126, 32)     1762      
 (PruneLowMagnitude)                                             
                                                                 
 prune_low_magnitude_max_poo  (None, 63, 63, 32)       1         
 ling2d (PruneLowMagnitude)                                      
                                                                 
 prune_low_magnitude_conv2d_  (None, 61, 61, 64)       36930     
 1 (PruneLowMagnitude)                                           
                                                                 
 prune_low_magnitude_max_poo  (None, 30, 30, 64)       1         
 ling2d_1 (PruneLowMagnitude                                     
 )                                                               
                                                        

In [8]:
logdir = tempfile.mkdtemp()

callbacks = [
  tfmot.sparsity.keras.UpdatePruningStep(),
  tfmot.sparsity.keras.PruningSummaries(log_dir=logdir),
]

model_for_pruning.fit(train_images, train_labels,
                  batch_size=batch_size, epochs=epochs+8, validation_split=validation_split,
                  callbacks=callbacks)

Epoch 1/10


  output, from_logits = _get_logits(


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x1e5367a08b0>

In [9]:
model_for_export = tfmot.sparsity.keras.strip_pruning(model_for_pruning)

_, pruned_keras_file = tempfile.mkstemp('.h5')
tf.keras.models.save_model(model_for_export, pruned_keras_file, include_optimizer=False)
print('Saved pruned Keras model to:', pruned_keras_file)

Saved pruned Keras model to: C:\Users\Austin\AppData\Local\Temp\tmpfkbogzob.h5


In [10]:
converter = tf.lite.TFLiteConverter.from_keras_model(model_for_export)
pruned_tflite_model = converter.convert()

_, pruned_tflite_file = tempfile.mkstemp('.tflite')

with open(pruned_tflite_file, 'wb') as f:
  f.write(pruned_tflite_model)

print('Saved pruned TFLite model to:', pruned_tflite_file)



INFO:tensorflow:Assets written to: C:\Users\Austin\AppData\Local\Temp\tmpe5t6gd4c\assets


INFO:tensorflow:Assets written to: C:\Users\Austin\AppData\Local\Temp\tmpe5t6gd4c\assets


Saved pruned TFLite model to: C:\Users\Austin\AppData\Local\Temp\tmpfbl99ndx.tflite


In [11]:
def get_gzipped_model_size(file):
      # Returns size of gzipped model, in bytes.
  import os
  import zipfile

  _, zipped_file = tempfile.mkstemp('.zip')
  with zipfile.ZipFile(zipped_file, 'w', compression=zipfile.ZIP_DEFLATED) as f:
    f.write(file)

  return os.path.getsize(zipped_file)

# print("Size of gzipped baseline Keras model: %.2f bytes" % (get_gzipped_model_size(keras_file)))
print("Size of gzipped pruned Keras model: %.2f bytes" % (get_gzipped_model_size(pruned_keras_file)))
print("Size of gzipped pruned TFlite model: %.2f bytes" % (get_gzipped_model_size(pruned_tflite_file)))

Size of gzipped pruned Keras model: 12130890.00 bytes
Size of gzipped pruned TFlite model: 12119141.00 bytes


In [12]:
converter = tf.lite.TFLiteConverter.from_keras_model(model_for_export)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
quantized_and_pruned_tflite_model = converter.convert()

_, quantized_and_pruned_tflite_file = tempfile.mkstemp('.tflite')

with open(quantized_and_pruned_tflite_file, 'wb') as f:
  f.write(quantized_and_pruned_tflite_model)

print('Saved quantized and pruned TFLite model to:', quantized_and_pruned_tflite_file)

# print("Size of gzipped baseline Keras model: %.2f bytes" % (get_gzipped_model_size(keras_file)))
print("Size of gzipped pruned and quantized TFlite model: %.2f bytes" % (get_gzipped_model_size(quantized_and_pruned_tflite_file)/1024))



INFO:tensorflow:Assets written to: C:\Users\Austin\AppData\Local\Temp\tmpd8_s0e3p\assets


INFO:tensorflow:Assets written to: C:\Users\Austin\AppData\Local\Temp\tmpd8_s0e3p\assets


Saved quantized and pruned TFLite model to: C:\Users\Austin\AppData\Local\Temp\tmpyvp7miot.tflite
Size of gzipped pruned and quantized TFlite model: 2378.94 bytes


### Weight Clustering

Clustering, or weight sharing, reduces the number of unique weight values in a model, leading to benefits for deployment. It first groups the weights of each layer into N clusters, then shares the cluster's centroid value for all the weights belonging to the cluster

In [13]:
cluster_weights = tfmot.clustering.keras.cluster_weights
CentroidInitialization = tfmot.clustering.keras.CentroidInitialization

# Cluster the weights of the model
clustering_params = {
    'number_of_clusters': 8,
    'cluster_centroids_init': CentroidInitialization.LINEAR
}

# cluster the weights of the model
clustered_model = cluster_weights(model_for_export, **clustering_params)

# smaller learning rate for fine-tuning clustered model
opt = tf.keras.optimizers.Adam(learning_rate=1e-5)

clustered_model.compile(
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer=opt,
    metrics=['accuracy'])

clustered_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 cluster_conv2d (ClusterWeig  (None, 126, 126, 32)     1768      
 hts)                                                            
                                                                 
 cluster_max_pooling2d (Clus  (None, 63, 63, 32)       0         
 terWeights)                                                     
                                                                 
 cluster_conv2d_1 (ClusterWe  (None, 61, 61, 64)       36936     
 ights)                                                          
                                                                 
 cluster_max_pooling2d_1 (Cl  (None, 30, 30, 64)       0         
 usterWeights)                                                   
                                                                 
 cluster_conv2d_2 (ClusterWe  (None, 28, 28, 64)       7

In [14]:
# Fine-tune model
clustered_model.fit(
  train_images,
  train_labels,
  batch_size=500,
  epochs=1,
  validation_split=0.1)

  output, from_logits = _get_logits(




<keras.callbacks.History at 0x1e55a1cf400>

In [15]:
# creating a compressed model with the clustered weights
final_model = tfmot.clustering.keras.strip_clustering(clustered_model)


In [31]:
converter = tf.lite.TFLiteConverter.from_keras_model(model_for_export)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_quant_model2 = converter.convert()

# Save the model.
with open('model2.tflite', 'wb') as f:
    f.write(tflite_quant_model2)



INFO:tensorflow:Assets written to: C:\Users\Austin\AppData\Local\Temp\tmp6b2c_kyw\assets


INFO:tensorflow:Assets written to: C:\Users\Austin\AppData\Local\Temp\tmp6b2c_kyw\assets


In [32]:
print(f'Model Size: {round(os.path.getsize("model2.tflite")/1024**2, 3)}MB')

Model Size: 3.126MB


int8 quantization

In [18]:
# define representative dataset
def representative_dataset():
      for input_value in tf.data.Dataset.from_tensor_slices(train_images).batch(1).take(100):
        yield [input_value]

In [19]:
model_for_export.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 126, 126, 32)      32        
                                                                 
 max_pooling2d (MaxPooling2D  (None, 63, 63, 32)       0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 61, 61, 64)        64        
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 30, 30, 64)       0         
 2D)                                                             
                                                                 
 conv2d_2 (Conv2D)           (None, 28, 28, 64)        64        
                                                                 
 flatten (Flatten)           (None, 50176)             0

In [20]:
input_name = model_for_export.input_names
input_shape = model_for_export.input_shape

In [26]:
converter = tf.lite.TFLiteConverter.from_keras_model(final_model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_dataset
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
converter.inference_input_type = tf.int8  # or tf.uint8
converter.inference_output_type = tf.int8  # or tf.uint8

input_shapes = {}
for input_tensor in model_for_export.inputs:
    input_name = input_tensor.name.split(":")[0]
    input_shape = input_tensor.shape.as_list()
    input_shapes[input_name] = [batch_size] + input_shape[1:]

converter.target_spec.supported_types = [tf.int8]  # or tf.uint8
converter.target_spec.input_shapes = input_shapes
tflite_quant_model = converter.convert()




INFO:tensorflow:Assets written to: C:\Users\Austin\AppData\Local\Temp\tmpqeq3_kcf\assets


INFO:tensorflow:Assets written to: C:\Users\Austin\AppData\Local\Temp\tmpqeq3_kcf\assets


In [27]:
# Save the model.
with open('model.tflite', 'wb') as f:
    f.write(tflite_quant_model)

In [28]:
print(f'Model Size: {round(os.path.getsize("model.tflite")/1024**2, 3)}MB')

Model Size: 3.125MB


### Post Training Quantization

Object Detection model using pytorch