# Pruning techniques in keras

----------------------------------------------------------------------

In this notebook, we are going to see :
    1. How to use pruning in Tensorflow Keras
    2. How to convert the model to tensorflow lite
    3. Inference the model
    4. Extract pruned model weights
    5. Pruning + Quantisation
    6. Comparision of models with Original Model + Pruned Model + Quantised Model

-------------------------------------------------------------------------
Here we are going to prune already trained model in keras.
Let us first import the pre-trained model here....

In [1]:
# Import necessery packages
import os
import cv2
import numpy as np
import tensorflow as tf
from tensorflow import keras
import pickle
import tensorflow_model_optimization as tfmot
import time
import zipfile

In [2]:
# Load model
model = tf.keras.models.load_model("TrafficSignalClassifier_v4_2.h5")

# check model architecture
model.summary()

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 48, 48, 1)]  0                                            
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 48, 48, 32)   832         input_1[0][0]                    
__________________________________________________________________________________________________
activation_1 (Activation)       (None, 48, 48, 32)   0           conv2d_1[0][0]                   
__________________________________________________________________________________________________
max_pooling2d_1 (MaxPooling2D)  (None, 24, 24, 32)   0           activation_1[0][0]              

In [3]:
def load_data(test_data_path):
    class_ids = ['background', 'red', 'red-yellow', 'green', 'yellow', 'off']
    IMG_SIZE = 48
    test_image = []
    test_label = []
    for class_id in class_ids:
        testpath = os.path.join(test_data_path,class_id)
        class_num = class_ids.index(class_id)
        for img in os.listdir(testpath):
            try:
                test_img_array = cv2.imread(os.path.join(testpath, img), cv2.IMREAD_GRAYSCALE)
                # preprocessing
                test_img_array = cv2.resize(test_img_array, (IMG_SIZE, IMG_SIZE))
                test_img_array = test_img_array.reshape(IMG_SIZE, IMG_SIZE, 1)
                test_img_array = test_img_array/255
                test_image.append(test_img_array)
                test_label.append(class_num)
            except Exception as e:
                print("Error : Incorrect Path")
    test_image = np.array(test_image)
    test_label = np.array(test_label)
    return test_image, test_label

In [4]:
# Load Test Data
test_data_path="C:\\Users\\ghodam2\\Desktop\\Pruning Technique\\test"
test_images, test_labels = load_data(test_data_path)

# Check for Accuracy for orriginal Model
_, original_model_accuracy = model.evaluate(test_images, test_labels)

#Print Accuracy
print('Baseline test accuracy:', original_model_accuracy)

Baseline test accuracy: 0.9645


****************************************************************
Load Training data for pruning the model. 
****************************************************************

In [5]:
pickle_in = open("X.pickle", "rb")
X = pickle.load(pickle_in)

pickle_in = open("y.pickle", "rb")
y = pickle.load(pickle_in)

# fine tuning model with the help of tensorflow optimization package

In [6]:
# Compute end step to finish pruning after 2 epochs.
batch_size = 128
epochs = 2
validation_split = 0.1 # 10% of training set will be used for validation set. 

num_images = X.shape[0] * (1 - validation_split)
end_step = np.ceil(num_images / batch_size).astype(np.int32) * epochs

# Define model for pruning.
pruning_params = {
      'pruning_schedule': tfmot.sparsity.keras.PolynomialDecay(initial_sparsity=0.50,
                                                               final_sparsity=0.80,
                                                               begin_step=0,
                                                               end_step=end_step)
}

model_for_pruning = tfmot.sparsity.keras.prune_low_magnitude(model, **pruning_params)

# `prune_low_magnitude` requires a recompile.
model_for_pruning.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

model_for_pruning.summary()


Instructions for updating:
Deprecated in favor of operator or tf.math.divide.







Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 48, 48, 1)]  0                                            
__________________________________________________________________________________________________
prune_low_magnitude_conv2d_1 (P (None, 48, 48, 32)   1634        input_1[0][0]                    
__________________________________________________________________________________________________
prune_low_magnitude_activation_ (None, 48, 48, 32)   1           prune_low_magnitude_conv2d_1[0][0
__________________________________________________________________________________________________
prune_low_magnitude_max_pooling (None, 24, 24, 32)   1           prune_low_magnitude_activation_1[
____________________________________________________________________________________________

In [7]:
logdir = 'C:\\Users\\ghodam2\\Desktop\\Pruning Technique\\log'

X = X/255
callbacks = [
  tfmot.sparsity.keras.UpdatePruningStep(),
  tfmot.sparsity.keras.PruningSummaries(log_dir=logdir),
]
  
model_for_pruning.fit(X, y,
                  batch_size=batch_size, epochs=epochs, validation_split=validation_split,
                  callbacks=callbacks)

# Store pruned model with all pruning wrappers 
model_for_pruning.save("SaveModelAfterPruning.h5")  

model_for_pruning.summary()

Train on 207516 samples, validate on 23058 samples
Epoch 1/2
Epoch 2/2
Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 48, 48, 1)]  0                                            
__________________________________________________________________________________________________
prune_low_magnitude_conv2d_1 (P (None, 48, 48, 32)   1634        input_1[0][0]                    
__________________________________________________________________________________________________
prune_low_magnitude_activation_ (None, 48, 48, 32)   1           prune_low_magnitude_conv2d_1[0][0
__________________________________________________________________________________________________
prune_low_magnitude_max_pooling (None, 24, 24, 32)   1           prune_low_magnitude_activation_1[
_____________________

Let us compare the accuracy of both models
------------------------------------------------------------------
    1. Original model of keras
    2. Model after using pruning on original keras

In [8]:
_, model_for_pruning_accuracy = model_for_pruning.evaluate(
   test_images, test_labels, verbose=0)

print('Original test accuracy:', original_model_accuracy) 
print('Pruned test accuracy:', model_for_pruning_accuracy)

Original test accuracy: 0.9645
Pruned test accuracy: 0.9605


Strip the model which was pruned earlier to see how much compression has been done after pruning

In [9]:
# This will remove all wrappers from earlier prunned model and only keep original layers of model
model_for_export = tfmot.sparsity.keras.strip_pruning(model_for_pruning)

pruned_keras_file = 'pruned_stripped_model.h5'
model_for_export.save(pruned_keras_file)

print('Saved pruned Keras model to:', pruned_keras_file)

Saved pruned Keras model to: pruned_stripped_model.h5


Let us see the weights distribution between original model vs pruned stripped model
* Number of zeros and non zeros weights in model 

In [10]:
def fetch_weights(model_name):
    for i, w in enumerate(model_name.get_weights()):
        print(
            "{} -- Total:{}, Zeros: {:.2f}%".format(
                model_name.weights[i].name, w.size, np.sum(w == 0) / w.size * 100
            )
        )
        
    print("<=============================================================================>")

In [11]:
# compare the original model + pruned and stripped model
original_model = tf.keras.models.load_model('TrafficSignalClassifier_v4_2.h5')
model_pruned_stripped = tf.keras.models.load_model('pruned_stripped_model.h5')

print("Weights Distibution for Original Model")
print("---------------------------------------")
fetch_weights(original_model)

print("Weights Distibution for Pruned Model")
print("---------------------------------------")
fetch_weights(model_pruned_stripped)

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Weights Distibution for Original Model
---------------------------------------
conv2d_1_2/kernel:0 -- Total:800, Zeros: 0.00%
conv2d_1_2/bias:0 -- Total:32, Zeros: 0.00%
conv2d_2_2/kernel:0 -- Total:25600, Zeros: 0.00%
conv2d_2_2/bias:0 -- Total:32, Zeros: 0.00%
conv2d_3_2/kernel:0 -- Total:51200, Zeros: 0.00%
conv2d_3_2/bias:0 -- Total:64, Zeros: 0.00%
conv2d_4_2/kernel:0 -- Total:204800, Zeros: 0.00%
conv2d_4_2/bias:0 -- Total:128, Zeros: 0.00%
dense_1_2/kernel:0 -- Total:516096, Zeros: 0.00%
dense_1_2/bias:0 -- Total:256, Zeros: 0.00%
dense_2_2/kernel:0 -- Total:32768, Zeros: 0.00%
dense_2_2/bias:0 -- Total:128, Zeros: 0.00%
dense_3_2/kernel:0 -- Total:768, Zeros: 0.00%
dense_3_2/bias:0 -- Total:6, Zeros: 0.00%
Weights Distibution for Pruned Model
---

Lets check the weights from each model
    1. Original Model
    2. Pruned Model

In [12]:
def store_weights(model_name, file_name):
    weights_file = file_name  
    with open(weights_file, 'w+') as f:
        for i, w in enumerate(model_name.get_weights()):
            f.write("".join(" ".join(map(str, x)) for x in model_name.weights[i].name))
            f.write("\n")
            f.write("".join(" ".join(map(str, w))))
            f.write("\n")
    print(weights_file)        
    print("<=============================================================================>")

In [13]:
# Load model
original_model = tf.keras.models.load_model("TrafficSignalClassifier_v4_2.h5")
store_weights(original_model, 'original_model.weights')

original_model = tf.keras.models.load_model("pruned_stripped_model.h5")
store_weights(original_model, 'pruned_stripped_model.weights')

original_model.weights
pruned_stripped_model.weights


In [14]:
IMG_SIZE = 48

class_ids = ['background', 'red', 'red-yellow', 'green', 'yellow', 'off']

def evaluate_model(basepath,model):
    start = time.time()
    correct_detected = 0
    total_samples = 0
    runtime = 0
    for class_id in class_ids:
        testpath = os.path.join(basepath,class_id)
        class_num = class_ids.index(class_id)
        for img in os.listdir(testpath):
            try:
                total_samples +=1
                test_img_array = cv2.imread(os.path.join(testpath, img), cv2.IMREAD_GRAYSCALE)
                test_img_array = cv2.resize(test_img_array, (IMG_SIZE, IMG_SIZE))
                test_img_array = test_img_array.reshape(-1, IMG_SIZE, IMG_SIZE, 1)
                test_img_array = test_img_array/255
                score = model.predict(test_img_array)
                score = score.argmax(axis=-1)
                if score[0] == class_num:
                    correct_detected +=1
            except Exception as e:
                print("Test Image path Not Found, Please check your config file: Fow windows dont forget to put path"
                      "in \\")
    runtime = format(time.time() - start)
    return runtime, (correct_detected/total_samples)*100

In [15]:
test_basepath = "C:\\Users\\ghodam2\\Desktop\\Pruning Technique\\test"
run_time,model_pruned_stripped_accuracy = evaluate_model(test_basepath, model_pruned_stripped)

                
print("=======================================================")
print("             Runtime            :", run_time, " Seconds")
print("             Model test accuracy:", model_pruned_stripped_accuracy ,"%")
print("=======================================================")

run_time,perc_accuracy = evaluate_model(test_basepath, original_model)               
print("=======================================================")
print("             Runtime            :", run_time, " Seconds")
print("             Model test accuracy:", perc_accuracy ,"%")
print("=======================================================")

             Runtime            : 9.244155645370483  Seconds
             Model test accuracy: 96.05 %
             Runtime            : 9.052635192871094  Seconds
             Model test accuracy: 96.05 %


# Convert the model to tensorflow lite for inferencing

In [14]:
converter = tf.lite.TFLiteConverter.from_keras_model_file("C:\\Users\\ghodam2\\Desktop\\Pruning Technique\\pruned_stripped_model.h5")
pruned_tflite_model = converter.convert()

pruned_tflite_file = 'pruned_tflite_model.tflite'

with open(pruned_tflite_file, 'wb') as f:
  f.write(pruned_tflite_model)

print('Saved pruned TFLite model to:', pruned_tflite_file)

Instructions for updating:
Use `tf.compat.v1.graph_util.convert_variables_to_constants`
Instructions for updating:
Use `tf.compat.v1.graph_util.extract_sub_graph`
INFO:tensorflow:Froze 14 variables.
INFO:tensorflow:Converted 14 variables to const ops.
Saved pruned TFLite model to: pruned_tflite_model.tflite


In [15]:
# check compressed sizes of each model
def get_gzipped_model_size(file):
  # Returns size of gzipped model, in bytes.
  print(file)
  zipped_file = '{}.zip'.format(file)
  with zipfile.ZipFile(zipped_file, 'w', compression=zipfile.ZIP_DEFLATED) as f:
    f.write(file)

  return os.path.getsize(zipped_file)

In [16]:
orig_model = 'TrafficSignalClassifier_v4_2.h5'
prun_model = 'pruned_stripped_model.h5'
tflite_model = 'pruned_tflite_model.tflite'

print("Size of gzipped baseline Keras model: %.2f bytes" % (get_gzipped_model_size(orig_model)))
print("Size of gzipped pruned Keras model: %.2f bytes" % (get_gzipped_model_size(prun_model)))
print("Size of gzipped pruned TFlite model: %.2f bytes" % (get_gzipped_model_size(tflite_model)))

TrafficSignalClassifier_v4_2.h5
Size of gzipped baseline Keras model: 9123266.00 bytes
pruned_stripped_model.h5
Size of gzipped pruned Keras model: 985704.00 bytes
pruned_tflite_model.tflite
Size of gzipped pruned TFlite model: 979980.00 bytes


# Pruning + Quantisation

In [17]:
converter = tf.lite.TFLiteConverter.from_keras_model_file(orig_model)

# optimization for size
converter.optimizations = [tf.lite.Optimize.OPTIMIZE_FOR_SIZE]

# conver the model here in bytes form
quantized_and_pruned_tflite_model = converter.convert()

INFO:tensorflow:Froze 14 variables.
INFO:tensorflow:Converted 14 variables to const ops.


In [18]:
quantized_and_pruned_tflite_file = 'quantized_and_prune.tflite'

with open(quantized_and_pruned_tflite_file, 'wb') as f:
  f.write(quantized_and_pruned_tflite_model)

In [19]:
def evaluate_model(interpreter):
  input_index = interpreter.get_input_details()[0]["index"]
  output_index = interpreter.get_output_details()[0]["index"]

  # Run predictions on ever y image in the "test" dataset.
  prediction_digits = []
  for i, test_image in enumerate(test_images):
    if i % 1000 == 0:
      print('Evaluated on {n} results so far.'.format(n=i))
    # Pre-processing: add batch dimension and convert to float32 to match with
    # the model's input data format.
    test_image = np.expand_dims(test_image, axis=0).astype(np.float32)
    interpreter.set_tensor(input_index, test_image)

    # Run inference.
    interpreter.invoke()

    # Post-processing: remove batch dimension and find the digit with highest
    # probability.
    output = interpreter.tensor(output_index)
    digit = np.argmax(output()[0])
    prediction_digits.append(digit)

  print('\n')
  # Compare prediction results with ground truth labels to calculate accuracy.
  prediction_digits = np.array(prediction_digits)
  accuracy = (prediction_digits == test_labels).mean()
  return accuracy

# Comparision

In [20]:
###################################################################################
print('original model accuracy                  :', original_model_accuracy)
print('pruned keras without stripped accruacy   :', model_for_pruning_accuracy)
print('pruned keras with stripped accruacy      :', model_pruned_stripped_accuracy)

###################################################################################
interpreter = tf.lite.Interpreter(model_content=pruned_tflite_model)
interpreter.allocate_tensors()

test_accuracy = evaluate_model(interpreter)
print('pruned TFLite accuracy                   :', test_accuracy)

###################################################################################
interpreter = tf.lite.Interpreter(model_content=quantized_and_pruned_tflite_model)
interpreter.allocate_tensors()

test_accuracy = evaluate_model(interpreter)

print('pruned and quantized TFLite accuracy     :', test_accuracy)
###################################################################################

original model accuracy                  : 0.9645
pruned keras without stripped accruacy   : 0.9605
pruned keras with stripped accruacy      : 96.05
Evaluated on 0 results so far.
Evaluated on 1000 results so far.


pruned TFLite accuracy                   : 0.9605
Evaluated on 0 results so far.
Evaluated on 1000 results so far.


pruned and quantized TFLite accuracy     : 0.9645
