<a href="https://colab.research.google.com/github/Ibrah-N/Deep-Learning-Projects-Computer-Vision/blob/main/dl_17_quantization_QAT_PTQ_ONNX.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [79]:
import numpy as np
import time
import matplotlib.pyplot as plt


import tensorflow as tf

In [7]:

# Extract Saved dataset
!unzip -q gdrive/MyDrive/human-emotions-datasethes.zip -d dataset/

In [46]:
# Configurations

TRAIN_DIR = '/content/dataset/Emotions Dataset/Emotions Dataset/train/'
VAL_DIR = '/content/dataset/Emotions Dataset/Emotions Dataset/test/'


CONFIG = {
    'IM_SIZE' : 256,
    'BATCH_SIZE' : 32,
    'N_CLASSES': 3,
    'LABELS' : ['angry', 'happy', 'sad'],
    'EPOCHS' : 6,
    'SEED' : 42,
    'FILTER_SIZE': 16,
    'POOL_SIZE': 2,
    'PATCH_SIZE' : 16,
    'KERNEL_SIZE': 3,
    'DROPOUT_RATE': 0.0,
    'LEARNING_RATE': 0.0001,
    'DENSE_1': 32,
    'DENSE_2': 16,
    }


In [47]:
# create tensorflow dataset

train_dataset = tf.keras.preprocessing.image_dataset_from_directory(
    TRAIN_DIR,
    labels='inferred',
    label_mode='categorical',
    class_names=CONFIG['LABELS'],
    batch_size=None,
    image_size=(CONFIG['IM_SIZE'], CONFIG['IM_SIZE']),
    seed=CONFIG['SEED'],
)


validation_dataset = tf.keras.preprocessing.image_dataset_from_directory(
    VAL_DIR,
    labels='inferred',
    label_mode='categorical',
    class_names=CONFIG['LABELS'],
    batch_size=None,
    image_size=(CONFIG['IM_SIZE'], CONFIG['IM_SIZE']),
    seed=CONFIG['SEED'],
)

Found 6799 files belonging to 3 classes.
Found 2278 files belonging to 3 classes.


In [48]:
# rescalling data

def resize_rescale(image, label):
    image  = tf.image.resize(image, (CONFIG['IM_SIZE'], CONFIG['IM_SIZE']))
    image = tf.cast(image/255. , tf.float32)
    return image, label

In [49]:
# train dataset prefetch

train_dataset = (
    train_dataset
    # .map(resize_rescale)
    .shuffle(buffer_size=1000, reshuffle_each_iteration=True)
    .batch(CONFIG['BATCH_SIZE'])
    .prefetch(buffer_size=tf.data.AUTOTUNE)
)

In [50]:
# validation dataset prefetch
validation_dataset = (
    validation_dataset
    # .map(resize_rescale)
    .shuffle(buffer_size=1000, reshuffle_each_iteration=True)
    .batch(CONFIG['BATCH_SIZE'])
    .prefetch(buffer_size=tf.data.AUTOTUNE)
)

In [84]:
batch_data= validation_dataset.take(1)
img = next(iter(batch_data))[0][0]
lbl = next(iter(batch_data))[1][0]

## Quantization

### Post Training Quantization

In [106]:
!pip install -q TFlite

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/123.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m30.7/123.6 kB[0m [31m664.4 kB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━[0m [32m61.4/123.6 kB[0m [31m748.4 kB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m123.6/123.6 kB[0m [31m1.2 MB/s[0m eta [36m0:00:00[0m
[?25h

In [108]:
# simple conversion

convert_model = tf.lite.TFLiteConverter.from_keras_model(model)
convert_model = convert_model.convert()

In [None]:
# conversion with optimization

# data generator
def data_gen():
  for x, y in train_dataset.take(1):
    yield [x], [y]


# conversion parameters
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.inference_input_type = tf.uint8
converter.interence_output_type = tf.uint8
converter.representative_dataset = data_gen


# convert and save the model
tflite_model = converter.convert()
tflite_model = open("model.tflite", "wb").write(tflite_model)



In [None]:
# install tflite run time
!pip install -q tflite_runtime
import tflite_runtime as tflite

In [None]:
# inference on tflite model
interpreter = tflite.Interpreter(model_path="model.tflite")
interpreter.allocate_tensors()

input_details = interpreter.get_input_details()[0]
output_details = interpreter.get_output_details()[0]

test_image = np.expand_dims(img, axis=0).astype(input_detail['dtype'])
interpreter.set_tensor(input_details['index'], test_image)

interpreter.invoke()
interpreter.get_tensor(output_details['index'])[0]

### Quantization Aware Training

In [90]:
!pip install -q -U tensorflow-model-optimization
import tensorflow_model_optimization as tfmot

In [91]:
# download a pretrained efficient net B4

effnet_b4 = tf.keras.applications.EfficientNetB4(
    weights='imagenet',
    include_top=False,
    input_shape=(CONFIG['IM_SIZE'], CONFIG['IM_SIZE'], 3)
)

In [92]:
effnet_b4.trainable = False

In [94]:
x = effnet_b4.output
x = tf.keras.layers.GlobalAveragePooling2D()(x)
x = tf.keras.layers.Dense(CONFIG['DENSE_1'], activation='relu')(x)
x = tf.keras.layers.Dense(CONFIG['N_CLASSES'], activation='softmax')(x)
model = tf.keras.Model(inputs=effnet_b4.input, outputs=x)
model.summary()

Model: "model_3"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_2 (InputLayer)        [(None, 256, 256, 3)]        0         []                            
                                                                                                  
 rescaling_2 (Rescaling)     (None, 256, 256, 3)          0         ['input_2[0][0]']             
                                                                                                  
 normalization_1 (Normaliza  (None, 256, 256, 3)          7         ['rescaling_2[0][0]']         
 tion)                                                                                            
                                                                                                  
 rescaling_3 (Rescaling)     (None, 256, 256, 3)          0         ['normalization_1[0][0]'

In [104]:
# annotate the layer to be quantized during training

def annotate_layer(layer_name):
  if "conv" in layer_name.name:
    return tfmot.quantization.keras.quantize_annotate_layer(layer_name)
  return layer_name


effnet_b4_quantize = tf.keras.models.clone_model(
    model, clone_function=annotate_layer
)


effnet_b4_quantize.summary()

Model: "model_3"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_2 (InputLayer)        [(None, 256, 256, 3)]        0         []                            
                                                                                                  
 rescaling_2 (Rescaling)     (None, 256, 256, 3)          0         ['input_2[0][0]']             
                                                                                                  
 normalization_1 (Normaliza  (None, 256, 256, 3)          7         ['rescaling_2[6][0]']         
 tion)                                                                                            
                                                                                                  
 rescaling_3 (Rescaling)     (None, 256, 256, 3)          0         ['normalization_1[6][0]'

In [105]:
effnet_b4_quantize.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=CONFIG['LEARNING_RATE']),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)


effnet_b4_quantize.fit(train_dataset.take(5), epochs=1, verbose=1)



<keras.src.callbacks.History at 0x7d3432faab00>

### Dynamic Quantization

In [None]:
!pip install -q -U onnx
!pip install -q onnxruntime

In [20]:
import onnx
import onnxruntime as ort
from onnxruntime.quantization import quantize_dynamic, QuantType

In [23]:
# Dynamic Quantization of onnx model
onnx_model_path = "/content/gdrive/MyDrive/dl_models/human-emotion-detection-tf-vit-model.onnx"
onnx_quantized_model_path = "human-emotion-detection-tf-vit-model-quant.onnx"

quantized_model = quantize_dynamic(onnx_model_path, onnx_quantized_model_path, weight_type=QuantType.QUInt8)



In [55]:
# onnx quantized inference
img_array = tf.expand_dims(img, axis=0)
output_names = ['output_1']
provider = ["CPUExecutionProvider"]
m = ort.InferenceSession("/content/gdrive/MyDrive/dl_models/human-emotion-detection-tf-vit-model.onnx", providers=provider)
pred = m.run(output_names, {m.get_inputs()[0].name: np.array(img_array)})
pred

[array([[0.9939393 , 0.00270534, 0.00335543]], dtype=float32)]

In [82]:
def acc(model):
  start_time = time.time()
  acc = 0
  total, correct_pred = 0, 0
  output_names = ['output_1']

  for x, y in validation_dataset.take(1):
    for img, lbl in zip(x, y):
      pred = m.run(output_names, {m.get_inputs()[0].name: np.array(np.expand_dims(img, axis=0))})

      if (tf.argmax(pred[0][0]).numpy() == tf.argmax(lbl).numpy()):
        correct_pred += 1

      total += 1
  acc = (correct_pred/total) * 100

  return acc, (time.time() - start_time)

In [83]:
provider = ["CPUExecutionProvider"]
m = ort.InferenceSession("/content/gdrive/MyDrive/dl_models/human-emotion-detection-tf-vit-model.onnx", providers=provider)
m_quant = ort.InferenceSession("human-emotion-detection-tf-vit-model-quant.onnx", providers=provider)


print(f"Accuracy before quantization: {acc(m)}")
print(f"Accuracy after quantization: {acc(m_quant)}")

Accuracy before quantization: (87.5, 25.1528480052948)
Accuracy after quantization: (87.5, 41.02073264122009)
