# MODEL QUANTIZATION

In [31]:
import onnx
import onnxruntime as rt
from onnxruntime.quantization import quantize_dynamic, QuantType
import os
import numpy as np
import cv2
import time
import tensorflow as tf

In [32]:
CONFIGURATION = {
    "CLASS_NAMES" : ['angry', 'happy', 'sad'],
    "BATCH_SIZE" : 32,
    "IMAGE_SIZE" : 224,
    "LEARNING_RATE" : 0.01,
    "N_EPOCHS" : 20,
    "DROPOUT_RATE": 0.0,
    "REGULARIZATION_RATE" : 0.0,
    "N_FILTERS" : 6,
    "KERNEL_SIZE" : 3,
    "N_STRIDES" : 1,
    "POOL_SIZE" : 2,
    "N_DENSE_1" : 128,
    "N_DENSE_2" : 128,
    "NUM_CLASSES" : 3,
    "PATCH_SIZE" : 16,
}

trainDirectory = "/Users/aman/Documents/Work/Machine Learning/Computer-Vision-TensorFlow/Human-Emotions-Detection/Dataset/Emotions Dataset/Emotions Dataset/train"
testDirectory = "/Users/aman/Documents/Work/Machine Learning/Computer-Vision-TensorFlow/Human-Emotions-Detection/Dataset/Emotions Dataset/Emotions Dataset/test"

trainDataset = tf.keras.utils.image_dataset_from_directory(
    trainDirectory,
    labels='inferred',
    label_mode='categorical',
    class_names=CONFIGURATION["CLASS_NAMES"],
    color_mode='rgb',
    batch_size=CONFIGURATION["BATCH_SIZE"],
    image_size=(CONFIGURATION["IMAGE_SIZE"], CONFIGURATION["IMAGE_SIZE"]),
    shuffle=True,
    seed=99,
    validation_split=0.2,
    subset='training',
)

valDataset = tf.keras.utils.image_dataset_from_directory(
    trainDirectory,
    labels='inferred',
    label_mode='categorical',
    class_names=CONFIGURATION["CLASS_NAMES"],
    color_mode='rgb',
    batch_size=CONFIGURATION["BATCH_SIZE"],
    image_size=(CONFIGURATION["IMAGE_SIZE"], CONFIGURATION["IMAGE_SIZE"]),
    shuffle=True,
    seed=99,
    validation_split=0.2,
    subset='validation',
)

testDataset = tf.keras.utils.image_dataset_from_directory(
    testDirectory,
    labels='inferred',
    label_mode='categorical',
    class_names=CONFIGURATION["CLASS_NAMES"],
    color_mode='rgb',
    batch_size=CONFIGURATION["BATCH_SIZE"],
    image_size=(CONFIGURATION["IMAGE_SIZE"], CONFIGURATION["IMAGE_SIZE"]),
    shuffle=True,
    seed=99,
    validation_split=None,
    subset=None,
)

trainDataset = trainDataset.prefetch(tf.data.AUTOTUNE)
testDataset = testDataset.prefetch(tf.data.AUTOTUNE)
valDataset = valDataset.prefetch(tf.data.AUTOTUNE)

Found 6799 files belonging to 3 classes.
Using 5440 files for training.


2025-02-02 20:01:10.010587: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M3 Pro
2025-02-02 20:01:10.010628: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 18.00 GB
2025-02-02 20:01:10.010636: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 6.00 GB
2025-02-02 20:01:10.010826: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2025-02-02 20:01:10.010842: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


Found 6799 files belonging to 3 classes.
Using 1359 files for validation.
Found 2278 files belonging to 3 classes.


In [5]:
ViTModelKeras = 'Models/EmotionDetectionViT.keras'
ViTModelONNX = 'Models/EmotionDetectionViT.onnx'
ViTQuantizedModel = 'Models/EmotionDetectionViT_Quantized.onnx'

quantizedViTModel = quantize_dynamic(ViTModelONNX, ViTQuantizedModel, weight_type=QuantType.QUInt8)



## Comparing Model's Sizes

In [9]:
def convert_size(size_bytes):
    for unit in ['B', 'KB', 'MB', 'GB', 'TB']:
        if size_bytes < 1024:
            return f"{size_bytes:.2f} {unit}"
        size_bytes /= 1024
ViTModelKerasSize = os.path.getsize(ViTModelKeras)
ViTModelONNXSize = os.path.getsize(ViTModelONNX)
ViTQuantizedModelSize = os.path.getsize(ViTQuantizedModel)
print("Keras Model Size: ", convert_size(ViTModelKerasSize))
print("ONNX Model Size: ", convert_size(ViTModelONNXSize))
print("Quantized ONNX Model Size: ", convert_size(ViTQuantizedModelSize))

Keras Model Size:  330.03 MB
ONNX Model Size:  327.59 MB
Quantized ONNX Model Size:  83.12 MB


## Inference

In [None]:
testImage = cv2.imread("/Users/aman/Documents/Work/Machine Learning/Computer-Vision-TensorFlow/Human-Emotions-Detection/Dataset/Emotions Dataset/Emotions Dataset/test/happy/2705.jpg_rotation_1.jpg")
testImage = cv2.resize(testImage, (CONFIGURATION["IMAGE_SIZE"], CONFIGURATION["IMAGE_SIZE"]))
testImage = testImage.astype(np.float32)
testImage = np.expand_dims(testImage, axis=0)

In [25]:
providers = ['CPUExecutionProvider']
m = rt.InferenceSession("Models/EmotionDetectionViT_Quantized.onnx", providers=providers)

onnxPred = m.run(['dense'], {'image': testImage})
print(onnxPred)
print(CONFIGURATION['CLASS_NAMES'][np.argmax(onnxPred[0])])

[array([[7.9681049e-05, 9.9959975e-01, 3.2057121e-04]], dtype=float32)]
happy


## Comapring Runtimes

In [29]:
m = rt.InferenceSession("Models/EmotionDetectionViT.onnx", providers=providers)
mQuant = rt.InferenceSession("Models/EmotionDetectionViT_Quantized.onnx", providers=providers)

startTime = time.time()
m.run(['dense'], {'image': testImage})
endTime = time.time()
print("ONNX Model Time on CPU: ", endTime - startTime)

print("\n")

startTime = time.time()
mQuant.run(['dense'], {'image': testImage})
endTime = time.time()
print("Quantized ONNX Model Time on CPU: ", endTime - startTime)

ONNX Model Time on CPU:  0.1537320613861084


Quantized ONNX Model Time on CPU:  0.12828421592712402


## Comparing Accuracy

In [56]:
def evaluate(model):
    total, acc = 0, 0
    for images, labels in valDataset.take(100):
        onnxPred = model.run(['dense'], {'image': images.numpy()})
        for i in range(len(onnxPred)):
            pred = np.argmax(onnxPred[0][i])
            if pred == np.argmax(labels[i]):
                acc += 1
        total += len(onnxPred)
    return acc / total

In [57]:
print("Onnx Model Accuracy: ", evaluate(m))
print("Quantized Onnx Model Accuracy: ", evaluate(mQuant))

2025-02-02 20:31:44.814010: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


Onnx Model Accuracy:  0.8604651162790697
Quantized Onnx Model Accuracy:  0.8837209302325582


2025-02-02 20:34:00.973434: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


## Generally the quantized model has a lower accuracy than the normal model. But, due to some reasons, it may increase the accuracy.
### 1. Acts as a regularizer and reduces overfitting.
### 2. Reduces noise and acts like a dropout.
### 3. PTQ sometimes smoothens the decision boundaries.