# Quantizing the Model so that its more efficient to run it on Edge Devices

In [1]:
import torch
import logging

from recognizer.data import ProjectDataset
from recognizer.model import ResNetFineTunedClassifier
from recognizer.trainer import TorchRunner

In [2]:
# configuring logging level
logging.basicConfig(level=logging.INFO)
# picking appropriate device to train the model
if torch.cuda.is_available():
    device = torch.device("cuda")
elif torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")
# dataset related config
datasets_dir = "../data"
training_dataset = f"{datasets_dir}/train"
testing_dataset = f"{datasets_dir}/test"
DEBUG_MODE = False

In [3]:
classifier = ResNetFineTunedClassifier()
classifier.load("../checkpoints/models/latest.safetensors")
TorchRunner.get_summary(classifier, (3, 244, 244))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 122, 122]           9,408
       BatchNorm2d-2         [-1, 64, 122, 122]             128
              ReLU-3         [-1, 64, 122, 122]               0
         MaxPool2d-4           [-1, 64, 61, 61]               0
            Conv2d-5           [-1, 64, 61, 61]          36,864
       BatchNorm2d-6           [-1, 64, 61, 61]             128
              ReLU-7           [-1, 64, 61, 61]               0
            Conv2d-8           [-1, 64, 61, 61]          36,864
       BatchNorm2d-9           [-1, 64, 61, 61]             128
             ReLU-10           [-1, 64, 61, 61]               0
       BasicBlock-11           [-1, 64, 61, 61]               0
           Conv2d-12           [-1, 64, 61, 61]          36,864
      BatchNorm2d-13           [-1, 64, 61, 61]             128
             ReLU-14           [-1, 64,

## Analyse the current model

In [4]:
before_linear_quantization = {
    "no_of_params": classifier.no_of_params(),
    "size_in_memory_in_mb": round(classifier.size_in_memory() / 2**20, 2),
    "no_of_gflops": round(classifier.no_of_flops()[0] / 1e9, 2),
}
before_linear_quantization

Operation               OPS         
----------------------  ----------  
conv1                   118013952   
bn1                     1605632     
relu                    1605632     
maxpool                 1605632     
layer1_0_conv1          115605504   
layer1_0_bn1            401408      
layer1_0_relu           401408      
layer1_0_conv2          115605504   
layer1_0_bn2            401408      
add                     401408      
layer1_0_relu_1         401408      
layer1_1_conv1          115605504   
layer1_1_bn1            401408      
layer1_1_relu           401408      
layer1_1_conv2          115605504   
layer1_1_bn2            401408      
add_1                   401408      
layer1_1_relu_1         401408      
layer1_2_conv1          115605504   
layer1_2_bn1            401408      
layer1_2_relu           401408      
layer1_2_conv2          115605504   
layer1_2_bn2            401408      
add_2                   401408      
layer1_2_relu_1         401408      
l

{'no_of_params': 21567848, 'size_in_memory_in_mb': 82.34, 'no_of_gflops': 3.68}

## Load the dataset

In [5]:
_, val_loader, test_loader = ProjectDataset.get_loaders()

## Quantise the model

In [6]:
qclassifier = ResNetFineTunedClassifier()
qclassifier.load("../checkpoints/models/latest.safetensors")

### Results before quantization

In [7]:
TorchRunner.evaluate(
    loader=val_loader, model=qclassifier, k=5, device=device
)

INFO:recognizer.trainer:Top-1 accuracy: 75.12%	Top-5 accuracy: 94.69%


<recognizer.utils.types.EvalResults at 0x31f3e4dd0>

In [7]:
TorchRunner.evaluate(
    loader=test_loader, model=qclassifier, k=5, device=device
)

INFO:recognizer.trainer:Top-1 accuracy: 69.00%	Top-5 accuracy: 93.71%


<recognizer.utils.types.EvalResults at 0x3205f0e10>

### Quantisation

In [8]:
# Our initial baseline model which is FP32
qclassifier.half()

ResNetFineTunedClassifier(
  (model): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine

### Evaluating model after quantization and before fine tuning

In [9]:
f32_train_loader, f32_val_loader, f32_test_loader = ProjectDataset.get_loaders(f32=True)
TorchRunner.evaluate(
    loader=f32_test_loader, model=qclassifier, k=5, device=device
)

INFO:recognizer.trainer:Top-1 accuracy: 69.00%	Top-5 accuracy: 93.71%


<recognizer.utils.types.EvalResults at 0x31f420dd0>

### Persisting the Model

In [10]:
qclassifier.save("../checkpoints/models/latest-float16.safetensors")

In [11]:
qclassifier.to("cpu")
after_linear_quantization = {
    "no_of_params": qclassifier.no_of_params(),
    "size_in_memory_in_mb": round(qclassifier.size_in_memory() / 2**20, 2),
    "no_of_gflops": round(qclassifier.no_of_flops(dtype=torch.float16)[0] / 1e9, 2),
}
after_linear_quantization

Operation               OPS         
----------------------  ----------  
conv1                   118013952   
bn1                     1605632     
relu                    1605632     
maxpool                 1605632     
layer1_0_conv1          115605504   
layer1_0_bn1            401408      
layer1_0_relu           401408      
layer1_0_conv2          115605504   
layer1_0_bn2            401408      
add                     401408      
layer1_0_relu_1         401408      
layer1_1_conv1          115605504   
layer1_1_bn1            401408      
layer1_1_relu           401408      
layer1_1_conv2          115605504   
layer1_1_bn2            401408      
add_1                   401408      
layer1_1_relu_1         401408      
layer1_2_conv1          115605504   
layer1_2_bn1            401408      
layer1_2_relu           401408      
layer1_2_conv2          115605504   
layer1_2_bn2            401408      
add_2                   401408      
layer1_2_relu_1         401408      
l

{'no_of_params': 21567848, 'size_in_memory_in_mb': 41.17, 'no_of_gflops': 3.68}