![logo](../../picture/license_header_logo.png)
**Copyright (c) 2020-2021 CertifAI Sdn. Bhd.**

This program is part of OSRFramework. You can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.

You should have received a copy of the GNU Affero General Public License
along with this program. If not, see http://www.gnu.org/licenses/.

Authored by: [Jacklyn Lim](mailto:jacklyn.lim@certifai.ai)

### Import Libraries

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from utils import download_model, download_dataset, load_model_state_dict, load_dataset, load_image, compare_performance

### Download Model and Dataset

In [2]:
# model download
# Remote URL of the trained fruit-classifier state dict, followed by the local
# directory and file name that are handed to download_model() below.
# NOTE(review): MODEL_FILENAME ends in ".zip" and is the same string as
# DATA_ZIP_FILENAME, while the URL points at a ".pt" file — confirm this name
# is intentional. The same directory + file name is later used to load the
# state dict, so both places must stay in sync.
MODEL_DOWNLOAD_PATH = 'https://s3.eu-central-1.wasabisys.com/certifai/deployment-training-labs/models/fruit_classifier_state_dict.pt'
MODEL_STATE_DICT_PATH = '../../resources/model/'
MODEL_FILENAME = 'fruits_image_classification.zip'

# data download
# Remote URL of the zipped image dataset, followed by the local directory and
# archive file name that are handed to download_dataset() below.
DATA_DOWNLOAD_PATH = "https://s3.eu-central-1.wasabisys.com/certifai/deployment-training-labs/fruits_image_classification-20210604T123547Z-001.zip"
DATA_SAVE_PATH = "../../resources/data/"
DATA_ZIP_FILENAME = "fruits_image_classification.zip"

# download model
# (helper from utils; judging by the recorded output it skips the download
# when the file already exists — verify against utils.download_model)
download_model(MODEL_DOWNLOAD_PATH, MODEL_STATE_DICT_PATH, MODEL_FILENAME)

# download dataset
# (helper from utils; presumably also extracts the archive under
# DATA_SAVE_PATH, since later cells read an unpacked test folder — TODO confirm)
download_dataset(DATA_DOWNLOAD_PATH, DATA_SAVE_PATH, DATA_ZIP_FILENAME)

model already exists, skipping download
data already exists, skipping download


### Load Original Model

In [3]:
class Net(nn.Module):
    """Small LeNet-style CNN that classifies RGB images.

    The network applies two ``conv -> ReLU -> max-pool`` stages followed by
    three fully connected layers and returns raw (un-normalised) logits.

    The attribute names (``conv1``, ``pool``, ``conv2``, ``fc1``, ``fc2``,
    ``fc3``) are the keys of the model's state dict, so they must not be
    renamed if previously saved weights are to be loaded.

    Args:
        num_classes: Number of output logits produced by ``fc3``.
            Defaults to 3, the value the original model was built with.
        image_size: Side length in pixels of the (square) input images.
            It fixes the number of input features of ``fc1``. The default of
            148 gives ``16 * 34 * 34 = 18496`` features, the value the
            original model was built with.

    Raises:
        ValueError: If ``image_size`` is too small to survive both
            convolution/pooling stages, or ``num_classes`` is not positive.
    """

    def __init__(self, num_classes: int = 3, image_size: int = 148):
        super().__init__()
        if num_classes < 1:
            raise ValueError(f"num_classes must be positive, got {num_classes}")

        # Each stage is an un-padded 5x5 convolution (side - 4) followed by a
        # 2x2 max-pool with stride 2 (floor-halving of the side length).
        side = ((image_size - 4) // 2 - 4) // 2
        if side < 1:
            raise ValueError(
                f"image_size={image_size} is too small for this network; "
                "it must be at least 16"
            )

        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        # The input width of this layer depends on the input image size:
        # 16 output channels of conv2 times the remaining spatial area.
        self.fc1 = nn.Linear(16 * side * side, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, num_classes)

    def forward(self, x):
        """Return class logits for a batch of images.

        Args:
            x: Tensor of shape ``(batch, 3, image_size, image_size)``.

        Returns:
            Tensor of shape ``(batch, num_classes)`` holding raw logits.
        """
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = torch.flatten(x, 1)  # flatten all dimensions except batch
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

In [4]:
# Build the FP32 network and populate it with the downloaded weights.
model_fp32 = load_model_state_dict(Net(), MODEL_STATE_DICT_PATH + MODEL_FILENAME)
# Switch to inference mode before the model is inspected or evaluated.
model_fp32.eval()

# Show the architecture of the original model under a bold heading,
# followed by a blank separator line.
print("\033[1mFP32 Model: \033[0m", model_fp32, "\n", sep="\n")

[1mFP32 Model: [0m
Net(
  (conv1): Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=18496, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=3, bias=True)
)




### Quantize Model Using Dynamic Quantization

In [5]:
def dynamic_quantization(model_fp32):
    """Return a dynamically quantized version of ``model_fp32``.

    Only ``torch.nn.Linear`` layers are quantized; their weights are stored
    as ``torch.qint8``. Every other layer is carried over unchanged. The call
    relies on the default ``inplace=False`` of ``quantize_dynamic``, so the
    model passed in is not modified.

    Args:
        model_fp32: The original floating-point model.

    Returns:
        The quantized model instance.
    """
    # Layer types that should be replaced by their dynamically quantized form.
    layers_to_quantize = {torch.nn.Linear}
    return torch.quantization.quantize_dynamic(
        model_fp32,
        layers_to_quantize,
        dtype=torch.qint8,  # target dtype of the quantized weights
    )

In [6]:
# Derive the INT8 model from the FP32 model via dynamic quantization.
model_int8 = dynamic_quantization(model_fp32)

# Show the architecture of the quantized model under a bold heading,
# followed by a blank separator line.
print("\033[1mINT8 Model: \033[0m", model_int8, "\n", sep="\n")

[1mINT8 Model: [0m
Net(
  (conv1): Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): DynamicQuantizedLinear(in_features=18496, out_features=120, dtype=torch.qint8, qscheme=torch.per_tensor_affine)
  (fc2): DynamicQuantizedLinear(in_features=120, out_features=84, dtype=torch.qint8, qscheme=torch.per_tensor_affine)
  (fc3): DynamicQuantizedLinear(in_features=84, out_features=3, dtype=torch.qint8, qscheme=torch.per_tensor_affine)
)




### Compare Model Performance

PyTorch quantization currently supports inference only on CPU (x86 and ARM backends), so both the model and the image data must be placed on the CPU.

In [7]:
# Single image used for the inference call, and the root folder of the test
# split used for the accuracy comparison. Both live under the dataset
# directory populated by the download step.
INFERENCE_IMAGE_PATH = "../../resources/data/fruits_image_classification/test/apple/image1.jpg"
TEST_DATASET_ROOTDIR = "../../resources/data/fruits_image_classification/test"

# load image
# (helper from utils; presumably returns a preprocessed tensor ready to be
# fed to the model — TODO confirm against utils.load_image)
inference_image = load_image(INFERENCE_IMAGE_PATH)

# load test dataset
# (helper from utils; the variable name suggests it returns a DataLoader over
# the test folder — TODO confirm against utils.load_dataset)
test_dataloader = load_dataset(TEST_DATASET_ROOTDIR)

# compare performance between original model and quantized model
# The two string arguments are the display names used in the report; per the
# recorded output the helper prints model size, prediction time and test
# accuracy for each model.
compare_performance(model_fp32, model_int8, "model_fp32", "model_int8", inference_image, test_dataloader)

Comparing size of models
model:  model_fp32  	 Size (KB): 8935.103
model:  model_int8  	 Size (KB): 2247.599
3.98 times smaller

Comparing latency of models
model:  model_fp32  	 prediction time: 0.004999399185180664s
model:  model_int8  	 prediction time: 0.004001140594482422s

Comparing accuracy of models
model:  model_fp32  	 Test Accuracy: 0.74
model:  model_int8  	 Test Accuracy: 0.74
