#Necessary imports

In [None]:
import os
import sys
import time
import numpy as np

import torch
import torch.nn as nn
from torch.utils.data import DataLoader

import torchvision
from torchvision import datasets
import torchvision.transforms as transforms

# Set up warnings
import warnings
warnings.filterwarnings(
    action='ignore',
    category=DeprecationWarning,
    module=r'.*'
)
warnings.filterwarnings(
    action='default',
    module=r'torch.ao.quantization'
)

# Specify random seed for repeatable results
torch.manual_seed(191009)

<torch._C.Generator at 0x7a3d0f7c64b0>

In [None]:
pip install --upgrade torch

Collecting torch
  Downloading torch-2.3.1-cp310-cp310-manylinux1_x86_64.whl (779.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m779.1/779.1 MB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0m
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch)
  Using cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl (410.6 MB)
Collecting nvidia-cufft-cu12==11.0.2.54 (from torch)
  Using cached nvidia_cuf

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


##Data Processing


In [None]:
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

trainset = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=True, transform=transform_train)
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=128, shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=True, transform=transform_test)
testloader = torch.utils.data.DataLoader(
    testset, batch_size=100, shuffle=False, num_workers=2)

classes = ('plane', 'car', 'bird', 'cat', 'deer',
           'dog', 'frog', 'horse', 'ship', 'truck')

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:04<00:00, 41893735.31it/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


#Evaluation metric

In [None]:
def evaluate_model(model, test_loader, device, criterion=None):

    model.eval()
    model.to(device)

    running_loss = 0
    running_corrects = 0

    for inputs, labels in test_loader:

        inputs = inputs.to(device)
        labels = labels.to(device)

        outputs = model(inputs)
        _, preds = torch.max(outputs, 1)
        if criterion is not None:
            loss = criterion(outputs, labels).item()
        else:
            loss = 0

        # statistics
        running_loss += loss * inputs.size(0)
        running_corrects += torch.sum(preds == labels.data)

    eval_loss = running_loss / len(test_loader.dataset)
    eval_accuracy = running_corrects / len(test_loader.dataset)

    return eval_loss, eval_accuracy

In [None]:
cpu_device ="cpu"

#Model

#Modifying the model for PTQ

In [None]:
import os

import torch
import torch.nn as nn

__all__ = ["MobileNetV2", "mobilenet_v2"]

from torch.ao.quantization import QuantStub, DeQuantStub

def _make_divisible(v, divisor, min_value=None):
    """
    This function is taken from the original tf repo.
    It ensures that all layers have a channel number that is divisible by 8
    It can be seen here:
    https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
    :param v:
    :param divisor:
    :param min_value:
    :return:
    """
    if min_value is None:
        min_value = divisor
    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
    # Make sure that round down does not go down by more than 10%.
    if new_v < 0.9 * v:
        new_v += divisor
    return new_v

class ConvBNReLU(nn.Sequential):
    def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1):
        padding = (kernel_size - 1) // 2
        super(ConvBNReLU, self).__init__(
            nn.Conv2d(
                in_planes,
                out_planes,
                kernel_size,
                stride,
                padding,
                groups=groups,
                bias=False,
            ),
            nn.BatchNorm2d(out_planes),
            nn.ReLU(inplace=True),
        )


class InvertedResidual(nn.Module):
    def __init__(self, inp, oup, stride, expand_ratio):
        super(InvertedResidual, self).__init__()
        self.quant = QuantStub()
        self.dequant = DeQuantStub()
        self.stride = stride
        assert stride in [1, 2]

        hidden_dim = int(round(inp * expand_ratio))
        self.use_res_connect = self.stride == 1 and inp == oup

        layers = []
        if expand_ratio != 1:
            # pw
            layers.append(ConvBNReLU(inp, hidden_dim, kernel_size=1))
        layers.extend(
            [
                # dw
                ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim),
                # pw-linear
                nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
                nn.BatchNorm2d(oup),
            ]
        )
        self.conv = nn.Sequential(*layers)
        self.skip_add = nn.quantized.FloatFunctional()

    def forward(self, x):
        if self.use_res_connect:

            return self.skip_add.add(x, self.conv(x))
        else:
            return self.conv(x)


class MobileNetV2(nn.Module):
    def __init__(self, num_classes=10, width_mult=1.0,round_nearest=8):
        super(MobileNetV2, self).__init__()
        block = InvertedResidual
        input_channel = 32
        last_channel = 1280

        # CIFAR10
        inverted_residual_setting = [
            # t, c, n, s
            [1, 16, 1, 1],
            [6, 24, 2, 1],  # Stride 2 -> 1 for CIFAR-10
            [6, 32, 3, 2],
            [6, 64, 4, 2],
            [6, 96, 3, 1],
            [6, 160, 3, 2],
            [6, 320, 1, 1],
        ]
        # END

        # building first layer
        input_channel = _make_divisible(input_channel * width_mult, round_nearest)
        self.last_channel = _make_divisible(last_channel * max(1.0, width_mult), round_nearest)

        # CIFAR10: stride 2 -> 1
        features = [ConvBNReLU(3, input_channel, stride=1)]
        # END

        # building inverted residual blocks
        for t, c, n, s in inverted_residual_setting:
            output_channel = _make_divisible(c * width_mult, round_nearest)
            for i in range(n):
                stride = s if i == 0 else 1
                features.append(
                    block(input_channel, output_channel, stride, expand_ratio=t)
                )
                input_channel = output_channel
        # building last several layers
        features.append(ConvBNReLU(input_channel, self.last_channel, kernel_size=1))
        # make it nn.Sequential
        self.features = nn.Sequential(*features)
        self.quant = QuantStub()
        self.dequant = DeQuantStub()
        # building classifier
        self.classifier = nn.Sequential(
            nn.Dropout(0.2),
            nn.Linear(self.last_channel, num_classes),
        )

        # weight initialization
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode="fan_out")
                if m.bias is not None:
                    nn.init.zeros_(m.bias)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.ones_(m.weight)
                nn.init.zeros_(m.bias)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.zeros_(m.bias)

    def forward(self, x):
        x = self.quant(x)
        x = self.features(x)
        x = x.mean([2, 3])
        x = self.classifier(x)
        x = self.dequant(x)
        return x

    def fuse_model(self, is_qat=False):
        fuse_modules = torch.ao.quantization.fuse_modules_qat if is_qat else torch.ao.quantization.fuse_modules
        for m in self.modules():
            if type(m) == ConvBNReLU:
                fuse_modules(m, ['0', '1', '2'], inplace=True)
            if type(m) == InvertedResidual:
                for idx in range(len(m.conv)):
                    if type(m.conv[idx]) == nn.Conv2d:
                        fuse_modules(m.conv, [str(idx), str(idx + 1)], inplace=True)

def mobilenet_v2(pretrained=False, progress=True, device="cpu", **kwargs):
    """
    Constructs a MobileNetV2 architecture from
    `"MobileNetV2: Inverted Residuals and Linear Bottlenecks" <https://arxiv.org/abs/1801.04381>`_.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    model = MobileNetV2(**kwargs)
    if pretrained:
        script_dir = os.path.dirname(__file__)
        state_dict = torch.load(
            script_dir + "/state_dicts/mobilenet_v2.pt", map_location=device
        )
        model.load_state_dict(state_dict)
    return model

In [None]:
model=MobileNetV2()
model.load_state_dict(torch.load("/content/drive/MyDrive/Samsung/mobilenet_v2.pt"))
model.eval()

MobileNetV2(
  (features): Sequential(
    (0): ConvBNReLU(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
    )
    (1): InvertedResidual(
      (quant): QuantStub()
      (dequant): DeQuantStub()
      (conv): Sequential(
        (0): ConvBNReLU(
          (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
          (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU(inplace=True)
        )
        (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (skip_add): FloatFunctional(
        (activation_post_process): Identity()
      )
    )
    (2): InvertedResidual(
      (quant): QuantStub()
      (dequant): DeQu

In [None]:
_,top1=evaluate_model(model, testloader,cpu_device, criterion=None)

In [None]:
print(top1)

tensor(0.9299)


#Applying PTQ

In [None]:
Mymodel=model
Mymodel

MobileNetV2(
  (features): Sequential(
    (0): ConvBNReLU(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
    )
    (1): InvertedResidual(
      (quant): QuantStub()
      (dequant): DeQuantStub()
      (conv): Sequential(
        (0): ConvBNReLU(
          (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
          (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU(inplace=True)
        )
        (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (skip_add): FloatFunctional(
        (activation_post_process): Identity()
      )
    )
    (2): InvertedResidual(
      (quant): QuantStub()
      (dequant): DeQu

In [None]:
def print_size_of_model(model):
    torch.save(model.state_dict(), "temp.p")
    print('Size (MB):', os.path.getsize("temp.p")/1e6)
    os.remove('temp.p')

In [None]:
Mymodel.fuse_model()

In [None]:
Mymodel.qconfig = torch.ao.quantization.default_qconfig

In [None]:
print(Mymodel.qconfig)
torch.ao.quantization.prepare(Mymodel, inplace=True)
print('Post Training Quantization Prepare: Inserting Observers')
print('\n Inverted Residual Block:After observer insertion \n\n', Mymodel.features[1].conv)
evaluate_model(Mymodel, testloader,cpu_device, criterion=None)
print('Post Training Quantization: Calibration done')
torch.ao.quantization.convert(Mymodel, inplace=True)
print('Post Training Quantization: Convert done')
print('\n Inverted Residual Block: After fusion and quantization, note fused modules: \n\n',Mymodel.features[1].conv)

print("Size of model after quantization")
print_size_of_model(Mymodel)


QConfig(activation=functools.partial(<class 'torch.ao.quantization.observer.MinMaxObserver'>, quant_min=0, quant_max=127){}, weight=functools.partial(<class 'torch.ao.quantization.observer.MinMaxObserver'>, dtype=torch.qint8, qscheme=torch.per_tensor_symmetric){})
Post Training Quantization Prepare: Inserting Observers

 Inverted Residual Block:After observer insertion 

 Sequential(
  (0): ConvBNReLU(
    (0): Conv2d(
      32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False
      (activation_post_process): MinMaxObserver(min_val=inf, max_val=-inf)
    )
    (1): BatchNorm2d(
      32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
      (activation_post_process): MinMaxObserver(min_val=inf, max_val=-inf)
    )
    (2): ReLU(inplace=True)
  )
  (1): Conv2d(
    32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False
    (activation_post_process): MinMaxObserver(min_val=inf, max_val=-inf)
  )
  (2): BatchNorm2d(
    16, eps=1e-05, momentum=0.1




Size of model after quantization
Size (MB): 2.649228


In [None]:
torch.save(Mymodel.state_dict(),"/content/drive/MyDrive/Samsung/saved_models/ptq1_weight.pt")

In [None]:
x=torch.load("/content/drive/MyDrive/Samsung/saved_models/ptq1_weight.pt")

  device=storage.device,


In [None]:
x

OrderedDict([('features.0.0.weight',
              tensor([[[[-0.0031,  0.0046, -0.0077],
                        [-0.0123,  0.0368,  0.0261],
                        [-0.0184,  0.0276,  0.0307]],
              
                       [[-0.0123, -0.0077, -0.0215],
                        [-0.0138,  0.0368,  0.0215],
                        [-0.0123,  0.0261,  0.0261]],
              
                       [[-0.0031,  0.0046, -0.0092],
                        [-0.0107,  0.0322,  0.0123],
                        [-0.0123,  0.0123,  0.0077]]],
              
              
                      [[[-0.0031, -0.0031, -0.0015],
                        [-0.0046, -0.0061, -0.0046],
                        [-0.0046, -0.0046, -0.0046]],
              
                       [[ 0.0015,  0.0015,  0.0031],
                        [ 0.0000,  0.0000,  0.0015],
                        [ 0.0000,  0.0000,  0.0000]],
              
                       [[ 0.0031,  0.0046,  0.0031],
                   

In [None]:
for param_tensor in x:
      print(x[param_tensor].requires_grad)

False


AttributeError: 'NoneType' object has no attribute 'requires_grad'

#Evaluating the PTQ model

In [None]:
_,top2=evaluate_model(Mymodel, testloader,cpu_device, criterion=None)

In [None]:
print(top2)

tensor(0.9248)


#per-channel basis

In [None]:
per_channel_quantized_model =MobileNetV2()
per_channel_quantized_model.load_state_dict(torch.load("/content/drive/MyDrive/Samsung/mobilenet_v2.pt"))
per_channel_quantized_model.eval()
per_channel_quantized_model.fuse_model()
# The old 'fbgemm' is still available but 'x86' is the recommended default.
per_channel_quantized_model.qconfig = torch.ao.quantization.get_default_qconfig('x86')
print(per_channel_quantized_model.qconfig)

torch.ao.quantization.prepare(per_channel_quantized_model, inplace=True)
#evaluate_model(per_channel_quantized_model, testloader,cpu_device, criterion=None)
torch.ao.quantization.convert(per_channel_quantized_model, inplace=True)

QConfig(activation=functools.partial(<class 'torch.ao.quantization.observer.HistogramObserver'>, reduce_range=True){}, weight=functools.partial(<class 'torch.ao.quantization.observer.PerChannelMinMaxObserver'>, dtype=torch.qint8, qscheme=torch.per_channel_symmetric){})


MobileNetV2(
  (features): Sequential(
    (0): ConvBNReLU(
      (0): QuantizedConvReLU2d(3, 32, kernel_size=(3, 3), stride=(1, 1), scale=1.0, zero_point=0, padding=(1, 1))
      (1): Identity()
      (2): Identity()
    )
    (1): InvertedResidual(
      (quant): Quantize(scale=tensor([1.]), zero_point=tensor([0]), dtype=torch.quint8)
      (dequant): DeQuantize()
      (conv): Sequential(
        (0): ConvBNReLU(
          (0): QuantizedConvReLU2d(32, 32, kernel_size=(3, 3), stride=(1, 1), scale=1.0, zero_point=0, padding=(1, 1), groups=32)
          (1): Identity()
          (2): Identity()
        )
        (1): QuantizedConv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), scale=1.0, zero_point=0)
        (2): Identity()
      )
      (skip_add): QFunctional(
        scale=1.0, zero_point=0
        (activation_post_process): Identity()
      )
    )
    (2): InvertedResidual(
      (quant): Quantize(scale=tensor([1.]), zero_point=tensor([0]), dtype=torch.quint8)
      (dequant): DeQu

In [None]:
_,top3=evaluate_model(per_channel_quantized_model, testloader,cpu_device, criterion=None)
print(top3)

NotImplementedError: Could not run 'aten::quantize_per_tensor' with arguments from the 'QuantizedCPU' backend. This could be because the operator doesn't exist for this backend, or was omitted during the selective/custom build process (if using custom build). If you are a Facebook employee using PyTorch on mobile, please visit https://fburl.com/ptmfixes for possible resolutions. 'aten::quantize_per_tensor' is only available for these backends: [CPU, CUDA, Meta, BackendSelect, Python, FuncTorchDynamicLayerBackMode, Functionalize, Named, Conjugate, Negative, ZeroTensor, ADInplaceOrView, AutogradOther, AutogradCPU, AutogradCUDA, AutogradHIP, AutogradXLA, AutogradMPS, AutogradIPU, AutogradXPU, AutogradHPU, AutogradVE, AutogradLazy, AutogradMTIA, AutogradPrivateUse1, AutogradPrivateUse2, AutogradPrivateUse3, AutogradMeta, AutogradNestedTensor, Tracer, AutocastCPU, AutocastCUDA, FuncTorchBatched, BatchedNestedTensor, FuncTorchVmapMode, Batched, VmapMode, FuncTorchGradWrapper, PythonTLSSnapshot, FuncTorchDynamicLayerFrontMode, PreDispatch, PythonDispatcher].

CPU: registered at aten/src/ATen/RegisterCPU.cpp:31419 [kernel]
CUDA: registered at aten/src/ATen/RegisterCUDA.cpp:44504 [kernel]
Meta: registered at ../aten/src/ATen/core/MetaFallbackKernel.cpp:23 [backend fallback]
BackendSelect: fallthrough registered at ../aten/src/ATen/core/BackendSelectFallbackKernel.cpp:3 [backend fallback]
Python: registered at ../aten/src/ATen/core/PythonFallbackKernel.cpp:154 [backend fallback]
FuncTorchDynamicLayerBackMode: registered at ../aten/src/ATen/functorch/DynamicLayer.cpp:497 [backend fallback]
Functionalize: registered at ../aten/src/ATen/FunctionalizeFallbackKernel.cpp:324 [backend fallback]
Named: registered at ../aten/src/ATen/core/NamedRegistrations.cpp:7 [backend fallback]
Conjugate: registered at ../aten/src/ATen/ConjugateFallback.cpp:17 [backend fallback]
Negative: registered at ../aten/src/ATen/native/NegateFallback.cpp:18 [backend fallback]
ZeroTensor: registered at ../aten/src/ATen/ZeroTensorFallback.cpp:86 [backend fallback]
ADInplaceOrView: fallthrough registered at ../aten/src/ATen/core/VariableFallbackKernel.cpp:86 [backend fallback]
AutogradOther: registered at ../torch/csrc/autograd/generated/VariableType_2.cpp:19078 [autograd kernel]
AutogradCPU: registered at ../torch/csrc/autograd/generated/VariableType_2.cpp:19078 [autograd kernel]
AutogradCUDA: registered at ../torch/csrc/autograd/generated/VariableType_2.cpp:19078 [autograd kernel]
AutogradHIP: registered at ../torch/csrc/autograd/generated/VariableType_2.cpp:19078 [autograd kernel]
AutogradXLA: registered at ../torch/csrc/autograd/generated/VariableType_2.cpp:19078 [autograd kernel]
AutogradMPS: registered at ../torch/csrc/autograd/generated/VariableType_2.cpp:19078 [autograd kernel]
AutogradIPU: registered at ../torch/csrc/autograd/generated/VariableType_2.cpp:19078 [autograd kernel]
AutogradXPU: registered at ../torch/csrc/autograd/generated/VariableType_2.cpp:19078 [autograd kernel]
AutogradHPU: registered at ../torch/csrc/autograd/generated/VariableType_2.cpp:19078 [autograd kernel]
AutogradVE: registered at ../torch/csrc/autograd/generated/VariableType_2.cpp:19078 [autograd kernel]
AutogradLazy: registered at ../torch/csrc/autograd/generated/VariableType_2.cpp:19078 [autograd kernel]
AutogradMTIA: registered at ../torch/csrc/autograd/generated/VariableType_2.cpp:19078 [autograd kernel]
AutogradPrivateUse1: registered at ../torch/csrc/autograd/generated/VariableType_2.cpp:19078 [autograd kernel]
AutogradPrivateUse2: registered at ../torch/csrc/autograd/generated/VariableType_2.cpp:19078 [autograd kernel]
AutogradPrivateUse3: registered at ../torch/csrc/autograd/generated/VariableType_2.cpp:19078 [autograd kernel]
AutogradMeta: registered at ../torch/csrc/autograd/generated/VariableType_2.cpp:19078 [autograd kernel]
AutogradNestedTensor: registered at ../torch/csrc/autograd/generated/VariableType_2.cpp:19078 [autograd kernel]
Tracer: registered at ../torch/csrc/autograd/generated/TraceType_2.cpp:17415 [kernel]
AutocastCPU: fallthrough registered at ../aten/src/ATen/autocast_mode.cpp:378 [backend fallback]
AutocastCUDA: fallthrough registered at ../aten/src/ATen/autocast_mode.cpp:244 [backend fallback]
FuncTorchBatched: registered at ../aten/src/ATen/functorch/LegacyBatchingRegistrations.cpp:731 [backend fallback]
BatchedNestedTensor: registered at ../aten/src/ATen/functorch/LegacyBatchingRegistrations.cpp:758 [backend fallback]
FuncTorchVmapMode: fallthrough registered at ../aten/src/ATen/functorch/VmapModeRegistrations.cpp:27 [backend fallback]
Batched: registered at ../aten/src/ATen/LegacyBatchingRegistrations.cpp:1075 [backend fallback]
VmapMode: fallthrough registered at ../aten/src/ATen/VmapModeRegistrations.cpp:33 [backend fallback]
FuncTorchGradWrapper: registered at ../aten/src/ATen/functorch/TensorWrapper.cpp:202 [backend fallback]
PythonTLSSnapshot: registered at ../aten/src/ATen/core/PythonFallbackKernel.cpp:162 [backend fallback]
FuncTorchDynamicLayerFrontMode: registered at ../aten/src/ATen/functorch/DynamicLayer.cpp:493 [backend fallback]
PreDispatch: registered at ../aten/src/ATen/core/PythonFallbackKernel.cpp:166 [backend fallback]
PythonDispatcher: registered at ../aten/src/ATen/core/PythonFallbackKernel.cpp:158 [backend fallback]


In [None]:
per_channel_quantized_model.state_dict()

AttributeError: 'collections.OrderedDict' object has no attribute 'q_scale'

In [None]:
torch.save(per_channel_quantized_model.state_dict(),"/content/drive/MyDrive/Samsung/saved_models/ptq_weight.pt")

In [None]:
x=torch.load("/content/drive/MyDrive/Samsung/saved_models/ptq_weight.pt")

  device=storage.device,


In [None]:
i=1;
from collections import OrderedDict
scales=OrderedDict()
for param_tensor in x:
    print(param_tensor)
    if(param_tensor=="classifier.1._packed_params.dtype"):
      continue
    if(param_tensor=="classifier.1._packed_params._packed_params"):
      continue
    else:
      if(x[param_tensor].dim()>1):
        scales[param_tensor]=(x[param_tensor].q_per_channel_scales())



features.0.0.weight
features.0.0.bias
features.0.0.scale
features.0.0.zero_point
features.1.conv.0.0.weight
features.1.conv.0.0.bias
features.1.conv.0.0.scale
features.1.conv.0.0.zero_point
features.1.conv.1.weight
features.1.conv.1.bias
features.1.conv.1.scale
features.1.conv.1.zero_point
features.1.skip_add.scale
features.1.skip_add.zero_point
features.2.conv.0.0.weight
features.2.conv.0.0.bias
features.2.conv.0.0.scale
features.2.conv.0.0.zero_point
features.2.conv.1.0.weight
features.2.conv.1.0.bias
features.2.conv.1.0.scale
features.2.conv.1.0.zero_point
features.2.conv.2.weight
features.2.conv.2.bias
features.2.conv.2.scale
features.2.conv.2.zero_point
features.2.skip_add.scale
features.2.skip_add.zero_point
features.3.conv.0.0.weight
features.3.conv.0.0.bias
features.3.conv.0.0.scale
features.3.conv.0.0.zero_point
features.3.conv.1.0.weight
features.3.conv.1.0.bias
features.3.conv.1.0.scale
features.3.conv.1.0.zero_point
features.3.conv.2.weight
features.3.conv.2.bias
features.3

In [None]:
x

OrderedDict([('features.0.0.weight',
              tensor([[[[-8.5275e-04,  1.3265e-03, -2.3688e-03],
                        [-3.8848e-03,  1.2033e-02,  8.6223e-03],
                        [-5.6850e-03,  9.0013e-03,  9.8541e-03]],
              
                       [[-3.6953e-03, -2.3688e-03, -6.9168e-03],
                        [-4.3585e-03,  1.1844e-02,  7.0115e-03],
                        [-3.7900e-03,  8.2433e-03,  8.3380e-03]],
              
                       [[-1.0423e-03,  1.4213e-03, -2.7478e-03],
                        [-3.6953e-03,  1.0328e-02,  4.0743e-03],
                        [-3.9795e-03,  4.0743e-03,  2.2740e-03]]],
              
              
                      [[[-2.2593e-03, -2.4206e-03, -1.1296e-03],
                        [-4.4782e-03, -5.1640e-03, -3.7520e-03],
                        [-3.7923e-03, -4.5992e-03, -3.7116e-03]],
              
                       [[ 1.5734e-03,  1.8962e-03,  2.1382e-03],
                        [ 3.2275e-04, 

In [None]:
scales

OrderedDict([('features.0.0.weight',
              tensor([9.4751e-05, 4.0344e-05, 1.1921e-07, 1.1921e-07, 9.6097e-05, 8.2913e-05,
                      1.5832e-07, 4.1803e-04, 6.0603e-04, 1.1709e-04, 5.5939e-04, 1.4840e-04,
                      2.8578e-04, 3.7032e-05, 1.1921e-07, 2.6386e-04, 4.5933e-06, 2.1432e-04,
                      1.5934e-04, 7.6368e-05, 2.4364e-04, 5.3907e-04, 1.5334e-04, 1.1268e-04,
                      1.6154e-04, 8.3845e-06, 6.7753e-07, 4.4434e-04, 2.8149e-04, 3.8337e-04,
                      1.5098e-04, 2.7655e-04], dtype=torch.float64)),
             ('features.1.conv.0.0.weight',
              tensor([6.6205e-03, 3.2082e-04, 1.1921e-07, 1.1921e-07, 6.7396e-03, 1.9800e-03,
                      1.8514e-07, 9.1651e-03, 5.5952e-03, 2.1654e-03, 8.1118e-03, 5.1678e-03,
                      8.2899e-03, 4.8559e-03, 1.1921e-07, 7.5300e-03, 6.8437e-05, 1.1160e-02,
                      4.8996e-03, 7.1613e-03, 6.2629e-03, 5.9765e-03, 6.9001e-03, 5.5462e-03,
   

In [None]:
import pickle
try:
    geeky_file = open('/content/drive/MyDrive/Samsung/scales', 'wb')
    pickle.dump(y, geeky_file)
    geeky_file.close()

except:
    print("Something went wrong")

In [None]:
i=1;
from collections import OrderedDict
zero=OrderedDict()
for param_tensor in x:
    if(param_tensor=="classifier.1._packed_params.dtype"):
      continue
    if(param_tensor=="classifier.1._packed_params._packed_params"):
      continue
    else:
      if(x[param_tensor].dim()>1):
        print(param_tensor)
        zero[param_tensor]=(x[param_tensor].q_per_channel_zero_points())

features.0.0.weight
features.1.conv.0.0.weight
features.1.conv.1.weight
features.2.conv.0.0.weight
features.2.conv.1.0.weight
features.2.conv.2.weight
features.3.conv.0.0.weight
features.3.conv.1.0.weight
features.3.conv.2.weight
features.4.conv.0.0.weight
features.4.conv.1.0.weight
features.4.conv.2.weight
features.5.conv.0.0.weight
features.5.conv.1.0.weight
features.5.conv.2.weight
features.6.conv.0.0.weight
features.6.conv.1.0.weight
features.6.conv.2.weight
features.7.conv.0.0.weight
features.7.conv.1.0.weight
features.7.conv.2.weight
features.8.conv.0.0.weight
features.8.conv.1.0.weight
features.8.conv.2.weight
features.9.conv.0.0.weight
features.9.conv.1.0.weight
features.9.conv.2.weight
features.10.conv.0.0.weight
features.10.conv.1.0.weight
features.10.conv.2.weight
features.11.conv.0.0.weight
features.11.conv.1.0.weight
features.11.conv.2.weight
features.12.conv.0.0.weight
features.12.conv.1.0.weight
features.12.conv.2.weight
features.13.conv.0.0.weight
features.13.conv.1.0.w

In [None]:
import pickle
try:
    geeky_file = open('/content/drive/MyDrive/Samsung/zeroes', 'wb')
    pickle.dump(zero, geeky_file)
    geeky_file.close()

except:
    print("Something went wrong")

In [None]:
scales

defaultdict(list,
            {'features.0.0.weight': [tensor([9.4751e-05, 4.0344e-05, 1.1921e-07, 1.1921e-07, 9.6097e-05, 8.2913e-05,
                      1.5832e-07, 4.1803e-04, 6.0603e-04, 1.1709e-04, 5.5939e-04, 1.4840e-04,
                      2.8578e-04, 3.7032e-05, 1.1921e-07, 2.6386e-04, 4.5933e-06, 2.1432e-04,
                      1.5934e-04, 7.6368e-05, 2.4364e-04, 5.3907e-04, 1.5334e-04, 1.1268e-04,
                      1.6154e-04, 8.3845e-06, 6.7753e-07, 4.4434e-04, 2.8149e-04, 3.8337e-04,
                      1.5098e-04, 2.7655e-04], dtype=torch.float64)],
             'features.1.conv.0.0.weight': [tensor([6.6205e-03, 3.2082e-04, 1.1921e-07, 1.1921e-07, 6.7396e-03, 1.9800e-03,
                      1.8514e-07, 9.1651e-03, 5.5952e-03, 2.1654e-03, 8.1118e-03, 5.1678e-03,
                      8.2899e-03, 4.8559e-03, 1.1921e-07, 7.5300e-03, 6.8437e-05, 1.1160e-02,
                      4.8996e-03, 7.1613e-03, 6.2629e-03, 5.9765e-03, 6.9001e-03, 5.5462e-03,
             

In [None]:
zero

defaultdict(list,
            {'features.0.0.weight': [tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                      0, 0, 0, 0, 0, 0, 0, 0])],
             'features.1.conv.0.0.weight': [tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                      0, 0, 0, 0, 0, 0, 0, 0])],
             'features.1.conv.1.weight': [tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])],
             'features.2.conv.0.0.weight': [tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])],
             'features.2.conv.1.0.weight': [tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         

#Saving the model

In [None]:
torch.save(torch.jit.script(per_channel_quantized_model),"/content/drive/MyDrive/Samsung/saved_models/"+"ptq_model.pt" )

#QAT(Quatizatin Aware Training)

In [None]:
criterion = nn.CrossEntropyLoss()

In [None]:
def train_one_epoch(model, criterion, optimizer, data_loader, device, ntrain_batches):
    model.train()
    cnt = 0
    for image, target in data_loader:
        start_time = time.time()
        print('.', end = '')
        cnt += 1
        image, target = image.to(device), target.to(device)
        output = model(image)
        loss = criterion(output, target)
        print(loss.requires_grad)
        optimizer.zero_grad()
        #loss.requires_grad = True
        loss.backward()
        optimizer.step()
        if cnt >= ntrain_batches:
            return
        #break
    return

In [None]:
qat_model =MobileNetV2()
qat_model.load_state_dict(torch.load("/content/drive/MyDrive/Samsung/mobilenet_v2.pt"))
qat_model.train()
qat_model.fuse_model(is_qat=True)
optimizer = torch.optim.SGD(qat_model.parameters(), lr = 0.0001)
# The old 'fbgemm' is still available but 'x86' is the recommended default.
qat_model.qconfig = torch.ao.quantization.get_default_qat_qconfig('x86')
#

In [None]:
qat_model.train()

torch.ao.quantization.prepare_qat(qat_model, inplace=True)
print('Inverted Residual Block: After preparation for QAT, note fake-quantization modules \n',qat_model.features[1].conv)



Inverted Residual Block: After preparation for QAT, note fake-quantization modules 
 Sequential(
  (0): ConvBNReLU(
    (0): ConvBnReLU2d(
      32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False
      (bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (weight_fake_quant): FusedMovingAvgObsFakeQuantize(
        fake_quant_enabled=tensor([1]), observer_enabled=tensor([1]), scale=tensor([1.]), zero_point=tensor([0], dtype=torch.int32), dtype=torch.qint8, quant_min=-128, quant_max=127, qscheme=torch.per_channel_symmetric, reduce_range=False
        (activation_post_process): MovingAveragePerChannelMinMaxObserver(min_val=tensor([]), max_val=tensor([]))
      )
      (activation_post_process): FusedMovingAvgObsFakeQuantize(
        fake_quant_enabled=tensor([1]), observer_enabled=tensor([1]), scale=tensor([1.]), zero_point=tensor([0], dtype=torch.int32), dtype=torch.quint8, quant_min=0, quant_max=127, qscheme=torch.per_t

In [None]:
from torch.ao.quantization import FakeQuantize
def set_manual_quantization_params(model, scales, zero_points):
    for name, module in model.named_modules():
        if isinstance(module, FakeQuantize):
            x=name[0:len(name)-11]
            #print(name)
            if x in scales and x in zero_points:
                #print(name)
                module.scale = scales[x]
                print(module.scale)
                module.zero_point =torch.tensor(zero_points[x],dtype=torch.int8)
                print(module.zero_point)
                module.scale.requires_grad = False
                module.zero_point.requires_grad = False

In [None]:
set_manual_quantization_params(qat_model, scales, zero)

tensor([0.1000, 0.1000, 0.1000, 0.1000, 0.1000, 0.1000, 0.1000, 0.1000, 0.1000,
        0.1000, 0.1000, 0.1000, 0.1000, 0.1000, 0.1000, 0.1000, 0.1000, 0.1000,
        0.1000, 0.1000, 0.1000, 0.1000, 0.1000, 0.1000, 0.1000, 0.1000, 0.1000,
        0.1000, 0.1000, 0.1000, 0.1000, 0.1000], dtype=torch.float64)
tensor([127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
        127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
        127, 127, 127, 127], dtype=torch.int8)
tensor([6.6205e-03, 3.2082e-04, 1.1921e-07, 1.1921e-07, 6.7396e-03, 1.9800e-03,
        1.8514e-07, 9.1651e-03, 5.5952e-03, 2.1654e-03, 8.1118e-03, 5.1678e-03,
        8.2899e-03, 4.8559e-03, 1.1921e-07, 7.5300e-03, 6.8437e-05, 1.1160e-02,
        4.8996e-03, 7.1613e-03, 6.2629e-03, 5.9765e-03, 6.9001e-03, 5.5462e-03,
        7.4521e-03, 8.2267e-05, 4.6208e-07, 1.1300e-02, 6.6516e-03, 6.1584e-03,
        8.7347e-03, 8.6922e-03], dtype=torch.float64)
tensor([0, 0, 0, 0, 0, 0, 0, 0, 0

  module.zero_point =torch.tensor(zero_points[x],dtype=torch.int8)


tensor([1.5953e-03, 1.1921e-07, 4.6231e-06, 1.1921e-07, 2.6730e-03, 1.7425e-03,
        1.1921e-07, 2.1574e-03, 2.2410e-03, 1.1921e-07, 1.7071e-07, 2.3886e-03,
        5.9328e-04, 1.0130e-04, 7.7046e-06, 2.1948e-03, 2.0596e-07, 1.1921e-07,
        1.9303e-03, 1.1921e-07, 2.3989e-03, 3.2869e-04, 1.4933e-03, 3.2246e-03,
        1.1921e-07, 1.9201e-03, 1.1921e-07, 1.5668e-03, 2.0260e-07, 4.8795e-06,
        3.0364e-03, 3.4338e-07, 1.1921e-07, 3.1486e-04, 1.1921e-07, 1.9355e-07,
        5.6504e-05, 1.1921e-07, 9.6454e-06, 1.1921e-07, 1.4192e-04, 2.0230e-03,
        3.7449e-07, 1.5485e-04, 1.1921e-07, 1.3179e-03, 2.2333e-03, 1.7247e-06,
        2.3646e-03, 3.0398e-05, 2.3992e-03, 1.1921e-07, 1.7439e-03, 2.1823e-06,
        2.0639e-03, 1.5396e-03, 1.4753e-07, 7.7220e-07, 1.8201e-03, 5.3478e-05,
        1.8433e-03, 2.8730e-03, 3.1910e-06, 2.7488e-07, 1.1921e-07, 5.9548e-04,
        1.8930e-03, 1.4134e-03, 1.8160e-03, 1.9274e-03, 1.1921e-07, 2.2605e-04,
        2.4748e-03, 1.1921e-07, 1.8769e-

In [None]:
import torch.optim as optim
def train_model(model, train_loader, test_loader, device, learning_rate=1e-1, num_epochs=200):

    # The training configurations were not carefully selected.

    criterion = nn.CrossEntropyLoss()

    model.to(cpu_device)

    # It seems that SGD optimizer is better than Adam optimizer for ResNet18 training on CIFAR10.
    optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9, weight_decay=1e-4)
    # scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=500)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[100, 150], gamma=0.1, last_epoch=-1)
    # optimizer = optim.Adam(model.parameters(), lr=learning_rate, betas=(0.9, 0.999), eps=1e-08, weight_decay=0, amsgrad=False)

    # Evaluation
    model.eval()
    eval_loss, eval_accuracy = evaluate_model(model=model, test_loader=testloader, device=device, criterion=criterion)
    print("Epoch: {:02d} Eval Loss: {:.3f} Eval Acc: {:.3f}".format(-1, eval_loss, eval_accuracy))

    for epoch in range(num_epochs):

        # Training
        model.train()

        running_loss = 0
        running_corrects = 0

        for inputs, labels in train_loader:

            inputs = inputs.to(device)
            labels = labels.to(device)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # statistics
            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels.data)

        train_loss = running_loss / len(train_loader.dataset)
        train_accuracy = running_corrects / len(train_loader.dataset)

        # Evaluation
        model.eval()
        eval_loss, eval_accuracy = evaluate_model(model=model, test_loader=testloader, device=cpu_device, criterion=criterion)

        # Set learning rate scheduler
        scheduler.step()

        print("Epoch: {:03d} Train Loss: {:.3f} Train Acc: {:.3f} Eval Loss: {:.3f} Eval Acc: {:.3f}".format(epoch, train_loss, train_accuracy, eval_loss, eval_accuracy))

    return model

In [None]:
num_train_batches = 20

# QAT takes time and one needs to train over a few epochs.
# Train and check accuracy after each epoch
for nepoch in range(8):
    train_one_epoch(qat_model, criterion, optimizer,testloader, torch.device('cpu'), num_train_batches)
    if nepoch > 3:
        # Freeze quantizer parameters
        qat_model.apply(torch.ao.quantization.disable_observer)
    if nepoch > 2:
        # Freeze batch norm mean and variance estimates
        qat_model.apply(torch.nn.intrinsic.qat.freeze_bn_stats)

    # Check the accuracy after each epoch
    quantized_model = torch.ao.quantization.convert(qat_model.eval(), inplace=False)
    quantized_model.eval()
    top4 = evaluate_model(quantized_model, testloader,cpu_device,criterion=None)


.True
.True
.True
.

KeyboardInterrupt: 

In [None]:
import torch

# Load the pre-trained quantization parameters
quant_params_path = '/content/drive/MyDrive/Samsung/saved_models/ptq_weight.pt'
quant_params = torch.load(quant_params_path)
torch.ao.quantization.convert(qat_model, inplace=True)
# Integrate the loaded parameters into the QAT model


qat_model.load_state_dict(quant_params, strict=False)


# Freeze quantizer parameters and batch norm stats from the beginning
qat_model.apply(torch.ao.quantization.disable_observer)
qat_model.apply(torch.nn.intrinsic.qat.freeze_bn_stats)


MobileNetV2(
  (features): Sequential(
    (0): ConvBNReLU(
      (0): QuantizedConvReLU2d(3, 32, kernel_size=(3, 3), stride=(1, 1), scale=0.004458199255168438, zero_point=0, padding=(1, 1))
      (1): Identity()
      (2): Identity()
    )
    (1): InvertedResidual(
      (quant): Quantize(scale=tensor([1.]), zero_point=tensor([0]), dtype=torch.quint8)
      (dequant): DeQuantize()
      (conv): Sequential(
        (0): ConvBNReLU(
          (0): QuantizedConvReLU2d(32, 32, kernel_size=(3, 3), stride=(1, 1), scale=0.004535078536719084, zero_point=0, padding=(1, 1), groups=32)
          (1): Identity()
          (2): Identity()
        )
        (1): QuantizedConv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), scale=0.008576472289860249, zero_point=54)
        (2): Identity()
      )
      (skip_add): QFunctional(
        scale=1.0, zero_point=0
        (activation_post_process): Identity()
      )
    )
    (2): InvertedResidual(
      (quant): Quantize(scale=tensor([1.]), zero_point=te

In [None]:
_,top3=evaluate_model(qat_model, testloader,cpu_device, criterion=None)
print(top3)

tensor(0.9273)


RuntimeError: only Tensors of floating point and complex dtype can require gradients

In [None]:

# Continue training and evaluating the QAT model
num_train_batches = 20

for nepoch in range(8):
    print(nepoch)
    if(nepoch==0):
      qat_model.apply(torch.ao.quantization.disable_observer)
      qat_model.apply(torch.nn.intrinsic.qat.freeze_bn_stats)
    else:
      train_one_epoch(qat_model, criterion, optimizer, testloader, torch.device('cpu'), num_train_batches)

    # Check the accuracy after each epoch
      quantized_model = torch.ao.quantization.convert(qat_model.eval(), inplace=False)
      quantized_model.eval()
      top4 = evaluate_model(quantized_model, testloader,cpu_device,criterion=None)
      print(top4)


0
1
.

RuntimeError: Zero-point must be Int32, Float or Half, found Char

In [None]:
top4 = evaluate_model(qat_model, testloader,cpu_device,criterion=None)

RuntimeError: Zero-point must be Int32, Float or Half, found Char

In [None]:
print( top4)

(0.0, tensor(0.9273))
