# Import

In [None]:
%matplotlib inline
%xmode Verbose
# %xmode Plain

Exception reporting mode: Verbose


In [None]:
import os
import sys

# Detect the runtime: the `google.colab` module is only present in sys.modules on Colab.
if 'google.colab' in sys.modules:
    # Colab: mount Google Drive and work from the project folder stored there,
    # so the relative './checkpoint' and './data' paths used later resolve inside Drive.
    from google.colab import drive
    drive.mount('/content/drive', force_remount=True)
    os.chdir('/content/drive/My Drive/Project/Quantization/')
    print('Env: colab, run colab init')
    isColab = True
else:
    # Local run: chdir('.') leaves the working directory unchanged.
    os.chdir('.')
    # `cwd` is recorded here but not referenced again in the visible cells.
    cwd = os.getcwd()
    print('Env: local')
    isColab = False

Mounted at /content/drive
Env: colab, run colab init


In [None]:
from tqdm import tqdm
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import models, datasets, transforms

import torchvision

In [None]:
import copy
from torch import Tensor
from typing import Type, Any, Callable, Union, List, Optional

## Config

In [None]:
def set_seed(seed):
    """Seed PyTorch's global random number generator.

    Only ``torch.manual_seed`` is invoked. Seeding of Python's ``random``
    and NumPy, and the cuDNN flags (``deterministic = True``,
    ``benchmark = False``), are not applied by this helper.

    Args:
        seed: Value forwarded to ``torch.manual_seed``.
    """
    torch.manual_seed(seed)

# Fix the torch RNG once for the whole notebook.
set_seed(42)
# Use the first CUDA device when one is available, otherwise the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print('Device: ', device)

Device:  cpu


In [None]:
# Prefix (directory + file-name stem) for every checkpoint written by this notebook.
savepath = './checkpoint/' + 'ResNet50_2_'
# Float checkpoint that is loaded into the model below.
modelpath = './checkpoint/ResNet50_93.62_44.pt'

# Extra keyword arguments forwarded to torch.utils.data.DataLoader.
kwargs = {'num_workers': 2, 'pin_memory': True}

# Utils

In [None]:
def calibrate_model(model, loader, device=torch.device('cpu')):
    """Run every batch of ``loader`` through ``model`` in eval mode.

    Used after ``torch.quantization.prepare`` so that the inserted observers
    see activations; the model outputs themselves are discarded.

    Args:
        model: Module to calibrate. It is moved to ``device`` and switched to
            eval mode in place.
        loader: Iterable yielding ``(inputs, labels)`` pairs. Labels are
            ignored.
        device: Device on which the forward passes run.

    Returns:
        None.
    """
    model.to(device)
    model.eval()

    # Calibration needs forward passes only; disabling autograd avoids
    # building (and keeping alive) a graph for every batch.
    with torch.no_grad():
        for inputs, _ in loader:
            model(inputs.to(device))

def calibrate_model_n_liter(model, loader, device=torch.device('cpu'), count=3):
    """Short calibration: run at most ``count`` batches of ``loader`` through ``model``.

    Args:
        model: Module to calibrate. It is moved to ``device`` and switched to
            eval mode in place.
        loader: Iterable yielding ``(inputs, labels)`` pairs. Labels are
            ignored.
        device: Device on which the forward passes run.
        count: Maximum number of batches to feed. Values <= 0 run no batch.

    Returns:
        None.
    """
    model.to(device)
    model.eval()

    if count <= 0:
        return

    # Forward passes only: no autograd graph is needed for observer statistics.
    with torch.no_grad():
        for batch_index, (inputs, _) in enumerate(loader, start=1):
            model(inputs.to(device))
            # Stop right after the last wanted batch so the loader is not
            # asked for an extra batch that would be thrown away.
            if batch_index >= count:
                break

# Data Preprocessing

In [None]:
def get_CIFAR10(getdata=False):
    """Build the CIFAR-10 train and test datasets rooted at ``./data``.

    The train split is augmented (random 32x32 crop with 4 px padding and a
    random horizontal flip); both splits are converted to tensors and
    normalized with the same per-channel mean / std.

    Args:
        getdata: Passed to ``datasets.CIFAR10`` as ``download``.

    Returns:
        Tuple ``(input_size, num_classes, train_dataset, test_dataset)`` with
        ``input_size == 32`` and ``num_classes == 10``.
    """
    input_size, num_classes = 32, 10
    data_root = './data'

    # Shared by both splits.
    channel_norm = transforms.Normalize(
        (0.4914, 0.4822, 0.4465),
        (0.2023, 0.1994, 0.2010),
    )

    train_pipeline = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        channel_norm,
    ])
    train_dataset = datasets.CIFAR10(
        root=data_root, train=True, transform=train_pipeline, download=getdata
    )

    # No augmentation at evaluation time.
    test_pipeline = transforms.Compose([transforms.ToTensor(), channel_norm])
    test_dataset = datasets.CIFAR10(
        root=data_root, train=False, transform=test_pipeline, download=getdata
    )

    return input_size, num_classes, train_dataset, test_dataset

In [None]:
# get_CIFAR10() is called with its default getdata=False, so nothing is downloaded here.
# NOTE(review): this presumably requires the dataset to already exist under ./data.
input_size, num_classes, train_dataset, test_dataset = get_CIFAR10()

# The train loader is disabled; only the test loader is built in this notebook.
# train_loader = torch.utils.data.DataLoader(
#     train_dataset, batch_size=128, shuffle=True, **kwargs
# )
# Fixed order (shuffle=False), 128 images per batch.
test_loader = torch.utils.data.DataLoader(
    test_dataset, batch_size=128, shuffle=False, **kwargs
)

In [None]:
# Pull the first (inputs, labels) batch from the test loader.
# These two names are not referenced again in the visible cells.
temp_data, temp_target = next(iter(test_loader))

# Model

In [None]:
# A modified version of the original PyTorch source code:
# https://github.com/pytorch/vision/blob/release/0.8.0/torchvision/models/resnet.py

class Bottleneck(nn.Module):
    """Residual bottleneck block: 1x1 conv -> 3x3 conv -> 1x1 conv plus a skip path.

    Every ReLU is its own submodule (``relu1`` / ``relu2`` / ``relu3``) and the
    skip addition goes through ``nn.quantized.FloatFunctional``; these
    submodule names are the ones the fusion lists later in this notebook
    refer to.

    Args:
        inplanes: Number of input channels.
        planes: Base channel count; the block outputs ``planes * expansion``.
        stride: Stride of the 3x3 convolution (``conv2``).
        downsample: Optional module applied to the input on the skip path.
        groups: Groups of the 3x3 convolution.
        base_width: Scales the inner width as ``planes * base_width / 64``.
        dilation: Dilation (and padding) of the 3x3 convolution.
        norm_layer: Factory for normalization layers; ``nn.BatchNorm2d`` when
            ``None``.
    """
    expansion: int = 4

    def __init__(
            self,
            inplanes: int,
            planes: int,
            stride: int = 1,
            downsample: Optional[nn.Module] = None,
            groups: int = 1,
            base_width: int = 64,
            dilation: int = 1,
            norm_layer: Optional[Callable[..., nn.Module]] = None) -> None:
        super().__init__()
        make_norm = nn.BatchNorm2d if norm_layer is None else norm_layer
        inner_channels = int(planes * (base_width / 64.)) * groups
        outer_channels = planes * self.expansion

        # 1x1 reduction
        self.conv1 = nn.Conv2d(inplanes, inner_channels,
                               kernel_size=1, stride=1, bias=False)
        self.bn1 = make_norm(inner_channels)
        self.relu1 = nn.ReLU(inplace=True)

        # 3x3 convolution; together with self.downsample this is where the
        # input gets spatially downsampled when stride != 1
        self.conv2 = nn.Conv2d(inner_channels, inner_channels,
                               kernel_size=3,
                               stride=stride,
                               padding=dilation,
                               groups=groups,
                               bias=False,
                               dilation=dilation)
        self.bn2 = make_norm(inner_channels)
        self.relu2 = nn.ReLU(inplace=True)

        # 1x1 expansion (its ReLU comes after the residual addition)
        self.conv3 = nn.Conv2d(inner_channels, outer_channels,
                               kernel_size=1, stride=1, bias=False)
        self.bn3 = make_norm(outer_channels)

        self.downsample = downsample
        self.stride = stride
        # Module wrapper around the addition used for the residual sum
        self.float_add = nn.quantized.FloatFunctional()
        self.relu3 = nn.ReLU(inplace=True)

    def forward(self, x: Tensor) -> Tensor:
        """Return ``relu(skip(x) + residual(x))``."""
        out = self.relu1(self.bn1(self.conv1(x)))
        out = self.relu2(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        shortcut = x
        if self.downsample is not None:
            shortcut = self.downsample(x)

        return self.relu3(self.float_add.add(shortcut, out))

In [None]:
# Build a ResNet-50 layout ([3, 4, 6, 3] blocks per stage) from the custom Bottleneck above.
# NOTE(review): `_resnet` is a private torchvision helper; the two trailing positional
# booleans are presumably (pretrained=False, progress=True) as in the 0.8.0 source linked
# above -- confirm against the installed torchvision version.
model = models.resnet._resnet('resnet50', Bottleneck, [3, 4, 6, 3], False, True)

# Replace the stem with a 3x3, stride-1 convolution and turn the max-pool into a no-op
# (presumably to keep spatial resolution for the 32x32 inputs used here).
model.conv1 = torch.nn.Conv2d(
    3, 64, kernel_size=3, stride=1, padding=1, bias=False
)
model.maxpool = torch.nn.Identity()
# The classifier head replacement below is disabled, so `model.fc` stays as built by _resnet.
# model.fc = nn.Sequential(
#     nn.Linear(in_features=2048, out_features=10, bias=True),
#     nn.LogSoftmax(dim=1)
# )

model = model.to(device)

## Examine Statedict

In [None]:
# Load the float checkpoint; the identity map_location keeps every storage on the CPU.
# Equivalent alternative:
# checkpoint = torch.load(modelpath, map_location=torch.device('cpu'))
checkpoint = torch.load(modelpath, map_location=lambda storage, loc: storage)
# The loaded object is used directly as a state dict.
model.load_state_dict(checkpoint)

<All keys matched successfully>

### Structure

In [None]:
# Alternative: walk every submodule individually.
# for m in model.modules():
#     print(m)
#     # if isinstance(m, nn.Conv2d):
#     #     print(m)

# Print the full module tree of the float model.
print(model)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): Identity()
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu1): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu2): ReLU(inplace=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (1): 

### Param shape

In [None]:
# List every learnable parameter with its shape.
# (use `for param in model.parameters()` when the names are not needed)
for name, param in model.named_parameters():
    print(name, param.shape)

conv1.weight torch.Size([64, 3, 3, 3])
bn1.weight torch.Size([64])
bn1.bias torch.Size([64])
layer1.0.conv1.weight torch.Size([64, 64, 1, 1])
layer1.0.bn1.weight torch.Size([64])
layer1.0.bn1.bias torch.Size([64])
layer1.0.conv2.weight torch.Size([64, 64, 3, 3])
layer1.0.bn2.weight torch.Size([64])
layer1.0.bn2.bias torch.Size([64])
layer1.0.conv3.weight torch.Size([256, 64, 1, 1])
layer1.0.bn3.weight torch.Size([256])
layer1.0.bn3.bias torch.Size([256])
layer1.0.downsample.0.weight torch.Size([256, 64, 1, 1])
layer1.0.downsample.1.weight torch.Size([256])
layer1.0.downsample.1.bias torch.Size([256])
layer1.1.conv1.weight torch.Size([64, 256, 1, 1])
layer1.1.bn1.weight torch.Size([64])
layer1.1.bn1.bias torch.Size([64])
layer1.1.conv2.weight torch.Size([64, 64, 3, 3])
layer1.1.bn2.weight torch.Size([64])
layer1.1.bn2.bias torch.Size([64])
layer1.1.conv3.weight torch.Size([256, 64, 1, 1])
layer1.1.bn3.weight torch.Size([256])
layer1.1.bn3.bias torch.Size([256])
layer1.2.conv1.weight tor

### Conv

Both self.conv2 and self.downsample layers downsample the input when stride != 1

```python
if stride != 1 or self.inplanes != planes * block.expansion:
    downsample = nn.Sequential(
        conv1x1(self.inplanes, planes * block.expansion, stride),
        norm_layer(planes * block.expansion),
    )
```

In [None]:
# model.conv1?

In [None]:
# Stem convolution: weight shape followed by the float weight values.
print(model.conv1.weight.shape)
print(model.conv1.weight)

torch.Size([64, 3, 3, 3])
Parameter containing:
tensor([[[[ 0.0170,  0.0322,  0.0373],
          [ 0.0524,  0.0702,  0.0633],
          [ 0.0215,  0.0665,  0.0463]],

         [[-0.0093,  0.0030,  0.0114],
          [ 0.0089,  0.0259,  0.0148],
          [-0.0162, -0.0069,  0.0016]],

         [[-0.0068, -0.0046, -0.0009],
          [ 0.0087,  0.0030, -0.0065],
          [-0.0124, -0.0268, -0.0079]]],


        [[[-0.2928,  0.0568,  0.0551],
          [-0.0270,  0.7221,  0.5164],
          [-0.1322,  0.4442,  0.3527]],

         [[-0.0012, -0.2651, -0.1953],
          [-0.1325, -0.3407, -0.3702],
          [ 0.0156, -0.3355, -0.1800]],

         [[ 0.2756,  0.1948,  0.1828],
          [ 0.2136,  0.0059, -0.0479],
          [ 0.0997, -0.2444, -0.2038]]],


        [[[ 0.0121,  0.0083,  0.0228],
          [-0.0665,  0.2140,  0.1999],
          [-0.1181, -0.0997,  0.0364]],

         [[-0.0829,  0.0061,  0.1043],
          [-0.1747,  0.1208,  0.2339],
          [-0.2115, -0.2244,  0.0199]

In [None]:
# First 1x1 convolution inside the first bottleneck of layer1 (shape only).
print(model.layer1[0].conv1.weight.shape)
# print(model.layer1[0].conv1.weight)

torch.Size([64, 64, 1, 1])


### BN

In [None]:
# Stem BatchNorm: shapes of its weight and bias parameters.
print(model.bn1.weight.shape)
print(model.bn1.bias.shape)

torch.Size([64])
torch.Size([64])


In [None]:
# Weight values of the first BatchNorm in the second bottleneck of layer4.
print(model.layer4[1].bn1.weight)

Parameter containing:
tensor([0.0606, 0.0601, 0.1101, 0.0981, 0.0717, 0.0783, 0.1254, 0.0683, 0.0662,
        0.0809, 0.0734, 0.0693, 0.0832, 0.0561, 0.0784, 0.0765, 0.0680, 0.0571,
        0.0972, 0.0857, 0.0717, 0.0668, 0.0690, 0.0959, 0.0908, 0.0626, 0.0805,
        0.0602, 0.1032, 0.0596, 0.0688, 0.0783, 0.0572, 0.0795, 0.0853, 0.0607,
        0.0656, 0.0878, 0.0990, 0.0750, 0.0893, 0.1053, 0.0926, 0.0853, 0.0498,
        0.0845, 0.0586, 0.0741, 0.0685, 0.0757, 0.0831, 0.0604, 0.0708, 0.0737,
        0.1181, 0.0948, 0.0747, 0.0632, 0.1056, 0.0596, 0.0954, 0.0965, 0.0957,
        0.0785, 0.0772, 0.0686, 0.1035, 0.0773, 0.0769, 0.0889, 0.0527, 0.0926,
        0.0622, 0.1122, 0.0601, 0.0730, 0.0834, 0.0684, 0.0607, 0.0826, 0.0589,
        0.1024, 0.0760, 0.0736, 0.0649, 0.0849, 0.0516, 0.0745, 0.1141, 0.0465,
        0.0595, 0.0759, 0.0810, 0.0689, 0.0676, 0.0805, 0.0663, 0.0872, 0.1157,
        0.0670, 0.0491, 0.0674, 0.0697, 0.0850, 0.1060, 0.0845, 0.0725, 0.0946,
        0.1263, 0.

### ReLU

In [None]:
# print(model.resnet.bn1.bias)

# Let's Try PyTorch's Built-in Quantization Method

In [None]:
class QuantizedModel(nn.Module):
    """Wrap a float model between a ``QuantStub`` and a ``DeQuantStub``.

    The input passes through ``quant``, then the wrapped model, then
    ``dequant``. The wrapped model is stored under the attribute name
    ``model_fp32``, which is why saved state-dict keys in this notebook start
    with ``model_fp32.``.

    Args:
        model_fp32: The module to wrap. It is stored as-is (not copied).
    """

    def __init__(self, model_fp32):
        super().__init__()
        self.model_fp32 = model_fp32
        self.quant = torch.quantization.QuantStub()
        self.dequant = torch.quantization.DeQuantStub()

    def forward(self, x):
        """Return ``dequant(model_fp32(quant(x)))``."""
        stub_in = self.quant(x)
        stub_out = self.model_fp32(stub_in)
        return self.dequant(stub_out)

In [None]:
# Work on a deep copy so the float `model` is left untouched by the in-place fusion below.
fused_model = copy.deepcopy(model)

# Switch the copy to eval mode before fusing.
# model.eval()
fused_model.eval()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): Identity()
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu1): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu2): ReLU(inplace=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (1): 

In [None]:
# Fuse Conv + BN (+ ReLU) groups in place.
# Signature reminder: fuse_modules(model, modules_to_fuse, inplace=False)
fused_model = torch.quantization.fuse_modules(fused_model, [['conv1', 'bn1', 'relu']], inplace=True)

# Groups fused inside every bottleneck; conv3/bn3 has no ReLU of its own
# because relu3 is applied after the residual addition.
bottleneck_fusions = [['conv1', 'bn1', 'relu1'],
                      ['conv2', 'bn2', 'relu2'],
                      ['conv3', 'bn3']]

for stage_name, stage in fused_model.named_children():
    if 'layer' not in stage_name:
        continue
    for _, bottleneck in stage.named_children():
        torch.quantization.fuse_modules(bottleneck, bottleneck_fusions, inplace=True)
        # Only blocks that registered a `downsample` child have a skip-path conv + bn.
        shortcut = dict(bottleneck.named_children()).get('downsample')
        if shortcut is not None:
            torch.quantization.fuse_modules(shortcut, [['0', '1']], inplace=True)  # conv2d + bn

## Examine Fused Model

In [None]:
# Print the module tree after fusion.
print(fused_model)

ResNet(
  (conv1): ConvReLU2d(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
  )
  (bn1): Identity()
  (relu): Identity()
  (maxpool): Identity()
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): ConvReLU2d(
        (0): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1))
        (1): ReLU(inplace=True)
      )
      (bn1): Identity()
      (relu1): Identity()
      (conv2): ConvReLU2d(
        (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (1): ReLU(inplace=True)
      )
      (bn2): Identity()
      (relu2): Identity()
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1))
      (bn3): Identity()
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1))
        (1): Identity()
      )
      (float_add): FloatFunctional(
        (activation_post_process): Identity()
      )
      (relu3): ReLU(inplace=True)
    )
    (1): Bottleneck(
      (c

In [None]:
# torch.save(fused_model.state_dict(), savepath + 'fused.pt')
# torch.jit.save(torch.jit.script(fused_model), savepath + 'fused_jit.pt')

In [None]:
# Prepare the model for static quantization.
# This inserts observers in the model that will observe activation tensors during calibration.
#
# Wrap a deep copy of the fused model: prepare() and convert() are called with
# inplace=True further down and rewrite the wrapped module tree. Sharing the same
# object would also turn `fused_model` into a converted network (without
# Quant/DeQuant stubs), so the float `fused_model` could no longer be evaluated
# after conversion.
quantized_model = QuantizedModel(model_fp32=copy.deepcopy(fused_model))

# config
quantization_config = torch.quantization.get_default_qconfig('fbgemm')  # x86
# Alternatives:
# quantization_config = torch.quantization.default_qconfig
# quantization_config = torch.quantization.QConfig(
#     activation=torch.quantization.MinMaxObserver.with_args(dtype=torch.quint8),
#     weight=torch.quantization.MinMaxObserver.with_args(
#         dtype=torch.qint8,
#         qscheme=torch.per_tensor_symmetric))

# The qconfig set on the wrapper is what prepare() reads when inserting observers.
quantized_model.qconfig = quantization_config
# print(quantized_model.qconfig)

## Examine Model before Calibration

https://pytorch.org/docs/stable/_modules/torch/quantization/quantize.html#prepare

In [None]:
# Prepare the model for static quantization: the wrapper is modified in place.
torch.quantization.prepare(quantized_model, inplace=True)

  reduce_range will be deprecated in a future release of PyTorch."


QuantizedModel(
  (model_fp32): ResNet(
    (conv1): ConvReLU2d(
      (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): ReLU(inplace=True)
      (activation_post_process): HistogramObserver()
    )
    (bn1): Identity()
    (relu): Identity()
    (maxpool): Identity()
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): ConvReLU2d(
          (0): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1))
          (1): ReLU(inplace=True)
          (activation_post_process): HistogramObserver()
        )
        (bn1): Identity()
        (relu1): Identity()
        (conv2): ConvReLU2d(
          (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
          (1): ReLU(inplace=True)
          (activation_post_process): HistogramObserver()
        )
        (bn2): Identity()
        (relu2): Identity()
        (conv3): Conv2d(
          64, 256, kernel_size=(1, 1), stride=(1, 1)
          (activation_post_process): HistogramObserver()
 

## Calibration and Convert

In [None]:
# Calibration: feed data through the prepared model.
# Hand-written equivalent of the helpers:
# quantized_model.eval()
# for batch, target in test_loader:
#     model(batch)

# Full pass over the loader:
# calibrate_model(model=quantized_model, loader=test_loader)
# Short calibration: a single batch (count=1).
# NOTE(review): calibration draws from test_loader, the same loader later used to report
# accuracy -- confirm this is intended rather than calibrating on training data.
calibrate_model_n_liter(model=quantized_model, loader=test_loader, count=1)

In [None]:
# print(quantized_model.model_fp32.conv1)
# torch.save(quantized_model.state_dict(), savepath + 'temp.pt'.format())

# RuntimeError: Hook '_observer_forward_hook' on module 'ConvReLU2d' expected the input argument to be typed as a Tuple but found type: 'Tensor' instead.
# This error occured while scripting the forward hook '_observer_forward_hook' on module ConvReLU2d. 
# If you did not want to script this hook remove it from the original NN module before scripting. 
# This hook was expected to have the following signature: _observer_forward_hook(self, input: Tuple[Tensor], output: Tensor). 
# The type of the output arg is the returned type from either the forward method or the previous hook if it exists. Note that hooks can return anything, but if the hook is on a submodule the outer module is expecting the same return type as the submodule's forward.
# torch.jit.save(torch.jit.script(quantized_model), savepath + 'temp_jit.pt'.format())

In [None]:
# Convert the calibrated model to its quantized form; done in place.
quantized_model = torch.quantization.convert(quantized_model, inplace=True)

quantized_model.eval()

# Using high-level static quantization wrapper
# The above steps, including torch.quantization.prepare, calibrate_model, and torch.quantization.convert, are also equivalent to
# quantized_model = torch.quantization.quantize(model=quantized_model, run_fn=calibrate_model, run_args=[train_loader], mapping=None, inplace=False)

QuantizedModel(
  (model_fp32): ResNet(
    (conv1): QuantizedConvReLU2d(3, 64, kernel_size=(3, 3), stride=(1, 1), scale=0.01995701715350151, zero_point=0, padding=(1, 1))
    (bn1): Identity()
    (relu): Identity()
    (maxpool): Identity()
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): QuantizedConvReLU2d(64, 64, kernel_size=(1, 1), stride=(1, 1), scale=0.012247094884514809, zero_point=0)
        (bn1): Identity()
        (relu1): Identity()
        (conv2): QuantizedConvReLU2d(64, 64, kernel_size=(3, 3), stride=(1, 1), scale=0.015170135535299778, zero_point=0, padding=(1, 1))
        (bn2): Identity()
        (relu2): Identity()
        (conv3): QuantizedConv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), scale=0.021831415593624115, zero_point=71)
        (bn3): Identity()
        (downsample): Sequential(
          (0): QuantizedConv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), scale=0.024291105568408966, zero_point=79)
          (1): Identity()
        )
      

## Examine Quantized Model

### Structure

In [None]:
# Print the converted model; quantized layers show their scale and zero_point.
print(quantized_model)

QuantizedModel(
  (model_fp32): ResNet(
    (conv1): QuantizedConvReLU2d(3, 64, kernel_size=(3, 3), stride=(1, 1), scale=0.019204583019018173, zero_point=0, padding=(1, 1))
    (bn1): Identity()
    (relu): Identity()
    (maxpool): Identity()
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): QuantizedConvReLU2d(64, 64, kernel_size=(1, 1), stride=(1, 1), scale=0.012737809680402279, zero_point=0)
        (bn1): Identity()
        (relu1): Identity()
        (conv2): QuantizedConvReLU2d(64, 64, kernel_size=(3, 3), stride=(1, 1), scale=0.012885705567896366, zero_point=0, padding=(1, 1))
        (bn2): Identity()
        (relu2): Identity()
        (conv3): QuantizedConv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), scale=0.02152417227625847, zero_point=60)
        (bn3): Identity()
        (downsample): Sequential(
          (0): QuantizedConv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), scale=0.022081870585680008, zero_point=64)
          (1): Identity()
        )
      

### Param shape

In [None]:
# # ???
# quantized_model.eval()
# for name, param in quantized_model.named_parameters():
#     print(name, param.shape)

# Test Function

In [None]:
criterion = nn.CrossEntropyLoss()

def test(model, test_loader, criterion, device='cpu'):
    model.eval()
    model.to(device)

    test_loss = 0
    correct = 0

    for data, target in test_loader:
        with torch.no_grad():
            data = data.to(device)
            target = target.to(device)

            outputs = model(data)
            _, preds = torch.max(outputs, 1)

            test_loss += criterion(outputs, target).item() * data.size(0)
            correct += torch.sum(preds == target.data)

    test_loss = test_loss / len(test_loader.dataset)
    test_acc = 100.0 * correct / len(test_loader.dataset)

    print('Test Loss: {:.4f}, Accuracy: {:.2f}%'.format(test_loss, test_acc))

    return test_loss, test_acc

## Test Acc

In [None]:
# Evaluate the quantized model on the test loader (device defaults to 'cpu').
test_loss, test_acc = test(quantized_model, test_loader, criterion)
# print(test_loss, test_acc)

Test Loss: 0.2359, Accuracy: 93.55%


In [None]:
# Save the quantized state dict; the measured accuracy is embedded in the file name.
torch.save(quantized_model.state_dict(), savepath + '{:.2f}_quantized.pt'.format(test_acc))

# Also save a TorchScript version of the whole module.
# The saved module serializes all of the methods, submodules, parameters, and attributes of this module
torch.jit.save(torch.jit.script(quantized_model), savepath + '{:.2f}_quantized_jit.pt'.format(test_acc))

In [None]:
# Evaluate the fused model for comparison with the quantized result above.
# NOTE(review): this needs `fused_model` to still be a float model at this point --
# confirm it does not share modules with the converted `quantized_model`.
test_loss_fused, test_acc_fused = test(fused_model, test_loader, criterion)
# print(test_loss, test_acc)

Test Loss: 0.2349, Accuracy: 93.62%


# Load Quantized Statedict

## final quantized model

weight + bias + scale + zeropoint

In [None]:
# TorchScript alternative:
# qmodel = torch.jit.load('./checkpoint/ResNet50_93.65_quantized_jit.pt', map_location=device)
# `qmodel` is iterated and indexed by key name below, i.e. it is used as a state dict.
# NOTE(review): this hard-coded file name does not start with the `savepath` prefix used
# when saving in this notebook -- presumably it comes from an earlier run; confirm it exists.
qmodel = torch.load('./checkpoint/ResNet50_93.65_quantized.pt', map_location=device)

In [None]:
# Show only the first entry of the quantized state dict (the loop breaks after one item).
for name in qmodel:
    print(name)
    print(qmodel[name].int_repr())  # change the dtype from qint8 to int8
    break

model_fp32.conv1.weight
tensor([[[[  31,   59,   68],
          [  95,  127,  115],
          [  39,  121,   84]],

         [[ -17,    5,   21],
          [  16,   47,   27],
          [ -29,  -13,    3]],

         [[ -12,   -8,   -2],
          [  16,    5,  -12],
          [ -23,  -49,  -14]]],


        [[[ -52,   10,   10],
          [  -5,  127,   91],
          [ -23,   78,   62]],

         [[   0,  -47,  -34],
          [ -23,  -60,  -65],
          [   3,  -59,  -32]],

         [[  49,   34,   32],
          [  38,    1,   -8],
          [  18,  -43,  -36]]],


        [[[   7,    5,   12],
          [ -36,  116,  108],
          [ -64,  -54,   20]],

         [[ -45,    3,   56],
          [ -94,   65,  126],
          [-114, -121,   11]],

         [[ -18,    1,   24],
          [ -30,  100,  127],
          [ -83,  -52,   38]]],


        ...,


        [[[ -47,  -56,   -7],
          [ -28,   75,   -3],
          [ -61,   -9,  -36]],

         [[ -23,   18,   21],
     

### Layer

```
model_fp32.layer1.0.conv1.weight
model_fp32.layer1.0.conv1.bias
model_fp32.layer1.0.conv1.scale
model_fp32.layer1.0.conv1.zero_point
```


### Scale and Zero_point

In [None]:
# Scale and zero point stored for the first conv of the first bottleneck in layer1.
print(qmodel['model_fp32.layer1.0.conv1.scale'])
print(qmodel['model_fp32.layer1.0.conv1.zero_point'])

tensor(0.0106)
tensor(0)


## fused model after calibration before convert

In [None]:
# Load the state dict captured after calibration and before convert.
# NOTE(review): the only visible statement that would write this file
# (torch.save(..., savepath + 'temp.pt')) is commented out in the calibration section,
# so the file must already exist from an earlier run.
temp_model = torch.load('./checkpoint/ResNet50_2_temp.pt', map_location=device)

In [None]:
# Alternative: print just the first entry.
# for name in temp_model:
#     print(name)
#     print(temp_model[name])
#     break

# Float weight of the stem convolution as stored in the calibrated state dict.
temp_model['model_fp32.conv1.0.weight']

tensor([[[[ 0.0052,  0.0098,  0.0113],
          [ 0.0159,  0.0213,  0.0192],
          [ 0.0065,  0.0202,  0.0141]],

         [[-0.0028,  0.0009,  0.0035],
          [ 0.0027,  0.0079,  0.0045],
          [-0.0049, -0.0021,  0.0005]],

         [[-0.0021, -0.0014, -0.0003],
          [ 0.0027,  0.0009, -0.0020],
          [-0.0038, -0.0081, -0.0024]]],


        [[[-0.0917,  0.0178,  0.0173],
          [-0.0085,  0.2263,  0.1618],
          [-0.0414,  0.1392,  0.1105]],

         [[-0.0004, -0.0831, -0.0612],
          [-0.0415, -0.1068, -0.1160],
          [ 0.0049, -0.1051, -0.0564]],

         [[ 0.0864,  0.0610,  0.0573],
          [ 0.0669,  0.0018, -0.0150],
          [ 0.0312, -0.0766, -0.0639]]],


        [[[ 0.0029,  0.0020,  0.0054],
          [-0.0158,  0.0509,  0.0475],
          [-0.0281, -0.0237,  0.0087]],

         [[-0.0197,  0.0015,  0.0248],
          [-0.0415,  0.0287,  0.0556],
          [-0.0503, -0.0534,  0.0047]],

         [[-0.0077,  0.0003,  0.0107],
     

### Layer

In [None]:
# List every key of the calibrated state dict (weights, biases and observer buffers).
for name in temp_model:
    print(name)

model_fp32.conv1.0.weight
model_fp32.conv1.0.bias
model_fp32.conv1.activation_post_process.eps
model_fp32.conv1.activation_post_process.histogram
model_fp32.conv1.activation_post_process.min_val
model_fp32.conv1.activation_post_process.max_val
model_fp32.layer1.0.conv1.0.weight
model_fp32.layer1.0.conv1.0.bias
model_fp32.layer1.0.conv1.activation_post_process.eps
model_fp32.layer1.0.conv1.activation_post_process.histogram
model_fp32.layer1.0.conv1.activation_post_process.min_val
model_fp32.layer1.0.conv1.activation_post_process.max_val
model_fp32.layer1.0.conv2.0.weight
model_fp32.layer1.0.conv2.0.bias
model_fp32.layer1.0.conv2.activation_post_process.eps
model_fp32.layer1.0.conv2.activation_post_process.histogram
model_fp32.layer1.0.conv2.activation_post_process.min_val
model_fp32.layer1.0.conv2.activation_post_process.max_val
model_fp32.layer1.0.conv3.weight
model_fp32.layer1.0.conv3.bias
model_fp32.layer1.0.conv3.activation_post_process.eps
model_fp32.layer1.0.conv3.activation_post_

### Histogram

In [None]:
# Histogram buffer recorded by the observer attached to layer1[0].conv2, and its length.
print(temp_model['model_fp32.layer1.0.conv2.activation_post_process.histogram'])
print(len(temp_model['model_fp32.layer1.0.conv2.activation_post_process.histogram']))

tensor([4.3272e+07, 2.3522e+05, 2.3464e+05,  ..., 0.0000e+00, 0.0000e+00,
        2.0000e+00])
2048


### Eps

In [None]:
# `eps` buffer of the same observer, and its length.
print(temp_model['model_fp32.layer1.0.conv2.activation_post_process.eps'])
print(len(temp_model['model_fp32.layer1.0.conv2.activation_post_process.eps']))

tensor([1.1921e-07])
1


### Min / Max Val

In [None]:
# Minimum and maximum activation values recorded by the same observer.
print(temp_model['model_fp32.layer1.0.conv2.activation_post_process.min_val'])
print(temp_model['model_fp32.layer1.0.conv2.activation_post_process.max_val'])

tensor(0.)
tensor(2.7615)
