# Static Quantization

## 1. Import packages

In [46]:
%load_ext autoreload
%autoreload 2
import os
from utils.config import opt
from model import FasterRCNNVGG16   
from trainer import FasterRCNNTrainer
from data.util import  read_image
from utils.vis_tool import vis_bbox
from utils import array_tool as at

%matplotlib inline

import torch
import copy
import torch.nn as nn
import helper
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader

import cv2
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings('ignore')

import os    
os.environ['KMP_DUPLICATE_LIB_OK']='True'

cuda_device = torch.device("cuda:0")
cpu_device = torch.device("cpu:0")

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [47]:
from mmdet.apis import init_detector, inference_detector, inference_detector_quant
import mmcv

# Specify the path to model config and checkpoint file
config_file = 'configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py'
# cpheckpoint_file = 'checkpoints/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.th'
checkpoint_file = 'work_dirs/faster_rcnn_r50_fpn_1x_coco/epoch_12.pth'

# build the model from a config file and a checkpoint file
fp32_model = init_detector(config_file, checkpoint_file, device='cpu:0')

load checkpoint from local path: work_dirs/faster_rcnn_r50_fpn_1x_coco/epoch_12.pth


In [3]:
fp32_model.state_dict()

OrderedDict([('backbone.conv1.weight',
              tensor([[[[ 1.3335e-02,  1.4664e-02, -1.5351e-02,  ..., -4.0896e-02,
                         -4.3034e-02, -7.0755e-02],
                        [ 4.1205e-03,  5.8477e-03,  1.4948e-02,  ...,  2.2060e-03,
                         -2.0912e-02, -3.8517e-02],
                        [ 2.2331e-02,  2.3595e-02,  1.6120e-02,  ...,  1.0281e-01,
                          6.2641e-02,  5.1977e-02],
                        ...,
                        [-9.0349e-04,  2.7767e-02, -1.0105e-02,  ..., -1.2722e-01,
                         -7.6604e-02,  7.8453e-03],
                        [ 3.5894e-03,  4.8006e-02,  6.2051e-02,  ...,  2.4267e-02,
                         -3.3662e-02, -1.5709e-02],
                        [-8.0029e-02, -3.2238e-02, -1.7808e-02,  ...,  3.5359e-02,
                          2.2439e-02,  1.7077e-03]],
              
                       [[-1.8452e-02,  1.1415e-02,  2.3850e-02,  ...,  5.3736e-02,
                       

## 2. Create a model instance

In [4]:
fp32_model

FasterRCNN(
  (backbone): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): ResLayer(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Sequential(
        

In [5]:
fp32_model_statedict = fp32_model.state_dict()
fp32_model_statedict

OrderedDict([('backbone.conv1.weight',
              tensor([[[[ 1.3335e-02,  1.4664e-02, -1.5351e-02,  ..., -4.0896e-02,
                         -4.3034e-02, -7.0755e-02],
                        [ 4.1205e-03,  5.8477e-03,  1.4948e-02,  ...,  2.2060e-03,
                         -2.0912e-02, -3.8517e-02],
                        [ 2.2331e-02,  2.3595e-02,  1.6120e-02,  ...,  1.0281e-01,
                          6.2641e-02,  5.1977e-02],
                        ...,
                        [-9.0349e-04,  2.7767e-02, -1.0105e-02,  ..., -1.2722e-01,
                         -7.6604e-02,  7.8453e-03],
                        [ 3.5894e-03,  4.8006e-02,  6.2051e-02,  ...,  2.4267e-02,
                         -3.3662e-02, -1.5709e-02],
                        [-8.0029e-02, -3.2238e-02, -1.7808e-02,  ...,  3.5359e-02,
                          2.2439e-02,  1.7077e-03]],
              
                       [[-1.8452e-02,  1.1415e-02,  2.3850e-02,  ...,  5.3736e-02,
                       

# COCO DataSet

In [6]:
class CoCo_TestDataset(Dataset):
    def __init__(self, rootDir, folder, tf=None):
        """Dataset class for CoCo data

        Args:
            rootDir (str): path to directory containing CoCo image data
            folder (str) : 'train' or 'val' folder
            tf (optional): transformation to apply. Defaults to None
        """        
        self.rootDir = rootDir
        self.folder = folder
        self.transform = tf

        # read rgb image list
        sourceImgFolder =  os.path.join(self.rootDir, self.folder)
        self.sourceImgFiles  = [os.path.join(sourceImgFolder, x) for x in sorted(os.listdir(sourceImgFolder))]
    
    def __len__(self):
        return len(self.sourceImgFiles)
  
    def __getitem__(self, index):
        # read source image and convert to RGB, apply transform
        sourceImage = cv2.imread(f"{self.sourceImgFiles[index]}", -1)
        sourceImage = cv2.cvtColor(sourceImage, cv2.COLOR_BGR2RGB)
        if self.transform is not None:
            sourceImage = self.transform(sourceImage)

        return sourceImage

In [7]:
tf = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize((320, 320)),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.485, 0.56, 0.406), std=(0.229, 0.224, 0.225))
        ])

TEST_BATCH_SIZE = 1
num_workers = 8

# Creating Test set and Test Dataloaders
test_set = CoCo_TestDataset(rootDir= 'data/coco/', folder='test2017_sub', tf = tf)
# test_set = torch.utils.data.Subset(test_set, indices=np.arange(32))
# test_loader  = DataLoader(test_set, batch_size=TEST_BATCH_SIZE)
test_sampler = torch.utils.data.SequentialSampler(test_set)


# train_sampler = torch.utils.data.RandomSampler(train_set)
# test_sampler = torch.utils.data.SequentialSampler(test_set)

# train_loader = torch.utils.data.DataLoader(
#     dataset=train_set, batch_size=32,
#     sampler=train_sampler, num_workers=num_workers)

test_loader = torch.utils.data.DataLoader(
    dataset=test_set, batch_size=TEST_BATCH_SIZE,
    sampler=test_sampler, num_workers=num_workers)

In [8]:
# test_set

In [9]:
# for a in test_loader:
#     print(a.size())

## 3. Fusion layer

In [10]:
fused_model_fp32 = copy.deepcopy(fp32_model)
fused_model_fp32.eval()

FasterRCNN(
  (backbone): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): ResLayer(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Sequential(
        

In [11]:
# Fuse the activations to preceding layers, where applicable.
# This needs to be done manually depending on the model architecture.
# Common fusions include `conv + relu` and `conv + batchnorm + relu`
modules_to_fuse = [ 
    ["backbone.conv1", "backbone.bn1", "backbone.relu"],
    
    
    
    ["backbone.layer1.0.conv1", "backbone.layer1.0.bn1"],
    ["backbone.layer1.0.conv2", "backbone.layer1.0.bn2"],
    ["backbone.layer1.0.conv3", "backbone.layer1.0.bn3", "backbone.layer1.0.relu"],
    ["backbone.layer1.0.downsample.0", "backbone.layer1.0.downsample.1"],
    
    ["backbone.layer1.1.conv1", "backbone.layer1.1.bn1"],
    ["backbone.layer1.1.conv2", "backbone.layer1.1.bn2"],
    ["backbone.layer1.1.conv3", "backbone.layer1.1.bn3", "backbone.layer1.1.relu"],
    
    ["backbone.layer1.2.conv1", "backbone.layer1.2.bn1"],
    ["backbone.layer1.2.conv2", "backbone.layer1.2.bn2"],
    ["backbone.layer1.2.conv3", "backbone.layer1.2.bn3", "backbone.layer1.2.relu"],



    ["backbone.layer2.0.conv1", "backbone.layer2.0.bn1"],
    ["backbone.layer2.0.conv2", "backbone.layer2.0.bn2"],
    ["backbone.layer2.0.conv3", "backbone.layer2.0.bn3", "backbone.layer2.0.relu"],
    ["backbone.layer2.0.downsample.0", "backbone.layer2.0.downsample.1"],
    
    ["backbone.layer2.1.conv1", "backbone.layer2.1.bn1"],
    ["backbone.layer2.1.conv2", "backbone.layer2.1.bn2"],
    ["backbone.layer2.1.conv3", "backbone.layer2.1.bn3", "backbone.layer2.1.relu"],
    
    ["backbone.layer2.2.conv1", "backbone.layer2.2.bn1"],
    ["backbone.layer2.2.conv2", "backbone.layer2.2.bn2"],
    ["backbone.layer2.2.conv3", "backbone.layer2.2.bn3", "backbone.layer2.2.relu"],
    
    ["backbone.layer2.3.conv1", "backbone.layer2.3.bn1"],
    ["backbone.layer2.3.conv2", "backbone.layer2.3.bn2"],
    ["backbone.layer2.3.conv3", "backbone.layer2.3.bn3", "backbone.layer2.3.relu"],
    
    
    
    ["backbone.layer3.0.conv1", "backbone.layer3.0.bn1"],
    ["backbone.layer3.0.conv2", "backbone.layer3.0.bn2"],
    ["backbone.layer3.0.conv3", "backbone.layer3.0.bn3", "backbone.layer3.0.relu"],
    ["backbone.layer3.0.downsample.0", "backbone.layer3.0.downsample.1"],
    
    ["backbone.layer3.1.conv1", "backbone.layer3.1.bn1"],
    ["backbone.layer3.1.conv2", "backbone.layer3.1.bn2"],
    ["backbone.layer3.1.conv3", "backbone.layer3.1.bn3", "backbone.layer3.1.relu"],
    
    ["backbone.layer3.2.conv1", "backbone.layer3.2.bn1"],
    ["backbone.layer3.2.conv2", "backbone.layer3.2.bn2"],
    ["backbone.layer3.2.conv3", "backbone.layer3.2.bn3", "backbone.layer3.2.relu"],
    
    ["backbone.layer3.3.conv1", "backbone.layer3.3.bn1"],
    ["backbone.layer3.3.conv2", "backbone.layer3.3.bn2"],
    ["backbone.layer3.3.conv3", "backbone.layer3.3.bn3", "backbone.layer3.3.relu"],
    
    ["backbone.layer3.4.conv1", "backbone.layer3.4.bn1"],
    ["backbone.layer3.4.conv2", "backbone.layer3.4.bn2"],
    ["backbone.layer3.4.conv3", "backbone.layer3.4.bn3", "backbone.layer3.4.relu"],
    
    ["backbone.layer3.5.conv1", "backbone.layer3.5.bn1"],
    ["backbone.layer3.5.conv2", "backbone.layer3.5.bn2"],
    ["backbone.layer3.5.conv3", "backbone.layer3.5.bn3", "backbone.layer3.5.relu"],



    ["backbone.layer4.0.conv1", "backbone.layer4.0.bn1"],
    ["backbone.layer4.0.conv2", "backbone.layer4.0.bn2"],
    ["backbone.layer4.0.conv3", "backbone.layer4.0.bn3", "backbone.layer4.0.relu"],
    ["backbone.layer4.0.downsample.0", "backbone.layer4.0.downsample.1"],
    
    ["backbone.layer4.1.conv1", "backbone.layer4.1.bn1"],
    ["backbone.layer4.1.conv2", "backbone.layer4.1.bn2"],
    ["backbone.layer4.1.conv3", "backbone.layer4.1.bn3", "backbone.layer4.1.relu"],
    
    ["backbone.layer4.2.conv1", "backbone.layer4.2.bn1"],
    ["backbone.layer4.2.conv2", "backbone.layer4.2.bn2"],
    ["backbone.layer4.2.conv3", "backbone.layer4.2.bn3", "backbone.layer4.2.relu"]
    
    
    
    # ["neck.lateral_convs.0.conv"]
    ]


fused_model_fp32 = torch.quantization.fuse_modules(fused_model_fp32, modules_to_fuse, inplace=True)
fused_model_fp32

FasterRCNN(
  (backbone): ResNet(
    (conv1): ConvReLU2d(
      (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
      (1): ReLU(inplace=True)
    )
    (bn1): Identity()
    (relu): Identity()
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): ResLayer(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1))
        (bn1): Identity()
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (bn2): Identity()
        (conv3): ConvReLU2d(
          (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1))
          (1): ReLU(inplace=True)
        )
        (bn3): Identity()
        (relu): Identity()
        (downsample): Sequential(
          (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1))
          (1): Identity()
        )
      )
      init_cfg={'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
      (1): Bottleneck(
        (

## fused model equivalence

In [12]:
fp32_model.eval()
fused_model_fp32.eval()

# assert helper.model_equivalence(model_1=fp32_model, model_2=fused_model_fp32, device=cpu_device, rtol=1e-05, atol=1e-05, num_tests=1, input_size=(1,3,320,320)), "Fused model is not equivalent to the original model!"

FasterRCNN(
  (backbone): ResNet(
    (conv1): ConvReLU2d(
      (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
      (1): ReLU(inplace=True)
    )
    (bn1): Identity()
    (relu): Identity()
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): ResLayer(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1))
        (bn1): Identity()
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (bn2): Identity()
        (conv3): ConvReLU2d(
          (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1))
          (1): ReLU(inplace=True)
        )
        (bn3): Identity()
        (relu): Identity()
        (downsample): Sequential(
          (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1))
          (1): Identity()
        )
      )
      init_cfg={'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
      (1): Bottleneck(
        (

In [13]:
fused_model_fp32.eval()

# assert helper.model_equivalence(model_1=model_fp32, model_2=fused_model_fp32, device=cuda_device, rtol=1e-05, atol=1e-05, num_tests=100, input_size=(1,3,320,320)), "Fused model is not equivalent to the original model!"

FasterRCNN(
  (backbone): ResNet(
    (conv1): ConvReLU2d(
      (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
      (1): ReLU(inplace=True)
    )
    (bn1): Identity()
    (relu): Identity()
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): ResLayer(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1))
        (bn1): Identity()
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (bn2): Identity()
        (conv3): ConvReLU2d(
          (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1))
          (1): ReLU(inplace=True)
        )
        (bn3): Identity()
        (relu): Identity()
        (downsample): Sequential(
          (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1))
          (1): Identity()
        )
      )
      init_cfg={'type': 'Constant', 'val': 0, 'override': {'name': 'norm3'}}
      (1): Bottleneck(
        (

## 4. Static Quantization

In [14]:

# class QuantizedResNet18(nn.Module):
#     def __init__(self, model_fp32):
#         super(QuantizedResNet18, self).__init__()
#         self.quant = torch.quantization.QuantStub()
#         self.model_fp32 = model_fp32
#         self.dequant = torch.quantization.DeQuantStub()
            
#     # def forward(self, x, bboxes, labels, scale):
#     def forward(self, x):
#         x = self.quant(x)
#         x = self.model_fp32(x)
#         x = self.dequant(x)
#         return x
    
    
class quantStubModel(nn.Module):
    def __init__(self, model_fp32):
        super(quantStubModel, self).__init__()
        # QuantStub converts tensors from floating point to quantized.
        # This will only be used for inputs.
        self.quant = torch.quantization.QuantStub()

        self.model_fp32 = model_fp32

        # DeQuantStub converts tensors from quantized to floating point.
        # This will only be used for outputs.
        self.dequant = torch.quantization.DeQuantStub()


    def forward(self, x):
        x = self.quant(x)
        x = self.model_fp32(x)
        x = self.dequant(x)
        return x

In [15]:
# def calibrate_model(model, loader, device=torch.device("cpu:0")):
    
#     model.to(device)
#     model.eval()

#     for images, labels in tqdm(loader,desc= 'Calibrating model quantization'):
#         images = list(img.to(device) for img in images)
#         _ = model(images)

In [16]:
def calibrate_model(model, loader, device=torch.device("cpu:0")):
    
    model.to(device)
    model.eval()

    for inputs in tqdm(loader):

      inputs = inputs.to(device)
      # inputs = inputs.unsqueeze(0) # create a mini-batch as expected by the model
      
      # inputs = inputs[0].to(device)
      # inputs = inputs.unsqueeze(0) # create a mini-batch as expected by the model
      _ = model(inputs)

In [17]:
# Quantization Model Define
quantized_model = quantStubModel(model_fp32=fused_model_fp32)
# quantized_model = fused_model_fp32

# Quantization Configuration Define
quantized_model.qconfig = torch.quantization.get_default_qconfig('fbgemm')     # 'qnnpack' for NVIDIA
torch.quantization.prepare(quantized_model, inplace=True)

quantStubModel(
  (quant): QuantStub(
    (activation_post_process): HistogramObserver()
  )
  (model_fp32): FasterRCNN(
    (backbone): ResNet(
      (conv1): ConvReLU2d(
        (0): Conv2d(
          3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3)
          (activation_post_process): HistogramObserver()
        )
        (1): ReLU(
          inplace=True
          (activation_post_process): HistogramObserver()
        )
      )
      (bn1): Identity()
      (relu): Identity()
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): ResLayer(
        (0): Bottleneck(
          (conv1): Conv2d(
            64, 64, kernel_size=(1, 1), stride=(1, 1)
            (activation_post_process): HistogramObserver()
          )
          (bn1): Identity()
          (conv2): Conv2d(
            64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)
            (activation_post_process): HistogramObserver()
          )
          (bn2):

In [18]:
# Calibration
# calibrate_model(model=quantized_model, loader=test_loader, device=cpu_device)
# quantized_model = quantized_model.to(cpu_device)

In [19]:
# Quantization Completed
quantized_model_int8 = torch.quantization.convert(quantized_model, inplace=True)
quantized_model_int8.eval()
print(quantized_model_int8)

quantStubModel(
  (quant): Quantize(scale=tensor([1.]), zero_point=tensor([0]), dtype=torch.quint8)
  (model_fp32): FasterRCNN(
    (backbone): ResNet(
      (conv1): QuantizedConvReLU2d(3, 64, kernel_size=(7, 7), stride=(2, 2), scale=1.0, zero_point=0, padding=(3, 3))
      (bn1): Identity()
      (relu): Identity()
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): ResLayer(
        (0): Bottleneck(
          (conv1): QuantizedConv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), scale=1.0, zero_point=0)
          (bn1): Identity()
          (conv2): QuantizedConv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), scale=1.0, zero_point=0, padding=(1, 1))
          (bn2): Identity()
          (conv3): QuantizedConvReLU2d(64, 256, kernel_size=(1, 1), stride=(1, 1), scale=1.0, zero_point=0)
          (bn3): Identity()
          (relu): Identity()
          (downsample): Sequential(
            (0): QuantizedConv2d(64, 256, kernel_size=(1, 1

In [20]:
def print_model_size(model):
    torch.save(model.state_dict(), "tmp.pt")
    print("%.2f MB" %(os.path.getsize("tmp.pt")/1e6))
    os.remove('tmp.pt')

In [21]:
print_model_size(fp32_model)
print_model_size(quantized_model_int8)

167.35 MB
42.46 MB


In [22]:
quantized_model_int8

quantStubModel(
  (quant): Quantize(scale=tensor([1.]), zero_point=tensor([0]), dtype=torch.quint8)
  (model_fp32): FasterRCNN(
    (backbone): ResNet(
      (conv1): QuantizedConvReLU2d(3, 64, kernel_size=(7, 7), stride=(2, 2), scale=1.0, zero_point=0, padding=(3, 3))
      (bn1): Identity()
      (relu): Identity()
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): ResLayer(
        (0): Bottleneck(
          (conv1): QuantizedConv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), scale=1.0, zero_point=0)
          (bn1): Identity()
          (conv2): QuantizedConv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), scale=1.0, zero_point=0, padding=(1, 1))
          (bn2): Identity()
          (conv3): QuantizedConvReLU2d(64, 256, kernel_size=(1, 1), stride=(1, 1), scale=1.0, zero_point=0)
          (bn3): Identity()
          (relu): Identity()
          (downsample): Sequential(
            (0): QuantizedConv2d(64, 256, kernel_size=(1, 1

In [23]:
# torch.save(quantized_model_int8.state_dict(), 'sehyun_quantized/quantized_model_int8_StateDict_0801.pth')

In [24]:
# torch.jit.save(torch.jit.script(quantized_model_int8), 'sehyun_quantized/quantized_model_MM_0811.pth')

---

### Demo

In [None]:
img = read_image('misc/demo6.jpg')
img = torch.from_numpy(img)[None]

# a=quantized_model_int8(img)
# a.size()

torch.Size([1, 3, 480, 640])

In [None]:
from mmdet.apis import init_detector, inference_detector
import mmcv

# Specify the path to model config and checkpoint file
config_file = 'configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py'
# cpheckpoint_file = 'checkpoints/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.th'
checkpoint_file = 'work_dirs/faster_rcnn_r50_fpn_1x_coco/epoch_12.pth'

# build the model from a config file and a checkpoint file
fp32_model = init_detector(config_file, checkpoint_file, device='cpu:0')

load checkpoint from local path: work_dirs/faster_rcnn_r50_fpn_1x_coco/epoch_12.pth


In [None]:
import time
from PIL import Image
from torchvision import transforms

input_image = Image.open('misc/demo6.jpg')
preprocess = transforms.Compose([
    transforms.Resize(32),
    transforms.CenterCrop(32),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.2023, 0.1994, 0.2010]),
])

input_tensor = preprocess(input_image)
input_batch = input_tensor.unsqueeze(0) # create a mini-batch as expected by the model

In [31]:
quantized_model_int8.to(cpu_device)
quantized_model_int8.eval()

# a=quantized_model_int8(input_batch)
# a.size()

result = inference_detector(quantized_model_int8, img)

ModuleAttributeError: 'quantStubModel' object has no attribute 'cfg'

---

---

---

In [56]:
fp32_model.eval()
img = read_image('misc/demo6.jpg')
img = torch.from_numpy(img)[None]

with torch.no_grad():
    result = quantized_model_int8(img.to(cpu_device), )

TypeError: forward() missing 1 required positional argument: 'img_metas'

In [50]:
# Specify the path to model config and checkpoint file
config_file = 'configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py'   # Config

# checkpointl_file = 'checkpoints/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth'   # MM Pretrained
checkpoint_fie = 'work_dirs/faster_rcnn_r50_fpn_1x_coco/epoch_9.pth'   # Self Trained

# build the model from a config file and a checkpoint file
model = init_detector(config_file, checkpoint_file, device='cpu:0')


# or dcpusave the visualization results to image files
img = 'misc/demo3.jpg'
result = inference_detector(model, img)
model.show_result(img, result, out_file='inference_result/result_nonQuant_Model_0811.jpg')

load checkpoint from local path: work_dirs/faster_rcnn_r50_fpn_1x_coco/epoch_12.pth
misc/demo3.jpg



 img metas
[[{'filename': 'misc/demo3.jpg', 'ori_filename': 'misc/demo3.jpg', 'ori_shape': (500, 353, 3), 'img_shape': (1133, 800, 3), 'pad_shape': (1152, 800, 3), 'scale_factor': array([2.266289, 2.266   , 2.266289, 2.266   ], dtype=float32), 'flip': False, 'flip_direction': None, 'img_norm_cfg': {'mean': array([123.675, 116.28 , 103.53 ], dtype=float32), 'std': array([58.395, 57.12 , 57.375], dtype=float32), 'to_rgb': True}}]]



 img
[tensor([[[[-2.1008, -2.1008, -2.1008,  ..., -2.1008, -2.1008, -2.1008],
          [-2.1008, -2.1008, -2.1008,  ..., -2.0837, -2.1008, -2.1008],
          [-2.1008, -2.1008, -2.1008,  ..., -2.1008, -2.1008, -2.1008],
          ...,
          [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
          [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
          [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]],

       

In [48]:
# # or dcpusave the visualization results to image files
img = 'misc/demo3.jpg'
result = inference_detector_quant(quantized_model_int8, fp32_model, img)

# model.show_result(img, result, out_file='inference_result/result_cpu9.jpg')

m


FileNotFoundError: [Errno 2] No such file or directory: 'm'

In [None]:
# # quantized_model_int8.eval()
# quantized_model_int8.to(cpu_device)

# # # or dcpusave the visualization results to image files
# img = 'misc/demo1.jpg'
# result = inference_detector(quantized_model_int8, img)
# model.show_result(img, result, out_file='inference_result/result_cpu9.jpg')

ModuleAttributeError: 'quantStubModel' object has no attribute 'cfg'