##### Import Libraries

In [1]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader

import os
import cv2
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt


import warnings
warnings.filterwarnings('ignore')

import os    
os.environ['KMP_DUPLICATE_LIB_OK']='True'

# Get cpu or gpu device for training.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

print(f"Using {DEVICE} device")
if torch.cuda.is_available(): print(f'device name: {torch.cuda.get_device_name(0)}')

cpu_device = torch.device('cpu')
gpu_device = torch.device('cuda')

Using cuda device
device name: TITAN RTX


##### Define Size Func

In [2]:
def print_model_size(model):
    torch.save(model.state_dict(), "tmp.pt")
    print("%.2f MB" %(os.path.getsize("tmp.pt")/1e6))
    os.remove('tmp.pt')

Load Model and Load Pretrained Weight Values

In [3]:
# ResultandModels_faster_rcnn_r101_fpn_1x_coco_20200130-f513f705.pth
# PretrainedModel_faster_rcnn_r101_fpn_mstrain_3x_coco_20210524_110822-4d4d2ca8.pth
# 'configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py'
from torchvision.models import resnet50
import torchvision

model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
# resnet = torchvision.models.resnet50(pretrained=True, quantized_model=True)
# resnet = torchvision.models.quantization.resnet.QuantizableResNet(pretrain)

quan_resnet = torchvision.models.quantization.resnet50(quantize=True)
# model = resnet50()
# model.load_state_dict(torch.load("sehyun_pretrained/DifferentRegressionLoss_faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth"), strict=False)
# model.load_state_dict(ResNet50_Weights, strict=False)

In [4]:
model

FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256)
          (relu): ReLU(inplace=True)
          (downsample)

Change to Eval Mode

In [5]:
model.eval()

FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256)
          (relu): ReLU(inplace=True)
          (downsample)

Model Layer Fusion

In [10]:
modules_to_fuse = [ 
    ["backbone.body.layer1.0.conv1", "backbone.body.layer1.0.bn1"]

    # ["layer1.0.conv1", "layer1.0.bn1"],
    # ["layer1.0.conv2", "layer1.0.bn2"],
    # ["layer1.0.conv3", "layer1.0.bn3", "layer1.0.relu"],
    # ["layer1.1.conv1", "layer1.1.bn1"],
    # ["layer1.1.conv2", "layer1.1.bn2"],
    # ["layer1.1.conv3", "layer1.1.bn3", "layer1.1.relu"],
    # ["layer1.2.conv1", "layer1.2.bn1"],
    # ["layer1.2.conv2", "layer1.2.bn2"],
    # ["layer1.2.conv3", "layer1.2.bn3", "layer1.2.relu"],

    # ["layer2.0.conv1", "layer2.0.bn1"],
    # ["layer2.0.conv2", "layer2.0.bn2"],
    # ["layer2.0.conv3", "layer2.0.bn3", "layer2.0.relu"],
    # ["layer2.1.conv1", "layer2.1.bn1"],
    # ["layer2.1.conv2", "layer2.1.bn2"],
    # ["layer2.1.conv3", "layer2.1.bn3", "layer2.1.relu"],
    # ["layer2.2.conv1", "layer2.2.bn1"],
    # ["layer2.2.conv2", "layer2.2.bn2"],
    # ["layer2.2.conv3", "layer2.2.bn3", "layer2.2.relu"],
    # ["layer2.3.conv1", "layer2.3.bn1"],
    # ["layer2.3.conv2", "layer2.3.bn2"],
    # ["layer2.3.conv3", "layer2.3.bn3", "layer2.3.relu"],

    # ["layer3.0.conv1", "layer3.0.bn1"],
    # ["layer3.0.conv2", "layer3.0.bn2"],
    # ["layer3.0.conv3", "layer3.0.bn3", "layer3.0.relu"],
    # ["layer3.1.conv1", "layer3.1.bn1"],
    # ["layer3.1.conv2", "layer3.1.bn2"],
    # ["layer3.1.conv3", "layer3.1.bn3", "layer3.1.relu"],
    # ["layer3.2.conv1", "layer3.2.bn1"],
    # ["layer3.2.conv2", "layer3.2.bn2"],
    # ["layer3.2.conv3", "layer3.2.bn3", "layer3.2.relu"],               
    # ["layer3.3.conv1", "layer3.3.bn1"],
    # ["layer3.3.conv2", "layer3.3.bn2"],
    # ["layer3.3.conv3", "layer3.3.bn3", "layer3.3.relu"],
    # ["layer3.4.conv1", "layer3.4.bn1"],
    # ["layer3.4.conv2", "layer3.4.bn2"],
    # ["layer3.4.conv3", "layer3.4.bn3", "layer3.4.relu"],
    # ["layer3.5.conv1", "layer3.5.bn1"],
    # ["layer3.5.conv2", "layer3.5.bn2"],
    # ["layer3.5.conv3", "layer3.5.bn3", "layer3.5.relu"],

    # ["layer4.0.conv1", "layer4.0.bn1"],
    # ["layer4.0.conv2", "layer4.0.bn2"],
    # ["layer4.0.conv3", "layer4.0.bn3", "layer4.0.relu"],
    # ["layer4.1.conv1", "layer4.1.bn1"],
    # ["layer4.1.conv2", "layer4.1.bn2"],
    # ["layer4.1.conv3", "layer4.1.bn3", "layer4.1.relu"],
    # ["layer4.2.conv1", "layer4.2.bn1"],
    # ["layer4.2.conv2", "layer4.2.bn2"],
    # ["layer4.2.conv3", "layer4.2.bn3", "layer4.2.relu"]
    ]


fused_model = torch.quantization.fuse_modules(model, modules_to_fuse)
fused_model

NotImplementedError: Cannot fuse modules: (<class 'torch.nn.modules.conv.Conv2d'>, <class 'torchvision.ops.misc.FrozenBatchNorm2d'>)

In [None]:
class quantStubModel(nn.Module):
    def __init__(self, model_fp32):
        super(quantStubModel, self).__init__()
        # QuantStub converts tensors from floating point to quantized.
        # This will only be used for inputs.
        self.quant = torch.quantization.QuantStub()
            
        # DeQuantStub converts tensors from quantized to floating point.
        # This will only be used for outputs.
        self.dequant = torch.quantization.DeQuantStub()
        
        self.model_fp32 = model_fp32

    def forward(self, x):
        x = self.quant(x)
        x = self.model_fp32(x)
        x = self.dequant(x)
        return x

# creating nn.Module with stubs for inputs and outputs
quant_stubbed_model = quantStubModel(model_fp32=fused_model)

##### Define Quantization Mode

In [None]:
# colab requires fbgemm backend
use_fbgemm = True

if use_fbgemm == True:
  quantization_config = torch.quantization.get_default_qconfig('fbgemm')
  torch.backends.quantized.engine = 'fbgemm'

else:
  quantization_config = torch.quantization.default_qconfig
  torch.backends.quantized.engine = 'qnnpack'

In [None]:
quant_stubbed_model.qconfig  = quantization_config
quant_stubbed_model_prepared = torch.quantization.prepare(quant_stubbed_model, inplace=False)

##### Define DataLoader

In [None]:
class CoCoDataset(Dataset):
    def __init__(self, rootDir, folder, tf=None):
        """Dataset class for CoCo data

        Args:
            rootDir (str): path to directory containing CoCo image data
            folder (str) : 'train' or 'val' folder
            tf (optional): transformation to apply. Defaults to None
        """        
        self.rootDir = rootDir
        self.folder = folder
        self.transform = tf

        # read rgb image list
        # sourceImgFolder =  os.path.join(self.rootDir, 'leftImg8bit', self.folder)
        sourceImgFolder =  os.path.join(self.rootDir, self.folder)
        self.sourceImgFiles  = [os.path.join(sourceImgFolder, x) for x in sorted(os.listdir(sourceImgFolder))]

        # read label image list
        # labelImgFolder =  os.path.join(self.rootDir, 'gtFine', self.folder)
        labelImgFolder =  os.path.join(self.rootDir, self.folder)
        self.labelImgFiles  = [os.path.join(labelImgFolder, x) for x in sorted(os.listdir(labelImgFolder))]
    
    def __len__(self):
        return len(self.sourceImgFiles)
  
    def __getitem__(self, index):
        # read source image and convert to RGB, apply transform
        sourceImage = cv2.imread(f"{self.sourceImgFiles[index]}", -1)
        sourceImage = cv2.cvtColor(sourceImage, cv2.COLOR_BGR2RGB)
        if self.transform is not None:
            sourceImage = self.transform(sourceImage)

        # read label image and convert to torch tensor
        labelImage  = torch.from_numpy(cv2.imread(f"{self.labelImgFiles[index]}", -1)).long()
        return sourceImage, labelImage  

##### Calibration

In [None]:
tf = transforms.Compose([
                    transforms.ToTensor(),
                    transforms.Normalize(mean=(0.485, 0.56, 0.406), std=(0.229, 0.224, 0.225))
                ])

TEST_BATCH_SIZE = 1
# quant_stubbed_model_prepared = eval()

# Creating Test set and Test Dataloaders
test_set = CoCoDataset(rootDir= 'coco/images/', folder='test2017', tf = tf)
test_set = torch.utils.data.Subset(test_set, indices=np.arange(2))
test_dataloader  = DataLoader(test_set, batch_size=TEST_BATCH_SIZE)

quantSet = torch.utils.data.Subset(test_set, indices=np.arange(2))
quantDataloader = DataLoader(quantSet, batch_size=TEST_BATCH_SIZE)

In [None]:
with torch.no_grad():
    for inputs in tqdm(quantDataloader):
      print(inputs[0].shape)
      print(inputs[0])
      real_inputs = inputs[0]
      inputs = real_inputs[0].to(cpu_device)

      inputs = inputs.unsqueeze(0)
      _ = quant_stubbed_model_prepared(inputs)

  0%|          | 0/2 [00:00<?, ?it/s]

torch.Size([1, 3, 480, 640])
tensor([[[[-1.8097, -1.7925, -1.7412,  ...,  0.5364,  0.5707,  0.5707],
          [-1.7583, -1.8097, -1.7925,  ...,  0.5364,  0.5707,  0.5707],
          [-1.7754, -1.8268, -1.8610,  ...,  0.5193,  0.5364,  0.5707],
          ...,
          [-1.8782, -1.6727, -1.4843,  ..., -1.6213, -1.4329, -1.6384],
          [-1.7412, -1.7412, -1.5699,  ..., -1.5014, -1.3644, -1.4500],
          [-1.2103, -1.5699, -1.6384,  ..., -1.4500, -1.3987, -1.3473]],

         [[-2.0973, -2.1324, -2.0623,  ...,  0.3711,  0.3536,  0.3536],
          [-2.0273, -2.0623, -2.0798,  ...,  0.3711,  0.3536,  0.3536],
          [-1.9573, -2.0448, -2.0623,  ...,  0.3887,  0.3711,  0.3536],
          ...,
          [-1.6246, -2.0798, -2.0623,  ..., -1.8172, -1.5721, -1.7822],
          [-1.4846, -1.7647, -1.8347,  ..., -1.7122, -1.5546, -1.6422],
          [-1.6772, -1.6772, -1.6422,  ..., -1.6597, -1.6071, -1.5546]],

         [[-1.5081, -1.5256, -1.4907,  ...,  1.0017,  1.0365,  1.0365],
 

 50%|█████     | 1/2 [00:00<00:00,  1.75it/s]

torch.Size([1, 3, 640, 480])
tensor([[[[-0.9020, -0.9192, -0.9534,  ...,  0.7591,  0.7419,  0.7419],
          [-0.7993, -0.8849, -0.8678,  ..., -0.0801, -0.2513, -0.3027],
          [-0.8164, -0.8678, -0.8678,  ..., -0.5767, -0.2513, -0.5424],
          ...,
          [ 0.7248,  0.4679,  0.6906,  ...,  0.7419,  0.9817,  0.9817],
          [ 0.5878,  0.7591,  0.8789,  ...,  0.8104,  0.9474,  1.0331],
          [ 0.8961,  0.9474,  0.8961,  ...,  1.0673,  1.0159,  1.1358]],

         [[-1.2745, -1.2920, -1.2920,  ...,  0.4237,  0.4587,  0.5637],
          [-1.1695, -1.2570, -1.2045,  ...,  0.0910, -0.0140, -0.1015],
          [-1.1870, -1.2045, -1.2045,  ..., -0.7143, -0.4867, -0.8543],
          ...,
          [ 0.0210, -0.2591,  0.0035,  ..., -0.0840,  0.2136,  0.2486],
          [ 0.0385,  0.1611,  0.3361,  ..., -0.0140,  0.1961,  0.3186],
          [ 0.2836,  0.2836,  0.2311,  ...,  0.2311,  0.2486,  0.4237]],

         [[-0.4973, -0.5147, -0.5321,  ...,  1.3851,  1.4025,  1.4722],
 

100%|██████████| 2/2 [00:01<00:00,  1.73it/s]


##### Quantization Done!

In [None]:
# quantized_model = torch.quantization.convert(quant_stubbed_model, inplace=True)
quantized_model = torch.quantization.convert(quant_stubbed_model_prepared, inplace=True)

##### Model Save

In [None]:
torch.save(quantized_model, 'sehyun_quantized/quantized_model.pth')

##### Compare

In [None]:
print_model_size(quantized_model)
print_model_size(model)

26.21 MB
102.54 MB
