In [3]:
import sys
# Put the parent directory on the import path exactly once (presumably so the
# `source` package imported below can be resolved from this notebook's folder).
if sys.path.count('..') == 0:
    sys.path.append('..')

In [2]:
import torch 
from torch.quantization import MovingAverageMinMaxObserver,HistogramObserver
from torchvision.models import ResNet

from tqdm import tqdm
import numpy as np

from source.data import get_test_dataloader, get_training_dataloader
from source.models import BasicBlock, ResNet18Quant

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
def accuracy(model, dataloader, device='gpu'):
    """Print the top-1 accuracy of ``model`` over ``dataloader``.

    The model is switched to eval mode and every batch is run under
    ``torch.no_grad()``. The number of correct predictions is divided by
    ``len(dataloader.dataset)``, so ``dataloader`` must expose a sized
    ``dataset`` attribute.

    Args:
        model: Module mapping a batch of images to per-class scores, with the
            class axis at dim 1 (predictions are taken with ``max(1)``).
        dataloader: Iterable yielding ``(images, labels)`` batches.
        device: ``'gpu'`` moves every batch to CUDA before the forward pass;
            any other value leaves the batch on its current device.

    Returns:
        None. The result is printed as ``Acc: <tensor>``.
    """
    model.eval()
    # Start from an integer so the running count stays exact; it turns into a
    # tensor on the first batch and remains a plain int if no batch is yielded.
    correct = 0
    with torch.no_grad():
        for images, labels in tqdm(dataloader):
            if device == 'gpu':
                images = images.cuda()
                labels = labels.cuda()
            outputs = model(images)
            _, preds = outputs.max(1)
            correct += preds.eq(labels).sum()

    # as_tensor keeps an existing tensor as-is and wraps the plain int left
    # behind by an empty loader, so `.float()` no longer fails in that case.
    print('Acc:', torch.as_tensor(correct).float() / len(dataloader.dataset))

In [4]:
# Normalization statistics handed to both dataloader factories below
# (three values each; presumably one per RGB channel of the training split).
CIFAR100_TRAIN_MEAN = (
    0.5070751592371323,
    0.48654887331495095,
    0.4409178433670343,
)
CIFAR100_TRAIN_STD = (
    0.2673342858792401,
    0.2564384629170883,
    0.27615047132568404,
)

In [5]:
# Training split: shuffled batches of 64.
cifar100_training_loader = get_training_dataloader(
    '../data',
    CIFAR100_TRAIN_MEAN,
    CIFAR100_TRAIN_STD,
    num_workers=4,
    batch_size=64,
    shuffle=True
)

# Test split: accuracy does not depend on sample order, so the loader is left
# unshuffled to keep evaluation runs deterministic and comparable.
cifar100_test_loader = get_test_dataloader(
    '../data',
    CIFAR100_TRAIN_MEAN,
    CIFAR100_TRAIN_STD,
    num_workers=4,
    batch_size=64,
    shuffle=False
)

Files already downloaded and verified
Files already downloaded and verified


In [None]:
# Build the float ResNet (BasicBlock, [2, 2, 2, 2] layout) with 100 output classes.
model = ResNet(num_classes=100, block=BasicBlock, layers=[2, 2, 2, 2])
# map_location='cpu' makes the checkpoint load regardless of the device it was
# saved from; the model is moved to the GPU explicitly afterwards.
state_dict = torch.load('../models/resnet18_cifar100.sd', map_location='cpu')
model.load_state_dict(state_dict)
model = model.cuda()

In [7]:
%%time
# Baseline: evaluate `model` on the test loader via the default device='gpu'
# path of accuracy(), which moves every batch to CUDA.
accuracy(model, cifar100_test_loader)

100%|██████████| 157/157 [00:05<00:00, 31.27it/s] 


Acc: tensor(0.5515, device='cuda:0')
CPU times: user 1.82 s, sys: 755 ms, total: 2.58 s
Wall time: 5.04 s


# Dynamic Quantization

In [8]:
# The dynamically quantized model is evaluated on the CPU below.
model = model.cpu()
# Only nn.Linear is listed: PyTorch's default dynamic-quantization mapping has
# no quantized counterpart for Conv2d, BatchNorm2d, ReLU or the pooling layers,
# so naming them in the spec had no effect and only suggested otherwise.
model = torch.quantization.quantize_dynamic(
    model,
    {torch.nn.Linear},
    dtype=torch.qint8
)

In [10]:
%%time
# Evaluate the dynamically quantized model; device='cpu' keeps the batches on
# the CPU, where `model` was moved before quantization.
accuracy(model, cifar100_test_loader, device='cpu')

100%|██████████| 157/157 [01:30<00:00,  1.73it/s]

Acc: tensor(0.5512)
CPU times: user 5min 54s, sys: 309 ms, total: 5min 54s
Wall time: 1min 30s





# Static Quantization

In [16]:
# Start again from the float checkpoint, this time in the quantization-ready
# ResNet18Quant wrapper.
model = ResNet18Quant(num_classes=100)
# map_location='cpu' makes the checkpoint load regardless of the device it was
# saved from; this model stays on the CPU for the fusion/prepare steps.
model.load_state_dict(torch.load('../models/resnet18_cifar100.sd', map_location='cpu'))
# Switch to eval mode before fusing; as the last expression this also displays
# the module tree.
model.eval()

ResNet18Quant(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicQuantBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu1): ReLU(inplace=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (ff): FloatFunctional(
        (activation_post_process): Identity()
      )
    )
    (1): BasicQuantBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=F

In [18]:
# Custom quantization config, used instead of the stock
# torch.quantization.get_default_qconfig('fbgemm'):
#   * activations: histogram observer with reduce_range=True
#   * weights: moving-average min/max observer, per-tensor affine, qint8
activation_observer = HistogramObserver.with_args(reduce_range=True)
weight_observer = MovingAverageMinMaxObserver.with_args(
    qscheme=torch.per_tensor_affine,
    dtype=torch.qint8,
)
model.qconfig = torch.quantization.QConfig(
    activation=activation_observer,
    weight=weight_observer,
)
model.qconfig

QConfig(activation=functools.partial(<class 'torch.ao.quantization.observer.HistogramObserver'>, reduce_range=True){}, weight=functools.partial(<class 'torch.ao.quantization.observer.MovingAverageMinMaxObserver'>, qscheme=torch.per_tensor_affine, dtype=torch.qint8){})

In [19]:
# Groups of submodule names to fuse, in this order: the stem, then the first
# conv/bn/relu of every residual block, then the second conv/bn of every block,
# then the conv/bn pair on the downsample path of layers 2-4.
stage_blocks = [(i, j) for i in (1, 2, 3, 4) for j in (0, 1)]

stem_group = [['conv1', 'bn1', 'relu']]
first_conv_groups = [
    [f'layer{i}.{j}.conv1', f'layer{i}.{j}.bn1', f'layer{i}.{j}.relu1']
    for i, j in stage_blocks
]
second_conv_groups = [
    [f'layer{i}.{j}.conv2', f'layer{i}.{j}.bn2']
    for i, j in stage_blocks
]
downsample_groups = [
    [f'layer{i}.0.downsample.0', f'layer{i}.0.downsample.1']
    for i in (2, 3, 4)
]

modules_to_fuse = stem_group + first_conv_groups + second_conv_groups + downsample_groups
modules_to_fuse

[['conv1', 'bn1', 'relu'],
 ['layer1.0.conv1', 'layer1.0.bn1', 'layer1.0.relu1'],
 ['layer1.1.conv1', 'layer1.1.bn1', 'layer1.1.relu1'],
 ['layer2.0.conv1', 'layer2.0.bn1', 'layer2.0.relu1'],
 ['layer2.1.conv1', 'layer2.1.bn1', 'layer2.1.relu1'],
 ['layer3.0.conv1', 'layer3.0.bn1', 'layer3.0.relu1'],
 ['layer3.1.conv1', 'layer3.1.bn1', 'layer3.1.relu1'],
 ['layer4.0.conv1', 'layer4.0.bn1', 'layer4.0.relu1'],
 ['layer4.1.conv1', 'layer4.1.bn1', 'layer4.1.relu1'],
 ['layer1.0.conv2', 'layer1.0.bn2'],
 ['layer1.1.conv2', 'layer1.1.bn2'],
 ['layer2.0.conv2', 'layer2.0.bn2'],
 ['layer2.1.conv2', 'layer2.1.bn2'],
 ['layer3.0.conv2', 'layer3.0.bn2'],
 ['layer3.1.conv2', 'layer3.1.bn2'],
 ['layer4.0.conv2', 'layer4.0.bn2'],
 ['layer4.1.conv2', 'layer4.1.bn2'],
 ['layer2.0.downsample.0', 'layer2.0.downsample.1'],
 ['layer3.0.downsample.0', 'layer3.0.downsample.1'],
 ['layer4.0.downsample.0', 'layer4.0.downsample.1']]

In [20]:
# Fuse every group named in modules_to_fuse and keep the returned model.
model = torch.quantization.fuse_modules(model, modules_to_fuse)

In [2]:
# Display the model's repr (module tree) for inspection.
# NOTE(review): the recorded output is a NameError from an out-of-order run on
# a fresh kernel; re-run the notebook top to bottom before sharing.
model

NameError: name 'model' is not defined

In [21]:
# Prepare the model for calibration using the qconfig assigned to it above.
model = torch.quantization.prepare(model)

In [22]:
# The model can be moved to the GPU for faster quantization calibration.
model = model.cuda()

In [1]:
# Quantization calibration: run roughly 1000 training samples through the
# prepared model. The loop stops after the first batch that brings the running
# sample count to CALIBRATION_SAMPLES or more.
CALIBRATION_SAMPLES = 1000

model.eval()
samples_seen = 0
with torch.no_grad():
    # The training loader defined earlier in this notebook is
    # `cifar100_training_loader`; no `train_loader` name exists here.
    for train_x, _ in tqdm(cifar100_training_loader):
        model(train_x.cuda())
        # Count actual samples so the budget also holds for a short last batch.
        samples_seen += train_x.size(0)
        if samples_seen >= CALIBRATION_SAMPLES:
            break

NameError: name 'model' is not defined

In [24]:
# The model has to be moved back to the CPU before quantization conversion.
model = model.cpu()

In [25]:
# Convert the calibrated model to its quantized form and keep the result.
model = torch.quantization.convert(model)

In [26]:
# Evaluate the converted model; device='cpu' keeps the batches on the CPU.
accuracy(model, cifar100_test_loader, device='cpu')

100%|██████████| 157/157 [00:05<00:00, 28.62it/s]

Acc: tensor(0.5505)





In [15]:
# NOTE(review): duplicate of the previous evaluation cell. The recorded
# accuracies differ (0.5505 vs 0.5478), presumably because they come from
# separate calibration runs — confirm and keep only one of the two cells.
accuracy(model, cifar100_test_loader, device='cpu')

100%|██████████| 157/157 [00:05<00:00, 28.61it/s]

Acc: tensor(0.5478)



