In [1]:
import sys
if '..' not in sys.path:
    sys.path.append('..')

In [2]:
import torch
from torchvision.models import ResNet

from tqdm import tqdm
import numpy as np
import os

from aimet_torch.model_preparer import prepare_model
from aimet_torch.batch_norm_fold import fold_all_batch_norms
from aimet_common.defs import QuantScheme
from aimet_torch.quantsim import QuantizationSimModel
from aimet_torch.adaround.adaround_weight import Adaround, AdaroundParameters

from source.data import get_training_dataloader, get_test_dataloader
from source.models import BasicBlock, ResNet18Quant

2022-12-21 23:54:03,824 - root - INFO - AIMET


In [3]:
def accuracy(model, dataloader, device='gpu'):
    model.eval()
    with torch.no_grad():
        correct = 0.0
        for (images, labels) in tqdm(dataloader):
            if device == 'gpu':
                images = images.cuda()
                labels = labels.cuda()
            outputs = model(images)
            _, preds = outputs.max(1)
            correct += preds.eq(labels).sum()

    print('Acc:', correct.float() / len(dataloader.dataset))

In [4]:
CIFAR100_TRAIN_MEAN = (0.5070751592371323, 0.48654887331495095, 0.4409178433670343)
CIFAR100_TRAIN_STD = (0.2673342858792401, 0.2564384629170883, 0.27615047132568404)

In [5]:
train_loader = get_training_dataloader(
    '../data',
    CIFAR100_TRAIN_MEAN,
    CIFAR100_TRAIN_STD,
    num_workers=4,
    batch_size=64,
    shuffle=True
)

test_loader = get_test_dataloader(
    '../data',
    CIFAR100_TRAIN_MEAN,
    CIFAR100_TRAIN_STD,
    num_workers=4,
    batch_size=64,
    shuffle=True
)

Files already downloaded and verified
Files already downloaded and verified


In [6]:
def pass_calibration_data(sim_model, use_cuda):
    batch_size = train_loader.batch_size

    if use_cuda:
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')

    sim_model.eval()
    samples = 1000

    batch_cntr = 0
    with torch.no_grad():
        for input_data, target_data in train_loader:

            inputs_batch = input_data.to(device)
            sim_model(inputs_batch)

            batch_cntr += 1
            print(batch_cntr * batch_size)
            if (batch_cntr * batch_size) > samples:
                break

In [7]:
model = ResNet(num_classes=100, block=BasicBlock, layers=[2, 2, 2, 2])
model.load_state_dict(torch.load('../models/resnet18_cifar100.sd'))
model.eval()
model = model.cuda()

In [8]:
%%time
accuracy(model, test_loader, device='gpu')

100%|██████████| 334/334 [00:02<00:00, 136.29it/s]

Acc: tensor(0.5515, device='cuda:0')
CPU times: user 1.41 s, sys: 276 ms, total: 1.69 s
Wall time: 2.45 s





# Quantization

In [8]:
model = prepare_model(model)

2022-12-21 23:53:18,450 - Quant - INFO - Functional         : Adding new module for node: {add} 
2022-12-21 23:53:18,450 - Quant - INFO - Reused/Duplicate   : Adding new module for node: {layer1_0_relu_1} 
2022-12-21 23:53:18,451 - Quant - INFO - Functional         : Adding new module for node: {add_1} 
2022-12-21 23:53:18,452 - Quant - INFO - Reused/Duplicate   : Adding new module for node: {layer1_1_relu_1} 
2022-12-21 23:53:18,452 - Quant - INFO - Functional         : Adding new module for node: {add_2} 
2022-12-21 23:53:18,453 - Quant - INFO - Reused/Duplicate   : Adding new module for node: {layer2_0_relu_1} 
2022-12-21 23:53:18,453 - Quant - INFO - Functional         : Adding new module for node: {add_3} 
2022-12-21 23:53:18,454 - Quant - INFO - Reused/Duplicate   : Adding new module for node: {layer2_1_relu_1} 
2022-12-21 23:53:18,454 - Quant - INFO - Functional         : Adding new module for node: {add_4} 
2022-12-21 23:53:18,455 - Quant - INFO - Reused/Duplicate   : Adding ne



In [9]:
_ = fold_all_batch_norms(model, input_shapes=(1, 3, 224, 224))

2022-12-21 23:53:19,001 - Utils - INFO - ...... subset to store [Conv_0, BatchNormalization_1]
2022-12-21 23:53:19,002 - Utils - INFO - ...... subset to store [Conv_4, BatchNormalization_5]
2022-12-21 23:53:19,002 - Utils - INFO - ...... subset to store [Conv_7, BatchNormalization_8]
2022-12-21 23:53:19,002 - Utils - INFO - ...... subset to store [Conv_11, BatchNormalization_12]
2022-12-21 23:53:19,003 - Utils - INFO - ...... subset to store [Conv_14, BatchNormalization_15]
2022-12-21 23:53:19,003 - Utils - INFO - ...... subset to store [Conv_18, BatchNormalization_19]
2022-12-21 23:53:19,004 - Utils - INFO - ...... subset to store [Conv_21, BatchNormalization_22]
2022-12-21 23:53:19,004 - Utils - INFO - ...... subset to store [Conv_27, BatchNormalization_28]
2022-12-21 23:53:19,004 - Utils - INFO - ...... subset to store [Conv_30, BatchNormalization_31]
2022-12-21 23:53:19,005 - Utils - INFO - ...... subset to store [Conv_34, BatchNormalization_35]
2022-12-21 23:53:19,005 - Utils - IN

In [10]:
dummy_input = torch.rand(1, 3, 224, 224)    # Shape for each ImageNet sample is (3 channels) x (224 height) x (224 width)
dummy_input = dummy_input.cuda()

sim = QuantizationSimModel(model=model,
                           quant_scheme=QuantScheme.post_training_tf_enhanced,
                           dummy_input=dummy_input,
                           default_output_bw=4,
                           default_param_bw=4)

2022-12-21 23:53:22,829 - Quant - INFO - No config file provided, defaulting to config file at /usr/local/lib/python3.8/dist-packages/aimet_common/quantsim_config/default_config.json
2022-12-21 23:53:22,849 - Quant - INFO - Unsupported op type Squeeze
2022-12-21 23:53:22,849 - Quant - INFO - Unsupported op type Pad
2022-12-21 23:53:22,849 - Quant - INFO - Unsupported op type Mean
2022-12-21 23:53:22,852 - Utils - INFO - ...... subset to store [Conv_0, Relu_1]
2022-12-21 23:53:22,853 - Utils - INFO - ...... subset to store [Conv_0, Relu_1]
2022-12-21 23:53:22,853 - Utils - INFO - ...... subset to store [Conv_3, Relu_4]
2022-12-21 23:53:22,854 - Utils - INFO - ...... subset to store [Conv_3, Relu_4]
2022-12-21 23:53:22,854 - Utils - INFO - ...... subset to store [Add_6, Relu_7]
2022-12-21 23:53:22,855 - Utils - INFO - ...... subset to store [Add_6, Relu_7]
2022-12-21 23:53:22,855 - Utils - INFO - ...... subset to store [Conv_8, Relu_9]
2022-12-21 23:53:22,855 - Utils - INFO - ...... subs

In [11]:
sim.compute_encodings(forward_pass_callback=pass_calibration_data,
                      forward_pass_callback_args=True)

64
128
192
256
320
384
448
512
576
640
704
768
832
896
960
1024


In [12]:
%%time
accuracy(sim.model, test_loader, device='gpu')

100%|██████████| 157/157 [00:02<00:00, 55.95it/s]

Acc: tensor(0.2235, device='cuda:0')
CPU times: user 1.89 s, sys: 259 ms, total: 2.15 s
Wall time: 2.81 s





# AdaRound

In [8]:
model = prepare_model(model)

2022-12-21 23:54:11,038 - Quant - INFO - Functional         : Adding new module for node: {add} 
2022-12-21 23:54:11,039 - Quant - INFO - Reused/Duplicate   : Adding new module for node: {layer1_0_relu_1} 
2022-12-21 23:54:11,039 - Quant - INFO - Functional         : Adding new module for node: {add_1} 
2022-12-21 23:54:11,041 - Quant - INFO - Reused/Duplicate   : Adding new module for node: {layer1_1_relu_1} 
2022-12-21 23:54:11,042 - Quant - INFO - Functional         : Adding new module for node: {add_2} 
2022-12-21 23:54:11,042 - Quant - INFO - Reused/Duplicate   : Adding new module for node: {layer2_0_relu_1} 
2022-12-21 23:54:11,044 - Quant - INFO - Functional         : Adding new module for node: {add_3} 
2022-12-21 23:54:11,045 - Quant - INFO - Reused/Duplicate   : Adding new module for node: {layer2_1_relu_1} 
2022-12-21 23:54:11,045 - Quant - INFO - Functional         : Adding new module for node: {add_4} 
2022-12-21 23:54:11,046 - Quant - INFO - Reused/Duplicate   : Adding ne



In [9]:
_ = fold_all_batch_norms(model, input_shapes=(1, 3, 224, 224))

2022-12-21 23:54:11,940 - Utils - INFO - ...... subset to store [Conv_0, BatchNormalization_1]
2022-12-21 23:54:11,940 - Utils - INFO - ...... subset to store [Conv_4, BatchNormalization_5]
2022-12-21 23:54:11,941 - Utils - INFO - ...... subset to store [Conv_7, BatchNormalization_8]
2022-12-21 23:54:11,941 - Utils - INFO - ...... subset to store [Conv_11, BatchNormalization_12]
2022-12-21 23:54:11,942 - Utils - INFO - ...... subset to store [Conv_14, BatchNormalization_15]
2022-12-21 23:54:11,942 - Utils - INFO - ...... subset to store [Conv_18, BatchNormalization_19]
2022-12-21 23:54:11,943 - Utils - INFO - ...... subset to store [Conv_21, BatchNormalization_22]
2022-12-21 23:54:11,943 - Utils - INFO - ...... subset to store [Conv_27, BatchNormalization_28]
2022-12-21 23:54:11,943 - Utils - INFO - ...... subset to store [Conv_30, BatchNormalization_31]
2022-12-21 23:54:11,944 - Utils - INFO - ...... subset to store [Conv_34, BatchNormalization_35]
2022-12-21 23:54:11,944 - Utils - IN

In [10]:
params = AdaroundParameters(data_loader=train_loader, num_batches=2000//train_loader.batch_size, default_num_iterations=10000)

In [11]:
dummy_input = torch.rand(1, 3, 224, 224).cuda()

In [12]:
os.makedirs('./cifar100_w4/', exist_ok=True)

In [13]:
%time
ada_model = Adaround.apply_adaround(model, dummy_input, params,
                                    path="cifar100_w4", 
                                    filename_prefix='adaround', 
                                    default_param_bw=4,
                                    default_quant_scheme=QuantScheme.post_training_tf_enhanced)

CPU times: user 1e+03 ns, sys: 1e+03 ns, total: 2 µs
Wall time: 3.81 µs
2022-12-21 23:54:16,410 - Quant - INFO - No config file provided, defaulting to config file at /usr/local/lib/python3.8/dist-packages/aimet_common/quantsim_config/default_config.json
2022-12-21 23:54:16,429 - Quant - INFO - Unsupported op type Squeeze
2022-12-21 23:54:16,429 - Quant - INFO - Unsupported op type Pad
2022-12-21 23:54:16,429 - Quant - INFO - Unsupported op type Mean
2022-12-21 23:54:16,432 - Utils - INFO - ...... subset to store [Conv_0, Relu_1]
2022-12-21 23:54:16,433 - Utils - INFO - ...... subset to store [Conv_0, Relu_1]
2022-12-21 23:54:16,433 - Utils - INFO - ...... subset to store [Conv_3, Relu_4]
2022-12-21 23:54:16,434 - Utils - INFO - ...... subset to store [Conv_3, Relu_4]
2022-12-21 23:54:16,434 - Utils - INFO - ...... subset to store [Add_6, Relu_7]
2022-12-21 23:54:16,435 - Utils - INFO - ...... subset to store [Add_6, Relu_7]
2022-12-21 23:54:16,435 - Utils - INFO - ...... subset to sto

                                      

2022-12-21 23:54:18,098 - Quant - INFO - Started Optimizing weight rounding of module: conv1


                                              

2022-12-21 23:54:34,695 - Quant - INFO - Started Optimizing weight rounding of module: layer1.0.conv1


                                              

2022-12-21 23:54:53,694 - Quant - INFO - Started Optimizing weight rounding of module: layer1.0.conv2


                                              

2022-12-21 23:55:12,395 - Quant - INFO - Started Optimizing weight rounding of module: layer1.1.conv1


                                               

2022-12-21 23:55:31,481 - Quant - INFO - Started Optimizing weight rounding of module: layer1.1.conv2


                                               

2022-12-21 23:55:50,205 - Quant - INFO - Started Optimizing weight rounding of module: layer2.0.conv1


                                               

2022-12-21 23:56:11,502 - Quant - INFO - Started Optimizing weight rounding of module: layer2.0.conv2


                                               

2022-12-21 23:56:30,401 - Quant - INFO - Started Optimizing weight rounding of module: layer2.0.downsample.0


                                               

2022-12-21 23:56:48,925 - Quant - INFO - Started Optimizing weight rounding of module: layer2.1.conv1


                                               

2022-12-21 23:57:08,328 - Quant - INFO - Started Optimizing weight rounding of module: layer2.1.conv2


                                               

2022-12-21 23:57:27,315 - Quant - INFO - Started Optimizing weight rounding of module: layer3.0.conv1


                                               

2022-12-21 23:57:48,586 - Quant - INFO - Started Optimizing weight rounding of module: layer3.0.conv2


                                               

2022-12-21 23:58:07,823 - Quant - INFO - Started Optimizing weight rounding of module: layer3.0.downsample.0


                                               

2022-12-21 23:58:26,842 - Quant - INFO - Started Optimizing weight rounding of module: layer3.1.conv1


                                               

2022-12-21 23:58:46,560 - Quant - INFO - Started Optimizing weight rounding of module: layer3.1.conv2


                                               

2022-12-21 23:59:05,891 - Quant - INFO - Started Optimizing weight rounding of module: layer4.0.conv1


                                               

2022-12-21 23:59:24,179 - Quant - INFO - Started Optimizing weight rounding of module: layer4.0.conv2


                                               

2022-12-21 23:59:50,407 - Quant - INFO - Started Optimizing weight rounding of module: layer4.0.downsample.0


                                               

2022-12-22 00:00:08,270 - Quant - INFO - Started Optimizing weight rounding of module: layer4.1.conv1


                                               

2022-12-22 00:00:34,791 - Quant - INFO - Started Optimizing weight rounding of module: layer4.1.conv2


                                               

2022-12-22 00:01:01,422 - Quant - INFO - Started Optimizing weight rounding of module: fc


100%|██████████| 68/68 [06:58<00:00,  6.16s/it]

2022-12-22 00:01:16,722 - Quant - INFO - Deleting model inputs from location: /tmp/adaround/
2022-12-22 00:01:16,782 - Quant - INFO - Completed Adarounding Model





In [14]:
dummy_input = torch.rand(1, 3, 224, 224)    # Shape for each ImageNet sample is (3 channels) x (224 height) x (224 width)
dummy_input = dummy_input.cuda()

sim = QuantizationSimModel(model=ada_model,
                           quant_scheme=QuantScheme.post_training_tf_enhanced,
                           dummy_input=dummy_input,
                           default_output_bw=4,
                           default_param_bw=4)

2022-12-22 00:01:17,018 - Quant - INFO - No config file provided, defaulting to config file at /usr/local/lib/python3.8/dist-packages/aimet_common/quantsim_config/default_config.json
2022-12-22 00:01:17,035 - Quant - INFO - Unsupported op type Squeeze
2022-12-22 00:01:17,036 - Quant - INFO - Unsupported op type Pad
2022-12-22 00:01:17,036 - Quant - INFO - Unsupported op type Mean
2022-12-22 00:01:17,039 - Utils - INFO - ...... subset to store [Conv_0, Relu_1]
2022-12-22 00:01:17,040 - Utils - INFO - ...... subset to store [Conv_0, Relu_1]
2022-12-22 00:01:17,040 - Utils - INFO - ...... subset to store [Conv_3, Relu_4]
2022-12-22 00:01:17,041 - Utils - INFO - ...... subset to store [Conv_3, Relu_4]
2022-12-22 00:01:17,041 - Utils - INFO - ...... subset to store [Add_6, Relu_7]
2022-12-22 00:01:17,042 - Utils - INFO - ...... subset to store [Add_6, Relu_7]
2022-12-22 00:01:17,042 - Utils - INFO - ...... subset to store [Conv_8, Relu_9]
2022-12-22 00:01:17,042 - Utils - INFO - ...... subs

In [15]:
sim.set_and_freeze_param_encodings(encoding_path=os.path.join("cifar100_w4", 'adaround.encodings'))

2022-12-22 00:01:17,181 - Quant - INFO - Setting quantization encodings for parameter: conv1.weight
2022-12-22 00:01:17,181 - Quant - INFO - Freezing quantization encodings for parameter: conv1.weight
2022-12-22 00:01:17,182 - Quant - INFO - Setting quantization encodings for parameter: layer1.0.conv1.weight
2022-12-22 00:01:17,182 - Quant - INFO - Freezing quantization encodings for parameter: layer1.0.conv1.weight
2022-12-22 00:01:17,183 - Quant - INFO - Setting quantization encodings for parameter: layer1.0.conv2.weight
2022-12-22 00:01:17,183 - Quant - INFO - Freezing quantization encodings for parameter: layer1.0.conv2.weight
2022-12-22 00:01:17,184 - Quant - INFO - Setting quantization encodings for parameter: layer1.1.conv1.weight
2022-12-22 00:01:17,184 - Quant - INFO - Freezing quantization encodings for parameter: layer1.1.conv1.weight
2022-12-22 00:01:17,184 - Quant - INFO - Setting quantization encodings for parameter: layer1.1.conv2.weight
2022-12-22 00:01:17,185 - Quant -

In [16]:
sim.compute_encodings(forward_pass_callback=pass_calibration_data,
                      forward_pass_callback_args=True)



64
128
192
256
320
384
448
512
576
640
704
768
832
896
960
1024


In [17]:
%time
accuracy(sim.model, test_loader, device='gpu')

CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 4.29 µs


100%|██████████| 157/157 [00:02<00:00, 56.59it/s]

Acc: tensor(0.4052, device='cuda:0')





In [1]:
!nvidia-smi

Wed Dec 21 23:23:02 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.57.02    Driver Version: 470.57.02    CUDA Version: 11.4     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla V100-SXM2...  Off  | 00000000:1E:00.0 Off |                    0 |
| N/A   31C    P0    39W / 300W |      0MiB / 16160MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+---------------------------------------------------------------------------