In [1]:
# Put the parent directory on the import path (only once), so that the
# `source` package imported in the next cell can be resolved.
# NOTE(review): presumably the notebook lives one level below the repo root — confirm.
import sys
if '..' not in sys.path:
    sys.path.append('..')

In [2]:
import torch
from torchvision.models import resnet18

from tqdm import tqdm
import numpy as np
import os

from aimet_torch.model_preparer import prepare_model
from aimet_torch.batch_norm_fold import fold_all_batch_norms
from aimet_common.defs import QuantScheme
from aimet_torch.quantsim import QuantizationSimModel
from aimet_torch.adaround.adaround_weight import Adaround, AdaroundParameters

from source.data import get_imagenet_test_loader, get_imagenet_train_val_loaders

2022-12-21 23:48:19,042 - root - INFO - AIMET


In [3]:
def accuracy(model, dataset_loader, device='cuda', num_classes=1000):
    """Compute the top-1 accuracy of ``model`` over ``dataset_loader``.

    Args:
        model: torch module that maps a batch of inputs to a 2-D tensor of
            per-class scores (classes along dimension 1). It is switched to
            eval mode; it is NOT moved to ``device`` by this function.
        dataset_loader: iterable yielding ``(inputs, labels)`` batches, where
            ``labels`` is a 1-D tensor of integer class indices.
        device: device the inputs are moved to before the forward pass.
        num_classes: unused; kept so existing calls that pass it keep working.

    Returns:
        Fraction of samples whose highest-scoring class equals the label,
        as a Python float in ``[0, 1]``.

    Raises:
        ZeroDivisionError: if ``dataset_loader`` yields no samples.
    """
    # Set BN and Dropout to eval regime
    model.eval()

    total_correct = 0
    # Count the samples actually seen instead of len(loader) * batch_size:
    # the latter overcounts whenever the last batch is smaller than the rest.
    total_seen = 0

    with torch.no_grad():
        for x, y in tqdm(dataset_loader):
            # Scores are brought back to the CPU before the argmax so that
            # labels and predictions are compared on the same device.
            scores = model(x.to(device)).cpu()
            predicted_class = scores.argmax(dim=1)
            target_class = y.cpu()

            total_correct += (predicted_class == target_class).sum().item()
            total_seen += target_class.shape[0]

    return total_correct / total_seen

In [4]:
# Build the ImageNet train / validation loaders. `train_loader` is read by
# `pass_calibration_data`; `val_loader` is handed to AdaRound further down.
# NOTE(review): `val_perc` is presumably the fraction of the training set held
# out for validation — confirm in `source.data`.
train_loader, val_loader = get_imagenet_train_val_loaders(
    data_root='/gpfs/gpfs0/k.sobolev/ILSVRC-12/',
    batch_size=500,
    num_workers=4,
    pin_memory=True,
    val_perc=0.04,
    shuffle=True,
    random_seed=5,
)

In [5]:
# Loader used for every `accuracy(...)` evaluation in this notebook;
# unshuffled, same data root and batch size as the training loaders.
test_loader = get_imagenet_test_loader(
    data_root='/gpfs/gpfs0/k.sobolev/ILSVRC-12/',
    batch_size=500,
    num_workers=4,
    pin_memory=True,
    shuffle=False,
)

In [6]:
def pass_calibration_data(sim_model, use_cuda):
    """Run training batches through ``sim_model`` without gradients.

    Intended as the ``forward_pass_callback`` given to ``compute_encodings``.
    Batches come from the module-level ``train_loader``; labels are ignored.
    After each batch the cumulative sample count (batch index times the
    loader's ``batch_size``) is printed, and the loop stops once that count
    reaches 1000.

    Args:
        sim_model: model to feed; it is put into eval mode.
        use_cuda: if truthy, inputs are moved to CUDA, otherwise to the CPU.
    """
    samples = 1000
    batch_size = train_loader.batch_size
    device = torch.device('cuda' if use_cuda else 'cpu')

    sim_model.eval()

    with torch.no_grad():
        for batch_cntr, (input_data, _) in enumerate(train_loader, start=1):
            sim_model(input_data.to(device))

            seen = batch_cntr * batch_size
            print(seen)
            if seen >= samples:
                break

In [7]:
# Float baseline: torchvision ResNet-18 with pretrained weights.
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favour of
# `weights=` — confirm against the installed version before changing it.
model = resnet18(pretrained=True)

In [8]:
# Move the model to the GPU; `accuracy` only moves the inputs, not the model.
model = model.cuda()

In [8]:
%time 
accuracy(model, test_loader, device='cuda')

CPU times: user 0 ns, sys: 3 µs, total: 3 µs
Wall time: 5.96 µs


100%|██████████| 500/500 [07:13<00:00,  1.15it/s]


0.6976

# Quantization

In [9]:
# Run AIMET's model preparer before building the quantization simulation.
# Per the log output, it adds modules for functional ops (e.g. `add`) and for
# reused modules (e.g. `layer1_0_relu_1`).
# NOTE: `model` is rebound here, so re-running this cell prepares an
# already-prepared model.
model = prepare_model(model)

2022-12-21 23:48:33,659 - Quant - INFO - Functional         : Adding new module for node: {add} 
2022-12-21 23:48:33,660 - Quant - INFO - Reused/Duplicate   : Adding new module for node: {layer1_0_relu_1} 
2022-12-21 23:48:33,661 - Quant - INFO - Functional         : Adding new module for node: {add_1} 
2022-12-21 23:48:33,661 - Quant - INFO - Reused/Duplicate   : Adding new module for node: {layer1_1_relu_1} 
2022-12-21 23:48:33,662 - Quant - INFO - Functional         : Adding new module for node: {add_2} 
2022-12-21 23:48:33,662 - Quant - INFO - Reused/Duplicate   : Adding new module for node: {layer2_0_relu_1} 
2022-12-21 23:48:33,663 - Quant - INFO - Functional         : Adding new module for node: {add_3} 
2022-12-21 23:48:33,663 - Quant - INFO - Reused/Duplicate   : Adding new module for node: {layer2_1_relu_1} 
2022-12-21 23:48:33,664 - Quant - INFO - Functional         : Adding new module for node: {add_4} 
2022-12-21 23:48:33,664 - Quant - INFO - Reused/Duplicate   : Adding ne



In [10]:
# Fold batch-norm layers into their paired conv layers (the log lists the
# Conv / BatchNormalization pairs that were found).
# The return value is discarded and `model` is used afterwards, so the fold
# presumably modifies `model` in place — confirm in the AIMET docs.
_ = fold_all_batch_norms(model, input_shapes=(1, 3, 224, 224))

2022-12-21 23:48:33,948 - Utils - INFO - ...... subset to store [Conv_0, BatchNormalization_1]
2022-12-21 23:48:33,949 - Utils - INFO - ...... subset to store [Conv_4, BatchNormalization_5]
2022-12-21 23:48:33,949 - Utils - INFO - ...... subset to store [Conv_7, BatchNormalization_8]
2022-12-21 23:48:33,949 - Utils - INFO - ...... subset to store [Conv_11, BatchNormalization_12]
2022-12-21 23:48:33,950 - Utils - INFO - ...... subset to store [Conv_14, BatchNormalization_15]
2022-12-21 23:48:33,950 - Utils - INFO - ...... subset to store [Conv_18, BatchNormalization_19]
2022-12-21 23:48:33,951 - Utils - INFO - ...... subset to store [Conv_21, BatchNormalization_22]
2022-12-21 23:48:33,951 - Utils - INFO - ...... subset to store [Conv_27, BatchNormalization_28]
2022-12-21 23:48:33,951 - Utils - INFO - ...... subset to store [Conv_30, BatchNormalization_31]
2022-12-21 23:48:33,952 - Utils - INFO - ...... subset to store [Conv_34, BatchNormalization_35]
2022-12-21 23:48:33,952 - Utils - IN

In [11]:
# Shape for each ImageNet sample is (3 channels) x (224 height) x (224 width)
dummy_input = torch.rand(1, 3, 224, 224).cuda()

# Quantization simulation of the prepared model with 4-bit parameters and
# 4-bit activations.
sim = QuantizationSimModel(
    model=model,
    quant_scheme=QuantScheme.post_training_tf_enhanced,
    dummy_input=dummy_input,
    default_output_bw=4,
    default_param_bw=4,
)

2022-12-21 23:48:37,791 - Quant - INFO - No config file provided, defaulting to config file at /usr/local/lib/python3.8/dist-packages/aimet_common/quantsim_config/default_config.json
2022-12-21 23:48:37,809 - Quant - INFO - Unsupported op type Squeeze
2022-12-21 23:48:37,810 - Quant - INFO - Unsupported op type Pad
2022-12-21 23:48:37,810 - Quant - INFO - Unsupported op type Mean
2022-12-21 23:48:37,813 - Utils - INFO - ...... subset to store [Conv_0, Relu_1]
2022-12-21 23:48:37,814 - Utils - INFO - ...... subset to store [Conv_0, Relu_1]
2022-12-21 23:48:37,814 - Utils - INFO - ...... subset to store [Conv_3, Relu_4]
2022-12-21 23:48:37,815 - Utils - INFO - ...... subset to store [Conv_3, Relu_4]
2022-12-21 23:48:37,815 - Utils - INFO - ...... subset to store [Add_6, Relu_7]
2022-12-21 23:48:37,815 - Utils - INFO - ...... subset to store [Add_6, Relu_7]
2022-12-21 23:48:37,816 - Utils - INFO - ...... subset to store [Conv_8, Relu_9]
2022-12-21 23:48:37,816 - Utils - INFO - ...... subs

In [12]:
# Print the per-layer quantizer report (bit-widths and whether an encoding is
# present). Encodings are only computed by the `compute_encodings` call in the
# next cell, so every quantizer is reported as `encoding-present=False` here.
print(sim)

-------------------------
Quantized Model Report
-------------------------
----------------------------------------------------------
Layer: conv1
  Input[0]: bw=4, encoding-present=False
  -------
  Param[weight]: bw=4, encoding-present=False
  -------
  Param[bias]: Not quantized
  -------
  Output[0]: Not quantized
  -------
----------------------------------------------------------
Layer: relu
  Input[0]: Not quantized
  -------
  Output[0]: bw=4, encoding-present=False
  -------
----------------------------------------------------------
Layer: maxpool
  Input[0]: Not quantized
  -------
  Output[0]: bw=4, encoding-present=False
  -------
----------------------------------------------------------
Layer: layer1.0.conv1
  Input[0]: Not quantized
  -------
  Param[weight]: bw=4, encoding-present=False
  -------
  Param[bias]: Not quantized
  -------
  Output[0]: Not quantized
  -------
----------------------------------------------------------
Layer: layer1.0.relu
  Input[0]: Not quan

In [13]:
# Calibrate the quantizers by running `pass_calibration_data` on the sim model.
# The callback args value (`True`) is what the callback receives as `use_cuda`.
sim.compute_encodings(
    forward_pass_callback=pass_calibration_data,
    forward_pass_callback_args=True,
)

500
1000


In [14]:
%time
accuracy(sim.model, test_loader, device='cuda')

CPU times: user 1e+03 ns, sys: 0 ns, total: 1e+03 ns
Wall time: 4.05 µs


100%|██████████| 100/100 [02:15<00:00,  1.35s/it]


0.06038

# AdaRound

In [9]:
# Prepare the model for the AdaRound experiment (same step as in the
# Quantization section).
# NOTE(review): the execution counter restarts at 9 here, which suggests this
# section was run in a separate session starting from the float model. On a
# linear Run All, `model` would already be prepared and BN-folded at this
# point — confirm the intended starting state.
model = prepare_model(model)

2022-12-21 21:51:46,958 - Quant - INFO - Functional         : Adding new module for node: {add} 
2022-12-21 21:51:46,959 - Quant - INFO - Reused/Duplicate   : Adding new module for node: {layer1_0_relu_1} 
2022-12-21 21:51:46,959 - Quant - INFO - Functional         : Adding new module for node: {add_1} 
2022-12-21 21:51:46,960 - Quant - INFO - Reused/Duplicate   : Adding new module for node: {layer1_1_relu_1} 
2022-12-21 21:51:46,961 - Quant - INFO - Functional         : Adding new module for node: {add_2} 
2022-12-21 21:51:46,962 - Quant - INFO - Reused/Duplicate   : Adding new module for node: {layer2_0_relu_1} 
2022-12-21 21:51:46,962 - Quant - INFO - Functional         : Adding new module for node: {add_3} 
2022-12-21 21:51:46,963 - Quant - INFO - Reused/Duplicate   : Adding new module for node: {layer2_1_relu_1} 
2022-12-21 21:51:46,964 - Quant - INFO - Functional         : Adding new module for node: {add_4} 
2022-12-21 21:51:46,964 - Quant - INFO - Reused/Duplicate   : Adding ne



In [10]:
# Fold batch-norm layers into their paired conv layers before AdaRound (the
# log lists the Conv / BatchNormalization pairs that were found).
# The return value is discarded; the fold presumably modifies `model` in
# place — confirm in the AIMET docs.
_ = fold_all_batch_norms(model, input_shapes=(1, 3, 224, 224))

2022-12-21 21:51:47,252 - Utils - INFO - ...... subset to store [Conv_0, BatchNormalization_1]
2022-12-21 21:51:47,253 - Utils - INFO - ...... subset to store [Conv_4, BatchNormalization_5]
2022-12-21 21:51:47,253 - Utils - INFO - ...... subset to store [Conv_7, BatchNormalization_8]
2022-12-21 21:51:47,253 - Utils - INFO - ...... subset to store [Conv_11, BatchNormalization_12]
2022-12-21 21:51:47,254 - Utils - INFO - ...... subset to store [Conv_14, BatchNormalization_15]
2022-12-21 21:51:47,254 - Utils - INFO - ...... subset to store [Conv_18, BatchNormalization_19]
2022-12-21 21:51:47,255 - Utils - INFO - ...... subset to store [Conv_21, BatchNormalization_22]
2022-12-21 21:51:47,255 - Utils - INFO - ...... subset to store [Conv_27, BatchNormalization_28]
2022-12-21 21:51:47,255 - Utils - INFO - ...... subset to store [Conv_30, BatchNormalization_31]
2022-12-21 21:51:47,256 - Utils - INFO - ...... subset to store [Conv_34, BatchNormalization_35]
2022-12-21 21:51:47,256 - Utils - IN

In [11]:
# AdaRound settings: draw the optimisation data from `val_loader`, using as
# many whole batches as fit into 2000 samples, with 10000 iterations by default.
params = AdaroundParameters(
    data_loader=val_loader,
    num_batches=2000 // val_loader.batch_size,
    default_num_iterations=10000,
)

In [12]:
# Random GPU tensor with the shape of a single ImageNet sample
# (1 x 3 x 224 x 224); passed to `apply_adaround` below.
dummy_input = torch.rand(1, 3, 224, 224).cuda()

In [13]:
# Create the directory that `apply_adaround` receives as `path` below;
# `exist_ok=True` makes the cell safe to re-run.
os.makedirs('./imagenet_w4/', exist_ok=True)

In [14]:
%time
ada_model = Adaround.apply_adaround(model, dummy_input, params,
                                    path="imagenet_w4", 
                                    filename_prefix='adaround', 
                                    default_param_bw=4,
                                    default_quant_scheme=QuantScheme.post_training_tf_enhanced)

CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 3.81 µs
2022-12-21 21:51:51,361 - Quant - INFO - No config file provided, defaulting to config file at /usr/local/lib/python3.8/dist-packages/aimet_common/quantsim_config/default_config.json
2022-12-21 21:51:51,379 - Quant - INFO - Unsupported op type Squeeze
2022-12-21 21:51:51,380 - Quant - INFO - Unsupported op type Pad
2022-12-21 21:51:51,380 - Quant - INFO - Unsupported op type Mean
2022-12-21 21:51:51,384 - Utils - INFO - ...... subset to store [Conv_0, Relu_1]
2022-12-21 21:51:51,384 - Utils - INFO - ...... subset to store [Conv_0, Relu_1]
2022-12-21 21:51:51,385 - Utils - INFO - ...... subset to store [Conv_3, Relu_4]
2022-12-21 21:51:51,385 - Utils - INFO - ...... subset to store [Conv_3, Relu_4]
2022-12-21 21:51:51,385 - Utils - INFO - ...... subset to store [Add_6, Relu_7]
2022-12-21 21:51:51,386 - Utils - INFO - ...... subset to store [Add_6, Relu_7]
2022-12-21 21:51:51,386 - Utils - INFO - ...... subset to store [Conv

                                      

2022-12-21 21:52:17,590 - Quant - INFO - Started Optimizing weight rounding of module: conv1


                                                

2022-12-21 21:53:14,646 - Quant - INFO - Started Optimizing weight rounding of module: layer1.0.conv1


                                                

2022-12-21 21:53:46,338 - Quant - INFO - Started Optimizing weight rounding of module: layer1.0.conv2


                                              

2022-12-21 21:54:15,999 - Quant - INFO - Started Optimizing weight rounding of module: layer1.1.conv1


                                               

2022-12-21 21:54:48,119 - Quant - INFO - Started Optimizing weight rounding of module: layer1.1.conv2


                                               

2022-12-21 21:55:18,132 - Quant - INFO - Started Optimizing weight rounding of module: layer2.0.conv1


                                               

2022-12-21 21:55:46,201 - Quant - INFO - Started Optimizing weight rounding of module: layer2.0.conv2


                                               

2022-12-21 21:56:09,770 - Quant - INFO - Started Optimizing weight rounding of module: layer2.0.downsample.0


                                               

2022-12-21 21:56:34,446 - Quant - INFO - Started Optimizing weight rounding of module: layer2.1.conv1


                                               

2022-12-21 21:56:58,910 - Quant - INFO - Started Optimizing weight rounding of module: layer2.1.conv2


                                               

2022-12-21 21:57:22,730 - Quant - INFO - Started Optimizing weight rounding of module: layer3.0.conv1


                                               

2022-12-21 21:57:46,916 - Quant - INFO - Started Optimizing weight rounding of module: layer3.0.conv2


                                               

2022-12-21 21:58:09,051 - Quant - INFO - Started Optimizing weight rounding of module: layer3.0.downsample.0


                                               

2022-12-21 21:58:33,840 - Quant - INFO - Started Optimizing weight rounding of module: layer3.1.conv1


                                               

2022-12-21 21:58:56,642 - Quant - INFO - Started Optimizing weight rounding of module: layer3.1.conv2


                                               

2022-12-21 21:59:19,093 - Quant - INFO - Started Optimizing weight rounding of module: layer4.0.conv1


                                               

2022-12-21 21:59:44,111 - Quant - INFO - Started Optimizing weight rounding of module: layer4.0.conv2


                                               

2022-12-21 22:00:13,225 - Quant - INFO - Started Optimizing weight rounding of module: layer4.0.downsample.0


                                               

2022-12-21 22:00:35,344 - Quant - INFO - Started Optimizing weight rounding of module: layer4.1.conv1


                                               

2022-12-21 22:01:04,883 - Quant - INFO - Started Optimizing weight rounding of module: layer4.1.conv2


                                               

2022-12-21 22:01:34,193 - Quant - INFO - Started Optimizing weight rounding of module: fc


100%|██████████| 68/68 [09:37<00:00,  8.50s/it]

2022-12-21 22:01:55,316 - Quant - INFO - Deleting model inputs from location: /tmp/adaround/
2022-12-21 22:01:55,421 - Quant - INFO - Completed Adarounding Model





In [15]:
# Shape for each ImageNet sample is (3 channels) x (224 height) x (224 width)
dummy_input = torch.rand(1, 3, 224, 224).cuda()

# Quantization simulation around the AdaRounded model, with the same 4-bit
# parameter / 4-bit activation settings as the plain quantization run.
sim = QuantizationSimModel(
    model=ada_model,
    quant_scheme=QuantScheme.post_training_tf_enhanced,
    dummy_input=dummy_input,
    default_output_bw=4,
    default_param_bw=4,
)

2022-12-21 22:01:55,657 - Quant - INFO - No config file provided, defaulting to config file at /usr/local/lib/python3.8/dist-packages/aimet_common/quantsim_config/default_config.json
2022-12-21 22:01:55,674 - Quant - INFO - Unsupported op type Squeeze
2022-12-21 22:01:55,675 - Quant - INFO - Unsupported op type Pad
2022-12-21 22:01:55,675 - Quant - INFO - Unsupported op type Mean
2022-12-21 22:01:55,678 - Utils - INFO - ...... subset to store [Conv_0, Relu_1]
2022-12-21 22:01:55,679 - Utils - INFO - ...... subset to store [Conv_0, Relu_1]
2022-12-21 22:01:55,679 - Utils - INFO - ...... subset to store [Conv_3, Relu_4]
2022-12-21 22:01:55,680 - Utils - INFO - ...... subset to store [Conv_3, Relu_4]
2022-12-21 22:01:55,680 - Utils - INFO - ...... subset to store [Add_6, Relu_7]
2022-12-21 22:01:55,681 - Utils - INFO - ...... subset to store [Add_6, Relu_7]
2022-12-21 22:01:55,681 - Utils - INFO - ...... subset to store [Conv_8, Relu_9]
2022-12-21 22:01:55,681 - Utils - INFO - ...... subs

In [16]:
# Load the parameter encodings from `imagenet_w4/adaround.encodings` (the
# `path` / `filename_prefix` given to `apply_adaround`) and freeze them; the
# log shows a "Setting" and a "Freezing" line per weight tensor.
# Presumably this keeps the following `compute_encodings` call from
# overwriting the AdaRound weight encodings — confirm in the AIMET docs.
sim.set_and_freeze_param_encodings(encoding_path=os.path.join("imagenet_w4", 'adaround.encodings'))

2022-12-21 22:01:55,700 - Quant - INFO - Setting quantization encodings for parameter: conv1.weight
2022-12-21 22:01:55,700 - Quant - INFO - Freezing quantization encodings for parameter: conv1.weight
2022-12-21 22:01:55,701 - Quant - INFO - Setting quantization encodings for parameter: layer1.0.conv1.weight
2022-12-21 22:01:55,701 - Quant - INFO - Freezing quantization encodings for parameter: layer1.0.conv1.weight
2022-12-21 22:01:55,702 - Quant - INFO - Setting quantization encodings for parameter: layer1.0.conv2.weight
2022-12-21 22:01:55,702 - Quant - INFO - Freezing quantization encodings for parameter: layer1.0.conv2.weight
2022-12-21 22:01:55,703 - Quant - INFO - Setting quantization encodings for parameter: layer1.1.conv1.weight
2022-12-21 22:01:55,703 - Quant - INFO - Freezing quantization encodings for parameter: layer1.1.conv1.weight
2022-12-21 22:01:55,704 - Quant - INFO - Setting quantization encodings for parameter: layer1.1.conv2.weight
2022-12-21 22:01:55,704 - Quant -

In [17]:
# Calibrate the remaining quantizers by running `pass_calibration_data` on the
# sim model; the callback args value (`True`) is what the callback receives as
# `use_cuda`.
sim.compute_encodings(
    forward_pass_callback=pass_calibration_data,
    forward_pass_callback_args=True,
)



500
1000
1500


In [18]:
%time
accuracy(sim.model, test_loader, device='cuda')

CPU times: user 2 µs, sys: 1e+03 ns, total: 3 µs
Wall time: 4.29 µs


100%|██████████| 100/100 [02:12<00:00,  1.32s/it]


0.22722