# ImageNet Quantization with ONNX Reconstructor

This Jupyter notebook shows how a network is reconstructed using the ONNX-based generator and then quantized to obtain the results.

## 1. Load Dependencies

In [1]:
%load_ext autoreload
%autoreload 2
import os
# Order CUDA devices by PCI bus id and expose only device "0" to this process.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": "0",
})

### 1.1 Load the libraries and modules

In [2]:
import sys
sys.path.insert(0, "..")

import warnings
warnings.filterwarnings("ignore")

import onnx

from pprint import pprint
from onnx import numpy_helper
%matplotlib inline

import cv2
from torchvision.utils import save_image
from torchvision import transforms

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)


device = 'cuda' if torch.cuda.is_available() else 'cpu'
print('Which device: ', device)

Which device:  cuda


#### 1.1.1 Set OpSet for ONNX 

In [3]:
# ONNX opset version.
OPSET = 11

### 1.2 Check which models are available

In [4]:
# Collect the model names returned for pretrained=True and report how many there are.
# NOTE(review): `list_models` is presumably timm's (per the message below) -- confirm the import.
models = list_models(pretrained=True)
print(f'Available pretrained models in the TIMM library: {len(models)}')

Available pretrained models in the TIMM library: 452


### 1.3 Prerequisite Util Class

In [6]:
class ImagenetWithArgmax(torch.nn.Module):
    """Wrap a model so that its forward pass ends with an ``ArgMax`` layer.

    Args:
        model: the module whose output is fed into the ``ArgMax`` layer.
    """

    def __init__(self, model):
        super().__init__()
        self.model = model
        self.argmax_layer = ArgMax()

    def forward(self, x):
        """Run the wrapped model on ``x`` and return the ``ArgMax`` layer's result."""
        return self.argmax_layer(self.model(x))

### 1.4 Load the model with timm models

In [7]:
model_name = 'resnet50'
# Running record of the pipeline's results; entries are added by later cells and the
# dict is pretty-printed and tabulated at the end of the notebook.
verification_dict = {'model_name': model_name}

# load the model
# Alternative kept from the original: obtain the model by name instead of from disk.
# model = load_timm_model(model_name).to(device)
# NOTE(review): absolute, machine-specific path. torch.load unpickles the file, so only
# load checkpoints from a trusted source.
model = torch.load('/home/ubuntu/data/models/timm_models/pytorch/resnet50_0.88.pt')
# Rebind `model` to the wrapper so the ArgMax layer is applied to the loaded model's output.
model = ImagenetWithArgmax(model)

In [8]:
# The trailing ';' suppresses the cell output; drop it to display the module.
model;

## 2. Onnx Syntax based DFS and Generator

### 2.1 ONNX DataFrame for Syntax

In [9]:
# --- Input size and file naming ---
imsize = (1, 3, 224, 224)   # handed to the syntax and frame-rate steps
mapper_idx = 0              # only appears in the CSV file name below

# --- Output locations ---
weight_folder = './uploads'
csv_file_name = f'{model_name}_dfs({mapper_idx}).csv'
os.makedirs(weight_folder, exist_ok=True)

# --- Data loading ---
batch_size = 100
batches = 10
use_index = 0               # use index of dataloader

# --- Quantization ---
w_qmax = 60                 # weight quantization levels
max_analog_gain = 768       # maximum analog gain for the weight quantized model

# --- Dumps ---
dump_mode = 1               # to get dump data, please enable this

In [10]:
# Build the ONNX syntax object for `model`. Per the original note, this generates the
# CSV and ONNX files (saved under `weight_folder`) that can be used directly in model generation.
# `device` is the value detected in the import cell; it was previously hard-coded to
# 'cuda' here, unlike the other cells.
syntax = OnnxSyntaxDFS(model_pt=model, model_onnx=None, model_name=model_name,
                       imsize=imsize, onnx_save_path=weight_folder,
                       csv_file_name=csv_file_name, device=device, opset=OPSET)

# NOTE(review): 'Passed' is recorded unconditionally once the constructor returns, even if
# it printed a diagnostic such as "This model cannot be fully generated" -- confirm whether
# OnnxSyntaxDFS exposes a status that should be checked here.
verification_dict['syntax_checked'] = 'Passed'

Exception raised from operator() at /opt/conda/conda-bld/pytorch_1640811803361/work/aten/src/ATen/native/TensorAdvancedIndexing.cpp:1033 (most recent call first):
frame #0: c10::Error::Error(c10::SourceLocation, std::string) + 0x42 (0x7f6100aaad62 in /home/ubuntu/anaconda3/envs/pt_110/lib/python3.9/site-packages/torch/lib/libc10.so)
frame #1: <unknown function> + 0x133ef07 (0x7f614754af07 in /home/ubuntu/anaconda3/envs/pt_110/lib/python3.9/site-packages/torch/lib/libtorch_cpu.so)
frame #2: at::native::index_select_out_cpu_(at::Tensor const&, long, at::Tensor const&, at::Tensor&) + 0xbea (0x7f614756965a in /home/ubuntu/anaconda3/envs/pt_110/lib/python3.9/site-packages/torch/lib/libtorch_cpu.so)
frame #3: at::native::index_select_cpu_(at::Tensor const&, long, at::Tensor const&) + 0xe6 (0x7f614756add6 in /home/ubuntu/anaconda3/envs/pt_110/lib/python3.9/site-packages/torch/lib/libtorch_cpu.so)
frame #4: <unknown function> + 0x1a588e2 (0x7f6147c648e2 in /home/ubuntu/anaconda3/envs/pt_110/li

This model cannot be fully generated


You can check **syntax.basic_dfs** for the graph details and **syntax.dfs** for the complete model, which is based on the CART architecture. To select ingress and egress layers, look at *syntax.dfs*.

You can use *syntax.basic_dfs* to decide at which point generation of the final dfs should stop.

In [11]:
# The trailing ';' suppresses the cell output; drop it to inspect the basic graph table.
syntax.basic_dfs;

In [12]:
# Displays the whole table: pandas' row/column display limits were lifted in the import cell.
syntax.dfs

Unnamed: 0,layer_type,index,source,residual,concat,concat_dict,weight_name,bias_name,conv_type,activation,value,mode,skip_from,up_scale,infilt,ofilt,dilations,pads,kernel_shape,strides,ceil_mode,group,axis,keepdim,blocksize,split_params,skip_to,dest
0,Conv,0,input.1,0,0,{},model.conv1.weight,model.conv1.bias,CP,0,0,,-1,-1,3,64,"[1, 1]","[3, 3, 3, 3]","[7, 7]","[2, 2]",-1,1,2,1,-1,[],-1,10
1,MaxPool,10,0,0,0,{},,,,0,0,,-1,-1,-1,-1,-1,"[1, 1, 1, 1]","[3, 3]","[2, 2]",0,-1,2,0,-1,[],-1,"[20, 50]"
2,Conv,20,10,0,0,{},model.layer1.0.conv1.weight,model.layer1.0.conv1.bias,CP,0,0,,-1,-1,64,64,"[1, 1]","[0, 0, 0, 0]","[1, 1]","[1, 1]",-1,1,1,1,-1,[],-1,[30]
3,Conv,30,20,0,0,{},model.layer1.0.conv2.weight,model.layer1.0.conv2.bias,CP,0,0,,-1,-1,64,64,"[1, 1]","[1, 1, 1, 1]","[3, 3]","[1, 1]",-1,1,1,1,-1,[],-1,[40]
4,Conv,40,30,0,0,{},model.layer1.0.conv3.weight,model.layer1.0.conv3.bias,C,0,0,,-1,-1,64,256,"[1, 1]","[0, 0, 0, 0]","[1, 1]","[1, 1]",-1,1,1,1,-1,[],-1,[50]
5,Conv,50,10,1,0,{},model.layer1.0.downsample.0.weight,model.layer1.0.downsample.0.bias,CAR,Relu,0,,40,-1,64,256,"[1, 1]","[0, 0, 0, 0]","[1, 1]","[1, 1]",-1,1,1,1,-1,[],80,"[60, 80]"
6,Conv,60,50,0,0,{},model.layer1.1.conv1.weight,model.layer1.1.conv1.bias,CP,0,0,,-1,-1,256,64,"[1, 1]","[0, 0, 0, 0]","[1, 1]","[1, 1]",-1,1,1,1,-1,[],-1,[70]
7,Conv,70,60,0,0,{},model.layer1.1.conv2.weight,model.layer1.1.conv2.bias,CP,0,0,,-1,-1,64,64,"[1, 1]","[1, 1, 1, 1]","[3, 3]","[1, 1]",-1,1,1,1,-1,[],-1,[80]
8,Conv,80,70,1,0,{},model.layer1.1.conv3.weight,model.layer1.1.conv3.bias,CAR,Relu,0,,50,-1,64,256,"[1, 1]","[0, 0, 0, 0]","[1, 1]","[1, 1]",-1,1,1,1,-1,[],110,"[90, 110]"
9,Conv,90,80,0,0,{},model.layer1.2.conv1.weight,model.layer1.2.conv1.bias,CP,0,0,,-1,-1,256,64,"[1, 1]","[0, 0, 0, 0]","[1, 1]","[1, 1]",-1,1,1,1,-1,[],-1,[100]


### 2.2 Reconstructed Model with ONNX generator

In [13]:
# Generate the reconstructed model from the syntax table and the ONNX model. The source
# name is taken from the 'source' column of row 0 of syntax.dfs; the result is moved to `device`.
M_reconstructed = ModelGeneratorONNX(syntax.dfs, syntax.model_onnx, split_no=0, 
                                     start_index=0, source_name=syntax.dfs.loc[0,'source'], 
                                     verbose=False).to(torch.device(device))
# Save the whole module object as '<model_name>_aix.pt' inside weight_folder.
_aix_save_path = os.path.join(weight_folder, f'{model_name}_aix.pt')
torch.save(M_reconstructed, _aix_save_path)

verification_dict['AIX Model Generated'] = 'yes'

In [14]:
# The trailing ';' suppresses the cell output; drop it to display the reconstructed module.
M_reconstructed;

#### 2.2.1 Check Model Summary of the Reconstructed Model

In [15]:
# The returned summary is displayed as the cell output.
model_summary(M_reconstructed)

{'ModelGeneratorONNX': 1,
 'ModuleList': 1,
 'CART': 54,
 'CART_ReLU': 49,
 'CART_Conv2d': 54,
 'MaxPool2d': 1,
 'CART_Add': 16,
 'AdaptiveAvgPool2d': 1,
 'ArgMax': 1,
 'Identity': 1,
 'Parameters': '25.530472M'}

### 2.3 Calculate Frame Rates and Get Mapper Results

#### 2.3.1 Frame Rate

In [16]:
# Frame-rate analysis of the reconstructed model for a single chip at a requested 1000 Hz.
fr = FrameRate(M_reconstructed, imsize, verbose=0, frame_rate=1000, # set -1 to get maximum frame rate
               device=device, chip = Chip(n_chips=1), 
               posneg_input=False)
fr.validate()
fr.create_report()

# NOTE(review): the key says 'Max_Frame_Rate' but the value is fr.current_frate; with
# frame_rate=1000 the summary at the end of the notebook shows 1000 while the report lists
# a separate "Max Frame Rate" -- confirm which value is intended.
verification_dict['Max_Frame_Rate'] = int(fr.current_frate)
# Number of entries in fr.mac_index, and number of distinct values among them.
verification_dict['Min Macs Required'] = len(fr.mac_index)
verification_dict['Layers'] = len(np.unique(fr.mac_index))
verification_dict['Latency'] = fr.latency

+----------------+---------+
|   Parameter    |  Value  |
+----------------+---------+
|     Chips      |    1    |
|   Frame Rate   | 1000 Hz |
| Max Frame Rate | 1275 Hz |
|      Time      |  784 us |
| Available Macs |   120   |
|   Used Macs    |    97   |
+----------------+---------+


+----------------+---------+
|   Parameter    |  Value  |
+----------------+---------+
|     Chips      |    1    |
|   Frame Rate   | 1000 Hz |
| Max Frame Rate | 1275 Hz |
|      Time      |  784 us |
| Available Macs |   120   |
|   Used Macs    |    97   |
+----------------+---------+




(Optional) Dataframe of Frame Rate 

In [17]:
# fr.dfa

In [18]:
# fr.df.columns

#### 2.3.2 Mapper Results

In [19]:
# Argument reminder from the original note (presumably the Mapper signature -- confirm):
# Mapper FR, dfs, chip=Chip(n_chips=1), model_index=0, verbose=0, yolo=0
# Only the frame-rate object and the syntax table are passed; everything else is left at its default.
mapper = Mapper(fr, syntax.dfs)

Mapper PE Allocation Table

In [20]:
# mapper.pdf

Mapper Layer Information (Optional)

In [21]:
# mapper.ldf

Complete Mapper Report (Optional)

In [22]:
# The report itself is printed by the call; the trailing ';' hides the call's return value.
mapper.create_report();

Mapped all layers

+--------------------+-----------------+
|     Power Type     |    Value (mW)   |
+--------------------+-----------------+
|   input_uc_power   |      165.9      |
|  input_dma_power   |       16.2      |
|   pooling_power    |       3.0       |
|     sram_power     |       65.7      |
|    pf_dma_power    |       39.5      |
|  output_dma_power  |       26.6      |
|  output_uc_power   |      198.0      |
| output_simd_power  |      291.2      |
|    non_pe_power    |      214.0      |
|   gr_sram_power    |       12.8      |
|    non_rc_power    |      106.9      |
|     pcie_power     |       6.0       |
|  ---------------   | --------------- |
| total_non_pe_power |      339.7      |
|   total_pe_power   |      806.1      |
| total_analog_power |      262.9      |
|  ---------------   | --------------- |
|    [1mtotal_power[0m     |      [1m1408.7[0m     |
+--------------------+-----------------+

Total PE: 25

################################################

### 2.4 Validation 

#### 2.4.1 Validate Original Model on Dataloader

In [23]:
print('Validation for Original Model ...\n')
# Evaluate the wrapped original model on `batches` batches of `batch_size` images.
# Only the first returned value is used; `b` and `_` are ignored.
fp_accuracy, b ,_ = validate_model(model, image_loader(batch_size), batches=batches, argmax=False)

verification_dict['FP Accuracy'] = fp_accuracy

Validation for Original Model ...

0 0.88
1 0.9
2 0.76
3 0.85
4 0.79
5 0.91
6 0.86
7 0.87
8 0.96
9 0.87
1000 0.865


#### 2.4.2 Validate Reconstructed Model on Dataloader

In [24]:
print('Validation for Reconstructed Model ...\n')
# Same call and settings as the original-model validation, so the two accuracies are
# directly comparable. Only the first returned value is used; `b` and `_` are ignored.
rfp_accuracy, b,_ = validate_model(M_reconstructed, image_loader(batch_size), batches=batches, argmax=False)

verification_dict['Reconstructed FP Accuracy'] = rfp_accuracy

Validation for Reconstructed Model ...

0 0.88
1 0.9
2 0.76
3 0.85
4 0.79
5 0.91
6 0.86
7 0.87
8 0.96
9 0.87
1000 0.865


## 3. Wrap the Model with ConvSimdSplits

Wrap the reconstructed model with ConvSimd splits, then apply the analog split.

In [25]:
# Step 1: wrap the reconstructed model with the ConvSimd split.
M_convsimdsplit = convsimdsplit_wrap_cart_model(M_reconstructed)
# Step 2: analog split with input and output split sizes of 4096*2 = 8192 each.
M_split = analog_split_model(M_convsimdsplit, inp_split_size=4096*2, out_split_size=4096*2)

## 4. Model Quantization Stage

In [26]:
# Dump directory name passed to the quantizer. (`use_index` keeps the value set in the
# configuration cell; the former `use_index = use_index` self-assignment was a no-op.)
directory = f"dump_{model_name}"

### 4.1 Create Quantizer Object from the `ModelQuantizerImagenet` class

In [27]:
# The quantizer receives both the split model and the reconstructed model, plus the dump
# directory, the batch size, and the weight quantization level (w_qmax).
quantizer = ModelQuantizerImagenet(M_split, M_reconstructed, directory, batch_size=batch_size, wqmax=w_qmax)

Last Layer 5600


### 4.2 Run Dynamic mode on Quantizer 

In this case, we are going to run the dynamic mode.

In [28]:
# Placeholder value; it is overwritten by the result of dynamic_mode below.
accuracy_dynamic = 0.0

print(f'Run Dynamic Mode for Analog Gain of {max_analog_gain}\n')
# The first returned value is discarded; the accuracy is the second one.
_, accuracy_dynamic, elist, delta_t = quantizer.dynamic_mode(image_loader, use_index, max_analog_gain)

verification_dict['Quantized Dynamic Accuracy'] = accuracy_dynamic

Run Dynamic Mode for Analog Gain of 768

0 0.84
100 0.84
Delta_t 57.78186845779419


#### 4.2.1 (Optional) Check for different Use Indexes

In [29]:
# for _use_index in [1, 4, 8]:
#     _, _accuracy_dynamic, _elist, _delta_t = quantizer.dynamic_mode(image_loader, _use_index, max_analog_gain)

### 4.3 Run Static mode on Quantizer

In this case, we are going to run the static mode.

In [30]:
# Placeholder value; it is overwritten by the result of static_mode below.
accuracy_static = 0.0

print(f'Run Static Mode for Analog Gain of {max_analog_gain}\n')
# Unlike dynamic_mode, the analog gain is not passed as an argument here; it is set on the
# quantizer before the call.
quantizer.max_analog_gain = max_analog_gain
# Rebinds `elist` and `delta_t` from the dynamic-mode cell.
accuracy_static, elist, delta_t = quantizer.static_mode(image_loader, use_index, verify_batches=1,skip=0)

verification_dict['Quantized Static Accuracy'] = accuracy_static

Run Static Mode for Analog Gain of 768

0 0.84
100 0.84


### 4.4 Prepare JSON files

#### 4.4.1 Util Functions for Image Creation

In [31]:
def get_image(L, batches=None, skip=0):
    """Return the first image of the first selected batch with normalization undone.

    Iterates over ``L`` (an iterable yielding ``(x, t)`` pairs), ignores the first
    ``skip`` batches, and takes element 0 of the next batch after applying the inverse
    of a Normalize with mean (0.485, 0.456, 0.406) and std (0.229, 0.224, 0.225).
    The shapes of the batch and of the returned image are printed.

    Args:
        L: iterable of ``(x, t)`` pairs; ``t`` is not used.
        batches: if not None, iteration stops without selecting an image when
            ``batches == i - skip`` for the current batch index ``i`` (for the
            default ``skip >= 0`` this only happens when ``batches == 0``).
        skip: number of leading batches to ignore.

    Returns:
        ``unnormalize(x)[0]`` for the selected batch ``x``.

    Raises:
        ValueError: if no batch was selected (``L`` is exhausted before index
            ``skip`` or the ``batches`` stop condition fires first). Previously this
            surfaced as an UnboundLocalError on ``img``.
    """
    # Inverse of Normalize(mean=m, std=s): Normalize(mean=-m/s, std=1/s) maps x to x*s + m.
    unnormalize = transforms.Normalize(mean=[-0.485/0.229, -0.456/0.224, -0.406/0.225],
                                     std=[1/0.229, 1/0.224, 1/0.225])
    img = None
    for i, (x, _) in enumerate(L):
        if i < skip:
            continue
        if batches is not None and batches == i - skip:
            break
        print(x.shape)
        img = unnormalize(x)[0]
        print(img.shape)
        # Only one image is needed, so stop after the first selected batch.
        break

    if img is None:
        raise ValueError(
            f'get_image: no batch selected (skip={skip}, batches={batches}); '
            'the loader is empty, shorter than `skip`, or `batches` stopped the loop'
        )
    return img

### 4.5 Prepare Sample Image for the Quantizer

In [32]:
# The directory name encodes the quantizer's wqmax and max_analog_gain settings.
image_directory = f'dump_{model_name}_q{quantizer.wqmax}_ag_{quantizer.max_analog_gain}/'
# Loader with batch size 1, skipping `use_index` batches, so the image at index `use_index` is picked.
image_predict = get_image(image_loader(1), batches=1, skip=use_index) 
os.makedirs(f'{image_directory}data_dumps', exist_ok=True)
save_image(image_predict, f'{image_directory}data_dumps/image_predict.jpg')

torch.Size([1, 3, 224, 224])
torch.Size([3, 224, 224])


### 4.6 Dump FSIM Data

In [33]:
# Default entry; replaced by 'Done' only when the dump below runs.
verification_dict['FSIM Data'] = 'None'

if dump_mode:
    quantizer.max_analog_gain = max_analog_gain
    # NOTE: this changes the quantizer's batch size to 1 and does not restore it afterwards.
    quantizer.batch_size = 1
    # Same static-mode call as before, now with dump_data=1.
    accuracy, elist, delta_t = quantizer.static_mode(image_loader, use_index, verify_batches=1, dump_data=1)
    
    verification_dict['FSIM Data'] = 'Done'

0 1.0
1 1.0


In [34]:
# Write the backend JSON for this use index and model name, then the frontend JSON.
quantizer.dump_backend_json(use_index ,model_name)
quantizer.create_frontend_json()

verification_dict['Model JSON'] = 'done'

In [35]:
# Pretty-print every entry collected in verification_dict over the course of the notebook.
pprint(verification_dict)

{'AIX Model Generated': 'yes',
 'FP Accuracy': 0.865,
 'FSIM Data': 'Done',
 'Latency': 734,
 'Layers': 54,
 'Max_Frame_Rate': 1000,
 'Min Macs Required': 92,
 'Model JSON': 'done',
 'Quantized Dynamic Accuracy': 0.84,
 'Quantized Static Accuracy': 0.84,
 'Reconstructed FP Accuracy': 0.865,
 'model_name': 'resnet50',
 'syntax_checked': 'Passed'}


In [36]:
# One row per model: the outer key (model name) becomes the row label after the transpose.
summary_dict = {model_name: verification_dict}
# `from_dict` is a classmethod, so call it on the class instead of on a throwaway
# empty DataFrame instance; the resulting frame is the same.
df = pd.DataFrame.from_dict(summary_dict).T
df

Unnamed: 0,AIX Model Generated,FP Accuracy,FSIM Data,Latency,Layers,Max_Frame_Rate,Min Macs Required,Model JSON,Quantized Dynamic Accuracy,Quantized Static Accuracy,Reconstructed FP Accuracy,model_name,syntax_checked
resnet50,yes,0.865,Done,734,54,1000,92,done,0.84,0.84,0.865,resnet50,Passed


# Finish!