In [1]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse
import os
os.environ['CUDA_VISIBLE_DEVICES']='0'
import pprint

import torch

import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.optim
import torch.utils.data
import torch.utils.data.distributed
import torchvision.transforms as transforms
from torchsummary import summary

import _init_paths
from core.config import config
from core.config import update_config
from core.config import update_dir
from core.loss import JointsMSELoss
from core.function import validate
from utils.utils import create_logger

import dataset
import models

import quantize_dorefa
# from quantize_iao import *
from quantize_iao_deconv3 import *
# from quantize_iao_uint import *  # symmetric uint quantization of the feature maps

import numpy as np
# Print every array element instead of abbreviating with ellipses; np.inf serves as an unbounded threshold
np.set_printoptions(threshold = np.inf) 

# # To disable scientific notation in the printed output:
# np.set_printoptions(suppress = True)

In [2]:
def bn_fuse_conv(bn_conv, device):
    """Fold the batch-norm terms of a BN-fused quantized conv into a plain ``QuantConv2d``.

    The fused parameters are computed as::

        std     = sqrt(running_var + eps)
        w_fused = w * (gamma / std)                  # scaled along dim 0
        b_fused = beta + (b - mean) * (gamma / std)  # b is all zeros when bn_conv.bias is None

    Args:
        bn_conv: Source layer. This function reads its ``running_mean``, ``running_var``,
            ``eps``, ``gamma``, ``beta``, ``weight``, ``bias``, the convolution
            hyper-parameters (channels, kernel size, stride, padding, dilation, groups,
            padding mode) and its ``activation_quantizer`` / ``weight_quantizer``.
        device: Forwarded unchanged to the ``QuantConv2d`` constructor.

    Returns:
        A new ``QuantConv2d`` built with ``bias=True`` and ``quant_inference=True`` that holds
        the fused weight and bias plus copies of the source quantizers' ``scale``,
        ``zero_point`` and ``eps``.

    Note:
        Bit widths and quantization type/level are taken from the global
        ``config.QUANTIZATION`` rather than from ``bn_conv``.
    """
    # Pure tensor bookkeeping: nothing here needs an autograd graph.
    with torch.no_grad():
        # ******************** BN parameters *********************
        mean = bn_conv.running_mean
        std = torch.sqrt(bn_conv.running_var + bn_conv.eps)
        channel_scale = bn_conv.gamma / std
        # ******************* conv parameters ********************
        if bn_conv.bias is not None:
            conv_bias = bn_conv.bias
        else:
            conv_bias = mean.new_zeros(mean.shape)
        # ********************** BN fusion ***********************
        w_fused = bn_conv.weight * channel_scale.reshape([bn_conv.out_channels, 1, 1, 1])
        b_fused = bn_conv.beta + (conv_bias - mean) * channel_scale

    bn_fused_conv = QuantConv2d(bn_conv.in_channels,
                                bn_conv.out_channels,
                                bn_conv.kernel_size,
                                stride=bn_conv.stride,
                                padding=bn_conv.padding,
                                dilation=bn_conv.dilation,
                                groups=bn_conv.groups,
                                bias=True,
                                padding_mode=bn_conv.padding_mode,
                                a_bits=config.QUANTIZATION.A_BITS,
                                w_bits=config.QUANTIZATION.W_BITS,
                                q_type=config.QUANTIZATION.Q_TYPE,
                                q_level=config.QUANTIZATION.Q_LEVEL,
                                device=device,
                                quant_inference=True)
    bn_fused_conv.weight.data = w_fused
    bn_fused_conv.bias.data = b_fused

    # Carry the calibrated quantizer state over to the new layer.
    with torch.no_grad():
        bn_fused_conv.activation_quantizer.scale.copy_(bn_conv.activation_quantizer.scale)
        bn_fused_conv.activation_quantizer.zero_point.copy_(bn_conv.activation_quantizer.zero_point)
        bn_fused_conv.weight_quantizer.scale.copy_(bn_conv.weight_quantizer.scale)
        bn_fused_conv.weight_quantizer.zero_point.copy_(bn_conv.weight_quantizer.zero_point)
    bn_fused_conv.activation_quantizer.eps = bn_conv.activation_quantizer.eps
    bn_fused_conv.weight_quantizer.eps = bn_conv.weight_quantizer.eps
    return bn_fused_conv

def bn_fuse_deconv(bn_conv, device):
    """Fold the batch-norm terms of a BN-fused quantized deconv into a ``QuantConvTranspose2d``.

    The fused parameters are::

        std     = sqrt(running_var + eps)
        w_fused = w * (gamma / std)                  # applied per OUTPUT channel
        b_fused = beta + (b - mean) * (gamma / std)  # b is all zeros when bn_conv.bias is None

    Args:
        bn_conv: Source layer. This function reads its ``running_mean``, ``running_var``,
            ``eps``, ``gamma``, ``beta``, ``weight``, ``bias``, the transposed-convolution
            hyper-parameters and its ``activation_quantizer`` / ``weight_quantizer``.
        device: Forwarded unchanged to the ``QuantConvTranspose2d`` constructor.

    Returns:
        A new ``QuantConvTranspose2d`` built with ``bias=True`` and ``quant_inference=True``
        that holds the fused weight and bias plus copies of the source quantizers' ``scale``,
        ``zero_point`` and ``eps``.

    Note:
        Bit widths and quantization type/level are taken from the global
        ``config.QUANTIZATION`` rather than from ``bn_conv``.
    """
    # Pure tensor bookkeeping: nothing here needs an autograd graph.
    with torch.no_grad():
        # ******************** BN parameters *********************
        mean = bn_conv.running_mean
        std = torch.sqrt(bn_conv.running_var + bn_conv.eps)
        channel_scale = bn_conv.gamma / std  # one entry per output channel
        # ****************** deconv parameters *******************
        if bn_conv.bias is not None:
            conv_bias = bn_conv.bias
        else:
            conv_bias = mean.new_zeros(mean.shape)
        # ********************** BN fusion ***********************
        # Assumes the weight uses the torch.nn.ConvTranspose2d layout
        # (in_channels, out_channels // groups, kH, kW) -- the fused layer below is built with
        # ConvTranspose2d-style arguments; TODO confirm against quantize_iao_deconv3.
        # In that layout, input channel i of group g feeds output channel g * out_per_group + j,
        # so the per-output-channel scale has to be laid out as (in_channels, out_per_group).
        # For depthwise layers (groups == in_channels == out_channels) this is exactly the
        # previous reshape([out_channels, 1, 1, 1]).
        # NOTE(review): for non-depthwise layers the previous reshape scaled the input-channel
        # axis (or failed to broadcast); verify the training-time fusion in
        # QuantBNFuseConvTranspose2d uses the same convention.
        groups = bn_conv.groups
        in_per_group = bn_conv.in_channels // groups
        out_per_group = bn_conv.out_channels // groups
        weight_scale = (channel_scale.reshape(groups, 1, out_per_group)
                        .expand(groups, in_per_group, out_per_group)
                        .reshape(bn_conv.in_channels, out_per_group, 1, 1))
        w_fused = bn_conv.weight * weight_scale
        b_fused = bn_conv.beta + (conv_bias - mean) * channel_scale

    bn_fused_conv = QuantConvTranspose2d(bn_conv.in_channels,
                                         bn_conv.out_channels,
                                         bn_conv.kernel_size,
                                         stride=bn_conv.stride,
                                         padding=bn_conv.padding,
                                         output_padding=bn_conv.output_padding,
                                         dilation=bn_conv.dilation,
                                         groups=bn_conv.groups,
                                         bias=True,
                                         padding_mode=bn_conv.padding_mode,
                                         a_bits=config.QUANTIZATION.A_BITS,
                                         w_bits=config.QUANTIZATION.W_BITS,
                                         q_type=config.QUANTIZATION.Q_TYPE,
                                         q_level=config.QUANTIZATION.Q_LEVEL,
                                         device=device,
                                         quant_inference=True)
    bn_fused_conv.weight.data = w_fused
    bn_fused_conv.bias.data = b_fused

    # Carry the calibrated quantizer state over to the new layer.
    with torch.no_grad():
        bn_fused_conv.activation_quantizer.scale.copy_(bn_conv.activation_quantizer.scale)
        bn_fused_conv.activation_quantizer.zero_point.copy_(bn_conv.activation_quantizer.zero_point)
        bn_fused_conv.weight_quantizer.scale.copy_(bn_conv.weight_quantizer.scale)
        bn_fused_conv.weight_quantizer.zero_point.copy_(bn_conv.weight_quantizer.zero_point)
    bn_fused_conv.activation_quantizer.eps = bn_conv.activation_quantizer.eps
    bn_fused_conv.weight_quantizer.eps = bn_conv.weight_quantizer.eps
    return bn_fused_conv

In [3]:
def bn_fuse_module(module, device):
    """Walk ``module`` recursively and swap BN-fused quantized layers for BN-folded ones.

    Direct children that are ``QuantBNFuseConv2d`` / ``QuantBNFuseConvTranspose2d`` instances
    are replaced in ``module._modules`` by the result of ``bn_fuse_conv`` /
    ``bn_fuse_deconv``; every other child is descended into. The module is modified in place
    and nothing is returned.

    Args:
        module: Container whose children are inspected.
        device: Passed through to the fusion helpers.
    """
    for child_name, submodule in module.named_children():
        if isinstance(submodule, QuantBNFuseConv2d):
            module._modules[child_name] = bn_fuse_conv(submodule, device)
            continue
        if isinstance(submodule, QuantBNFuseConvTranspose2d):
            module._modules[child_name] = bn_fuse_deconv(submodule, device)
            continue
        # Not a fusable layer itself: look for fusable layers further down.
        bn_fuse_module(submodule, device)


def model_bn_fuse(model, inplace=False):
    """Return ``model`` with every BN-fused quantized layer replaced by its BN-folded form.

    Args:
        model: The module tree to convert.
        inplace: When False (default) a deep copy is converted and the argument is left
            untouched; when True the argument itself is modified.

    Returns:
        The converted model (the same object as ``model`` when ``inplace`` is True).
    """
    # Imported here because the notebook never imports `copy` explicitly; without this the
    # default inplace=False path only works if a star import happens to expose the name.
    import copy

    if not inplace:
        model = copy.deepcopy(model)
    # The new layers are created on the device of the first parameter; a model without any
    # parameters falls back to the CPU instead of raising StopIteration.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')
    bn_fuse_module(model, device)
    return model

In [4]:
def select_device(device='', apex=False, batch_size=None):
    """Pick the torch device to run on and print a short summary of it.

    Args:
        device: 'cpu', or a CUDA device string such as '0' or '0,1,2,3'. An empty string
            means "use CUDA when available, otherwise the CPU".
        apex: Only affects the printed banner ('Apex ' is added to it).
        batch_size: When truthy and more than one GPU is visible, it must be divisible by
            the GPU count.

    Returns:
        ``torch.device('cuda:0')`` when CUDA is used, otherwise ``torch.device('cpu')``.

    Raises:
        AssertionError: If a non-CPU device is requested while CUDA is unavailable, or if
            ``batch_size`` is not a multiple of the GPU count.
    """
    wants_cpu = device.lower() == 'cpu'
    if device and not wants_cpu:
        # An explicit GPU request must be satisfiable.
        # os.environ['CUDA_VISIBLE_DEVICES'] = device  # set environment variable
        assert torch.cuda.is_available(), 'CUDA unavailable, invalid device %s requested' % device

    use_cuda = (not wants_cpu) and torch.cuda.is_available()
    if not use_cuda:
        print('Using CPU')
    else:
        bytes_per_mb = 1024 ** 2
        gpu_count = torch.cuda.device_count()
        if gpu_count > 1 and batch_size:
            assert batch_size % gpu_count == 0, 'batch-size %g not multiple of GPU count %g' % (batch_size, gpu_count)
        properties = [torch.cuda.get_device_properties(index) for index in range(gpu_count)]
        banner = 'Using CUDA ' + ('Apex ' if apex else '')  # apex for mixed precision https://github.com/NVIDIA/apex
        padding = ' ' * len(banner)
        for index, props in enumerate(properties):
            # Only the first line carries the banner; later lines are aligned under it.
            prefix = banner if index == 0 else padding
            print("%sdevice%g _CudaDeviceProperties(name='%s', total_memory=%dMB)" %
                  (prefix, index, props.name, props.total_memory / bytes_per_mb))

    print('')  # skip a line
    return torch.device('cuda:0' if use_cuda else 'cpu')

In [5]:
# Alternative configuration, kept for reference:
# cfg='../experiments/coco/resnet50/mobile_quant_relu_int.yaml'   # MODEL_FILE:'output/weights_quan/int8_mobilenet8_relu_bnfuse_inference.pt'; generates 'output/weights_quan/float_mobilenetpose_nobn.pt'
cfg='../experiments/coco/resnet50/mobile_quant_relu_int_deconv3.yaml'   # MODEL_FILE:'output/weights_quan_deconv3/int8_mobilenet8_relu_bnfuse_deconv3_float.pt'; generates 'output/weights_quan/float_mobilenetpose_nobn.pt'
update_config(cfg)
# cudnn related setting
cudnn.benchmark = config.CUDNN.BENCHMARK
torch.backends.cudnn.deterministic = config.CUDNN.DETERMINISTIC
torch.backends.cudnn.enabled = config.CUDNN.ENABLED

# for shufflenetv2: width multiplier -> (stage repeats, stage output channels)
shufflenetv2_spec = {'0.5': ([4, 8, 4], [24, 48, 96, 192, 1024]),
                        '1.0': ([4, 8, 4], [24, 116, 232, 464, 1024]),
                        '1.5': ([4, 8, 4], [24, 176, 352, 704, 1024]),
                        '2.0': ([4, 8, 4], [24, 244, 488, 976, 2048])}
stages_repeats, stages_out_channels = shufflenetv2_spec['1.0']
print('models.'+config.MODEL.NAME+'.get_pose_net')
# Resolve the model factory with getattr instead of eval() so a string from the YAML config is
# never executed as code. Assumes config.MODEL.NAME is a plain sub-module name of `models`
# (no dots), as in the captured output 'models.pose_mobilenet_relu.get_pose_net'.
model = getattr(models, config.MODEL.NAME).get_pose_net(
        config,
        stages_repeats, stages_out_channels,
        is_train=False
    )

models.pose_mobilenet_relu.get_pose_net
  exp_config = edict(yaml.load(f))


In [6]:
####################################### bnfuse model ############################################
# Second network built from the BN-free model definition; the export cell further down
# remaps the fused weights to this model's parameter names.
# The factory is a fixed dotted name, so it is referenced directly instead of through eval().
bnfuse_model = models.pose_mobilenet_relu_bnfuse.get_pose_net(
    config,
    stages_repeats, stages_out_channels,
    is_train=False
)

In [7]:
# config.GPUS is a comma-separated string of device ids; keep them as integers for DataParallel.
gpus = list(map(int, config.GPUS.split(',')))
# The batch size handed to select_device is the per-GPU test batch size times the GPU count.
device = select_device(config.GPUS, batch_size=len(gpus) * config.TEST.BATCH_SIZE)

model = model.to(device)
# summary(model,input_size=(3, 256, 192))

Using CUDA device0 _CudaDeviceProperties(name='Tesla V100-PCIE-32GB', total_memory=32510MB)



In [8]:
#print('*******************ori_model*******************\n', model)
# Insert the quantization wrappers into `model` in place.
# QUANT_METHOD == 1 selects DoReFa; any other value (default 0) selects IAO.
if config.QUANTIZATION.QUANT_METHOD != 1:  # IAO
    prepare(
        model,
        inplace=True,
        a_bits=config.QUANTIZATION.A_BITS,
        w_bits=config.QUANTIZATION.W_BITS,
        q_type=config.QUANTIZATION.Q_TYPE,
        q_level=config.QUANTIZATION.Q_LEVEL,
        device=device,  # alternative: device=next(model.parameters()).device
        weight_observer=config.QUANTIZATION.WEIGHT_OBSERVER,
        bn_fuse=config.QUANTIZATION.BN_FUSE,
        quant_inference=config.QUANTIZATION.QUANT_INFERENCE,
    )
else:  # DoReFa
    quantize_dorefa.prepare(
        model,
        inplace=True,
        a_bits=config.QUANTIZATION.A_BITS,
        w_bits=config.QUANTIZATION.W_BITS,
        quant_inference=config.QUANTIZATION.QUANT_INFERENCE,
        is_activate=False,
    )
#print('\n*******************quant_model*******************\n', model)
# print('\n*******************Using quant_model in test*******************\n')

a_bits= 8 	w_bits= 8 	q_type= 0 	q_level= 0 	device= cuda:0 	weight_observer= 0 	bn_fuse= 1 	quant_inference= False


In [9]:
# Load the raw checkpoint for inspection. map_location keeps the tensors on the device chosen
# above instead of whichever device they were saved from, matching the loads in the next cell.
checkpoint_list=torch.load('../'+config.TEST.MODEL_FILE, map_location=device)
# print(checkpoint_list.keys())

In [10]:
if config.TEST.MODEL_FILE:
    # logger.info('=> loading model from {}'.format(config.TEST.MODEL_FILE))
    print('=> loading model from {}'.format(config.TEST.MODEL_FILE))
    # Every checkpoint is resolved relative to '../', the same way the preceding cell loads
    # it. Previously two of the branches omitted the prefix and pointed at a different path.
    checkpoint_path = '../' + config.TEST.MODEL_FILE
    checkpoint_name = config.TEST.MODEL_FILE.split('/')[-1]

    # These files are loaded into a DataParallel-wrapped model (presumably because they were
    # saved from a multi-GPU run and their keys carry the 'module.' prefix -- verify).
    data_parallel_checkpoints = (
        'checkpoint.pth.tar',
        'model_best_140.pth.tar',
        'checkpoint_resave.pth.tar',
    )
    if checkpoint_name in data_parallel_checkpoints:
        model = torch.nn.DataParallel(model, device_ids=gpus).cuda()

    loaded_state = torch.load(checkpoint_path, map_location=device)
    if checkpoint_name == 'checkpoint.pth.tar':
        # Full training checkpoint: the weights live under the 'state_dict' key.
        loaded_state = loaded_state['state_dict']
        #torch.save(model.module.state_dict(), 'output/coco_quan/mobile_quant_relu_w8a8_bnfuse0/checkpoint_nomodule.pth.tar')
    # Any other file name (e.g. final_state.pth.tar) is a bare state dict for the unwrapped model.
    model.load_state_dict(loaded_state)

=> loading model from output/coco_quan/mobile_quant_relu_int_deconv3_w8a8_bnfuse1/final_state_140.pth.tar


In [11]:
######################################################################################################
# ********************* quant_bn_fused_model_inference **********************
# Move the model to the target device, then fold batch norm into the quantized layers in place
# (with inplace=True, model_bn_fuse returns the very object it was given).
model = model_bn_fuse(model.to(device), inplace=True)
# print('\n*******************For inference bn_fuse quant_model*******************\n', model)
# Optional: save the fused weights.
# ckpt = {'model': model.module.state_dict() if hasattr(model, 'module') else model.state_dict()}
# torch.save(ckpt, '../output/weights_quan_deconv3/int8_mobilenet8_relu_bnfuse_inference.pt')
print('*******************For inference bn_fuse quant_model*******************')

*******************For inference bn_fuse quant_model*******************


In [12]:
# Show which checkpoint file the configuration points at.
print(config.TEST.MODEL_FILE)
# Disabled experiments with reloading weights into the fused model
# (open question from the author: why are the weights still nested under the 'model' key?).
# model.load_state_dict(torch.load('../'+config.TEST.MODEL_FILE,map_location=device)['model'])
# model.load_state_dict(torch.load('../output/weights_quan_deconv3/int8_mobilenet8_relu_bnfuse_inference.pt',map_location=device)['model'])
# model.load_state_dict(torch.load('../'+config.TEST.MODEL_FILE,map_location=device))
# model = torch.nn.DataParallel(model, device_ids=gpus).cuda()

output/coco_quan/mobile_quant_relu_int_deconv3_w8a8_bnfuse1/final_state_140.pth.tar


In [13]:
remapped_state = {}
print('Model.state_dict:')
######################################## before #######################################
# Build the state dict once and iterate over its items; calling model.state_dict() inside the
# loop would rebuild the whole dictionary on every iteration.
for n, (param_name, param_value) in enumerate(model.state_dict().items()):
    # Print index, key and tensor shape of every entry.
    print(n, param_name,'\t',param_value.size())

# ######################################### after #######################################
# The BN-free target model can be listed the same way:
# for n, (param_name, param_value) in enumerate(bnfuse_model.state_dict().items()):
#     print(n, param_name,'\t',param_value.size())

Model.state_dict:
0 features.0.0.weight 	 torch.Size([16, 3, 3, 3])
1 features.0.0.bias 	 torch.Size([16])
2 features.0.0.activation_quantizer.scale 	 torch.Size([1])
3 features.0.0.activation_quantizer.zero_point 	 torch.Size([1])
4 features.0.0.weight_quantizer.scale 	 torch.Size([16, 1, 1, 1])
5 features.0.0.weight_quantizer.zero_point 	 torch.Size([16, 1, 1, 1])
6 features.1.conv.0.weight 	 torch.Size([16, 1, 3, 3])
7 features.1.conv.0.bias 	 torch.Size([16])
8 features.1.conv.0.activation_quantizer.scale 	 torch.Size([1])
9 features.1.conv.0.activation_quantizer.zero_point 	 torch.Size([1])
10 features.1.conv.0.weight_quantizer.scale 	 torch.Size([16, 1, 1, 1])
11 features.1.conv.0.weight_quantizer.zero_point 	 torch.Size([16, 1, 1, 1])
12 features.1.conv.3.weight 	 torch.Size([8, 16, 1, 1])
13 features.1.conv.3.bias 	 torch.Size([8])
14 features.1.conv.3.activation_quantizer.scale 	 torch.Size([1])
15 features.1.conv.3.activation_quantizer.zero_point 	 torch.Size([1])
16 features

In [23]:
# ################################# Generate floating-point weights (after the refactor) #############################
# import re
# remapped_state = {}
# for n,state_key in enumerate(bnfuse_model.state_dict().keys()):
#     k = state_key.split('.') # pytorch  ['features', '0', '0', 'weight']
#     if(k[0]=='features'):
#         k[1]=str(int(k[1])+1)
#         k[-2]=str((int(k[2][-1])-1)*3)
#         k[2]='conv'
#         remapped_state_key=('.').join(k) #进行重映射
#     elif(k[0].startswith('deconv_layers')): #final_layer
#         number=3*int(k[0][-1])
#         remapped_state_key='deconv_layers.'+str(number)+'.'+k[-1] #weight/bias
#     elif(k[0]=='conv1'): #final_layer
#         remapped_state_key='features.0.0.'+k[-1] #weight/bias
#     else: #final_layer  conv2
#         remapped_state_key=state_key
#     # print(n, state_key, remapped_state_key, model.state_dict()[remapped_state_key].shape)
#     remapped_state[state_key]= model.state_dict()[remapped_state_key]    

In [24]:
# ################################# Generate floating-point weights (before the refactor) #############################
# remapped_state = {}
# for n,state_key in enumerate(bnfuse_model.state_dict().keys()):
#     k = state_key.split('.') # pytorch  ['features', '0', '0', 'weight']
#     if(k[0]!='final_layer'):
#         number = int(k[-2])//2*3 
#         # print(number)
#         k[-2]=str(number)
#         # print(k)
#         remapped_state_key=('.').join(k) #进行重映射
#     else: #final_layer
#         remapped_state_key=state_key
#     print(n, state_key, model.state_dict()[remapped_state_key].shape)
#     remapped_state[state_key]= model.state_dict()[remapped_state_key]   

In [14]:
################################### Quantization steps ##############################
## Symmetric quantization, so every zero_point is 0
def quantize_tensor(weight, bias, wscale, ascale, num_bits=8):
    """Quantize a layer's weight and bias to int32 tensors.

    Args:
        weight: Floating-point weight tensor.
        bias: Floating-point bias tensor.
        wscale: Weight scale; ``weight`` is divided by it as-is and ``bias`` by its
            flattened form.
        ascale: Activation scale; the bias is additionally divided by it.
        num_bits: Bit width that defines the signed clamp range of the weight.

    Returns:
        ``(q_weight, q_bias)``: ``round(weight / wscale)`` clamped to
        ``[-2**(num_bits-1), 2**(num_bits-1) - 1]`` and ``round(bias / wscale / ascale)``,
        both cast to ``torch.int32``. The bias is not clamped.
    """
    levels = 2 ** (num_bits - 1)
    lowest, highest = -levels, levels - 1  # 8 bits -> [-128, 127]
    rounded_weight = torch.round(torch.div(weight, wscale))
    q_weight = torch.clamp(rounded_weight, lowest, highest).to(torch.int32)
    # print('0:', torch.max(rounded_weight), torch.min(rounded_weight))
    # The bias is only rounded; no clamping is applied to it.
    scaled_bias = torch.div(torch.div(bias, wscale.flatten()), ascale)
    q_bias = torch.round(scaled_bias).to(torch.int32)
    return q_weight, q_bias

# Dequantize back to floating-point values
def dequantize_tensor(q_weight, q_bias, wscale, ascale):
    """Map quantized weight and bias back to floating point.

    Args:
        q_weight: Quantized weight tensor.
        q_bias: Quantized bias tensor.
        wscale: Weight scale; used as-is for the weight and flattened for the bias.
        ascale: Activation scale; only enters the bias.

    Returns:
        ``(weight, bias)`` with ``weight = wscale * q_weight`` and
        ``bias = wscale.flatten() * ascale * q_bias``, both computed in float32.
    """
    weight = wscale * q_weight.to(torch.float32)
    bias_scale = wscale.flatten() * ascale
    bias = bias_scale * q_bias.to(torch.float32)
    return weight, bias

In [20]:
############################################################## Fixed-point export (similar to post-training quantization: min/max taken directly from the weights) ##############################################
# What this cell actually does in its current form: for every '*.weight' entry of the fused
# model it collects the floating-point weight/bias under the parameter names used by
# bnfuse_model (in `remapped_state`) and records the per-layer activation and weight scales,
# then saves the layer-name list and both scale dictionaries as .npy files.
# The integer quantization itself is commented out further down.
remapped_state = {}
M_list = {}  # would hold the per-conv "*M" rescale factors for integer inference (currently never filled)
M_key=[]  # layer names, in export order; these are the keys of the dictionaries below
oscale_list={} # would hold the output scales (currently never filled)
ascale_list={} # activation scale per layer
wscale_list={} # weight scale per layer

count=0
# (original intent: export weights and biases to binary files)
print('Model.state_dict:')
for n,param_key in enumerate(model.state_dict()): # author's note: an earlier attempt raised AttributeError: 'collections.OrderedDict' object has no attribute 'key', so model.state_dict().keys() is not used here
    # print the key/value pairs
    # print(n, param_key,'\t',model.state_dict()[param_key].size())
    if(n<=434): # debugging limit on the entry index; other values tried: n<4*2, n==16, n==8
        # print(n, param_key,'\t',model.state_dict()[param_key].size()) #param_key: features.0.0.activation_quantizer.scale 
        ############################################## Fetch the floating-point weights ###################################################
        if(param_key.split('.')[-1]=='weight'):  # author's note: final_layer has no bias -- but shouldn't the last layer be handled separately anyway?
            layer_name=('.').join(param_key.split('.')[0:-1])
            next_layer=''
            # print(layer_name+'.weight')
            # NOTE(review): model.state_dict() is rebuilt on each of the lookups below.
            conv_weight=model.state_dict()[layer_name+'.weight'].detach().cpu()
            ascale=model.state_dict()[layer_name+'.activation_quantizer.scale'].detach().cpu()  # author's note: symmetric quantization, so zero_point is 0
            ######################################## Use the scales learned by quantization-aware training #########################################
            wscale=model.state_dict()[layer_name+'.weight_quantizer.scale'].detach().cpu()  # per-channel scale, e.g. torch.Size([16, 1, 1, 1])
            ######################################## Alternative: derive the scale directly, post-training-quantization style #########################################
            # inter = torch.flatten(conv_weight, start_dim=1)  # flatten to 2-D [out_channel, -1]
            # frange=torch.max(torch.abs(inter), 1)[0].reshape(-1,1,1,1)
            # wscale=torch.clamp(frange/127.5, min=1.1920928955078125e-7)  #1.1920928955078125e-7*127.5=1.519918441772461e-05
            # print('frange=',frange.flatten(),'\nwscale=',wscale.flatten())

            if(param_key.split('.')[0]!='final_layer' and param_key.split('.')[1]!='final_layer'): # second test covers keys such as 'module.final_layer.bias'
                conv_bias=model.state_dict()[layer_name+'.bias'].detach().cpu()  # only read for non-final layers (final_layer has no bias)
                # next_layer = same name with the last numeric component increased by 2;
                # it is only consumed by the commented-out oscale lookup below.
                tmp=param_key.split('.')[0:-1]
                tmp[-1]=str(int(tmp[-1])+2)
                next_layer=('.').join(tmp)
                # print(param_key, next_layer)
            else:
                print('It is final_layer')
                print(conv_weight.shape)
            
            # --- disabled: look up the output scale (oscale) of the current layer ---
            # if((next_layer+'.activation_quantizer.scale') in model.state_dict()):
            #     oscale=model.state_dict()[next_layer+'.activation_quantizer.scale'].detach().cpu()  # layer followed by relu: first layer, relu layers inside InvertedResidual, conv2 and deconv_layers
            #     # print('0:',next_layer, oscale)
            # elif(param_key.split('.')[1]!='17' and next_layer!=''): # no relu: end of each mobilenet InvertedResidual block (linear bottleneck); features.17's oscale should be conv2's ascale
            #     tmp2=param_key.split('.')[0:-1]
            #     tmp2[1]=str(int(tmp[1])+1)
            #     tmp2[-1]='0'
            #     next_layer=('.').join(tmp2)
            #     oscale=model.state_dict()[next_layer+'.activation_quantizer.scale'].detach().cpu()
            #     # print('1:',next_layer,oscale)
            # elif(param_key.split('.')[1]=='17'): # features.17's oscale should be the ascale of the following layer
            #     oscale=model.state_dict()['conv2.0.activation_quantizer.scale'].detach().cpu()
            #     # print('2: conv2.0',oscale)
            # else:  # final_layer needs no oscale (next_layer==''); layers without relu need none either
            #     oscale=torch.tensor([1.])
            #     # print('3: final_layer',oscale)
            # print(conv_weight.shape, 'conv_bias=',conv_bias.shape,'\nascale=',ascale, ascale.shape, '\nwscale=',wscale.flatten(), wscale.shape, '\noscale=',oscale,oscale.shape) #wscale.flatten()

            # ############################################## disabled: quantize the float weights to obtain int weights and M ###################################################
            # if(param_key.split('.')[0]!='final_layer'):
            #     # Compute the shift value. wscale differs a lot between channels (e-02 .. e-07), 2**16 does not seem to be enough, so validate with M first.
            #     # if(param_key.split('.')[0]=='features'and param_key.split('.')[1]=='0'): # first-layer M would need special handling (fold the input ascale into image preprocessing so the network input is already [-128,127]) -- apparently not needed after all
            #     #     M=wscale/oscale
            #     # else:
            #     #     M=wscale*ascale/oscale  # M varies strongly between channels early on (e-03 .. e-09), later layers are fairly uniform
            #     M=wscale*ascale/oscale
            #     # M0=(M*2**16).type(torch.int32)
            #     # print('M=',M.flatten(),'  M0=',M0.flatten(),'   error=',((M-M0*2**(-16))/M).flatten()) #
            #     # print('###: M=',M.flatten())

            #     # print(conv_weight.shape, conv_bias.shape)
            #     # #*********************************************************************************************************************************
            #     # print('conv_weight:',conv_weight.squeeze(), 'conv_bias:',conv_bias.flatten())
                
            #     # integer quantization of weight and bias
            #     q_weight,q_bias = quantize_tensor(conv_weight, conv_bias, wscale, ascale, num_bits=8)
            #     # print(q_weight.shape, q_bias.shape)
            #     # print('q_weight:',q_weight.flatten(), 'q_bias:',q_bias.flatten())
                
            #     # dequantize back to floating point
            #     dq_weight, dq_bias = dequantize_tensor(q_weight, q_bias, wscale, ascale)
            #     # print(dq_weight, dq_bias)
            #     # print('**quantization error**   weight:', torch.mean(conv_weight-dq_weight), torch.max(conv_weight-dq_weight),'  bias', torch.mean(conv_bias-dq_bias), torch.max(conv_bias-dq_bias))
            #     # print(conv_bias-dq_bias)
            # else: #final_layer
            #     # Compute the shift value (same remark about the spread of wscale as above).
            #     M=wscale*ascale  # M varies strongly between channels early on (e-03 .. e-09), later layers are fairly uniform
            #     # M0=(M*2**16).type(torch.int32) # not needed if the last layer is computed in floating point
            #     # print('M=',M.flatten(),'  M0=',M0.flatten(),'   error=',((M-M0*2**(16))/M).flatten()) #
            #     # print('final_layer: M=',M.flatten())

            #     # integer quantization of weight and bias
            #     q_weight,q_bias = quantize_tensor(conv_weight, torch.zeros([17]), wscale, ascale, num_bits=8) # the bias at this point belongs to the previous layer and is unused
            #     # print(q_weight, q_bias)
            #     # print(q_bias.shape)
            #     # dequantize back to floating point
            #     dq_weight, dq_bias = dequantize_tensor(q_weight, q_bias, wscale, ascale)
            #     # print(dq_weight, dq_bias)
            #     # print(torch.mean(conv_weight-dq_weight), torch.max(conv_weight-dq_weight))
            #     # print(conv_bias-dq_bias)

            ################################################### Floating-point weight export ################################################################
            # Despite the q_ prefix these are the unquantized float tensors (author's note:
            # float export is used as a controlled baseline).
            # NOTE(review): on the final_layer branch conv_bias is not reassigned, so q_bias
            # still refers to the previous layer's bias (it is not stored for final_layer
            # below); if final_layer were the first weight encountered this would be a NameError.
            q_weight=conv_weight
            q_bias=conv_bias
            ################################################################################################################################

            ############################################## Map the weights onto bnfuse_model's parameter names ###################################################
            k = param_key.split('.') # e.g. ['features', '0', '0', 'weight']; with a 'module.' prefix one would use param_key[7:].split('.') instead
            if(k[0]=='features' and k[1]=='0'): # first layer of the network: features.0.* -> conv1
                remapped_state_key = 'conv1.weight'
                remapped_state[remapped_state_key]=q_weight
                remapped_state[remapped_state_key.replace('weight','bias')]=q_bias
            elif(k[0]=='features'): # every other 'features' layer
                # features.<i>.conv.<j>.weight -> features.<i-1>.conv<j//3 + 1>.weight
                k[1]=str(int(k[1])-1)
                number = int(k[-2])//3 + 1 
                # print(number)
                k[2]=k[2]+str(number)
                # k[3]='0' # no longer needed
                # print(k)
                remapped_state_key=('.').join(k[0:3])+'.weight' # remapped name
                # print(count, n, remapped_state_key, bnfuse_model.state_dict()[remapped_state_key].shape)
                remapped_state[remapped_state_key]=q_weight
                remapped_state[remapped_state_key.replace('weight','bias')]=q_bias
                # print(count, n, remapped_state_key.replace('weight','bias'), bnfuse_model.state_dict()[remapped_state_key.replace('weight','bias')].shape)
            elif(k[0]=='deconv_layers'): # deconv_layers.<j>.weight -> deconv_layers<j//3>.weight
                number = int(k[-2])//3
                remapped_state_key=k[0]+str(number)+'.weight' # remapped name
                remapped_state[remapped_state_key]=q_weight
                remapped_state[k[0]+str(number)+'.bias']=q_bias
                # print(count, n, remapped_state_key.replace('weight','bias'), bnfuse_model.state_dict()[remapped_state_key.replace('weight','bias')].shape)
            elif(k[0]=='final_layer'): # final_layer: weight only, no bias entry is written
                remapped_state_key='final_layer.weight' #param_key
                remapped_state[remapped_state_key]=q_weight
                # print(count, n, remapped_state_key, bnfuse_model.state_dict()[remapped_state_key].shape)
            else: # anything else is treated as conv2
                remapped_state_key = 'conv2.weight'
                remapped_state[remapped_state_key]=q_weight
                remapped_state[remapped_state_key.replace('weight','bias')]=q_bias
            # print(n, remapped_state_key, model.state_dict()[remapped_state_key].shape)
            # remapped_state[state_key]= model.state_dict()[remapped_state_key]   
            # Layer name used as dictionary key: the remapped key without '.weight' for
            # 'features' layers, otherwise just its first component.
            if(remapped_state_key.split('.')[0]=='features'):
                M_key_name = '.'.join(remapped_state_key.split('.')[0:-1])
            else:
                M_key_name = remapped_state_key.split('.')[0]
            M_key.append(M_key_name)
            # disabled together with the M / oscale computation above:
            # M_list[M_key_name] = torch.squeeze(M, dim=-1) # per-channel M of torch.Size([16, 1, 1, 1]) has to become torch.Size([16, 1, 1])
            # oscale_list[M_key_name]=oscale  # a single value, e.g. oscale= tensor([0.9272]) torch.Size([1])
            ascale_list[M_key_name]=ascale  
            wscale_list[M_key_name]=torch.squeeze(wscale, dim=-1)  
            count += 1

# Save the results
print(M_key)
parent_path='../output/weights_quan_deconv3/'
# np.save('../output/weights_quan/M_key.npy', M_key)
np.save(parent_path+'M_key.npy', M_key) # this one is a real list

# Despite their *_list names, the objects saved below are dictionaries.
# np.save(parent_path+'M_refactor_noreluq.npy', M_list)
# np.save(parent_path+'oscale.npy', oscale_list)
np.save(parent_path+'ascale.npy', ascale_list)

# np.save(parent_path+'M_refactor.npy', M_list)
np.save(parent_path+'wscale.npy', wscale_list)


Model.state_dict:
It is final_layer
torch.Size([17, 128, 1, 1])
['conv1', 'features.0.conv1', 'features.0.conv2', 'features.1.conv1', 'features.1.conv2', 'features.1.conv3', 'features.2.conv1', 'features.2.conv2', 'features.2.conv3', 'features.3.conv1', 'features.3.conv2', 'features.3.conv3', 'features.4.conv1', 'features.4.conv2', 'features.4.conv3', 'features.5.conv1', 'features.5.conv2', 'features.5.conv3', 'features.6.conv1', 'features.6.conv2', 'features.6.conv3', 'features.7.conv1', 'features.7.conv2', 'features.7.conv3', 'features.8.conv1', 'features.8.conv2', 'features.8.conv3', 'features.9.conv1', 'features.9.conv2', 'features.9.conv3', 'features.10.conv1', 'features.10.conv2', 'features.10.conv3', 'features.11.conv1', 'features.11.conv2', 'features.11.conv3', 'features.12.conv1', 'features.12.conv2', 'features.12.conv3', 'features.13.conv1', 'features.13.conv2', 'features.13.conv3', 'features.14.conv1', 'features.14.conv2', 'features.14.conv3', 'features.15.conv1', 'features.

In [55]:
#127.0 **量化误差**   weight: tensor(0.0004) tensor(0.0275)   bias tensor(7.7337e-06) tensor(0.0005)

In [56]:
# ############################################################## 定点结果导出 refactor后 ##############################################
# remapped_state = {}
# M_list = {}  #存储int推断时每个conv之后的*M scale
# M_key=[]  #存储M_list中的索引键值
# oscale_list={} #储存 浮点和整型的oscale
# ascale_list={} #储存 浮点和整型的ascale
# wscale_list={} #储存 浮点和整型的wscale

# count=0
# #导出权重和偏置至二进制文件中
# print('Model.state_dict:')
# for n,param_key in enumerate(model.state_dict()): # AttributeError: 'collections.OrderedDict' object has no attribute 'key',所以这里不使用model.state_dict().keys()
#     #打印 key value字典
#     # print(n, param_key,'\t',model.state_dict()[param_key].size())
#     if(n<=434): #if(n<4*2): if(n<=434):  n==16 or n==8
#         # print(n, param_key,'\t',model.state_dict()[param_key].size()) #param_key: features.0.0.activation_quantizer.scale 
#         ############################################## 以下得到浮点权重值 ###################################################
#         if(param_key.split('.')[-1]=='weight'):  #最后一层final_layer没有bias,但最后一层本来就应该要单独设计的？
#             layer_name=('.').join(param_key.split('.')[0:-1])
#             next_layer=''
#             # print(layer_name+'.weight')
#             conv_weight=model.state_dict()[layer_name+'.weight'].detach().cpu()
#             ascale=model.state_dict()[layer_name+'.activation_quantizer.scale'].detach().cpu()  #对称量化，因此zero_point为0
#             wscale=model.state_dict()[layer_name+'.weight_quantizer.scale'].detach().cpu()  # 通道量化  torch.Size([16, 1, 1, 1])

#             if(param_key.split('.')[0]!='final_layer'):
#                 conv_bias=model.state_dict()[layer_name+'.bias'].detach().cpu()  # final_layer其实是没有bias的
#                 tmp=param_key.split('.')[0:-1]
#                 tmp[-1]=str(int(tmp[-1])+2)
#                 next_layer=('.').join(tmp)
#                 print(param_key, next_layer)
#             else:
#                 print('It is final_layer')
#                 print(conv_weight.shape)
            
#             if((next_layer+'.activation_quantizer.scale') in model.state_dict()):
#                 oscale=model.state_dict()[next_layer+'.activation_quantizer.scale'].detach().cpu()  #存在relu的情况 包括网络第一层、InvertedResidual中包含relu的层、conv2以及deconv_layers
#                 # print('0:',next_layer, oscale)
#             elif(param_key.split('.')[1]!='17' and next_layer!=''): #不存在relu的情况  此时是mobilenet 每个InvertedResidual模块的末尾（线性直通，无relu)   但features.17的oscale应该是conv2的ascale
#                 tmp2=param_key.split('.')[0:-1]
#                 tmp2[1]=str(int(tmp[1])+1)
#                 tmp2[-1]='0'
#                 next_layer=('.').join(tmp2)
#                 oscale=model.state_dict()[next_layer+'.activation_quantizer.scale'].detach().cpu()
#                 # print('1:',next_layer,oscale)
#             elif(param_key.split('.')[1]=='17'): #features.17的oscale应该是final_layer的ascale
#                 oscale=model.state_dict()['conv2.0.activation_quantizer.scale'].detach().cpu()
#                 # print('2: conv2.0',oscale)
#             else:  #final_layer 不需要oscale    next_layer=='' 没有relu的都不需要oscale
#                 oscale=torch.tensor([1.])
#                 # print('3: final_layer',oscale)
#             print(conv_weight.shape, 'conv_bias=',conv_bias.shape,'\nascale=',ascale, ascale.shape, '\nwscale=',wscale.flatten(), wscale.shape, '\noscale=',oscale,oscale.shape) #wscale.flatten()

#             ############################################## 以下进行浮点权重的量化，得到int权重和M ###################################################
#             if(param_key.split('.')[0]!='final_layer'):
#                 #计算移位值  不同通道的wscale还相差蛮大的（e-02,e-07) 2**16好像还不够  所以先使用M来验证
#                 # if(param_key.split('.')[0]=='features'and param_key.split('.')[1]=='0'): #第一层的M需要特殊处理，将输入ascale放入图像预处理中实现，输入网络的数据直接是[-128,127] 似乎又不需要...
#                 #     M=wscale/oscale
#                 # else:
#                 #     M=wscale*ascale/oscale  #一开始的不同通道间M差距很大（e-03,e-09)， 后面就挺均匀的  
#                 M=wscale*ascale/oscale
#                 # M0=(M*2**16).type(torch.int32)
#                 # print('M=',M.flatten(),'  M0=',M0.flatten(),'   error=',((M-M0*2**(-16))/M).flatten()) #
#                 print('###: M=',M.flatten())

#                 print(conv_weight.shape, conv_bias.shape)
#                 #*********************************************************************************************************************************
#                 print('conv_weight:',conv_weight.squeeze(), 'conv_bias:',conv_bias.flatten())
                
#                 #计算权重和偏置int量化结果
#                 q_weight,q_bias = quantize_tensor(conv_weight, conv_bias, wscale, ascale, num_bits=8)
#                 print(q_weight.shape, q_bias.shape)
#                 print(q_weight.flatten(), q_bias.flatten())
                
#                 #反量化回浮点数的结果
#                 dq_weight, dq_bias = dequantize_tensor(q_weight, q_bias, wscale, ascale)
#                 # print(dq_weight, dq_bias)
#                 print('weight:', torch.mean(conv_weight-dq_weight), torch.max(conv_weight-dq_weight),'  bias', torch.mean(conv_bias-dq_bias), torch.max(conv_bias-dq_bias))
#                 # print(conv_bias-dq_bias)
#             else: #final_layer
#                 #计算移位值  不同通道的wscale还相差蛮大的（e-02,e-07) 2**16好像还不够  所以先使用M来验证
#                 M=wscale*ascale  #一开始的不同通道间M差距很大（e-03,e-09)， 后面就挺均匀的  
#                 # M0=(M*2**16).type(torch.int32) #如果最后一层直接使用浮点数进行运算，则不用计算M0
#                 # print('M=',M.flatten(),'  M0=',M0.flatten(),'   error=',((M-M0*2**(16))/M).flatten()) #
#                 print('final_layer: M=',M.flatten())

#                 #计算权重和偏置int量化结果
#                 q_weight,q_bias = quantize_tensor(conv_weight, torch.zeros([17]), wscale, ascale, num_bits=8) #这儿的bias是上一层的bias,是没有用的
#                 # print(q_weight, q_bias)
#                 # print(q_bias.shape)
#                 #反量化回浮点数的结果
#                 dq_weight, dq_bias = dequantize_tensor(q_weight, q_bias, wscale, ascale)
#                 # print(dq_weight, dq_bias)
#                 print(torch.mean(conv_weight-dq_weight), torch.max(conv_weight-dq_weight))
#                 # print(conv_bias-dq_bias)

#             # ################################################### 浮点权重导出 ################################################################
#             # q_weight=conv_weight #浮点权重的导出，需要控制变量呀！！！
#             # q_bias=conv_bias
#             # ################################################################################################################################

#             ############################################## 以下将int权重映射给bnfuse_model ###################################################
#             k = param_key.split('.') # pytorch  ['features', '0', '0', 'weight']
#             if(k[0]=='features' and k[1]=='0'): # 网络第一层  features.0. -> conv1
#                 remapped_state_key = 'conv1.weight'
#                 remapped_state[remapped_state_key]=q_weight
#                 remapped_state[remapped_state_key.replace('weight','bias')]=q_bias
#             elif(k[0]=='features'): #除了第一层外的其他层
#                 k[1]=str(int(k[1])-1)
#                 number = int(k[-2])//3 + 1 
#                 # print(number)
#                 k[2]=k[2]+str(number)
#                 # k[3]='0' #现在不需要了
#                 # print(k)
#                 remapped_state_key=('.').join(k[0:3])+'.weight' #进行重映射
#                 # print(count, n, remapped_state_key, bnfuse_model.state_dict()[remapped_state_key].shape)
#                 remapped_state[remapped_state_key]=q_weight
#                 remapped_state[remapped_state_key.replace('weight','bias')]=q_bias
#                 # print(count, n, remapped_state_key.replace('weight','bias'), bnfuse_model.state_dict()[remapped_state_key.replace('weight','bias')].shape)
#             elif(k[0]=='deconv_layers'): #deconv_layers0-5
#                 number = int(k[-2])//3
#                 remapped_state_key=k[0]+str(number)+'.weight' #进行重映射
#                 remapped_state[remapped_state_key]=q_weight
#                 remapped_state[k[0]+str(number)+'.bias']=q_bias
#                 # print(count, n, remapped_state_key.replace('weight','bias'), bnfuse_model.state_dict()[remapped_state_key.replace('weight','bias')].shape)
#             elif(k[0]=='final_layer'): #final_layer
#                 remapped_state_key=param_key
#                 remapped_state[remapped_state_key]=q_weight
#                 # print(count, n, remapped_state_key, bnfuse_model.state_dict()[remapped_state_key].shape)
#             else: #conv2 无需进行重映射
#                 remapped_state_key = 'conv2.weight'
#                 remapped_state[remapped_state_key]=q_weight
#                 remapped_state[remapped_state_key.replace('weight','bias')]=q_bias
#             # print(n, state_key, model.state_dict()[remapped_state_key].shape)
#             # remapped_state[state_key]= model.state_dict()[remapped_state_key]   
#             if(remapped_state_key.split('.')[0]=='features'):
#                 M_key_name = '.'.join(remapped_state_key.split('.')[0:-1])
#             else:
#                 M_key_name = remapped_state_key.split('.')[0]
#             M_key.append(M_key_name)
#             M_list[M_key_name] = torch.squeeze(M, dim=-1) #M.flatten() torch.Size([16]);  M通道量化torch.Size([16, 1, 1, 1]) -> 需要转换为torch.Size([16, 1, 1])
#             oscale_list[M_key_name]=oscale  #储存 浮点和整形的Oscale 是一个值  类似这样oscale= tensor([0.9272]) torch.Size([1])
#             ascale_list[M_key_name]=ascale  
#             wscale_list[M_key_name]=torch.squeeze(M, dim=-1)  
#             count += 1

# #保存M结果
# print(M_key)
# np.save('../output/weights_quan/M_refactor.npy', M_list)
# np.save('../output/weights_quan/oscale.npy', oscale_list)
# np.save('../output/weights_quan/ascale.npy', ascale_list)
# np.save('../output/weights_quan/wscale.npy', wscale_list)


In [57]:
############################################################## 定点结果导出 refactor前 ##############################################
# remapped_state = {}
# M_list = {}
# count=0
# #导出权重和偏置至二进制文件中
# print('Model.state_dict:')
# for n,param_key in enumerate(model.state_dict()): # AttributeError: 'collections.OrderedDict' object has no attribute 'key',所以这里不使用model.state_dict().keys()
#     #打印 key value字典
#     # print(n, param_key,'\t',model.state_dict()[param_key].size())
#     if(n<=434): #if(n<4*2): if(n<=434):
#         # print(n, param_key,'\t',model.state_dict()[param_key].size()) #param_key: features.0.0.activation_quantizer.scale 
#         ############################################## 以下得到浮点权重值 ###################################################
#         if(param_key.split('.')[-1]=='weight'):  #最后一层final_layer没有bias,但最后一层本来就应该要单独设计的？
#             layer_name=('.').join(param_key.split('.')[0:-1])
#             next_layer=''
#             # print(layer_name+'.weight')
#             conv_weight=model.state_dict()[layer_name+'.weight'].detach().cpu()
#             ascale=model.state_dict()[layer_name+'.activation_quantizer.scale'].detach().cpu()  #对称量化，因此zero_point为0
#             wscale=model.state_dict()[layer_name+'.weight_quantizer.scale'].detach().cpu()  # 通道量化  torch.Size([16, 1, 1, 1])

#             if(param_key.split('.')[0]!='final_layer'):
#                 conv_bias=model.state_dict()[layer_name+'.bias'].detach().cpu()  # final_layer其实是没有bias的
#                 tmp=param_key.split('.')[0:-1]
#                 tmp[-1]=str(int(tmp[-1])+2)
#                 next_layer=('.').join(tmp)
#             else:
#                 print('It is final_layer')
#                 print(conv_weight.shape)
            
#             if((next_layer+'.activation_quantizer.scale') in model.state_dict()):
#                 # print('0:',next_layer)
#                 oscale=model.state_dict()[next_layer+'.activation_quantizer.scale'].detach().cpu()  #存在relu的情况
#             elif(param_key.split('.')[1]!='17' and next_layer!=''): #不存在relu的情况  此时是mobilenet InvertedResidual模块的末尾
#                 tmp2=param_key.split('.')[0:-1]
#                 tmp2[1]=str(int(tmp[1])+1)
#                 tmp2[-1]='0'
#                 next_layer=('.').join(tmp2)
#                 # print('1:',next_layer)
#                 oscale=model.state_dict()[next_layer+'.activation_quantizer.scale'].detach().cpu()
#             elif(param_key.split('.')[1]!='17'):
#                 oscale=model.state_dict()['conv2.0.activation_quantizer.scale'].detach().cpu()
#             # print(conv_weight.shape, 'conv_bias=',conv_bias.shape,'\nascale=',ascale, ascale.shape, '\nwscale=',wscale.flatten(),wscale.shape, '\noscale=',oscale,oscale.shape)

#             ############################################## 以下进行浮点权重的量化，得到int权重和M ###################################################
#             if(param_key.split('.')[0]!='final_layer'):
#                 #计算移位值  不同通道的wscale还相差蛮大的（e-02,e-07) 2**16好像还不够  所以先使用M来验证
#                 M=wscale*ascale/oscale  #一开始的不同通道间M差距很大（e-03,e-09)， 后面就挺均匀的  
#                 M0=(M*2**16).type(torch.int32)
#                 # print('M=',M.flatten(),'  M0=',M0.flatten(),'   error=',((M-M0*2**(16))/M).flatten()) #

#                 #计算权重和偏置int量化结果
#                 q_weight,q_bias = quantize_tensor(conv_weight, conv_bias, wscale, ascale, num_bits=8)
#                 # print(q_weight, q_bias)
#                 # print(q_bias.shape)
#                 #反量化回浮点数的结果
#                 dq_weight, dq_bias = dequantize_tensor(q_weight, q_bias, wscale, ascale)
#                 # print(dq_weight, dq_bias)
#                 print(torch.mean(conv_weight-dq_weight), torch.max(conv_weight-dq_weight))
#                 # print(conv_bias-dq_bias)
#             else: #final_layer
#                 #计算移位值  不同通道的wscale还相差蛮大的（e-02,e-07) 2**16好像还不够  所以先使用M来验证
#                 M=wscale*ascale  #一开始的不同通道间M差距很大（e-03,e-09)， 后面就挺均匀的  
#                 # M0=(M*2**16).type(torch.int32) #如果最后一层直接使用浮点数进行运算，则不用计算M0
#                 # print('M=',M.flatten(),'  M0=',M0.flatten(),'   error=',((M-M0*2**(16))/M).flatten()) #

#                 #计算权重和偏置int量化结果
#                 q_weight,q_bias = quantize_tensor(conv_weight, torch.zeros([17]), wscale, ascale, num_bits=8) #这儿的bias是上一层的bias,是没有用的
#                 # print(q_weight, q_bias)
#                 # print(q_bias.shape)
#                 #反量化回浮点数的结果
#                 dq_weight, dq_bias = dequantize_tensor(q_weight, q_bias, wscale, ascale)
#                 # print(dq_weight, dq_bias)
#                 print(torch.mean(conv_weight-dq_weight), torch.max(conv_weight-dq_weight))
#                 # print(conv_bias-dq_bias)

                
#             ############################################## 以下将int权重映射给bnfuse_model ###################################################
#             k = param_key.split('.') # pytorch  ['features', '0', '0', 'weight']
#             if(k[0]!='final_layer'):
#                 number = int(k[-2])//3*2 
#                 # print(number)
#                 k[-2]=str(number)
#                 # print(k)
#                 remapped_state_key=('.').join(k) #进行重映射
#                 # print(count, n, remapped_state_key, bnfuse_model.state_dict()[remapped_state_key].shape)
#                 remapped_state[remapped_state_key]=q_weight
#                 remapped_state[remapped_state_key.replace('weight','bias')]=q_bias
#                 # print(count, n, remapped_state_key.replace('weight','bias'), bnfuse_model.state_dict()[remapped_state_key.replace('weight','bias')].shape)
#             else: #final_layer
#                 remapped_state_key=param_key
#                 remapped_state[remapped_state_key]=q_weight
#                 # print(count, n, remapped_state_key, bnfuse_model.state_dict()[remapped_state_key].shape)
#             # print(n, state_key, model.state_dict()[remapped_state_key].shape)
#             # remapped_state[state_key]= model.state_dict()[remapped_state_key]   
#             M_list[remapped_state_key] = M.flatten()
#             count += 1

# #保存M结果
# np.save('../output/weights_quan/M.npy', M_list)

In [21]:
# Compute the fixed-point requantization multiplier M0 for every exported layer.
#
# For each layer:  M = wscale * ascale / ascale_of_next_layer
# and M is approximated by the integer M0 = int(M * 2**BIT), i.e. M ~= M0 * 2**-BIT.
# The last layer has no output scale, so its M is just wscale * ascale and it is kept in float.
import numpy as np
import torch

# Read the layer-name list from the same directory as the ascale/wscale tables loaded below
# (parent_path is where this notebook writes M_key.npy), so names and tables always match.
Mkey_load = np.load(parent_path+'M_key.npy', allow_pickle=True)  # type: <class 'numpy.ndarray'>
Mkey_load=list(Mkey_load)
print(type(Mkey_load))
print(Mkey_load[0])
print(Mkey_load.index('features.0.conv2'))
print(Mkey_load[Mkey_load.index('features.0.conv2')-1])

# For reference, a previous run listed 59 names:
# 'conv1', 'features.0.conv1', 'features.0.conv2', 'features.1.conv1', ..., 'features.16.conv3',
# 'conv2', 'deconv_layers0', ..., 'deconv_layers5', 'final_layer'

BIT=16  # number of fractional bits used for the integer multiplier
Mscale_list = {}    # layer name -> float multiplier M
M0_float_list = {}  # layer name -> M0 * 2**-BIT (M after the fixed-point approximation)
M0_int_list = {}    # layer name -> integer multiplier M0
ascale_load = np.load(parent_path+'ascale.npy', allow_pickle=True)
wscale_load = np.load(parent_path+'wscale.npy', allow_pickle=True)
# Scale obtained from quantization-aware training (includes the relu quant/dequant, hence the
# recomputation below).
# NOTE(review): M_list is not used by the active code in this cell and still points at the old
# '../output/weights_quan/' directory -- confirm whether this load can be dropped.
M_list = np.load('../output/weights_quan/M_refactor.npy', allow_pickle=True)

# The saved objects are 0-d object arrays wrapping a dict; unwrap them once.
ascale_by_layer = ascale_load.item()
wscale_by_layer = wscale_load.item()
last_index = len(Mkey_load) - 1

for n,key in enumerate(Mkey_load):
    if n == last_index:
        # Last layer: no output scale, and no M0 conversion is applied yet.
        Mscale = wscale_by_layer[key]*ascale_by_layer[key]
        M0_float = Mscale
        M0 = torch.tensor(2**BIT)  # multiplying by 2**BIT then shifting back by BIT is the identity
    else:
        # The output scale of this layer is the activation scale of the next layer in the list.
        key_post = Mkey_load[n+1]
        Mscale = wscale_by_layer[key]*ascale_by_layer[key]/ascale_by_layer[key_post]
        M0 = (Mscale*2**BIT).type(torch.int32)  # the int32 cast drops the fractional part
        M0_float = M0*2**(-1*BIT)
    Mscale_list[key] = Mscale
    M0_float_list[key] = M0_float
    M0_int_list[key] = M0
    # print('M=',Mscale.flatten(),'\nM0_float=',M0_float.flatten(),'\nM0=',M0.flatten(),'\nerror=',(Mscale-M0_float).flatten())

np.save(parent_path+'M0_quant_requant.npy', M0_float_list)
np.save(parent_path+'M0_int.npy', M0_int_list)


<class 'list'>
conv1
2
features.0.conv1


In [22]:
# Load the quantized integer weights into the BN-fused model. The model's weight/bias parameters
# are still float32, so the values will still be stored as float32 when the model is saved.
print(remapped_state.keys())
bnfuse_model.load_state_dict(remapped_state)
# Disabled experiments: change the dtype of the weight data to int32.
# for n,param_key in enumerate(bnfuse_model.state_dict()):
#     # print each key and the size of its value
#     print(n, param_key,'\t',bnfuse_model.state_dict()[param_key].size())
#     eval('bnfuse_model.'+param_key+'.data.type(torch.int32)')

# for name, module in bnfuse_model.named_modules():
#     if type(module) in [torch.nn.Conv2d, torch.nn.ConvTranspose2d]:
#         print(name, module)
#         if(name=='final_layer'): # has no bias
#             eval('bnfuse_model.'+name+'.weight.data.type(torch.int32)')
#         else: #conv2d, ConvTranspose2d
#             eval('bnfuse_model.'+name+'.weight.data.type(torch.int32)')
#             eval('bnfuse_model.'+name+'.bias.data.type(torch.int32)')

dict_keys(['conv1.weight', 'conv1.bias', 'features.0.conv1.weight', 'features.0.conv1.bias', 'features.0.conv2.weight', 'features.0.conv2.bias', 'features.1.conv1.weight', 'features.1.conv1.bias', 'features.1.conv2.weight', 'features.1.conv2.bias', 'features.1.conv3.weight', 'features.1.conv3.bias', 'features.2.conv1.weight', 'features.2.conv1.bias', 'features.2.conv2.weight', 'features.2.conv2.bias', 'features.2.conv3.weight', 'features.2.conv3.bias', 'features.3.conv1.weight', 'features.3.conv1.bias', 'features.3.conv2.weight', 'features.3.conv2.bias', 'features.3.conv3.weight', 'features.3.conv3.bias', 'features.4.conv1.weight', 'features.4.conv1.bias', 'features.4.conv2.weight', 'features.4.conv2.bias', 'features.4.conv3.weight', 'features.4.conv3.bias', 'features.5.conv1.weight', 'features.5.conv1.bias', 'features.5.conv2.weight', 'features.5.conv2.bias', 'features.5.conv3.weight', 'features.5.conv3.bias', 'features.6.conv1.weight', 'features.6.conv1.bias', 'features.6.conv2.weigh

<All keys matched successfully>

In [24]:
# Write the weights currently held by bnfuse_model to disk as {'model': state_dict}.
parent_path='../output/weights_quan_deconv3/'

# If bnfuse_model exposes a `.module` attribute (as wrapper modules such as DataParallel do),
# export the wrapped network's state dict; otherwise export the model's own.
if hasattr(bnfuse_model, 'module'):
    export_state = bnfuse_model.module.state_dict()
else:
    export_state = bnfuse_model.state_dict()
ckpt = {'model': export_state}

# Other targets used at different times (disabled):
#   parent_path+'float_mobilenetpose_nobn_refactor_deconv3.pt'
#   '../output/weights_quan/post_int_mobilenetpose_nobn_refactor.pt'
#   '../output/weights_quan/int_mobilenetpose_nobn_refactor.pt'
torch.save(ckpt, parent_path+'int_mobilenetpose_nobn_refactor.pt')

In [27]:
####################################### bnfuse float model ############################################
# Load the float weights of the BN-fused model straight from the checkpoint; the weights can be
# read directly without building the model.
import numpy as np
import torch

parent_path='../output/weights_quan_deconv3/'
# map_location='cpu': the tensors are later combined with CPU tensors (the saved scales and a
# torch.zeros bias), so load them on the CPU regardless of the device they were saved from.
# NOTE: torch.load unpickles the file -- only load checkpoints from a trusted source.
float_nobn_dict=torch.load(parent_path+'float_mobilenetpose_nobn_refactor_deconv3.pt', map_location='cpu')['model']
# print(float_nobn_dict.keys())

# List every entry with its index, key and tensor size.
for n,(param_key,param_value) in enumerate(float_nobn_dict.items()):
    print(n, param_key,'\t',param_value.size())

0 conv1.weight 	 torch.Size([16, 3, 3, 3])
1 conv1.bias 	 torch.Size([16])
2 features.0.conv1.weight 	 torch.Size([16, 1, 3, 3])
3 features.0.conv1.bias 	 torch.Size([16])
4 features.0.conv2.weight 	 torch.Size([8, 16, 1, 1])
5 features.0.conv2.bias 	 torch.Size([8])
6 features.1.conv1.weight 	 torch.Size([48, 8, 1, 1])
7 features.1.conv1.bias 	 torch.Size([48])
8 features.1.conv2.weight 	 torch.Size([48, 1, 3, 3])
9 features.1.conv2.bias 	 torch.Size([48])
10 features.1.conv3.weight 	 torch.Size([16, 48, 1, 1])
11 features.1.conv3.bias 	 torch.Size([16])
12 features.2.conv1.weight 	 torch.Size([96, 16, 1, 1])
13 features.2.conv1.bias 	 torch.Size([96])
14 features.2.conv2.weight 	 torch.Size([96, 1, 3, 3])
15 features.2.conv2.bias 	 torch.Size([96])
16 features.2.conv3.weight 	 torch.Size([16, 96, 1, 1])
17 features.2.conv3.bias 	 torch.Size([16])
18 features.3.conv1.weight 	 torch.Size([96, 16, 1, 1])
19 features.3.conv1.bias 	 torch.Size([96])
20 features.3.conv2.weight 	 torch.Size

In [26]:
# ****************** Alternative computation at the shortcut (residual) connections ******************
# If both inputs of a shortcut share one scale, the representable range of x shrinks, which adds
# error (open question: compute directly with the output scale?). Because ascale changes, the
# bias has to be re-quantized as well as M.
# Using different scales instead would require an extra *M0-and-shift step in hardware: more
# operations, still integer arithmetic, and therefore also a source of error.

# Read the layer-name list from the same directory as the scale tables loaded below
# (parent_path is where this notebook writes M_key.npy), so names and tables always match.
Mkey_load = np.load(parent_path+'M_key.npy', allow_pickle=True)  # type: <class 'numpy.ndarray'>
Mkey_load=list(Mkey_load)
print(type(Mkey_load))
print(Mkey_load[0])
print(Mkey_load.index('features.0.conv2'))
print(Mkey_load[Mkey_load.index('features.0.conv2')-1])

# Layers listed for reference; only shortcut_dict is used below.
shortcut_list=['features.2.conv1','features.4.conv1','features.5.conv1','features.7.conv1','features.8.conv1','features.9.conv1','features.11.conv1','features.12.conv1','features.14.conv1','features.15.conv1']
shortcut_layer=[2,4,5,7,8,9,11,12,14,15]
# Index into Mkey_load of a layer -> index of the layer whose ascale it should reuse.
shortcut_dict={6:9, 12:18, 15:18,21:30, 24:30, 27:30, 33:39, 36:39, 42:48, 45:48}

M0_SHIFT_BITS = 16  # number of fractional bits used for the integer multiplier M0

int_weight_nobn_dict = {}       # '<layer>.weight' / '<layer>.bias' -> quantized tensor
ascale_shortcut_same_list = {}  # layer name -> activation scale after sharing
Mscale_shortcut_same_list = {}  # layer name -> float multiplier M
M0_float_list = {}              # layer name -> M0 * 2**-M0_SHIFT_BITS
M0_int_list = {}                # layer name -> integer multiplier M0
ascale_load = np.load(parent_path+'ascale.npy', allow_pickle=True)
wscale_load = np.load(parent_path+'wscale.npy', allow_pickle=True)
# The saved objects are 0-d object arrays wrapping a dict; unwrap them once.
ascale_by_layer = ascale_load.item()
wscale_by_layer = wscale_load.item()

# Reassign ascale: a layer listed in shortcut_dict takes the ascale of its mapped layer,
# every other layer keeps its own.
for n,key in enumerate(Mkey_load):
    source_index = shortcut_dict.get(n, n)
    ascale_shortcut_same_list[key] = ascale_by_layer[Mkey_load[source_index]]
np.save(parent_path+'ascale_shortcut0.npy', ascale_shortcut_same_list)


# Read the reassigned ascale table back and re-quantize every layer with it.
ascale_shortcut_load = np.load(parent_path+'ascale_shortcut0.npy', allow_pickle=True)
ascale_shortcut_by_layer = ascale_shortcut_load.item()
last_index = len(Mkey_load) - 1  # the last entry is final_layer
for n,key in enumerate(Mkey_load):
    ascale=ascale_shortcut_by_layer[key]
    wscale=wscale_by_layer[key]

    conv_weight=float_nobn_dict[key+'.weight']
    if n != last_index:  # not final_layer
        # Integer weight and bias.
        conv_bias=float_nobn_dict[key+'.bias']
        q_weight,q_bias = quantize_tensor(conv_weight, conv_bias, wscale.unsqueeze(dim=-1), ascale, num_bits=8)
        int_weight_nobn_dict[key+'.weight'] = q_weight
        int_weight_nobn_dict[key+'.bias'] = q_bias
        # Multiplier: the output scale of this layer is the (reassigned) ascale of the next layer.
        key_post = Mkey_load[n+1]
        Mscale=wscale*ascale/ascale_shortcut_by_layer[key_post]
        M0=(Mscale*2**M0_SHIFT_BITS).type(torch.int32)  # the int32 cast drops the fractional part
        M0_float=M0*2**(-M0_SHIFT_BITS)
    else:  # final_layer
        # final_layer has no bias: quantize against a zero bias with one entry per output channel;
        # only the quantized weight is stored.
        zero_bias = torch.zeros([conv_weight.shape[0]])
        q_weight,q_bias = quantize_tensor(conv_weight, zero_bias, wscale.unsqueeze(dim=-1), ascale, num_bits=8)
        int_weight_nobn_dict[key+'.weight'] = q_weight
        # No output scale here, so M stays in float.
        Mscale=wscale*ascale
        M0_float=Mscale
        print('M0_float:',M0_float.flatten())
        # Values seen in a previous run:
        # M0_float: tensor([3.9311e-05, 5.1540e-05, 4.8065e-05, 5.7022e-05, 5.8868e-05, 1.3317e-04,
        # 4.3028e-05, 4.5899e-05, 4.3756e-05, 4.0470e-05, 4.7417e-05, 4.5811e-05,
        # 3.7967e-05, 5.0862e-05, 4.8882e-05, 9.4663e-05, 7.4792e-05])
        M0=torch.tensor(2**M0_SHIFT_BITS)  # multiplying by 2**16 then dividing by 2**16 is the identity

    # print(n, q_weight, q_bias)
    Mscale_shortcut_same_list[key] = Mscale
    M0_float_list[key] = M0_float
    M0_int_list[key] = M0

torch.save(int_weight_nobn_dict, parent_path+'int_mobilenetpose_shortcut0.pt')
np.save(parent_path+'Mscale_shortcut0.npy', Mscale_shortcut_same_list)
np.save(parent_path+'M0_float_shortcut0.npy', M0_float_list)
np.save(parent_path+'M0_int_shortcut0.npy', M0_int_list)

<class 'list'>
conv1
2
features.0.conv1
2 features.2.conv1 : tensor([0.0739]) features.3.conv1 : tensor([0.1108]) tensor([0.6670])
4 features.4.conv1 : tensor([0.0533]) features.6.conv1 : tensor([0.1001]) tensor([0.5321])
5 features.5.conv1 : tensor([0.0737]) features.6.conv1 : tensor([0.1001]) tensor([0.7364])
7 features.7.conv1 : tensor([0.0673]) features.10.conv1 : tensor([0.0809]) tensor([0.8318])
8 features.8.conv1 : tensor([0.0694]) features.10.conv1 : tensor([0.0809]) tensor([0.8582])
9 features.9.conv1 : tensor([0.0741]) features.10.conv1 : tensor([0.0809]) tensor([0.9159])
11 features.11.conv1 : tensor([0.0504]) features.13.conv1 : tensor([0.1270]) tensor([0.3967])
12 features.12.conv1 : tensor([0.0649]) features.13.conv1 : tensor([0.1270]) tensor([0.5110])
14 features.14.conv1 : tensor([0.0561]) features.16.conv1 : tensor([0.0963]) tensor([0.5824])
15 features.15.conv1 : tensor([0.0672]) features.16.conv1 : tensor([0.0963]) tensor([0.6979])
0 conv1 tensor([0.0207]) tensor([0.