In [1]:
import numpy as np
import torch
from PIL import Image
from torch.nn import functional as F
from tqdm import tqdm
import onnx
import onnxruntime
from onnx import numpy_helper
import os
import time

In [2]:
################### float32数据转int8模块  ##############
def float32_to_int8(float32_array): 
   # 定义量化范围（最小和最大值）
    min_val = np.min(float32_array)
    max_val = np.max(float32_array)

    # 计算缩放因子和零点
    scale = (max_val - min_val) / 127.0  # 127 是有符号 int8 的最大值
    zero_point = np.round(0 - min_val / scale)

    # 使用缩放因子和零点进行量化
    int8_array = np.round(float32_array / scale + zero_point).astype(np.int8)

    return int8_array   
    

In [3]:
def bitlet_time(arr_w, arr_a):
    arr_w=float32_to_int8(arr_w)
    arr_a=float32_to_int8(arr_a)

    mantis = np.empty((8,64))
    time = 0

    weight_int8 = [""] * 64
    active_int8 = [""] * 64
    a_sign = [""] * 64
    w_sign = [""] * 64
    sign   = [""] * 64
                            
    sign = [""] * 64
    for b in range(64): 
        weight_int8[b] = '{0:08b}'.format(abs(arr_w[b]))
        active_int8[b] ='{0:08b}'.format(abs(arr_a[b]))   
        
        if arr_w[b] >= 0:
                w_sign[b] = 0
        else:
                w_sign[b] = 1

        if arr_a[b] >= 0:
                a_sign[b] = 0
        else:
                a_sign[b] = 1
                
        if w_sign[b]^a_sign[b]:
                sign[b] = 1
        else:
                sign[b] = 0
                
    w_manti_list = []
    a_manti_list = []

    for c in range(64):
            w_manti_list.append(weight_int8[c])
            a_manti_list.append(active_int8[c])


    
    for i in range(64):
        for j in range(8):
            mantis[j][i] = w_manti_list[j]


    ones = np.empty((8,4))
    for i in range(8):
        for k in range(4):
            one = 0
            for j in range(8):
                if mantis[i][32+8*k+j]:
                    one = one + 1 
            ones[i][k] = one

    for i in range(8):
        for k in range(4):
            if (ones[i][k] == 0 or ones[i][k] == 1):
                ones[i][k] = k+6
            elif (ones[i][k] == 2):
                ones[i][k] = k+7
            elif (ones[i][k] == (3 or 4)):
                ones[i][k] = k+8
            else:
                ones[i][k] = k+9

    for i in range(8):
        for k in range(3):
            for j in range(3-k):
                if(ones[i][k] == ones[i][k+j+1]):
                    ones[i][k] = ones[i][k] + 1

    for i in range(8):
        for k in range(3):
            for j in range(3-k):
                if(ones[i][k] == ones[i][k+j+1]):
                    ones[i][k] = ones[i][k] + 1

    for i in range(8):
        for k in range(3):
            for j in range(3-k):
                if(ones[i][k] == ones[i][k+j+1]):
                    ones[i][k] = ones[i][k] + 1

    time = ones[0][0]
    for i in range(8):
        for k in range(4):
            if(time < ones[i][k]):
                time = ones[i][k]


    
    
    return time+5

In [4]:

def Mac_64(arr_w, mac_a64):
    weight_size = int(arr_w.size/1000)
    mac_time = 0

    for i in range(0, weight_size, 64):
        mac_time = mac_time + bitlet_time(arr_w[i:i+64],mac_a64)
    return mac_time


def Conv_33(arr_w, arr_a, w_shape, a_shape, s):

    if(arr_w.size % 64 != 0):
        arr_w = np.pad(arr_w,(0,(64 - arr_w.size % 64)))

    yolo_time = 0
    mac_a64 = np.empty(64)
    cnt = 0

    for i in range(0 , (a_shape[3] - 2), s): 
        print("row = ",i)           #row
        for j in range(0 , (a_shape[2] - 2), s):        #channel
            for k in range(a_shape[1]):        #col
                for c in range(3):
                    for r in range(3):
                        mac_a64[cnt] =  arr_a[i+c,j+r,k]
                        cnt = cnt + 1
                        if(cnt == 64):
                            cnt = 0
                            yolo_time = yolo_time + Mac_64(arr_w, mac_a64)
            print(j , end = "|")

    return yolo_time


def Conv_11(arr_w, arr_a, w_shape, a_shape):

    if(arr_w.size % 64 != 0):
        arr_w = np.pad(arr_w,(0,(64 - arr_w.size % 64)))

    yolo_time = 0
    mac_a64 = np.empty(64)
    cnt = 0

    for i in range(a_shape[3]):
        print("row = ",i)            #row
        for j in range(a_shape[2]):        #channel
            for k in range(a_shape[1]):        #col
                mac_a64[cnt] =  arr_a[i,j,k]
                cnt = cnt + 1
                if(cnt == 64):
                    cnt = 0
                    yolo_time = yolo_time + Mac_64(arr_w, mac_a64)
            print(j , end = "| ")

    return yolo_time

In [5]:
def extract_onnx_weight(onnx_path):
    model = onnx.load(onnx_path)

    weights, names = [], []
    for t in model.graph.initializer:
        weights.append(numpy_helper.to_array(t))
        names.append(t.name)
        
    onnx_weight = dict()
    for name, weight in zip(names, weights):
        onnx_weight[name] = weight
    return onnx_weight

In [6]:
if __name__ == '__main__':
    IMG_PATH = 'HUAWEI/img/dog.jpg'
    ONNX_PATH = "HUAWEI/yolov3_quant_0426.onnx"
    OUT_TXT_PATH = "HUAWEI/data.txt"

    # w = ["1142","1146","1150"]           #手动输入onnx的w
    # a = ["606", "614", "621"]             #手动输入onnx的x
    w = ["704_quantized", "707_quantized", "710_quantized", "713_quantized", "716_quantized", "719_quantized", "722_quantized", "725_quantized", "728_quantized", 
        "731_quantized", "734_quantized", "737_quantized", "740_quantized", "743_quantized", "746_quantized", "749_quantized", "752_quantized", "755_quantized", "758_quantized", 
        "761_quantized", "764_quantized", "767_quantized", "770_quantized", "773_quantized", "776_quantized", "779_quantized", "782_quantized", "785_quantized", "788_quantized", 
        "791_quantized", "794_quantized", "797_quantized", "800_quantized", "803_quantized", "806_quantized", "809_quantized", "812_quantized", "815_quantized", "818_quantized", 
        "821_quantized", "824_quantized", "827_quantized", "830_quantized", "833_quantized", "836_quantized", "839_quantized", "842_quantized", "845_quantized", "848_quantized", 
        "851_quantized", "854_quantized", "857_quantized", "860_quantized", "863_quantized", "866_quantized", "869_quantized", "872_quantized", "875_quantized", "878_quantized", 
        "881_quantized", "884_quantized", "887_quantized", "890_quantized", "893_quantized", "896_quantized", "899_quantized", "902_quantized", "905_quantized", "908_quantized", 
        "blocks.yolo_106.layers.layer_105.conv.weight_quantized"]           #手动输入onnx的w
    a = ["441_quantized", "444_quantized", "447_quantized", "451_quantized", "454_quantized", "457_quantized", "461_quantized", "464_quantized", "468_quantized", 
        "471_quantized", "474_quantized", "478_quantized", "481_quantized", "485_quantized", "488_quantized", "492_quantized", "495_quantized", "499_quantized", "502_quantized", 
        "506_quantized", "509_quantized", "513_quantized", "516_quantized", "520_quantized", "523_quantized", "527_quantized", "530_quantized", "533_quantized", "537_quantized", 
        "540_quantized", "544_quantized", "547_quantized", "551_quantized", "554_quantized", "558_quantized", "561_quantized", "565_quantized", "568_quantized", "572_quantized", 
        "575_quantized", "579_quantized", "582_quantized", "586_quantized", "589_quantized", "592_quantized", "596_quantized", "599_quantized", "603_quantized", "606_quantized", 
        "610_quantized", "613_quantized", "617_quantized", "620_quantized", "623_quantized", "626_quantized", "629_quantized", "632_quantized", "641_quantized", "644_quantized", 
        "647_quantized", "650_quantized", "653_quantized", "656_quantized", "665_quantized", "668_quantized", "671_quantized", "674_quantized", "677_quantized", "680_quantized", 
        "683_quantized" ]
    s = [   2,   1,   1,   2,   1,   1,   1,   1,   2,   1,   1,   1,   1,
       1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   2,   1,
       1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,
       1,   2,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,
       1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1]
    print(len(w))
    print(len(a))
    print(len(s))
    
    torch.manual_seed(999)
    torch.set_printoptions(precision=10,sci_mode=True)
    torch.set_printoptions(threshold=np.inf)

    # activation
    img = Image.open(IMG_PATH).convert("RGB")
    img = np.asarray(img, np.float32)/255.0
    img= img.transpose(2,0,1).reshape(1,3,224,224)
    

    # weight
    onnx_path_list = [ONNX_PATH]
    
    out_file = OUT_TXT_PATH
    if os.path.exists(out_file):
        os.remove(out_file)
        print("文件已存在并已删除。")
    else:
        print("文件不存在。")

    final_time = np.empty((10,100))
    compute_time = np.empty((10,100))
    for prun in range(10):
        print("This is prun "+ str(prun))
        for layerX in range(0,25):
            start_time = time.time()
            print("This is layer "+ str(layerX))
            #f = open(out_file, 'a', encoding='utf-8')

            print(w[layerX])
            WEIGHT_NAME = str(w[layerX])        #输入w
            MED_LAYER_NAME = str(a[layerX])      #输入a

            #onnx_name = "yolov3_prune_0_"+str(prun)+".onnx"
            yolov3_weight = extract_onnx_weight(ONNX_PATH)
            select_weight = WEIGHT_NAME # "1138"
            weight = torch.from_numpy(yolov3_weight[select_weight])

            # activate
            intermediate_layer_name = MED_LAYER_NAME  
            model = onnx.load(ONNX_PATH)
            for node in model.graph.node:
                for output in node.output:
                    if(output==intermediate_layer_name):
                        model.graph.output.extend([onnx.ValueInfoProto(name=output)])
            session = onnxruntime.InferenceSession(model.SerializeToString())
            # model input
            inputs = {'input.1': img}
            # infer
            intermediate_layer_output = session.run([intermediate_layer_name], inputs)[0]

            act = torch.from_numpy(intermediate_layer_output)

            ######################################################################
            w_shape = weight.shape 
            a_shape = act.shape

            print('w_shape:',w_shape)
            print('a_shape:',a_shape)
            core_size = w_shape[3]

            weight = weight.numpy().reshape(-1)
            act = act.numpy().reshape(a_shape[1],a_shape[2],a_shape[3]).transpose(1,2,0)
            print('after modify:')
            print('w_shape:',weight.shape)
            print('a_shape:',act.shape)

            #####################################################################
            if(core_size == 3 ):
                f_time = Conv_33(weight, act, w_shape, a_shape, s[layerX])
            else:
                f_time = Conv_11(weight, act, w_shape, a_shape)
            end_time = time.time()
            compute_time[prun][layerX] =end_time - start_time 
            final_time[prun][layerX] = f_time
            print(f_time)
            print("cp_time",end_time - start_time)

70
70
70
文件不存在。
This is prun 0
This is layer 0
704_quantized


  weight = torch.from_numpy(yolov3_weight[select_weight])


w_shape: torch.Size([64, 32, 3, 3])
a_shape: torch.Size([1, 32, 224, 224])
after modify:
w_shape: (18432,)
a_shape: (224, 224, 32)
row =  0
0|2|4|6|8|10|12|14|16|18|20|22|24|26|28|30|32|34|36|38|40|42|44|46|48|50|52|54|56|58|60|62|64|66|68|70|72|74|76|78|80|82|84|86|88|90|92|94|96|98|100|102|104|106|108|110|112|114|116|118|120|122|124|126|128|130|132|134|136|138|140|142|144|146|148|150|152|154|156|158|160|162|164|166|168|170|172|174|

  zero_point = np.round(0 - min_val / scale)
  int8_array = np.round(float32_array / scale + zero_point).astype(np.int8)


176|178|180|182|184|186|188|190|192|194|196|198|200|202|204|206|208|210|212|214|216|218|220|row =  2
0|2|4|6|8|10|12|14|16|18|20|22|24|26|28|30|32|34|36|38|40|42|44|46|48|50|52|54|56|58|60|62|64|66|68|70|72|74|76|78|80|82|84|86|88|90|92|94|96|98|100|102|104|106|108|110|112|114|116|118|120|122|124|126|128|130|132|134|136|138|140|142|144|146|148|150|152|154|156|158|160|162|164|166|168|170|172|174|176|178|180|182|184|186|188|190|192|194|196|198|200|202|204|206|208|210|212|214|216|218|220|row =  4
0|2|4|6|8|10|12|14|16|18|20|22|24|26|28|30|32|34|36|38|40|42|44|46|48|50|52|54|56|58|60|62|64|66|68|70|72|74|76|78|80|82|84|86|88|90|92|94|96|98|100|102|104|106|108|110|112|114|116|118|120|122|124|126|128|130|132|134|136|138|140|142|144|146|148|150|152|154|156|158|160|162|164|166|168|170|172|174|176|178|180|182|184|186|188|190|192|194|196|198|200|202|204|206|208|210|212|214|216|218|220|row =  6
0|2|4|6|8|10|12|14|16|18|20|22|24|26|28|30|32|34|36|38|40|42|44|46|48|50|52|54|56|58|60|62|64|66|68|70|

  scale = (max_val - min_val) / 127.0  # 127 是有符号 int8 的最大值


46|48|50|52|54|56|58|60|62|64|66|68|70|72|74|76|78|80|82|84|86|88|90|92|94|96|98|100|102|104|106|108|row =  2
0|2|4|6|8|10|12|14|16|18|20|22|24|26|28|30|32|34|36|38|40|42|44|46|48|50|52|54|56|58|60|62|64|66|68|70|72|74|76|78|80|82|84|86|88|90|92|94|96|98|100|102|104|106|108|row =  4
0|2|4|6|8|10|12|14|16|18|20|22|24|26|28|30|32|34|36|38|40|42|44|46|48|50|52|54|56|58|60|62|64|66|68|70|72|74|76|78|80|82|84|86|88|90|92|94|96|98|100|102|104|106|108|row =  6
0|2|4|6|8|10|12|14|16|18|20|22|24|26|28|30|32|34|36|38|40|42|44|46|48|50|52|54|56|58|60|62|64|66|68|70|72|74|76|78|80|82|84|86|88|90|92|94|96|98|100|102|104|106|108|row =  8
0|2|4|6|8|10|12|14|16|18|20|22|24|26|28|30|32|34|36|38|40|42|44|46|48|50|52|54|56|58|60|62|64|66|68|70|72|74|76|78|80|82|84|86|88|90|92|94|96|98|100|102|104|106|108|row =  10
0|2|4|6|8|10|12|14|16|18|20|22|24|26|28|30|32|34|36|38|40|42|44|46|48|50|52|54|56|58|60|62|64|66|68|70|72|74|76|78|80|82|84|86|88|90|92|94|96|98|100|102|104|106|108|row =  12
0|2|4|6|8|10|12|14

In [8]:
import numpy as np

total_time = 0
total_mac_cnt = 0
for prun in range(10):
        for layerX in range(0,25):
            total_time = total_time + compute_time[prun][layerX]    
print("total_time is ", total_time)

total_time is  5040.662515640259
