In [7]:
import numpy as np
import torch
from PIL import Image
from torch.nn import functional as F
from tqdm import tqdm
import onnx
import onnxruntime
from onnx import numpy_helper
import os
import time

In [8]:
################### float32数据转int8模块  ##############
def float32_to_int8(float32_array): 
   # 定义量化范围（最小和最大值）
    min_val = np.min(float32_array)
    max_val = np.max(float32_array)

    # 计算缩放因子和零点
    scale = (max_val - min_val) / 127.0  # 127 是有符号 int8 的最大值
    zero_point = np.round(0 - min_val / scale)

    # 使用缩放因子和零点进行量化
    int8_array = np.round(float32_array / scale + zero_point).astype(np.int8)

    return int8_array   
    

In [9]:
def bitlet_time(arr_w, arr_a):
    arr_w=float32_to_int8(arr_w)
    arr_a=float32_to_int8(arr_a)

    mantis = np.empty((8,64))
    time = 0

    weight_int8 = [""] * 64
    active_int8 = [""] * 64
    a_sign = [""] * 64
    w_sign = [""] * 64
    sign   = [""] * 64
                            
    sign = [""] * 64
    for b in range(64): 
        weight_int8[b] = '{0:08b}'.format(abs(arr_w[b]))
        active_int8[b] ='{0:08b}'.format(abs(arr_a[b]))   
        
        if arr_w[b] >= 0:
                w_sign[b] = 0
        else:
                w_sign[b] = 1

        if arr_a[b] >= 0:
                a_sign[b] = 0
        else:
                a_sign[b] = 1
                
        if w_sign[b]^a_sign[b]:
                sign[b] = 1
        else:
                sign[b] = 0
                
    w_manti_list = []
    a_manti_list = []

    for c in range(64):
            w_manti_list.append(weight_int8[c])
            a_manti_list.append(active_int8[c])


    
    for i in range(64):
        for j in range(8):
            mantis[j][i] = w_manti_list[j]


    ones = np.empty((8,4))
    for i in range(8):
        for k in range(4):
            one = 0
            for j in range(8):
                if mantis[i][32+8*k+j]:
                    one = one + 1 
            ones[i][k] = one

    for i in range(8):
        for k in range(4):
            if (ones[i][k] == 0 or ones[i][k] == 1):
                ones[i][k] = k+6
            elif (ones[i][k] == 2):
                ones[i][k] = k+7
            elif (ones[i][k] == (3 or 4)):
                ones[i][k] = k+8
            else:
                ones[i][k] = k+9

    for i in range(8):
        for k in range(3):
            for j in range(3-k):
                if(ones[i][k] == ones[i][k+j+1]):
                    ones[i][k] = ones[i][k] + 1

    for i in range(8):
        for k in range(3):
            for j in range(3-k):
                if(ones[i][k] == ones[i][k+j+1]):
                    ones[i][k] = ones[i][k] + 1

    for i in range(8):
        for k in range(3):
            for j in range(3-k):
                if(ones[i][k] == ones[i][k+j+1]):
                    ones[i][k] = ones[i][k] + 1

    time = ones[0][0]
    for i in range(8):
        for k in range(4):
            if(time < ones[i][k]):
                time = ones[i][k]


    
    
    return time+5

In [10]:

# def Mac_64(arr_w, mac_a64):
#     weight_size = int(arr_w.size/10)
#     mac_time = 0

#     for i in range(0, weight_size, 64):
#         mac_time = mac_time + bitlet_time(arr_w[i:i+64],mac_a64)
#     return mac_time

def Mac_64(arr_w, mac_a64):
    weight_size = int(arr_w.size/10)
    mac_time = 0

    for i in range(0, weight_size, 64):
        # print(bitlet_time(arr_w[i:i+64],mac_a64))
        mac_time = mac_time + 1
    return mac_time


def Conv_33(arr_w, arr_a, w_shape, a_shape, s):

    if(arr_w.size % 64 != 0):
        arr_w = np.pad(arr_w,(0,(64 - arr_w.size % 64)))

    yolo_time = 0
    mac_a64 = np.empty(64)
    cnt = 0

    for i in range(0 , (a_shape[3] - 2), s): 
        print("row = ",i)           #row
        for j in range(0 , (a_shape[2] - 2), s):        #channel
            for k in range(a_shape[1]):        #col
                for c in range(3):
                    for r in range(3):
                        mac_a64[cnt] =  arr_a[i+c,j+r,k]
                        cnt = cnt + 1
                        if(cnt == 64):
                            cnt = 0
                            yolo_time = yolo_time + Mac_64(arr_w, mac_a64)
            print(j , end = "|")

    return yolo_time


def Conv_11(arr_w, arr_a, w_shape, a_shape):

    if(arr_w.size % 64 != 0):
        arr_w = np.pad(arr_w,(0,(64 - arr_w.size % 64)))

    yolo_time = 0
    mac_a64 = np.empty(64)
    cnt = 0

    for i in range(a_shape[3]):
        print("row = ",i)            #row
        for j in range(a_shape[2]):        #channel
            for k in range(a_shape[1]):        #col
                mac_a64[cnt] =  arr_a[i,j,k]
                cnt = cnt + 1
                if(cnt == 64):
                    cnt = 0
                    yolo_time = yolo_time + Mac_64(arr_w, mac_a64)
            print(j , end = "| ")

    return yolo_time

In [11]:

def extract_onnx_weight(onnx_path):
    model = onnx.load(onnx_path)

    weights, names = [], []
    for t in model.graph.initializer:
        weights.append(numpy_helper.to_array(t))
        names.append(t.name)
        
    onnx_weight = dict()
    for name, weight in zip(names, weights):
        onnx_weight[name] = weight
    return onnx_weight

if __name__ == '__main__':


    cwd = os.getcwd()
    print(cwd)
    IMG_PATH = 'yolov5_fp32/dog_cat.jpg'
    ONNX_PATH = 'yolov5_fp32/yolov5l6_quant_0505.onnx'
    OUT_TXT_PATH = "yolov5_fp32/data.txt"
    
    # w = ["1142","1146","1150"]           #手动输入onnx的w
    # a = ["606", "614", "621"]             #手动输入onnx的x
    w = ["backbone0.conv.weight_quantized", "backbone1.conv.weight_quantized", "backbone2.cv1.conv.weight_quantized", "backbone2.m.0.cv1.conv.weight_quantized", "backbone2.m.0.cv2.conv.weight_quantized", "backbone2.m.1.cv1.conv.weight_quantized", "backbone2.m.1.cv2.conv.weight_quantized", "backbone2.m.2.cv1.conv.weight_quantized", "backbone2.m.2.cv2.conv.weight_quantized", "backbone2.cv2.conv.weight_quantized", 
        "backbone2.cv3.conv.weight_quantized", "backbone3.conv.weight_quantized", "backbone4.cv1.conv.weight_quantized", "backbone4.m.0.cv1.conv.weight_quantized", "backbone4.m.0.cv2.conv.weight_quantized", "backbone4.m.1.cv1.conv.weight_quantized", "backbone4.m.1.cv2.conv.weight_quantized", "backbone4.m.2.cv1.conv.weight_quantized", "backbone4.m.2.cv2.conv.weight_quantized", "backbone4.m.3.cv1.conv.weight_quantized", 
        "backbone4.m.3.cv2.conv.weight_quantized", "backbone4.m.4.cv1.conv.weight_quantized", "backbone4.m.4.cv2.conv.weight_quantized", "backbone4.m.5.cv1.conv.weight_quantized", "backbone4.m.5.cv2.conv.weight_quantized", "backbone4.cv2.conv.weight_quantized", "backbone4.cv3.conv.weight_quantized", "backbone5.conv.weight_quantized", "backbone6.cv1.conv.weight_quantized", "backbone6.m.0.cv1.conv.weight_quantized", 
        "backbone6.m.0.cv2.conv.weight_quantized", "backbone6.m.1.cv1.conv.weight_quantized", "backbone6.m.1.cv2.conv.weight_quantized", "backbone6.m.2.cv1.conv.weight_quantized", "backbone6.m.2.cv2.conv.weight_quantized", "backbone6.m.3.cv1.conv.weight_quantized", "backbone6.m.3.cv2.conv.weight_quantized", "backbone6.m.4.cv1.conv.weight_quantized", "backbone6.m.4.cv2.conv.weight_quantized", "backbone6.m.5.cv1.conv.weight_quantized", 
        "backbone6.m.5.cv2.conv.weight_quantized", "backbone6.m.6.cv1.conv.weight_quantized", "backbone6.m.6.cv2.conv.weight_quantized", "backbone6.m.7.cv1.conv.weight_quantized", "backbone6.m.7.cv2.conv.weight_quantized", "backbone6.m.8.cv1.conv.weight_quantized", "backbone6.m.8.cv2.conv.weight_quantized", "backbone6.cv2.conv.weight_quantized", "backbone6.cv3.conv.weight_quantized", "backbone7.conv.weight_quantized", 
        "backbone8.cv1.conv.weight_quantized", "backbone8.m.0.cv1.conv.weight_quantized", "backbone8.m.0.cv2.conv.weight_quantized", "backbone8.m.1.cv1.conv.weight_quantized", "backbone8.m.1.cv2.conv.weight_quantized", "backbone8.m.2.cv1.conv.weight_quantized", "backbone8.m.2.cv2.conv.weight_quantized", "backbone8.cv2.conv.weight_quantized", "backbone8.cv3.conv.weight_quantized", "backbone9.conv.weight_quantized", 
        "backbone10.cv1.conv.weight_quantized", "backbone10.m.0.cv1.conv.weight_quantized", "backbone10.m.0.cv2.conv.weight_quantized", "backbone10.m.1.cv1.conv.weight_quantized", "backbone10.m.1.cv2.conv.weight_quantized", "backbone10.m.2.cv1.conv.weight_quantized", "backbone10.m.2.cv2.conv.weight_quantized", "backbone10.cv2.conv.weight_quantized", "backbone10.cv3.conv.weight_quantized", "backbone11.cv1.conv.weight_quantized", 
        "backbone11.cv2.conv.weight_quantized", "neck0.conv.weight_quantized", "neck3.cv1.conv.weight_quantized", "neck3.m.0.cv1.conv.weight_quantized", "neck3.m.0.cv2.conv.weight_quantized", "neck3.m.1.cv1.conv.weight_quantized", "neck3.m.1.cv2.conv.weight_quantized", "neck3.m.2.cv1.conv.weight_quantized", "neck3.m.2.cv2.conv.weight_quantized", "neck3.cv2.conv.weight_quantized", 
        "neck3.cv3.conv.weight_quantized", "neck4.conv.weight_quantized", "neck7.cv1.conv.weight_quantized", "neck7.m.0.cv1.conv.weight_quantized", "neck7.m.0.cv2.conv.weight_quantized", "neck7.m.1.cv1.conv.weight_quantized", "neck7.m.1.cv2.conv.weight_quantized", "neck7.m.2.cv1.conv.weight_quantized", "neck7.m.2.cv2.conv.weight_quantized", "neck7.cv2.conv.weight_quantized", 
        "neck7.cv3.conv.weight_quantized", "neck8.conv.weight_quantized", "neck11.cv1.conv.weight_quantized", "neck11.m.0.cv1.conv.weight_quantized", "neck11.m.0.cv2.conv.weight_quantized", "neck11.m.1.cv1.conv.weight_quantized", "neck11.m.1.cv2.conv.weight_quantized", "neck11.m.2.cv1.conv.weight_quantized", "neck11.m.2.cv2.conv.weight_quantized", "neck11.cv2.conv.weight_quantized", 
        "neck11.cv3.conv.weight_quantized", "neck12.conv.weight_quantized", "neck14.cv1.conv.weight_quantized", "neck14.m.0.cv1.conv.weight_quantized", "neck14.m.0.cv2.conv.weight_quantized", "neck14.m.1.cv1.conv.weight_quantized", "neck14.m.1.cv2.conv.weight_quantized", "neck14.m.2.cv1.conv.weight_quantized", "neck14.m.2.cv2.conv.weight_quantized", "neck14.cv2.conv.weight_quantized", 
        "neck14.cv3.conv.weight_quantized", "neck15.conv.weight_quantized", "neck17.cv1.conv.weight_quantized", "neck17.m.0.cv1.conv.weight_quantized", "neck17.m.0.cv2.conv.weight_quantized", "neck17.m.1.cv1.conv.weight_quantized", "neck17.m.1.cv2.conv.weight_quantized", "neck17.m.2.cv1.conv.weight_quantized", "neck17.m.2.cv2.conv.weight_quantized", "neck17.cv2.conv.weight_quantized", 
        "neck17.cv3.conv.weight_quantized", "neck18.conv.weight_quantized", "neck20.cv1.conv.weight_quantized", "neck20.m.0.cv1.conv.weight_quantized", "neck20.m.0.cv2.conv.weight_quantized", "neck20.m.1.cv1.conv.weight_quantized", "neck20.m.1.cv2.conv.weight_quantized", "neck20.m.2.cv1.conv.weight_quantized", "neck20.m.2.cv2.conv.weight_quantized", "neck20.cv2.conv.weight_quantized", 
        "neck20.cv3.conv.weight_quantized", "head3.weight_quantized"]           #手动输入onnx的w
    
    a = ["281_quantized", "284_quantized", "287_quantized", "290_quantized", "293_quantized", "297_quantized", "300_quantized", "304_quantized", "307_quantized", "287_quantized", 
    "315_quantized", "318_quantized", "321_quantized", "324_quantized", "327_quantized", "331_quantized", "334_quantized", "338_quantized", "341_quantized", "345_quantized", 
    "348_quantized", "352_quantized", "355_quantized", "359_quantized", "362_quantized", "321_quantized", "370_quantized", "373_quantized", "376_quantized", "379_quantized", 
    "382_quantized", "386_quantized", "389_quantized", "393_quantized", "396_quantized", "400_quantized", "403_quantized", "407_quantized", "410_quantized", "414_quantized", 
    "417_quantized", "421_quantized", "424_quantized", "428_quantized", "431_quantized", "435_quantized", "438_quantized", "376_quantized", "446_quantized", "449_quantized", 
    "452_quantized", "455_quantized", "458_quantized", "462_quantized", "465_quantized", "469_quantized", "472_quantized", "452_quantized", "480_quantized", "483_quantized", 
    "486_quantized", "489_quantized", "492_quantized", "496_quantized", "499_quantized", "503_quantized", "506_quantized", "486_quantized", "514_quantized", "517_quantized", 
    "524_quantized", "527_quantized", "536_quantized", "539_quantized", "542_quantized", "545_quantized", "548_quantized", "551_quantized", "554_quantized", "536_quantized", 
    "561_quantized", "564_quantized", "573_quantized", "576_quantized", "579_quantized", "582_quantized", "585_quantized", "588_quantized", "591_quantized", "573_quantized", 
    "598_quantized", "601_quantized", "610_quantized", "613_quantized", "616_quantized", "619_quantized", "622_quantized", "625_quantized", "628_quantized", "610_quantized", 
    "635_quantized", "638_quantized", "642_quantized", "645_quantized", "648_quantized", "651_quantized", "654_quantized", "657_quantized", "660_quantized", "642_quantized", 
    "667_quantized", "670_quantized", "674_quantized", "677_quantized", "680_quantized", "683_quantized", "686_quantized", "689_quantized", "692_quantized", "674_quantized", 
    "699_quantized", "702_quantized", "706_quantized", "709_quantized", "712_quantized", "715_quantized", "718_quantized", "721_quantized", "724_quantized", "706_quantized", 
    "731_quantized", "734_quantized"]

    s = [ 2,2,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1]
    
    print(len(w))
    print(len(a))
    print(len(s))
    
    torch.manual_seed(999)
    torch.set_printoptions(precision=10,sci_mode=True)
    torch.set_printoptions(threshold=np.inf)

    # activation
    img = Image.open(IMG_PATH).convert("RGB")
    img = np.asarray(img, np.float32)/255.0
    img= img.transpose(2,0,1).reshape(1,3,224,224)
    

    # weight
    # onnx_path_list = os.listdir(ONNX_PATH)
    onnx_path_list = [ONNX_PATH]
    
    out_file = OUT_TXT_PATH
    if os.path.exists(out_file):
        os.remove(out_file)
        print("文件已存在并已删除。")
    else:
        print("文件不存在。")

    final_time = np.empty((200))
    compute_time = np.empty((200))

    for layerX in range(len(w)):
        start_time = time.time()
        print("This is layer "+ str(layerX))
        #f = open(out_file, 'a', encoding='utf-8')

        print(w[layerX])
        WEIGHT_NAME = str(w[layerX])        #输入w
        MED_LAYER_NAME = str(a[layerX])      #输入a

        onnx_name = "yolov5l6_fuse_test.onnx"
        yolov3_weight = extract_onnx_weight(ONNX_PATH)
        select_weight = WEIGHT_NAME # "1138"
        weight = torch.from_numpy(yolov3_weight[select_weight])

        # activate
        intermediate_layer_name = MED_LAYER_NAME  
        model = onnx.load(ONNX_PATH)
        for node in model.graph.node:
            for output in node.output:
                if(output==intermediate_layer_name):
                    model.graph.output.extend([onnx.ValueInfoProto(name=output)])
        session = onnxruntime.InferenceSession(model.SerializeToString())
        # model input
        inputs = {'x': img}
        # infer
        intermediate_layer_output = session.run([intermediate_layer_name], inputs)[0]

        act = torch.from_numpy(intermediate_layer_output)

        ######################################################################
        w_shape = weight.shape 
        a_shape = act.shape

        print('w_shape:',w_shape)
        print('a_shape:',a_shape)
        core_size = w_shape[3]

        weight = weight.numpy().reshape(-1)
        act = act.numpy().reshape(a_shape[1],a_shape[2],a_shape[3]).transpose(1,2,0)
        print('after modify:')
        print('w_shape:',weight.shape)
        print('a_shape:',act.shape)

        #####################################################################
        if(core_size == 3 ):
            f_time = Conv_33(weight, act, w_shape, a_shape, s[layerX])
        else:
            f_time = Conv_11(weight, act, w_shape, a_shape)
        end_time = time.time()
        compute_time[layerX] =end_time - start_time 
        final_time[layerX] = f_time
        print(f_time)
        print("cp_time",end_time - start_time)

/home/chen/ROCC/python/mac_test/mac_test_yolov5
132
132
132
文件不存在。
This is layer 0
backbone0.conv.weight_quantized
w_shape: torch.Size([64, 3, 6, 6])
a_shape: torch.Size([1, 3, 256, 256])
after modify:
w_shape: (6912,)
a_shape: (256, 256, 3)
row =  0
0| 1| 2| 3| 4| 5| 6| 7| 8| 9| 10| 11| 12| 13| 14| 15| 16| 17| 18| 19| 20| 21| 22| 23| 24| 25| 26| 27| 28| 29| 30| 31| 32| 33| 34| 35| 36| 37| 38| 39| 40| 41| 42| 43| 44| 45| 46| 47| 48| 49| 50| 51| 52| 53| 54| 55| 56| 57| 58| 59| 60| 61| 62| 63| 64| 65| 66| 67| 68| 69| 70| 71| 72| 73| 74| 75| 76| 77| 78| 79| 80| 81| 82| 83| 84| 85| 86| 87| 88| 89| 90| 91| 92| 93| 94| 95| 96| 97| 98| 99| 100| 101| 102| 103| 104| 105| 106| 107| 108| 109| 110| 111| 112| 113| 114| 115| 116| 117| 118| 119| 120| 121| 122| 123| 124| 125| 126| 127| 128| 129| 130| 131| 132| 133| 134| 135| 136| 137| 138| 139| 140| 141| 142| 143| 144| 145| 146| 147| 148| 149| 150| 151| 152| 153| 154| 155| 156| 157| 158| 159| 160| 161| 162| 163| 164| 165| 166| 167| 168| 169| 170| 171|

In [12]:
import numpy as np

total_time = 0
total_mac_cnt = 0
for i in range(71):
    total_time = total_time + compute_time[i]
print("total_time is ", total_time)

total_time is  62.053457498550415
