# Content
* [Extract DML PIX info](#section1)
* [Extract DML DDI info](#section2)
* [Extract OpenVINO performance data](#section3)
* [Extract onnxruntime profiling data](#section4)
* [DML&OV Layer By Layer profiling](#section5)
> tip1: the links above don't work in the browser; they only work in a local dev environment. Please search for the keywords in the browser to locate the section.

> tip2: sometimes the scripts below may need some modifications to perfectly match your own requirement. 

In [None]:
import json 
import csv
import numpy as np
import os
import pandas as pd
import sys
import time
import glob

# Inference onnx model with DML 
> related packages need to be installed, e.g. `onnxruntime-directml,onnx,skl2onnx`

In [None]:
import onnxruntime as ort
import numpy as np
import onnx
# Path of the ONNX model that is loaded here and run by the sessions below; adjust to your machine.
model_path = r"C:\Users\GAME\Documents\Project\Adobe\model_rename\Adobe-Lr_dn_bayerlinear1024.onnx"
#model_path = "test.onnx"
model = onnx.load(model_path)   
# Filled below: graph input name -> random input array fed to the inference sessions.
data_dic = {}
def switch_case(input_datatype):
    """Map an ONNX tensor element-type name to the matching NumPy type.

    Args:
        input_datatype: Element-type name as produced by
            ``onnx.TensorProto.DataType.Name`` (e.g. ``"FLOAT"`` or
            ``"FLOAT16"``).

    Returns:
        The NumPy scalar type for the name, or ``None`` when the name is not
        one of the supported floating-point types.
    """
    # ONNX names 32-bit floats "FLOAT"; "FLOAT32" is kept as an alias so that
    # existing callers passing that spelling keep working.
    dtype_by_name = {
        "FLOAT16": np.float16,
        "FLOAT": np.float32,
        "FLOAT32": np.float32,
        "DOUBLE": np.float64,
    }
    return dtype_by_name.get(input_datatype)
# Build one random array per graph input, using the shape and element type declared in the model.
# NOTE(review): dim_value is presumably 0 for symbolic/dynamic dimensions, which would give an empty array — confirm for your model.
for _input in model.graph.input:
    input_name = _input.name
    input_datatype = onnx.TensorProto.DataType.Name( _input.type.tensor_type.elem_type)
    input_shape = [int(dim.dim_value) for dim in _input.type.tensor_type.shape.dim] 
    np_dataype = switch_case(input_datatype)
    input_data = np.random.randn(*input_shape).astype(np_dataype) # the * unpacks the shape list into separate positional arguments
    data_dic[input_name] = input_data

sess_options = ort.SessionOptions()
dml_provider = ["DmlExecutionProvider"]
ref_provider = ["CPUExecutionProvider"]

# Run the model with the DirectML execution provider; only the first model output is fetched.
sess = ort.InferenceSession(model_path, sess_options=sess_options,providers=dml_provider)
output_name = sess.get_outputs()[0].name

dml_output = sess.run([output_name], data_dic)
print(dml_output)

# Run the same inputs with the CPU execution provider to get a reference result.
sess = ort.InferenceSession(model_path, sess_options=sess_options,providers=ref_provider)
ref_output = sess.run([output_name], data_dic)
print(ref_output)

# You can compare dml_output and ref_output manually, but small gaps will remain because of the precision difference between GPU and CPU.

## Print the output in middle layer

In [None]:
import onnxruntime as rt
import numpy as np
import os
import onnx
from onnx import helper

# Model whose intermediate tensors are exposed as additional graph outputs.
model_path = r"C:\Users\GAME\Documents\Project\Adobe\model_rename\Adobe-Lr_dn_bayerlinear1024.onnx"
#######################################################################
'''
ref: https://github.com/microsoft/onnxruntime/issues/1455#issuecomment-514805365
This is ONNX debug
The purpose is to inspect the desired intermediate layer from onnx

Flow:
    - A. Load the model and do some sanity check on it
    - A*. You could inspect the input/output here
    - B. Get the model node (length and node names in string)
        *Add this step, you could use any editor, and CTRL+F your desired layer
        in my case, I want to get Resnet101 with block4 output
    - C. Put the name into some variables
    - D. Add the intermediate layers' name into the helper function
    - E. Add the helper function into model output
    - F. Save the model with the added outputs
    - G. Load the customized model
    - H. Inspect the new I/O's names of the customized model here
    - I. Do some inference on your image, and get/save the output
    
'''

# Step A
# Load the model; the optional checker call below is left disabled.
model = onnx.load(model_path)
#onnx.checker.check_model(model)

# Step A*
#sess = rt.InferenceSession(os.path.join(path,'frozen.onnx'))
##sess = rt.InferenceSession('D:\\embedmask_simplifier.onnx')
#input1 = sess.get_inputs()[0].name
#outputs = sess.run([], {input1: img_preprocess})

# Step B
# Node count and a text dump of all nodes; print or search model_node to find the tensor names you need.
node_count = len(model.graph.node)
model_node = str(model.graph.node)

# Step C
# Names of the intermediate tensors that should become extra outputs.
unit1_name = '/model/model/encoders.0/encoders.0.0/act/LeakyRelu_output_0'
unit2_name = '/model/model/encoders.0/encoders.0.0/conv2/Conv_output_0'

# Step D
# Create one ValueInfoProto per tensor; only the name field is set.
info_unit1_name = helper.ValueInfoProto()
info_unit1_name.name = unit1_name
info_unit2_name = helper.ValueInfoProto()
info_unit2_name.name = unit2_name

# Step E
# Append the new entries to the graph's output list.
model.graph.output.extend([info_unit1_name,info_unit2_name])

# Step F
# Save the modified model as 'frozen_out.onnx' (relative to the current working directory).
onnx.save(model, os.path.join('frozen_out.onnx'))

## save ONNX subgraph

In [None]:
from skl2onnx.helpers.onnx_helper import select_model_inputs_outputs
from skl2onnx.helpers.onnx_helper import save_onnx_model
from skl2onnx.helpers.onnx_helper import enumerate_model_node_outputs
from skl2onnx.helpers.onnx_helper import load_onnx_model
# Model from which a sub-graph is extracted.
model_path = r"C:\Users\GAME\Documents\Project\Adobe\model_rename\Adobe-Lr_dn_bayerlinear1024.onnx"
model_onnx = load_onnx_model(model_path)
# Uncomment to list every node output name that can be used as a cut point:
# for out in enumerate_model_node_outputs(model_onnx):
#     print(out)

# Sample of the names printed by the loop above for this model (kept for reference only).
"""
/model/model/intro/Conv_output_0
/model/model/encoders.0/encoders.0.0/conv1/Conv_output_0
/model/model/encoders.0/encoders.0.0/act/LeakyRelu_output_0
/model/model/encoders.0/encoders.0.0/conv2/Conv_output_0
/model/model/encoders.0/encoders.0.0/Mul_output_0
/model/model/encoders.0/encoders.0.0/Add_output_0
/model/model/encoders.0/encoders.0.0/conv3/Conv_output_0
/model/model/encoders.0/encoders.0.0/act_1/LeakyRelu_output_0
/model/model/encoders.0/encoders.0.0/conv4/Conv_output_0
/model/model/encoders.0/encoders.0.0/Mul_1_output_0
/model/model/encoders.0/encoders.0.0/Add_1_output_0
/model/model/downs.0/Conv_output_0
/model/model/encoders.1/encoders.1.0/conv1/Conv_output_0
/model/model/encoders.1/encoders.1.0/act/LeakyRelu_output_0
/model/model/encoders.1/encoders.1.0/conv2/Conv_output_0
/model/model/encoders.1/encoders.1.0/Mul_output_0
/model/model/encoders.1/encoders.1.0/Add_output_0
/model/model/encoders.1/encoders.1.0/conv3/Conv_output_0
/model/model/encoders.1/encoders.1.0/act_1/LeakyRelu_output_0
/model/model/encoders.1/encoders.1.0/conv4/Conv_output_0
/model/model/encoders.1/encoders.1.0/Mul_1_output_0
/model/model/encoders.1/encoders.1.0/Add_1_output_0
/model/model/encoders.1/encoders.1.1/conv1/Conv_output_0
/model/model/encoders.1/encoders.1.1/act/LeakyRelu_output_0
/model/model/encoders.1/encoders.1.1/conv2/Conv_output_0
...
/model/model/ups.2/ups.2.0/Resize_output_0
/model/model/ups.3/ups.3.0/Resize_input_cast_0
/model/model/ups.3/ups.3.0/Resize_input_cast_2
/model/model/ups.3/ups.3.0/Resize_output_
"""

# Select the model up to the chosen node output and save the result as "test2.onnx".
num_onnx = select_model_inputs_outputs(model_onnx, '/model/model/encoders.0/encoders.0.0/conv3/Conv_output_0')
save_onnx_model(num_onnx, "test2.onnx")

# Map Adobe model name

In [None]:
# Tab-separated mapping file with two columns; its first line is a header and is skipped.
mapping_file = "model_mapping.txt"
line_count = 0
# Maps the second column of every row to the first column.
dict_name = {}
with open(mapping_file, "r") as f:
    for line_count, line in enumerate(f, start=1):
        if line_count == 1:
            continue
        line = line.rstrip()
        if not line:
            # Blank lines (e.g. a trailing newline at the end of the file) carry no
            # mapping and would otherwise break the two-value unpacking below.
            continue
        a, b = line.split("\t")
        dict_name[b] = a

print(dict_name)




In [None]:
# get all onnx model
import os
import shutil
# Source tree that is walked recursively, and the flat folder that receives the renamed copies.
directory = r"Adobe_models\Adobe_models\AI_Model_Restricted"
new_directory = r"model_rename"
# shutil.copy2 does not create missing folders, so make sure the target exists first.
os.makedirs(new_directory, exist_ok=True)
for root, dirs, files in os.walk(directory):
    for file in files:
        # dict_name is keyed by the file name with everything from ".onnx" on removed.
        key = file.split(".onnx")[0]
        if key not in dict_name:
            continue

        target_path = os.path.join(new_directory, dict_name[key]+".onnx")
        source_path = os.path.join(root,file)
        if not os.path.exists(source_path):
            # Report files that disappeared between the walk and the copy, then move on.
            print(source_path)
            continue
        shutil.copy2(source_path, target_path)

# <a id='section2'>Extract DML DDI info </a>

In [None]:
import os
import shutil
import glob
ddi_root= r"C:\Intel\igfx\d3d12"
new_root= r"C:\Users\GAME\Documents\Project\Adobe\analyze\ddilog_backup"
modelfile = glob.glob(r"C:\Users\GAME\Documents\Project\Adobe\model_rename\*.onnx")
#print(modelfile)
# Upper bound on the number of numbered logs (1.log, 2.log, ...) that are backed up.
max_log_count = 45
# shutil.copy does not create missing folders, so make sure the target exists first.
os.makedirs(new_root, exist_ok=True)
# Log N is renamed after the N-th model returned by glob; never index past the models found.
# NOTE(review): this pairing assumes the logs were produced in the same order glob returns the models — confirm.
for i in range(min(max_log_count, len(modelfile))):
    ddi_file = str(i+1)+".log"
    print(os.path.basename(modelfile[i]))
    modelname = os.path.basename(modelfile[i]).split(".onnx")[0]
    source_path = os.path.join(ddi_root, ddi_file)
    target_path = os.path.join(new_root, modelname+".log")
    if not os.path.exists(source_path):
        # Report the missing log and continue with the next model.
        print(source_path)
        continue
    shutil.copy(source_path, target_path)

In [None]:
import re
# Two sample dimension lines; the second assignment is the one that is actually parsed.
input_string = "Input = [0x1,0x200,0x80,0x80], Filter = [0x200,0x200,0x3,0x3], Bias = [0x1,0x200,0x1,0x1], Output = [0x1,0x200,0x80,0x80]"
input_string = "Input = [0x1,0x3,0x400,0x400], Filter = [0x80,0x3,0x3,0x3], Bias = [0x1,0x80,0x1,0x1], Output = [0x1,0x80,0x400,0x400]"
# Collect every hexadecimal token and convert it to an integer.
values = re.findall(r'0x\w+', input_string)
output = [int(token, 16) for token in values]

# Each tensor contributes four consecutive values, in the order they appear in the line.
for label, start in (("input ", 0), ("filter", 4), ("bias  ", 8), ("output", 12)):
    print(label, output[start:start + 4])


## Parse ddi log into json file
> *todo*: sometimes a kernel fails, but its kernel information is still dumped in the log. This needs to be handled during parsing.

### Convolution

In [None]:

import json
import csv
import re
import pandas as pd
import os

def create_ddiConv_json(root_path,filename):
    """Parse the Convolution metacommands of a DDI log into a JSON file.

    Every line containing ``"Passed-Metacommand type : Convolution1"`` starts
    one record. The kernel name and the dimension/layout lines are searched in
    the lines just before it; data types, flags, strides, padding, dilation,
    group count, direction, activation and execution flag are read from fixed
    line offsets after it. Records are grouped as
    ``{metacommand type: {kernel name: [record, ...]}}`` and written to
    ``<root_path>/<filename with ".log" replaced by "_conv.json">``.

    Args:
        root_path: Folder that contains the log; the JSON file is written there.
        filename: Name of the DDI log file inside ``root_path``.

    Raises:
        ValueError: If neither a "Conv Kernel:" nor a "Gemm Kernel:" line is
            found in the 11 lines ending at a metacommand line.
    """
    ddi_file = os.path.join(root_path, filename)
    # Only the ".log" suffix is replaced (not every "log" substring), matching
    # the naming used by the GEMM and Pooling parsers.
    json_file = os.path.join(root_path, filename.replace(".log","_conv.json"))
    dic ={}
    with open(ddi_file, "r") as openf:
        lines = openf.readlines()

    def hex_field(line_idx):
        # Hexadecimal value after the last "=" of the given line.
        return int(lines[line_idx].rstrip().split("=")[-1],16)

    def paren_field(line_idx):
        # Text inside the first pair of parentheses of the given line.
        return re.findall(r'\((.*?)\)', lines[line_idx])[0]

    for i in range(len(lines)):
        if "Passed-Metacommand type : Convolution1" not in lines[i]:
            continue
        mc_type = lines[i].rstrip().split("type :")[-1]

        # Look back over the metacommand line and the 10 lines before it for the kernel name.
        for k in range(11):
            if "Conv Kernel:" in lines[i-k] \
                or "Gemm Kernel:" in lines[i-k]:  # for the case of GemmBasedConvolution
                kernel_name_idx = i-k
                break
        else:
            raise ValueError(
                "no conv kernel found before line {} of {}".format(i + 1, ddi_file))

        kernel_name = lines[kernel_name_idx].rstrip().split("Kernel:")[-1]
        kernel_records = dic.setdefault(mc_type, {}).setdefault(kernel_name, [])

        # Walk back from the kernel line to the dimension line: the first line with
        # more than five hex values that also mentions "Filter". The layout line is
        # taken two lines after it.
        layout_idx = -1
        dim_idx = -1
        param_str = []
        for k in range(8):
            dim_idx = kernel_name_idx-k
            layout_idx = kernel_name_idx - k+2
            param_str =  re.findall(r'0x\w+', lines[dim_idx])
            if len(param_str) > 5 \
                and "Filter" in lines[dim_idx]:  # for the case of GemmBasedConvolution
                break

        param_list = [int(val, 16) for val in param_str]
        layout_str = re.findall(r'N\w+', lines[layout_idx])

        # Some logs have "BiasDesc = IsNull"; the output descriptor then starts
        # earlier and the dimension line holds no bias values.
        if "IsNull" in lines[i+5+38]:
            bias_value = "isnull"
            outputdesc_idx = i+5+38+2
            output_shape = param_list[9:13]
        else:
            outputdesc_idx = i+5+19+38
            bias_value = param_list[9:13]
            output_shape =  param_list[13:17]

        input_stride = [hex_field(i+12+j) for j in range(4)]
        output_stride = [hex_field(outputdesc_idx+8+j) for j in range(4)]
        # Output padding values are stored as the raw text after "=".
        outputpadding_list = [lines[outputdesc_idx+35+j].rstrip().split("=")[-1] for j in range(5)]
        filter_stride = [hex_field(outputdesc_idx+22+j) for j in range(3)]
        filter_dilation = [hex_field(outputdesc_idx+25+j) for j in range(3)]
        input_padding = [hex_field(outputdesc_idx+28+j) for j in range(6)]
        group_count =int( re.findall(r'0x\w+', lines[outputdesc_idx+40])[0],16)

        info_dic ={}
        info_dic["input"] = {"shape": param_list[1:5], "layout": layout_str[0], "datatype": paren_field(i+5), "flag": paren_field(i+6), "stride": input_stride, "padding": input_padding}
        # The key "stirde" (sic) is kept: the JSON-to-CSV conversion reads it under that name.
        info_dic["filter"] = {"shape": param_list[5:9], "layout": layout_str[1], "datatype": paren_field(i+5 + 19),"flag": paren_field(i+6+19),"stirde": filter_stride, "dilation":filter_dilation,  "groupcount": group_count}
        info_dic["output"] = {"shape":output_shape, "layout": layout_str[2], "datatype": paren_field(outputdesc_idx+1),"flag":paren_field(outputdesc_idx+2), "stride": output_stride,"padding":outputpadding_list}
        info_dic["bias"] = bias_value
        info_dic["direction"] = paren_field(outputdesc_idx+20)

        # An activation "Function" line, when present, shifts the execution-flag line by two.
        if "Function" in lines[outputdesc_idx+42]:
            info_dic["activation"] =  paren_field(outputdesc_idx+42)
            exec_flag_idx = outputdesc_idx+46
        else:
            info_dic["activation"] = "isnull"
            exec_flag_idx = outputdesc_idx+44

        exec_flag_info = re.findall(r'0x\w+', lines[exec_flag_idx])
        info_dic["exec_flag"] = int(exec_flag_info[0],16) if exec_flag_info else 0

        kernel_records.append(info_dic)

    json_object = json.dumps(dic, indent=4)
    with open(json_file, "w") as outfile:
        outfile.write(json_object)




### GEMM

In [None]:
import json
import csv
import re
import pandas as pd
import os

def create_ddiGEMM_json(root_path, basename):
    """Parse the GEMM metacommands of a DDI log into a JSON file.

    Every line containing ``"Passed-Metacommand type : GEMM1"`` starts one
    record. The kernel name is searched in the lines just before it, the
    dimension line is taken two lines before the kernel line, and data types,
    flags, transpose settings, alpha and beta are read from fixed line offsets
    after it. Records are grouped as
    ``{metacommand type: {kernel name: [record, ...]}}`` and written to
    ``<root_path>/<basename with ".log" replaced by "_gemm.json">``.

    Args:
        root_path: Folder that contains the log; the JSON file is written there.
        basename: Name of the DDI log file inside ``root_path``.

    Raises:
        AssertionError: If no "Gemm Kernel:" line is found in the 9 lines
            ending at a metacommand line, or if the dimension line holds
            fewer than 10 hexadecimal values.
    """
    ddi_file = os.path.join(root_path, basename)
    json_file = os.path.join(root_path, basename.replace(".log","_gemm.json"))
    dic ={}
    with open(ddi_file, "r") as openf:
        lines = openf.readlines()

    def paren_field(line_idx):
        # Text inside the first pair of parentheses of the given line.
        return re.findall(r'\((.*?)\)', lines[line_idx])[0]

    for i in range(len(lines)):
        if "Passed-Metacommand type : GEMM1" not in lines[i]:
            continue
        info_dic ={}
        mc_type = lines[i].rstrip().split("type :")[-1]

        # max search = 6 if not using cache shader ; max search = 6 + 1 if using cache shader
        for k in range(9):
            if "Gemm Kernel:" in lines[i-k]:
                kernel_name_idx = i-k
                kernel_name = lines[kernel_name_idx].rstrip().split("Kernel:")[-1]
                break
        else:
            print("no gemm kernel")
            # Raised explicitly (instead of `assert False`) so the check survives `python -O`.
            raise AssertionError(
                "no gemm kernel found before line {} of {}".format(i + 1, ddi_file))

        kernel_records = dic.setdefault(mc_type, {}).setdefault(kernel_name, [])

        dim_idx = kernel_name_idx - 2
        param_str =  re.findall(r'0x\w+', lines[dim_idx])
        if len(param_str) < 10:
            raise AssertionError(
                "expected at least 10 hex values on line {} of {}".format(dim_idx + 1, ddi_file))

        param_list = [int(val, 16) for val in param_str]

        inputA_shape = param_list[3:5] # the first value is the thread id; gemm only has two dims, M x K
        inputB_shape = param_list[7:9] # K x N

        if "IsNull" in lines[i + 41+ 2]:
            outputdesc_idx = i + 41 + 4 # 4 is the offset from the start of CDesc
            info_dic["inputC"] = "isnull"
            output_shape = param_list[11:13]
        else:
            # CDesc is not null
            outputdesc_idx = i + 41 + 21
            info_dic["inputC"] = "not null"
            output_shape = param_list[15:17]

        # Strides are not extracted; they are not needed for the GEMM report.
        transA = "false" if int(lines[outputdesc_idx +  20].rstrip().split("=")[-1],16) == 0 else "true"
        transB = "false" if int(lines[outputdesc_idx +  21].rstrip().split("=")[-1],16) ==0 else "true"
        # alpha and beta are kept as the raw text after "=".
        alpha = lines[outputdesc_idx +  22].rstrip().split("=")[-1]
        beta = lines[outputdesc_idx +  23].rstrip().split("=")[-1]

        info_dic["config"] = {"alpha": alpha, "beta": beta}
        info_dic["inputA"] = {"shape": inputA_shape,  "datatype": paren_field(i+5), "flag": paren_field(i+6), "transA": transA}
        info_dic["inputB"] = {"shape": inputB_shape,  "datatype": paren_field(i+5+19), "flag": paren_field(i+6 + 19), "transB": transB}
        info_dic["output"] = {"shape":output_shape,   "datatype": paren_field(outputdesc_idx + 1), "flag": paren_field(outputdesc_idx + 2)}

        kernel_records.append(info_dic)

    json_object = json.dumps(dic, indent=4)
    with open(json_file, "w") as outfile:
        outfile.write(json_object)

### Pooling

In [None]:
import json
import csv
import re
import pandas as pd
import os

def create_ddiPool_json(root_path, basename):
    """Parse the Pooling metacommands of a DDI log into a JSON file.

    Every line containing ``"Passed-Metacommand type : Pooling"`` starts one
    record. The kernel name is read three lines before it; layouts, shapes,
    data types, flags, pooling function, stride, window size and padding are
    read from fixed line offsets after it. Records are grouped as
    ``{metacommand type: {kernel name: [record, ...]}}`` and written to
    ``<root_path>/<basename with ".log" replaced by "_pool.json">``.

    Args:
        root_path: Folder that contains the log; the JSON file is written there.
        basename: Name of the DDI log file inside ``root_path``.
    """
    ddi_file = os.path.join(root_path, basename)
    json_file = os.path.join(root_path, basename.replace(".log","_pool.json"))
    dic ={}
    with open(ddi_file, "r") as openf:
        lines = openf.readlines()

    def hex_field(line_idx):
        # Hexadecimal value after the last "=" of the given line.
        return int(lines[line_idx].rstrip().split("=")[-1],16)

    def paren_field(line_idx):
        # Text inside the first pair of parentheses of the given line.
        return re.findall(r'\((.*?)\)', lines[line_idx])[0]

    # Index order follows the driver code (MetaCommandPoolingDnnl, line 62).
    available_function = ["AvgPool", "L2Pool", "MaxPool"]

    for i in range(len(lines)):
        if "Passed-Metacommand type : Pooling" not in lines[i]:
            continue
        info_dic ={}
        mc_type = lines[i].rstrip().split("type :")[-1]
        kernel_name_idx = i - 3

        kernel_name = lines[kernel_name_idx].rstrip().split("Kernel:")[-1]
        kernel_name = "unknown" if kernel_name=="" else kernel_name
        kernel_records = dic.setdefault(mc_type, {}).setdefault(kernel_name, [])

        layout_list = re.findall(r'N\w+', lines[i+4])

        # Strides/offsets of the tensors are not extracted; only the shapes are needed.
        input_shape = [hex_field(i+9+j) for j in range(4)]

        outputdesc_idx = i+24
        output_shape = [hex_field(outputdesc_idx + 4 + j) for j in range(4)]

        pooling_function = available_function[hex_field(outputdesc_idx + 19)]
        stride = [hex_field(outputdesc_idx +  21 +j) for j in range(2)]
        kernel_size = [hex_field(outputdesc_idx +  24 +j) for j in range(2)]  # WindowSize

        # [h_begin, w_begin, h_end, w_end]; the third value of each group of three is skipped.
        padding_size = [hex_field(outputdesc_idx +  27 +j) for j in (0, 1, 3, 4)]

        # m_PoolingParams.PoolingType is not the kernel size.
        info_dic["input"] = {"layout": layout_list[0],"shape": input_shape,  "datatype": paren_field(i+6), "flag": paren_field(i+7)}
        info_dic["filter"] = {"shape": kernel_size, "stride": stride, "padding": padding_size}
        info_dic["pool_function"] = pooling_function
        info_dic["output"] = {"layout": layout_list[1], "shape":output_shape,   "datatype": paren_field(outputdesc_idx + 1), "flag": paren_field(outputdesc_idx + 2)}

        kernel_records.append(info_dic)

    json_object = json.dumps(dic, indent=4)
    with open(json_file, "w") as outfile:
        outfile.write(json_object)

## MHA

In [16]:
import os
import csv
import re
def create_ddiMHA_csv( root_path,ddi_file):
    """Extract the MHA metacommands of a DDI log into CSV files.

    Every line containing ``"Passed-Metacommand type : MHA"`` produces one row
    in ``<root_path>/DDI_MHA.csv``. The kernel names and the A/B/Output sizes
    of the two GEMMs are searched in the metacommand line and up to 49 lines
    before it; when a field appears several times in that window, the
    occurrence farthest from the metacommand line wins. Afterwards the CSV is
    read back and a count per (gemm0 kernel, gemm1 kernel) pair is written to
    ``<root_path>/ddi_mha_pivot_table.csv`` and printed.

    Args:
        root_path: Folder that contains the log; both CSV files are written there.
        ddi_file: Name of the DDI log file inside ``root_path``.
    """
    with open(os.path.join(root_path,ddi_file), "r") as openf:
        lines = openf.readlines()

    csv_file = os.path.join(root_path,"DDI_MHA.csv")
    line = ["mc_type","gemm0_kernel_name", "gemm0_A_size", "gemm0_B_size","gemm0_Output_size",
            "gemm1_kernel_name", "gemm1_A_size", "gemm1_B_size","gemm1_Output_size"]

    with open(csv_file,"w",newline='') as csvf:
        writer =csv.writer(csvf)
        writer.writerow(line)
        for i in range(len(lines)):
            if "Passed-Metacommand type : MHA" not in lines[i]:
                continue
            mc_type = lines[i].rstrip().split("type :")[-1]
            gemm0_kernel_name = ""
            gemm0_A_size = ""
            gemm0_B_size = ""
            gemm0_Output_size = ""

            gemm1_kernel_name = ""
            gemm1_A_size = ""
            gemm1_B_size = ""
            gemm1_Output_size = ""

            # Stop at the first line of the file: negative indices would wrap around
            # and pick up values from the end of the log.
            for k in range(min(50, i + 1)):
                current = lines[i-k]
                if "m_mhaGemm0Desc.ADesc.Size" in current:
                    print(current)
                    gemm0_A_size =  re.findall(r'\[(.*?)\]', current)[0]
                    continue
                if "m_mhaGemm0Desc.BDesc.Size" in current:
                    print(current)
                    gemm0_B_size =  re.findall(r'\[(.*?)\]', current)[0]
                    continue
                if "m_mhaGemm0Desc.OutputDesc.Size" in current:
                    print(current)
                    gemm0_Output_size =  re.findall(r'\[(.*?)\]', current)[0]
                    continue
                if "MHA Gemm0 Shader Code" in current or "BLOB Gemm Kernel" in current:
                    # NOTE(review): every "BLOB Gemm Kernel" line is attributed to gemm0
                    # because this test runs first — confirm that this is intended.
                    if "MHA Gemm0 Shader Code" in current:
                        gemm0_kernel_name =  current.split('=')[1][:-1]
                    else:
                        gemm0_kernel_name = current.split("BLOB Gemm Kernel")[-1].strip().split(".cpp")[0]
                    continue

                if "m_mhaGemm1Desc.ADesc.Size" in current:
                    print(current)
                    gemm1_A_size =  re.findall(r'\[(.*?)\]', current)[0]
                    continue
                if "m_mhaGemm1Desc.BDesc.Size" in current:
                    print(current)
                    gemm1_B_size =  re.findall(r'\[(.*?)\]', current)[0]
                    continue
                if "m_mhaGemm1Desc.OutputDesc.Size" in current:
                    print(current)
                    gemm1_Output_size =  re.findall(r'\[(.*?)\]', current)[0]
                    continue
                if "MHA Gemm1 Shader Code" in current:
                    # Take the text after "=" (minus the trailing newline), as for gemm0.
                    gemm1_kernel_name =  current.split('=')[1][:-1]
                    continue

            writer.writerow([mc_type,gemm0_kernel_name, gemm0_A_size, gemm0_B_size,gemm0_Output_size,
                                gemm1_kernel_name, gemm1_A_size, gemm1_B_size,gemm1_Output_size])

    print("{} generated".format(csv_file))
    # Count how often each (gemm0 kernel, gemm1 kernel) pair occurs, most frequent first.
    df = pd.read_csv(csv_file)
    s = pd.pivot_table(df, index=['gemm0_kernel_name', "gemm1_kernel_name"], aggfunc={"gemm0_kernel_name": "count", })
    s.columns=['count']
    ddi_conv_pivot_table = s.sort_values(by='count', ascending=0)
    ddi_conv_pivot_table.to_csv(os.path.join(root_path, "ddi_mha_pivot_table.csv"))
    print(s)



In [None]:
import glob
import pandas as pd
import os
# Quick inspection of one parsed DDI JSON file: print how many records each kernel has.
root_path = r"C:\Intel\igfx\d3d12"
basename = "python0.json"
count = 0

df = pd.read_json(os.path.join(root_path,basename))
#print(basename)
str_info = basename+"	"
# Only the first top-level key (the first metacommand type) of the JSON is inspected.
conv =df[df.keys()[0]]

# Each key is a kernel name; its value is the list of records parsed for that kernel.
for key in conv.keys():
    print(key, len(conv[key]))
    #for i in range(len(conv[key])):


#print(count) 
    #print(str_info,f32_count,f16_count)


In [None]:

import glob

# Single-file variant, kept for reference:
# root_path = r"C:\Users\GAME\Documents\Project\Adobe\analyze\ddilog_full"
# basename = "Adobe-Ae_FastMask_Query1024.log"
#create_ddi_json(root_path, basename)

# Parse every *.log file in root_path and write one "<name>_pool.json" next to each log.
root_path = r"C:\Users\GAME\Documents\Project\Adobe\analyze\ddilog_full"
files = glob.glob(root_path+"\\*.log")

for file in files:
    basename = os.path.basename(file)
    #print(basename)
    create_ddiPool_json(root_path, basename)

## Convert json to csv

### Convolution

In [None]:

import glob
import pandas as pd
import os
# Flatten every "*_conv.json" file in root_path into one CSV file, one row per convolution record.
root_path = r"C:\Users\GAME\Documents\Project\Adobe\DDIlog\model_Sept"
files = glob.glob(root_path+"\\*_conv.json")

csv_file = os.path.join(root_path,"DDI_conv.csv")
line = ["model_name", "kernel_name",
        "input_shape_n", "input_shape_c", "input_shape_h", "input_shape_w",
        "input_layout", "input_datatype", "input_flag", "input_padding",
        "filter_shape_n", "filter_shape_c", "filter_shape_h", "filter_shape_w",
        "filter_layout", "filter_datatype", "filter_flag",
        "filter_stride_h", "filter_stride_w", "filter_stride_c",
        "filter_dilation_h", "filter_dilation_w", "filter_dilation_c", "filter_groupcount",
        "output_shape_n", "output_shape_c", "output_shape_h", "output_shape_w",
        "output_layout", "output_datatype", "output_flag", "output_padding",
        "bias", "direction", "activation", "exec_flag"]
row_index = 0
# The with-block closes the CSV file even when a JSON file turns out to be malformed.
with open(csv_file,"w",newline='') as csvf:
    writer = csv.writer(csvf)
    writer.writerow(line)
    for file in files:
        basename = os.path.basename(file)
        df = pd.read_json(file)
        if len(df.keys()) == 0:
            # The log contained no convolution, so the JSON file is an empty object.
            continue
        # Only the first top-level key (the first metacommand type) is exported.
        conv = df[df.keys()[0]]
        for key in conv.keys():
            kernel_name = key
            for entry in conv[key]:
                input_shape = entry["input"]["shape"]
                input_layout = entry["input"]["layout"]
                input_datatype = entry["input"]["datatype"]
                input_flag = entry["input"]["flag"]
                input_padding = [int(v) for v in entry["input"]["padding"]]

                filter_shape = entry["filter"]["shape"]
                filter_layout = entry["filter"]["layout"]
                filter_datatype = entry["filter"]["datatype"]
                filter_flag = entry["filter"]["flag"]
                # "stirde" (sic) is the key name used in the JSON files.
                filter_stride = entry["filter"]["stirde"]
                filter_dilation = entry["filter"]["dilation"]
                filter_groupcount = entry["filter"]["groupcount"]

                output_shape = entry["output"]["shape"]
                output_layout = entry["output"]["layout"]
                output_datatype = entry["output"]["datatype"]
                output_flag = entry["output"]["flag"]
                output_padding = [int(v) for v in entry["output"]["padding"]]

                bias = entry["bias"]
                direction = entry["direction"]
                activation = entry["activation"]
                exec_flag = entry["exec_flag"]
                row_index +=1
                # Remove the repeated Conv info by skipping every second record; may need
                # to be double-checked if the application (ort, dml or driver) changes.
                # NOTE(review): the counter runs across kernels and files, so this assumes
                # records always come in pairs — confirm.
                if row_index % 2 ==0:
                    continue
                writer.writerow([basename, kernel_name,
                                 input_shape[0], input_shape[1], input_shape[2], input_shape[3],
                                 input_layout, input_datatype, input_flag, input_padding,
                                 filter_shape[0], filter_shape[1], filter_shape[2], filter_shape[3],
                                 filter_layout, filter_datatype, filter_flag,
                                 filter_stride[0], filter_stride[1], filter_stride[2],
                                 filter_dilation[0], filter_dilation[1], filter_dilation[2], filter_groupcount,
                                 output_shape[0], output_shape[1], output_shape[2], output_shape[3],
                                 output_layout, output_datatype, output_flag, output_padding,
                                 bias, direction, activation, exec_flag])

### GEMM

In [None]:

import glob
import pandas as pd
import os
# Flatten every "*_gemm.json" file in root_path into one CSV file, one row per GEMM record.
root_path = r"C:\Users\GAME\Documents\Project\Adobe\analyze\ddilog_full"
files = glob.glob(root_path+"\\*_gemm.json")

csv_file = os.path.join(root_path,"Adobe_GEMM.csv")
line = ["model_name", "kernel_name",
        "inputA_shape_M", "inputA_shape_K", "inputA_datatype", "inputA_flag", "transA",
        "inputB_shape_K", "inputB_shape_N", "inputB_datatype", "inputB_flag", "transB",
        "output_shape_M", "output_shape_N", "output_datatype", "output_flag",
        "inputC_flag", "alpha", "beta"]
# The with-block closes the CSV file even when a JSON file turns out to be malformed.
with open(csv_file,"w",newline='') as csvf:
    writer = csv.writer(csvf)
    writer.writerow(line)
    for file in files:
        basename = os.path.basename(file)
        df = pd.read_json(file)
        if len(df.keys()) == 0:
            # The log contained no GEMM, so the JSON file is an empty object.
            continue
        #print(basename)
        # Only the first top-level key (the first metacommand type) is exported.
        gemm = df[df.keys()[0]]
        for key in gemm.keys():
            kernel_name = key
            for entry in gemm[key]:
                inputA_shape = entry["inputA"]["shape"]
                inputA_datatype = entry["inputA"]["datatype"]
                inputA_flag = entry["inputA"]["flag"]
                transA = entry["inputA"]["transA"]

                inputB_shape = entry["inputB"]["shape"]
                inputB_datatype = entry["inputB"]["datatype"]
                inputB_flag = entry["inputB"]["flag"]
                transB = entry["inputB"]["transB"]

                output_shape = entry["output"]["shape"]
                output_datatype = entry["output"]["datatype"]
                output_flag = entry["output"]["flag"]

                inputC_flag = entry["inputC"]
                alpha = entry["config"]["alpha"]
                beta = entry["config"]["beta"]

                writer.writerow([basename, kernel_name,
                                 inputA_shape[0], inputA_shape[1], inputA_datatype, inputA_flag, transA,
                                 inputB_shape[0], inputB_shape[1], inputB_datatype, inputB_flag, transB,
                                 output_shape[0], output_shape[1], output_datatype, output_flag,
                                 inputC_flag, alpha, beta])

### Pooling

In [None]:
import csv
import glob
import pandas as pd
import os

# Flatten every "*_pool.json" file under root_path into Adobe_Pooling.csv,
# writing one CSV row per pooling record found in each file.
root_path = r"C:\Users\GAME\Documents\Project\Adobe\analyze\ddilog_full"
files = glob.glob(root_path+"\\*_pool.json")

csv_file = os.path.join(root_path,"Adobe_Pooling.csv")
# the name is mapping to https://onnx.ai/onnx/operators/onnx__MaxPool.html#maxpool
line = ["model_name", "kernel_name","pool_function", \
       "input_layout", "input_shape_n","input_shape_c","input_shape_h","input_shape_w", "input_datatype","input_flag",\
       "kernel_shape_h","kernel_shape_w", "stride_h", "stride_w","pads",\
       "output_layout", "output_shape_n","output_shape_c", "output_shape_h","output_shape_w", "output_datatype","output_flag" ]
# The context manager closes the CSV even when a JSON file is malformed and
# the loop below raises part-way through.
with open(csv_file,"w",newline='') as csvf:
    writer = csv.writer(csvf)
    writer.writerow(line)
    for file in files:
        basename = os.path.basename(file)
        df = pd.read_json(file)
        if len(df.keys()) == 0:
            continue
        #print(basename)
        # Only the first column of the parsed JSON is processed.
        pool = df[df.keys()[0]]
        for kernel_name in pool.keys():
            records = pool[kernel_name]
            for i in range(len(records)):
                # Look the record up once instead of re-indexing per field.
                record = records[i]
                pool_input = record["input"]
                pool_filter = record["filter"]
                pool_output = record["output"]

                input_shape = pool_input["shape"]
                filter_shape = pool_filter["shape"]
                filter_stride = pool_filter["stride"]
                output_shape = pool_output["shape"]

                writer.writerow([basename, kernel_name, record["pool_function"],
                                 pool_input["layout"], input_shape[0], input_shape[1], input_shape[2], input_shape[3],
                                 pool_input["datatype"], pool_input["flag"],
                                 filter_shape[0], filter_shape[1], filter_stride[0], filter_stride[1], pool_filter["padding"],
                                 pool_output["layout"], output_shape[0], output_shape[1], output_shape[2], output_shape[3],
                                 pool_output["datatype"], pool_output["flag"]])

In [None]:
def round_up_next_multiple(n,m):
    """Return the smallest multiple of ``m`` that is greater than or equal to ``n``.

    Floor division is required here: with true division the expression
    collapses to ``n + m - 1`` (as a float) and nothing is rounded.
    Intended for a non-negative integer ``n`` and a positive integer ``m``.
    """
    return ((n + m - 1) // m) * m

values = "-0.125488;0.279785;-0.343994;-0.166260;-0.479492;0.438477;-0.316650;-0.492920;0.111877;0.473877;0.285156;0.483154;0.107544;-0.486816;0.308350;-0.269043;-0.377930;-0.326660;0.162476;-0.291992;0.469482;-0.104858;0.421875;0.020828;-0.111328;0.086731;0.042694;-0.334717;0.272217;-0.485840;0.229004;0.426270;0.363037;-0.404541;-0.174805;-0.225220;-0.380371;0.221680;-0.006203;-0.389160;-0.468506;0.063293;-0.250732;-0.296875;-0.210205;0.124329;0.371582;-0.083496;0.307373;0.406738;-0.072876;-0.195190;-0.082581;-0.230591;-0.176758;-0.096191;0.462402;0.212280;-0.463135;-0.088989;0.408203;0.030930;-0.258057;-0.130371;-0.132202;-0.101196;0.335205;0.195801;0.177612;0.309570;-0.325684;0.017746;-0.158936;0.041443;0.160034;0.016632;-0.406982;0.382568;-0.150757;0.356445;0.296631;0.098633;-0.400146;0.101135;-0.443604;-0.287598;0.117493;-0.068054;-0.100159;-0.133667;0.118408;0.092407;0.180298;0.448975;-0.114563;0.184204;0.109985;0.409424;0.255371;0.046722;0.342285;0.394775;-0.173462;-0.454834;0.247314;-0.143188;-0.223999;-0.425537;-0.105103;0.315430;0.290283;-0.141479;0.350098;-0.436523;0.165894;0.387207;0.471680;0.061279;-0.459473;-0.474609;0.395752;0.008568;0.104431;-0.271240;0.194824;0.308105;-0.043457;0.392578;-0.377930;-0.389893;-0.499512;-0.493164;-0.015167;-0.162354;-0.281250;-0.136353;-0.253174;-0.199097;-0.233276;-0.448486;0.134399;-0.010544;0.092712;-0.262451;-0.029694;0.035767;-0.349365;-0.459229;-0.279785;-0.273438;0.440430;0.436768;0.235229;0.377441;-0.324951;0.029648;0.465332;0.133057;0.200317;0.387207;0.450684;0.096863;-0.441895;-0.357178;0.469971;-0.499268;-0.195801;-0.477051;-0.360596;-0.267334;-0.300293;-0.033234;-0.329590;0.442139;-0.195435;-0.259033;-0.004822;-0.108948;-0.188232;0.067688;0.275146;0.426758;-0.411621;0.461182;-0.228638;0.465332;-0.359131;-0.484375;-0.301270;-0.301270;0.271240;0.151123;0.123291;-0.129150;0.229614;0.061249;0.213257;-0.263916;0.022736;-0.060669;0.136353;0.195557;-0.089600;0.442871;-0.338867;-0.204346;0.303711;0.383301;0.395996;-
0.227905;0.318115;-0.335449;-0.277832;-0.255859;0.018784;-0.435059;-0.248169;-0.351807;0.109558;-0.467041;-0.260498;-0.052216;0.172119;-0.257812;0.132324;0.316406;-0.179199;0.358398;-0.483398;-0.151367;0.190918;0.337646;-0.386475;0.195801;0.317139;-0.239136;0.397217;-0.311279;0.225952;-0.095520;-0.316650;-0.343994;-0.040741;0.208130;0.222046;-0.318115;0.111633;-0.208740;-0.453369;-0.043915;-0.117554;-0.453613;-0.049500;0.465576;-0.484131;-0.059845;0.333252;-0.241211;-0.074829;-0.315186;-0.050232;0.097900;0.070435;-0.174683;0.039703;-0.219116;-0.203735;0.486816;-0.206543;0.206909;0.105957;-0.384033;-0.050537;-0.188965;0.091309;-0.027786;0.348877;0.270996;0.210693;-0.392090;-0.024628;0.407471;0.039856;-0.423096;0.380371;0.133423;-0.281494;0.039337;-0.143677;-0.271973;-0.147461;0.010750;0.192383;0.442871;0.058105;0.471680;0.196289;-0.215210;0.476562;-0.221313;0.180664;0.485596;-0.419189;0.228271;0.483398;-0.409668;0.008202;0.090881;0.211182;0.145142;-0.102417;-0.362549;-0.291016;-0.242065;0.482178;-0.258057;0.058289;-0.161011;0.346680;0.279785;0.231934;-0.054169;0.366211;0.150879;0.332520;0.492188;0.024750;0.024780;-0.207886;-0.409424;0.014236;0.359863;-0.435059;0.063293;-0.402344;0.183228;-0.465576;-0.317871;0.020065;-0.468750;0.439453;0.227295;-0.303955;0.344482;0.328857;0.107056;0.302246;-0.076599;-0.494385;0.211304;-0.426025;0.415039;-0.169067;0.168823;0.137573;-0.117065;0.260742;-0.243896;-0.072449;-0.298340;-0.185669;-0.360596;0.255615;0.098877;0.429688;-0.394531;-0.313477;-0.175659;-0.182007;0.147705;0.360840;0.034088;-0.380127;-0.331787;0.203003;-0.246094;-0.002752;0.497803;0.002680;-0.154907;-0.355225;0.052887;0.261719;0.303223;0.133545;0.298340;-0.313477;-0.174072;0.012093;-0.403809;-0.113281;0.175659;0.424805;-0.271484;0.055206;0.496338;0.400391;-0.221069;0.397217;0.387695;0.279785;-0.343994;-0.166260;-0.479492;0.438477;-0.316650;-0.492920;0.111877;0.473877;0.285156;0.483154;0.107544;-0.486816;0.308350;-0.269043;-0.377930;-0.326660;0.162476;-0.291992;0.46948
2;-0.104858;0.421875;0.020828;-0.111328;0.086731;0.042694;-0.334717;0.272217;-0.485840;0.229004;0.426270;0.363037;-0.404541;-0.174805;-0.225220;-0.380371;0.221680;-0.006203;-0.389160;-0.468506;0.063293;-0.250732;-0.296875;-0.210205;0.124329;0.371582;-0.083496;0.307373;0.406738;-0.072876;-0.195190;-0.082581;-0.230591;-0.176758;-0.096191;0.462402;0.212280;-0.463135;-0.088989;0.408203;0.030930;-0.258057;-0.130371;-0.132202;-0.101196;0.335205;0.195801;0.177612;0.309570;-0.325684;0.017746;-0.158936;0.041443;0.160034;0.016632;-0.406982;0.382568;-0.150757;0.356445;0.142090;0.098633;-0.400146;0.101135;-0.443604;-0.287598;0.117493;-0.068054;-0.100159;-0.133667;0.118408;0.092407;0.180298;0.448975;-0.114563;0.184204;0.109985;0.409424;0.255371;0.046722;0.342285;0.394775;-0.173462;-0.454834;0.247314;-0.143188;-0.223999;-0.425537;-0.105103;0.315430;0.290283;-0.141479;0.350098;-0.436523;0.165894;0.387207;0.471680;0.061279;-0.459473;-0.474609;0.395752;0.008568;0.104431;-0.271240;0.194824;0.308105;-0.043457;0.392578;-0.377930;-0.389893;-0.499512;-0.493164;-0.015167;-0.162354;-0.281250;-0.136353;-0.253174;-0.199097;-0.233276;-0.448486;0.134399;-0.010544;0.092712;-0.262451;-0.029694;0.035767;-0.349365;-0.459229;-0.279785;-0.273438;0.440430;0.436768;0.235229;0.377441;-0.324951;0.029648;0.465332;0.133057;0.200317;0.387207;0.350830;0.096863;-0.441895;-0.357178;0.469971;-0.499268;-0.195801;-0.477051;-0.360596;-0.267334;-0.300293;-0.033234;-0.329590;0.442139;-0.195435;-0.259033;-0.004822;-0.108948;-0.188232;0.067688;0.275146;0.426758;-0.411621;0.461182;-0.228638;0.465332;-0.359131;-0.484375;-0.301270;-0.301270;0.271240;0.151123;0.123291;-0.129150;0.229614;0.061249;0.213257;-0.263916;0.022736;-0.060669;0.136353;0.195557;-0.089600;0.442871;-0.338867;-0.204346;0.303711;0.383301;0.395996;-0.227905;0.318115;-0.335449;-0.277832;-0.255859;0.018784;-0.435059;-0.248169;-0.351807;0.109558;-0.467041;-0.260498;-0.052216;0.172119;-0.257812;0.132324;0.316406;-0.179199;0.358398;-0.483398;-0.151367;0.1909
18;0.337646;-0.386475;0.195801;0.317139;-0.239136;0.397217;-0.311279;0.225952;-0.095520;-0.415771"
# Parse the ';'-separated text into floats, then show every fifth value
# (indices 0, 5, 10, ...) together with how many such values there are.
values_list = list(map(float, values.split(";")))
every_fifth = values_list[::5]
print(len(every_fifth))
print(every_fifth)

# <a id='section1'>Extract DML PIX info</a>
* Before using the script below, please capture the PIX log first. You can find a step-by-step guide in this recording: [PIX GPU capture training-20220314_155822-Meeting Recording.mp4](https://intel.sharepoint.com/sites/applied_ai/_layouts/15/stream.aspx?id=%2Fsites%2Fapplied%5Fai%2FShared%20Documents%2FClient%20AI%2FPIX%20GPU%20capture%20training%2D20220314%5F155822%2DMeeting%20Recording%2Emp4&referrer=StreamWebApp%2EWeb&referrerScenario=AddressBarCopied%2Eview)

In [None]:
import os
import csv
import pandas as pd

# Parse a tab-separated PIX log, collect the timing of every
# ExecuteMetaCommand / Dispatch event between two row indices, and write the
# result to "<log>_test.csv".
rootpath = r"C:\Users\GAME\Documents\Project"
log_file = "PIX_log.txt"
file = os.path.join(rootpath, log_file)
# os.path.splitext removes only the extension. str.strip(".txt") strips any
# run of '.', 't' and 'x' characters from BOTH ends, which corrupts names
# such as "text.txt".
file = os.path.splitext(file)[0]
rawdata = pd.read_csv(f"{file}.txt",delimiter = '\t')

signal_count = 0
first_iteration_start = 0
second_iteration_start = 0
## Option 1:
#  use "DML_EXECUTION_PLAN" to find where is the start of first_iteration and second iteration
#  value = line number - 1
#
## Option 2:
#  use the algorithm below to find first iteration and second iteration
#  but most of the time it does not work
#  NOTE(review): as written it can never trigger, because `signal_count +=0`
#  never increments the counter.
#
# for index, line in rawdata.iterrows():
#     if signal_count == 5:
#         first_iteration = index
#     if signal_count == 10:
#         second_iteration = index
#         break
#     if "Signal" in line :
#         signal_count +=0
#
## Option 3: [todo] use the whole information to decide

first_iteration_start = 180
second_iteration_start = 960
prevline = ""
ex_operator_list=[]
ex_time = []

dispatch_operator_list=[]
dispatch_time=[]


def _name_and_time(row):
    """Return (stripped text of column 2, column 4 as int) for a log row."""
    # .iloc is explicit positional access; plain row[2] on a label-indexed
    # Series relies on a deprecated positional fallback.
    return row.iloc[2].strip(), int(row.iloc[4])


def _split_layer_info(op):
    """Split the last comma-separated field of ``op`` into (type, name).

    The field is expected to look like "TYPE (NAME)"; when there is no "(",
    the whole field is the type and the name is "unknown"
    (e.g. DML_OPERATOR_ACTIVATION_GELU).
    """
    layer_info = op.split(",")[-1]
    if "(" not in layer_info:
        return layer_info, "unknown"
    mark_idx = layer_info.index("(")
    return layer_info[0:mark_idx-1], layer_info[mark_idx+1:-1]


pre_checkDispatch =False
# Same row window as the original while-loop: rows from first_iteration_start
# up to second_iteration_start inclusive, never touching the last row.
last_row = min(second_iteration_start, len(rawdata) - 2)
for idx in range(first_iteration_start, last_row + 1):
    line = rawdata.iloc[idx]
    event = line.iloc[2]
    if "ExecuteMetaCommand" in event:
        # The operator name and time are taken from the last row that was
        # not a Dispatch row.
        op_name, op_time = _name_and_time(prevline)
        ex_operator_list.append(op_name)
        ex_time.append(op_time)
    if "Dispatch" in event:
        # Only the first Dispatch of a consecutive run is recorded.
        if pre_checkDispatch:
            continue
        op_name, op_time = _name_and_time(prevline)
        dispatch_operator_list.append(op_name)
        dispatch_time.append(op_time)
        pre_checkDispatch = True
    else:
        prevline = line
        pre_checkDispatch = False

sumup= (sum(dispatch_time) + sum(ex_time))/1000000
print("total latency per iteration: {} ms \n \
      Tip: if the data is too different from ort_perf_test.exe,\n \
      please double check the first/second iteration number".format(round(sumup,2)))


csv_file = f"{file}_test.csv"
# The context manager closes the CSV even if a row fails to parse.
with open(csv_file,"w",newline='') as csvf:
    writer = csv.writer(csvf)
    line = ["execute type","layer type","layer name","time"]
    writer.writerow(line)

    # `duration` (not `time`) so the `time` module is not overwritten.
    # Both event kinds share _split_layer_info, so an ExecuteMetaCommand
    # name without "(" no longer raises ValueError.
    for execute_type, operators, durations in (
        ("ExecuteMetaCommand", ex_operator_list, ex_time),
        ("Dispatch", dispatch_operator_list, dispatch_time),
    ):
        for op, duration in zip(operators, durations):
            layer_type, layer_name = _split_layer_info(op)
            writer.writerow([execute_type,layer_type, layer_name,round(float(duration)/1000000,2)])

print("{} generated".format(csv_file))

# Re-read the generated CSV and keep only the convolution meta-commands.
df1 = pd.read_csv(os.path.join(rootpath, csv_file))
filtered_df1 = df1[(df1['layer type'] == 'DML_OPERATOR_CONVOLUTION') & (df1['execute type'] == 'ExecuteMetaCommand')]
filtered_df1.to_csv(os.path.join(rootpath, 'temp.csv'), index=False)

In [None]:
# compare different pix log
import pandas as pd
rootpath = r"C:\Users\GAME\Documents\Project\helpWindow\for_prithv"
log1 = "v0_pixlog_1_test.csv"
log2 = "v1_Emptyreorder_pixlog_test.csv"
log3 = "origin_pixlog_test.csv"


def _gemm_meta_commands(frame):
    """Keep only the rows that are GEMM operators run via ExecuteMetaCommand."""
    is_gemm = frame['layer type'] == 'DML_OPERATOR_GEMM'
    is_meta_command = frame['execute type'] == 'ExecuteMetaCommand'
    return frame[is_gemm & is_meta_command]


df1, df2, df3 = [pd.read_csv(os.path.join(rootpath, name)) for name in (log1, log2, log3)]
filtered_df1, filtered_df2, filtered_df3 = [_gemm_meta_commands(frame) for frame in (df1, df2, df3)]

# Join the three logs on the layer name; the first merge adds the _x / _y
# column suffixes (df1 -> x, df2 -> y).
merged_df = filtered_df1.merge(filtered_df2, on='layer name')
merged_df = merged_df.merge(filtered_df3, on='layer name')
merged_df.to_csv(os.path.join(rootpath, 'compare_gemm.csv'), index=False)

## <a id='section4'>Extract onnxruntime profiling data </a>
* take the unet model as an example, `onnxruntime_perf_test.exe -m times -r 1 -f unet_sample_batch:2 -f unet_sample_channels:4 -f unet_sample_width:64 -f unet_sample_height:64 -f unet_hidden_batch:2 -f unet_hidden_sequence:77 -I -u temp.onnx -p ort_profiling.json -e dml model_path`
  - `-u` is used to avoid having only a single fused DML node dumped
  - `-p` is used to generate information like layer name, input and output shape.
* Convert the ORT profiling JSON to CSV

In [None]:

# index value in profile is same with allocator_planner.cc
# index value in tensor allocation is different, where is it?
import csv
import json

# Average the duration of every "*_kernel*" event per node name in an
# onnxruntime profiling JSON and write one CSV row per node.
time_file = r'C:\Users\GAME\Documents\Project\INT8\mobilenetv2-12-int8\mobilenet_dml.json_2023-12-12_17-04-08.json'
# str.replace swaps EVERY "json" substring, so a name like
# "x.json_<timestamp>.json" becomes "x.csv_<timestamp>.csv".
csv_file = time_file.replace("json", 'csv')
 # only has three inputs and  one output as defualt, may need to change as needed
line = ["name","op_type", "input1_type", "input1_shape", "input2_type", "input2_shape","input3_type", "input3_shape","output_type","output_shape","duration"]
count = 0
node_count =0
total_duration = 0
count_time={}   # node name -> summed duration of its kernel events
count_name = {}  # node name -> number of kernel events seen
other_info={}   # node name -> CSV columns captured from its first event
with open(time_file, 'r') as f:
    # load the contents of the file into a list of events
    data = json.load(f)

for event in data:
    # if "/conv_in/Conv_fence_before" in event["name"]:
    #     count += 1
    # if count == 2:
    #     break
    if "kernel" not in event["name"]:
        continue
    output_name = event["name"].split("_kernel")[0]
    if output_name in count_time:
        # Later events of the same node only contribute to the average.
        count_time[output_name] += int(event["dur"])
        count_name[output_name] += 1
        continue

    count_time[output_name] = int(event["dur"])
    count_name[output_name] = 1

    args = event['args']
    op_type = args['op_name']

    # Up to three inputs are recorded; missing ones stay "none" / [].
    input_type = ["none"]*3
    input_shape = [[] for _ in range(3)]
    for j, p in enumerate(args['input_type_shape'][:3]):
        for key, value in p.items():
            input_type[j]= key
            input_shape[j]= value

    # Only the first entry of the first output is recorded.
    output_type = None
    output_shape = None
    outputs = args['output_type_shape']
    if outputs and outputs[0]:
        output_type, output_shape = next(iter(outputs[0].items()))

    other_info[output_name] = [output_name, op_type, input_type[0],input_shape[0],
                               input_type[1],input_shape[1],
                               input_type[2], input_shape[2],
                               output_type,output_shape]

# The context manager closes the CSV even if a row fails to serialize.
with open(csv_file,"w",newline='') as csvf:
    writer = csv.writer(csvf)
    writer.writerow(line)
    for key, value in count_time.items():
        duration = round(value / count_name[key],2)
        total_duration+=duration
        kernel_info = other_info[key]

        kernel_info.append(duration)
        writer.writerow(kernel_info)
        node_count+=1
print("total duration:",round(total_duration/1000,2))
print("Done to generate file {}". format(csv_file))
          

#  <a id='section5'> DML&OV Layer By Layer profiling </a> 
* Step1: collect CSV1:  DML info from [Extract PIX log](#section1) which has <font color='orange'>the layer name</font> and DML performance data for each layer
* Step2: collect CSV2:  DML DDI info from [Extract DDI log](#section2) which has the <font color='red'>input/output shape/datatype/layout</font> for each layer
* Step3: collect CSV3:  OV info from [Extract OV performance data](#section3) which has <font color='orange'>the layer name</font>  and OV performance data for each layer
* Step4: collect CSV4:  model info from [onnxruntime profiling data](#section4) which has <font color='orange'>the layer name</font>  and the <font color='red'>input/output shape/datatype</font> for each layer  
* Step5: Once these four CSVs are collected, we can join them on their shared unique fields: for example, the <font color='orange'>layer name</font> (in orange) and the <font color='red'>input/output shape/datatype</font> (in red). Some fields may not match exactly and need to be checked manually.
> Here is a reference for automatically combining ORT info with DML and OV performance data

In [None]:
import os
import re  # this cell uses re.compile but did not import it
# Take layer name as a unique key
# For every node row in node_csv, find the not-yet-used rows of perf_file
# with the same layer name and write "<perf cols 1-2>,<layout>,<node row>".
rootpath = r"C:\Users\GAME\Documents\Project\AIGC\2023Q4_Profiling_sd1.5"
perf_file = "new_unet_onnx_info.csv" # this is a file has [Layer name, DML perf, OV perf]
node_csv = r"new_issue\profiling.csv_2023-10-12_11-33-55.csv"
used_line_index = set()
# Matches the text inside each [...] group, i.e. the shape lists in a node row.
shape_pattern = re.compile(r'\[([^\]]+)\]')

# Read the perf file once instead of reopening it for every node row.
with open(os.path.join(rootpath, perf_file)) as ddi_file:
    perf_lines = ddi_file.readlines()

with open(os.path.join(rootpath,perf_file+".csv"),"w") as new_file:
    new_file.write("DML, OV, input1_layout,Layer name, op_type,  input1_type, input1_shape, input2_type, input2_shape,input3_type, input3_shape,output_type,output_shape\n")
    with open(os.path.join(rootpath, node_csv)) as node_file:
        for i, nline in enumerate(node_file):
            if i == 0:
                continue
            node_info = nline.split(",")
            layer_name = node_info[0]

            # The layout depends only on the node row, so it is computed once
            # here rather than once per perf row.
            input_layout = "NONE"
            if len(node_info) > 1 and "Conv" in node_info[1]:
                # check layout
                # this layout is not aligned with DDI LOG
                # Because DML may have optimization like NCHW <=> HLSL Layout conversion + NHWC
                matches = shape_pattern.findall(nline)
                input_shape = matches[0].split(",") if matches else []
                # Heuristic: equal last two dims are treated as H == W (NCHW).
                if len(input_shape) >= 4:
                    if input_shape[2] == input_shape[3]:
                        input_layout = "NCHW"
                    else:
                        input_layout = "NHWC"

            for j, dline in enumerate(perf_lines):
                if j == 0 or j in used_line_index:
                    continue
                info_list = dline.rstrip().split(",")
                perf_layer_name = info_list[0]
                if layer_name == perf_layer_name:
                    # Each perf row is consumed by at most one node row.
                    used_line_index.add(j)
                    newline = ",".join(info_list[1:3])+","+input_layout+","+nline
                    new_file.write(newline)
                    #break

In [None]:
#Convolution: link to DDI log
# Take "input_shape, filter_shape, output_shaper" as a unique key
import os
import re  # this cell uses re.compile but did not import it
rootpath = r"C:\Users\GAME\Documents\Project\AIGC\2023Q4_Profiling_sd1.5"
ddi_csv = r"DDI_conv.csv"
ort_perf_csv = "unet_onnx_info.csv.csv"
# Matches the text inside each [...] group, i.e. the shape lists in a row.
shape_pattern = re.compile(r'\[([^\]]+)\]')

# Read the DDI log once instead of reopening it for every Conv row.
with open(os.path.join(rootpath, ddi_csv)) as ddi_file:
    ddi_lines = ddi_file.readlines()

used_line_index = set()
matched_count = 0
rest_count = 0
with open(os.path.join(rootpath, ort_perf_csv)) as ort_perf_file, \
        open(os.path.join(rootpath,ort_perf_csv+"_ddi.csv"),"w") as new_file:
    new_file.write("Status, DML,OV, input1_layout,Layer name,model_name,kernel_name,input_shape_n,input_shape_c,input_shape_h,input_shape_w,input_layout,input_datatype,input_flag,input_padding,filter_shape_n,filter_shape_c,filter_shape_h,filter_shape_w,filter_layout,filter_datatype,filter_flag,filter_stride_h,filter_stride_w,filter_stride_c,filter_dilation_h,filter_dilation_w,filter_dilation_c,filter_groupcount,output_shape_n,output_shape_c,output_shape_h,output_shape_w,output_layout,output_datatype,output_flag,output_padding,bias,direction,activation,exec_flag \n")
    for i, nline in enumerate(ort_perf_file):
        if i == 0:
            continue
        fields = nline.split(",")
        # Column 4 is op_type; rows too short to have it are skipped.
        if len(fields) < 5 or fields[4] != "Conv":
            continue

        # Shapes are parsed only for Conv rows, so other operators with fewer
        # bracketed shapes can no longer raise IndexError here.
        matches = shape_pattern.findall(nline)
        ort_input_shape = matches[0]
        ort_filter_shape = matches[1]
        ort_output_shape = matches[-1]

        nline_new = ",".join(fields[:4])
        matched_list = []
        temp_used_line_index= []
        for j, dline in enumerate(ddi_lines):
            if j == 0 or j in used_line_index:
                continue
            info_list = dline.rstrip().split(",")
            input_shape  = ", ".join(info_list[2:6])
            filter_shape = ", ".join(info_list[15:19])
            output_shape = ", ".join(info_list[29:33])
            #print(input_shape, ort_input_shape)
            #print(filter_shape, ort_filter_shape)
            #print(output_shape, ort_output_shape)
            if ort_input_shape == input_shape  \
                    and ort_filter_shape == filter_shape \
                    and ort_output_shape == output_shape:
                temp_used_line_index.append(j)
                matched_list.append(dline)
        # dict.fromkeys de-duplicates while keeping first-seen order, so the
        # order of the "Uncertian" rows is reproducible (set() order is not).
        no_repeat_match = list(dict.fromkeys(matched_list))
        if len(no_repeat_match) == 1: # if ddi log is same, then assign one directly
            matched_count += 1
            used_line_index.add(temp_used_line_index[0])
            newline = "Valid,"+nline_new + ","+no_repeat_match[0]
            new_file.write(newline)
        else:
            # Reached for zero matches as well as for several distinct ones.
            #print("{} cannot find".format(nline_new))
            rest_count +=1
            for item in no_repeat_match:
                newline = "Uncertian,"+nline_new + ","+ item
                new_file.write(newline)

print("Summary:\n \
      {}/{} find a unique record of DDI LOG \n \
      rest {} Convs find multiple records of DDI LOGS ". format(matched_count, matched_count+rest_count, rest_count))

### Option 2 (Deprecated):
* hack into netron to dump model information: layer name, weight shape etc.
* However, information about input shape cannot be captured because it is calculated in runtime

In [None]:
import pandas as pd
import os
import re

# For every node row whose last ';'-separated field is a 4-element shape,
# attach the first unused DDI row whose filter shape (columns 15-18) matches.
rootpath = r"C:\Users\GAME\Documents\Project\AIGC\2023Q4_Profiling_sd1.5"
ddi_csv = r"new_issue\new_DDI_conv.csv"
node_csv = "new_unet_onnx_info.csv"

# Read the DDI log once instead of reopening it for every node row.
with open(os.path.join(rootpath, ddi_csv)) as ddi_file:
    ddi_lines = ddi_file.readlines()

used_line_index = set()
# Both handles are closed by the context managers, also on error.
with open(os.path.join(rootpath,node_csv+".csv"),"w") as new_file:
    new_file.write("Layer name,DML,OV, model_name,kernel_name,input_shape_n,input_shape_c,input_shape_h,input_shape_w,input_layout,input_datatype,input_flag,input_padding,filter_shape_n,filter_shape_c,filter_shape_h,filter_shape_w,filter_layout,filter_datatype,filter_flag,filter_stride_h,filter_stride_w,filter_stride_c,filter_dilation_h,filter_dilation_w,filter_dilation_c,filter_groupcount,output_shape_n,output_shape_c,output_shape_h,output_shape_w,output_layout,output_datatype,output_flag,output_padding,bias,direction,activation,exec_flag\n")
    with open(os.path.join(rootpath, node_csv)) as node_file:
        for i, nline in enumerate(node_file):
            if i == 0:
                continue
            node_info = nline.split(";")
            layer_name = node_info[0]

            shape = node_info[-1].rstrip()
            # Only 4-element shapes are matched against the DDI filter shape.
            if len(shape.split(",")) != 4:
                continue
            #print(shape)
            for j, dline in enumerate(ddi_lines):
                if j == 0 or j in used_line_index:
                    continue

                info_list = dline.rstrip().split(",")
                filter_shape = "["+",".join(info_list[15:19])+"]"
                if shape == filter_shape:
                    # if ("proj_in" in layer_name and info_list[6] == "NHWC") \
                    #     or ("proj_out" in layer_name and info_list[6] == "NCHW"):
                    #     continue
                    # Each DDI row is consumed by at most one node row.
                    used_line_index.add(j)

                    newline = ",".join(node_info[0:3]) + ","+dline
                    new_file.write(newline)
                    break


   

# Create Pivot table

In [None]:
import os
import pandas as pd

# Sum the 'time' column of "<log>_test.csv" per (layer type, execute type).
rootpath = r"C:\Users\GAME\Documents\Project\AIGC\2023Q4_Profiling_sd1.5"
log_file = "unet_PIX.txt"
file = os.path.join(rootpath, log_file)
# os.path.splitext removes only the extension. str.strip(".txt") strips any
# run of '.', 't' and 'x' characters from BOTH ends, which corrupts names
# such as "text.txt".
file = os.path.splitext(file)[0]

csv_file = f"{file}_test.csv"
data = pd.read_csv(csv_file)
# read_csv already returns a DataFrame, so no extra pd.DataFrame() wrap is needed.
df = data
pivot_table = df.pivot_table(values='time', index=['layer type','execute type'], aggfunc='sum')
print(pivot_table)

# Extract info from benchmark.bat log

## DML backend

In [None]:
# Extract per-model timings from the DML benchmark log into a CSV.
# The parser relies on fixed line offsets after each "model_rename" line,
# so it only works for logs with exactly that layout.
log_file = "dml_log_release.txt"
csv_file = "dml_perf_release.csv"
with open(log_file,"r") as f:
    all_lines = f.readlines()

# The context manager closes the CSV even if an offset runs past the log end.
with open(csv_file,"w",newline='') as csvf:
    writer = csv.writer(csvf)
    line = ["model name","not save model","save model"]
    writer.writerow(line)
    idx = 0
    while idx < len(all_lines):
        line = all_lines[idx]
        if "model_rename" not in line:
            # Advance past unrelated lines; without this step the loop never
            # terminates as soon as one line lacks "model_rename".
            idx += 1
            continue
        model_name = line.rstrip().split("model_rename\\")[-1]
        if "Average inference" not in all_lines[idx+6]:
            # First run has no result at the expected offset: record "erro"
            # and read the second run's time from offset 10.
            perf_save = all_lines[idx+10].rstrip().split("time cost:")[-1]
            writer.writerow([model_name, "erro", perf_save])
            idx = idx + 25
            print(idx)
        else:
            perf_nosave = all_lines[idx+6].rstrip().split("time cost:")[-1]
            perf_save = all_lines[idx+26].rstrip().split("time cost:")[-1]
            writer.writerow( [model_name , perf_nosave, perf_save])
            idx = idx+41



## OpenVINO backend

In [None]:
# Extract per-model timings from the OpenVINO benchmark log into a CSV.
# The parser relies on fixed line offsets after each "model_rename" line,
# so it only works for logs with exactly that layout.
log_file = "ov_log.txt"
csv_file = "ov_perf.csv"
with open(log_file,"r") as f:
    all_lines = f.readlines()

# The context manager closes the CSV even if an offset runs past the log end.
with open(csv_file,"w",newline='') as csvf:
    writer = csv.writer(csvf)
    line = ["model name","FP16","FP32"]
    writer.writerow(line)
    idx = 0
    while idx < len(all_lines):
        line = all_lines[idx]
        if "model_rename" not in line:
            # Advance past unrelated lines; without this step the loop never
            # terminates as soon as one line lacks "model_rename".
            idx += 1
            continue
        model_name = line.rstrip().split("model_rename\\")[-1]
        print(model_name)
        if "time cost" not in all_lines[idx+6]:
            # First run has no result at the expected offset: record "erro"
            # and read the second run's time from offset 7.
            perf_save = all_lines[idx+7].rstrip().split("time cost:")[-1]
            writer.writerow([model_name, "erro", perf_save])
            idx = idx + 22
        else:
            perf_nosave = all_lines[idx+6].rstrip().split("time cost:")[-1]
            perf_save = all_lines[idx+26].rstrip().split("time cost:")[-1]
            writer.writerow( [model_name , perf_nosave, perf_save])
            idx = idx+41


# <a id='section3'> Extract OpenVINO performance data</a>
## Option 1 if you have ort_perf_test.exe debug version:
1. set `ORT_OPENVINO_ENABLE_DEBUG=1`, and run `ort_perf_test.exe  -m times -r 1 -I -e openvino -i "device_type|GPU_FP32 cache_dir|ov_cache" onnx_model_path`
2. Some performance logs will appear in the command window; an example is shown below. You need to save them manually and convert them into a CSV file [the script below is only a rough reference].
```
convolution10/WithoutBiases   EXECUTED       layerType: Convolution        realTime: 374       cpu: 0               execType: jit:ir__f16
convolution10                 OPTIMIZED_OUT  layerType: Add                realTime: 0         cpu: 0               execType: undef
activation10                  OPTIMIZED_OUT  layerType: Relu               realTime: 0         cpu: 0               execType: undef
convolution11/WithoutBiases   EXECUTED       layerType: Convolution        realTime: 330       cpu: 0               execType: jit:ir__f16
convolution11                 OPTIMIZED_OUT  layerType: Add                realTime: 0         cpu: 0               execType: undef
activation11                  OPTIMIZED_OUT  layerType: Relu               realTime: 0         cpu: 0               execType: undef
convolution12/WithoutBiases   EXECUTED       layerType: Convolution        realTime: 294       cpu: 0               execType: jit:ir__f16
convolution12                 OPTIMIZED_OUT  layerType: Add                realTime: 0         cpu: 0               execType: undef
activation12                  OPTIMIZED_OUT  layerType: Relu               realTime: 0         cpu: 0               execType: undef
```
3. The parsed onnx model will also be saved as `OpenVINOExecutionProvider_OpenVINO-EP-subgraph_1_0.onnx`, you can use this model to run `openvino benchmark_app` if the original model cannot be parsed by openvino

## Option 2 (Recommend)
1. set python environment: pip install openvino_dev
2. this package contains a tool called benchmark_app
3. run benchmark_app with the command line
`benchmark_app -m your_onnx_model_path -d GPU -nireq 1 -niter 10 --report_type detailed_counters --report_folder perf\`
4. after running this, a performance csv file will be generated under `perf\` folder


> I think the performance result from Option 2 is valid for profiling ORT_OV performance, because I have checked the pipeline in the onnxruntime OpenVINO backend. From my understanding, ORT only parses the ONNX model and the rest of the work is handled by OV.

In [None]:
# Convert the whitespace-separated OpenVINO per-layer log into a CSV.
# Expected token order per line:
#   <name> <status> layerType: <type> realTime: <n> cpu: <n> ...
perf_file = r"C:\Users\GAME\Documents\Project\Adobe\analyze\Adobe-Ps_SuperZoom_V316.ov.txt"
csv_file = perf_file.replace("txt",'csv')
line = ["layer_type","layer name","gpu(ms)","cpu(ms)"]
total_time = 0
# Both handles are closed by the context manager, also on a parse error.
with open(perf_file, 'r') as f, open(csv_file,"w",newline='') as csvf:
    writer = csv.writer(csvf)
    writer.writerow(line)
    for line in f:
        # split() without arguments drops the empty strings produced by
        # runs of whitespace.
        info = line.split()
        # Blank or truncated lines do not have the 8 tokens read below.
        if len(info) < 8:
            continue
        #print(info)
        layer_type = info[3]
        layer_name = info[0]
        gpu_time = int(info[5])/1000
        cpu_time = int(info[7])/1000
        total_time+=gpu_time
        # Layers with a zero realTime are left out of the CSV.
        if gpu_time == 0.0:
            continue
        writer.writerow([layer_type,layer_name, gpu_time,cpu_time])
# "{:.2f}" prints two decimals; "{:2f}" only set a minimum field width of 2.
print("total time: {:.2f}".format(total_time))

# Extract info from ORT profiling 

## ORT time

In [None]:
# Convert an onnxruntime profiling JSON into a CSV with one row per
# input/output tensor type of every event that has an "op_name".
time_file = r'C:\Users\GAME\Documents\Project\AIGC\perf\unet_time_all.json'
csv_file = time_file.replace("json", 'csv')
with open(time_file, 'r') as f:
    # load the contents of the file into a list of events
    data = json.load(f)

# The context manager closes the CSV even if an event is malformed.
with open(csv_file,"w",newline='') as csvf:
    writer = csv.writer(csvf)
    line = ["start","duration","name","op_name", "provider", "role", "data_type"]
    writer.writerow(line)
    for event in data:
        args = event["args"]
        if "op_name" not in args:
            continue
        common = [event["ts"], event["dur"], event["name"], args["op_name"]]
        if "provider" not in args:
            # No provider info: emit a single row with placeholder columns.
            writer.writerow(common + ["none", "none", "none"])
            continue
        provider = args["provider"]
        # Loop names avoid `input`, which would shadow the builtin for the
        # rest of the notebook session.
        for role, tensors in (("input", args["input_type_shape"]),
                              ("output", args["output_type_shape"])):
            for tensor in tensors:
                for data_type in tensor:
                    writer.writerow(common + [provider, role, data_type])

## ORT memory

In [None]:
# Convert the "Tensor name: ..." lines of an ONNX Runtime memory dump into a
# CSV written next to it. Parsing stops once the third line containing
# "Initializer in Device" has been seen.
def _between(text, start, end):
    """Return the stripped text located after *start* and before *end*."""
    return text.split(start)[1].split(end)[0].strip()


def _pair(text, label):
    """Return the two stripped values of the ``(a, b)`` span that follows *label*."""
    parts = text.split(label)[1].split(",")
    return parts[0].strip(), parts[1].split(")")[0].strip()


mem_file = r"C:\Users\GAME\Documents\Project\AIGC\perf\unet_mem_all.txt"
# Swap only the extension: str.replace("txt", "csv") would also rewrite any
# "txt" that happens to appear elsewhere in the path.
csv_file = os.path.splitext(mem_file)[0] + ".csv"

# Both files are closed by the context manager even if a line fails to parse.
with open(mem_file, 'r') as f, open(csv_file, "w", newline='') as csvf:
    writer = csv.writer(csvf)
    line = ["Tensor name","op_type","Index","Reuse inplace","Reused Node index","Alloc type","Device type","Memory type","Device id", "lifetime start","lifetime end", "planned block start","planned block end","planned size", "allocated block start","allocated block end", "allocated size"]
    writer.writerow(line)
    init_count = 0
    for line in f:
        if "Initializer in Device" in line:
            init_count += 1
        if init_count == 3:
            break
        if "Tensor name" not in line:
            continue

        tensor_name = _between(line, "Tensor name: ", ", Index")
        index = _between(line, "Index: ", ", Reuse inplace")
        reuse = _between(line, "Reuse inplace: ", ", Reused Node index")
        reuse_index = _between(line, "Reused Node index: ", ", Alloc type")
        alloc_type = _between(line, "Alloc type: ", ", Location")
        if alloc_type == "AllocateStatically":
            op_type = ""
        else:
            # Heuristic guess from the tensor name; the original author notes
            # that this op type is still incorrect.
            op_type = tensor_name.split("/")[-1].split("_")[0]

        location = _between(line, "Location: ", ", lifetime")
        device_type = _between(location, "DeviceType:", "MemoryType")
        memory_type = _between(location, "MemoryType:", "DeviceId")
        device_id = _between(location, "DeviceId:", "]")

        lt_start, lt_end = _pair(line, "lifetime: (")
        pb_start, pb_end = _pair(line, "planned block: (")
        pb_size = _between(line, "planned size: ", ", allocated block")
        ab_start, ab_end = _pair(line, "allocated block: (")
        ab_size = line.split("allocated size: ")[1].strip()

        newline = [tensor_name,op_type,index,reuse,reuse_index,alloc_type,device_type,memory_type,device_id,lt_start,lt_end,pb_start,pb_end,pb_size, ab_start,ab_end,ab_size]
        writer.writerow(newline)


In [None]:
# The time file is per node and cannot be mapped onto the memory info, which
# is per output tensor. Instead, join the memory CSV with the optimized ONNX
# node CSV: for every memory row whose first column equals column 3 of an
# "output" row (column 2) in the ONNX CSV, append that row's column 1.
# Rows without a match are written unchanged. Prints the number of matches.
onnx_csv = r'C:\Users\GAME\Documents\Project\AIGC\perf\unet_optimized_onnx_node.csv'
memory_csv = r'C:\Users\GAME\Documents\Project\AIGC\perf\unet_mem_all.csv'
new_csv = memory_csv.replace(".csv","_new.csv")

# Index the ONNX CSV once instead of reopening and rescanning it for every
# memory row. setdefault keeps the first matching row, as the original
# first-match-then-break scan did.
op_type_by_output = {}
with open(onnx_csv, 'r') as onnx_file:
    reader2 = csv.reader(onnx_file)
    next(reader2, None)  # skip the header row
    for row in reader2:
        if len(row) > 3 and row[2] == "output":
            op_type_by_output.setdefault(row[3], row[1])

count = 0
# The context manager closes both files even if a row raises.
with open(memory_csv, 'r') as mem_file, open(new_csv, "w", newline='') as csvf:
    reader = csv.reader(mem_file)
    writer = csv.writer(csvf)
    for i, mem_row in enumerate(reader):
        if i == 0:
            # header row: add the title of the appended column
            mem_row.append("op_type")
            writer.writerow(mem_row)
            continue
        matched = op_type_by_output.get(mem_row[0]) if mem_row else None
        if matched is not None:
            mem_row.append(matched)
            count += 1
        writer.writerow(mem_row)
print(count)


In [None]:
# Compare unet_time_all.csv with unet_dml_node.csv.
# For every non-empty data row of the time CSV, look for DML rows with the same
# first column. On a match, column 3 of the time row is split on "," and the
# integer parts are multiplied into an element count.
# Prints: the number of distinct first-column values minus the match count, then
# the summed and the largest element count, each times 2 and divided by 1e9
# (presumably 2 bytes per element reported in GB -- TODO confirm).
time_csv = r'C:\Users\GAME\Documents\Project\AIGC\perf\dynamic_dim\unet_time_all.csv'
dml_csv = r'C:\Users\GAME\Documents\Project\AIGC\perf\unet_dml_node.csv'

node_set = set()

count = 0
total_memory_size = 0
max_memory_size = 0

# Read the DML table once instead of reopening and reparsing the file for
# every row of the time CSV.
with open(dml_csv, 'r') as dmlf:
    dml_rows = list(csv.reader(dmlf))

with open(time_csv, 'r') as timef:
    reader = csv.reader(timef)
    for i, row in enumerate(reader):
        if i == 0 or len(row) == 0:
            continue
        node_set.add(row[0])
        for j, dmlrow in enumerate(dml_rows):
            if j == 0 or len(dmlrow) == 0:
                continue
            if row[0] == dmlrow[0]:
                count += 1
                shape = row[3].split(",")
                if len(shape) == 1:
                    # NOTE: this keeps scanning the remaining DML rows (it does
                    # not stop at the first match), so further matches are
                    # counted again.
                    continue
                memory_size = 1
                for v in shape:
                    # strip the list brackets from the first / last element
                    if "[" in v:
                        v_int = int(v.split("[")[-1].strip())
                    elif "]" in v:
                        v_int = int(v.split("]")[0].strip())
                    else:
                        v_int = int(v.strip())
                    memory_size *= v_int
                total_memory_size += memory_size
                max_memory_size = max(max_memory_size, memory_size)
                break
            if j == 920:
                # debug trace kept from the original: fires when DML row 920
                # is reached without a match
                print(row[0], dmlrow[0])
print(len(node_set)-count)
print(total_memory_size*2/1000000000)
print(max_memory_size*2/1000000000)



# Miscellaneous

In [None]:
def get_max_size(node_name, filename):
    """Print the largest float16 input seen on matching DML nodes.

    Loads the JSON event list in *filename* and inspects every event whose
    ``args["provider"]`` is ``"DmlExecutionProvider"`` and whose
    ``args["op_name"]`` contains *node_name*. For each ``"float16"`` entry of
    ``args["input_type_shape"]`` the product of the listed dimensions is
    computed; the largest product and the ``name`` of the event that owns it
    are printed on two lines.

    When nothing matches, ``0`` and ``None`` are printed (previously this case
    raised ``UnboundLocalError`` because the kernel name was never assigned).

    Args:
        node_name: substring looked up in each event's op name.
        filename: path of the JSON profile to read.
    """
    with open(filename, 'r') as f:
        # load the contents of the file into a list of event dictionaries
        data = json.load(f)

    max_size = 0
    kernel_name = None  # stays None when no float16 input is found
    for event in data:
        args = event["args"]
        if "provider" not in args:
            continue
        if args["provider"] != "DmlExecutionProvider":
            continue
        if node_name not in args.get("op_name", ""):
            continue
        for value in args["input_type_shape"]:
            if "float16" in value:
                # element count of the tensor = product of its dimensions
                temp_size = np.prod(np.array(value["float16"]))
                if max_size < temp_size:
                    max_size = temp_size
                    kernel_name = event["name"]
    print(max_size)
    print(kernel_name)

In [None]:
# Report the largest float16 input among the DML nodes of the given op type
# in this profile. Uncomment one of the alternatives to inspect another op.
filename = r'C:\Users\GAME\Documents\Project\AIGC\perf\onnxruntime_profile__unet_1.5_olive.json'
#get_max_size("LayerNormalization", filename)
#get_max_size("InstanceNormalization", filename)
get_max_size("MatMul",filename)
#get_max_size("Mul", filename)


In [None]:
def get_time(provider,op, path):
    """Print the total and per-op durations recorded in a JSON profile.

    Loads the JSON event list in *path*. Every event whose ``args`` contain an
    ``op_name`` contributes its ``dur`` to the overall total, whatever its
    provider. Events that ran on *provider* and match *op* are additionally
    summed per op name; op names containing ``"DmlFusedNode"`` are left out of
    that per-op table.

    Two lines are printed: the overall total, then the per-op dictionary.

    Args:
        provider: execution provider name to filter on.
        op: op name to filter on, or ``None`` to accept every op.
        path: path of the JSON profile to read.
    """
    total_time = 0
    ops_time = {}
    with open(path, 'r') as f:
        # load the contents of the file into a list of event dictionaries
        data = json.load(f)

    for event in data:
        args = event["args"]
        if "op_name" not in args:
            continue
        op_name = args["op_name"]
        duration = int(event["dur"])
        total_time += duration
        if "provider" not in args or args["provider"] != provider:
            continue
        if op is not None and op != op_name:
            continue
        if "DmlFusedNode" in op_name:
            continue
        # Start from 0 and add, so the first occurrence of an op is counted too
        # (it used to be recorded as 0 and its duration was lost).
        ops_time[op_name] = ops_time.get(op_name, 0) + duration

    print(total_time)
    print(ops_time)

In [None]:
# Print timing summaries for the DG2 profile: the two Memcpy ops on the DML
# provider, then every op on the DML provider, then every op on the CPU provider.
path = r'C:\Users\GAME\Documents\Project\AIGC\perf\onnxruntime_profile__DG2_unet_1.5_olive.json'
get_time("DmlExecutionProvider","MemcpyFromHost", path)
get_time("DmlExecutionProvider","MemcpyToHost", path)
get_time("DmlExecutionProvider",None, path)
get_time("CPUExecutionProvider",None, path)


In [None]:
def get_kernelname(provider, op,filename):
    """Print the name of every profiled event matching a provider and op.

    Loads the JSON event list in *filename* and prints ``event["name"]`` for
    each event whose ``args["provider"]`` equals *provider* and whose
    ``args["op_name"]`` equals *op*.

    Args:
        provider: execution provider name to filter on.
        op: op name to filter on, or ``None`` to accept every op.
        filename: path of the JSON profile to read.
    """
    # Read the file named by the parameter; the previous code opened the
    # notebook-level global ``path`` and ignored ``filename``.
    with open(filename, 'r') as f:
        # load the contents of the file into a list of event dictionaries
        data = json.load(f)

    for event in data:
        args = event["args"]
        if "op_name" not in args or "provider" not in args:
            continue
        if args["provider"] != provider:
            continue
        if op is None or op == args["op_name"]:
            print(event['name'])
                        

In [None]:
# Print the names of the profiled "Mul" events that ran on the CPU execution
# provider in the DG2 profile.
path = r'C:\Users\GAME\Documents\Project\AIGC\perf\onnxruntime_profile__DG2_unet_1.5_olive.json'
get_kernelname("CPUExecutionProvider","Mul", path)

In [None]:
def get_type(filename):
    """Print the data-type keys used by node inputs and outputs in a profile.

    Loads the JSON event list in *filename*. For every event whose ``args``
    contain a ``provider``, the keys of each entry in ``input_type_shape`` and
    ``output_type_shape`` are collected. The set of input keys is printed
    first, then the set of output keys.
    """
    with open(filename, 'r') as f:
        # parse the whole profile up front
        data = json.load(f)

    seen_inputs, seen_outputs = set(), set()
    for record in data:
        args = record["args"]
        if "provider" in args:
            in_shapes = args["input_type_shape"]
            out_shapes = args["output_type_shape"]
            for shape in in_shapes:
                seen_inputs.update(shape.keys())
            for shape in out_shapes:
                seen_outputs.update(shape.keys())

    print(seen_inputs)
    print(seen_outputs)
 

In [None]:
# Print the sets of input and output data-type keys found in the DG2 profile.
path = r'C:\Users\GAME\Documents\Project\AIGC\perf\onnxruntime_profile__DG2_unet_1.5_olive.json'
get_type(path)

Analyze OpenVINO model

In [None]:
import xml.etree.ElementTree as ET

# Walk the <layers> children of the model XML and, within each of them, print
# the name of the first "FullyConnected" layer whose first input port has
# exactly two child elements, then stop scanning that <layers> element.
# NOTE(review): find() returns only the first input/port element, so len()
# counts that port's children (presumably its <dim> entries), not the number
# of input ports -- confirm which was intended.
tree = ET.parse(r'..\AIGC\optimize\unet_ov.xml')
root = tree.getroot()
for layers in root.findall("layers"):
    for layer in layers:
        if layer.attrib["type"] != "FullyConnected":
            continue
        first_port = layer.find("input/port")
        if len(first_port) == 2:
            print(layer.attrib["name"])
            break
        # Left disabled by the original author: multiply the int value of
        # every "dim" under the port to obtain the input element count.

      
        

Convert onnx memory file into csv

In [None]:
# Print every line of the ORT memory dump that mentions "peak_rss".
mem_file = r"C:\Users\GAME\Documents\Project\AIGC\perf\unet_mem_all.txt"
csv_file = mem_file.replace("txt", 'csv')

with open(mem_file,'r') as f:
    for line in f:
        if "peak_rss" in line:
            # drop the trailing newline so print() does not double it
            print(line.rstrip())

## Analyze csv file



In [None]:
import pandas as pandasForSortingCSV
def get_graph_info(file_csv,idx):
    # assign dataset
    csvData = pandasForSortingCSV.read_csv(file_csv)
    print(csvData.groupby(csvData.columns[idx]).sum() )
                                         
    # sort data frame
    # csvData.sort_values(csvData.columns[1], 
    #                     axis=0,
    #                     inplace=True)                    
    
    # # displaying sorted data frame
    # print("\nAfter sorting:")
    # print(csvData)