## Add more statistics to analyze an ONNX model

### 2-2-1. model characteristics
- 檢查一下 width, height, channel 是不是 onnx 的 input??? 果然不是，笑死那是 input 的名字，還好有訂正好了（應該？）

In [1]:
import onnx 
import json
# Load the model from disk and run ONNX shape inference on it; the lookups
# further down read tensor shapes from graph.input and graph.value_info.
onnx_model = onnx.shape_inference.infer_shapes(
    onnx.load('./mobilenetv2-10.onnx'))

## Tensor names per graph field; each list is index-aligned with its field
input_nlist = [tensor.name for tensor in onnx_model.graph.input]
initializer_nlist = [tensor.name for tensor in onnx_model.graph.initializer]
value_info_nlist = [tensor.name for tensor in onnx_model.graph.value_info]

def get_size(shape):
    """Return the dimensions and the element count described by *shape*.

    Args:
        shape: an object whose ``dim`` attribute is a sequence of entries
            exposing ``dim_value`` (the way the callers pass
            ``...type.tensor_type.shape``).

    Returns:
        (dims, size): ``dims`` is the list of ``dim_value`` entries in order,
        ``size`` is their product (1 when there are no dimensions). Any
        ``dim_value`` of 0 therefore makes ``size`` 0.
    """
    dims = [entry.dim_value for entry in shape.dim]
    size = 1
    for extent in dims:
        size *= extent
    return dims, size

##### Collect Attribute ####
def OperatorAttr(op_type):
    """Collect the attributes of every node whose op_type equals *op_type*.

    Reads the module-level ``onnx_model`` together with the name lists
    ``input_nlist`` and ``value_info_nlist``.

    For 'Conv' nodes the record starts with the ``channel``, ``height`` and
    ``width`` of the node's first input (its feature map), taken from
    dimensions 1, 2 and 3 of that tensor's shape in ``graph.input`` or
    ``graph.value_info``. The keys are left out when the shape cannot be
    found or has fewer than four dimensions, instead of reusing values from
    a previously visited node.

    Args:
        op_type: operator type string to match, e.g. 'Conv'.

    Returns:
        A list with one ``{node_name: {field: value}}`` dict per matching
        node, in graph order.
    """
    JSON_list = []
    for node in onnx_model.graph.node:
        if node.op_type != op_type:
            continue
        fields = {}
        if node.op_type == 'Conv' and len(node.input) > 0:
            # Only the first input is the feature map; the remaining inputs
            # are the weight / bias tensors and must not be used for C/H/W.
            feature_map = node.input[0]
            value_info = None
            if feature_map in input_nlist:
                value_info = onnx_model.graph.input[
                    input_nlist.index(feature_map)]
            elif feature_map in value_info_nlist:
                value_info = onnx_model.graph.value_info[
                    value_info_nlist.index(feature_map)]
            if value_info is not None:
                dims, _ = get_size(value_info.type.tensor_type.shape)
                # assumes an N, C, H, W layout like the original indexing
                if len(dims) >= 4:
                    fields['channel'] = dims[1]
                    fields['height'] = dims[2]
                    fields['width'] = dims[3]
        for attr in node.attribute:
            # NOTE(review): only the `i` / `ints` fields are read; attributes
            # stored in other fields (floats, strings, tensors) presumably
            # show up as the default of `i` - confirm if those matter.
            fields[attr.name] = list(attr.ints) if attr.ints else attr.i
        JSON_list.append({node.name: fields})
    return JSON_list

def Print_JSON(JSON):
    """Print one ``{node_name: {field: value}}`` record as a small tree.

    Only the first key of *JSON* is used: it is printed on its own line,
    followed by one ``|- field : value`` line per entry of its dict.
    """
    node_name = tuple(JSON)[0]
    print(node_name)
    fields = JSON[node_name]
    for field in fields:
        print(f'|- {field} : {fields[field]}')
    

# op_dict: operator type -> number of nodes; op_operator: distinct types
op_dict = {}
op_operator = set()

# Tally every node of the graph by its operator type
for i in onnx_model.graph.node:
    op_dict[i.op_type] = op_dict.get(i.op_type, 0) + 1
    op_operator.add(i.op_type)

print(f'Total operators: {sum(op_dict.values())}')
print(f'Unique Operator: {op_operator}')
print('=====')
# Per operator type: the node count, then the attribute record of each node
for op in op_dict:
    print(f'Operator[{op}]: {op_dict[op]}')
    JSON_list = OperatorAttr(op)
    if not JSON_list:
        print('No attribute')
    for item in JSON_list:
        Print_JSON(item)
    print('====')

Total operators: 105
Unique Operator: {'Unsqueeze', 'Gather', 'Reshape', 'Constant', 'Add', 'GlobalAveragePool', 'Clip', 'Conv', 'Gemm', 'Concat', 'Shape'}
=====
Operator[Conv]: 52
Conv_0
|- channel : 3
|- height : 224
|- width : 224
|- dilations : [1, 1]
|- group : 1
|- kernel_shape : [3, 3]
|- pads : [1, 1, 1, 1]
|- strides : [2, 2]
Conv_2
|- channel : 32
|- height : 112
|- width : 112
|- dilations : [1, 1]
|- group : 32
|- kernel_shape : [3, 3]
|- pads : [1, 1, 1, 1]
|- strides : [1, 1]
Conv_4
|- channel : 32
|- height : 112
|- width : 112
|- dilations : [1, 1]
|- group : 1
|- kernel_shape : [1, 1]
|- pads : [0, 0, 0, 0]
|- strides : [1, 1]
Conv_5
|- channel : 16
|- height : 112
|- width : 112
|- dilations : [1, 1]
|- group : 1
|- kernel_shape : [1, 1]
|- pads : [0, 0, 0, 0]
|- strides : [1, 1]
Conv_7
|- channel : 96
|- height : 112
|- width : 112
|- dilations : [1, 1]
|- group : 96
|- kernel_shape : [3, 3]
|- pads : [1, 1, 1, 1]
|- strides : [2, 2]
Conv_9
|- channel : 96
|- height 

### 2-2-2. Data bandwidth requirement
- 為什麼加上 batch = 1 答案就會正常呢？

In [1]:
from torchvision import models, datasets, transforms as T
import torch

mobilenet_v2 = models.mobilenet_v2(pretrained=True)
# Switch to inference mode before the trial forward pass below: a module is
# in training mode by default, and a forward pass in that mode updates the
# BatchNorm running statistics and applies dropout, which would alter the
# weights that end up in the exported file.
mobilenet_v2.eval()

image_height = 224
image_width = 224
x = torch.randn(1, 3, image_height, image_width, requires_grad=True)
torch_out = mobilenet_v2(x)

# Export the model
torch.onnx.export(mobilenet_v2,              # model being run
                  x,                         # model input (or a tuple for multiple inputs)
                  "mobilenet_v2_test.onnx", # where to save the model (can be a file or file-like object)
                  export_params=True,        # store the trained parameter weights inside the model file
                  opset_version=12,          # the ONNX version to export the model to
                  do_constant_folding=True,  # whether to execute constant folding for optimization
                  input_names = ['input'],   # the model's input names
                  output_names = ['output']) # the model's output names

Downloading: "https://download.pytorch.org/models/mobilenet_v2-b0353104.pth" to /home/guofangyu/.cache/torch/hub/checkpoints/mobilenet_v2-b0353104.pth
100%|██████████| 13.6M/13.6M [00:00<00:00, 56.2MB/s]


In [4]:
import onnx
from onnx import shape_inference
from os import path
import sys
from tabulate import tabulate
from onnx import onnx_ml_pb2 as xpb2


# Load the exported model (onnx.load is called with load_external_data=False)
onnx_model = onnx.load("./mobilenet_v2_test.onnx", load_external_data=False)
# Run the ONNX checker on the loaded model before analysing it
onnx.checker.check_model(onnx_model)

# Shape-inferred copy of the model; the functions below read inferred_model
inferred_model = shape_inference.infer_shapes(onnx_model)
print('shape inference complete ...')

def _parse_element(elem: xpb2.ValueInfoProto):
    name = getattr(elem, 'name', "None")
    data_type = "NA"
    shape_str = "NA"
    etype = getattr(elem, 'type', False)
    if etype:
        ttype = getattr(etype, 'tensor_type', False)
        if ttype:
            data_type = getattr(ttype, 'elem_type', 0)
            shape = getattr(elem.type.tensor_type, "shape", False)
            if shape:
                shape_str = "["
                dims = getattr(shape, 'dim', [])
                for dim in dims:
                    vals = getattr(dim, 'dim_value', "?")
                    shape_str += (str(vals) + ",")
                shape_str = shape_str.rstrip(",")
                shape_str += "]"
    return name, data_type, shape_str

def get_valueproto_or_tensorproto_by_name(name: str, graph: xpb2.GraphProto):
    """Find the tensor called *name* in *graph* and report where it lives.

    The graph's lists are searched in the order input, value_info,
    initializer, output; the first entry whose ``name`` matches wins.

    Side effect: every node of *graph* whose name is the empty string is
    renamed to its position in ``graph.node`` (as a string).

    Args:
        name: tensor name to look up.
        graph: the graph to search (and whose unnamed nodes get renamed).

    Returns:
        (proto, code): the matching entry and the code of the list it came
        from - 1 graph.input, 2 graph.value_info, 3 graph.initializer,
        4 graph.output. When nothing matches, the four name lists are
        printed for diagnosis and ``(False, 0)`` is returned.
    """
    # Label anonymous nodes. This works on the graph that was passed in
    # rather than on a module-level model, so the function is usable with
    # any graph.
    for index, node in enumerate(graph.node):
        if node.name == "":
            node.name = str(index)

    sources = (
        (graph.input, 1),
        (graph.value_info, 2),
        (graph.initializer, 3),
        (graph.output, 4),
    )
    for protos, code in sources:
        for proto in protos:
            if proto.name == name:
                return proto, code

    # Not found anywhere: dump the known names (only built on this path).
    input_nlist = [k.name for k in graph.input]
    initializer_nlist = [k.name for k in graph.initializer]
    value_info_nlist = [k.name for k in graph.value_info]
    output_nlist = [k.name for k in graph.output]
    print("[ERROR MASSAGE] Can't find the tensor: ", name)
    print('input_nlist:\n', input_nlist)
    print('===================')
    print('value_info_nlist:\n', value_info_nlist)
    print('===================')
    print('initializer_nlist:\n', initializer_nlist)
    print('===================')
    print('output_nlist:\n', output_nlist)
    print('===================')
    return False, 0

# Bytes per element, keyed by the ONNX TensorProto.DataType code.
# STRING (8) is left out on purpose: it has no fixed element width.
_ELEM_TYPE_BYTES = {
    1: 4,    # FLOAT
    2: 1,    # UINT8
    3: 1,    # INT8
    4: 2,    # UINT16
    5: 2,    # INT16
    6: 4,    # INT32
    7: 8,    # INT64
    9: 1,    # BOOL
    10: 2,   # FLOAT16
    11: 8,   # DOUBLE
    12: 4,   # UINT32
    13: 8,   # UINT64
    14: 8,   # COMPLEX64
    15: 16,  # COMPLEX128
    16: 2,   # BFLOAT16
}


def cal_tensor_mem_size(elem_type: int, shape: [int]):
    """Return the memory size in bytes of a tensor.

    Args:
        elem_type: the ONNX element-type code of the tensor (an integer such
            as 1 for FLOAT), as stored in ``elem_type`` / ``data_type``.
        shape: the tensor's dimensions as a list of ints. An empty list
            counts as one element; any 0 entry makes the size 0.

    Returns:
        mem_size: number of elements multiplied by the element width. For a
        code that has no known width, "Undefined data type" is printed and
        the plain element count is returned.
    """
    mem_size = 1
    for num in shape:
        mem_size *= num

    bytes_per_elem = _ELEM_TYPE_BYTES.get(elem_type)
    if bytes_per_elem is None:
        print("Undefined data type")
        return mem_size
    return mem_size * bytes_per_elem



def _shape_from_text(shape_str):
    """Convert the "[d0,d1,...]" text built by _parse_element to a list of ints.

    Entries that are not integers (the "NA" placeholder, "?" or the empty
    text left over from "[]") become 0, so such a tensor counts as 0 bytes.
    """
    shape = []
    for dim in shape_str.strip('[]').split(','):
        try:
            shape.append(int(dim))
        except ValueError:
            shape.append(0)
    return shape


def _tensor_bytes(proto, type_Num, allowed_kinds):
    """Return the byte size of a resolved tensor, or None for a disallowed list.

    ``type_Num`` is the list code returned by
    get_valueproto_or_tensorproto_by_name. Code 3 (initializer) is read
    through ``data_type`` / ``dims``; the other codes go through
    _parse_element.
    """
    if type_Num not in allowed_kinds:
        return None
    if type_Num == 3:
        dtype = getattr(proto, 'data_type', False)
        shape = getattr(proto, 'dims', [])
    else:
        _, dtype, shape_str = _parse_element(proto)
        shape = _shape_from_text(shape_str)
    return cal_tensor_mem_size(dtype, shape)


def _layer_traffic(nodeProto, tensor_names, allowed_kinds, graph,
                   unknown_tensor_list):
    """Sum the byte sizes of the named tensors of one node.

    Tensors that cannot be found are appended to *unknown_tensor_list* as
    ``(node name, tensor name, op type)``. Tensors that are missing or come
    from a list outside *allowed_kinds* are reported and contribute nothing.
    """
    total = 0
    for tensor_name in tensor_names:
        if not tensor_name:
            # ONNX marks an omitted optional input with an empty name;
            # there is no tensor to look up or to count.
            continue
        proto, type_Num = get_valueproto_or_tensorproto_by_name(
            tensor_name, graph)
        if not proto:
            print(
                '[ERROR MASSAGE] [get_info/mem_BW_without_buf] The Tensor: ',
                tensor_name, ' is no found !')
            unknown_tensor_list.append(
                (nodeProto.name, tensor_name, nodeProto.op_type))
            continue
        size = _tensor_bytes(proto, type_Num, allowed_kinds)
        if size is None:
            print(
                '[ERROR MASSAGE] [get_info/mem_BW_without_buf] The Tensor: ',
                tensor_name, ' is from a wrong list !')
            continue
        total += size
    return total


def get_bandwidth(graph: xpb2.GraphProto):
    """Print the per-node and total memory traffic of *graph*.

    For every node except 'Constant' nodes, the bytes of all input tensors
    (read) and all output tensors (write) are summed. Inputs may come from
    graph.input, graph.value_info or graph.initializer; outputs from
    graph.value_info or graph.output.

    Prints a table sorted by read bytes (largest first), the grand total in
    bytes and MB (1 MB = 1,000,000 bytes), and a table of the tensors that
    could not be found.

    Args:
        graph: the (shape-inferred) graph to analyse.

    Returns:
        True when the report was printed, False when an exception occurred
        (its message is printed instead).
    """
    try:
        mem_BW_list = []
        unknown_tensor_list = []
        total_mem_BW = 0
        for nodeProto in graph.node:
            if nodeProto.op_type == 'Constant':
                continue
            read_mem_BW_each_layer = _layer_traffic(
                nodeProto, nodeProto.input, (1, 2, 3), graph,
                unknown_tensor_list)
            write_mem_BW_each_layer = _layer_traffic(
                nodeProto, nodeProto.output, (2, 4), graph,
                unknown_tensor_list)
            total_each_layer = (read_mem_BW_each_layer
                                + write_mem_BW_each_layer)

            mem_BW_list.append((nodeProto.name, read_mem_BW_each_layer,
                                write_mem_BW_each_layer, total_each_layer))
            total_mem_BW += total_each_layer

        # display the memory bandwidth of each layer, largest reader first
        columns = ['layer', 'read_bw', 'write_bw', 'total_bw']
        mem_BW_list.sort(key=lambda layer: layer[1], reverse=True)
        print(tabulate(mem_BW_list, headers=columns))
        print(
            '====================================================================================\n'
        )
        print(
            "The memory bandwidth for processor to execute a whole model without on-chip-buffer is: \n",
            total_mem_BW, '(bytes)\n',
            float(total_mem_BW) / float(1000000), '(MB)\n')
        # display the tensors that could not be resolved
        columns = ['op_name', 'unfound_tensor', 'op_type']
        print(tabulate(unknown_tensor_list, headers=columns))
        print(
            '====================================================================================\n'
        )
    except Exception as e:
        # Top-level boundary of the report: callers get False instead of a
        # traceback, with the reason printed.
        print("[ERROR MASSAGE] Unable to display: " + str(e))
        return False

    return True

# Start here: run the bandwidth report on the shape-inferred graph
print("start")
get_bandwidth(inferred_model.graph)

shape inference complete ...
start
layer                                              read_bw    write_bw    total_bw
-----------------------------------------------  ---------  ----------  ----------
/classifier/classifier.1/Gemm                      5129120        4000     5133120
/features/features.2/conv/conv.1/conv.1.0/Conv     4820736     1204224     6024960
/features/features.2/conv/conv.0/conv.0.2/Clip     4816896     4816896     9633792
/features/features.3/conv/conv.2/Conv              1820256      301056     2121312
/features/features.3/conv/conv.1/conv.1.0/Conv     1812096     1806336     3618432
/features/features.4/conv/conv.1/conv.1.0/Conv     1812096      451584     2263680
/features/features.3/conv/conv.0/conv.0.2/Clip     1806336     1806336     3612672
/features/features.3/conv/conv.1/conv.1.2/Clip     1806336     1806336     3612672
/features/features.4/conv/conv.0/conv.0.2/Clip     1806336     1806336     3612672
/features/features.18/features.18.0/Conv           1

True

### 2-2-3. activation memory storage requirement

In [20]:
import torchvision.models as models
import torch
# Shared store filled by the forward hooks: name -> detached module output
activation = {}


def get_activation(name):
    """Build a forward hook that records a module's output under *name*.

    The returned hook takes the three positional arguments of a forward
    hook and stores ``output.detach()`` in the module-level ``activation``
    dict, replacing any earlier entry for the same name.
    """
    def hook(_module, _inputs, outputs):
        activation[name] = outputs.detach()
    return hook

# Load a pre-trained MobileNetV2 model and put it in inference mode
model = models.mobilenet_v2(pretrained=True)
model.eval()

# Dictionary to store activations from each layer (reset before this run)
activation = {}

# Register the hook on every module yielded by named_modules()
# NOTE(review): named_modules() presumably also yields container modules
# (the root model, Sequential blocks) whose output is the same tensor as
# their last child's, so those activations are counted more than once -
# confirm whether only leaf modules should be hooked.
for layer_name, layer in model.named_modules():
    layer.register_forward_hook(get_activation(layer_name))

# Run model inference; only forward outputs are needed, so skip autograd
data = torch.randn(1, 3, 224, 224)
with torch.no_grad():
    output = model(data)

# Sum the size of every captured activation, using each tensor's own
# element width instead of assuming 4-byte float32.
local_memory = sum(
    torch.numel(tensor) * tensor.element_size()
    for tensor in activation.values()
)

print(f"Activation memory storage requirement: {local_memory} byte ({round(local_memory/1048576, 2)}MB)")

Activation memory storage requirement: 107117792 byte (102.16MB)


401408