# ONNX 量化模型

In [None]:
%cd ..
import numpy as np
import pytest
import tvm
import tvm.testing

from utils.onnx_utils import (
    get_input_data_shape_dict,
    make_constant_node, get_onnxruntime_output,
    get_tvm_output, get_tvm_output_with_vm,
    verify_with_ort, verify_with_ort_with_inputs,
    quantize_and_verify_with_ort
)

## `qlinearadd`

In [None]:
from onnx import TensorProto, helper

def verify_qlinearadd(a_shape, b_shape, c_shape):
    """Build a single-node float ``Add`` model and verify its quantized form.

    The model, its input names and input shapes are handed to
    ``quantize_and_verify_with_ort`` with target ``"llvm"`` and ``tvm.cpu()``.
    NOTE(review): presumably that helper quantizes the model and compares TVM
    against ONNX Runtime; confirm in ``utils.onnx_utils``.

    Parameters
    ----------
    a_shape, b_shape : sequence of int
        Declared shapes of the graph inputs ``a`` and ``b``.
    c_shape : sequence of int
        Declared shape of the graph output ``C``.
    """
    input_names = ["a", "b"]
    input_nodes = [
        helper.make_tensor_value_info("a", TensorProto.FLOAT, list(a_shape)),
        helper.make_tensor_value_info("b", TensorProto.FLOAT, list(b_shape)),
    ]

    # Reuse input_names so the node inputs cannot drift from the names that
    # are passed to the verification helper below.
    node = helper.make_node("Add", input_names, ["C"])
    graph = helper.make_graph(
        [node],
        "qlinearadd_test",
        inputs=input_nodes,
        outputs=[helper.make_tensor_value_info("C", TensorProto.FLOAT, list(c_shape))],
    )
    model = helper.make_model(graph, producer_name="qlinearadd_test")
    target = "llvm"
    dev = tvm.cpu()
    quantize_and_verify_with_ort(model, input_names, [a_shape, b_shape], target, dev)

# Both inputs and the output share the shape [4, 2].
verify_qlinearadd([4, 2], [4, 2], [4, 2])
# Inputs of different rank; the declared output shape equals a's shape.
verify_qlinearadd([4, 2], [2], [4, 2])
# Input shapes differ on more than one axis; the declared output is [5, 2, 7].
verify_qlinearadd([5, 1, 7], [2, 7], [5, 2, 7])


## `qlinearmul`

In [None]:
import numpy as np
from onnx import TensorProto, helper
import tvm

def verify_qlinearmul(a_shape, b_shape, c_shape):
    """Build a single-node float ``Mul`` model and verify its quantized form.

    The model, its input names and input shapes are handed to
    ``quantize_and_verify_with_ort`` with target ``"llvm"`` and ``tvm.cpu()``.
    NOTE(review): presumably that helper quantizes the model and compares TVM
    against ONNX Runtime; confirm in ``utils.onnx_utils``.

    Parameters
    ----------
    a_shape, b_shape : sequence of int
        Declared shapes of the graph inputs ``a`` and ``b``.
    c_shape : sequence of int
        Declared shape of the graph output ``C``.
    """
    input_names = ["a", "b"]
    input_nodes = [
        helper.make_tensor_value_info("a", TensorProto.FLOAT, list(a_shape)),
        helper.make_tensor_value_info("b", TensorProto.FLOAT, list(b_shape)),
    ]

    node = helper.make_node("Mul", input_names, ["C"])
    graph = helper.make_graph(
        [node],
        "qlinearmul_test",
        inputs=input_nodes,
        outputs=[helper.make_tensor_value_info("C", TensorProto.FLOAT, list(c_shape))],
    )
    model = helper.make_model(graph, producer_name="qlinearmul_test")
    target = "llvm"
    dev = tvm.cpu()
    quantize_and_verify_with_ort(model, input_names, [a_shape, b_shape], target, dev)

# One-dimensional inputs of identical shape.
verify_qlinearmul([7], [7], [7])
# Two-dimensional inputs of identical shape.
verify_qlinearmul([4, 2], [4, 2], [4, 2])
# Inputs of different rank; the declared output shape equals a's shape.
verify_qlinearmul([4, 2], [2], [4, 2])
# Input shapes differ on more than one axis; the declared output is [5, 2, 7].
verify_qlinearmul([5, 1, 7], [2, 7], [5, 2, 7])

## 其他

In [None]:



@pytest.mark.skip(reason="See https://github.com/apache/tvm/issues/11375")
@tvm.testing.parametrize_targets
def test_qlinearleakyrelu(target, dev):
    """Verify quantized ``LeakyRelu`` models for several shapes and alphas.

    Currently skipped; see the issue linked in the skip marker.
    """

    def verify_qlinearleakyrelu(inshape, kwargs):
        """Build a one-node ``LeakyRelu`` model and verify its quantized form.

        Parameters
        ----------
        inshape : sequence of int
            Shape declared for both the input ``X`` and the output ``Y``.
        kwargs : dict
            Forwarded to ``helper.make_node`` as node attributes.
        """
        shape = tuple(inshape)
        node = helper.make_node("LeakyRelu", ["X"], ["Y"], **kwargs)

        graph = helper.make_graph(
            [node],
            "qlinearRelu_test",
            inputs=[helper.make_tensor_value_info("X", TensorProto.FLOAT, list(shape))],
            outputs=[helper.make_tensor_value_info("Y", TensorProto.FLOAT, list(shape))],
        )
        model = helper.make_model(graph, producer_name="qlinearRelu_test")
        args = (model, ["X"], [shape], target, dev)
        # The relaxed tolerances are meant for CUDA. The previous check
        # compared ``dev`` (a device object supplied by parametrize_targets,
        # not a string) against "cuda", so it could never match; the target
        # string is what identifies the backend.
        if "cuda" in str(target):
            quantize_and_verify_with_ort(*args, rtol=1e-2, atol=1e-2)
        else:
            quantize_and_verify_with_ort(*args)

    verify_qlinearleakyrelu([2, 4, 5, 6], {"alpha": 0.25})
    verify_qlinearleakyrelu([6, 5, 6, 7], {"alpha": 0.35})
    verify_qlinearleakyrelu([5, 1, 4, 6], {"alpha": 0.65})


@pytest.mark.skip(reason="See https://github.com/apache/tvm/issues/11375")
@tvm.testing.parametrize_targets
def test_qlinearsigmoid(target, dev):
    """Verify quantized ``Sigmoid`` models for several input shapes.

    Currently skipped; see the issue linked in the skip marker.
    """

    def verify_qlinearsigmoid(a_shape):
        """Build a one-node ``Sigmoid`` model and verify its quantized form.

        ``a_shape`` is declared for both the input ``a`` and the output ``B``.
        """
        input_nodes = [helper.make_tensor_value_info("a", TensorProto.FLOAT, list(a_shape))]

        node = helper.make_node("Sigmoid", ["a"], ["B"])
        graph = helper.make_graph(
            [node],
            "qlinearsigmoid_test",
            inputs=input_nodes,
            outputs=[helper.make_tensor_value_info("B", TensorProto.FLOAT, list(a_shape))],
        )
        model = helper.make_model(graph, producer_name="qlinearsigmoid_test")
        quantize_and_verify_with_ort(model, ["a"], [a_shape], target, dev)

    verify_qlinearsigmoid([4, 2])
    verify_qlinearsigmoid([5])
    verify_qlinearsigmoid([3, 4, 5])
    # An empty shape list declares a rank-0 (scalar) tensor.
    verify_qlinearsigmoid([])


@tvm.testing.parametrize_targets
def test_qlinearsoftmax(target, dev):
    """Verify quantized ``Softmax`` models for several input shapes."""

    def verify_qlinearsoftmax(a_shape):
        """Build a one-node ``Softmax`` model and verify its quantized form.

        ``a_shape`` is declared for both the input ``a`` and the output ``B``.
        No ``axis`` attribute is set on the node.
        """
        input_nodes = [helper.make_tensor_value_info("a", TensorProto.FLOAT, list(a_shape))]

        node = helper.make_node("Softmax", ["a"], ["B"])
        graph = helper.make_graph(
            [node],
            "qlinearsoftmax_test",
            inputs=input_nodes,
            outputs=[helper.make_tensor_value_info("B", TensorProto.FLOAT, list(a_shape))],
        )
        model = helper.make_model(graph, producer_name="qlinearsoftmax_test")
        quantize_and_verify_with_ort(model, ["a"], [a_shape], target, dev)

    verify_qlinearsoftmax([4, 2])
    verify_qlinearsoftmax([5])
    verify_qlinearsoftmax([3, 4, 5])



@pytest.mark.skip("flaky")
@tvm.testing.parametrize_targets
def test_qlinear_average_pool(target, dev):
    """Verify quantized ``AveragePool`` models in 1D, 2D and 3D.

    Currently skipped (marked flaky).
    """

    def verify_qlinear_average_pool(
        x_shape, kernel_shape, strides, pads, out_shape, auto_pad="NOTSET"
    ):
        """Build a one-node ``AveragePool`` model and verify its quantized form.

        Parameters
        ----------
        x_shape, out_shape : sequence of int
            Declared shapes of the input ``X`` and the output ``Y``.
        kernel_shape, strides : sequence of int
            Passed to the node as attributes of the same name.
        pads : sequence of int or None
            Explicit ``pads`` attribute. When ``None``, an ``auto_pad``
            attribute is attached instead.
        auto_pad : str
            Value of the ``auto_pad`` attribute; only used when ``pads`` is
            ``None``.
        """
        input_names = ["X"]
        x_info = helper.make_tensor_value_info("X", TensorProto.FLOAT, list(x_shape))
        y_info = helper.make_tensor_value_info("Y", TensorProto.FLOAT, list(out_shape))

        pool = helper.make_node(
            "AveragePool",
            inputs=input_names,
            outputs=["Y"],
            kernel_shape=kernel_shape,
            strides=strides,
        )

        # Exactly one padding attribute is attached: explicit pads when
        # given, otherwise the auto_pad mode.
        padding = (
            helper.make_attribute("auto_pad", auto_pad)
            if pads is None
            else helper.make_attribute("pads", pads)
        )
        pool.attribute.append(padding)

        graph = helper.make_graph(
            [pool],
            "qlinear_average_pool_test",
            inputs=[x_info],
            outputs=[y_info],
        )

        model = helper.make_model(graph, producer_name="qlinear_average_pool_Test")
        quantize_and_verify_with_ort(model, input_names, [x_shape], target, dev)

    cases = [
        # Pool1D
        {
            "x_shape": [1, 1, 32],
            "kernel_shape": [3],
            "strides": [1],
            "pads": [1, 1],
            "out_shape": [1, 1, 32],
        },
        # Pool2D
        {
            "x_shape": [1, 1, 32, 32],
            "kernel_shape": [3, 3],
            "strides": [1, 1],
            "pads": [1, 1, 1, 1],
            "out_shape": [1, 1, 32, 32],
        },
        # Pool1D with stride
        {
            "x_shape": [1, 1, 32],
            "kernel_shape": [3],
            "strides": [2],
            "pads": [1, 1],
            "out_shape": [1, 1, 16],
        },
        # Pool2D with stride
        {
            "x_shape": [1, 1, 32, 32],
            "kernel_shape": [3, 3],
            "strides": [2, 2],
            "pads": [1, 1, 1, 1],
            "out_shape": [1, 1, 16, 16],
        },
        # Pool1D with stride and autopadding
        {
            "x_shape": [1, 1, 32],
            "kernel_shape": [3],
            "strides": [2],
            "pads": None,
            "out_shape": [1, 1, 16],
            "auto_pad": "SAME_UPPER",
        },
        # Pool2D with stride and autopadding
        {
            "x_shape": [1, 1, 32, 32],
            "kernel_shape": [3, 3],
            "strides": [2, 2],
            "pads": None,
            "out_shape": [1, 1, 16, 16],
            "auto_pad": "SAME_UPPER",
        },
        # Pool3D with stride
        {
            "x_shape": [1, 1, 32, 32, 32],
            "kernel_shape": [3, 3, 3],
            "strides": [2, 2, 2],
            "pads": [1, 1, 1, 1, 1, 1],
            "out_shape": [1, 1, 16, 16, 16],
        },
        # Pool3D with stride and autopadding
        {
            "x_shape": [1, 1, 32, 32, 32],
            "kernel_shape": [3, 3, 3],
            "strides": [2, 2, 2],
            "pads": None,
            "out_shape": [1, 1, 16, 16, 16],
            "auto_pad": "SAME_UPPER",
        },
    ]
    for case in cases:
        verify_qlinear_average_pool(**case)


@tvm.testing.parametrize_targets
def test_qlinear_global_average_pool(target, dev):
    """Verify quantized ``GlobalAveragePool`` models in 1D, 2D and 3D."""

    def verify_qlinear_global_average_pool(x_shape):
        """Build a one-node ``GlobalAveragePool`` model and verify it.

        The declared output shape keeps the first two entries of ``x_shape``
        and replaces every remaining entry with 1.
        """
        input_names = ["X"]
        trailing_ones = [1] * (len(x_shape) - 2)
        out_shape = x_shape[:2] + trailing_ones

        x_info = helper.make_tensor_value_info("X", TensorProto.FLOAT, list(x_shape))
        y_info = helper.make_tensor_value_info("Y", TensorProto.FLOAT, list(out_shape))
        pool_node = helper.make_node("GlobalAveragePool", inputs=input_names, outputs=["Y"])

        graph = helper.make_graph(
            [pool_node],
            "qlinear_global_average_pool_test",
            inputs=[x_info],
            outputs=[y_info],
        )

        model = helper.make_model(graph, producer_name="qlinear_global_average_pool_test")
        quantize_and_verify_with_ort(model, input_names, [x_shape], target, dev)

    shapes = [
        # 1D Pooling (NCW)
        [1, 8, 8],
        [4, 1, 4],
        # 2D Pooling (NCHW)
        [1, 8, 8, 8],
        [4, 1, 6, 4],
        # 3D Pooling (NCDHW)
        [1, 8, 6, 8, 8],
        [4, 1, 2, 6, 4],
    ]
    for x_shape in shapes:
        verify_qlinear_global_average_pool(x_shape)

@tvm.testing.parametrize_targets
def test_qlinearconcat(target, dev):
    """Verify quantized ``Concat`` models along different axes."""

    def verify_qlinearconcat(shapes, out_shape, axis=None):
        """Build a one-node ``Concat`` model and verify its quantized form.

        Parameters
        ----------
        shapes : sequence of sequence of int
            One declared shape per graph input; inputs are named ``"a"``,
            ``"b"``, ``"c"``, ... in order.
        out_shape : sequence of int
            Declared shape of the output ``C``.
        axis : int or None
            Value of the node's ``axis`` attribute; the attribute is omitted
            when ``None``.
        """
        input_names = [chr(ord("a") + i) for i, _ in enumerate(shapes)]
        input_nodes = [
            helper.make_tensor_value_info(name, TensorProto.FLOAT, list(shape))
            for name, shape in zip(input_names, shapes)
        ]

        concat_node = helper.make_node("Concat", input_names, ["C"])
        if axis is not None:
            concat_node.attribute.append(helper.make_attribute("axis", axis))

        graph = helper.make_graph(
            [concat_node],
            "qlinearconcat_test",
            inputs=input_nodes,
            outputs=[helper.make_tensor_value_info("C", TensorProto.FLOAT, list(out_shape))],
        )
        model = helper.make_model(graph, producer_name="qlinearconcat_test")
        quantize_and_verify_with_ort(model, input_names, shapes, target, dev)

    verify_qlinearconcat([[2, 1], [2, 1]], [4, 1], 0)
    verify_qlinearconcat([[2, 1], [2, 1]], [2, 2], 1)
    verify_qlinearconcat([[1, 2], [2, 2], [3, 2]], [6, 2], 0)