本 Notebook 演示如何下载 Paraformer 中文语音识别模型，将其导出为 ONNX 格式，并生成 tokens.txt、向导出的 ONNX 模型写入元数据。

In [None]:
pip install funasr torch onnxruntime numpy modelscope

In [None]:
#模型下载
from modelscope import snapshot_download

# The repository name doubles as the local download directory, so spell it once.
_MODEL_REPO = 'speech_paraformer-large-vad-punc_asr_nat-zh-cn-16k-common-vocab8404-pytorch'

# Download the model snapshot; the returned path is reused by the later cells.
model_dir = snapshot_download(
    model_id=f'iic/{_MODEL_REPO}',
    local_dir=_MODEL_REPO,
)

print(model_dir)

In [16]:
"""
将 Paraformer 模型导出为 ONNX 格式
使用旧版 ONNX 导出器绕过 torch.export 的兼容性问题
"""

import os
from funasr import AutoModel

def patch_export_utils():
    """
    Replace ``funasr.utils.export_utils._onnx`` with a variant that forces the
    legacy ONNX exporter (``dynamo=False``).

    The untouched funasr implementation stays reachable as the
    ``_original_onnx`` attribute of the installed replacement, so calling this
    function several times (e.g. re-running the cell) never loses it.

    Raises:
        AttributeError: if ``funasr.utils.export_utils`` has no ``_onnx``.
    """
    import inspect

    import torch
    from funasr.utils import export_utils

    # If the patch is already installed, unwrap it to get the real original.
    current_onnx = export_utils._onnx
    original_onnx = getattr(current_onnx, "_original_onnx", current_onnx)

    # Not every PyTorch release accepts the `dynamo` keyword; passing it to one
    # that does not would raise TypeError, so only forward it when it exists.
    export_supports_dynamo = "dynamo" in inspect.signature(torch.onnx.export).parameters

    def patched_onnx(
        model,
        data_in=None,
        quantize: bool = False,
        opset_version: int = 14,
        export_dir: str = None,
        **kwargs,
    ):
        """Export ``model`` to ONNX with the legacy exporter.

        Args:
            model: object providing ``export_dummy_inputs``, ``export_name``,
                ``export_input_names``, ``export_output_names`` and
                ``export_dynamic_axes``.
            data_in: accepted for signature compatibility; not used here.
            quantize: when True, additionally write a dynamically quantized
                ``*_quant.onnx`` next to the exported file.
            opset_version: ONNX opset passed to ``torch.onnx.export``.
            export_dir: directory that receives the exported file(s).
            **kwargs: ``device`` (default "cpu") and ``verbose`` (default
                False) are read; everything else is ignored.

        Raises:
            ValueError: if ``export_dir`` is None.
            RuntimeError: if quantization is requested but ``onnx`` /
                ``onnxruntime`` cannot be imported.
        """
        if export_dir is None:
            raise ValueError("export_dir must be provided to export the ONNX model")

        device = kwargs.get("device", "cpu")
        dummy_input = model.export_dummy_inputs()

        if isinstance(dummy_input, torch.Tensor):
            dummy_input = dummy_input.to(device)
        else:
            dummy_input = tuple(tensor.to(device) for tensor in dummy_input)

        verbose = kwargs.get("verbose", False)

        # export_name is either a plain string (without extension) or a
        # callable returning the full file name.
        if isinstance(model.export_name, str):
            export_name = model.export_name + ".onnx"
        else:
            export_name = model.export_name()
        model_path = os.path.join(export_dir, export_name)

        print(f"\n使用旧版 ONNX 导出器...")
        print(f"导出路径: {model_path}")

        export_kwargs = dict(
            verbose=verbose,
            do_constant_folding=True,
            opset_version=opset_version,
            input_names=model.export_input_names(),
            output_names=model.export_output_names(),
            dynamic_axes=model.export_dynamic_axes(),
            export_params=True,
        )
        if export_supports_dynamo:
            # Key point: disable the dynamo-based (new) exporter.
            export_kwargs["dynamo"] = False

        # Use the legacy exporter to avoid the torch.export code path.
        with torch.onnx.select_model_mode_for_export(model, torch.onnx.TrainingMode.EVAL):
            torch.onnx.export(model, dummy_input, model_path, **export_kwargs)

        if quantize:
            try:
                from onnxruntime.quantization import QuantType, quantize_dynamic
                import onnx
            except ImportError as exc:
                # Chain the original error so the real cause stays visible.
                raise RuntimeError(
                    "You are quantizing the onnx model, please install onnxruntime first. via \n`pip install onnx`\n`pip install onnxruntime`."
                ) from exc

            quant_model_path = model_path.replace(".onnx", "_quant.onnx")
            onnx_model = onnx.load(model_path)
            node_names = [n.name for n in onnx_model.graph.node]
            # Nodes whose names match these substrings are left unquantized.
            nodes_to_exclude = [
                m for m in node_names if "output" in m or "bias_encoder" in m or "bias_decoder" in m
            ]
            print("Quantizing model from {} to {}".format(model_path, quant_model_path))
            quantize_dynamic(
                model_input=model_path,
                model_output=quant_model_path,
                op_types_to_quantize=["MatMul"],
                per_channel=True,
                reduce_range=False,
                weight_type=QuantType.QUInt8,
                nodes_to_exclude=nodes_to_exclude,
            )

    # Remember the pristine implementation on the replacement itself.
    patched_onnx._original_onnx = original_onnx

    # Install the replacement.
    export_utils._onnx = patched_onnx
    print("✓ 已应用 ONNX 导出补丁（使用旧版导出器）")


def export_paraformer_to_onnx(MODEL_NAME, EXPORT_CONFIG):
    """
    Export a Paraformer model to ONNX through funasr and list the result.

    Args:
        MODEL_NAME: model name or local directory handed to ``AutoModel``.
        EXPORT_CONFIG: mapping of export options. All keys are optional:
            ``quantize`` (default False) and ``opset_version`` (default 14)
            are always forwarded to ``model.export``; ``device``,
            ``fallback_num`` and ``calib_num`` are forwarded only when present.

    Returns:
        The export directory returned by ``model.export``.

    Raises:
        Any exception raised by ``model.export`` is re-raised after a short
        summary has been printed.
    """
    quantize = EXPORT_CONFIG.get('quantize', False)
    opset_version = EXPORT_CONFIG.get('opset_version', 14)

    print("=" * 80)
    print("开始导出 Paraformer 模型到 ONNX 格式")
    print("=" * 80)
    print(f"\n模型名称: {MODEL_NAME}")
    print(f"量化模式: {'是' if quantize else '否'}")
    print(f"ONNX Opset 版本: {opset_version}")

    # Install the exporter patch before the export runs.
    print("\n正在应用导出补丁...")
    patch_export_utils()

    print("\n正在加载模型...")

    # Load the model; the device is passed only if the caller configured one.
    load_kwargs = {'model': MODEL_NAME}
    if 'device' in EXPORT_CONFIG:
        load_kwargs['device'] = EXPORT_CONFIG['device']
    model = AutoModel(**load_kwargs)

    print("✓ 模型加载成功！")
    print("\n开始导出 ONNX 模型...")

    export_kwargs = {
        'type': 'onnx',
        'quantize': quantize,
        'opset_version': opset_version,
    }
    for optional_key in ('fallback_num', 'calib_num'):
        if optional_key in EXPORT_CONFIG:
            export_kwargs[optional_key] = EXPORT_CONFIG[optional_key]

    # Run the export. There is no fallback path: the error is summarised and
    # re-raised, and the message says so instead of promising a retry.
    try:
        export_dir = model.export(**export_kwargs)
    except Exception as e:
        message = str(e)
        if len(message) > 200:
            message = message[:200] + "..."
        print("\n导出过程中遇到错误！")
        print(f"错误信息: {message}")
        raise

    print("\n" + "=" * 80)
    print("✓ 导出成功！")
    print("=" * 80)
    print(f"\nONNX 模型保存路径: {export_dir}")

    # List the regular files in the export directory with their sizes.
    print("\n导出的文件列表:")
    if os.path.isdir(export_dir):
        total_size = 0
        for entry in sorted(os.listdir(export_dir)):
            entry_path = os.path.join(export_dir, entry)
            if os.path.isfile(entry_path):
                size_mb = os.path.getsize(entry_path) / (1024 * 1024)
                total_size += size_mb
                print(f"  - {entry:<40} {size_mb:>10.2f} MB")
        print(f"\n  总大小: {total_size:.2f} MB")

    print("\n" + "=" * 80)
    print("提示:")
    print("  1. 如需量化模型，请将 EXPORT_CONFIG['quantize'] 设置为 True")
    print("  2. 量化后的模型文件名为 model_quant.onnx")
    print("  3. 非量化模型文件名为 model.onnx")
    print("=" * 80)

    return export_dir





# Model location: the local directory created by the download cell.
MODEL_NAME = './speech_paraformer-large-vad-punc_asr_nat-zh-cn-16k-common-vocab8404-pytorch'

# Export settings consumed by export_paraformer_to_onnx().
EXPORT_CONFIG = dict(
    quantize=False,      # True exports an additional int8-quantized model, False keeps fp32 only
    device='cpu',        # device used while exporting
    opset_version=14,    # ONNX opset
    fallback_num=5,      # fallback count used for quantization
    calib_num=100,       # calibration count used for quantization
)


try:
    export_dir = export_paraformer_to_onnx(MODEL_NAME, EXPORT_CONFIG)
except Exception as e:
    # Print a troubleshooting checklist, then the traceback, then re-raise so
    # the failure is still visible to the notebook.
    banner = "=" * 80
    report = [
        "\n" + banner,
        "✗ 导出失败！",
        banner,
        f"错误信息: {e}",
        "\n请确保:",
        "  1. 已安装 funasr: pip install -U funasr",
        "  2. 已安装 modelscope: pip install -U modelscope",
        "  3. 已安装 onnx: pip install -U onnx onnxruntime",
        "  4. 网络连接正常，可以下载模型",
        "  5. PyTorch 版本兼容（建议 2.0+）",
        banner,
    ]
    print("\n".join(report))
    import traceback
    traceback.print_exc()
    raise

开始导出 Paraformer 模型到 ONNX 格式

模型名称: ./speech_paraformer-large-vad-punc_asr_nat-zh-cn-16k-common-vocab8404-pytorch
量化模式: 否
ONNX Opset 版本: 14

正在应用导出补丁...
✓ 已应用 ONNX 导出补丁（使用旧版导出器）

正在加载模型...
funasr version: 1.3.0.
Check update of funasr, and it would cost few times. You may disable it by set `disable_update=True` in AutoModel




You are using the latest version of funasr-1.3.0
✓ 模型加载成功！

开始导出 ONNX 模型...

使用旧版 ONNX 导出器...
导出路径: ./speech_paraformer-large-vad-punc_asr_nat-zh-cn-16k-common-vocab8404-pytorch\model.onnx

✓ 导出成功！

ONNX 模型保存路径: ./speech_paraformer-large-vad-punc_asr_nat-zh-cn-16k-common-vocab8404-pytorch

导出的文件列表:
  - .DS_Store                                      0.01 MB
  - .mdl                                           0.00 MB
  - .msc                                           0.00 MB
  - .mv                                            0.00 MB
  - README.md                                      0.02 MB
  - am.mvn                                         0.01 MB
  - config.yaml                                    0.00 MB
  - configuration.json                             0.00 MB
  - model.onnx                                   843.91 MB
  - model.pt                                     858.98 MB
  - seg_dict                                       7.90 MB
  - tokens.json                                    

In [18]:
# 转换 tokens.json 为 tokens.txt

import sys
from typing import Dict


def load_tokens(tokens_path: str = None) -> Dict[int, str]:
    """Load the token list from a ``tokens.json`` file.

    The file is parsed with the ``json`` module rather than line by line, so
    the last entry (which has no trailing comma), tokens containing ``[``,
    ``]`` or ``,`` and escaped characters are all read correctly.

    Args:
        tokens_path: path of the JSON file. Defaults to
            ``{model_dir}/tokens.json`` using the notebook-level ``model_dir``.

    Returns:
        Mapping from token index (position in the list) to token text.

    Raises:
        ValueError: if the file does not contain a JSON list.
    """
    import json

    if tokens_path is None:
        tokens_path = f"{model_dir}/tokens.json"

    with open(tokens_path, encoding="utf-8") as f:
        token_list = json.load(f)

    if not isinstance(token_list, list):
        raise ValueError(f"Expected a JSON list of tokens in {tokens_path}")

    ans = dict(enumerate(token_list))
    print('num tokens', len(ans))
    return ans


def write_tokens(tokens: Dict[int, str], tokens_path: str = None):
    """Write tokens as ``<token> <index>`` lines, one per entry.

    Args:
        tokens: mapping from token index to token text; written in the
            mapping's iteration order.
        tokens_path: output file. Defaults to ``{model_dir}/tokens.txt`` using
            the notebook-level ``model_dir``.
    """
    if tokens_path is None:
        tokens_path = f"{model_dir}/tokens.txt"

    with open(tokens_path, "w", encoding="utf-8") as f:
        f.writelines(f"{s} {idx}\n" for idx, s in tokens.items())


def main():
    """Read the tokens with load_tokens() and hand them to write_tokens()."""
    write_tokens(load_tokens())


# Run the conversion when this cell is executed as the main module.
if __name__ == "__main__":
    main()


num tokens 8404


In [4]:
# 添加元数据到 ONNX 模型

from typing import Dict

import numpy as np
import onnx


def load_cmvn(mvn_path: str = None):
    """Extract the two ``<LearnRateCoef>`` value rows from an ``am.mvn`` file.

    For every line starting with ``<LearnRateCoef>`` the whitespace-separated
    fields between the fourth field and the last one are joined with commas.
    The first such line becomes ``neg_mean``; any later one overwrites
    ``inv_stddev``.

    Args:
        mvn_path: path of the file. Defaults to ``{model_dir}/am.mvn`` using
            the notebook-level ``model_dir``.

    Returns:
        ``(neg_mean, inv_stddev)`` as comma-separated strings; an element is
        None when the corresponding line was not found.
    """
    if mvn_path is None:
        mvn_path = f"{model_dir}/am.mvn"

    neg_mean = None
    inv_stddev = None

    # Explicit encoding, consistent with the other file reads in this notebook.
    with open(mvn_path, encoding="utf-8") as f:
        for line in f:
            if not line.startswith("<LearnRateCoef>"):
                continue
            # Drops the leading "<LearnRateCoef> <n> [" fields and the closing "]".
            values = ",".join(line.split()[3:-1])

            if neg_mean is None:
                neg_mean = values
            else:
                inv_stddev = values

    return neg_mean, inv_stddev


def load_lfr_params(config_path: str = None):
    """Read ``lfr_m`` and ``lfr_n`` from a ``config.yaml`` file.

    The two keys may appear in either order and may be followed by a trailing
    comment. The first occurrence of each key is used, and scanning stops once
    both have been found.

    Args:
        config_path: path of the YAML file. Defaults to
            ``{model_dir}/config.yaml`` using the notebook-level ``model_dir``.

    Returns:
        ``(lfr_window_size, lfr_window_shift)``, i.e. ``(lfr_m, lfr_n)`` as ints.

    Raises:
        ValueError: if either key is missing from the file.
    """
    import re

    if config_path is None:
        config_path = f"{model_dir}/config.yaml"

    # Matches e.g. "    lfr_m: 7" or "lfr_n: 6  # comment".
    key_value = re.compile(r"^\s*(lfr_m|lfr_n)\s*:\s*(\d+)")
    found = {}

    with open(config_path, encoding="utf-8") as f:
        for line in f:
            match = key_value.match(line)
            if match is None or match.group(1) in found:
                continue
            found[match.group(1)] = int(match.group(2))
            if len(found) == 2:
                break

    missing = [key for key in ("lfr_m", "lfr_n") if key not in found]
    if missing:
        raise ValueError(f"{', '.join(missing)} not found in {config_path}")

    lfr_window_size = found["lfr_m"]
    lfr_window_shift = found["lfr_n"]
    return lfr_window_size, lfr_window_shift


def get_vocab_size(tokens_path: str = None):
    """Count the entries of a ``tokens.txt`` file.

    Blank lines (for example a trailing empty line) are not counted.

    Args:
        tokens_path: path of the token file. Defaults to
            ``{model_dir}/tokens.txt`` using the notebook-level ``model_dir``.

    Returns:
        Number of non-blank lines in the file.
    """
    if tokens_path is None:
        tokens_path = f"{model_dir}/tokens.txt"

    with open(tokens_path, encoding="utf-8") as f:
        return sum(1 for line in f if line.strip())


def add_meta_data(filename: str, meta_data: Dict[str, str]):
    """Add meta data to an ONNX model. It is changed in-place.

    A key that already exists in the model's metadata is overwritten instead
    of being appended a second time, so running this more than once on the
    same file does not create duplicate entries.

    Args:
      filename:
        Filename of the ONNX model to be changed.
      meta_data:
        Key-value pairs.
    """
    model = onnx.load(filename)

    # Index the existing entries so repeated keys are updated in place.
    existing = {prop.key: prop for prop in model.metadata_props}

    for key, value in meta_data.items():
        meta = existing.get(key)
        if meta is None:
            meta = model.metadata_props.add()
            meta.key = key
            existing[key] = meta
        meta.value = value

    onnx.save(model, filename)
    print(f"Updated {filename}")


def main():
    """Collect the model parameters and write them into ``model.onnx`` as metadata.

    Reads the LFR settings, the CMVN rows and the vocabulary size through the
    helper functions of this cell, then stores them together with descriptive
    fields in ``{model_dir}/model.onnx``.

    Raises:
        ValueError: if the CMVN rows could not be read from ``am.mvn``.
    """
    lfr_window_size, lfr_window_shift = load_lfr_params()
    neg_mean, inv_stddev = load_cmvn()
    if neg_mean is None or inv_stddev is None:
        # Fail here with a clear message instead of handing None to protobuf.
        raise ValueError(f"Could not read both <LearnRateCoef> rows from {model_dir}/am.mvn")
    vocab_size = get_vocab_size()

    # Identifier of the model downloaded and exported by this notebook.
    model_id = "iic/speech_paraformer-large-vad-punc_asr_nat-zh-cn-16k-common-vocab8404-pytorch"

    meta_data = {
        "lfr_window_size": str(lfr_window_size),
        "lfr_window_shift": str(lfr_window_shift),
        "neg_mean": neg_mean,
        "inv_stddev": inv_stddev,
        "model_type": "paraformer",
        "version": "1",
        "model_author": "iic",
        "vocab_size": str(vocab_size),
        "description": "This is a Chinese model. It supports only Chinese",
        "comment": model_id,
        # NOTE(review): tag kept from the original cell; confirm it matches this model.
        "git_tag": "v1.1.9",
        "url": f"https://www.modelscope.cn/models/{model_id}",
    }
    add_meta_data(f"{model_dir}/model.onnx", meta_data)
    # A quantized export from the patched exporter is named model_quant.onnx;
    # tag it the same way if it was produced:
    # add_meta_data(f"{model_dir}/model_quant.onnx", meta_data)


# Run when this cell is executed as the main module.
if __name__ == "__main__":
    main()


Updated ./speech_paraformer-large-vad-punc_asr_nat-zh-cn-16k-common-vocab8404-pytorch/model.onnx
