In [5]:
import os
import json
import shutil

from modelscope.pipelines import pipeline
from modelscope.metainfo import Trainers
from modelscope.trainers import build_trainer
from modelscope.utils.constant import Tasks

from funasr.datasets.ms_dataset import MsDataset
from funasr.utils.compute_wer import compute_wer


def modelscope_finetune(params):
    """Fine-tune a ModelScope ASR model and stage the files needed for decoding.

    Loads the dataset, builds ``Trainers.speech_asr_trainer`` with the given
    hyper-parameters, runs training, and finally copies ``am.mvn``,
    ``decoding.yaml`` and ``configuration.json`` from the local ModelScope hub
    cache (``~/.cache/modelscope/hub/<model name>``) into the output directory.

    Args:
        params: Either a mapping (the original calling convention) or an
            attribute-style object such as the ``modelscope_args`` instance
            this notebook builds. Each value is looked up under the first
            name that exists:

            * ``model_dir`` / ``output_dir`` -- output (work) directory
            * ``dataset_path`` / ``data_path`` -- argument for ``MsDataset.load``
            * ``modelscope_model_name`` / ``model`` -- model identifier
            * ``dataset_type``, ``batch_bins``, ``max_epoch``, ``lr``

    Raises:
        KeyError: ``params`` is a mapping and a required entry is missing.
        AttributeError: ``params`` is an object and a required attribute is
            missing.
        FileNotFoundError: one of the required files is absent from the hub
            cache (raised by ``shutil.copy``).
    """
    subscriptable = hasattr(params, "__getitem__")

    def lookup(*names):
        # Return the value stored under the first of `names` that exists,
        # using item access for mappings and attribute access otherwise.
        for name in names:
            if subscriptable:
                try:
                    return params[name]
                except KeyError:
                    continue
            elif hasattr(params, name):
                return getattr(params, name)
        if subscriptable:
            raise KeyError(names[0])
        raise AttributeError(
            "params has none of the attributes: {}".format(", ".join(names)))

    # Resolve everything up front so a missing setting fails before any
    # dataset loading or training has started.
    # NOTE(review): the attribute aliases follow how this notebook configures
    # `modelscope_args`; `model` is assumed from its constructor keyword --
    # confirm against the class definition.
    model_dir = lookup("model_dir", "output_dir")
    dataset_path = lookup("dataset_path", "data_path")
    model_name = lookup("modelscope_model_name", "model")
    dataset_type = lookup("dataset_type")
    batch_bins = lookup("batch_bins")
    max_epoch = lookup("max_epoch")
    lr = lookup("lr")

    os.makedirs(model_dir, exist_ok=True)

    # dataset split ["train", "validation"]
    ds_dict = MsDataset.load(dataset_path)
    kwargs = dict(
        model=model_name,
        data_dir=ds_dict,
        dataset_type=dataset_type,
        work_dir=model_dir,
        batch_bins=batch_bins,
        max_epoch=max_epoch,
        lr=lr)
    trainer = build_trainer(Trainers.speech_asr_trainer, default_args=kwargs)
    trainer.train()

    # expanduser("~") resolves the home directory even when HOME is not set,
    # where os.environ["HOME"] would raise KeyError.
    pretrained_model_path = os.path.join(
        os.path.expanduser("~"), ".cache/modelscope/hub", model_name)
    required_files = ["am.mvn", "decoding.yaml", "configuration.json"]
    for file_name in required_files:
        shutil.copy(os.path.join(pretrained_model_path, file_name),
                    os.path.join(model_dir, file_name))
    

def modelscope_infer(params):
    """Decode a test set with a fine-tuned model and print the CER summary.

    Steps:
      1. Rewrite ``configuration.json`` in the model directory so that
         ``model.am_model_name`` names the checkpoint to decode with.
      2. Recreate an empty ``decode_results`` directory inside the model
         directory and run the speech-recognition pipeline on ``wav.scp``
         from the test data directory, passing that directory as ``output_dir``.
      3. If the test data directory contains a ``text`` file, call
         ``compute_wer`` to write ``decode_results/text.cer`` and print the
         last three lines of that report.

    Args:
        params: Mapping with the keys ``model_dir``, ``decoding_model_name``
            and ``test_data_dir``.

    Raises:
        KeyError: a required key is missing from ``params``.
        FileNotFoundError: ``configuration.json`` is absent from ``model_dir``.
    """
    model_dir = params["model_dir"]
    config_path = os.path.join(model_dir, "configuration.json")

    # prepare for decoding
    with open(config_path, encoding="utf-8") as f:
        config_dict = json.load(f)
    config_dict["model"]["am_model_name"] = params["decoding_model_name"]
    with open(config_path, "w", encoding="utf-8") as f:
        json.dump(config_dict, f, indent=4, separators=(',', ': '))

    # Start from an empty results directory so output from an earlier run
    # cannot be mistaken for the current one.
    decoding_path = os.path.join(model_dir, "decode_results")
    if os.path.exists(decoding_path):
        shutil.rmtree(decoding_path)
    os.mkdir(decoding_path)

    # decoding
    inference_pipeline = pipeline(
        task=Tasks.auto_speech_recognition,
        model=model_dir,
        output_dir=decoding_path,
        batch_size=64
    )
    audio_in = os.path.join(params["test_data_dir"], "wav.scp")
    inference_pipeline(audio_in=audio_in)

    # compute CER if ground-truth text is available
    text_in = os.path.join(params["test_data_dir"], "text")
    if os.path.exists(text_in):
        text_proc_file = os.path.join(decoding_path, "1best_recog/token")
        cer_file = os.path.join(decoding_path, "text.cer")
        compute_wer(text_in, text_proc_file, cer_file)
        # Read the report tail in Python instead of shelling out to `tail`:
        # the output then shows up in the notebook, works without a POSIX
        # shell, and the path is never interpolated into a command line.
        with open(cer_file, encoding="utf-8", errors="replace") as f:
            report_tail = f.readlines()[-3:]
        print("".join(report_tail), end="")

In [4]:
import sys

from funasr.utils.modelscope_param import modelscope_args

# NOTE(review): the recorded run of this cell ended in `SystemExit: 2`, the
# status argparse exits with when it rejects command-line arguments. Presumably
# the training stack parses sys.argv, which inside a Jupyter kernel carries the
# kernel's own launch flags -- TODO confirm. Keeping only the program name is
# harmless in a notebook and lets such a parser fall back to its defaults.
sys.argv = sys.argv[:1]

params = modelscope_args(model="damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch")
params.output_dir = "./checkpoint"                      # directory where the fine-tuned model is saved
params.data_path = "speech_asr_aishell1_trainsets"      # dataset: either a dataset uploaded to ModelScope or local data
params.dataset_type = "small"                           # "small" for small corpora; use "large" for more than 1000 hours of audio
params.batch_bins = 2000                                # batch size; unit is fbank feature frames if dataset_type="small", milliseconds if dataset_type="large"
params.max_epoch = 50                                   # maximum number of training epochs
params.lr = 0.00005                                     # learning rate

modelscope_finetune(params)

2023-09-02 01:38:32,637 - modelscope - INFO - No subset_name specified, defaulting to the default
2023-09-02 01:38:33,220 - modelscope - INFO - Generating dataset speech_asr_aishell1_trainsets (/home/lihongji/.cache/modelscope/hub/datasets/speech_asr/speech_asr_aishell1_trainsets/master/data_files)
2023-09-02 01:38:33,221 - modelscope - INFO - Reusing cached meta-data file: /home/lihongji/.cache/modelscope/hub/datasets/speech_asr/speech_asr_aishell1_trainsets/master/data_files/84d2248455376a60b4f10107a1c4a3ac
2023-09-02 01:38:33,221 - modelscope - INFO - Reusing cached meta-data file: /home/lihongji/.cache/modelscope/hub/datasets/speech_asr/speech_asr_aishell1_trainsets/master/data_files/25158edb524e11376428a1643343653c
2023-09-02 01:38:33,222 - modelscope - INFO - Reusing cached meta-data file: /home/lihongji/.cache/modelscope/hub/datasets/speech_asr/speech_asr_aishell1_trainsets/master/data_files/bbff82a2cc4f66385c049317a256bf93

Downloading data files #1: 100%|██████████| 1/1 [00:00

SystemExit: 2

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
