From a213f9e1c9c2b6805c0feaba849ab4e34f44ef46 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BD=AD=E9=9C=87=E4=B8=9C?= <275331498@qq.com> Date: Fri, 14 Oct 2022 12:54:04 +0800 Subject: [PATCH] [Chinese text normalization] speed up graph building (#5128) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 彭震东 <275331498@qq.com> Signed-off-by: 彭震东 <275331498@qq.com> --- .../text_normalization/zh/taggers/tokenize_and_classify.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nemo_text_processing/text_normalization/zh/taggers/tokenize_and_classify.py b/nemo_text_processing/text_normalization/zh/taggers/tokenize_and_classify.py index 364259db5c7b..1c51c44dd1ef 100644 --- a/nemo_text_processing/text_normalization/zh/taggers/tokenize_and_classify.py +++ b/nemo_text_processing/text_normalization/zh/taggers/tokenize_and_classify.py @@ -86,7 +86,7 @@ def __init__( ) token = pynutil.insert("tokens { ") + classify + pynutil.insert(" } ") - tagger = pynini.cdrewrite(token.optimize(), "", "", NEMO_SIGMA).optimize() + tagger = token.optimize().star preprocessor = PreProcessor(remove_interjections=True, fullwidth_to_halfwidth=True,) self.fst = preprocessor.fst @ tagger