From a213f9e1c9c2b6805c0feaba849ab4e34f44ef46 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BD=AD=E9=9C=87=E4=B8=9C?= <275331498@qq.com>
Date: Fri, 14 Oct 2022 12:54:04 +0800
Subject: [PATCH] [Chinese text normalization] speed up graph building (#5128)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

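Build the tagger as the closure (`.star`) of the optimized token FST instead
of wrapping it in `pynini.cdrewrite(..., "", "", NEMO_SIGMA)` and optimizing
the result.

Signed-off-by: 彭震东 <275331498@qq.com>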
---
 .../text_normalization/zh/taggers/tokenize_and_classify.py      | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nemo_text_processing/text_normalization/zh/taggers/tokenize_and_classify.py b/nemo_text_processing/text_normalization/zh/taggers/tokenize_and_classify.py
index 364259db5c7b..1c51c44dd1ef 100644
--- a/nemo_text_processing/text_normalization/zh/taggers/tokenize_and_classify.py
+++ b/nemo_text_processing/text_normalization/zh/taggers/tokenize_and_classify.py
@@ -86,7 +86,7 @@ def __init__(
             )
             token = pynutil.insert("tokens { ") + classify + pynutil.insert(" } ")
 
-            tagger = pynini.cdrewrite(token.optimize(), "", "", NEMO_SIGMA).optimize()
+            tagger = token.optimize().star
 
             preprocessor = PreProcessor(remove_interjections=True, fullwidth_to_halfwidth=True,)
             self.fst = preprocessor.fst @ tagger