From 1fc3108f1e7592f710a9cbd43750c8c15ce410d4 Mon Sep 17 00:00:00 2001
From: BuyuanCui <alexcui1994@gmail.com>
Date: Thu, 27 Jul 2023 16:14:43 -0700
Subject: [PATCH 01/17] updates on ITN grammar to pass sparrowhawk tests

Signed-off-by: BuyuanCui <alexcui1994@gmail.com>
---
 .../zh/taggers/cardinal.py                         | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/nemo_text_processing/inverse_text_normalization/zh/taggers/cardinal.py b/nemo_text_processing/inverse_text_normalization/zh/taggers/cardinal.py
index b29fc5fb3..28239786a 100644
--- a/nemo_text_processing/inverse_text_normalization/zh/taggers/cardinal.py
+++ b/nemo_text_processing/inverse_text_normalization/zh/taggers/cardinal.py
@@ -78,7 +78,7 @@ def __init__(self):
             | (graph_digits + delete_ten_thousands + pynini.cross(pynini.closure("零"), "00") + graph_all)
             | (graph_digits + delete_ten_thousands + pynini.cross(pynini.closure("零"), "000") + graph_digits)
         )
-        graph_ten_thousands = graph_ten_thousands_simple | graph_ten_thousands_complex | pynutil.insert("00000")
+        graph_ten_thousands = (pynutil.add_weight(graph_ten_thousands_simple, -1.0) | graph_ten_thousands_complex | pynutil.insert("00000"))
 
         # grammmar for hundred thousands 十万
         graph_hundred_thousands_simple = graph_all + closure_ten_thousands
@@ -88,8 +88,8 @@ def __init__(self):
             | (graph_all + delete_ten_thousands + pynini.cross(pynini.closure("零"), "00") + graph_all)
             | (graph_all + delete_ten_thousands + pynini.cross(pynini.closure("零"), "000") + graph_digits)
         )
-        graph_hundred_thousands = (graph_hundred_thousands_simple | graph_hundred_thousands_complex) | pynutil.insert(
-            "000000"
+        graph_hundred_thousands = (pynutil.add_weight(graph_hundred_thousands_simple, -1.0) | graph_hundred_thousands_complex | pynutil.insert(
+            "000000")
         )
 
         # grammar for millions 百万
@@ -168,7 +168,7 @@ def __init__(self):
             | (graph_digits + delete_hundred_millions + pynini.cross(pynini.closure("零"), "0000000") + graph_digits)
         )
         graph_hundred_millions = (
-            graph_hundred_millions_simple | graph_hundred_millions_complex | pynutil.insert("000000000")
+            pynutil.add_weight(graph_hundred_millions_simple, -1.0) | graph_hundred_millions_complex | pynutil.insert("000000000")
         )
 
         # grammar for billions 十亿
@@ -203,7 +203,7 @@ def __init__(self):
             | (graph_all + delete_hundred_millions + pynini.cross(pynini.closure("零"), "000000") + graph_all)
             | (graph_all + delete_hundred_millions + pynini.cross(pynini.closure("零"), "0000000") + graph_digits)
         )
-        graph_billions = graph_billions_simple | graph_billions_complex | pynutil.insert("0000000000")
+        graph_billions = (pynutil.add_weight(graph_billions_simple, -1.0) | graph_billions_complex | pynutil.insert("0000000000"))
 
         # grammar for ten billions 百亿
         graph_ten_billions_simple = graph_hundreds_complex + closure_hundred_millions
@@ -252,7 +252,7 @@ def __init__(self):
                 + graph_digits
             )
         )
-        graph_ten_billions = graph_ten_billions_simple | graph_ten_billions_complex | pynutil.insert("00000000000")
+        graph_ten_billions = (pynutil.add_weight(graph_ten_billions_simple, -1.0) | graph_ten_billions_complex | pynutil.insert("00000000000"))
 
         # grammar for hundred billions 千亿
         graph_hundred_billions_simple = graph_thousands_complex + closure_hundred_millions
@@ -301,7 +301,7 @@ def __init__(self):
                 + graph_digits
             )
         )
-        graph_hundred_billions = graph_hundred_billions_simple | graph_hundred_billions_complex
+        graph_hundred_billions = (pynutil.add_weight(graph_hundred_billions_simple, -1.0) | graph_hundred_billions_complex)
 
         # combining grammar; output for cardinal grammar
         graph = pynini.union(

From e66cc851b23f4e631a53e6172a3172dd28c2c7f1 Mon Sep 17 00:00:00 2001
From: BuyuanCui <alexcui1994@gmail.com>
Date: Thu, 27 Jul 2023 16:14:59 -0700
Subject: [PATCH 02/17] updates for sparrowhawk tests

Signed-off-by: BuyuanCui <alexcui1994@gmail.com>
---
 .../zh/taggers/decimal.py                     | 33 ++++++-------------
 1 file changed, 10 insertions(+), 23 deletions(-)

diff --git a/nemo_text_processing/inverse_text_normalization/zh/taggers/decimal.py b/nemo_text_processing/inverse_text_normalization/zh/taggers/decimal.py
index f334f2675..1d6d2a9a4 100644
--- a/nemo_text_processing/inverse_text_normalization/zh/taggers/decimal.py
+++ b/nemo_text_processing/inverse_text_normalization/zh/taggers/decimal.py
@@ -63,40 +63,27 @@ def get_quantity(decimal, cardinal):
 
     return res
 
-
 class DecimalFst(GraphFst):
     def __init__(self, cardinal: GraphFst):
         super().__init__(name="decimal", kind="classify")
 
-        cardinal_after_decimal = pynini.string_file(get_abs_path("data/numbers/digit-nano.tsv"))
-        cardinal_before_decimal = cardinal.just_cardinals | (pynini.closure(pynini.cross("零", "0"), 0, 1))
-
-        delete_decimal = pynutil.delete("点") | pynutil.delete(
-            "點"
-        )  # delete decimal character, 'point' in english in 'one point two for 1.2'
+        cardinal_after_decimal = pynini.string_file(get_abs_path("data/numbers/digit-nano.tsv")) | pynini.closure(pynini.cross("零", "0"))
+        cardinal_before_decimal = cardinal.just_cardinals | pynini.cross("零", "0")
+                                                                                                                  
+        delete_decimal = pynutil.delete("点") | pynutil.delete("點")  
 
-        # grammar for integer part
         graph_integer = (
             pynutil.insert('integer_part: "')
-            + (cardinal_before_decimal | (pynini.closure(pynini.cross("零", "0"), 0, 1)))
+            + cardinal_before_decimal
             + pynutil.insert('" ')
-        )  # tokenization on just numbers
-        graph_integer_or_none = graph_integer | pynutil.insert('integer_part: "0" ', weight=0.01)  # integer or zero
+        )  
+
+        graph_string_of_cardinals = pynini.closure(cardinal_after_decimal, 1)
+        graph_fractional = pynutil.insert('fractional_part: "') + graph_string_of_cardinals + pynutil.insert('"')
 
-        # grammar for fractional part
-        delete_zero = pynini.closure(pynini.cross("零", "0"))
-        graph_string_of_cardinals = cardinal_after_decimal
-        graph_string_of_cardinals = pynini.closure(
-            (pynini.closure(graph_string_of_cardinals) + delete_zero + pynini.closure(graph_string_of_cardinals)), 1
-        )
-        graph_fractional = pynini.closure(
-            pynutil.insert('fractional_part: "') + graph_string_of_cardinals + pynutil.insert('"'), 1
-        )
 
-        # grammar for decimal: integer+delete character+part after decimal point
-        graph_decimal_no_sign = pynini.closure((graph_integer_or_none + delete_decimal + graph_fractional), 1)
+        graph_decimal_no_sign = pynini.closure((graph_integer + delete_decimal + graph_fractional), 1)
 
-        # New Grammar added for Money
         self.final_graph_wo_negative = graph_decimal_no_sign | get_quantity(
             graph_decimal_no_sign, cardinal.just_cardinals
         )

From a4a2ed19fdb2e903dd60b47cbf9be1e1236c4888 Mon Sep 17 00:00:00 2001
From: BuyuanCui <alexcui1994@gmail.com>
Date: Thu, 27 Jul 2023 16:15:15 -0700
Subject: [PATCH 03/17] updates for sparrowhawk tests

Signed-off-by: BuyuanCui <alexcui1994@gmail.com>
---
 .../zh/verbalizers/decimal.py                     | 15 ++++-----------
 1 file changed, 4 insertions(+), 11 deletions(-)

diff --git a/nemo_text_processing/inverse_text_normalization/zh/verbalizers/decimal.py b/nemo_text_processing/inverse_text_normalization/zh/verbalizers/decimal.py
index ea8fa4ab0..882363d2c 100644
--- a/nemo_text_processing/inverse_text_normalization/zh/verbalizers/decimal.py
+++ b/nemo_text_processing/inverse_text_normalization/zh/verbalizers/decimal.py
@@ -32,14 +32,6 @@ def __init__(self):
 
         # insert a "," for every three numbers before decimal point
         space_every_three_integer = at_most_three_digits + (pynutil.insert(",") + exactly_three_digits).closure()
-        # insert a "," for every three numbers after decimal point
-        space_every_three_decimal = (
-            pynini.accep(".") + (exactly_three_digits + pynutil.insert(",")).closure() + at_most_three_digits
-        )
-
-        # combine both
-        group_by_threes = space_every_three_integer | space_every_three_decimal
-        self.group_by_threes = group_by_threes
 
         # removing tokenizations, 'negative: '
         optional_sign = pynini.closure(
@@ -56,10 +48,10 @@ def __init__(self):
             pynutil.delete("integer_part:")
             + delete_space
             + pynutil.delete('"')
-            + pynini.closure(NEMO_NOT_QUOTE, 1)
+            + pynini.closure(NEMO_DIGIT, 1)
             + pynutil.delete('"')
         )
-        integer = integer @ group_by_threes
+        integer = integer @ space_every_three_integer
         optional_integer = pynini.closure(integer + delete_space, 0, 1)
 
         # removing tokenizations, 'fractionl_part'
@@ -81,10 +73,11 @@ def __init__(self):
             + pynini.closure(NEMO_NOT_QUOTE, 1)
             + pynutil.delete('"')
         )
-        optional_quantity = pynini.closure(quantity + delete_space)
+        optional_quantity = pynini.closure(delete_space + quantity)
 
         # combining graphs removing tokenizations *3
         graph = (optional_integer + optional_fractional + optional_quantity).optimize()
+    
         graph = optional_sign + graph  # add optional sign for negative number
         self.numebrs = graph
         delete_tokens = self.delete_tokens(graph)

From d1f8be2ff924799e15c03d51fc3e2e368af5f08f Mon Sep 17 00:00:00 2001
From: BuyuanCui <alexcui1994@gmail.com>
Date: Thu, 27 Jul 2023 16:27:07 -0700
Subject: [PATCH 04/17] coding style fix

Signed-off-by: BuyuanCui <alexcui1994@gmail.com>
---
 .../zh/taggers/cardinal.py                    | 30 ++++++++++++++-----
 1 file changed, 23 insertions(+), 7 deletions(-)

diff --git a/nemo_text_processing/inverse_text_normalization/zh/taggers/cardinal.py b/nemo_text_processing/inverse_text_normalization/zh/taggers/cardinal.py
index 28239786a..c1b462472 100644
--- a/nemo_text_processing/inverse_text_normalization/zh/taggers/cardinal.py
+++ b/nemo_text_processing/inverse_text_normalization/zh/taggers/cardinal.py
@@ -78,7 +78,11 @@ def __init__(self):
             | (graph_digits + delete_ten_thousands + pynini.cross(pynini.closure("零"), "00") + graph_all)
             | (graph_digits + delete_ten_thousands + pynini.cross(pynini.closure("零"), "000") + graph_digits)
         )
-        graph_ten_thousands = (pynutil.add_weight(graph_ten_thousands_simple, -1.0) | graph_ten_thousands_complex | pynutil.insert("00000"))
+        graph_ten_thousands = (
+            pynutil.add_weight(graph_ten_thousands_simple, -1.0)
+            | graph_ten_thousands_complex
+            | pynutil.insert("00000")
+        )
 
         # grammmar for hundred thousands 十万
         graph_hundred_thousands_simple = graph_all + closure_ten_thousands
@@ -88,8 +92,10 @@ def __init__(self):
             | (graph_all + delete_ten_thousands + pynini.cross(pynini.closure("零"), "00") + graph_all)
             | (graph_all + delete_ten_thousands + pynini.cross(pynini.closure("零"), "000") + graph_digits)
         )
-        graph_hundred_thousands = (pynutil.add_weight(graph_hundred_thousands_simple, -1.0) | graph_hundred_thousands_complex | pynutil.insert(
-            "000000")
+        graph_hundred_thousands = (
+            pynutil.add_weight(graph_hundred_thousands_simple, -1.0)
+            | graph_hundred_thousands_complex
+            | pynutil.insert("000000")
         )
 
         # grammar for millions 百万
@@ -168,7 +174,9 @@ def __init__(self):
             | (graph_digits + delete_hundred_millions + pynini.cross(pynini.closure("零"), "0000000") + graph_digits)
         )
         graph_hundred_millions = (
-            pynutil.add_weight(graph_hundred_millions_simple, -1.0) | graph_hundred_millions_complex | pynutil.insert("000000000")
+            pynutil.add_weight(graph_hundred_millions_simple, -1.0)
+            | graph_hundred_millions_complex
+            | pynutil.insert("000000000")
         )
 
         # grammar for billions 十亿
@@ -203,7 +211,9 @@ def __init__(self):
             | (graph_all + delete_hundred_millions + pynini.cross(pynini.closure("零"), "000000") + graph_all)
             | (graph_all + delete_hundred_millions + pynini.cross(pynini.closure("零"), "0000000") + graph_digits)
         )
-        graph_billions = (pynutil.add_weight(graph_billions_simple, -1.0) | graph_billions_complex | pynutil.insert("0000000000"))
+        graph_billions = (
+            pynutil.add_weight(graph_billions_simple, -1.0) | graph_billions_complex | pynutil.insert("0000000000")
+        )
 
         # grammar for ten billions 百亿
         graph_ten_billions_simple = graph_hundreds_complex + closure_hundred_millions
@@ -252,7 +262,11 @@ def __init__(self):
                 + graph_digits
             )
         )
-        graph_ten_billions = (pynutil.add_weight(graph_ten_billions_simple, -1.0) | graph_ten_billions_complex | pynutil.insert("00000000000"))
+        graph_ten_billions = (
+            pynutil.add_weight(graph_ten_billions_simple, -1.0)
+            | graph_ten_billions_complex
+            | pynutil.insert("00000000000")
+        )
 
         # grammar for hundred billions 千亿
         graph_hundred_billions_simple = graph_thousands_complex + closure_hundred_millions
@@ -301,7 +315,9 @@ def __init__(self):
                 + graph_digits
             )
         )
-        graph_hundred_billions = (pynutil.add_weight(graph_hundred_billions_simple, -1.0) | graph_hundred_billions_complex)
+        graph_hundred_billions = (
+            pynutil.add_weight(graph_hundred_billions_simple, -1.0) | graph_hundred_billions_complex
+        )
 
         # combining grammar; output for cardinal grammar
         graph = pynini.union(

From 637cf1986fef1a04f528bb7bb1028227dbd7393a Mon Sep 17 00:00:00 2001
From: BuyuanCui <alexcui1994@gmail.com>
Date: Thu, 27 Jul 2023 16:29:44 -0700
Subject: [PATCH 05/17] updates for coding style and sparrowhawk test

Signed-off-by: BuyuanCui <alexcui1994@gmail.com>
---
 .../zh/taggers/decimal.py                        | 16 +++++++---------
 .../zh/verbalizers/decimal.py                    |  2 +-
 2 files changed, 8 insertions(+), 10 deletions(-)

diff --git a/nemo_text_processing/inverse_text_normalization/zh/taggers/decimal.py b/nemo_text_processing/inverse_text_normalization/zh/taggers/decimal.py
index 1d6d2a9a4..33f437955 100644
--- a/nemo_text_processing/inverse_text_normalization/zh/taggers/decimal.py
+++ b/nemo_text_processing/inverse_text_normalization/zh/taggers/decimal.py
@@ -63,25 +63,23 @@ def get_quantity(decimal, cardinal):
 
     return res
 
+
 class DecimalFst(GraphFst):
     def __init__(self, cardinal: GraphFst):
         super().__init__(name="decimal", kind="classify")
 
-        cardinal_after_decimal = pynini.string_file(get_abs_path("data/numbers/digit-nano.tsv")) | pynini.closure(pynini.cross("零", "0"))
+        cardinal_after_decimal = pynini.string_file(get_abs_path("data/numbers/digit-nano.tsv")) | pynini.closure(
+            pynini.cross("零", "0")
+        )
         cardinal_before_decimal = cardinal.just_cardinals | pynini.cross("零", "0")
-                                                                                                                  
-        delete_decimal = pynutil.delete("点") | pynutil.delete("點")  
 
-        graph_integer = (
-            pynutil.insert('integer_part: "')
-            + cardinal_before_decimal
-            + pynutil.insert('" ')
-        )  
+        delete_decimal = pynutil.delete("点") | pynutil.delete("點")
+
+        graph_integer = pynutil.insert('integer_part: "') + cardinal_before_decimal + pynutil.insert('" ')
 
         graph_string_of_cardinals = pynini.closure(cardinal_after_decimal, 1)
         graph_fractional = pynutil.insert('fractional_part: "') + graph_string_of_cardinals + pynutil.insert('"')
 
-
         graph_decimal_no_sign = pynini.closure((graph_integer + delete_decimal + graph_fractional), 1)
 
         self.final_graph_wo_negative = graph_decimal_no_sign | get_quantity(
diff --git a/nemo_text_processing/inverse_text_normalization/zh/verbalizers/decimal.py b/nemo_text_processing/inverse_text_normalization/zh/verbalizers/decimal.py
index 882363d2c..ab9831783 100644
--- a/nemo_text_processing/inverse_text_normalization/zh/verbalizers/decimal.py
+++ b/nemo_text_processing/inverse_text_normalization/zh/verbalizers/decimal.py
@@ -77,7 +77,7 @@ def __init__(self):
 
         # combining graphs removing tokenizations *3
         graph = (optional_integer + optional_fractional + optional_quantity).optimize()
-    
+
         graph = optional_sign + graph  # add optional sign for negative number
         self.numebrs = graph
         delete_tokens = self.delete_tokens(graph)

From 4a71afe3d1994366f03751e88dc8ff1366402a77 Mon Sep 17 00:00:00 2001
From: BuyuanCui <alexcui1994@gmail.com>
Date: Tue, 1 Aug 2023 14:02:02 -0700
Subject: [PATCH 06/17] updated classes for tests on whitelist and word grammar

Signed-off-by: BuyuanCui <alexcui1994@gmail.com>
---
 ..._sparrowhawk_inverse_text_normalization.sh | 42 +++++++++----------
 1 file changed, 21 insertions(+), 21 deletions(-)

diff --git a/tests/nemo_text_processing/zh/test_sparrowhawk_inverse_text_normalization.sh b/tests/nemo_text_processing/zh/test_sparrowhawk_inverse_text_normalization.sh
index 4ca12af7f..ade1027a7 100644
--- a/tests/nemo_text_processing/zh/test_sparrowhawk_inverse_text_normalization.sh
+++ b/tests/nemo_text_processing/zh/test_sparrowhawk_inverse_text_normalization.sh
@@ -21,62 +21,62 @@ runtest () {
 }
 
 testITNCardinal() {
-  input=$PROJECT_DIR/fr/data_inverse_text_normalization/test_cases_cardinal.txt
+  input=$PROJECT_DIR/zh/data_inverse_text_normalization/test_cases_cardinal.txt
   runtest $input
 }
 
 testITNDate() {
-  input=$PROJECT_DIR/fr/data_inverse_text_normalization/test_cases_date.txt
+  input=$PROJECT_DIR/zh/data_inverse_text_normalization/test_cases_date.txt
   runtest $input
 }
 
 testITNDecimal() {
-  input=$PROJECT_DIR/fr/data_inverse_text_normalization/test_cases_decimal.txt
+  input=$PROJECT_DIR/zh/data_inverse_text_normalization/test_cases_decimal.txt
   runtest $input
 }
 
 testITNOrdinal() {
-  input=$PROJECT_DIR/fr/data_inverse_text_normalization/test_cases_ordinal.txt
+  input=$PROJECT_DIR/zh/data_inverse_text_normalization/test_cases_ordinal.txt
   runtest $input
 }
 
 testITNFraction() {
-  input=$PROJECT_DIR/fr/data_inverse_text_normalization/test_cases_fraction.txt
+  input=$PROJECT_DIR/zh/data_inverse_text_normalization/test_cases_fraction.txt
   runtest $input
 }
 
 testITNTime() {
-  input=$PROJECT_DIR/fr/data_inverse_text_normalization/test_cases_time.txt
+  input=$PROJECT_DIR/zh/data_inverse_text_normalization/test_cases_time.txt
   runtest $input
 }
 
-testITNMeasure() {
-  input=$PROJECT_DIR/fr/data_inverse_text_normalization/test_cases_measure.txt
-  runtest $input
-}
+#testITNMeasure() {
+#  input=$PROJECT_DIR/fr/data_inverse_text_normalization/test_cases_measure.txt
+#  runtest $input
+#}
 
 testITNMoney() {
-  input=$PROJECT_DIR/fr/data_inverse_text_normalization/test_cases_money.txt
+  input=$PROJECT_DIR/zh/data_inverse_text_normalization/test_cases_money.txt
   runtest $input
 }
 
 testITNWhitelist() {
-  input=$PROJECT_DIR/fr/data_inverse_text_normalization/test_cases_whitelist.txt
+  input=$PROJECT_DIR/zh/data_inverse_text_normalization/test_cases_whitelist.txt
   runtest $input
 }
 
-testITNTelephone() {
-  input=$PROJECT_DIR/fr/data_inverse_text_normalization/test_cases_telephone.txt
-  runtest $input
-}
+#testITNTelephone() {
+#  input=$PROJECT_DIR/zh/data_inverse_text_normalization/test_cases_telephone.txt
+#  runtest $input
+#}
 
-testITNElectronic() {
-  input=$PROJECT_DIR/fr/data_inverse_text_normalization/test_cases_electronic.txt
-  runtest $input
-}
+#testITNElectronic() {
+#  input=$PROJECT_DIR/fr/data_inverse_text_normalization/test_cases_electronic.txt
+#  runtest $input
+#}
 
 testITNWord() {
-  input=$PROJECT_DIR/fr/data_inverse_text_normalization/test_cases_word.txt
+  input=$PROJECT_DIR/zh/data_inverse_text_normalization/test_cases_word.txt
   runtest $input
 }
 

From 4067be82b3d9009789dfbce575bd1c271698a9fd Mon Sep 17 00:00:00 2001
From: BuyuanCui <alexcui1994@gmail.com>
Date: Tue, 1 Aug 2023 14:02:43 -0700
Subject: [PATCH 07/17] added for tests on whitelist

Signed-off-by: BuyuanCui <alexcui1994@gmail.com>
---
 .../test_cases_whitelist.txt                  | 21 +++++++++++++++++++
 1 file changed, 21 insertions(+)
 create mode 100644 tests/nemo_text_processing/zh/data_inverse_text_normalization/test_cases_whitelist.txt

diff --git a/tests/nemo_text_processing/zh/data_inverse_text_normalization/test_cases_whitelist.txt b/tests/nemo_text_processing/zh/data_inverse_text_normalization/test_cases_whitelist.txt
new file mode 100644
index 000000000..f36dc4293
--- /dev/null
+++ b/tests/nemo_text_processing/zh/data_inverse_text_normalization/test_cases_whitelist.txt
@@ -0,0 +1,21 @@
+人力资源~HR
+自动取款机~ATM
+人力资源~HR
+首席执行官~CEO
+美国研究生入学考试~GRE
+研究生管理专业入学考试~GMAT
+全球定位系统~GPS
+刷卡机~POS机
+数位多功能光碟~DVD
+镭射唱片~CD
+通用串行总线~USB
+统一资源定位符~URL
+虚拟专用网络~VPN
+网络互联协议~IP
+脱氧核糖核酸~DNA
+核糖核酸~RNA
+平均学分绩点~GPA
+发光二极管~LED
+可移植文档格式~PDF
+社会性网络服务~SNS
+博士~PhD

From 2f101889f4bab855862798d696e5505a1282be4b Mon Sep 17 00:00:00 2001
From: BuyuanCui <alexcui1994@gmail.com>
Date: Tue, 1 Aug 2023 14:02:57 -0700
Subject: [PATCH 08/17] added for test on word

Signed-off-by: BuyuanCui <alexcui1994@gmail.com>
---
 .../test_cases_word.txt                       | 21 +++++++++++++++++++
 1 file changed, 21 insertions(+)
 create mode 100644 tests/nemo_text_processing/zh/data_inverse_text_normalization/test_cases_word.txt

diff --git a/tests/nemo_text_processing/zh/data_inverse_text_normalization/test_cases_word.txt b/tests/nemo_text_processing/zh/data_inverse_text_normalization/test_cases_word.txt
new file mode 100644
index 000000000..1d0cac255
--- /dev/null
+++ b/tests/nemo_text_processing/zh/data_inverse_text_normalization/test_cases_word.txt
@@ -0,0 +1,21 @@
+你好~你好
+年级~年级
+秘密~秘密
+键盘~键盘
+借口~借口
+学生~学生
+人力~人力
+转移~转移
+徘徊~徘徊
+冤枉~冤枉
+浏览~浏览
+珍藏~珍藏
+患难 ~患难
+湿~湿
+眼眶~眼眶
+遗产~遗产
+流浪~流浪
+信仰~信仰
+戒指~戒指
+义无反顾~义无反顾
+交换~交换

From cc857f13a5ae42c7f8b96ac968762196b6a7cc15 Mon Sep 17 00:00:00 2001
From: BuyuanCui <alexcui1994@gmail.com>
Date: Tue, 1 Aug 2023 14:03:18 -0700
Subject: [PATCH 09/17] added to run test on whitelist

Signed-off-by: BuyuanCui <alexcui1994@gmail.com>
---
 .../nemo_text_processing/zh/test_whitelist.py | 33 +++++++++++++++++++
 1 file changed, 33 insertions(+)
 create mode 100644 tests/nemo_text_processing/zh/test_whitelist.py

diff --git a/tests/nemo_text_processing/zh/test_whitelist.py b/tests/nemo_text_processing/zh/test_whitelist.py
new file mode 100644
index 000000000..9b09f4d9d
--- /dev/null
+++ b/tests/nemo_text_processing/zh/test_whitelist.py
@@ -0,0 +1,33 @@
+# Copyright (c) 2023, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import pytest
+from nemo_text_processing.inverse_text_normalization.inverse_normalize import InverseNormalizer
+from nemo_text_processing.text_normalization.normalize import Normalizer
+from nemo_text_processing.text_normalization.normalize_with_audio import NormalizerWithAudio
+from parameterized import parameterized
+
+from ..utils import CACHE_DIR, RUN_AUDIO_BASED_TESTS, parse_test_case_file
+
+
+class TestWhitelist:
+    inverse_normalizer = InverseNormalizer(lang='zh', cache_dir=CACHE_DIR, overwrite_cache=False)
+
+    @parameterized.expand(parse_test_case_file('zh/data_inverse_text_normalization/test_cases_whitelist.txt'))
+    @pytest.mark.run_only_on('CPU')
+    @pytest.mark.unit
+    def test_denorm(self, test_input, expected):
+        pred = self.inverse_normalizer.inverse_normalize(test_input, verbose=False)
+        assert pred == expected

From 406b9532509bae396984b69a1ffbbe0b1b6e7cb4 Mon Sep 17 00:00:00 2001
From: BuyuanCui <alexcui1994@gmail.com>
Date: Tue, 1 Aug 2023 14:03:33 -0700
Subject: [PATCH 10/17] added to run test on word

Signed-off-by: BuyuanCui <alexcui1994@gmail.com>
---
 tests/nemo_text_processing/zh/test_word.py | 35 ++++++++++++++++++++++
 1 file changed, 35 insertions(+)
 create mode 100644 tests/nemo_text_processing/zh/test_word.py

diff --git a/tests/nemo_text_processing/zh/test_word.py b/tests/nemo_text_processing/zh/test_word.py
new file mode 100644
index 000000000..663228633
--- /dev/null
+++ b/tests/nemo_text_processing/zh/test_word.py
@@ -0,0 +1,35 @@
+# Copyright (c) 2023, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import pytest
+from nemo_text_processing.inverse_text_normalization.inverse_normalize import InverseNormalizer
+from nemo_text_processing.text_normalization.normalize import Normalizer
+from nemo_text_processing.text_normalization.normalize_with_audio import NormalizerWithAudio
+from parameterized import parameterized
+
+from ..utils import CACHE_DIR, RUN_AUDIO_BASED_TESTS, parse_test_case_file
+
+
+class TestWord:
+    inverse_normalizer = InverseNormalizer(lang='zh', cache_dir=CACHE_DIR, overwrite_cache=False)
+
+    @parameterized.expand(parse_test_case_file('zh/data_inverse_text_normalization/test_cases_word.txt'))
+    @pytest.mark.run_only_on('CPU')
+    @pytest.mark.unit
+    def test_denorm(self, test_input, expected):
+        pred = self.inverse_normalizer.inverse_normalize(test_input, verbose=False)
+        assert pred == expected
+
+    
\ No newline at end of file

From 30fccd3ca0dbd55c39c64297f63a2b56169a688b Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 1 Aug 2023 21:05:06 +0000
Subject: [PATCH 11/17] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 tests/nemo_text_processing/zh/test_word.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tests/nemo_text_processing/zh/test_word.py b/tests/nemo_text_processing/zh/test_word.py
index 663228633..8d3da9be5 100644
--- a/tests/nemo_text_processing/zh/test_word.py
+++ b/tests/nemo_text_processing/zh/test_word.py
@@ -31,5 +31,3 @@ class TestWord:
     def test_denorm(self, test_input, expected):
         pred = self.inverse_normalizer.inverse_normalize(test_input, verbose=False)
         assert pred == expected
-
-    
\ No newline at end of file

From 13432f90f25239053f11e8a9a4928068928c65f0 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 15 Aug 2023 18:43:51 +0000
Subject: [PATCH 12/17] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 tests/nemo_text_processing/zh/test_word.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/nemo_text_processing/zh/test_word.py b/tests/nemo_text_processing/zh/test_word.py
index c404a57bb..8d3da9be5 100644
--- a/tests/nemo_text_processing/zh/test_word.py
+++ b/tests/nemo_text_processing/zh/test_word.py
@@ -31,4 +31,3 @@ class TestWord:
     def test_denorm(self, test_input, expected):
         pred = self.inverse_normalizer.inverse_normalize(test_input, verbose=False)
         assert pred == expected
-

From 3cd5062fce84c9877cd3d31d1f46d2459c107709 Mon Sep 17 00:00:00 2001
From: "Buyuan(Alex) Cui" <69030297+BuyuanCui@users.noreply.github.com>
Date: Wed, 16 Aug 2023 09:12:13 -0700
Subject: [PATCH 13/17] Update test_word.py

Removed unused import.

Signed-off-by: Buyuan(Alex) Cui <69030297+BuyuanCui@users.noreply.github.com>
---
 tests/nemo_text_processing/zh/test_word.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/nemo_text_processing/zh/test_word.py b/tests/nemo_text_processing/zh/test_word.py
index 8d3da9be5..2f2b8444f 100644
--- a/tests/nemo_text_processing/zh/test_word.py
+++ b/tests/nemo_text_processing/zh/test_word.py
@@ -19,7 +19,7 @@
 from nemo_text_processing.text_normalization.normalize_with_audio import NormalizerWithAudio
 from parameterized import parameterized
 
-from ..utils import CACHE_DIR, RUN_AUDIO_BASED_TESTS, parse_test_case_file
+from ..utils import CACHE_DIR, parse_test_case_file
 
 
 class TestWord:

From 049917bdbf398f1975af0ed60bb488a7b4c33ca2 Mon Sep 17 00:00:00 2001
From: "Buyuan(Alex) Cui" <69030297+BuyuanCui@users.noreply.github.com>
Date: Wed, 16 Aug 2023 09:15:09 -0700
Subject: [PATCH 14/17] Update test_word.py

Removed imports according to CodeQL

Signed-off-by: Buyuan(Alex) Cui <69030297+BuyuanCui@users.noreply.github.com>
---
 tests/nemo_text_processing/zh/test_word.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tests/nemo_text_processing/zh/test_word.py b/tests/nemo_text_processing/zh/test_word.py
index 2f2b8444f..5e2e1da45 100644
--- a/tests/nemo_text_processing/zh/test_word.py
+++ b/tests/nemo_text_processing/zh/test_word.py
@@ -15,8 +15,6 @@
 
 import pytest
 from nemo_text_processing.inverse_text_normalization.inverse_normalize import InverseNormalizer
-from nemo_text_processing.text_normalization.normalize import Normalizer
-from nemo_text_processing.text_normalization.normalize_with_audio import NormalizerWithAudio
 from parameterized import parameterized
 
 from ..utils import CACHE_DIR, parse_test_case_file

From 453ca80b2ce4a6978f61b687af76f969fde28c60 Mon Sep 17 00:00:00 2001
From: "Buyuan(Alex) Cui" <69030297+BuyuanCui@users.noreply.github.com>
Date: Wed, 16 Aug 2023 09:16:29 -0700
Subject: [PATCH 15/17] Update test_whitelist.py

Removing imports according to CodeQL

Signed-off-by: Buyuan(Alex) Cui <69030297+BuyuanCui@users.noreply.github.com>
---
 tests/nemo_text_processing/zh/test_whitelist.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/tests/nemo_text_processing/zh/test_whitelist.py b/tests/nemo_text_processing/zh/test_whitelist.py
index 9b09f4d9d..8b3e871b1 100644
--- a/tests/nemo_text_processing/zh/test_whitelist.py
+++ b/tests/nemo_text_processing/zh/test_whitelist.py
@@ -15,11 +15,9 @@
 
 import pytest
 from nemo_text_processing.inverse_text_normalization.inverse_normalize import InverseNormalizer
-from nemo_text_processing.text_normalization.normalize import Normalizer
-from nemo_text_processing.text_normalization.normalize_with_audio import NormalizerWithAudio
 from parameterized import parameterized
 
-from ..utils import CACHE_DIR, RUN_AUDIO_BASED_TESTS, parse_test_case_file
+from ..utils import CACHE_DIR, parse_test_case_file
 
 
 class TestWhitelist:

From a03a5650bf193422c1c76a4e7eb91c11e34dc255 Mon Sep 17 00:00:00 2001
From: "Buyuan(Alex) Cui" <69030297+BuyuanCui@users.noreply.github.com>
Date: Fri, 18 Aug 2023 10:24:05 -0700
Subject: [PATCH 16/17] Update test_whitelist.py

Signed-off-by: Buyuan(Alex) Cui <69030297+BuyuanCui@users.noreply.github.com>

From 0e5e7b88d273f2d538f2a902944655ca0590506d Mon Sep 17 00:00:00 2001
From: "Buyuan(Alex) Cui" <69030297+BuyuanCui@users.noreply.github.com>
Date: Fri, 1 Sep 2023 08:00:24 -0700
Subject: [PATCH 17/17] Update Jenkinsfile

changed zh cache to 07-27-23 as it is the latest update.

Signed-off-by: Buyuan(Alex) Cui <69030297+BuyuanCui@users.noreply.github.com>
---
 Jenkinsfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index 281d58e81..3c516e700 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -22,7 +22,7 @@ pipeline {
     RU_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/06-08-23-0'
     VI_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/06-08-23-0'
     SV_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/06-08-23-0'
-    ZH_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/07-12-23-0'
+    ZH_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/07-27-23-0'
     DEFAULT_TN_CACHE='/home/jenkinsci/TestData/text_norm/ci/grammars/06-08-23-0'
 
   }