From 3434200830b38602e3321373b7919e7112abf7a6 Mon Sep 17 00:00:00 2001 From: folivoramanh Date: Thu, 3 Jul 2025 01:28:13 +0700 Subject: [PATCH 1/7] Date for vietnamese TN Signed-off-by: folivoramanh --- .../vi/data/date/__init__.py | 13 +++ .../text_normalization/vi/data/date/days.tsv | 9 ++ .../vi/data/date/months.tsv | 21 +++++ .../text_normalization/vi/taggers/date.py | 87 +++++++++++++++++++ .../vi/taggers/tokenize_and_classify.py | 7 ++ .../text_normalization/vi/verbalizers/date.py | 51 +++++++++++ .../vi/verbalizers/verbalize.py | 6 +- .../test_cases_date.txt | 13 +++ tests/nemo_text_processing/vi/test_date.py | 44 +++++++--- .../vi/test_sparrowhawk_normalization.sh | 8 +- 10 files changed, 240 insertions(+), 19 deletions(-) create mode 100644 nemo_text_processing/text_normalization/vi/data/date/__init__.py create mode 100644 nemo_text_processing/text_normalization/vi/data/date/days.tsv create mode 100644 nemo_text_processing/text_normalization/vi/data/date/months.tsv create mode 100644 nemo_text_processing/text_normalization/vi/taggers/date.py create mode 100644 nemo_text_processing/text_normalization/vi/verbalizers/date.py create mode 100644 tests/nemo_text_processing/vi/data_text_normalization/test_cases_date.txt diff --git a/nemo_text_processing/text_normalization/vi/data/date/__init__.py b/nemo_text_processing/text_normalization/vi/data/date/__init__.py new file mode 100644 index 000000000..6ebc808fa --- /dev/null +++ b/nemo_text_processing/text_normalization/vi/data/date/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo_text_processing/text_normalization/vi/data/date/days.tsv b/nemo_text_processing/text_normalization/vi/data/date/days.tsv new file mode 100644 index 000000000..1d3e2440c --- /dev/null +++ b/nemo_text_processing/text_normalization/vi/data/date/days.tsv @@ -0,0 +1,9 @@ +01 một +02 hai +03 ba +04 bốn +05 năm +06 sáu +07 bảy +08 tám +09 chín \ No newline at end of file diff --git a/nemo_text_processing/text_normalization/vi/data/date/months.tsv b/nemo_text_processing/text_normalization/vi/data/date/months.tsv new file mode 100644 index 000000000..fb836fba1 --- /dev/null +++ b/nemo_text_processing/text_normalization/vi/data/date/months.tsv @@ -0,0 +1,21 @@ +1 một +2 hai +3 ba +4 tư +5 năm +6 sáu +7 bảy +8 tám +9 chín +10 mười +11 mười một +12 mười hai +01 một +02 hai +03 ba +04 tư +05 năm +06 sáu +07 bảy +08 tám +09 chín \ No newline at end of file diff --git a/nemo_text_processing/text_normalization/vi/taggers/date.py b/nemo_text_processing/text_normalization/vi/taggers/date.py new file mode 100644 index 000000000..941b830e4 --- /dev/null +++ b/nemo_text_processing/text_normalization/vi/taggers/date.py @@ -0,0 +1,87 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pynini +from pynini.lib import pynutil + +from nemo_text_processing.text_normalization.vi.utils import load_labels, get_abs_path +from nemo_text_processing.text_normalization.en.graph_utils import NEMO_DIGIT, GraphFst + + +class DateFst(GraphFst): + """ + Finite state transducer for classifying Vietnamese dates, e.g. + 15/01/2024 -> date { day: "mười lăm" month: "một" year: "hai nghìn hai mươi tư" } + tháng 4 2024 -> date { month: "tư" year: "hai nghìn hai mươi tư" } + ngày 15/01/2024 -> date { day: "mười lăm" month: "một" year: "hai nghìn hai mươi tư" } + ngày 12 tháng 5 năm 2025 -> date { day: "mười hai" month: "năm" year: "hai nghìn hai mươi lăm" } + """ + + def __init__(self, cardinal, deterministic: bool = True): + super().__init__(name="date", kind="classify", deterministic=deterministic) + + day_mappings = load_labels(get_abs_path("data/date/days.tsv")) + month_mappings = load_labels(get_abs_path("data/date/months.tsv")) + + day_digit = pynini.closure(NEMO_DIGIT, 1, 2) + month_digit = pynini.closure(NEMO_DIGIT, 1, 2) + year_digit = pynini.closure(NEMO_DIGIT, 4, 4) + separator = pynini.union("/", "-", ".") + + day_convert = pynini.string_map([(k, v) for k, v in day_mappings]) | pynini.compose(day_digit, cardinal.graph) + month_convert = pynini.string_map([(k, v) for k, v in month_mappings]) + year_convert = pynini.compose(year_digit, cardinal.graph) + + day_part = pynutil.insert("day: \"") + day_convert + pynutil.insert("\" ") + month_part = pynutil.insert("month: \"") + month_convert + pynutil.insert("\" ") + year_part = pynutil.insert("year: \"") + year_convert + pynutil.insert("\"") + month_final = pynutil.insert("month: \"") + month_convert + pynutil.insert("\"") + + patterns = [] + + date_sep = day_part + pynutil.delete(separator) + month_part + pynutil.delete(separator) + year_part + patterns.append(pynini.compose(day_digit + separator + month_digit + separator + year_digit, date_sep)) + patterns.append(pynini.compose( + pynini.accep("ngày ") + day_digit + separator + month_digit + separator + year_digit, + pynutil.delete("ngày ") + date_sep + )) + + for sep in [separator, pynini.accep(" ")]: + patterns.append(pynini.compose( + pynini.accep("tháng ") + month_digit + sep + year_digit, + pynutil.delete("tháng ") + month_part + pynutil.delete(sep) + year_part + )) + + day_month_sep = day_part + pynutil.delete(separator) + month_final + patterns.append(pynini.compose( + pynini.accep("ngày ") + day_digit + separator + month_digit, + pynutil.delete("ngày ") + day_month_sep + )) + + patterns.append(pynini.compose( + pynini.accep("ngày ") + day_digit + pynini.accep(" tháng ") + month_digit, + pynutil.delete("ngày ") + day_part + pynutil.delete(" tháng ") + month_final + )) + + patterns.append(pynini.compose( + pynini.accep("ngày ") + day_digit + pynini.accep(" tháng ") + month_digit + pynini.accep(" năm ") + year_digit, + pynutil.delete("ngày ") + day_part + pynutil.delete(" tháng ") + month_part + pynutil.delete(" năm ") + year_part + )) + + patterns.append(pynini.compose( + pynini.accep("năm ") + year_digit, + pynutil.delete("năm ") + year_part + )) + + self.fst = 
self.add_tokens(pynini.union(*patterns)) \ No newline at end of file diff --git a/nemo_text_processing/text_normalization/vi/taggers/tokenize_and_classify.py b/nemo_text_processing/text_normalization/vi/taggers/tokenize_and_classify.py index 73feb7182..bde22dee3 100644 --- a/nemo_text_processing/text_normalization/vi/taggers/tokenize_and_classify.py +++ b/nemo_text_processing/text_normalization/vi/taggers/tokenize_and_classify.py @@ -28,6 +28,7 @@ from nemo_text_processing.text_normalization.vi.taggers.decimal import DecimalFst from nemo_text_processing.text_normalization.vi.taggers.fraction import FractionFst from nemo_text_processing.text_normalization.vi.taggers.ordinal import OrdinalFst +from nemo_text_processing.text_normalization.vi.taggers.date import DateFst from nemo_text_processing.text_normalization.vi.taggers.punctuation import PunctuationFst from nemo_text_processing.text_normalization.vi.taggers.whitelist import WhiteListFst from nemo_text_processing.text_normalization.vi.taggers.word import WordFst @@ -92,9 +93,15 @@ def __init__( fraction_graph = fraction.fst logger.debug(f"fraction: {time.time() - start_time: .2f}s -- {fraction_graph.num_states()} nodes") + start_time = time.time() + date = DateFst(cardinal=cardinal, deterministic=deterministic) + date_graph = date.fst + logger.debug(f"date: {time.time() - start_time: .2f}s -- {date_graph.num_states()} nodes") + classify = ( pynutil.add_weight(whitelist_graph, 0.8) | pynutil.add_weight(ordinal_graph, 0.81) + | pynutil.add_weight(date_graph, 0.83) | pynutil.add_weight(decimal_graph, 0.85) | pynutil.add_weight(cardinal_graph, 0.9) | pynutil.add_weight(fraction_graph, 1.0) diff --git a/nemo_text_processing/text_normalization/vi/verbalizers/date.py b/nemo_text_processing/text_normalization/vi/verbalizers/date.py new file mode 100644 index 000000000..f2b53de80 --- /dev/null +++ b/nemo_text_processing/text_normalization/vi/verbalizers/date.py @@ -0,0 +1,51 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pynini +from pynini.lib import pynutil + +from nemo_text_processing.text_normalization.en.graph_utils import NEMO_NOT_QUOTE, GraphFst, delete_space + + +class DateFst(GraphFst): + """ + Finite state transducer for verbalizing Vietnamese dates, e.g. 
+ date { day: "mười lăm" month: "một" year: "hai nghìn hai mươi tư" } + -> ngày mười lăm tháng một năm hai nghìn hai mươi tư + + date { month: "tư" year: "hai nghìn hai mươi tư" } + -> tháng tư năm hai nghìn hai mươi tư + """ + + def __init__(self, deterministic: bool = True): + super().__init__(name="date", kind="verbalize", deterministic=deterministic) + + quoted_content = pynini.closure(NEMO_NOT_QUOTE) + day = pynutil.delete("day:") + delete_space + pynutil.delete("\"") + quoted_content + pynutil.delete("\"") + month = pynutil.delete("month:") + delete_space + pynutil.delete("\"") + quoted_content + pynutil.delete("\"") + year = pynutil.delete("year:") + delete_space + pynutil.delete("\"") + quoted_content + pynutil.delete("\"") + + insert_day = pynutil.insert("ngày ") + insert_month = pynutil.insert("tháng ") + insert_year = pynutil.insert("năm ") + insert_space = pynutil.insert(" ") + + date_graph = pynini.union( + insert_day + day + delete_space + insert_space + insert_month + month + delete_space + insert_space + insert_year + year, + insert_month + month + delete_space + insert_space + insert_year + year, + insert_day + day + delete_space + insert_space + insert_month + month, + insert_year + year + ) + + self.fst = self.delete_tokens(date_graph).optimize() \ No newline at end of file diff --git a/nemo_text_processing/text_normalization/vi/verbalizers/verbalize.py b/nemo_text_processing/text_normalization/vi/verbalizers/verbalize.py index e3d34b968..e2c4f54cf 100644 --- a/nemo_text_processing/text_normalization/vi/verbalizers/verbalize.py +++ b/nemo_text_processing/text_normalization/vi/verbalizers/verbalize.py @@ -18,6 +18,7 @@ from nemo_text_processing.text_normalization.vi.verbalizers.decimal import DecimalFst from nemo_text_processing.text_normalization.vi.verbalizers.fraction import FractionFst from nemo_text_processing.text_normalization.vi.verbalizers.ordinal import OrdinalFst +from nemo_text_processing.text_normalization.vi.verbalizers.date import DateFst from nemo_text_processing.text_normalization.vi.verbalizers.whitelist import WhiteListFst @@ -44,7 +45,10 @@ def __init__(self, deterministic: bool = True): fraction = FractionFst(deterministic=deterministic) fraction_graph = fraction.fst + date = DateFst(deterministic=deterministic) + date_graph = date.fst + # Combine all verbalizers - graph = cardinal_graph | whitelist_graph | word_graph | ordinal_graph | decimal_graph | fraction_graph + graph = cardinal_graph | whitelist_graph | word_graph | ordinal_graph | decimal_graph | fraction_graph | date_graph self.fst = graph diff --git a/tests/nemo_text_processing/vi/data_text_normalization/test_cases_date.txt b/tests/nemo_text_processing/vi/data_text_normalization/test_cases_date.txt new file mode 100644 index 000000000..c95e00e97 --- /dev/null +++ b/tests/nemo_text_processing/vi/data_text_normalization/test_cases_date.txt @@ -0,0 +1,13 @@ +ngày 15/01/2024~ngày mười lăm tháng một năm hai nghìn hai mươi tư +01/12/2023~ngày một tháng mười hai năm hai nghìn hai mươi ba +25-03-1975~ngày hai mươi lăm tháng ba năm một nghìn chín trăm bảy mươi lăm +10.05.2000~ngày mười tháng năm năm hai nghìn +tháng 1 2024~tháng một năm hai nghìn hai mươi tư +tháng 12 2023~tháng mười hai năm hai nghìn hai mươi ba +ngày 12 tháng 5 năm 2025~ngày mười hai tháng năm năm hai nghìn hai mươi lăm +tháng 5 năm nay~tháng năm năm nay +ngày 4 tháng này~ngày bốn tháng này +hôm nay là ngày 19/05/2025 sinh nhật Bác Hồ~hôm nay là ngày mười chín tháng năm năm hai nghìn hai mươi lăm sinh nhật Bác Hồ +ngày 14/4 hàng 
năm~ngày mười bốn tháng tư hàng năm +tháng 04/1969~tháng tư năm một nghìn chín trăm sáu mươi chín +ngày 12 tháng mười hai năm 2023~ngày mười hai tháng mười hai năm hai nghìn hai mươi ba \ No newline at end of file diff --git a/tests/nemo_text_processing/vi/test_date.py b/tests/nemo_text_processing/vi/test_date.py index 90885b6e4..20d646035 100644 --- a/tests/nemo_text_processing/vi/test_date.py +++ b/tests/nemo_text_processing/vi/test_date.py @@ -12,31 +12,47 @@ # See the License for the specific language governing permissions and # limitations under the License. +# pytest tests/nemo_text_processing/vi/test_date.py --cpu --cache-clear import pytest from parameterized import parameterized -from ..utils import CACHE_DIR, parse_test_case_file +from nemo_text_processing.inverse_text_normalization.inverse_normalize import InverseNormalizer +from nemo_text_processing.text_normalization.normalize import Normalizer +from nemo_text_processing.text_normalization.normalize_with_audio import NormalizerWithAudio -try: - from nemo_text_processing.inverse_text_normalization.inverse_normalize import InverseNormalizer - - PYNINI_AVAILABLE = True -except (ImportError, ModuleNotFoundError): - PYNINI_AVAILABLE = False +from ..utils import CACHE_DIR, RUN_AUDIO_BASED_TESTS, parse_test_case_file class TestDate: - inverse_normalizer = ( - InverseNormalizer(lang='vi', cache_dir=CACHE_DIR, overwrite_cache=False) if PYNINI_AVAILABLE else None - ) + inverse_normalizer = InverseNormalizer(lang='vi', cache_dir=CACHE_DIR, overwrite_cache=False) + @parameterized.expand(parse_test_case_file('vi/data_inverse_text_normalization/test_cases_date.txt')) - @pytest.mark.skipif( - not PYNINI_AVAILABLE, - reason="`pynini` not installed, please install via nemo_text_processing/pynini_install.sh", - ) @pytest.mark.run_only_on('CPU') @pytest.mark.unit def test_denorm(self, test_input, expected): pred = self.inverse_normalizer.inverse_normalize(test_input, verbose=False) assert pred == expected + + normalizer = Normalizer(input_case='cased', lang='vi', cache_dir=CACHE_DIR, overwrite_cache=False, post_process=True) + + normalizer_with_audio = ( + NormalizerWithAudio(input_case='cased', lang='vi', cache_dir=CACHE_DIR, overwrite_cache=False) + if CACHE_DIR and RUN_AUDIO_BASED_TESTS + else None + ) + + @parameterized.expand(parse_test_case_file('vi/data_text_normalization/test_cases_date.txt')) + @pytest.mark.run_only_on('CPU') + @pytest.mark.unit + def test_norm(self, test_input, expected): + pred = self.normalizer.normalize(test_input, verbose=False, punct_post_process=False) + assert pred == expected, f"input: {test_input}" + + if self.normalizer_with_audio: + pred_non_deterministic = self.normalizer_with_audio.normalize( + test_input, + n_tagged=30, + punct_post_process=False, + ) + assert expected in pred_non_deterministic, f"input: {test_input}" \ No newline at end of file diff --git a/tests/nemo_text_processing/vi/test_sparrowhawk_normalization.sh b/tests/nemo_text_processing/vi/test_sparrowhawk_normalization.sh index 6a277c28c..2c5a7f8df 100644 --- a/tests/nemo_text_processing/vi/test_sparrowhawk_normalization.sh +++ b/tests/nemo_text_processing/vi/test_sparrowhawk_normalization.sh @@ -28,10 +28,10 @@ testTNCardinal() { runtest $input } -# testTNDate() { -# input=$PROJECT_DIR/vi/data_text_normalization/test_cases_date.txt -# runtest $input -# } +testTNDate() { + input=$PROJECT_DIR/vi/data_text_normalization/test_cases_date.txt + runtest $input +} testTNDecimal() { 
input=$PROJECT_DIR/vi/data_text_normalization/test_cases_decimal.txt From 228960558997048aafa84c2d4a417b466d7c2bb0 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 3 Jul 2025 01:17:56 +0000 Subject: [PATCH 2/7] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../text_normalization/vi/taggers/date.py | 94 +++++++++++-------- .../vi/taggers/tokenize_and_classify.py | 4 +- .../text_normalization/vi/verbalizers/date.py | 21 +++-- .../vi/verbalizers/verbalize.py | 6 +- tests/nemo_text_processing/vi/test_date.py | 12 ++- 5 files changed, 83 insertions(+), 54 deletions(-) diff --git a/nemo_text_processing/text_normalization/vi/taggers/date.py b/nemo_text_processing/text_normalization/vi/taggers/date.py index 941b830e4..30c1459c3 100644 --- a/nemo_text_processing/text_normalization/vi/taggers/date.py +++ b/nemo_text_processing/text_normalization/vi/taggers/date.py @@ -15,8 +15,8 @@ import pynini from pynini.lib import pynutil -from nemo_text_processing.text_normalization.vi.utils import load_labels, get_abs_path from nemo_text_processing.text_normalization.en.graph_utils import NEMO_DIGIT, GraphFst +from nemo_text_processing.text_normalization.vi.utils import get_abs_path, load_labels class DateFst(GraphFst): @@ -33,55 +33,71 @@ def __init__(self, cardinal, deterministic: bool = True): day_mappings = load_labels(get_abs_path("data/date/days.tsv")) month_mappings = load_labels(get_abs_path("data/date/months.tsv")) - - day_digit = pynini.closure(NEMO_DIGIT, 1, 2) + + day_digit = pynini.closure(NEMO_DIGIT, 1, 2) month_digit = pynini.closure(NEMO_DIGIT, 1, 2) - year_digit = pynini.closure(NEMO_DIGIT, 4, 4) + year_digit = pynini.closure(NEMO_DIGIT, 4, 4) separator = pynini.union("/", "-", ".") - + day_convert = pynini.string_map([(k, v) for k, v in day_mappings]) | pynini.compose(day_digit, cardinal.graph) month_convert = pynini.string_map([(k, v) for k, v in month_mappings]) year_convert = pynini.compose(year_digit, cardinal.graph) - + day_part = pynutil.insert("day: \"") + day_convert + pynutil.insert("\" ") month_part = pynutil.insert("month: \"") + month_convert + pynutil.insert("\" ") year_part = pynutil.insert("year: \"") + year_convert + pynutil.insert("\"") month_final = pynutil.insert("month: \"") + month_convert + pynutil.insert("\"") - + patterns = [] - + date_sep = day_part + pynutil.delete(separator) + month_part + pynutil.delete(separator) + year_part patterns.append(pynini.compose(day_digit + separator + month_digit + separator + year_digit, date_sep)) - patterns.append(pynini.compose( - pynini.accep("ngày ") + day_digit + separator + month_digit + separator + year_digit, - pynutil.delete("ngày ") + date_sep - )) - + patterns.append( + pynini.compose( + pynini.accep("ngày ") + day_digit + separator + month_digit + separator + year_digit, + pynutil.delete("ngày ") + date_sep, + ) + ) + for sep in [separator, pynini.accep(" ")]: - patterns.append(pynini.compose( - pynini.accep("tháng ") + month_digit + sep + year_digit, - pynutil.delete("tháng ") + month_part + pynutil.delete(sep) + year_part - )) - + patterns.append( + pynini.compose( + pynini.accep("tháng ") + month_digit + sep + year_digit, + pynutil.delete("tháng ") + month_part + pynutil.delete(sep) + year_part, + ) + ) + day_month_sep = day_part + pynutil.delete(separator) + month_final - patterns.append(pynini.compose( - pynini.accep("ngày ") + day_digit + separator + month_digit, - pynutil.delete("ngày ") + 
day_month_sep - )) - - patterns.append(pynini.compose( - pynini.accep("ngày ") + day_digit + pynini.accep(" tháng ") + month_digit, - pynutil.delete("ngày ") + day_part + pynutil.delete(" tháng ") + month_final - )) - - patterns.append(pynini.compose( - pynini.accep("ngày ") + day_digit + pynini.accep(" tháng ") + month_digit + pynini.accep(" năm ") + year_digit, - pynutil.delete("ngày ") + day_part + pynutil.delete(" tháng ") + month_part + pynutil.delete(" năm ") + year_part - )) - - patterns.append(pynini.compose( - pynini.accep("năm ") + year_digit, - pynutil.delete("năm ") + year_part - )) - - self.fst = self.add_tokens(pynini.union(*patterns)) \ No newline at end of file + patterns.append( + pynini.compose( + pynini.accep("ngày ") + day_digit + separator + month_digit, pynutil.delete("ngày ") + day_month_sep + ) + ) + + patterns.append( + pynini.compose( + pynini.accep("ngày ") + day_digit + pynini.accep(" tháng ") + month_digit, + pynutil.delete("ngày ") + day_part + pynutil.delete(" tháng ") + month_final, + ) + ) + + patterns.append( + pynini.compose( + pynini.accep("ngày ") + + day_digit + + pynini.accep(" tháng ") + + month_digit + + pynini.accep(" năm ") + + year_digit, + pynutil.delete("ngày ") + + day_part + + pynutil.delete(" tháng ") + + month_part + + pynutil.delete(" năm ") + + year_part, + ) + ) + + patterns.append(pynini.compose(pynini.accep("năm ") + year_digit, pynutil.delete("năm ") + year_part)) + + self.fst = self.add_tokens(pynini.union(*patterns)) diff --git a/nemo_text_processing/text_normalization/vi/taggers/tokenize_and_classify.py b/nemo_text_processing/text_normalization/vi/taggers/tokenize_and_classify.py index bde22dee3..6bf01c496 100644 --- a/nemo_text_processing/text_normalization/vi/taggers/tokenize_and_classify.py +++ b/nemo_text_processing/text_normalization/vi/taggers/tokenize_and_classify.py @@ -25,10 +25,10 @@ generator_main, ) from nemo_text_processing.text_normalization.vi.taggers.cardinal import CardinalFst +from nemo_text_processing.text_normalization.vi.taggers.date import DateFst from nemo_text_processing.text_normalization.vi.taggers.decimal import DecimalFst from nemo_text_processing.text_normalization.vi.taggers.fraction import FractionFst from nemo_text_processing.text_normalization.vi.taggers.ordinal import OrdinalFst -from nemo_text_processing.text_normalization.vi.taggers.date import DateFst from nemo_text_processing.text_normalization.vi.taggers.punctuation import PunctuationFst from nemo_text_processing.text_normalization.vi.taggers.whitelist import WhiteListFst from nemo_text_processing.text_normalization.vi.taggers.word import WordFst @@ -101,7 +101,7 @@ def __init__( classify = ( pynutil.add_weight(whitelist_graph, 0.8) | pynutil.add_weight(ordinal_graph, 0.81) - | pynutil.add_weight(date_graph, 0.83) + | pynutil.add_weight(date_graph, 0.83) | pynutil.add_weight(decimal_graph, 0.85) | pynutil.add_weight(cardinal_graph, 0.9) | pynutil.add_weight(fraction_graph, 1.0) diff --git a/nemo_text_processing/text_normalization/vi/verbalizers/date.py b/nemo_text_processing/text_normalization/vi/verbalizers/date.py index f2b53de80..3c96a9ae2 100644 --- a/nemo_text_processing/text_normalization/vi/verbalizers/date.py +++ b/nemo_text_processing/text_normalization/vi/verbalizers/date.py @@ -21,9 +21,9 @@ class DateFst(GraphFst): """ Finite state transducer for verbalizing Vietnamese dates, e.g. 
- date { day: "mười lăm" month: "một" year: "hai nghìn hai mươi tư" } + date { day: "mười lăm" month: "một" year: "hai nghìn hai mươi tư" } -> ngày mười lăm tháng một năm hai nghìn hai mươi tư - + date { month: "tư" year: "hai nghìn hai mươi tư" } -> tháng tư năm hai nghìn hai mươi tư """ @@ -40,12 +40,21 @@ def __init__(self, deterministic: bool = True): insert_month = pynutil.insert("tháng ") insert_year = pynutil.insert("năm ") insert_space = pynutil.insert(" ") - + date_graph = pynini.union( - insert_day + day + delete_space + insert_space + insert_month + month + delete_space + insert_space + insert_year + year, + insert_day + + day + + delete_space + + insert_space + + insert_month + + month + + delete_space + + insert_space + + insert_year + + year, insert_month + month + delete_space + insert_space + insert_year + year, insert_day + day + delete_space + insert_space + insert_month + month, - insert_year + year + insert_year + year, ) - self.fst = self.delete_tokens(date_graph).optimize() \ No newline at end of file + self.fst = self.delete_tokens(date_graph).optimize() diff --git a/nemo_text_processing/text_normalization/vi/verbalizers/verbalize.py b/nemo_text_processing/text_normalization/vi/verbalizers/verbalize.py index e2c4f54cf..8d4023436 100644 --- a/nemo_text_processing/text_normalization/vi/verbalizers/verbalize.py +++ b/nemo_text_processing/text_normalization/vi/verbalizers/verbalize.py @@ -15,10 +15,10 @@ from nemo_text_processing.text_normalization.en.graph_utils import GraphFst from nemo_text_processing.text_normalization.en.verbalizers.word import WordFst from nemo_text_processing.text_normalization.vi.verbalizers.cardinal import CardinalFst +from nemo_text_processing.text_normalization.vi.verbalizers.date import DateFst from nemo_text_processing.text_normalization.vi.verbalizers.decimal import DecimalFst from nemo_text_processing.text_normalization.vi.verbalizers.fraction import FractionFst from nemo_text_processing.text_normalization.vi.verbalizers.ordinal import OrdinalFst -from nemo_text_processing.text_normalization.vi.verbalizers.date import DateFst from nemo_text_processing.text_normalization.vi.verbalizers.whitelist import WhiteListFst @@ -49,6 +49,8 @@ def __init__(self, deterministic: bool = True): date_graph = date.fst # Combine all verbalizers - graph = cardinal_graph | whitelist_graph | word_graph | ordinal_graph | decimal_graph | fraction_graph | date_graph + graph = ( + cardinal_graph | whitelist_graph | word_graph | ordinal_graph | decimal_graph | fraction_graph | date_graph + ) self.fst = graph diff --git a/tests/nemo_text_processing/vi/test_date.py b/tests/nemo_text_processing/vi/test_date.py index 20d646035..54e08b3fc 100644 --- a/tests/nemo_text_processing/vi/test_date.py +++ b/tests/nemo_text_processing/vi/test_date.py @@ -25,8 +25,8 @@ class TestDate: - inverse_normalizer = InverseNormalizer(lang='vi', cache_dir=CACHE_DIR, overwrite_cache=False) - + inverse_normalizer = InverseNormalizer(lang='vi', cache_dir=CACHE_DIR, overwrite_cache=False) + @parameterized.expand(parse_test_case_file('vi/data_inverse_text_normalization/test_cases_date.txt')) @pytest.mark.run_only_on('CPU') @pytest.mark.unit @@ -34,8 +34,10 @@ def test_denorm(self, test_input, expected): pred = self.inverse_normalizer.inverse_normalize(test_input, verbose=False) assert pred == expected - normalizer = Normalizer(input_case='cased', lang='vi', cache_dir=CACHE_DIR, overwrite_cache=False, post_process=True) - + normalizer = Normalizer( + input_case='cased', lang='vi', 
cache_dir=CACHE_DIR, overwrite_cache=False, post_process=True + ) + normalizer_with_audio = ( NormalizerWithAudio(input_case='cased', lang='vi', cache_dir=CACHE_DIR, overwrite_cache=False) if CACHE_DIR and RUN_AUDIO_BASED_TESTS @@ -55,4 +57,4 @@ def test_norm(self, test_input, expected): n_tagged=30, punct_post_process=False, ) - assert expected in pred_non_deterministic, f"input: {test_input}" \ No newline at end of file + assert expected in pred_non_deterministic, f"input: {test_input}" From b8db764de9a1ad250ef2e4d23e543f1915121f22 Mon Sep 17 00:00:00 2001 From: folivoramanh Date: Tue, 8 Jul 2025 22:16:29 +0700 Subject: [PATCH 3/7] Add roman support and correct copyright header Signed-off-by: folivoramanh --- .../text_normalization/vi/__init__.py | 2 +- .../text_normalization/vi/data/date/days.tsv | 33 +++- .../vi/data/date/year_suffix.tsv | 4 + .../vi/data/roman/__init__.py | 13 ++ .../vi/data/roman/key_word.tsv | 12 ++ .../vi/data/roman/roman_numerals.tsv | 13 ++ .../text_normalization/vi/graph_utils.py | 144 ++++++++++++++++++ .../text_normalization/vi/taggers/__init__.py | 2 +- .../text_normalization/vi/taggers/cardinal.py | 4 +- .../text_normalization/vi/taggers/date.py | 112 ++++++++------ .../text_normalization/vi/taggers/decimal.py | 4 +- .../text_normalization/vi/taggers/fraction.py | 4 +- .../text_normalization/vi/taggers/ordinal.py | 4 +- .../vi/taggers/punctuation.py | 4 +- .../text_normalization/vi/taggers/roman.py | 91 +++++++++++ .../vi/taggers/tokenize_and_classify.py | 25 +-- .../vi/taggers/whitelist.py | 4 +- .../text_normalization/vi/taggers/word.py | 4 +- .../text_normalization/vi/utils.py | 2 +- .../vi/verbalizers/__init__.py | 2 +- .../vi/verbalizers/cardinal.py | 4 +- .../text_normalization/vi/verbalizers/date.py | 63 +++++--- .../vi/verbalizers/decimal.py | 4 +- .../vi/verbalizers/fraction.py | 4 +- .../vi/verbalizers/ordinal.py | 4 +- .../vi/verbalizers/roman.py | 51 +++++++ .../vi/verbalizers/verbalize.py | 20 ++- .../vi/verbalizers/verbalize_final.py | 6 +- .../vi/verbalizers/whitelist.py | 4 +- .../text_normalization/vi/verbalizers/word.py | 4 +- .../test_cases_roman.txt | 59 +++++++ tests/nemo_text_processing/vi/test_roman.py | 49 ++++++ .../vi/test_sparrowhawk_normalization.sh | 5 + 33 files changed, 640 insertions(+), 120 deletions(-) create mode 100644 nemo_text_processing/text_normalization/vi/data/date/year_suffix.tsv create mode 100644 nemo_text_processing/text_normalization/vi/data/roman/__init__.py create mode 100644 nemo_text_processing/text_normalization/vi/data/roman/key_word.tsv create mode 100644 nemo_text_processing/text_normalization/vi/data/roman/roman_numerals.tsv create mode 100644 nemo_text_processing/text_normalization/vi/graph_utils.py create mode 100644 nemo_text_processing/text_normalization/vi/taggers/roman.py create mode 100644 nemo_text_processing/text_normalization/vi/verbalizers/roman.py create mode 100644 tests/nemo_text_processing/vi/data_text_normalization/test_cases_roman.txt create mode 100644 tests/nemo_text_processing/vi/test_roman.py diff --git a/nemo_text_processing/text_normalization/vi/__init__.py b/nemo_text_processing/text_normalization/vi/__init__.py index bc443be41..6ebc808fa 100644 --- a/nemo_text_processing/text_normalization/vi/__init__.py +++ b/nemo_text_processing/text_normalization/vi/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/nemo_text_processing/text_normalization/vi/data/date/days.tsv b/nemo_text_processing/text_normalization/vi/data/date/days.tsv index 1d3e2440c..5b70479a6 100644 --- a/nemo_text_processing/text_normalization/vi/data/date/days.tsv +++ b/nemo_text_processing/text_normalization/vi/data/date/days.tsv @@ -6,4 +6,35 @@ 06 sáu 07 bảy 08 tám -09 chín \ No newline at end of file +09 chín +1 một +2 hai +3 ba +4 bốn +5 năm +6 sáu +7 bảy +8 tám +9 chín +10 mười +11 mười một +12 mười hai +13 mười ba +14 mười bốn +15 mười lăm +16 mười sáu +17 mười bảy +18 mười tám +19 mười chín +20 hai mươi +21 hai mươi mốt +22 hai mươi hai +23 hai mươi ba +24 hai mươi bốn +25 hai mươi lăm +26 hai mươi sáu +27 hai mươi bảy +28 hai mươi tám +29 hai mươi chín +30 ba mươi +31 ba mươi mốt \ No newline at end of file diff --git a/nemo_text_processing/text_normalization/vi/data/date/year_suffix.tsv b/nemo_text_processing/text_normalization/vi/data/date/year_suffix.tsv new file mode 100644 index 000000000..31b49f955 --- /dev/null +++ b/nemo_text_processing/text_normalization/vi/data/date/year_suffix.tsv @@ -0,0 +1,4 @@ +tcn trước công nguyên +scn sau công nguyên +TCN trước công nguyên +SCN sau công nguyên \ No newline at end of file diff --git a/nemo_text_processing/text_normalization/vi/data/roman/__init__.py b/nemo_text_processing/text_normalization/vi/data/roman/__init__.py new file mode 100644 index 000000000..6ebc808fa --- /dev/null +++ b/nemo_text_processing/text_normalization/vi/data/roman/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo_text_processing/text_normalization/vi/data/roman/key_word.tsv b/nemo_text_processing/text_normalization/vi/data/roman/key_word.tsv new file mode 100644 index 000000000..e5f3d75a9 --- /dev/null +++ b/nemo_text_processing/text_normalization/vi/data/roman/key_word.tsv @@ -0,0 +1,12 @@ +thế kỉ +thế kỷ +thứ +chương +phần +mục +đoạn +năm +khoản +phụ lục +khóa +số \ No newline at end of file diff --git a/nemo_text_processing/text_normalization/vi/data/roman/roman_numerals.tsv b/nemo_text_processing/text_normalization/vi/data/roman/roman_numerals.tsv new file mode 100644 index 000000000..d4d8ad20b --- /dev/null +++ b/nemo_text_processing/text_normalization/vi/data/roman/roman_numerals.tsv @@ -0,0 +1,13 @@ +I 1 +V 5 +X 10 +L 50 +C 100 +D 500 +M 1000 +IV 4 +IX 9 +XL 40 +XC 90 +CD 400 +CM 900 \ No newline at end of file diff --git a/nemo_text_processing/text_normalization/vi/graph_utils.py b/nemo_text_processing/text_normalization/vi/graph_utils.py new file mode 100644 index 000000000..61a304eb5 --- /dev/null +++ b/nemo_text_processing/text_normalization/vi/graph_utils.py @@ -0,0 +1,144 @@ +# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright 2015 and onwards Google, Inc. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import string +from pathlib import Path +from typing import Dict + +import pynini +from pynini import Far +from pynini.export import export +from pynini.lib import byte, pynutil, utf8 + +from nemo_text_processing.utils.logging import logger + +NEMO_CHAR = utf8.VALID_UTF8_CHAR + +NEMO_DIGIT = byte.DIGIT +NEMO_LOWER = pynini.union(*string.ascii_lowercase).optimize() +NEMO_UPPER = pynini.union(*string.ascii_uppercase).optimize() +NEMO_ALPHA = pynini.union(NEMO_LOWER, NEMO_UPPER).optimize() +NEMO_ALNUM = pynini.union(NEMO_DIGIT, NEMO_ALPHA).optimize() +NEMO_HEX = pynini.union(*string.hexdigits).optimize() +NEMO_NON_BREAKING_SPACE = "\u00a0" +NEMO_SPACE = " " +NEMO_WHITE_SPACE = pynini.union(" ", "\t", "\n", "\r", "\u00a0").optimize() +NEMO_NOT_SPACE = pynini.difference(NEMO_CHAR, NEMO_WHITE_SPACE).optimize() +NEMO_NOT_QUOTE = pynini.difference(NEMO_CHAR, r'"').optimize() + +NEMO_PUNCT = pynini.union(*map(pynini.escape, string.punctuation)).optimize() +NEMO_GRAPH = pynini.union(NEMO_ALNUM, NEMO_PUNCT).optimize() + +NEMO_SIGMA = pynini.closure(NEMO_CHAR) + +delete_space = pynutil.delete(pynini.closure(NEMO_WHITE_SPACE)) +delete_zero_or_one_space = pynutil.delete(pynini.closure(NEMO_WHITE_SPACE, 0, 1)) +insert_space = pynutil.insert(" ") +delete_extra_space = pynini.cross(pynini.closure(NEMO_WHITE_SPACE, 1), " ") +def convert_space(fst) -> "pynini.FstLike": + """ + Converts space to nonbreaking space. + Used only in tagger grammars for transducing token values within quotes, e.g. name: "hello kitty" + This is making transducer significantly slower, so only use when there could be potential spaces within quotes, otherwise leave it. + + Args: + fst: input fst + + Returns output fst where breaking spaces are converted to non breaking spaces + """ + return fst @ pynini.cdrewrite(pynini.cross(NEMO_SPACE, NEMO_NON_BREAKING_SPACE), "", "", NEMO_SIGMA) + +def generator_main(file_name: str, graphs: Dict[str, "pynini.FstLike"]): + """ + Exports graph as OpenFst finite state archive (FAR) file with given file name and rule name. + + Args: + file_name: exported file name + graphs: Mapping of a rule name and Pynini WFST graph to be exported + """ + exporter = export.Exporter(file_name) + for rule, graph in graphs.items(): + exporter[rule] = graph.optimize() + exporter.close() + logger.info(f"Created {file_name}") + +class GraphFst: + """ + Base class for all grammar fsts. 
+ + Args: + name: name of grammar class + kind: either 'classify' or 'verbalize' + deterministic: if True will provide a single transduction option, + for False multiple transduction are generated (used for audio-based normalization) + """ + + def __init__(self, name: str, kind: str, deterministic: bool = True): + self.name = name + self.kind = kind + self._fst = None + self.deterministic = deterministic + + self.far_path = Path(os.path.dirname(__file__) + "/grammars/" + kind + "/" + name + ".far") + if self.far_exist(): + self._fst = Far(self.far_path, mode="r", arc_type="standard", far_type="default").get_fst() + + def far_exist(self) -> bool: + """ + Returns true if FAR can be loaded + """ + return self.far_path.exists() + + @property + def fst(self) -> "pynini.FstLike": + return self._fst + + @fst.setter + def fst(self, fst): + self._fst = fst + + def add_tokens(self, fst) -> "pynini.FstLike": + """ + Wraps class name around to given fst + + Args: + fst: input fst + + Returns: + Fst: fst + """ + return pynutil.insert(f"{self.name} {{ ") + fst + pynutil.insert(" }") + + def delete_tokens(self, fst) -> "pynini.FstLike": + """ + Deletes class name wrap around output of given fst + + Args: + fst: input fst + + Returns: + Fst: fst + """ + res = ( + pynutil.delete(f"{self.name}") + + delete_space + + pynutil.delete("{") + + delete_space + + fst + + delete_space + + pynutil.delete("}") + ) + return res @ pynini.cdrewrite(pynini.cross("\u00a0", " "), "", "", NEMO_SIGMA) diff --git a/nemo_text_processing/text_normalization/vi/taggers/__init__.py b/nemo_text_processing/text_normalization/vi/taggers/__init__.py index bc443be41..6ebc808fa 100644 --- a/nemo_text_processing/text_normalization/vi/taggers/__init__.py +++ b/nemo_text_processing/text_normalization/vi/taggers/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/nemo_text_processing/text_normalization/vi/taggers/cardinal.py b/nemo_text_processing/text_normalization/vi/taggers/cardinal.py index fa0f04fad..7f3743b05 100644 --- a/nemo_text_processing/text_normalization/vi/taggers/cardinal.py +++ b/nemo_text_processing/text_normalization/vi/taggers/cardinal.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -15,7 +15,7 @@ import pynini from pynini.lib import pynutil -from nemo_text_processing.text_normalization.en.graph_utils import NEMO_DIGIT, GraphFst, insert_space +from nemo_text_processing.text_normalization.vi.graph_utils import NEMO_DIGIT, GraphFst, insert_space from nemo_text_processing.text_normalization.vi.utils import get_abs_path diff --git a/nemo_text_processing/text_normalization/vi/taggers/date.py b/nemo_text_processing/text_normalization/vi/taggers/date.py index 30c1459c3..efc7dd858 100644 --- a/nemo_text_processing/text_normalization/vi/taggers/date.py +++ b/nemo_text_processing/text_normalization/vi/taggers/date.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -15,8 +15,8 @@ import pynini from pynini.lib import pynutil -from nemo_text_processing.text_normalization.en.graph_utils import NEMO_DIGIT, GraphFst -from nemo_text_processing.text_normalization.vi.utils import get_abs_path, load_labels +from nemo_text_processing.text_normalization.vi.utils import load_labels, get_abs_path +from nemo_text_processing.text_normalization.vi.graph_utils import NEMO_DIGIT, GraphFst class DateFst(GraphFst): @@ -26,6 +26,7 @@ class DateFst(GraphFst): tháng 4 2024 -> date { month: "tư" year: "hai nghìn hai mươi tư" } ngày 15/01/2024 -> date { day: "mười lăm" month: "một" year: "hai nghìn hai mươi tư" } ngày 12 tháng 5 năm 2025 -> date { day: "mười hai" month: "năm" year: "hai nghìn hai mươi lăm" } + năm 20 SCN -> date { year: "hai mươi" era: "sau công nguyên" } """ def __init__(self, cardinal, deterministic: bool = True): @@ -33,32 +34,42 @@ def __init__(self, cardinal, deterministic: bool = True): day_mappings = load_labels(get_abs_path("data/date/days.tsv")) month_mappings = load_labels(get_abs_path("data/date/months.tsv")) - - day_digit = pynini.closure(NEMO_DIGIT, 1, 2) + era_mappings = load_labels(get_abs_path("data/date/year_suffix.tsv")) + + day_digit = pynini.closure(NEMO_DIGIT, 1, 2) month_digit = pynini.closure(NEMO_DIGIT, 1, 2) - year_digit = pynini.closure(NEMO_DIGIT, 4, 4) + year_digit = pynini.closure(NEMO_DIGIT, 1, 4) separator = pynini.union("/", "-", ".") - - day_convert = pynini.string_map([(k, v) for k, v in day_mappings]) | pynini.compose(day_digit, cardinal.graph) + + day_convert = pynini.string_map([(k, v) for k, v in day_mappings]) month_convert = pynini.string_map([(k, v) for k, v in month_mappings]) year_convert = pynini.compose(year_digit, cardinal.graph) - + + era_to_full = {} + for abbr, full_form in era_mappings: + era_to_full[abbr.lower()] = full_form + era_to_full[abbr.upper()] = full_form + + era_convert = pynini.string_map([(k, v) for k, v in era_to_full.items()]) + day_part = pynutil.insert("day: \"") + day_convert + pynutil.insert("\" ") month_part = pynutil.insert("month: \"") + month_convert + pynutil.insert("\" ") year_part = pynutil.insert("year: \"") + year_convert + pynutil.insert("\"") month_final = pynutil.insert("month: \"") + month_convert + pynutil.insert("\"") - + era_part = pynutil.insert("era: \"") + era_convert + pynutil.insert("\"") + patterns = [] date_sep = day_part + pynutil.delete(separator) + month_part + pynutil.delete(separator) + year_part - patterns.append(pynini.compose(day_digit + separator + month_digit + separator + year_digit, date_sep)) - patterns.append( - pynini.compose( - pynini.accep("ngày ") + day_digit + separator + month_digit + separator + year_digit, - pynutil.delete("ngày ") + date_sep, - ) - ) - + patterns.append(pynini.compose( + day_digit + separator + month_digit + separator + year_digit, + date_sep + )) + patterns.append(pynini.compose( + pynini.accep("ngày ") + day_digit + separator + month_digit + separator + year_digit, + pynutil.delete("ngày ") + date_sep + )) + for sep in [separator, pynini.accep(" ")]: patterns.append( pynini.compose( @@ -68,36 +79,37 @@ def __init__(self, cardinal, deterministic: bool = True): ) day_month_sep = day_part + pynutil.delete(separator) + month_final - patterns.append( - pynini.compose( - pynini.accep("ngày ") + day_digit + separator + month_digit, pynutil.delete("ngày ") + day_month_sep - ) - ) - - 
patterns.append( - pynini.compose( - pynini.accep("ngày ") + day_digit + pynini.accep(" tháng ") + month_digit, - pynutil.delete("ngày ") + day_part + pynutil.delete(" tháng ") + month_final, - ) - ) - - patterns.append( - pynini.compose( - pynini.accep("ngày ") - + day_digit - + pynini.accep(" tháng ") - + month_digit - + pynini.accep(" năm ") - + year_digit, - pynutil.delete("ngày ") - + day_part - + pynutil.delete(" tháng ") - + month_part - + pynutil.delete(" năm ") - + year_part, - ) - ) - - patterns.append(pynini.compose(pynini.accep("năm ") + year_digit, pynutil.delete("năm ") + year_part)) - + patterns.append(pynini.compose( + pynini.accep("ngày ") + day_digit + separator + month_digit, + pynutil.delete("ngày ") + day_month_sep + )) + + patterns.append(pynini.compose( + pynini.accep("ngày ") + day_digit + pynini.accep(" tháng ") + month_digit, + pynutil.delete("ngày ") + day_part + pynutil.delete(" tháng ") + month_final + )) + + patterns.append(pynini.compose( + pynini.accep("ngày ") + day_digit + pynini.accep(" tháng ") + month_digit + pynini.accep(" năm ") + year_digit, + pynutil.delete("ngày ") + day_part + pynutil.delete(" tháng ") + month_part + pynutil.delete(" năm ") + year_part + )) + + patterns.append(pynini.compose( + pynini.accep("năm ") + year_digit, + pynutil.delete("năm ") + year_part + )) + + era_abbrs = list(era_to_full.keys()) + for era_abbr in era_abbrs: + patterns.append(pynini.compose( + pynini.accep("năm ") + year_digit + pynini.accep(" ") + pynini.accep(era_abbr), + pynutil.delete("năm ") + year_part + pynutil.delete(" ") + era_part + )) + + patterns.append(pynini.compose( + pynini.accep("năm thứ ") + year_digit + pynini.accep(" ") + pynini.accep(era_abbr), + pynutil.delete("năm thứ ") + pynutil.insert("ordinal: \"") + year_convert + pynutil.insert("\" ") + + pynutil.delete(" ") + era_part + )) + self.fst = self.add_tokens(pynini.union(*patterns)) diff --git a/nemo_text_processing/text_normalization/vi/taggers/decimal.py b/nemo_text_processing/text_normalization/vi/taggers/decimal.py index 0b314317b..8313ec46b 100644 --- a/nemo_text_processing/text_normalization/vi/taggers/decimal.py +++ b/nemo_text_processing/text_normalization/vi/taggers/decimal.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -15,7 +15,7 @@ import pynini from pynini.lib import pynutil -from nemo_text_processing.text_normalization.en.graph_utils import GraphFst +from nemo_text_processing.text_normalization.vi.graph_utils import GraphFst from nemo_text_processing.text_normalization.vi.utils import get_abs_path, load_labels diff --git a/nemo_text_processing/text_normalization/vi/taggers/fraction.py b/nemo_text_processing/text_normalization/vi/taggers/fraction.py index 807e96dab..56b452297 100644 --- a/nemo_text_processing/text_normalization/vi/taggers/fraction.py +++ b/nemo_text_processing/text_normalization/vi/taggers/fraction.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -15,7 +15,7 @@ import pynini from pynini.lib import pynutil -from nemo_text_processing.text_normalization.en.graph_utils import GraphFst +from nemo_text_processing.text_normalization.vi.graph_utils import GraphFst from nemo_text_processing.text_normalization.vi.taggers.cardinal import CardinalFst from nemo_text_processing.text_normalization.vi.utils import get_abs_path, load_labels diff --git a/nemo_text_processing/text_normalization/vi/taggers/ordinal.py b/nemo_text_processing/text_normalization/vi/taggers/ordinal.py index d896bcef3..0a4b81862 100644 --- a/nemo_text_processing/text_normalization/vi/taggers/ordinal.py +++ b/nemo_text_processing/text_normalization/vi/taggers/ordinal.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -16,7 +16,7 @@ import pynini from pynini.lib import pynutil -from nemo_text_processing.text_normalization.en.graph_utils import NEMO_DIGIT, GraphFst +from nemo_text_processing.text_normalization.vi.graph_utils import NEMO_DIGIT, GraphFst from nemo_text_processing.text_normalization.vi.utils import get_abs_path, load_labels diff --git a/nemo_text_processing/text_normalization/vi/taggers/punctuation.py b/nemo_text_processing/text_normalization/vi/taggers/punctuation.py index 1e08cb02d..c67129d5d 100644 --- a/nemo_text_processing/text_normalization/vi/taggers/punctuation.py +++ b/nemo_text_processing/text_normalization/vi/taggers/punctuation.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -15,7 +15,7 @@ import pynini from pynini.lib import pynutil -from nemo_text_processing.text_normalization.en.graph_utils import GraphFst +from nemo_text_processing.text_normalization.vi.graph_utils import GraphFst class PunctuationFst(GraphFst): diff --git a/nemo_text_processing/text_normalization/vi/taggers/roman.py b/nemo_text_processing/text_normalization/vi/taggers/roman.py new file mode 100644 index 000000000..f7e6b90a2 --- /dev/null +++ b/nemo_text_processing/text_normalization/vi/taggers/roman.py @@ -0,0 +1,91 @@ +# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pynini +from pynini.lib import pynutil + +from nemo_text_processing.text_normalization.vi.graph_utils import GraphFst +from nemo_text_processing.text_normalization.vi.utils import get_abs_path, load_labels + + +class RomanFst(GraphFst): + """ + Finite state transducer for classifying roman numbers in Vietnamese context: + e.g. "thế kỉ XV" -> tokens { roman { key_cardinal: "thế kỉ" integer: "mười lăm" } } + e.g. 
"thế kỷ IV" -> tokens { roman { key_cardinal: "thế kỷ" integer: "bốn" } } + e.g. "thứ IV" -> tokens { roman { key_cardinal: "thứ" integer: "bốn" } } + e.g. "chương III" -> tokens { roman { key_cardinal: "chương" integer: "ba" } } + e.g. "phần ix" -> tokens { roman { key_cardinal: "phần" integer: "chín" } } + + Args: + cardinal: CardinalFst + deterministic: if True will provide a single transduction option, + for False multiple transduction are generated (used for audio-based normalization) + """ + + def __init__(self, cardinal: GraphFst, deterministic: bool = True): + super().__init__(name="roman", kind="classify", deterministic=deterministic) + + key_words = [] + key_word_path = get_abs_path("data/roman/key_word.tsv") + for k_word in load_labels(key_word_path): + key_words.append(k_word[0]) + + key_words_fst = pynini.union(*[pynini.accep(word) for word in key_words]).optimize() + + roman_numeral_path = get_abs_path("data/roman/roman_numerals.tsv") + roman_numeral_pairs = load_labels(roman_numeral_path) + + roman_to_arabic = {} + for roman, value in roman_numeral_pairs: + roman_to_arabic[roman] = value + roman_to_arabic[roman.lower()] = value + + self.arabic_to_roman = {} + for roman, value in roman_numeral_pairs: + self.arabic_to_roman[int(value)] = roman + + valid_roman_pairs = [] + for i in range(1, 4000): + roman_upper = self._int_to_roman(i) + roman_lower = roman_upper.lower() + valid_roman_pairs.append((roman_upper, str(i))) + valid_roman_pairs.append((roman_lower, str(i))) + + roman_to_arabic_fst = pynini.string_map(valid_roman_pairs).optimize() + + cardinal_graph = cardinal.graph + + graph = ( + pynutil.insert("key_cardinal: \"") + + key_words_fst + + pynutil.insert("\"") + + pynini.accep(" ") + + pynutil.insert("integer: \"") + + pynini.compose(roman_to_arabic_fst, cardinal_graph) + + pynutil.insert("\"") + ).optimize() + + self.fst = self.add_tokens(graph).optimize() + + def _int_to_roman(self, num): + values = sorted(self.arabic_to_roman.keys(), reverse=True) + + roman_num = '' + for value in values: + while num >= value: + roman_num += self.arabic_to_roman[value] + num -= value + + return roman_num \ No newline at end of file diff --git a/nemo_text_processing/text_normalization/vi/taggers/tokenize_and_classify.py b/nemo_text_processing/text_normalization/vi/taggers/tokenize_and_classify.py index 6bf01c496..0925cf218 100644 --- a/nemo_text_processing/text_normalization/vi/taggers/tokenize_and_classify.py +++ b/nemo_text_processing/text_normalization/vi/taggers/tokenize_and_classify.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -18,7 +18,7 @@ import pynini from pynini.lib import pynutil -from nemo_text_processing.text_normalization.en.graph_utils import ( +from nemo_text_processing.text_normalization.vi.graph_utils import ( GraphFst, delete_extra_space, delete_space, @@ -30,6 +30,7 @@ from nemo_text_processing.text_normalization.vi.taggers.fraction import FractionFst from nemo_text_processing.text_normalization.vi.taggers.ordinal import OrdinalFst from nemo_text_processing.text_normalization.vi.taggers.punctuation import PunctuationFst +from nemo_text_processing.text_normalization.vi.taggers.roman import RomanFst from nemo_text_processing.text_normalization.vi.taggers.whitelist import WhiteListFst from nemo_text_processing.text_normalization.vi.taggers.word import WordFst from nemo_text_processing.utils.logging import logger @@ -98,16 +99,22 @@ def __init__( date_graph = date.fst logger.debug(f"date: {time.time() - start_time: .2f}s -- {date_graph.num_states()} nodes") + start_time = time.time() + roman = RomanFst(cardinal=cardinal, deterministic=deterministic) + roman_graph = roman.fst + logger.debug(f"roman: {time.time() - start_time: .2f}s -- {roman_graph.num_states()} nodes") + classify = ( - pynutil.add_weight(whitelist_graph, 0.8) - | pynutil.add_weight(ordinal_graph, 0.81) - | pynutil.add_weight(date_graph, 0.83) - | pynutil.add_weight(decimal_graph, 0.85) - | pynutil.add_weight(cardinal_graph, 0.9) - | pynutil.add_weight(fraction_graph, 1.0) + pynutil.add_weight(whitelist_graph, 1.01) + | pynutil.add_weight(roman_graph, 1.1) + | pynutil.add_weight(date_graph, 1.09) + | pynutil.add_weight(cardinal_graph, 1.1) + | pynutil.add_weight(ordinal_graph, 1.1) + | pynutil.add_weight(decimal_graph, 1.1) + | pynutil.add_weight(fraction_graph, 1.1) | pynutil.add_weight(word_graph, 100) ) - punct = pynutil.insert("tokens { ") + pynutil.add_weight(punct_graph, weight=2.1) + pynutil.insert(" }") + punct = pynutil.insert("tokens { ") + pynutil.add_weight(punct_graph, 1.1) + pynutil.insert(" }") token = pynutil.insert("tokens { ") + classify + pynutil.insert(" }") token_plus_punct = ( pynini.closure(punct + pynutil.insert(" ")) + token + pynini.closure(pynutil.insert(" ") + punct) diff --git a/nemo_text_processing/text_normalization/vi/taggers/whitelist.py b/nemo_text_processing/text_normalization/vi/taggers/whitelist.py index aed5e356a..5c2f5ff74 100644 --- a/nemo_text_processing/text_normalization/vi/taggers/whitelist.py +++ b/nemo_text_processing/text_normalization/vi/taggers/whitelist.py @@ -1,4 +1,4 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -15,7 +15,7 @@ import pynini from pynini.lib import pynutil -from nemo_text_processing.text_normalization.en.graph_utils import GraphFst, convert_space +from nemo_text_processing.text_normalization.vi.graph_utils import GraphFst, convert_space from nemo_text_processing.text_normalization.vi.utils import get_abs_path, load_labels diff --git a/nemo_text_processing/text_normalization/vi/taggers/word.py b/nemo_text_processing/text_normalization/vi/taggers/word.py index f0be213c7..ca31c3ab8 100644 --- a/nemo_text_processing/text_normalization/vi/taggers/word.py +++ b/nemo_text_processing/text_normalization/vi/taggers/word.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 
+# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -15,7 +15,7 @@ import pynini from pynini.lib import pynutil -from nemo_text_processing.text_normalization.en.graph_utils import NEMO_NOT_SPACE, GraphFst +from nemo_text_processing.text_normalization.vi.graph_utils import NEMO_NOT_SPACE, GraphFst class WordFst(GraphFst): diff --git a/nemo_text_processing/text_normalization/vi/utils.py b/nemo_text_processing/text_normalization/vi/utils.py index 332330921..6b0871d9d 100644 --- a/nemo_text_processing/text_normalization/vi/utils.py +++ b/nemo_text_processing/text_normalization/vi/utils.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/nemo_text_processing/text_normalization/vi/verbalizers/__init__.py b/nemo_text_processing/text_normalization/vi/verbalizers/__init__.py index bc443be41..6ebc808fa 100644 --- a/nemo_text_processing/text_normalization/vi/verbalizers/__init__.py +++ b/nemo_text_processing/text_normalization/vi/verbalizers/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/nemo_text_processing/text_normalization/vi/verbalizers/cardinal.py b/nemo_text_processing/text_normalization/vi/verbalizers/cardinal.py index 530c3dfce..4c0d47392 100644 --- a/nemo_text_processing/text_normalization/vi/verbalizers/cardinal.py +++ b/nemo_text_processing/text_normalization/vi/verbalizers/cardinal.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -15,7 +15,7 @@ import pynini from pynini.lib import pynutil -from nemo_text_processing.text_normalization.en.graph_utils import NEMO_NOT_QUOTE, GraphFst, delete_space +from nemo_text_processing.text_normalization.vi.graph_utils import NEMO_NOT_QUOTE, GraphFst, delete_space class CardinalFst(GraphFst): diff --git a/nemo_text_processing/text_normalization/vi/verbalizers/date.py b/nemo_text_processing/text_normalization/vi/verbalizers/date.py index 3c96a9ae2..49bdceebe 100644 --- a/nemo_text_processing/text_normalization/vi/verbalizers/date.py +++ b/nemo_text_processing/text_normalization/vi/verbalizers/date.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -15,7 +15,7 @@ import pynini from pynini.lib import pynutil -from nemo_text_processing.text_normalization.en.graph_utils import NEMO_NOT_QUOTE, GraphFst, delete_space +from nemo_text_processing.text_normalization.vi.graph_utils import NEMO_NOT_QUOTE, GraphFst, delete_space, insert_space class DateFst(GraphFst): @@ -26,35 +26,52 @@ class DateFst(GraphFst): date { month: "tư" year: "hai nghìn hai mươi tư" } -> tháng tư năm hai nghìn hai mươi tư + + date { year: "hai mươi" era: "sau công nguyên" } + -> năm hai mươi sau công nguyên + + date { ordinal: "mười" era: "trước công nguyên" } + -> năm thứ mười trước công nguyên """ def __init__(self, deterministic: bool = True): super().__init__(name="date", kind="verbalize", deterministic=deterministic) quoted_content = pynini.closure(NEMO_NOT_QUOTE) - day = pynutil.delete("day:") + delete_space + pynutil.delete("\"") + quoted_content + pynutil.delete("\"") - month = pynutil.delete("month:") + delete_space + pynutil.delete("\"") + quoted_content + pynutil.delete("\"") - year = pynutil.delete("year:") + delete_space + pynutil.delete("\"") + quoted_content + pynutil.delete("\"") - - insert_day = pynutil.insert("ngày ") - insert_month = pynutil.insert("tháng ") - insert_year = pynutil.insert("năm ") - insert_space = pynutil.insert(" ") + + day_expr = pynutil.delete("day: \"") + quoted_content + pynutil.delete("\"") + day_with_prefix = pynutil.insert("ngày ") + day_expr + + month_expr = pynutil.delete("month: \"") + quoted_content + pynutil.delete("\"") + month_with_prefix = pynutil.insert("tháng ") + month_expr + + year_expr = pynutil.delete("year: \"") + quoted_content + pynutil.delete("\"") + year_with_prefix = pynutil.insert("năm ") + year_expr + + era_expr = pynutil.delete("era: \"") + quoted_content + pynutil.delete("\"") + + ordinal_expr = pynutil.delete("ordinal: \"") + quoted_content + pynutil.delete("\"") + ordinal_with_prefix = pynutil.insert("năm thứ ") + ordinal_expr + date_graph = pynini.union( - insert_day - + day - + delete_space - + insert_space - + insert_month - + month - + delete_space - + insert_space - + insert_year - + year, - insert_month + month + delete_space + insert_space + insert_year + year, - insert_day + day + delete_space + insert_space + insert_month + month, - insert_year + year, + day_with_prefix + delete_space + insert_space + + month_with_prefix + delete_space + insert_space + + year_with_prefix, + + month_with_prefix + delete_space + insert_space + + year_with_prefix, + + day_with_prefix + delete_space + insert_space + + month_with_prefix, + + year_with_prefix, + + year_with_prefix + delete_space + insert_space + + era_expr, + + ordinal_with_prefix + delete_space + insert_space + + era_expr, ) self.fst = self.delete_tokens(date_graph).optimize() diff --git a/nemo_text_processing/text_normalization/vi/verbalizers/decimal.py b/nemo_text_processing/text_normalization/vi/verbalizers/decimal.py index 8fe523b37..6d811591c 100644 --- a/nemo_text_processing/text_normalization/vi/verbalizers/decimal.py +++ b/nemo_text_processing/text_normalization/vi/verbalizers/decimal.py @@ -1,4 +1,4 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
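The rewritten date verbalizer above works by deleting the serialized field markup and inserting the spoken Vietnamese prefix for each field. A minimal sketch of that mechanism on a single field is shown below; the field value is hard-coded only to keep the example free of character-class definitions (the real grammar accepts any non-quote content via NEMO_NOT_QUOTE).

import pynini
from pynini.lib import pynutil

# Sketch: drop the `day: "..."` markup, keep the value, prepend the spoken prefix.
day_value = pynini.accep("mười lăm")  # stands in for a NEMO_NOT_QUOTE closure
day_field = pynutil.delete('day: "') + day_value + pynutil.delete('"')
verbalize_day = (pynutil.insert("ngày ") + day_field).optimize()

print(pynini.shortestpath(pynini.compose('day: "mười lăm"', verbalize_day)).string())
# -> ngày mười lăm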
@@ -15,7 +15,7 @@ import pynini from pynini.lib import pynutil -from nemo_text_processing.text_normalization.en.graph_utils import NEMO_NOT_QUOTE, GraphFst, delete_space, insert_space +from nemo_text_processing.text_normalization.vi.graph_utils import NEMO_NOT_QUOTE, GraphFst, delete_space, insert_space class DecimalFst(GraphFst): diff --git a/nemo_text_processing/text_normalization/vi/verbalizers/fraction.py b/nemo_text_processing/text_normalization/vi/verbalizers/fraction.py index 77ace3454..ec814a2fb 100644 --- a/nemo_text_processing/text_normalization/vi/verbalizers/fraction.py +++ b/nemo_text_processing/text_normalization/vi/verbalizers/fraction.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -15,7 +15,7 @@ import pynini from pynini.lib import pynutil -from nemo_text_processing.text_normalization.en.graph_utils import NEMO_NOT_QUOTE, GraphFst, delete_space +from nemo_text_processing.text_normalization.vi.graph_utils import NEMO_NOT_QUOTE, GraphFst, delete_space class FractionFst(GraphFst): diff --git a/nemo_text_processing/text_normalization/vi/verbalizers/ordinal.py b/nemo_text_processing/text_normalization/vi/verbalizers/ordinal.py index 7388f7df4..cf8c5326e 100644 --- a/nemo_text_processing/text_normalization/vi/verbalizers/ordinal.py +++ b/nemo_text_processing/text_normalization/vi/verbalizers/ordinal.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -15,7 +15,7 @@ import pynini from pynini.lib import pynutil -from nemo_text_processing.text_normalization.en.graph_utils import NEMO_NOT_QUOTE, GraphFst, delete_space +from nemo_text_processing.text_normalization.vi.graph_utils import NEMO_NOT_QUOTE, GraphFst, delete_space class OrdinalFst(GraphFst): diff --git a/nemo_text_processing/text_normalization/vi/verbalizers/roman.py b/nemo_text_processing/text_normalization/vi/verbalizers/roman.py new file mode 100644 index 000000000..76427bd42 --- /dev/null +++ b/nemo_text_processing/text_normalization/vi/verbalizers/roman.py @@ -0,0 +1,51 @@ +# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pynini +from pynini.lib import pynutil + +from nemo_text_processing.text_normalization.vi.graph_utils import ( + NEMO_NOT_QUOTE, NEMO_SIGMA, GraphFst, delete_space +) + + +class RomanFst(GraphFst): + """ + Finite state transducer for verbalizing Roman numerals in Vietnamese + e.g. tokens { roman { key_cardinal: "thế kỉ" integer: "mười lăm" } } -> thế kỉ mười lăm + e.g. 
tokens { roman { key_cardinal: "thế kỷ" integer: "bốn" } } -> thế kỷ bốn + e.g. tokens { roman { key_cardinal: "thứ" integer: "bốn" } } -> thứ bốn + e.g. tokens { roman { integer: "mười lăm" } } -> mười lăm + + Args: + deterministic: if True will provide a single transduction option, + for False multiple transduction are generated (used for audio-based normalization) + """ + + def __init__(self, deterministic: bool = True): + super().__init__(name="roman", kind="verbalize", deterministic=deterministic) + + key_cardinal = pynutil.delete("key_cardinal: \"") + pynini.closure(NEMO_NOT_QUOTE) + pynutil.delete("\"") + + integer = pynutil.delete("integer: \"") + pynini.closure(NEMO_NOT_QUOTE) + pynutil.delete("\"") + + graph_with_key = key_cardinal + delete_space + pynutil.insert(" ") + integer + + graph_without_key = integer + + graph = pynini.union(graph_with_key, graph_without_key) + + delete_tokens = self.delete_tokens(graph) + + self.fst = delete_tokens.optimize() \ No newline at end of file diff --git a/nemo_text_processing/text_normalization/vi/verbalizers/verbalize.py b/nemo_text_processing/text_normalization/vi/verbalizers/verbalize.py index 8d4023436..d49651e89 100644 --- a/nemo_text_processing/text_normalization/vi/verbalizers/verbalize.py +++ b/nemo_text_processing/text_normalization/vi/verbalizers/verbalize.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,13 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -from nemo_text_processing.text_normalization.en.graph_utils import GraphFst -from nemo_text_processing.text_normalization.en.verbalizers.word import WordFst +from nemo_text_processing.text_normalization.vi.graph_utils import GraphFst +from nemo_text_processing.text_normalization.vi.verbalizers.word import WordFst from nemo_text_processing.text_normalization.vi.verbalizers.cardinal import CardinalFst from nemo_text_processing.text_normalization.vi.verbalizers.date import DateFst from nemo_text_processing.text_normalization.vi.verbalizers.decimal import DecimalFst from nemo_text_processing.text_normalization.vi.verbalizers.fraction import FractionFst from nemo_text_processing.text_normalization.vi.verbalizers.ordinal import OrdinalFst +from nemo_text_processing.text_normalization.vi.verbalizers.date import DateFst +from nemo_text_processing.text_normalization.vi.verbalizers.roman import RomanFst from nemo_text_processing.text_normalization.vi.verbalizers.whitelist import WhiteListFst @@ -48,9 +50,19 @@ def __init__(self, deterministic: bool = True): date = DateFst(deterministic=deterministic) date_graph = date.fst + roman = RomanFst(deterministic=deterministic) + roman_graph = roman.fst + # Combine all verbalizers graph = ( - cardinal_graph | whitelist_graph | word_graph | ordinal_graph | decimal_graph | fraction_graph | date_graph + cardinal_graph + | whitelist_graph + | word_graph + | ordinal_graph + | decimal_graph + | fraction_graph + | date_graph + | roman_graph ) self.fst = graph diff --git a/nemo_text_processing/text_normalization/vi/verbalizers/verbalize_final.py b/nemo_text_processing/text_normalization/vi/verbalizers/verbalize_final.py index cd9ec39eb..8911fe161 100644 --- a/nemo_text_processing/text_normalization/vi/verbalizers/verbalize_final.py +++ 
b/nemo_text_processing/text_normalization/vi/verbalizers/verbalize_final.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -17,13 +17,13 @@ import pynini from pynini.lib import pynutil -from nemo_text_processing.text_normalization.en.graph_utils import ( +from nemo_text_processing.text_normalization.vi.graph_utils import ( GraphFst, delete_extra_space, delete_space, generator_main, ) -from nemo_text_processing.text_normalization.en.verbalizers.word import WordFst +from nemo_text_processing.text_normalization.vi.verbalizers.word import WordFst from nemo_text_processing.text_normalization.vi.verbalizers.verbalize import VerbalizeFst from nemo_text_processing.utils.logging import logger diff --git a/nemo_text_processing/text_normalization/vi/verbalizers/whitelist.py b/nemo_text_processing/text_normalization/vi/verbalizers/whitelist.py index 6e0699827..018955415 100644 --- a/nemo_text_processing/text_normalization/vi/verbalizers/whitelist.py +++ b/nemo_text_processing/text_normalization/vi/verbalizers/whitelist.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -15,7 +15,7 @@ import pynini from pynini.lib import pynutil -from nemo_text_processing.text_normalization.en.graph_utils import NEMO_NOT_QUOTE, NEMO_SIGMA, GraphFst, delete_space +from nemo_text_processing.text_normalization.vi.graph_utils import NEMO_NOT_QUOTE, NEMO_SIGMA, GraphFst, delete_space class WhiteListFst(GraphFst): diff --git a/nemo_text_processing/text_normalization/vi/verbalizers/word.py b/nemo_text_processing/text_normalization/vi/verbalizers/word.py index f9547acba..0e6e07b81 100644 --- a/nemo_text_processing/text_normalization/vi/verbalizers/word.py +++ b/nemo_text_processing/text_normalization/vi/verbalizers/word.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -15,7 +15,7 @@ import pynini from pynini.lib import pynutil -from nemo_text_processing.text_normalization.en.graph_utils import NEMO_CHAR, NEMO_SIGMA, GraphFst, delete_space +from nemo_text_processing.text_normalization.vi.graph_utils import NEMO_CHAR, NEMO_SIGMA, GraphFst, delete_space class WordFst(GraphFst): diff --git a/tests/nemo_text_processing/vi/data_text_normalization/test_cases_roman.txt b/tests/nemo_text_processing/vi/data_text_normalization/test_cases_roman.txt new file mode 100644 index 000000000..543ef052b --- /dev/null +++ b/tests/nemo_text_processing/vi/data_text_normalization/test_cases_roman.txt @@ -0,0 +1,59 @@ +thế kỉ XV~thế kỉ mười lăm +thế kỉ XX~thế kỉ hai mươi +thế kỉ XXI~thế kỉ hai mươi mốt +thế kỷ IV~thế kỷ bốn +thế kỷ V~thế kỷ năm +thứ I~thứ một +thứ V~thứ năm +thứ X~thứ mười +thứ XV~thứ mười lăm +chương III~chương ba +phần ix~phần chín +chương C~chương một trăm +mục XCIX~mục chín mươi chín +chương MMMCMXCIX~chương ba nghìn chín trăm chín mươi chín +thế kỉ xix~thế kỉ mười chín +thế kỷ vi~thế kỷ sáu +phần xl~phần bốn mươi +mục xc~mục chín mươi +mục cd~mục bốn trăm +mục cm~mục chín trăm +thứ viii~thứ tám +thứ ix~thứ chín +thứ xi~thứ mười một +chương lxxxviii~chương tám mươi tám +chương cccxlv~chương ba trăm bốn mươi lăm +thế kỉ XV và chương IX~thế kỉ mười lăm và chương chín +trong phần X có mục IV~trong phần mười có mục bốn +chương I~chương một +chương MMMCMXCIX~chương ba nghìn chín trăm chín mươi chín +CPU I9 là dòng cao cấp~CPU I9 là dòng cao cấp +Phiên bản V2.0 đã lỗi thời~Phiên bản V2.0 đã lỗi thời +đoạn II~đoạn hai +đoạn iv~đoạn bốn +đoạn VII~đoạn bảy +đoạn xii~đoạn mười hai +năm MCMXCIX~năm một nghìn chín trăm chín mươi chín +năm mmxx~năm hai nghìn hai mươi +khoản III~khoản ba +khoản vi~khoản sáu +khoản XIV~khoản mười bốn +khoản xxv~khoản hai mươi lăm +phụ lục I~phụ lục một +phụ lục v~phụ lục năm +phụ lục XII~phụ lục mười hai +phụ lục xx~phụ lục hai mươi +khóa VII~khóa bảy +khóa xi~khóa mười một +khóa XV~khóa mười lăm +khóa xxx~khóa ba mươi +số I~số một +số v~số năm +số X~số mười +số l~số năm mươi +đoạn IX mục III~đoạn chín mục ba +khoản II phụ lục IV~khoản hai phụ lục bốn +khóa XII số IX~khóa mười hai số chín +năm MMXXIII khoản V~năm hai nghìn hai mươi ba khoản năm +chương VII đoạn XI~chương bảy đoạn mười một +phần XX mục XV~phần hai mươi mục mười lăm \ No newline at end of file diff --git a/tests/nemo_text_processing/vi/test_roman.py b/tests/nemo_text_processing/vi/test_roman.py new file mode 100644 index 000000000..a8ee137d8 --- /dev/null +++ b/tests/nemo_text_processing/vi/test_roman.py @@ -0,0 +1,49 @@ +# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# pytest tests/nemo_text_processing/vi/test_roman.py --cpu --cache-clear +import pytest +from parameterized import parameterized + +from nemo_text_processing.text_normalization.normalize import Normalizer +from nemo_text_processing.text_normalization.normalize_with_audio import NormalizerWithAudio + +from ..utils import CACHE_DIR, RUN_AUDIO_BASED_TESTS, parse_test_case_file + + +class TestRoman: + normalizer = Normalizer( + input_case='cased', lang='vi', cache_dir=CACHE_DIR, overwrite_cache=False, post_process=True + ) + + normalizer_with_audio = ( + NormalizerWithAudio(input_case='cased', lang='vi', cache_dir=CACHE_DIR, overwrite_cache=False) + if CACHE_DIR and RUN_AUDIO_BASED_TESTS + else None + ) + + @parameterized.expand(parse_test_case_file('vi/data_text_normalization/test_cases_roman.txt')) + @pytest.mark.run_only_on('CPU') + @pytest.mark.unit + def test_norm(self, test_input, expected): + pred = self.normalizer.normalize(test_input, verbose=False, punct_post_process=False) + assert pred == expected, f"input: {test_input}" + + if self.normalizer_with_audio: + pred_non_deterministic = self.normalizer_with_audio.normalize( + test_input, + n_tagged=30, + punct_post_process=False, + ) + assert expected in pred_non_deterministic, f"input: {test_input}" diff --git a/tests/nemo_text_processing/vi/test_sparrowhawk_normalization.sh b/tests/nemo_text_processing/vi/test_sparrowhawk_normalization.sh index 2c5a7f8df..7c8b184bf 100644 --- a/tests/nemo_text_processing/vi/test_sparrowhawk_normalization.sh +++ b/tests/nemo_text_processing/vi/test_sparrowhawk_normalization.sh @@ -48,6 +48,11 @@ testTNFraction() { runtest $input } +testTNRoman() { + input=$PROJECT_DIR/vi/data_text_normalization/test_cases_roman.txt + runtest $input +} + # testTNTime() { # input=$PROJECT_DIR/vi/data_text_normalization/test_cases_time.txt # runtest $input From db7ec4634b6d3debf0ef28d6de3689a928ddc2bb Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 8 Jul 2025 15:23:58 +0000 Subject: [PATCH 4/7] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../text_normalization/vi/graph_utils.py | 4 + .../text_normalization/vi/taggers/date.py | 117 ++++++++++-------- .../text_normalization/vi/taggers/roman.py | 40 +++--- .../vi/taggers/tokenize_and_classify.py | 2 +- .../text_normalization/vi/verbalizers/date.py | 42 +++---- .../vi/verbalizers/roman.py | 18 ++- .../vi/verbalizers/verbalize.py | 15 ++- .../vi/verbalizers/verbalize_final.py | 2 +- 8 files changed, 127 insertions(+), 113 deletions(-) diff --git a/nemo_text_processing/text_normalization/vi/graph_utils.py b/nemo_text_processing/text_normalization/vi/graph_utils.py index 61a304eb5..b8d6aa509 100644 --- a/nemo_text_processing/text_normalization/vi/graph_utils.py +++ b/nemo_text_processing/text_normalization/vi/graph_utils.py @@ -48,6 +48,8 @@ delete_zero_or_one_space = pynutil.delete(pynini.closure(NEMO_WHITE_SPACE, 0, 1)) insert_space = pynutil.insert(" ") delete_extra_space = pynini.cross(pynini.closure(NEMO_WHITE_SPACE, 1), " ") + + def convert_space(fst) -> "pynini.FstLike": """ Converts space to nonbreaking space. 
@@ -61,6 +63,7 @@ def convert_space(fst) -> "pynini.FstLike": """ return fst @ pynini.cdrewrite(pynini.cross(NEMO_SPACE, NEMO_NON_BREAKING_SPACE), "", "", NEMO_SIGMA) + def generator_main(file_name: str, graphs: Dict[str, "pynini.FstLike"]): """ Exports graph as OpenFst finite state archive (FAR) file with given file name and rule name. @@ -75,6 +78,7 @@ def generator_main(file_name: str, graphs: Dict[str, "pynini.FstLike"]): exporter.close() logger.info(f"Created {file_name}") + class GraphFst: """ Base class for all grammar fsts. diff --git a/nemo_text_processing/text_normalization/vi/taggers/date.py b/nemo_text_processing/text_normalization/vi/taggers/date.py index efc7dd858..810c18887 100644 --- a/nemo_text_processing/text_normalization/vi/taggers/date.py +++ b/nemo_text_processing/text_normalization/vi/taggers/date.py @@ -15,8 +15,8 @@ import pynini from pynini.lib import pynutil -from nemo_text_processing.text_normalization.vi.utils import load_labels, get_abs_path from nemo_text_processing.text_normalization.vi.graph_utils import NEMO_DIGIT, GraphFst +from nemo_text_processing.text_normalization.vi.utils import get_abs_path, load_labels class DateFst(GraphFst): @@ -35,41 +35,40 @@ def __init__(self, cardinal, deterministic: bool = True): day_mappings = load_labels(get_abs_path("data/date/days.tsv")) month_mappings = load_labels(get_abs_path("data/date/months.tsv")) era_mappings = load_labels(get_abs_path("data/date/year_suffix.tsv")) - - day_digit = pynini.closure(NEMO_DIGIT, 1, 2) + + day_digit = pynini.closure(NEMO_DIGIT, 1, 2) month_digit = pynini.closure(NEMO_DIGIT, 1, 2) - year_digit = pynini.closure(NEMO_DIGIT, 1, 4) + year_digit = pynini.closure(NEMO_DIGIT, 1, 4) separator = pynini.union("/", "-", ".") - + day_convert = pynini.string_map([(k, v) for k, v in day_mappings]) month_convert = pynini.string_map([(k, v) for k, v in month_mappings]) year_convert = pynini.compose(year_digit, cardinal.graph) - + era_to_full = {} for abbr, full_form in era_mappings: era_to_full[abbr.lower()] = full_form era_to_full[abbr.upper()] = full_form - + era_convert = pynini.string_map([(k, v) for k, v in era_to_full.items()]) - + day_part = pynutil.insert("day: \"") + day_convert + pynutil.insert("\" ") month_part = pynutil.insert("month: \"") + month_convert + pynutil.insert("\" ") year_part = pynutil.insert("year: \"") + year_convert + pynutil.insert("\"") month_final = pynutil.insert("month: \"") + month_convert + pynutil.insert("\"") era_part = pynutil.insert("era: \"") + era_convert + pynutil.insert("\"") - + patterns = [] date_sep = day_part + pynutil.delete(separator) + month_part + pynutil.delete(separator) + year_part - patterns.append(pynini.compose( - day_digit + separator + month_digit + separator + year_digit, - date_sep - )) - patterns.append(pynini.compose( - pynini.accep("ngày ") + day_digit + separator + month_digit + separator + year_digit, - pynutil.delete("ngày ") + date_sep - )) - + patterns.append(pynini.compose(day_digit + separator + month_digit + separator + year_digit, date_sep)) + patterns.append( + pynini.compose( + pynini.accep("ngày ") + day_digit + separator + month_digit + separator + year_digit, + pynutil.delete("ngày ") + date_sep, + ) + ) + for sep in [separator, pynini.accep(" ")]: patterns.append( pynini.compose( @@ -79,37 +78,57 @@ def __init__(self, cardinal, deterministic: bool = True): ) day_month_sep = day_part + pynutil.delete(separator) + month_final - patterns.append(pynini.compose( - pynini.accep("ngày ") + day_digit + separator + month_digit, - 
pynutil.delete("ngày ") + day_month_sep - )) - - patterns.append(pynini.compose( - pynini.accep("ngày ") + day_digit + pynini.accep(" tháng ") + month_digit, - pynutil.delete("ngày ") + day_part + pynutil.delete(" tháng ") + month_final - )) - - patterns.append(pynini.compose( - pynini.accep("ngày ") + day_digit + pynini.accep(" tháng ") + month_digit + pynini.accep(" năm ") + year_digit, - pynutil.delete("ngày ") + day_part + pynutil.delete(" tháng ") + month_part + pynutil.delete(" năm ") + year_part - )) - - patterns.append(pynini.compose( - pynini.accep("năm ") + year_digit, - pynutil.delete("năm ") + year_part - )) - + patterns.append( + pynini.compose( + pynini.accep("ngày ") + day_digit + separator + month_digit, pynutil.delete("ngày ") + day_month_sep + ) + ) + + patterns.append( + pynini.compose( + pynini.accep("ngày ") + day_digit + pynini.accep(" tháng ") + month_digit, + pynutil.delete("ngày ") + day_part + pynutil.delete(" tháng ") + month_final, + ) + ) + + patterns.append( + pynini.compose( + pynini.accep("ngày ") + + day_digit + + pynini.accep(" tháng ") + + month_digit + + pynini.accep(" năm ") + + year_digit, + pynutil.delete("ngày ") + + day_part + + pynutil.delete(" tháng ") + + month_part + + pynutil.delete(" năm ") + + year_part, + ) + ) + + patterns.append(pynini.compose(pynini.accep("năm ") + year_digit, pynutil.delete("năm ") + year_part)) + era_abbrs = list(era_to_full.keys()) for era_abbr in era_abbrs: - patterns.append(pynini.compose( - pynini.accep("năm ") + year_digit + pynini.accep(" ") + pynini.accep(era_abbr), - pynutil.delete("năm ") + year_part + pynutil.delete(" ") + era_part - )) - - patterns.append(pynini.compose( - pynini.accep("năm thứ ") + year_digit + pynini.accep(" ") + pynini.accep(era_abbr), - pynutil.delete("năm thứ ") + pynutil.insert("ordinal: \"") + year_convert + pynutil.insert("\" ") + - pynutil.delete(" ") + era_part - )) - + patterns.append( + pynini.compose( + pynini.accep("năm ") + year_digit + pynini.accep(" ") + pynini.accep(era_abbr), + pynutil.delete("năm ") + year_part + pynutil.delete(" ") + era_part, + ) + ) + + patterns.append( + pynini.compose( + pynini.accep("năm thứ ") + year_digit + pynini.accep(" ") + pynini.accep(era_abbr), + pynutil.delete("năm thứ ") + + pynutil.insert("ordinal: \"") + + year_convert + + pynutil.insert("\" ") + + pynutil.delete(" ") + + era_part, + ) + ) + self.fst = self.add_tokens(pynini.union(*patterns)) diff --git a/nemo_text_processing/text_normalization/vi/taggers/roman.py b/nemo_text_processing/text_normalization/vi/taggers/roman.py index f7e6b90a2..482e0cb38 100644 --- a/nemo_text_processing/text_normalization/vi/taggers/roman.py +++ b/nemo_text_processing/text_normalization/vi/taggers/roman.py @@ -41,51 +41,51 @@ def __init__(self, cardinal: GraphFst, deterministic: bool = True): key_word_path = get_abs_path("data/roman/key_word.tsv") for k_word in load_labels(key_word_path): key_words.append(k_word[0]) - + key_words_fst = pynini.union(*[pynini.accep(word) for word in key_words]).optimize() - + roman_numeral_path = get_abs_path("data/roman/roman_numerals.tsv") roman_numeral_pairs = load_labels(roman_numeral_path) - + roman_to_arabic = {} for roman, value in roman_numeral_pairs: roman_to_arabic[roman] = value roman_to_arabic[roman.lower()] = value - + self.arabic_to_roman = {} for roman, value in roman_numeral_pairs: self.arabic_to_roman[int(value)] = roman - + valid_roman_pairs = [] for i in range(1, 4000): roman_upper = self._int_to_roman(i) roman_lower = roman_upper.lower() 
valid_roman_pairs.append((roman_upper, str(i))) valid_roman_pairs.append((roman_lower, str(i))) - + roman_to_arabic_fst = pynini.string_map(valid_roman_pairs).optimize() - + cardinal_graph = cardinal.graph - + graph = ( - pynutil.insert("key_cardinal: \"") + - key_words_fst + - pynutil.insert("\"") + - pynini.accep(" ") + - pynutil.insert("integer: \"") + - pynini.compose(roman_to_arabic_fst, cardinal_graph) + - pynutil.insert("\"") + pynutil.insert("key_cardinal: \"") + + key_words_fst + + pynutil.insert("\"") + + pynini.accep(" ") + + pynutil.insert("integer: \"") + + pynini.compose(roman_to_arabic_fst, cardinal_graph) + + pynutil.insert("\"") ).optimize() - + self.fst = self.add_tokens(graph).optimize() - + def _int_to_roman(self, num): values = sorted(self.arabic_to_roman.keys(), reverse=True) - + roman_num = '' for value in values: while num >= value: roman_num += self.arabic_to_roman[value] num -= value - - return roman_num \ No newline at end of file + + return roman_num diff --git a/nemo_text_processing/text_normalization/vi/taggers/tokenize_and_classify.py b/nemo_text_processing/text_normalization/vi/taggers/tokenize_and_classify.py index 0925cf218..533f3c739 100644 --- a/nemo_text_processing/text_normalization/vi/taggers/tokenize_and_classify.py +++ b/nemo_text_processing/text_normalization/vi/taggers/tokenize_and_classify.py @@ -103,7 +103,7 @@ def __init__( roman = RomanFst(cardinal=cardinal, deterministic=deterministic) roman_graph = roman.fst logger.debug(f"roman: {time.time() - start_time: .2f}s -- {roman_graph.num_states()} nodes") - + classify = ( pynutil.add_weight(whitelist_graph, 1.01) | pynutil.add_weight(roman_graph, 1.1) diff --git a/nemo_text_processing/text_normalization/vi/verbalizers/date.py b/nemo_text_processing/text_normalization/vi/verbalizers/date.py index 49bdceebe..46dc402d8 100644 --- a/nemo_text_processing/text_normalization/vi/verbalizers/date.py +++ b/nemo_text_processing/text_normalization/vi/verbalizers/date.py @@ -26,10 +26,10 @@ class DateFst(GraphFst): date { month: "tư" year: "hai nghìn hai mươi tư" } -> tháng tư năm hai nghìn hai mươi tư - + date { year: "hai mươi" era: "sau công nguyên" } -> năm hai mươi sau công nguyên - + date { ordinal: "mười" era: "trước công nguyên" } -> năm thứ mười trước công nguyên """ @@ -38,40 +38,34 @@ def __init__(self, deterministic: bool = True): super().__init__(name="date", kind="verbalize", deterministic=deterministic) quoted_content = pynini.closure(NEMO_NOT_QUOTE) - day_expr = pynutil.delete("day: \"") + quoted_content + pynutil.delete("\"") day_with_prefix = pynutil.insert("ngày ") + day_expr - + month_expr = pynutil.delete("month: \"") + quoted_content + pynutil.delete("\"") month_with_prefix = pynutil.insert("tháng ") + month_expr - + year_expr = pynutil.delete("year: \"") + quoted_content + pynutil.delete("\"") year_with_prefix = pynutil.insert("năm ") + year_expr - + era_expr = pynutil.delete("era: \"") + quoted_content + pynutil.delete("\"") - + ordinal_expr = pynutil.delete("ordinal: \"") + quoted_content + pynutil.delete("\"") ordinal_with_prefix = pynutil.insert("năm thứ ") + ordinal_expr - + date_graph = pynini.union( - day_with_prefix + delete_space + insert_space + - month_with_prefix + delete_space + insert_space + - year_with_prefix, - - month_with_prefix + delete_space + insert_space + - year_with_prefix, - - day_with_prefix + delete_space + insert_space + - month_with_prefix, - + day_with_prefix + + delete_space + + insert_space + + month_with_prefix + + delete_space + + insert_space + + 
year_with_prefix, + month_with_prefix + delete_space + insert_space + year_with_prefix, + day_with_prefix + delete_space + insert_space + month_with_prefix, year_with_prefix, - - year_with_prefix + delete_space + insert_space + - era_expr, - - ordinal_with_prefix + delete_space + insert_space + - era_expr, + year_with_prefix + delete_space + insert_space + era_expr, + ordinal_with_prefix + delete_space + insert_space + era_expr, ) self.fst = self.delete_tokens(date_graph).optimize() diff --git a/nemo_text_processing/text_normalization/vi/verbalizers/roman.py b/nemo_text_processing/text_normalization/vi/verbalizers/roman.py index 76427bd42..cd1384c21 100644 --- a/nemo_text_processing/text_normalization/vi/verbalizers/roman.py +++ b/nemo_text_processing/text_normalization/vi/verbalizers/roman.py @@ -15,9 +15,7 @@ import pynini from pynini.lib import pynutil -from nemo_text_processing.text_normalization.vi.graph_utils import ( - NEMO_NOT_QUOTE, NEMO_SIGMA, GraphFst, delete_space -) +from nemo_text_processing.text_normalization.vi.graph_utils import NEMO_NOT_QUOTE, NEMO_SIGMA, GraphFst, delete_space class RomanFst(GraphFst): @@ -37,15 +35,15 @@ def __init__(self, deterministic: bool = True): super().__init__(name="roman", kind="verbalize", deterministic=deterministic) key_cardinal = pynutil.delete("key_cardinal: \"") + pynini.closure(NEMO_NOT_QUOTE) + pynutil.delete("\"") - + integer = pynutil.delete("integer: \"") + pynini.closure(NEMO_NOT_QUOTE) + pynutil.delete("\"") - + graph_with_key = key_cardinal + delete_space + pynutil.insert(" ") + integer - + graph_without_key = integer - + graph = pynini.union(graph_with_key, graph_without_key) - + delete_tokens = self.delete_tokens(graph) - - self.fst = delete_tokens.optimize() \ No newline at end of file + + self.fst = delete_tokens.optimize() diff --git a/nemo_text_processing/text_normalization/vi/verbalizers/verbalize.py b/nemo_text_processing/text_normalization/vi/verbalizers/verbalize.py index d49651e89..08343f30e 100644 --- a/nemo_text_processing/text_normalization/vi/verbalizers/verbalize.py +++ b/nemo_text_processing/text_normalization/vi/verbalizers/verbalize.py @@ -13,15 +13,14 @@ # limitations under the License. 
from nemo_text_processing.text_normalization.vi.graph_utils import GraphFst -from nemo_text_processing.text_normalization.vi.verbalizers.word import WordFst from nemo_text_processing.text_normalization.vi.verbalizers.cardinal import CardinalFst from nemo_text_processing.text_normalization.vi.verbalizers.date import DateFst from nemo_text_processing.text_normalization.vi.verbalizers.decimal import DecimalFst from nemo_text_processing.text_normalization.vi.verbalizers.fraction import FractionFst from nemo_text_processing.text_normalization.vi.verbalizers.ordinal import OrdinalFst -from nemo_text_processing.text_normalization.vi.verbalizers.date import DateFst from nemo_text_processing.text_normalization.vi.verbalizers.roman import RomanFst from nemo_text_processing.text_normalization.vi.verbalizers.whitelist import WhiteListFst +from nemo_text_processing.text_normalization.vi.verbalizers.word import WordFst class VerbalizeFst(GraphFst): @@ -55,12 +54,12 @@ def __init__(self, deterministic: bool = True): # Combine all verbalizers graph = ( - cardinal_graph - | whitelist_graph - | word_graph - | ordinal_graph - | decimal_graph - | fraction_graph + cardinal_graph + | whitelist_graph + | word_graph + | ordinal_graph + | decimal_graph + | fraction_graph | date_graph | roman_graph ) diff --git a/nemo_text_processing/text_normalization/vi/verbalizers/verbalize_final.py b/nemo_text_processing/text_normalization/vi/verbalizers/verbalize_final.py index 8911fe161..e1be8a097 100644 --- a/nemo_text_processing/text_normalization/vi/verbalizers/verbalize_final.py +++ b/nemo_text_processing/text_normalization/vi/verbalizers/verbalize_final.py @@ -23,8 +23,8 @@ delete_space, generator_main, ) -from nemo_text_processing.text_normalization.vi.verbalizers.word import WordFst from nemo_text_processing.text_normalization.vi.verbalizers.verbalize import VerbalizeFst +from nemo_text_processing.text_normalization.vi.verbalizers.word import WordFst from nemo_text_processing.utils.logging import logger From cf665c1a5c07ab0e20d9fb457c9cf3468f8dad10 Mon Sep 17 00:00:00 2001 From: folivoramanh Date: Wed, 9 Jul 2025 00:08:30 +0700 Subject: [PATCH 5/7] change header to current year Signed-off-by: folivoramanh --- nemo_text_processing/text_normalization/vi/__init__.py | 2 +- .../text_normalization/vi/data/__init__.py | 2 +- .../text_normalization/vi/data/date/__init__.py | 2 +- .../text_normalization/vi/data/fraction/__init__.py | 2 +- .../text_normalization/vi/data/numbers/__init__.py | 2 +- .../text_normalization/vi/data/roman/__init__.py | 2 +- nemo_text_processing/text_normalization/vi/graph_utils.py | 2 +- .../text_normalization/vi/taggers/__init__.py | 2 +- .../text_normalization/vi/taggers/cardinal.py | 2 +- .../text_normalization/vi/taggers/date.py | 2 +- .../text_normalization/vi/taggers/decimal.py | 2 +- .../text_normalization/vi/taggers/fraction.py | 2 +- .../text_normalization/vi/taggers/ordinal.py | 2 +- .../text_normalization/vi/taggers/punctuation.py | 2 +- .../text_normalization/vi/taggers/roman.py | 2 +- .../vi/taggers/tokenize_and_classify.py | 2 +- .../text_normalization/vi/taggers/whitelist.py | 2 +- .../text_normalization/vi/taggers/word.py | 2 +- .../text_normalization/vi/verbalizers/__init__.py | 2 +- .../text_normalization/vi/verbalizers/cardinal.py | 2 +- .../text_normalization/vi/verbalizers/date.py | 2 +- .../text_normalization/vi/verbalizers/decimal.py | 2 +- .../text_normalization/vi/verbalizers/fraction.py | 2 +- .../text_normalization/vi/verbalizers/ordinal.py | 2 +- 
.../text_normalization/vi/verbalizers/roman.py | 8 ++------ .../text_normalization/vi/verbalizers/verbalize.py | 2 +- .../text_normalization/vi/verbalizers/verbalize_final.py | 2 +- .../text_normalization/vi/verbalizers/whitelist.py | 2 +- .../text_normalization/vi/verbalizers/word.py | 2 +- tests/nemo_text_processing/vi/test_cardinal.py | 1 - tests/nemo_text_processing/vi/test_date.py | 1 - tests/nemo_text_processing/vi/test_decimal.py | 1 - tests/nemo_text_processing/vi/test_fraction.py | 1 - tests/nemo_text_processing/vi/test_ordinal.py | 1 - tests/nemo_text_processing/vi/test_roman.py | 1 - 35 files changed, 30 insertions(+), 40 deletions(-) diff --git a/nemo_text_processing/text_normalization/vi/__init__.py b/nemo_text_processing/text_normalization/vi/__init__.py index 6ebc808fa..b2de1dca7 100644 --- a/nemo_text_processing/text_normalization/vi/__init__.py +++ b/nemo_text_processing/text_normalization/vi/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/nemo_text_processing/text_normalization/vi/data/__init__.py b/nemo_text_processing/text_normalization/vi/data/__init__.py index 6ebc808fa..b2de1dca7 100644 --- a/nemo_text_processing/text_normalization/vi/data/__init__.py +++ b/nemo_text_processing/text_normalization/vi/data/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/nemo_text_processing/text_normalization/vi/data/date/__init__.py b/nemo_text_processing/text_normalization/vi/data/date/__init__.py index 6ebc808fa..b2de1dca7 100644 --- a/nemo_text_processing/text_normalization/vi/data/date/__init__.py +++ b/nemo_text_processing/text_normalization/vi/data/date/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/nemo_text_processing/text_normalization/vi/data/fraction/__init__.py b/nemo_text_processing/text_normalization/vi/data/fraction/__init__.py index 6ebc808fa..b2de1dca7 100644 --- a/nemo_text_processing/text_normalization/vi/data/fraction/__init__.py +++ b/nemo_text_processing/text_normalization/vi/data/fraction/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/nemo_text_processing/text_normalization/vi/data/numbers/__init__.py b/nemo_text_processing/text_normalization/vi/data/numbers/__init__.py index 6ebc808fa..b2de1dca7 100644 --- a/nemo_text_processing/text_normalization/vi/data/numbers/__init__.py +++ b/nemo_text_processing/text_normalization/vi/data/numbers/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/nemo_text_processing/text_normalization/vi/data/roman/__init__.py b/nemo_text_processing/text_normalization/vi/data/roman/__init__.py index 6ebc808fa..b2de1dca7 100644 --- a/nemo_text_processing/text_normalization/vi/data/roman/__init__.py +++ b/nemo_text_processing/text_normalization/vi/data/roman/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/nemo_text_processing/text_normalization/vi/graph_utils.py b/nemo_text_processing/text_normalization/vi/graph_utils.py index b8d6aa509..fae4ba088 100644 --- a/nemo_text_processing/text_normalization/vi/graph_utils.py +++ b/nemo_text_processing/text_normalization/vi/graph_utils.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # Copyright 2015 and onwards Google, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/nemo_text_processing/text_normalization/vi/taggers/__init__.py b/nemo_text_processing/text_normalization/vi/taggers/__init__.py index 6ebc808fa..b2de1dca7 100644 --- a/nemo_text_processing/text_normalization/vi/taggers/__init__.py +++ b/nemo_text_processing/text_normalization/vi/taggers/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/nemo_text_processing/text_normalization/vi/taggers/cardinal.py b/nemo_text_processing/text_normalization/vi/taggers/cardinal.py index 7f3743b05..58c59b530 100644 --- a/nemo_text_processing/text_normalization/vi/taggers/cardinal.py +++ b/nemo_text_processing/text_normalization/vi/taggers/cardinal.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/nemo_text_processing/text_normalization/vi/taggers/date.py b/nemo_text_processing/text_normalization/vi/taggers/date.py index 810c18887..36a1d1ae4 100644 --- a/nemo_text_processing/text_normalization/vi/taggers/date.py +++ b/nemo_text_processing/text_normalization/vi/taggers/date.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/nemo_text_processing/text_normalization/vi/taggers/decimal.py b/nemo_text_processing/text_normalization/vi/taggers/decimal.py index 8313ec46b..0e0d605d0 100644 --- a/nemo_text_processing/text_normalization/vi/taggers/decimal.py +++ b/nemo_text_processing/text_normalization/vi/taggers/decimal.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/nemo_text_processing/text_normalization/vi/taggers/fraction.py b/nemo_text_processing/text_normalization/vi/taggers/fraction.py index 56b452297..ed3394120 100644 --- a/nemo_text_processing/text_normalization/vi/taggers/fraction.py +++ b/nemo_text_processing/text_normalization/vi/taggers/fraction.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/nemo_text_processing/text_normalization/vi/taggers/ordinal.py b/nemo_text_processing/text_normalization/vi/taggers/ordinal.py index 0a4b81862..acacf63f7 100644 --- a/nemo_text_processing/text_normalization/vi/taggers/ordinal.py +++ b/nemo_text_processing/text_normalization/vi/taggers/ordinal.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/nemo_text_processing/text_normalization/vi/taggers/punctuation.py b/nemo_text_processing/text_normalization/vi/taggers/punctuation.py index c67129d5d..d4610b3ee 100644 --- a/nemo_text_processing/text_normalization/vi/taggers/punctuation.py +++ b/nemo_text_processing/text_normalization/vi/taggers/punctuation.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/nemo_text_processing/text_normalization/vi/taggers/roman.py b/nemo_text_processing/text_normalization/vi/taggers/roman.py index 482e0cb38..1c68c7875 100644 --- a/nemo_text_processing/text_normalization/vi/taggers/roman.py +++ b/nemo_text_processing/text_normalization/vi/taggers/roman.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/nemo_text_processing/text_normalization/vi/taggers/tokenize_and_classify.py b/nemo_text_processing/text_normalization/vi/taggers/tokenize_and_classify.py index 533f3c739..d18e04903 100644 --- a/nemo_text_processing/text_normalization/vi/taggers/tokenize_and_classify.py +++ b/nemo_text_processing/text_normalization/vi/taggers/tokenize_and_classify.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/nemo_text_processing/text_normalization/vi/taggers/whitelist.py b/nemo_text_processing/text_normalization/vi/taggers/whitelist.py index 5c2f5ff74..d2775f205 100644 --- a/nemo_text_processing/text_normalization/vi/taggers/whitelist.py +++ b/nemo_text_processing/text_normalization/vi/taggers/whitelist.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/nemo_text_processing/text_normalization/vi/taggers/word.py b/nemo_text_processing/text_normalization/vi/taggers/word.py index ca31c3ab8..d101204f1 100644 --- a/nemo_text_processing/text_normalization/vi/taggers/word.py +++ b/nemo_text_processing/text_normalization/vi/taggers/word.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/nemo_text_processing/text_normalization/vi/verbalizers/__init__.py b/nemo_text_processing/text_normalization/vi/verbalizers/__init__.py index 6ebc808fa..b2de1dca7 100644 --- a/nemo_text_processing/text_normalization/vi/verbalizers/__init__.py +++ b/nemo_text_processing/text_normalization/vi/verbalizers/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/nemo_text_processing/text_normalization/vi/verbalizers/cardinal.py b/nemo_text_processing/text_normalization/vi/verbalizers/cardinal.py index 4c0d47392..b096e759d 100644 --- a/nemo_text_processing/text_normalization/vi/verbalizers/cardinal.py +++ b/nemo_text_processing/text_normalization/vi/verbalizers/cardinal.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/nemo_text_processing/text_normalization/vi/verbalizers/date.py b/nemo_text_processing/text_normalization/vi/verbalizers/date.py index 46dc402d8..4e918e3d4 100644 --- a/nemo_text_processing/text_normalization/vi/verbalizers/date.py +++ b/nemo_text_processing/text_normalization/vi/verbalizers/date.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/nemo_text_processing/text_normalization/vi/verbalizers/decimal.py b/nemo_text_processing/text_normalization/vi/verbalizers/decimal.py
index 6d811591c..bcda3d757 100644
--- a/nemo_text_processing/text_normalization/vi/verbalizers/decimal.py
+++ b/nemo_text_processing/text_normalization/vi/verbalizers/decimal.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/nemo_text_processing/text_normalization/vi/verbalizers/fraction.py b/nemo_text_processing/text_normalization/vi/verbalizers/fraction.py
index ec814a2fb..328bbcded 100644
--- a/nemo_text_processing/text_normalization/vi/verbalizers/fraction.py
+++ b/nemo_text_processing/text_normalization/vi/verbalizers/fraction.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/nemo_text_processing/text_normalization/vi/verbalizers/ordinal.py b/nemo_text_processing/text_normalization/vi/verbalizers/ordinal.py
index cf8c5326e..0a0bf3ac0 100644
--- a/nemo_text_processing/text_normalization/vi/verbalizers/ordinal.py
+++ b/nemo_text_processing/text_normalization/vi/verbalizers/ordinal.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/nemo_text_processing/text_normalization/vi/verbalizers/roman.py b/nemo_text_processing/text_normalization/vi/verbalizers/roman.py
index cd1384c21..d98d3ae4b 100644
--- a/nemo_text_processing/text_normalization/vi/verbalizers/roman.py
+++ b/nemo_text_processing/text_normalization/vi/verbalizers/roman.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -33,17 +33,13 @@ class RomanFst(GraphFst):
 
     def __init__(self, deterministic: bool = True):
         super().__init__(name="roman", kind="verbalize", deterministic=deterministic)
-
+        
         key_cardinal = pynutil.delete("key_cardinal: \"") + pynini.closure(NEMO_NOT_QUOTE) + pynutil.delete("\"")
-
         integer = pynutil.delete("integer: \"") + pynini.closure(NEMO_NOT_QUOTE) + pynutil.delete("\"")
 
         graph_with_key = key_cardinal + delete_space + pynutil.insert(" ") + integer
-
         graph_without_key = integer
-
         graph = pynini.union(graph_with_key, graph_without_key)
-
         delete_tokens = self.delete_tokens(graph)
         self.fst = delete_tokens.optimize()
 
diff --git a/nemo_text_processing/text_normalization/vi/verbalizers/verbalize.py b/nemo_text_processing/text_normalization/vi/verbalizers/verbalize.py
index 08343f30e..3c62c9651 100644
--- a/nemo_text_processing/text_normalization/vi/verbalizers/verbalize.py
+++ b/nemo_text_processing/text_normalization/vi/verbalizers/verbalize.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/nemo_text_processing/text_normalization/vi/verbalizers/verbalize_final.py b/nemo_text_processing/text_normalization/vi/verbalizers/verbalize_final.py
index e1be8a097..aa8344459 100644
--- a/nemo_text_processing/text_normalization/vi/verbalizers/verbalize_final.py
+++ b/nemo_text_processing/text_normalization/vi/verbalizers/verbalize_final.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/nemo_text_processing/text_normalization/vi/verbalizers/whitelist.py b/nemo_text_processing/text_normalization/vi/verbalizers/whitelist.py
index 018955415..7afda862e 100644
--- a/nemo_text_processing/text_normalization/vi/verbalizers/whitelist.py
+++ b/nemo_text_processing/text_normalization/vi/verbalizers/whitelist.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/nemo_text_processing/text_normalization/vi/verbalizers/word.py b/nemo_text_processing/text_normalization/vi/verbalizers/word.py
index 0e6e07b81..78aa1d7c1 100644
--- a/nemo_text_processing/text_normalization/vi/verbalizers/word.py
+++ b/nemo_text_processing/text_normalization/vi/verbalizers/word.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/tests/nemo_text_processing/vi/test_cardinal.py b/tests/nemo_text_processing/vi/test_cardinal.py
index 636932aed..00bafe3f1 100644
--- a/tests/nemo_text_processing/vi/test_cardinal.py
+++ b/tests/nemo_text_processing/vi/test_cardinal.py
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# pytest tests/nemo_text_processing/vi/test_cardinal.py --cpu --cache-clear
 import pytest
 from parameterized import parameterized
 
diff --git a/tests/nemo_text_processing/vi/test_date.py b/tests/nemo_text_processing/vi/test_date.py
index 54e08b3fc..b3da475db 100644
--- a/tests/nemo_text_processing/vi/test_date.py
+++ b/tests/nemo_text_processing/vi/test_date.py
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# pytest tests/nemo_text_processing/vi/test_date.py --cpu --cache-clear
 import pytest
 from parameterized import parameterized
 
diff --git a/tests/nemo_text_processing/vi/test_decimal.py b/tests/nemo_text_processing/vi/test_decimal.py
index a7b2103a8..73ed99f54 100644
--- a/tests/nemo_text_processing/vi/test_decimal.py
+++ b/tests/nemo_text_processing/vi/test_decimal.py
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# pytest tests/nemo_text_processing/vi/test_decimal.py --cpu --cache-clear
 import pytest
 from parameterized import parameterized
 
diff --git a/tests/nemo_text_processing/vi/test_fraction.py b/tests/nemo_text_processing/vi/test_fraction.py
index 1751c7b8a..efa35fcce 100644
--- a/tests/nemo_text_processing/vi/test_fraction.py
+++ b/tests/nemo_text_processing/vi/test_fraction.py
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# pytest tests/nemo_text_processing/vi/test_fraction.py --cpu --cache-clear
 import pytest
 from parameterized import parameterized
 
diff --git a/tests/nemo_text_processing/vi/test_ordinal.py b/tests/nemo_text_processing/vi/test_ordinal.py
index 3235e407a..9b15bd0c4 100644
--- a/tests/nemo_text_processing/vi/test_ordinal.py
+++ b/tests/nemo_text_processing/vi/test_ordinal.py
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# pytest tests/nemo_text_processing/vi/test_ordinal.py --cpu --cache-clear
 import pytest
 from parameterized import parameterized
 
diff --git a/tests/nemo_text_processing/vi/test_roman.py b/tests/nemo_text_processing/vi/test_roman.py
index a8ee137d8..22d1584bb 100644
--- a/tests/nemo_text_processing/vi/test_roman.py
+++ b/tests/nemo_text_processing/vi/test_roman.py
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# pytest tests/nemo_text_processing/vi/test_roman.py --cpu --cache-clear
 import pytest
 from parameterized import parameterized
 

From dc6b6e54d094437ded2e035eaeb07e7671d9e1b9 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 8 Jul 2025 17:11:05 +0000
Subject: [PATCH 6/7] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 nemo_text_processing/text_normalization/vi/verbalizers/roman.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nemo_text_processing/text_normalization/vi/verbalizers/roman.py b/nemo_text_processing/text_normalization/vi/verbalizers/roman.py
index d98d3ae4b..977f7e313 100644
--- a/nemo_text_processing/text_normalization/vi/verbalizers/roman.py
+++ b/nemo_text_processing/text_normalization/vi/verbalizers/roman.py
@@ -33,7 +33,7 @@ class RomanFst(GraphFst):
 
     def __init__(self, deterministic: bool = True):
         super().__init__(name="roman", kind="verbalize", deterministic=deterministic)
-        
+
         key_cardinal = pynutil.delete("key_cardinal: \"") + pynini.closure(NEMO_NOT_QUOTE) + pynutil.delete("\"")
         integer = pynutil.delete("integer: \"") + pynini.closure(NEMO_NOT_QUOTE) + pynutil.delete("\"")
 

From c6f90a7a44f6581d108e0e8fe52103fbbc77db38 Mon Sep 17 00:00:00 2001
From: folivoramanh
Date: Wed, 9 Jul 2025 00:24:56 +0700
Subject: [PATCH 7/7] change header time

Signed-off-by: folivoramanh
---
 tests/nemo_text_processing/vi/test_roman.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/nemo_text_processing/vi/test_roman.py b/tests/nemo_text_processing/vi/test_roman.py
index 22d1584bb..a942eb140 100644
--- a/tests/nemo_text_processing/vi/test_roman.py
+++ b/tests/nemo_text_processing/vi/test_roman.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
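Reviewer note (not part of the patch): the RomanFst hunks above only remove blank lines and trailing whitespace, so the verbalizer graph is unchanged. For readers unfamiliar with the pattern, the following is a minimal, illustrative sketch of what that field-level graph does, assuming pynini is installed and reusing NEMO_NOT_QUOTE / delete_space from the English graph_utils as roman.py does; the tagged field values are hypothetical examples, not outputs of the vi taggers.

# Illustrative sketch only; not the NeMo pipeline, just the field-level rewrite.
import pynini
from pynini.lib import pynutil, rewrite

from nemo_text_processing.text_normalization.en.graph_utils import NEMO_NOT_QUOTE, delete_space

# Strip the field names and quotes, keep the pre-verbalized values,
# and join key_cardinal + integer with a single space.
key_cardinal = pynutil.delete('key_cardinal: "') + pynini.closure(NEMO_NOT_QUOTE) + pynutil.delete('"')
integer = pynutil.delete('integer: "') + pynini.closure(NEMO_NOT_QUOTE) + pynutil.delete('"')
graph = pynini.union(key_cardinal + delete_space + pynutil.insert(" ") + integer, integer)

# Hypothetical tagged fields -> verbalized text.
print(rewrite.top_rewrite('key_cardinal: "thứ" integer: "hai"', graph))  # thứ hai
print(rewrite.top_rewrite('integer: "ba"', graph))  # ba

The union keeps integer-only tokens valid, which is why graph_without_key is retained alongside graph_with_key in the verbalizer.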