NVIDIA · tbartley94 · Oct 12, 2025 · Oct 6, 2025 · Oct 6, 2025 · Oct 6, 2025
diff --git a/nemo_text_processing/text_normalization/vi/verbalizers/post_processing.py b/nemo_text_processing/text_normalization/vi/verbalizers/post_processing.py
@@ -55,7 +55,7 @@ def get_vietnamese_punct_config(self) -> Dict[str, List[str]]:
         """
         return {
             # Punctuation that should not have space before them
-            'no_space_before': [",", ".", "!", "?", ":", ";", ")", r"\]", "}", "\""],
+            'no_space_before': [",", ".", "!", "?", ":", ";", ")", r"\]", "}"],
             # Punctuation that should not have space after them
             'no_space_after': ["(", r"\[", "{"],
             # Punctuation that can have space before them (exceptions)

diff --git a/nemo_text_processing/text_normalization/vi/verbalizers/range.py b/nemo_text_processing/text_normalization/vi/verbalizers/range.py
@@ -0,0 +1,40 @@
+# Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pynini
+from pynini.lib import pynutil
+
+from nemo_text_processing.text_normalization.vi.graph_utils import NEMO_CHAR, NEMO_SIGMA, GraphFst, delete_space
+
+
+class RangeFst(GraphFst):
+    """
+    Finite state transducer for verbalizing Vietnamese ranges.
+    The tagger already verbalizes ranges; this only drops the name: "..." wrapper and maps U+00A0 to " ".
+        e.g. tokens { name: "mười nghìn đến hai mươi nghìn" } -> mười nghìn đến hai mươi nghìn
+
+    Args:
+        deterministic: if True will provide a single transduction option,
+            for False multiple transductions are generated (used for audio-based normalization)
+    """
+
+    def __init__(self, deterministic: bool = True):
+        super().__init__(name="range", kind="verbalize", deterministic=deterministic)
+
+        # Payload is 1+ NEMO_CHAR symbols excluding plain space; U+00A0 is rewritten to " " after extraction
+        chars = pynini.closure(NEMO_CHAR - " ", 1)
+        char = pynutil.delete("name:") + delete_space + pynutil.delete("\"") + chars + pynutil.delete("\"")
+        graph = char @ pynini.cdrewrite(pynini.cross(u"\u00a0", " "), "", "", NEMO_SIGMA)
+
+        self.fst = graph.optimize()
diff --git a/nemo_text_processing/text_normalization/vi/verbalizers/verbalize.py b/nemo_text_processing/text_normalization/vi/verbalizers/verbalize.py
@@ -20,6 +20,7 @@
 from nemo_text_processing.text_normalization.vi.verbalizers.measure import MeasureFst
 from nemo_text_processing.text_normalization.vi.verbalizers.money import MoneyFst
 from nemo_text_processing.text_normalization.vi.verbalizers.ordinal import OrdinalFst
+from nemo_text_processing.text_normalization.vi.verbalizers.range import RangeFst
 from nemo_text_processing.text_normalization.vi.verbalizers.roman import RomanFst
 from nemo_text_processing.text_normalization.vi.verbalizers.time import TimeFst
 from nemo_text_processing.text_normalization.vi.verbalizers.whitelist import WhiteListFst
@@ -63,6 +64,9 @@ def __init__(self, deterministic: bool = True):
         measure = MeasureFst(decimal=decimal, cardinal=cardinal, fraction=fraction, deterministic=deterministic)
         measure_graph = measure.fst
 
+        range_fst = RangeFst(deterministic=deterministic)
+        range_graph = range_fst.fst
+
         graph = (
             cardinal_graph
             | whitelist_graph
@@ -75,6 +79,7 @@ def __init__(self, deterministic: bool = True):
             | time_graph
             | money_graph
             | measure_graph
+            | range_graph
         )
 
         self.fst = graph