From 60314eb4e373989b9b63eaed97096d126235d851 Mon Sep 17 00:00:00 2001 From: folivoramanh Date: Fri, 27 Jun 2025 14:56:58 +0700 Subject: [PATCH 1/4] Fraction class for Vietnamese TN Signed-off-by: folivoramanh --- .../vi/data/fraction/__init__.py | 13 ++++ .../data/fraction/denominator_exceptions.tsv | 1 + .../text_normalization/vi/taggers/fraction.py | 59 +++++++++++++++++++ .../vi/taggers/tokenize_and_classify.py | 7 +++ .../vi/verbalizers/fraction.py | 53 +++++++++++++++++ .../vi/verbalizers/verbalize.py | 6 +- .../test_cases_fraction.txt | 15 +++++ .../nemo_text_processing/vi/test_fraction.py | 45 +++++++++----- .../vi/test_sparrowhawk_normalization.sh | 8 +-- 9 files changed, 187 insertions(+), 20 deletions(-) create mode 100644 nemo_text_processing/text_normalization/vi/data/fraction/__init__.py create mode 100644 nemo_text_processing/text_normalization/vi/data/fraction/denominator_exceptions.tsv create mode 100644 nemo_text_processing/text_normalization/vi/taggers/fraction.py create mode 100644 nemo_text_processing/text_normalization/vi/verbalizers/fraction.py create mode 100644 tests/nemo_text_processing/vi/data_text_normalization/test_cases_fraction.txt diff --git a/nemo_text_processing/text_normalization/vi/data/fraction/__init__.py b/nemo_text_processing/text_normalization/vi/data/fraction/__init__.py new file mode 100644 index 000000000..6ebc808fa --- /dev/null +++ b/nemo_text_processing/text_normalization/vi/data/fraction/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo_text_processing/text_normalization/vi/data/fraction/denominator_exceptions.tsv b/nemo_text_processing/text_normalization/vi/data/fraction/denominator_exceptions.tsv new file mode 100644 index 000000000..7b305e655 --- /dev/null +++ b/nemo_text_processing/text_normalization/vi/data/fraction/denominator_exceptions.tsv @@ -0,0 +1 @@ +4 tư \ No newline at end of file diff --git a/nemo_text_processing/text_normalization/vi/taggers/fraction.py b/nemo_text_processing/text_normalization/vi/taggers/fraction.py new file mode 100644 index 000000000..6eeab1622 --- /dev/null +++ b/nemo_text_processing/text_normalization/vi/taggers/fraction.py @@ -0,0 +1,59 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import pynini +from pynini.lib import pynutil + +from nemo_text_processing.text_normalization.en.graph_utils import GraphFst +from nemo_text_processing.text_normalization.vi.taggers.cardinal import CardinalFst +from nemo_text_processing.text_normalization.vi.utils import get_abs_path, load_labels + + +class FractionFst(GraphFst): + """ + Finite state transducer for classifying Vietnamese fraction numbers, e.g. + 23 1/5 -> fraction { integer_part: "hai mươi ba" numerator: "một" denominator: "năm" } + 3/9 -> fraction { numerator: "ba" denominator: "chín" } + 1/4 -> fraction { numerator: "một" denominator: "tư" } + + Args: + cardinal: CardinalFst for converting numbers to Vietnamese words + deterministic: if True will provide a single transduction option, + for False multiple options (used for audio-based normalization) + """ + + def __init__(self, cardinal: CardinalFst, deterministic: bool = True): + super().__init__(name="fraction", kind="classify", deterministic=deterministic) + + cardinal_graph = cardinal.graph + digit = pynini.union(*[str(i) for i in range(10)]) + number = pynini.closure(digit, 1) + + denominator_exceptions = { + row[0]: row[1] for row in load_labels(get_abs_path("data/fraction/denominator_exceptions.tsv")) + } + + denominator_exception_patterns = [pynini.cross(k, v) for k, v in denominator_exceptions.items()] + denominator_exception_graph = pynini.union(*denominator_exception_patterns) if denominator_exception_patterns else None + denominator_graph = pynini.union(denominator_exception_graph, cardinal_graph) if denominator_exception_graph else cardinal_graph + + numerator = pynutil.insert("numerator: \"") + (number @ cardinal_graph) + pynutil.insert("\" ") + pynutil.delete("/") + denominator = pynutil.insert("denominator: \"") + (number @ denominator_graph) + pynutil.insert("\"") + integer_part = pynutil.insert("integer_part: \"") + (number @ cardinal_graph) + pynutil.insert("\" ") + + simple_fraction = numerator + denominator + mixed_fraction 
= integer_part + pynutil.delete(" ") + numerator + denominator + optional_graph_negative = (pynutil.insert("negative: ") + pynini.cross("-", "\"true\" ")).ques + + self.fst = self.add_tokens(optional_graph_negative + (simple_fraction | mixed_fraction)).optimize() \ No newline at end of file diff --git a/nemo_text_processing/text_normalization/vi/taggers/tokenize_and_classify.py b/nemo_text_processing/text_normalization/vi/taggers/tokenize_and_classify.py index 53a2822c8..6029b84e5 100644 --- a/nemo_text_processing/text_normalization/vi/taggers/tokenize_and_classify.py +++ b/nemo_text_processing/text_normalization/vi/taggers/tokenize_and_classify.py @@ -27,6 +27,7 @@ from nemo_text_processing.text_normalization.vi.taggers.cardinal import CardinalFst from nemo_text_processing.text_normalization.vi.taggers.decimal import DecimalFst from nemo_text_processing.text_normalization.vi.taggers.ordinal import OrdinalFst +from nemo_text_processing.text_normalization.vi.taggers.fraction import FractionFst from nemo_text_processing.text_normalization.vi.taggers.punctuation import PunctuationFst from nemo_text_processing.text_normalization.vi.taggers.whitelist import WhiteListFst from nemo_text_processing.text_normalization.vi.taggers.word import WordFst @@ -86,11 +87,17 @@ def __init__( decimal_graph = decimal.fst logger.debug(f"decimal: {time.time() - start_time: .2f}s -- {decimal_graph.num_states()} nodes") + start_time = time.time() + fraction = FractionFst(cardinal=cardinal, deterministic=deterministic) + fraction_graph = fraction.fst + logger.debug(f"fraction: {time.time() - start_time: .2f}s -- {fraction_graph.num_states()} nodes") + classify = ( pynutil.add_weight(whitelist_graph, 0.8) | pynutil.add_weight(ordinal_graph, 0.81) | pynutil.add_weight(decimal_graph, 0.85) | pynutil.add_weight(cardinal_graph, 0.9) + | pynutil.add_weight(fraction_graph, 1.0) | pynutil.add_weight(word_graph, 100) ) punct = pynutil.insert("tokens { ") + pynutil.add_weight(punct_graph, weight=2.1) 
+ pynutil.insert(" }") diff --git a/nemo_text_processing/text_normalization/vi/verbalizers/fraction.py b/nemo_text_processing/text_normalization/vi/verbalizers/fraction.py new file mode 100644 index 000000000..0060f8b2f --- /dev/null +++ b/nemo_text_processing/text_normalization/vi/verbalizers/fraction.py @@ -0,0 +1,53 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pynini +from pynini.lib import pynutil + +from nemo_text_processing.text_normalization.en.graph_utils import NEMO_NOT_QUOTE, GraphFst, delete_space + + +class FractionFst(GraphFst): + """ + Finite state transducer for verbalizing Vietnamese fraction numbers, e.g. 
+ fraction { negative: "true" integer_part: "hai mươi ba" numerator: "một" denominator: "năm" } -> âm hai mươi ba và một phần năm + fraction { numerator: "ba" denominator: "chín" } -> ba phần chín + fraction { integer_part: "một trăm" numerator: "hai" denominator: "ba" } -> một trăm và hai phần ba + + Args: + deterministic: if True will provide a single transduction option, + for False multiple options (used for audio-based normalization) + """ + + def __init__(self, deterministic: bool = True): + super().__init__(name="fraction", kind="verbalize", deterministic=deterministic) + + optional_sign = pynini.cross("negative: \"true\"", "âm ") + if not deterministic: + optional_sign |= pynini.cross("negative: \"true\"", "trừ ") + optional_sign = pynini.closure(optional_sign + delete_space, 0, 1) + + part = pynini.closure(NEMO_NOT_QUOTE) + delete_quotes = delete_space + pynutil.delete("\"") + part + pynutil.delete("\"") + + integer_tagged = pynutil.delete("integer_part:") + delete_quotes + numerator_tagged = pynutil.delete("numerator:") + delete_quotes + denominator_tagged = pynutil.delete("denominator:") + delete_quotes + + fraction_part = numerator_tagged + delete_space + pynutil.insert(" phần ") + denominator_tagged + + simple_fraction = fraction_part + mixed_fraction = integer_tagged + delete_space + pynutil.insert(" và ") + fraction_part + + self.fst = self.delete_tokens(optional_sign + (simple_fraction | mixed_fraction)).optimize() \ No newline at end of file diff --git a/nemo_text_processing/text_normalization/vi/verbalizers/verbalize.py b/nemo_text_processing/text_normalization/vi/verbalizers/verbalize.py index b61776c01..c30bb03c4 100644 --- a/nemo_text_processing/text_normalization/vi/verbalizers/verbalize.py +++ b/nemo_text_processing/text_normalization/vi/verbalizers/verbalize.py @@ -17,6 +17,7 @@ from nemo_text_processing.text_normalization.vi.verbalizers.cardinal import CardinalFst from nemo_text_processing.text_normalization.vi.verbalizers.decimal import 
DecimalFst from nemo_text_processing.text_normalization.vi.verbalizers.ordinal import OrdinalFst +from nemo_text_processing.text_normalization.vi.verbalizers.fraction import FractionFst from nemo_text_processing.text_normalization.vi.verbalizers.whitelist import WhiteListFst @@ -40,7 +41,10 @@ def __init__(self, deterministic: bool = True): decimal = DecimalFst(cardinal=cardinal, deterministic=deterministic) decimal_graph = decimal.fst + fraction = FractionFst(deterministic=deterministic) + fraction_graph = fraction.fst + # Combine all verbalizers - graph = cardinal_graph | whitelist_graph | word_graph | ordinal_graph | decimal_graph + graph = cardinal_graph | whitelist_graph | word_graph | ordinal_graph | decimal_graph | fraction_graph self.fst = graph diff --git a/tests/nemo_text_processing/vi/data_text_normalization/test_cases_fraction.txt b/tests/nemo_text_processing/vi/data_text_normalization/test_cases_fraction.txt new file mode 100644 index 000000000..1d73ebfd7 --- /dev/null +++ b/tests/nemo_text_processing/vi/data_text_normalization/test_cases_fraction.txt @@ -0,0 +1,15 @@ +0~không +1/2~một phần hai +4/9~bốn phần chín +9/4~chín phần tư +1/4~một phần tư +3/4~ba phần tư +15/5~mười lăm phần năm +1/3~một phần ba +2/10~hai phần mười +23 1/5~hai mươi ba và một phần năm +-3/4~âm ba phần tư +-12 1/4 nha~âm mười hai và một phần tư nha +-5 2/3~âm năm và hai phần ba +5 1/2~năm và một phần hai +0,5~không phẩy năm \ No newline at end of file diff --git a/tests/nemo_text_processing/vi/test_fraction.py b/tests/nemo_text_processing/vi/test_fraction.py index acd465cfd..32d45bff1 100644 --- a/tests/nemo_text_processing/vi/test_fraction.py +++ b/tests/nemo_text_processing/vi/test_fraction.py @@ -12,32 +12,47 @@ # See the License for the specific language governing permissions and # limitations under the License. 
- +# pytest tests/nemo_text_processing/vi/test_fraction.py --cpu --cache-clear import pytest from parameterized import parameterized -from ..utils import CACHE_DIR, parse_test_case_file - -try: - from nemo_text_processing.inverse_text_normalization.inverse_normalize import InverseNormalizer +from nemo_text_processing.inverse_text_normalization.inverse_normalize import InverseNormalizer +from nemo_text_processing.text_normalization.normalize import Normalizer +from nemo_text_processing.text_normalization.normalize_with_audio import NormalizerWithAudio - PYNINI_AVAILABLE = True -except (ImportError, ModuleNotFoundError): - PYNINI_AVAILABLE = False +from ..utils import CACHE_DIR, RUN_AUDIO_BASED_TESTS, parse_test_case_file class TestFraction: - inverse_normalizer = ( - InverseNormalizer(lang='vi', cache_dir=CACHE_DIR, overwrite_cache=False) if PYNINI_AVAILABLE else None - ) + inverse_normalizer = InverseNormalizer(lang='vi', cache_dir=CACHE_DIR, overwrite_cache=False) + @parameterized.expand(parse_test_case_file('vi/data_inverse_text_normalization/test_cases_fraction.txt')) - @pytest.mark.skipif( - not PYNINI_AVAILABLE, - reason="`pynini` not installed, please install via nemo_text_processing/pynini_install.sh", - ) @pytest.mark.run_only_on('CPU') @pytest.mark.unit def test_denorm(self, test_input, expected): pred = self.inverse_normalizer.inverse_normalize(test_input, verbose=False) assert pred == expected + + normalizer = Normalizer(input_case='cased', lang='vi', cache_dir=CACHE_DIR, overwrite_cache=False, post_process=True) + + normalizer_with_audio = ( + NormalizerWithAudio(input_case='cased', lang='vi', cache_dir=CACHE_DIR, overwrite_cache=False) + if CACHE_DIR and RUN_AUDIO_BASED_TESTS + else None + ) + + @parameterized.expand(parse_test_case_file('vi/data_text_normalization/test_cases_fraction.txt')) + @pytest.mark.run_only_on('CPU') + @pytest.mark.unit + def test_norm(self, test_input, expected): + pred = self.normalizer.normalize(test_input, verbose=False, 
punct_post_process=False) + assert pred == expected, f"input: {test_input}" + + if self.normalizer_with_audio: + pred_non_deterministic = self.normalizer_with_audio.normalize( + test_input, + n_tagged=30, + punct_post_process=False, + ) + assert expected in pred_non_deterministic, f"input: {test_input}" \ No newline at end of file diff --git a/tests/nemo_text_processing/vi/test_sparrowhawk_normalization.sh b/tests/nemo_text_processing/vi/test_sparrowhawk_normalization.sh index ab3c03d86..6a277c28c 100644 --- a/tests/nemo_text_processing/vi/test_sparrowhawk_normalization.sh +++ b/tests/nemo_text_processing/vi/test_sparrowhawk_normalization.sh @@ -43,10 +43,10 @@ testTNOrdinal() { runtest $input } -# testTNFraction() { -# input=$PROJECT_DIR/vi/data_text_normalization/test_cases_fraction.txt -# runtest $input -# } +testTNFraction() { + input=$PROJECT_DIR/vi/data_text_normalization/test_cases_fraction.txt + runtest $input +} # testTNTime() { # input=$PROJECT_DIR/vi/data_text_normalization/test_cases_time.txt From 6b8dff2add52c3e607264cae0f8aa14d5a641a0a Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 27 Jun 2025 08:16:02 +0000 Subject: [PATCH 2/4] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../text_normalization/vi/taggers/fraction.py | 16 ++++++++++++---- .../vi/taggers/tokenize_and_classify.py | 4 ++-- .../vi/verbalizers/fraction.py | 6 +++--- .../vi/verbalizers/verbalize.py | 2 +- tests/nemo_text_processing/vi/test_fraction.py | 12 +++++++----- 5 files changed, 25 insertions(+), 15 deletions(-) diff --git a/nemo_text_processing/text_normalization/vi/taggers/fraction.py b/nemo_text_processing/text_normalization/vi/taggers/fraction.py index 6eeab1622..807e96dab 100644 --- a/nemo_text_processing/text_normalization/vi/taggers/fraction.py +++ b/nemo_text_processing/text_normalization/vi/taggers/fraction.py @@ -45,10 +45,18 @@ def 
__init__(self, cardinal: CardinalFst, deterministic: bool = True): } denominator_exception_patterns = [pynini.cross(k, v) for k, v in denominator_exceptions.items()] - denominator_exception_graph = pynini.union(*denominator_exception_patterns) if denominator_exception_patterns else None - denominator_graph = pynini.union(denominator_exception_graph, cardinal_graph) if denominator_exception_graph else cardinal_graph + denominator_exception_graph = ( + pynini.union(*denominator_exception_patterns) if denominator_exception_patterns else None + ) + denominator_graph = ( + pynini.union(denominator_exception_graph, cardinal_graph) + if denominator_exception_graph + else cardinal_graph + ) - numerator = pynutil.insert("numerator: \"") + (number @ cardinal_graph) + pynutil.insert("\" ") + pynutil.delete("/") + numerator = ( + pynutil.insert("numerator: \"") + (number @ cardinal_graph) + pynutil.insert("\" ") + pynutil.delete("/") + ) denominator = pynutil.insert("denominator: \"") + (number @ denominator_graph) + pynutil.insert("\"") integer_part = pynutil.insert("integer_part: \"") + (number @ cardinal_graph) + pynutil.insert("\" ") @@ -56,4 +64,4 @@ def __init__(self, cardinal: CardinalFst, deterministic: bool = True): mixed_fraction = integer_part + pynutil.delete(" ") + numerator + denominator optional_graph_negative = (pynutil.insert("negative: ") + pynini.cross("-", "\"true\" ")).ques - self.fst = self.add_tokens(optional_graph_negative + (simple_fraction | mixed_fraction)).optimize() \ No newline at end of file + self.fst = self.add_tokens(optional_graph_negative + (simple_fraction | mixed_fraction)).optimize() diff --git a/nemo_text_processing/text_normalization/vi/taggers/tokenize_and_classify.py b/nemo_text_processing/text_normalization/vi/taggers/tokenize_and_classify.py index 6029b84e5..73feb7182 100644 --- a/nemo_text_processing/text_normalization/vi/taggers/tokenize_and_classify.py +++ 
b/nemo_text_processing/text_normalization/vi/taggers/tokenize_and_classify.py @@ -26,8 +26,8 @@ ) from nemo_text_processing.text_normalization.vi.taggers.cardinal import CardinalFst from nemo_text_processing.text_normalization.vi.taggers.decimal import DecimalFst -from nemo_text_processing.text_normalization.vi.taggers.ordinal import OrdinalFst from nemo_text_processing.text_normalization.vi.taggers.fraction import FractionFst +from nemo_text_processing.text_normalization.vi.taggers.ordinal import OrdinalFst from nemo_text_processing.text_normalization.vi.taggers.punctuation import PunctuationFst from nemo_text_processing.text_normalization.vi.taggers.whitelist import WhiteListFst from nemo_text_processing.text_normalization.vi.taggers.word import WordFst @@ -97,7 +97,7 @@ def __init__( | pynutil.add_weight(ordinal_graph, 0.81) | pynutil.add_weight(decimal_graph, 0.85) | pynutil.add_weight(cardinal_graph, 0.9) - | pynutil.add_weight(fraction_graph, 1.0) + | pynutil.add_weight(fraction_graph, 1.0) | pynutil.add_weight(word_graph, 100) ) punct = pynutil.insert("tokens { ") + pynutil.add_weight(punct_graph, weight=2.1) + pynutil.insert(" }") diff --git a/nemo_text_processing/text_normalization/vi/verbalizers/fraction.py b/nemo_text_processing/text_normalization/vi/verbalizers/fraction.py index 0060f8b2f..77ace3454 100644 --- a/nemo_text_processing/text_normalization/vi/verbalizers/fraction.py +++ b/nemo_text_processing/text_normalization/vi/verbalizers/fraction.py @@ -40,14 +40,14 @@ def __init__(self, deterministic: bool = True): part = pynini.closure(NEMO_NOT_QUOTE) delete_quotes = delete_space + pynutil.delete("\"") + part + pynutil.delete("\"") - + integer_tagged = pynutil.delete("integer_part:") + delete_quotes numerator_tagged = pynutil.delete("numerator:") + delete_quotes denominator_tagged = pynutil.delete("denominator:") + delete_quotes fraction_part = numerator_tagged + delete_space + pynutil.insert(" phần ") + denominator_tagged - + simple_fraction = 
fraction_part mixed_fraction = integer_tagged + delete_space + pynutil.insert(" và ") + fraction_part - self.fst = self.delete_tokens(optional_sign + (simple_fraction | mixed_fraction)).optimize() \ No newline at end of file + self.fst = self.delete_tokens(optional_sign + (simple_fraction | mixed_fraction)).optimize() diff --git a/nemo_text_processing/text_normalization/vi/verbalizers/verbalize.py b/nemo_text_processing/text_normalization/vi/verbalizers/verbalize.py index c30bb03c4..e3d34b968 100644 --- a/nemo_text_processing/text_normalization/vi/verbalizers/verbalize.py +++ b/nemo_text_processing/text_normalization/vi/verbalizers/verbalize.py @@ -16,8 +16,8 @@ from nemo_text_processing.text_normalization.en.verbalizers.word import WordFst from nemo_text_processing.text_normalization.vi.verbalizers.cardinal import CardinalFst from nemo_text_processing.text_normalization.vi.verbalizers.decimal import DecimalFst -from nemo_text_processing.text_normalization.vi.verbalizers.ordinal import OrdinalFst from nemo_text_processing.text_normalization.vi.verbalizers.fraction import FractionFst +from nemo_text_processing.text_normalization.vi.verbalizers.ordinal import OrdinalFst from nemo_text_processing.text_normalization.vi.verbalizers.whitelist import WhiteListFst diff --git a/tests/nemo_text_processing/vi/test_fraction.py b/tests/nemo_text_processing/vi/test_fraction.py index 32d45bff1..1751c7b8a 100644 --- a/tests/nemo_text_processing/vi/test_fraction.py +++ b/tests/nemo_text_processing/vi/test_fraction.py @@ -25,8 +25,8 @@ class TestFraction: - inverse_normalizer = InverseNormalizer(lang='vi', cache_dir=CACHE_DIR, overwrite_cache=False) - + inverse_normalizer = InverseNormalizer(lang='vi', cache_dir=CACHE_DIR, overwrite_cache=False) + @parameterized.expand(parse_test_case_file('vi/data_inverse_text_normalization/test_cases_fraction.txt')) @pytest.mark.run_only_on('CPU') @pytest.mark.unit @@ -34,8 +34,10 @@ def test_denorm(self, test_input, expected): pred = 
self.inverse_normalizer.inverse_normalize(test_input, verbose=False) assert pred == expected - normalizer = Normalizer(input_case='cased', lang='vi', cache_dir=CACHE_DIR, overwrite_cache=False, post_process=True) - + normalizer = Normalizer( + input_case='cased', lang='vi', cache_dir=CACHE_DIR, overwrite_cache=False, post_process=True + ) + normalizer_with_audio = ( NormalizerWithAudio(input_case='cased', lang='vi', cache_dir=CACHE_DIR, overwrite_cache=False) if CACHE_DIR and RUN_AUDIO_BASED_TESTS @@ -55,4 +57,4 @@ def test_norm(self, test_input, expected): n_tagged=30, punct_post_process=False, ) - assert expected in pred_non_deterministic, f"input: {test_input}" \ No newline at end of file + assert expected in pred_non_deterministic, f"input: {test_input}" From 902c5901e18748f5d2b5e4aa58d3ef9693cb182c Mon Sep 17 00:00:00 2001 From: folivoramanh Date: Mon, 30 Jun 2025 22:51:14 +0700 Subject: [PATCH 3/4] Remove irrelevant test case Signed-off-by: folivoramanh --- .../vi/data_text_normalization/test_cases_fraction.txt | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/nemo_text_processing/vi/data_text_normalization/test_cases_fraction.txt b/tests/nemo_text_processing/vi/data_text_normalization/test_cases_fraction.txt index 1d73ebfd7..27ce6127c 100644 --- a/tests/nemo_text_processing/vi/data_text_normalization/test_cases_fraction.txt +++ b/tests/nemo_text_processing/vi/data_text_normalization/test_cases_fraction.txt @@ -1,4 +1,3 @@ -0~không 1/2~một phần hai 4/9~bốn phần chín 9/4~chín phần tư @@ -11,5 +10,4 @@ -3/4~âm ba phần tư -12 1/4 nha~âm mười hai và một phần tư nha -5 2/3~âm năm và hai phần ba -5 1/2~năm và một phần hai -0,5~không phẩy năm \ No newline at end of file +5 1/2~năm và một phần hai\ \ No newline at end of file From c3aaaf6f066c80855312f289f816f1961867f1fa Mon Sep 17 00:00:00 2001 From: folivoramanh Date: Mon, 30 Jun 2025 22:51:25 +0700 Subject: [PATCH 4/4] Remove irrelevant test case Signed-off-by: folivoramanh ---
.../vi/data_text_normalization/test_cases_fraction.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/nemo_text_processing/vi/data_text_normalization/test_cases_fraction.txt b/tests/nemo_text_processing/vi/data_text_normalization/test_cases_fraction.txt index 27ce6127c..1ccd7af94 100644 --- a/tests/nemo_text_processing/vi/data_text_normalization/test_cases_fraction.txt +++ b/tests/nemo_text_processing/vi/data_text_normalization/test_cases_fraction.txt @@ -10,4 +10,4 @@ -3/4~âm ba phần tư -12 1/4 nha~âm mười hai và một phần tư nha -5 2/3~âm năm và hai phần ba -5 1/2~năm và một phần hai\ \ No newline at end of file +5 1/2~năm và một phần hai \ No newline at end of file