diff --git a/nemo_text_processing/text_normalization/vi/data/fraction/__init__.py b/nemo_text_processing/text_normalization/vi/data/fraction/__init__.py new file mode 100644 index 000000000..6ebc808fa --- /dev/null +++ b/nemo_text_processing/text_normalization/vi/data/fraction/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemo_text_processing/text_normalization/vi/data/fraction/denominator_exceptions.tsv b/nemo_text_processing/text_normalization/vi/data/fraction/denominator_exceptions.tsv new file mode 100644 index 000000000..7b305e655 --- /dev/null +++ b/nemo_text_processing/text_normalization/vi/data/fraction/denominator_exceptions.tsv @@ -0,0 +1 @@ +4 tư \ No newline at end of file diff --git a/nemo_text_processing/text_normalization/vi/taggers/fraction.py b/nemo_text_processing/text_normalization/vi/taggers/fraction.py new file mode 100644 index 000000000..807e96dab --- /dev/null +++ b/nemo_text_processing/text_normalization/vi/taggers/fraction.py @@ -0,0 +1,67 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
class FractionFst(GraphFst):
    """
    Finite state transducer for classifying Vietnamese fraction numbers, e.g.
        23 1/5 -> fraction { integer_part: "hai mươi ba" numerator: "một" denominator: "năm" }
        3/9 -> fraction { numerator: "ba" denominator: "chín" }
        1/4 -> fraction { numerator: "một" denominator: "tư" }
        -3/4 -> fraction { negative: "true" numerator: "ba" denominator: "tư" }

    Denominators listed in ``data/fraction/denominator_exceptions.tsv`` use the
    reading given there instead of the regular cardinal reading.

    Args:
        cardinal: CardinalFst for converting numbers to Vietnamese words
        deterministic: if True will provide a single transduction option,
            for False multiple options (used for audio-based normalization)
    """

    def __init__(self, cardinal: CardinalFst, deterministic: bool = True):
        super().__init__(name="fraction", kind="classify", deterministic=deterministic)

        cardinal_graph = cardinal.graph
        digit = pynini.union(*[str(i) for i in range(10)])
        number = pynini.closure(digit, 1)

        # written denominator -> spoken form; rows without both columns
        # (e.g. a stray blank line in the TSV) are ignored instead of raising.
        denominator_exceptions = {
            row[0]: row[1]
            for row in load_labels(get_abs_path("data/fraction/denominator_exceptions.tsv"))
            if len(row) >= 2
        }

        denominator_graph = cardinal_graph
        if denominator_exceptions:
            exception_graph = pynini.union(
                *[pynini.cross(written, spoken) for written, spoken in denominator_exceptions.items()]
            )
            if deterministic:
                # A plain union would leave both the exception reading and the
                # regular cardinal reading for the same input with nothing to rank
                # them. Remove the exception inputs from the cardinal branch so the
                # exception is the only reading in deterministic mode.
                exception_inputs = pynini.union(
                    *[pynini.accep(written) for written in denominator_exceptions]
                ).optimize()
                regular_graph = pynini.difference(number, exception_inputs) @ cardinal_graph
            else:
                # Audio-based normalization keeps every reading as an option.
                regular_graph = cardinal_graph
            denominator_graph = pynini.union(exception_graph, regular_graph)

        # The "/" separator is consumed together with the numerator.
        numerator = (
            pynutil.insert("numerator: \"") + (number @ cardinal_graph) + pynutil.insert("\" ") + pynutil.delete("/")
        )
        denominator = pynutil.insert("denominator: \"") + (number @ denominator_graph) + pynutil.insert("\"")
        integer_part = pynutil.insert("integer_part: \"") + (number @ cardinal_graph) + pynutil.insert("\" ")

        simple_fraction = numerator + denominator
        # Mixed fractions are written "<integer> <numerator>/<denominator>" with exactly one space.
        mixed_fraction = integer_part + pynutil.delete(" ") + numerator + denominator
        optional_graph_negative = (pynutil.insert("negative: ") + pynini.cross("-", "\"true\" ")).ques

        self.fst = self.add_tokens(optional_graph_negative + (simple_fraction | mixed_fraction)).optimize()
class FractionFst(GraphFst):
    """
    Finite state transducer for verbalizing Vietnamese fraction numbers, e.g.
        fraction { negative: "true" integer_part: "hai mươi ba" numerator: "một" denominator: "năm" } -> âm hai mươi ba và một phần năm
        fraction { numerator: "ba" denominator: "chín" } -> ba phần chín
        fraction { integer_part: "một trăm" numerator: "hai" denominator: "ba" } -> một trăm và hai phần ba

    Args:
        deterministic: if True will provide a single transduction option,
            for False multiple options (used for audio-based normalization)
    """

    def __init__(self, deterministic: bool = True):
        super().__init__(name="fraction", kind="verbalize", deterministic=deterministic)

        def strip_field(field_name):
            # Drop the field name and the quotes around its value; keep the value.
            value = pynini.closure(NEMO_NOT_QUOTE)
            return pynutil.delete(field_name) + delete_space + pynutil.delete("\"") + value + pynutil.delete("\"")

        # Spoken forms of the minus sign; the second one is only offered as an
        # alternative when several outputs are allowed.
        sign_readings = [pynini.cross("negative: \"true\"", "âm ")]
        if not deterministic:
            sign_readings.append(pynini.cross("negative: \"true\"", "trừ "))
        sign = pynini.closure(pynini.union(*sign_readings) + delete_space, 0, 1)

        whole = strip_field("integer_part:")
        top = strip_field("numerator:")
        bottom = strip_field("denominator:")

        # "<numerator> phần <denominator>"
        plain = top + delete_space + pynutil.insert(" phần ") + bottom
        # "<integer> và <numerator> phần <denominator>"
        with_whole = whole + delete_space + pynutil.insert(" và ") + plain

        self.fst = self.delete_tokens(sign + pynini.union(plain, with_whole)).optimize()
nemo_text_processing.text_normalization.vi.verbalizers.decimal import DecimalFst +from nemo_text_processing.text_normalization.vi.verbalizers.fraction import FractionFst from nemo_text_processing.text_normalization.vi.verbalizers.ordinal import OrdinalFst from nemo_text_processing.text_normalization.vi.verbalizers.whitelist import WhiteListFst @@ -40,7 +41,10 @@ def __init__(self, deterministic: bool = True): decimal = DecimalFst(cardinal=cardinal, deterministic=deterministic) decimal_graph = decimal.fst + fraction = FractionFst(deterministic=deterministic) + fraction_graph = fraction.fst + # Combine all verbalizers - graph = cardinal_graph | whitelist_graph | word_graph | ordinal_graph | decimal_graph + graph = cardinal_graph | whitelist_graph | word_graph | ordinal_graph | decimal_graph | fraction_graph self.fst = graph diff --git a/tests/nemo_text_processing/vi/data_text_normalization/test_cases_fraction.txt b/tests/nemo_text_processing/vi/data_text_normalization/test_cases_fraction.txt new file mode 100644 index 000000000..1ccd7af94 --- /dev/null +++ b/tests/nemo_text_processing/vi/data_text_normalization/test_cases_fraction.txt @@ -0,0 +1,13 @@ +1/2~một phần hai +4/9~bốn phần chín +9/4~chín phần tư +1/4~một phần tư +3/4~ba phần tư +15/5~mười lăm phần năm +1/3~một phần ba +2/10~hai phần mười +23 1/5~hai mươi ba và một phần năm +-3/4~âm ba phần tư +-12 1/4 nha~âm mười hai và một phần tư nha +-5 2/3~âm năm và hai phần ba +5 1/2~năm và một phần hai \ No newline at end of file diff --git a/tests/nemo_text_processing/vi/test_fraction.py b/tests/nemo_text_processing/vi/test_fraction.py index acd465cfd..1751c7b8a 100644 --- a/tests/nemo_text_processing/vi/test_fraction.py +++ b/tests/nemo_text_processing/vi/test_fraction.py @@ -12,32 +12,49 @@ # See the License for the specific language governing permissions and # limitations under the License. 
class TestFraction:
    """Fraction test cases for Vietnamese inverse text normalization and text normalization."""

    # Shared fixtures, built once when the class body is executed.
    inverse_normalizer = InverseNormalizer(lang='vi', cache_dir=CACHE_DIR, overwrite_cache=False)

    normalizer = Normalizer(
        input_case='cased', lang='vi', cache_dir=CACHE_DIR, overwrite_cache=False, post_process=True
    )

    # Only built when a cache dir is configured and audio-based tests are enabled.
    normalizer_with_audio = (
        NormalizerWithAudio(input_case='cased', lang='vi', cache_dir=CACHE_DIR, overwrite_cache=False)
        if CACHE_DIR and RUN_AUDIO_BASED_TESTS
        else None
    )

    @parameterized.expand(parse_test_case_file('vi/data_inverse_text_normalization/test_cases_fraction.txt'))
    @pytest.mark.run_only_on('CPU')
    @pytest.mark.unit
    def test_denorm(self, test_input, expected):
        """Spoken form -> written form must match the expected string exactly."""
        denormalized = self.inverse_normalizer.inverse_normalize(test_input, verbose=False)
        assert denormalized == expected

    @parameterized.expand(parse_test_case_file('vi/data_text_normalization/test_cases_fraction.txt'))
    @pytest.mark.run_only_on('CPU')
    @pytest.mark.unit
    def test_norm(self, test_input, expected):
        """Written form -> spoken form; also checked against the audio-based options when enabled."""
        normalized = self.normalizer.normalize(test_input, verbose=False, punct_post_process=False)
        assert normalized == expected, f"input: {test_input}"

        audio_normalizer = self.normalizer_with_audio
        if not audio_normalizer:
            return

        candidates = audio_normalizer.normalize(
            test_input,
            n_tagged=30,
            punct_post_process=False,
        )
        assert expected in candidates, f"input: {test_input}"