In [37]:
import pynini 
from nemo_text_processing.inverse_text_normalization.hi.utils import get_abs_path, apply_fst
from nemo_text_processing.text_normalization.en.graph_utils import (
    INPUT_CASED,
    INPUT_LOWER_CASED,
    MIN_NEG_WEIGHT,
    MINUS,
    NEMO_DIGIT,
    NEMO_SIGMA,
    TO_LOWER,
    GraphFst,
    capitalized_input_graph,
    delete_extra_space,
    insert_space,
    delete_space,
)
from nemo_text_processing.inverse_text_normalization.hi.graph_utils import NEMO_HI_DIGIT
from nemo_text_processing.inverse_text_normalization.hi.taggers.cardinal import CardinalFst
from nemo_text_processing.inverse_text_normalization.hi.taggers.date import DateFst
from pynini.lib import pynutil, rewrite

class DateFst(GraphFst):
    """
    Finite state transducer for classifying fraction
          Finite state transducer for classifying date, 
        e.g. पांच जनवरी दो हज़ार बारह -> date { month: "जनवरी" day: "५" year: "२०१२" preserve_order: true }
        e.g. दो हज़ार बारह -> date { year: "२०१२" preserve_order: true }     
    Args:
        cardinal: CardinalFst
        date: DateFst
    """
    def __init__(self, cardinal: GraphFst):
        super().__init__(name="date", kind="classify")
        # integer_part # numerator # denominator
        graph_cardinal = cardinal.graph_no_exception
        month_graph = pynini.string_file(get_abs_path("data/months.tsv"))
        graph_digit = pynini.string_file(get_abs_path("data/numbers/digit.tsv")).invert()
        graph_teens_and_ties = pynini.string_file(get_abs_path("data/numbers/teens_and_ties.tsv")).invert()
        graph_date_days = pynini.string_file(get_abs_path("data/date_days.tsv")).invert()
        graph_hundred = pynini.cross("सौ", "")
        delete_thousand = pynutil.delete("हज़ार") | pynutil.delete("हजार")
        graph_hundred_component = pynini.union(graph_digit + delete_space + graph_hundred, pynutil.insert("०"))
        graph_hundred_component += delete_space
        graph_hundred_component += pynutil.insert("००")

        graph_hundred_component_at_least_one_none_zero_digit = graph_hundred_component @ (
                pynini.closure(NEMO_HI_DIGIT) + (NEMO_HI_DIGIT - "०") + pynini.closure(NEMO_HI_DIGIT)
        )
        self.graph_hundred_component_at_least_one_none_zero_digit = (
            graph_hundred_component_at_least_one_none_zero_digit
        )

        # Transducer for eleven hundred -> 1100 or twenty one hundred eleven -> 2111
        graph_hundred_as_thousand = pynini.union(graph_teens_and_ties + delete_space + graph_hundred, pynutil.insert("०"))
        graph_hundred_as_thousand += delete_space  
        graph_hundred_as_thousand += pynutil.insert("००")

        graph_hundreds = graph_hundred_component | graph_hundred_as_thousand

        
        graph_teens_and_ties_component = pynini.union(
            graph_teens_and_ties | pynutil.insert("00") + delete_space + (graph_digit | pynutil.insert("0")),
        )
        
        graph_date = graph_digit | graph_date_days
        self.graph_date = graph_date
        #print(get_abs_path("data/numbers/digit.tsv"))
        graph_no_prefix = pynini.union(pynini.cross("सौ", "१००") | pynini.cross("हज़ार", "१०००") | pynini.cross("लाख", "१०००००") | pynini.cross("करोड़", "१०००००००"), pynutil.insert("graph_no_prefix", weight=2))
        
        cardinal_graph = pynini.union(graph_hundreds, graph_no_prefix) #graph_digit_plus_hundred, 


        
        self.day = pynini.closure(pynutil.insert("day: \"") + graph_date + pynutil.insert("\""))
        self.month = pynini.closure(pynutil.insert("month: \"") + month_graph + pynutil.insert("\""))
        self.year = pynini.closure(pynutil.insert("year: \"") + cardinal_graph + pynutil.insert("\""))
        #insert_comma = pynutil.insert(", ") 
        
        graph_date = self.day + delete_space + self.month + pynini.closure(delete_space + self.year, 0,1)

        graph = graph_date 
        self.graph = graph.optimize()
        
        final_graph = self.add_tokens(graph)
        self.fst = final_graph.optimize()

from nemo_text_processing.inverse_text_normalization.hi.taggers.cardinal import CardinalFst
cardinal = CardinalFst()
date = DateFst(cardinal)
input_text = "दो हज़ार बीस"

output = apply_fst(input_text, cardinal.fst)
print(output)


Error: No valid output with given input: 'दो हज़ार बीस'
None


ERROR: StringFstToOutputLabels: Invalid start state
