In [17]:
import pynini
import nemo_text_processing
from pynini.lib import pynutil

In [18]:
from pynini.lib import pynutil

def apply_fst(text, fst):
  """ Given a string input, returns the output string
  produced by traversing the path with lowest weight.
  If no valid path accepts input string, returns an
  error.
  """
  try:
     print(pynini.shortestpath(text @ fst).string())
  except pynini.FstOpError:
    print(f"Error: No valid output with given input: '{text}'")

## Digit

In [19]:
zero = pynini.string_map([('zero','0')])
digit = pynini.string_map([
    ('uno','1'),
    ('due','2'),
    ('tre','3'),
    ('quattro','4'),
    ('cinque','5'),
    ('sei','6'),
    ('sette','7'),
    ('otto','8'),
    ('nove','9'),
])

In [20]:
apply_fst("zero", zero)
apply_fst("uno", digit)
apply_fst("nove", digit)

0
1
9


## Teens

In [21]:
teen = pynini.string_map([
    ('dieci','10'),
    ('undici','11'),
    ('dodici','12'),
    ('tredici','13'),
    ('quattordici','14'),
    ('quindici','15'),
    ('sedici','16'),
    ('diciassette','17'),
    ('diciotto','18'),
    ('diciannove','19')
])

In [22]:
graph_all = zero | teen | digit

In [23]:
apply_fst("zero", graph_all)
apply_fst("tre", graph_all)
apply_fst("quindici", graph_all)
apply_fst("uno", graph_all)

0
3
15
1


## Tens

In [24]:
tens = pynini.string_map([
    ('venti','2'),
    ('trenta','3'),
    ('quaranta','4'),
    ('cinquanta','5'),
    ('sessanta','6'),
    ('settanta','7'),
    ('ottanta','8'),
    ('novanta','9'),
])

# delete_hyphen = pynini.closure(pynutil.delete("-"), 0, 1)
# delete_space = pynini.closure(pynutil.delete(" "), 0, 1)

# graph_tens_h = tens + delete_hyphen + digit
# graph_tens_s = tens + delete_space + digit
# graph_teens_and_tens = graph_tens_h | graph_tens_s | teen | digit

graph_teens_and_tens = tens + digit | teen | digit
graph_all = graph_teens_and_tens | zero 

In [25]:
# apply_fst('settanta-tre', graph_all)
# apply_fst('settanta tre', graph_all)
apply_fst('settantatre', graph_all)
apply_fst('trenta', graph_all)
apply_fst('dieci', graph_all)
apply_fst('uno', graph_all)
apply_fst('ottantuno', graph_all)

73
Error: No valid output with given input: 'trenta'
10
1
Error: No valid output with given input: 'ottantuno'


ERROR: StringFstToOutputLabels: Invalid start state
ERROR: StringFstToOutputLabels: Invalid start state


In [10]:
tens_one = pynini.string_map([
    ('ventuno','21'),
    ('trentuno','31'),
    ('quarantuno','41'),
    ('cinquantuno','51'),
    ('sessantuno','61'),
    ('settantuno','71'),
    ('ottantuno','81'),
    ('navantuno','91'),
])

In [11]:
graph_all = graph_teens_and_tens | tens_one | zero

In [12]:
# apply_fst('settanta-tre', graph_all)
# apply_fst('settanta tre', graph_all)
apply_fst('settantatre', graph_all)
apply_fst('dieci', graph_all)
apply_fst('ottantuno', graph_all)
apply_fst('uno', graph_all)

73
10
81
1


In [13]:
graph_all.num_states()

382

In [14]:
graph_all.optimize()
graph_all.num_states()

184

## Hundreds

In [15]:
hundred = pynini.string_map([
    ('duecento','20'),
    ('trecento','30'),
    ('quattrocento','40'),
    ('cinquecento','50'),
    ('seicento','60'),
    ('settecento','70'),
    ('ottocento','80'),
    ('novecento','90'),
])

In [16]:
graph_teens_and_tens = tens + digit | teen | tens_one

graph_hundred_and_zero = hundred + pynutil.insert("0")
graph_hundred_and_digit = hundred + digit
graph_hundred_and_tens = digit + pynutil.delete("cento") + tens + pynutil.insert('0')
graph_hundred = digit + pynutil.delete("cento") + graph_teens_and_tens

In [17]:
hundreds = graph_hundred_and_zero | graph_hundred_and_digit | graph_hundred | graph_hundred_and_tens

In [18]:
apply_fst('duecentoventitre', hundreds)
apply_fst('duecentoventuno', hundreds)
apply_fst('duecentotre', hundreds)
apply_fst('duecento', hundreds)
apply_fst('duecentoventi', hundreds)

223
221
203
200
220


In [19]:
graph_teens_and_tens = tens + digit | teen | tens_one
cento = pynini.string_map([('cento','1')])

cent = cento + pynutil.insert("00")
graph_cent_and_tens = cento + tens + pynutil.insert('0')
graph_cent = cento + graph_teens_and_tens

hundreds_cent = cent | graph_cent_and_tens | graph_cent

In [20]:
apply_fst('cento', hundreds_cent)
apply_fst('centoventiquattro', hundreds_cent)
apply_fst('centoventi', hundreds_cent)
apply_fst('centotrentuno', hundreds_cent)

100
124
120
131


In [21]:
hundreds_all = hundreds | hundreds_cent

In [22]:
apply_fst('duecentoventitre', hundreds_all)
apply_fst('duecentoventuno', hundreds_all)
apply_fst('duecentotre', hundreds_all)
apply_fst('duecento', hundreds_all)
apply_fst('duecentoventi', hundreds_all)
apply_fst('cento', hundreds_all)
apply_fst('centoventiquattro', hundreds_all)
apply_fst('centoventi', hundreds_all)
apply_fst('centotrentuno', hundreds_all)

223
221
203
200
220
100
124
120
131


## Thousands

In [23]:
graph_teens_and_tens = tens + digit | teen | tens_one
thousands = hundreds_all | graph_teens_and_tens | digit
graph_thousands_hundred = thousands + pynutil.delete('mila') + hundreds_all
graph_thousands_tens = thousands + pynutil.delete('mila') + pynutil.insert('0') + graph_teens_and_tens
graph_thousands_digit = thousands + pynutil.delete('mila') + pynutil.insert('00') + digit
graph_thousands = digit + pynutil.delete('mila') + pynutil.insert('000')

mila = graph_thousands_hundred | graph_thousands_tens | graph_thousands_digit | graph_thousands

In [24]:
apply_fst('duecentosedicimilatrecentoquattro', graph_thousands_hundred)
apply_fst('ventunomiladuecentoquaranta', graph_thousands_hundred)
apply_fst('duecentosedicimilaventitre', graph_thousands_tens)
apply_fst('duecentosedicimiladue', graph_thousands_digit)
apply_fst('sedicimiladue', graph_thousands_digit)
apply_fst('duemilasettantacinque', graph_thousands_tens)

216304
21240
216023
216002
16002
2075


In [25]:
apply_fst('duecentosedicimilatrecentoquattro', mila)
apply_fst('ventunomiladuecentoquaranta', mila)
apply_fst('duecentosedicimilaventitre', mila)
apply_fst('duecentosedicimiladue', mila)
apply_fst('sedicimiladue', mila)
apply_fst('sedicimilaventuno', mila)
apply_fst('duemilasettantacinque', mila)
apply_fst('duemila', mila)

216304
21240
216023
216002
16002
16021
2075
2000


In [26]:
graph_teens_and_tens = tens + digit | teen | tens_one
mille = pynini.string_map([('mille','1')])

mille_zero = mille + pynutil.insert("000")
graph_mille_hundreds = mille + hundreds_all
graph_mille_tens = mille + pynutil.insert("0") + graph_teens_and_tens
graph_mille_digit = mille + pynutil.insert('00') + digit

thousands_mille = mille_zero | graph_mille_hundreds | graph_mille_tens | graph_mille_digit

In [27]:
apply_fst('mille', thousands_mille)
apply_fst('milletre', thousands_mille)
apply_fst('milleventiquattro', thousands_mille)
apply_fst('milletrecentododici', thousands_mille)

1000
1003
1024
1312


In [28]:
thousands_all = thousands_mille | mila

In [29]:
apply_fst('mille', thousands_all)
apply_fst('milletre', thousands_all)
apply_fst('milleventiquattro', thousands_all)
apply_fst('milletrecentododici', thousands_all)
apply_fst('duecentosedicimilatrecentoquattro', thousands_all)
apply_fst('ventunomiladuecentoquaranta', thousands_all)
apply_fst('duecentosedicimilaventitre', thousands_all)
apply_fst('duecentosedicimiladue', thousands_all)
apply_fst('sedicimiladue', thousands_all)
apply_fst('sedicimilaventuno', thousands_all)
apply_fst('duemilasettantacinque', thousands_all)
apply_fst('duemila', thousands_all)

1000
1003
1024
1312
216304
21240
216023
216002
16002
16021
2075
2000


## Weight

In [86]:
graph_teens_and_tens = tens + digit | teen | tens_one
thousands = hundreds_all | graph_teens_and_tens | digit

graph_one_thousand = pynini.cross('mille','1')
graph_many_thousand = thousands + pynutil.delete('mila')
graph_thousands = (graph_one_thousand | graph_many_thousand) + thousands

In [87]:
apply_fst('milleduecento', graph_thousands)
apply_fst('duemiladuecento', graph_thousands)
apply_fst('duecentotredicimiladuecentoquattordici', graph_thousands)
apply_fst('milletre', graph_thousands)
apply_fst('tremilatre', graph_thousands)
apply_fst('tremiladodici', graph_thousands)

1200
2200
213214
13
33
312


In [92]:
graph_teens_and_tens = tens + digit | teen | tens_one
thousands = hundreds_all | graph_teens_and_tens | digit
graph_thousands_hundred = thousands + pynutil.delete('mila') + hundreds_all
graph_thousands_tens = thousands + pynutil.delete('mila') + pynutil.insert('0') + graph_teens_and_tens
graph_thousands_digit = thousands + pynutil.delete('mila') + pynutil.insert('00') + digit
graph_thousands = digit + pynutil.delete('mila') + pynutil.insert('000')

mila = graph_thousands_hundred | graph_thousands_tens | graph_thousands_digit | graph_thousands

In [96]:
pynutil.delete?

[0;31mSignature:[0m
[0mpynutil[0m[0;34m.[0m[0mdelete[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0mexpr[0m[0;34m:[0m [0;34m'typing.Union[Fst, str]'[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mweight[0m[0;34m:[0m [0mUnion[0m[0;34m[[0m[0mForwardRef[0m[0;34m([0m[0;34m'Union[Weight, Union[str, int, float]]'[0m[0;34m)[0m[0;34m,[0m [0mNoneType[0m[0;34m][0m [0;34m=[0m [0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m[0;34m)[0m [0;34m->[0m [0m_pynini[0m[0;34m.[0m[0mFst[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m
Creates the transducer for expr x <epsilon>.

Args:
  expr: an acceptor or string.
  weight: an optional weight or string.

Returns:
  An FST.
[0;31mFile:[0m      /opt/conda/lib/python3.8/site-packages/pynini/lib/pynutil.py
[0;31mType:[0m      function
