In [1]:
import hfst
import fstutils as fst

In [2]:
# Sanity check: compile the lexc lexicon and display the type of the returned object.
# The compiled transducer is discarded here; a later cell compiles the same file again
# and keeps the result as `grammar`.
type(hfst.compile_lexc_file('esu_toy.lexc'))

libhfst.HfstTransducer

In [3]:
# Named symbol classes used by the rewrite rules below.
# Each value is a regular-expression fragment; entries may refer to earlier
# entries by name (C uses Stop/Nasal/Fricative, V uses FullVowel, Alphabet uses C and V).
defs = fst.Definitions({
    # --- consonant classes ---
    'Stop': '[ p | t | c | k | q ]',
    'Nasal': '[ m | n | "ng" | ḿ | ń | "ńg" ]',
    'Fricative': '[  v   |  l   |  s   |  g   |  r   | "vv" | "ll" | "ss" | "gg" | "rr" |"u͡g" | "u͡gg" | "u͡r" | "u͡rr" ]',
    'C': '[ Stop | Nasal | Fricative | w | y ]',

    # --- vowel classes ---
    'FullVowel': '[ a | i | u ]',
    'V': '[ e | FullVowel ]',

    # --- every segment: consonants plus vowels ---
    'Alphabet': '[ C | V ]',

    # --- non-segmental symbols that the rules below condition on, skip over, or delete ---
    'MorphPhonSymbols': '[ "~" | "+" | "-" | ":" | "@" | "`" | "(ng)" | "(s)" | "(g)" | "(t)" ]',
})

In [4]:
# Show the definition table held by `defs` (name -> regex fragment) to check
# how the named classes were stored.
print(defs.defs)

{'Stop': '[ p | t | c | k | q ]', 'Nasal': '[ m | n | "ng" | ḿ | ń | "ńg" ]', 'Fricative': '[  v   |  l   |  s   |  g   |  r   | "vv" | "ll" | "ss" | "gg" | "rr" |"u͡g" | "u͡gg" | "u͡r" | "u͡rr" ]', 'C': '[ [ p | t | c | k | q ] | [ m | n | "ng" | ḿ | ń | "ńg" ] | [  v   |  l   |  s   |  g   |  r   | "vv" | "ll" | "ss" | "gg" | "rr" |"u͡g" | "u͡gg" | "u͡r" | "u͡rr" ] | w | y ]', 'FullVowel': '[ a | i | u ]', 'V': '[ e | [ a | i | u ] ]', 'Alphabet': '[ [ [ p | t | c | k | q ] | [ m | n | "ng" | ḿ | ń | "ńg" ] | [  v   |  l   |  s   |  g   |  r   | "vv" | "ll" | "ss" | "gg" | "rr" |"u͡g" | "u͡gg" | "u͡r" | "u͡rr" ] | w | y ] | [ e | [ a | i | u ] ] ]', 'MorphPhonSymbols': '[ "~" | "+" | "-" | ":" | "@" | "`" | "(ng)" | "(s)" | "(g)" | "(t)" ]'}


In [5]:
def _compile_rule(rule):
    """Expand the named classes in `rule` with `defs.replace`, then compile it with `hfst.regex`."""
    return hfst.regex(defs.replace(rule))


# Each rule below is a replace-rule regex; `.o.` chains sub-rules inside a single rule.

# Parenthesised symbols: "(ng)" -> ng and "(s)" -> s after V; "(t)" -> t after g or r;
# "(g)" -> g after V V (each allowing intervening MorphPhonSymbols); any that remain are deleted.
allomorphy = _compile_rule(
    '"(ng)" -> "ng", "(s)" -> s || V MorphPhonSymbols* _ '
    '.o. "(t)" -> t || [ g | r ] MorphPhonSymbols* _ '
    '.o. "(g)" -> g || V V MorphPhonSymbols* _ '
    '.o. [ "(ng)" | "(s)" | "(g)" | "(t)" ] -> 0'
)

# "-": delete a C standing before it (MorphPhonSymbols may intervene), then delete "-" itself.
dropConsonant = _compile_rule('C -> 0 || _ MorphPhonSymbols* "-" .o. "-" -> 0')

# "+": simply delete the symbol.
keepConsonant = _compile_rule('"+" -> 0')

# "~": delete an e standing before it (MorphPhonSymbols may intervene), then delete "~" itself.
eDeletion = _compile_rule('e -> 0 || _ MorphPhonSymbols* "~" .o. "~" -> 0')

# ":": delete g / r / ng in the context C V _ ":" V C, then delete ":" itself.
velarDropping = _compile_rule('[ g | r | "ng" ] -> 0 || C V _ ":" V C .o. ":" -> 0')

# r -> q, g -> k, e -> a before "+" or at the end of the string.
baseFinalEndings = _compile_rule('r -> q, g -> k, e -> a || _ [ "+" | .#. ]')

# Insert e between the second and third consonant of a C C C sequence.
tripleConsonant = _compile_rule('[..] -> e || C C _ C')

# Rewrite the sequence e ng i as a i.
engi = _compile_rule('e ng i -> a i')

In [6]:
# Inspect the expanded text that `defs.replace` produces for the dropConsonant rule string,
# i.e. what is handed to `hfst.regex` once the class names are substituted.
defs.replace('C -> 0 || _ MorphPhonSymbols* "-" .o. "-" -> 0')

'[ [ p | t | c | k | q ] | [ m | n | "ng" | ḿ | ń | "ńg" ] | [  v   |  l   |  s   |  g   |  r   | "vv" | "ll" | "ss" | "gg" | "rr" |"u͡g" | "u͡gg" | "u͡r" | "u͡rr" ] | w | y ] -> 0 || _ [ "~" | "+" | "-" | ":" | "@" | "`" | "(ng)" | "(s)" | "(g)" | "(t)" ]* "-" .o. "-" -> 0'

In [7]:
# Build the full grammar: start from the lexc lexicon, then compose each rewrite rule
# onto it. The list order is the order in which the rules apply.
# NOTE(review): keepConsonant removes every "+" before baseFinalEndings is composed, yet
# baseFinalEndings lists "+" as a right context, so that context may never match at this
# point in the cascade. Confirm the intended ordering.
grammar = hfst.compile_lexc_file('esu_toy.lexc')
for rule in (
    allomorphy,
    dropConsonant,
    keepConsonant,
    eDeletion,
    velarDropping,
    baseFinalEndings,
    tripleConsonant,
    engi,
):
    grammar.compose(rule)

In [9]:
# Run one analysis string through the composed grammar and display the resulting form(s).
# The input contains a literal backslash; a raw string is used because "\A" is not a valid
# Python escape sequence (a plain literal triggers an invalid-escape warning on current
# Python versions). The string passed to lookup is unchanged.
fst.lookup(grammar, r'boat-big\ABS.sg')

['angyarpak']

In [69]:
# Same `defs.replace` inspection as the earlier In [6] cell.
# NOTE(review): the output recorded under this cell expands C only one level (Stop, Nasal and
# Fricative are left as bare names), unlike the In [6] output, and the execution count is out
# of sequence. This looks like leftover state from a different kernel session or a different
# version of `fstutils`; re-run the notebook from a fresh kernel to confirm.
defs.replace('C -> 0 || _ MorphPhonSymbols* "-" .o. "-" -> 0')

'[ Stop | Nasal | Fricative | w | y ] -> 0 || _ [ "~" | "+" | "-" | ":" | "@" | "`" | "(ng)" | "(s)" | "(g)" | "(t)" ]* "-" .o. "-" -> 0'

In [71]:
# Same dump of the definition table as the earlier In [4] cell.
# NOTE(review): the output recorded under this cell shows the definitions unexpanded
# (e.g. C still refers to Stop/Nasal/Fricative by name), unlike the In [4] output, and the
# execution count is out of sequence; re-run from a fresh kernel to confirm the current state.
print(defs.defs)

{'Stop': '[ p | t | c | k | q ]', 'Nasal': '[ m | n | "ng" | ḿ | ń | "ńg" ]', 'Fricative': '[  v   |  l   |  s   |  g   |  r   | "vv" | "ll" | "ss" | "gg" | "rr" |"u͡g" | "u͡gg" | "u͡r" | "u͡rr" ]', 'C': '[ Stop | Nasal | Fricative | w | y ]', 'FullVowel': '[ a | i | u ]', 'V': '[ e | FullVowel ]', 'Alphabet': '[ C | V ]', 'MorphPhonSymbols': '[ "~" | "+" | "-" | ":" | "@" | "`" | "(ng)" | "(s)" | "(g)" | "(t)" ]'}
