In [1]:
# This notebook presents a simple function mc_reduce() with which to
# reduce the morphological encoding of a space-delimited word to a
# minimal notation.
# Its inverse, mc_expand(), restores the canonical notation.

In [2]:
# Marker characters for the prefixes used in the encoding of Hebrew and
# Syriac. Each marker is a single character.
prefixes = list('!]@')   # i.e. ['!', ']', '@']

In [3]:
# Sample output forms in canonical notation, used as test input for the
# round trip mc_reduce() -> mc_expand().
output_forms = [
   'BR>[',
   'B-R>CJT/',
   '!M!RXP[/T:d',
   'W:n-!J!>MR[',
   ']H]MV&JR[',
   'W-]H]CQH[',
   '!M!](H]BD&JL[/',
   'W:n-!J!](H]BDL[',
   '!!@>(T&Z@(Z&DHR[:d',
   '@>(T&C@](C&T](J&WDJ[T=',
   'D-L-!M!@(>(T&Z@(Z&DHR[/W:d',
   'D-@>(T&S@](S&T]QBL[W',
   'D-!M!@(>(T&C@](C&T]XLP[/JN',
   '!M!@(>(T&C@](C&T]BHR[/JN',
   'D-@>(T&C@(C&TBJ==[TWN',
   '@>(T&C@(C&T&J&WDJ[T=',
]

In [4]:
from re import escape, sub

def mc_reduce(s, markers=None):
   """Reduce a morphologically encoded word to its minimal notation.

   The reduction removes the left-most marker from every doubly marked
   prefix (e.g. '!M!' becomes 'M!') and drops every colon, including
   the redundant colon of the vowel pattern mark.

   Parameters:
      s: the encoded word in canonical notation.
      markers: iterable of single-character prefix markers; when None,
         the module-level list `prefixes` is used (looked up at call
         time).

   Returns:
      The reduced string.
   """
   if markers is None:
      markers = prefixes
   for marker in markers:
      # Escape the marker so that characters with a special meaning in
      # regular expressions (']', '+', '.', ...) are taken literally,
      # both on their own and inside the character class.
      m = escape(marker)
      # An opening marker, then everything up to and including the next
      # occurrence of the same marker; only the latter part is kept.
      s = sub(f'{m}([^{m}]*{m})', r'\1', s)
   return s.replace(':', '')

In [5]:
def mc_expand(s, markers=None):
   """Undo mc_reduce(): restore the canonical notation of a reduced word.

   A colon is re-inserted before every run of lowercase ASCII letters,
   and every closing prefix marker gets its opening counterpart back.

   Parameters:
      s: the word in reduced notation.
      markers: iterable of single-character prefix markers; when None,
         the module-level list `prefixes` is used (looked up at call
         time).

   Returns:
      The expanded string.
   """
   if markers is None:
      markers = prefixes
   s = sub(r'([a-z]+)', r':\1', s)
   # Characters a prefix may not contain: any marker, escaped so that
   # each one is taken literally inside the character class.
   excluded = ''.join(escape(marker) for marker in markers)
   for marker in markers:
      # The hyphen in the character class makes sure that we stay
      # within a single analytical word. A callable replacement is used
      # so that the marker is never interpreted as template syntax.
      s = sub(f'([^-{excluded}]*{escape(marker)})',
              lambda hit: marker + hit.group(1), s)
   return s

In [6]:
# Print each form, its reduction, and a boolean that tells whether
# mc_expand() restores the original form, i.e. whether it in fact
# functions as the inverse of mc_reduce(). Columns are tab-separated.
for s in output_forms:
   r = mc_reduce(s)
   round_trip = mc_expand(r)
   print('\t'.join((s, r, str(round_trip == s))))

BR>[	BR>[	True
B-R>CJT/	B-R>CJT/	True
!M!RXP[/T:d	M!RXP[/Td	True
W:n-!J!>MR[	Wn-J!>MR[	True
]H]MV&JR[	H]MV&JR[	True
W-]H]CQH[	W-H]CQH[	True
!M!](H]BD&JL[/	M!(H]BD&JL[/	True
W:n-!J!](H]BDL[	Wn-J!(H]BDL[	True
!!@>(T&Z@(Z&DHR[:d	!>(T&Z@(Z&DHR[d	True
@>(T&C@](C&T](J&WDJ[T=	>(T&C@(C&T](J&WDJ[T=	True
D-L-!M!@(>(T&Z@(Z&DHR[/W:d	D-L-M!(>(T&Z@(Z&DHR[/Wd	True
D-@>(T&S@](S&T]QBL[W	D->(T&S@(S&T]QBL[W	True
D-!M!@(>(T&C@](C&T]XLP[/JN	D-M!(>(T&C@(C&T]XLP[/JN	True
!M!@(>(T&C@](C&T]BHR[/JN	M!(>(T&C@(C&T]BHR[/JN	True
D-@>(T&C@(C&TBJ==[TWN	D->(T&C@(C&TBJ==[TWN	True
@>(T&C@(C&T&J&WDJ[T=	>(T&C@(C&T&J&WDJ[T=	True
