In [1]:
# Import the morphological analyzer MorphAn
# `ma` is the analyzer instance that the later cells train and query.
import morphan
ma=morphan.MorphAn()

# Import the Annotated Syriac New Testament as training data
# `nt` is the corpus object handed to `ma.train` in the next cell.
import syrnt
nt=syrnt.SyrNT()

# Import descriptive values for the morphological annotations
# `annotations` is looked up as annotations[category][value] by the helper
# function `get_an_values` to turn raw tag values into descriptive strings.
# NOTE(review): dict() assumes c.ANNOTATIONS is a mapping or an iterable of
# (category, values) pairs -- confirm against the constants module.
from constants import SyrNT as c
annotations = dict(c.ANNOTATIONS)

In [2]:
# Train the analyzer on the training corpus
# (`nt`, the annotated Syriac New Testament loaded in the first cell).
# NOTE(review): presumably this has to run before any `ma.analyze` call -- confirm.
ma.train(nt)

In [3]:
# Since the morphological analyzer returns rather cryptic
# annotation values, provide two helper functions to
# return descriptive strings in dicts

def get_values(analysis):
    """Label the parts of a raw analysis with descriptive field names.

    The items of ``analysis`` are paired positionally with the names
    ``prefix``, ``stem``, ``suffix``, ``lexeme`` and ``tag``; the raw
    ``tag`` item is then replaced by the descriptive dict produced by
    ``get_an_values``.

    Args:
        analysis: An iterable of analysis parts in the order listed above,
            or ``None`` when there is no analysis to describe.

    Returns:
        A dict keyed by the field names, or ``None`` if ``analysis`` is
        ``None``.

    Raises:
        KeyError: If ``analysis`` has fewer than five items, so that no
            ``tag`` entry exists.
    """
    if analysis is None:
        return None
    field_names = ("prefix", "stem", "suffix", "lexeme", "tag")
    # zip() stops at the shorter input, so surplus items are ignored.
    described = dict(zip(field_names, analysis))
    described['tag'] = get_an_values(described['tag'])
    return described

def get_an_values(tag):
    """Translate a raw morphological tag into descriptive strings.

    Each item of ``tag`` is paired positionally with a category name and
    looked up in the module-level ``annotations`` table as
    ``annotations[category][value]``.

    Args:
        tag: An iterable of raw annotation values, ordered as
            grammatical_category, verbal_conjugation, aspect, state,
            person, number, gender.

    Returns:
        A dict mapping each category name to its descriptive value. Items
        beyond the seventh are ignored; an empty ``tag`` gives ``{}``.

    Raises:
        KeyError: If a category or value is missing from ``annotations``.
    """
    category_names = (
        "grammatical_category",
        "verbal_conjugation",
        "aspect",
        "state",
        "person",
        "number",
        "gender",
    )
    return {
        category: annotations[category][raw_value]
        for category, raw_value in zip(category_names, tag)
    }

In [4]:
# Use the first five lines of the Book of the Laws of the Countries as example corpus
# NOTE(review): the text appears to be an ASCII transliteration of Syriac -- confirm
# that it uses the same scheme as the training corpus.
# str.split() with no arguments splits on any run of whitespace, so the line
# breaks are discarded and `blc` ends up as one flat list of word tokens.
blc = """TWB KTB> DNMWS> D>TRWT>
MN QDM JWMT> <LJN HWJN LMS<R LCMCGRM >XWN W>T> >CKXN TMN BRDJYN
WKD GCH WXZ> DCPJR <BJD C>LN DMN> MMLLJN HWJTWN CM<T GJR QLKWN MN LBR KD <>L >N>
M<D HW> GJR D>MTJ DMCKX HW> LN DMMLLJN HWJN MDM MN QDMWHJ DNC>LN MN> >MRJN HWJTWN D<LWHJ NMLL <MN
XNN DJN >MRN LH <WJD> LM HN> >MR HW> LN D>N XD HW >LH> >JKN> D>MRJN >NTWN WHW >KJN LBNJNC> WYB> BHN> MDM DMPQDJN >NTWN DT<BDWN MVL MN> L> >KJN >NWN LBNJ >NC> >JKN> DL> NCKXWN LMSKLW >L> DBKLZBN DVB HWW <BDJN BHD> GJR MTML> HW> YBJNH
""".split()

In [5]:
# Analyze each word of the example corpus and print it next to the
# descriptive form of the analysis selected for it.
for word in blc:
    # NOTE(review): presumably returns the candidate analyses for this word -- confirm.
    analyses = ma.analyze(word)
    # Reduce the analyses to a single one; get_values() passes a None result through.
    analysis = morphan.best_analysis(analyses)
    print(word, get_values(analysis))

TWB {'prefix': '', 'stem': 'TWB', 'suffix': '', 'lexeme': 'TWB', 'tag': {'grammatical_category': 'particle', 'verbal_conjugation': 'n/a', 'aspect': 'n/a', 'state': 'n/a', 'person': 'n/a', 'number': 'n/a', 'gender': 'n/a'}}
KTB> {'prefix': '', 'stem': 'KTB>', 'suffix': '', 'lexeme': 'KTB>', 'tag': {'grammatical_category': 'noun', 'verbal_conjugation': 'n/a', 'aspect': 'n/a', 'state': 'emphatic', 'person': 'n/a', 'number': 'singular', 'gender': 'masculine'}}
DNMWS> {'prefix': 'D', 'stem': 'NMWS>', 'suffix': '', 'lexeme': 'NMWS>', 'tag': {'grammatical_category': 'noun', 'verbal_conjugation': 'n/a', 'aspect': 'n/a', 'state': 'emphatic', 'person': 'n/a', 'number': 'singular', 'gender': 'masculine'}}
D>TRWT> {'prefix': 'D', 'stem': '>TRWT>', 'suffix': '', 'lexeme': '>TR>', 'tag': {'grammatical_category': 'noun', 'verbal_conjugation': 'n/a', 'aspect': 'n/a', 'state': 'emphatic', 'person': 'n/a', 'number': 'plural', 'gender': 'masculine'}}
MN {'prefix': '', 'stem': 'MN', 'suffix': '', 'lexeme'