In [1]:
import pandas as pd
import nltk
import json

# UniParser

In [3]:
pip install uniparser_morph

Note: you may need to restart the kernel to use updated packages.


In [2]:
from uniparser_morph import Analyzer

Initialise analyzer:

In [3]:
# Create the morphological analyzer shared by the cells below (they call `a.analyze_words`).
a = Analyzer()
# NOTE(review): presumably loads the grammar files found by uniparser_morph's default lookup — confirm where they must live.
a.load_grammar()

Pass a word:

In [17]:
# Analyse a single word; as the printed output shows, the result is a list of Wordform objects.
analyses=a.analyze_words("ġamlax")
print(analyses)

[<Wordform object>
ġamlax
ġamla; NOUN,POSS.2ms,Sg,m
ġaml-ax
STEM=2ms
]


In [18]:
# Print each analysis field by field, one labelled line per attribute.
fields = (
    ("wordform: ", "wf"),
    ("lemma: ", "lemma"),
    ("grammatical features: ", "gramm"),
    ("glossed wordform: ", "wfGlossed"),
    ("glosses: ", "gloss"),
)
for analysis in analyses:
    for label, attr in fields:
        print(label, getattr(analysis, attr))

wordform:  ġamlax
lemma:  ġamla
grammatical features:  NOUN,m,Sg,POSS.2ms
glossed wordform:  ġaml-ax
glosses:  STEM=2ms


Or a list of words:

In [24]:
# Analyse several words in one call; with format="conll" the printed result is
# tab-separated text with one numbered token per line (see the output below).
analyses = a.analyze_words(['ana', 'xett', 'batt', 'ṯinaġelča'], format="conll")
print(analyses)

1	ana	ana	PRON	pron_type=Pers|person=1|number=Sg	ana	I
2	xett	xett	ADV	adv_type=Mod	xett	STEM
3	batt	abət yabət|batte	AUX|VERB	Stamm=IVy|tense=Perf|person=3|number=Sg|gender=m | person=1|number=Sg|gender=c	batt	FUT|STEM
4	ṯinaġelča	ṯinaġelča	NOUN	gender=f|number=Sg	ṯinaġel-č-a	STEM-F-FREE



If you do not have a prepared list of words, you can use this function to build one from a text string:

In [5]:
def text_to_tok(text_to_gloss):
    """Turn a raw text string into a flat list of lower-cased word tokens.

    The text is tokenized with ``nltk.word_tokenize``. Each token is
    lower-cased and then handled as follows:

    * a token containing ``-`` and/or ``_`` is split on both separators and
      every non-empty piece is kept (so ``"b-anna"`` -> ``"b"``, ``"anna"``);
    * any other token is kept only if it is purely alphabetic, or contains
      ``"ḏ̣"``;
    * everything else (punctuation, numbers, ...) is dropped.

    Parameters
    ----------
    text_to_gloss : str
        The text to tokenize.

    Returns
    -------
    list of str
        The tokens, in text order.
    """
    final_tokens = []
    for tok in nltk.word_tokenize(text_to_gloss):
        tok = tok.lower()
        if "-" in tok or "_" in tok:
            # Split on both separators at once and drop the empty strings that
            # a leading, trailing or stand-alone separator would otherwise
            # produce (e.g. a bare "-" token).
            pieces = tok.replace("_", "-").split("-")
            final_tokens.extend(piece for piece in pieces if piece)
        # NOTE(review): "ḏ̣" is special-cased presumably because it carries a
        # combining mark, which makes str.isalpha() return False — confirm.
        elif tok.isalpha() or "ḏ̣" in tok:
            final_tokens.append(tok)
    return final_tokens

# Evaluation tool

To evaluate the morphological parser, the two functions below can be used. The first one (`get_gold_annotation`) prepares the gold annotation in the required format. The second one (`get_annotation_scores`) prints the total number of words, the number of fully correct words (i.e. words whose lemma, POS tag and grammatical features are all correct simultaneously) and the weighted arithmetic mean, and returns a dataframe with the number and percentage of correct lemmata, POS tags and grammatical features per part of speech.

Prepare gold annotation:

In [4]:
# Build the gold annotation grouped by sentence: `annotations` is a list of
# sentences, each sentence a list of [wordform, lemma, POS, feature-set] entries.
annotations = []
ann_sentence = []
# `with` closes the file even if a malformed line raises below.
with open("annotation.txt", "r", encoding="utf-8") as g:
    for line in g:
        # A line starting with "0" or "3" is treated as the start of a new
        # sentence: store the sentence collected so far and start a fresh one.
        if line.startswith(("0", "3")):
            if ann_sentence:
                annotations.append(ann_sentence)
            ann_sentence = []
        # Token lines use a double tab as field separator.
        if "\t\t" in line:
            # Strip the line break so it cannot end up inside the last field
            # when a line has only three fields.
            line_splitted = line.rstrip("\n").split("\t\t")[:3]
            # Tokens whose lemma is "EMPTY" are skipped.
            if line_splitted[1] != "EMPTY":
                grams = line_splitted[-1].split(",")
                ann_sentence.append([
                    line_splitted[0][:-1],  # wordform without its last character
                    line_splitted[1],       # lemma
                    grams[0],               # POS tag
                    set(grams[1:]),         # remaining grammatical features
                ])

# The last sentence has no following header line to trigger the append above,
# so store it explicitly.
if ann_sentence:
    annotations.append(ann_sentence)

Here is what it looks like:

In [6]:
# Show the first sentence: a list of [wordform, lemma, POS, feature-set] entries.
print(annotations[0])

[['ē', 'ē', 'PART', set()], ['čbōʕin', 'ibəʕ yibəʕ', 'VERB', {'Prs', '2', 'Pl', 'm', 'I'}], ['nišw', 'išw yišw', 'VERB', {'Subj', 'Pl', 'c', '1', 'I'}], ['ḥenna', 'ḥenna', 'NOUN', {'m', 'Sg'}]]


In [7]:
def get_gold_annotation(gold_annotation):
    """Read a gold annotation file and return it as four flat token lists.

    Only lines containing a double tab (``"\\t\\t"``) are read; the first three
    double-tab separated fields are used. Each token becomes a 4-item list
    ``[wordform, lemma, pos_tag, features]`` where

    * ``wordform`` is the first field without its last character,
    * ``lemma`` is the second field,
    * ``pos_tag`` is the first comma-separated item of the third field,
    * ``features`` is the ``set`` of the remaining comma-separated items.

    Parameters
    ----------
    gold_annotation : str
        Path of the annotation file (read as UTF-8).

    Returns
    -------
    tuple of four lists
        ``(annotations_all, annotations_all_unique, annotations_non_empty,
        annotations_non_empty_unique)``: every token; every distinct token
        (first occurrence order); every token whose lemma is not ``"EMPTY"``;
        and the distinct tokens among those.
    """
    annotations_all = []
    annotations_all_unique = []
    annotations_non_empty = []
    annotations_non_empty_unique = []

    # Hashable keys of the entries already stored in the *_unique lists, so
    # that de-duplication does not rescan the whole list for every token.
    seen_all = set()
    seen_non_empty = set()

    # `with` closes the file even if a malformed line raises below.
    with open(gold_annotation, "r", encoding="utf-8") as g:
        for line in g:
            if "\t\t" not in line:
                continue

            # Strip the line break so it cannot end up inside the last feature
            # when a line has only three fields.
            line_splitted = line.rstrip("\n").split("\t\t")[:3]
            wordform = line_splitted[0][:-1]
            lemma = line_splitted[1]
            grams = line_splitted[-1].split(",")
            pos_tag = grams[0]
            features = frozenset(grams[1:])
            key = (wordform, lemma, pos_tag, features)

            entry = [wordform, lemma, pos_tag, set(features)]
            annotations_all.append(entry)
            if key not in seen_all:
                seen_all.add(key)
                annotations_all_unique.append(entry)

            if lemma != "EMPTY":
                # Separate list object, so the non-empty lists do not share
                # entries with the "all" lists.
                entry_non_empty = [wordform, lemma, pos_tag, set(features)]
                annotations_non_empty.append(entry_non_empty)
                if key not in seen_non_empty:
                    seen_non_empty.add(key)
                    annotations_non_empty_unique.append(entry_non_empty)

    return annotations_all, annotations_all_unique, annotations_non_empty, annotations_non_empty_unique

In [8]:
# In return order: ann1 = all tokens, ann2 = unique tokens,
# ann3 = tokens whose lemma is not "EMPTY", ann4 = unique non-"EMPTY" tokens.
ann1,ann2,ann3,ann4=get_gold_annotation("annotation.txt")

List of sentences on which the check is done:

In [9]:
sents_ann=['005. «ē, čbōʕin nišw ḥenna?»', '014. bōṯar irpiʕ yūm mʕawītin xett mayṯyin ḏbīḥča, naxsilla.', '0. ḳarribōye ḳaʕyin b-anna payṯa, mayṯyin ḳurʔān w ḳaryill lanna ḳurʔān mn-awwalče l-axerče.', '016. ḳōymin ommṯa bakkar, nōḥčin ʕa žēmʕa ʕam-ṣallyin w zlillun ʕa žappōnča.', '003. zlallen ʕṣofra yzūran.', '004. hū čū ōxel menna, mōrəl ḏ̣ḥīṯa ču maḥiḳḳle yīxul, yḏuḳenne bnaw bnawb, yīxul menna.', '009. amrulla: «nḥūč!» niḥčaṯ, niḥčaṯ ʕal-ōḏ mʕarrṯa.', '004. ḳaʕya willa la iḥmaṯ illa iʕber aʕla, iʕber aʕla, iḏʕaṯ hī.', '015. ʕala ṭūl mičwaḥḥ bē w ʕala ṭūl p-ḥaṣṣir rayše w iṭleb menne metti w batte yapplēle.', '003. ōṯ ġabrōna, wōb b-ġayrib blōta w ōṯ.', '001. b-zamōne ṯōle aḥḥaḏ dumōnay l-ōxa, ōmar mbaḳḳar p-šaġəlṯil mōya p-ḳīsa.', '016. ē, lōmar, la karr, aḳam ōčem ikbet b-ḏokkṯe w iḳəʕ, w hōṯa mafizza.', '004. b-zamōna ʕaččīḳa msawəlfin wōṯ ḏēba rōfeḳ ṯaʕla malʕun w inžes w šūne lēle rfīḳa ġassem maʕ ḏibō ti iṯḳen mamrille yičwaʕʕ menne w yabʕeḏ meʕle.', '028. mʕapparlun ṯaʕla ʕa mahle w ōxel, w minəpṣaṭ hū w bnōye w eččṯe.', '007. b-anna tarba allxinnaḥ, // w meʕle lorkaʕ ʕawitinnaḥ. // p-ḥaṣṣiš šenna šaʕʕlinnaḥ, // w ʕa šenna irčfaʕ ṣlība.', '003. b-ʕaynōš nḥamēl kawna. // ʕaya la amrīšəl b-riḥmūṯiš? // ʕaya ḏikkliš aʕəl, amrīšəl: čimʕawīta. // hašš warṯṯa, ʕaya la affīšən nzurʕenna b-lipp?', '004. inəbraḏ — ext bann nišwi? // bann nišči ḳūrəl mišwi, // w mina mann nayṯi ḥamra?', '001. ḥmičča sallīḳa ʕa ḏayra // w zannīra b-zunnōrəs sayra // w lippi la irḥam ġayra // w hōḏ raḥme, berčis sōba.', '004. amrōli: «payṯaḥ iʕli». // amrilla: «p-ḥaṣṣe nṭayyer.»', '002. hōš nizlōli ʕa payṯa w niṯyōli, // immi p-tarba hōši minčaḳyōli. // hačči l-ḥōlax zellax w_ana l-ḥōli, // w ʕrōba čḳōmez ʕa payṯa ḳmōza.', '004. awʕax čīmar: batt w batt // mn-ōt tunya, tunya mīṯa. // barnōša ʕa ḥōle mʕatt // mn-īḏe isḳaṭ bə-ḥṭīṯa.', '012. yīfuš ti čṯēx l-ġappe, // čmiščaḥ ġawza w pšōṯa, // ōxel w mišwi b-ʕoppe, // w šōḳel m-maḥərmōṯa.', '011. 
xēfəl matōra ḳōtrin ʕa baġla w mafčel ḥetta yṯuḳnun ex ḥmīra.', '006. maḳimill lanna nšīfa w mawḳfin maʕ ġrōsa, ṯyillun atar ġayrayy, xett šappō, ḳaʕyillun ʕal-ōġ ġrōrča, ġōrsin w mʕannyin w masəkrin w mbaṣṭin.', '018. čūb ext_imōḏ, imōḏ ḥarīma baḥar čannīḥan, awwalča ḥarīma baḥar čaʕbōnan.', '009. nafḏiṯ l-ellel, niḥčiṯ ʕa mʕarrṯa, lā ōṯ ʕimm nohra w lā ōṯ ʕimm mett.', '015. ḳōmiṯ zlill ʕa napka.', '009. w ḥrēna žabḏunne mōya ḥetta amṭunne r-reḥyil ḳamṣa l-awwalčiš šiḳya.', '010. ḏukkil ataḳ mōyər rayša, amrilla: «wuš ḳʕāš niḥəm!»', '005. mišwille b-ann finžanō, mišwille ʕa ffōye ġawzō másalan mšakklille, w mḏ̣ayīfin bē.', '028. «nōz nišč nofəšṯa.»', '016. nmišwille ʕal-anna ġāz, w nmayṯyin kuppōyṯa w nmayṯyin maṣṣōṣča w nmayṯyin ʕolpṯis sukker w nmišwillen ḳummaynaḥ.', '005. xaṭərṯa aʕzmannaḥ ḥōl nislaḳ leʕle ʕa mazraʕṯa nsaʕitenne p-ḳulḳās w filō.', '001. xarma raḏyille xanūnay w raḏyille ʕamlay.', '009. w ōbəl xalil ʕam-nayyarəl ʕaraḳ w əl-māza, w ayṯull lanna mšammaʕ w fartunne ʕa mluṯṯil lōḏ sahəlṯa.', '005. nmišwēle p-ḳoffṯa w nkamarle ḥamša šečča yūm.', '018. amrille: «lā, bann nzill.»', '003. awwalča miččažʕin ommṯa hōxa bə-blōta — čūṯ ḥkimō.', '016. ṯill ayṯiččil lōx xšurīṯa w zabničča, w ayṯiṯ maḳəḏḥa, w ṯiḳniṯ nmišwēla ḳiḏḥō ḳiḏḥō l-ōx xšurīṯa.', '006. baʕdēn nḳatrilla m-ṭarfa p-ḥūṭa.', '090. ʕayniṯ ana, aʕžbačč ʕezza, nḳōyem mamelle: «hōʕ ʕezza p-ṯmēn warḳan. ayṯa kmōlča!»', '010. ōmar: «wrāx, la yʕaṣṣrunnaḥ!»', '008. amrille: «wrāx, ana ču nfakker nḳaṭʕiṯ ʕa tarba ġēr l-emmat šarikiččax.»', '039. bess yḥassel ramla xulle sawa, yīb iṯḳen felkiš šaʕṯa, yaʕni ʕisər w ḥammeš miṣrōyan.', '004. w ġufərlēḥ ḥṭiyōṯaḥ, // ex min nġafərlill ti maḥṭ ʕimmaynaḥ.', '003. mayṯyin ḥiṭṭō w xušnō w sʕarō w zarʕillun p-ṣaḥnō zʕūrin xann, ḥetta yirbun.', '014. rēbeʕ ḥašoppa m-ṣawma ušme ḥašoppa ti zahra w warta.', '009. bess yḥassel xenša w čḥassel ṣlōṯa, xett mražīʕin tōpkin ext_awwal yōma lə-ʕrōba.', '008. m-mar lawandīyus ṣlōṯa l-ʕāde.', '003. 
hanna yōma, yōmlə sčašhet bē hanna ḳattēša šwunne ʕēḏa lēle.', '045. ṯōḳen tōḳeḳ naḳōsəl mar_ilyas w naḳōsəl berkṯa w ġayre w sawōye l-ḥāṣlo.', '007. hann mašəʕlō ṯōḳnin ḳīsa, ṭūle mett iṯər mičər, šammen xann ḳalles, w b-rayšil ḳīsa ōṯ ʕolpṯa m-ḥatīta aw ṣōža.', '035. w hōxa atar mballeš... šaġlōṯa ḥrinyōṯa.', '004. mḥáttitin yōma ti batte yišwull awwal ʔurbāne bē.', '033. čbaḳḳaṯ xann ṭūlčil ḥayōṯa.', '005. ōmar: «ē!»', '013. «ču nimkarr, imeṯ ḏōḏ.»', '011. amerlun: «ḥmōn hōxa ġapplə šbabō!»', '004. ōmar: «ʕṣofra.»', '010. «hačč mō čayyeṯ?»', '012. aḳam alṭun rayḥan batte yṣōraʕ hū w ayyub.', '003. anaḥ šimʕinnaḥ keləmṯa, affiččun ču tayyirill balayy w ḳōmiṯ zlill ščiččil ḥamra.', '001. xaṭərṯa nībin nsallīḳin m-dūma, nassīḳin banadōra mxaramča l-šarāb.', '002. bess ḥmiččun ana — sakrōnin —, ḳōmiṯ sakkriṯ w šamṭiṯ minnayy.', '011. w haṯinn aʕleḳ b baʕḏ̣inn w anaḥ hōxa nḏ̣ōḥkin.', '040. batte yizʕuḳ «yā ʕaḏra», lorkaʕ infeḳ ḥesse.', '001. wōṯ l-aḥḥaḏ ġabrōna eččṯa, čuṯ šunīṯa aḥla menna b-ʕōlma.', '004. yōma mn-ann yumō infeḳ ōbəl yawse ʕa ʕarḳūba ti baʕʕeḏ ḥetta yayṯ ḳalles ḏlūḳa, ḳalles xšūra, willa išmeʕ ḥessa.', '354. «ana mann napplēle!»', '091. amrōle: «ana ḳiṣəṯ xāāānn xāāānn xāāānn. affḳanni bnōṯəl ḥōlčax, aḳʕanni p-šimša.', '035. amrilla: la, atʕāy aʕle!', '003. amella: «mō bann niščġel?»', '012. wōb ʕam-fakkar yḳuṭlenne, bess la irəṣ.', '021. amrōla: «uxxul ma čmaḥəḳya čaffeḳ m-ṯimmiš žawəhrōṯa!»', '007. sčahtaṯ ʕal_eḥḏa, ḳōyma hōḏ amralla: «ṭalpiš ġapp ana. šarṭōš hann arpʕa ana nwaffīḳa ʕlayy w nraṣṣīya bōn.»', '012. išw semla w iḳlab, willa ḳaʕya elġul mapṣūṭ, amella: «wrēš mō ḳiṣṣṯiš?»', '025. aḳam applēle.', '017. aḳam ʕṣofra ščḥunne arḥel.', '015. taššrunne w zalle.', '012. isleḳ lahhīyin ʕam-mōxlin.', '004. bōṯar čiḳrīban felkiš šaʕṯa aḳa ḥōne ščafəḳte.', '010. bess hū m-zaʕle w ʕemmiš šaṯṯ w hōṯe ičbar p-xōṭre, miḥəl ḏrōʕe mett yaffeḳ besra, aḳam ameṯ.', '014. amrulle: «čūṯ ġappaynaḥ.»', '023. ʕōbra, w surtōbiš šenna rabbi.', '033. 
ḏukkil iṭlab xōla, aḳam hann ḥkimō čʕažžab lə-brōm wakīn.', '021. amelle: «fōk hann ḥablō m-ʕal-anna ġešra!»', '007. ḳʕōle mišṭaʕ b-iṯər sayf, ṭarḳil īḏe p-ḥelsil awwal aḥḥaḏ w ōfez, ḳaṭṭaʕ maʕ šobʕa ḥōḏ, maʕ šobʕa ḥmōr.', '003. zōyʕin menne ommṯa, hū ižreʕ, ču mahemmle.', '003. aḳa hanna šappa batte yṣaḥḥenna, amella: «ḳūm yḳuṭʕell ʕumriš! ana ʕanmōzaḥ ʕimmiš.»', '005. baʕdēn illa naffeḏ aḥḥaḏ mažnun xwōṯe amellun: «ana nmaḥḥečle.»', '001. ōṯ ġabrōna m-ḥilpul, ġappe psōna ʕomre uppe eʕsar išən.', '004. xaṭpa w kallel aʕla.', '003. waḳčil ḥimne ḥkīma xann, amelle: «xalaṣ, hačči ṯiḳnič kayyes, šoppṯa ḥrīṯa bann naffennax čzellax ʕa tiḏōx, lōfaš uppax mett.»', '012. amelle: «ʕalle mʕallaḳ ʕa ṯarʕit tikkōna!»', '011. waxma taḳḳe w naʕʕme w šūne ʕa ffōyəl ʕolpṯa w zalle l-ʕa harūn ər-rašīd.', '004. amrōle: «ē, ṭabʕan, nyaḏḏīʕa innu ṯiḳninnaḥ šaxṣa aḥḥaḏ, bess la činəš čiṭlub xōla l-iṯər!»', '010. aḥref ebre zʕōra, amelle: «ana nhamešle w nmaṣeṣle w ntaḳeḳle w nsafefle.»', '011. amelle: «ana nʔažərlēle ḥmōra nʔažərlēle fayya?»', '003. maʕzmōle: «čfaḏ̣ḏ̣āl yā ḳašīša, našḳennax ḳahwe, čfaḏ̣ḏ̣āl!»', '005. W-ḥayyil l-ann ʕaynō! Čūl meʕle ġnō. // Ex ūle leppa yiṣlinni ex ṣafərnō.']
print(len(sents_ann))

100


Get annotation scores for each of the four gold lists (all tokens, unique tokens, non-EMPTY tokens, unique non-EMPTY tokens):

In [59]:
def _new_score_row():
    """Return a zeroed score record; its key order fixes the dataframe column order."""
    return {"total": 0, "total_percent": 0, "corr_lemma": 0, "corr_lemma_percent": 0, "corr_pos": 0, "corr_pos_percent": 0, "corr_gramm": 0, "corr_gramm_percent": 0}


def _percent(part, whole):
    """Return ``part / whole`` as a percentage rounded to 2 places (0.0 if ``whole`` is 0)."""
    if not whole:
        return 0.0
    return round(part * 100 / whole, 2)


def get_annotation_scores(annotations, sents_ann=None, analyzer=None):
    """Score the morphological analyzer against a gold annotation.

    Every gold token is analysed on its own. Analyses with an empty lemma are
    ignored. For each token the lemma, the POS tag (first item of
    ``ana.gramm``) and the grammatical features (remaining items, compared as
    a set) each count as correct if *any* analysis matches the gold value. A
    token is "totally correct" if at least one single analysis matches on all
    three at once.

    Prints the number of totally correct words, the number of words and the
    weighted arithmetic mean of the per-POS accuracies.

    Parameters
    ----------
    annotations : list
        Gold tokens as ``[wordform, lemma, pos_tag, feature_set]``, as
        produced by ``get_gold_annotation``.
    sents_ann : optional
        Not used by the computation; kept so existing calls keep working.
    analyzer : optional
        Object with an ``analyze_words(word)`` method returning analyses that
        have ``lemma`` and ``gramm`` attributes. Defaults to the notebook-level
        analyzer ``a``.

    Returns
    -------
    pandas.DataFrame
        One row per part of speech plus a final ``"all"`` row, with counts and
        percentages of correct lemmata, POS tags and grammatical features.
    """
    if analyzer is None:
        analyzer = a  # analyzer created earlier in the notebook

    posses = ["NOUN", "VERB", "AUX", "ADJ", "ADV", "PRON", "NUM", "SCONJ", "CCONJ", "DET", "PREP", "PART", "INTJ", "PROPN"]
    scores = {pos: _new_score_row() for pos in posses}

    total_words = len(annotations)
    correct_words = 0

    for an_word in annotations:
        gold_lemma, pos_tag, gold_feats = an_word[1], an_word[2], an_word[3]

        # A gold tag outside the predefined list gets its own row instead of
        # aborting the whole evaluation with a KeyError.
        row = scores.setdefault(pos_tag, _new_score_row())
        row["total"] += 1

        lemma_ok = pos_ok = gramm_ok = word_ok = False
        for ana in analyzer.analyze_words(an_word[0]):
            if ana.lemma == "":
                continue
            tags = ana.gramm.split(",")
            lemma_match = ana.lemma == gold_lemma
            pos_match = tags[0] == pos_tag
            gramm_match = set(tags[1:]) == gold_feats

            lemma_ok = lemma_ok or lemma_match
            pos_ok = pos_ok or pos_match
            gramm_ok = gramm_ok or gramm_match
            word_ok = word_ok or (lemma_match and pos_match and gramm_match)

        row["corr_lemma"] += int(lemma_ok)
        row["corr_pos"] += int(pos_ok)
        row["corr_gramm"] += int(gramm_ok)
        # Count the word once, however many of its analyses are fully correct.
        correct_words += int(word_ok)

    print("Total correct words: ", correct_words)
    print("Total words: ", total_words)

    # Summary row, added last so that it stays the final row of the dataframe.
    summary = _new_score_row()
    for key in ("total", "corr_lemma", "corr_pos", "corr_gramm"):
        summary[key] = sum(row[key] for row in scores.values())
    scores["all"] = summary
    all_total = summary["total"]

    # _percent returns 0.0 for a part of speech without gold tokens (and for
    # empty input) instead of dividing by zero.
    for row in scores.values():
        row["total_percent"] = _percent(row["total"], all_total)
        row["corr_lemma_percent"] = _percent(row["corr_lemma"], row["total"])
        row["corr_pos_percent"] = _percent(row["corr_pos"], row["total"])
        row["corr_gramm_percent"] = _percent(row["corr_gramm"], row["total"])

    # Per-POS accuracies weighted by each POS's share of the tokens; built from
    # the already rounded percentages.
    wam = {"lemma_wam": 0, "pos_wam": 0, "gramm_wam": 0}
    for pos, row in scores.items():
        if pos != "all":
            wam["lemma_wam"] += row["corr_lemma_percent"] * row["total_percent"]
            wam["pos_wam"] += row["corr_pos_percent"] * row["total_percent"]
            wam["gramm_wam"] += row["corr_gramm_percent"] * row["total_percent"]
    for key in wam:
        wam[key] = round(wam[key] / 100, 2)

    scores_df = pd.DataFrame(scores).drop(["total_percent"]).transpose()
    count_columns = ['total', 'corr_lemma', "corr_pos", "corr_gramm"]
    scores_df[count_columns] = scores_df[count_columns].astype(int)

    print("The weighted arithmetic mean: ", wam)

    return scores_df

In [55]:
# Scores on ann1: every gold token, including those whose lemma is "EMPTY".
get_annotation_scores(ann1, sents_ann)

Total correct words:  938
Total words:  1055
The weighted arithmetic mean:  {'lemma_wam': 89.49, 'pos_wam': 89.68, 'gramm_wam': 88.92}


Unnamed: 0,total,corr_lemma,corr_lemma_percent,corr_pos,corr_pos_percent,corr_gramm,corr_gramm_percent
NOUN,251,226,90.04,227,90.44,225,89.64
VERB,296,227,76.69,228,77.03,222,75.0
AUX,38,38,100.0,38,100.0,38,100.0
ADJ,21,19,90.48,19,90.48,19,90.48
ADV,53,47,88.68,47,88.68,47,88.68
PRON,48,48,100.0,48,100.0,48,100.0
NUM,23,23,100.0,23,100.0,23,100.0
SCONJ,20,18,90.0,18,90.0,18,90.0
CCONJ,83,83,100.0,83,100.0,83,100.0
DET,29,29,100.0,29,100.0,29,100.0


In [56]:
# Scores on ann2: unique gold tokens, including those whose lemma is "EMPTY".
get_annotation_scores(ann2, sents_ann)

Total correct words:  557
Total words:  670
The weighted arithmetic mean:  {'lemma_wam': 84.03, 'pos_wam': 84.33, 'gramm_wam': 83.13}


Unnamed: 0,total,corr_lemma,corr_lemma_percent,corr_pos,corr_pos_percent,corr_gramm,corr_gramm_percent
NOUN,208,185,88.94,186,89.42,184,88.46
VERB,258,191,74.03,192,74.42,186,72.09
AUX,18,18,100.0,18,100.0,18,100.0
ADJ,20,18,90.0,18,90.0,18,90.0
ADV,29,23,79.31,23,79.31,23,79.31
PRON,21,21,100.0,21,100.0,21,100.0
NUM,14,14,100.0,14,100.0,14,100.0
SCONJ,10,8,80.0,8,80.0,8,80.0
CCONJ,2,2,100.0,2,100.0,2,100.0
DET,16,16,100.0,16,100.0,16,100.0


In [57]:
# Scores on ann3: every gold token whose lemma is not "EMPTY".
get_annotation_scores(ann3, sents_ann)

Total correct words:  938
Total words:  952
The weighted arithmetic mean:  {'lemma_wam': 99.18, 'pos_wam': 99.39, 'gramm_wam': 98.55}


Unnamed: 0,total,corr_lemma,corr_lemma_percent,corr_pos,corr_pos_percent,corr_gramm,corr_gramm_percent
NOUN,229,226,98.69,227,99.13,225,98.25
VERB,228,227,99.56,228,100.0,222,97.37
AUX,38,38,100.0,38,100.0,38,100.0
ADJ,19,19,100.0,19,100.0,19,100.0
ADV,49,47,95.92,47,95.92,47,95.92
PRON,48,48,100.0,48,100.0,48,100.0
NUM,23,23,100.0,23,100.0,23,100.0
SCONJ,19,18,94.74,18,94.74,18,94.74
CCONJ,83,83,100.0,83,100.0,83,100.0
DET,29,29,100.0,29,100.0,29,100.0


In [58]:
# Scores on ann4: unique gold tokens whose lemma is not "EMPTY".
get_annotation_scores(ann4, sents_ann)

Total correct words:  557
Total words:  571
The weighted arithmetic mean:  {'lemma_wam': 98.6, 'pos_wam': 98.95, 'gramm_wam': 97.55}


Unnamed: 0,total,corr_lemma,corr_lemma_percent,corr_pos,corr_pos_percent,corr_gramm,corr_gramm_percent
NOUN,188,185,98.4,186,98.94,184,97.87
VERB,192,191,99.48,192,100.0,186,96.88
AUX,18,18,100.0,18,100.0,18,100.0
ADJ,18,18,100.0,18,100.0,18,100.0
ADV,25,23,92.0,23,92.0,23,92.0
PRON,21,21,100.0,21,100.0,21,100.0
NUM,14,14,100.0,14,100.0,14,100.0
SCONJ,9,8,88.89,8,88.89,8,88.89
CCONJ,2,2,100.0,2,100.0,2,100.0
DET,16,16,100.0,16,100.0,16,100.0
