# Parse morphology

In [1]:
import collections
import pandas as pd

In [2]:
from tf.app import use
MT = use('etcbc/bhsa', hoist=globals())
SP = use('DT-UCPH/sp:hot', hoist=globals())

The requested data is not available offline
	~/text-fabric-data/github/DT-UCPH/sp/tf/1.3.3 not found
	cannot find releases
	cannot find releases


In [3]:
features = ['g_cons',
            'lex',
            'mt_feat']

In [4]:
feature_dict = collections.defaultdict(lambda: collections.defaultdict())

for w in SP.api.F.otype.s('word'):
    feature_dict[w]['ref'] = '''{} {} {}'''.format(*SP.api.T.sectionFromNode(w))
    
    for f in features:
        feat = f'SP.api.F.{f}.v({w})'
        feature_dict[w][f] = eval(feat)
        
df = pd.DataFrame(feature_dict).T
df

Unnamed: 0,ref,g_cons,lex,mt_feat
101908,Genesis 1 1,B,B,True
101909,Genesis 1 1,R>CJT,R>CJT/,True
101910,Genesis 1 1,BR>,BR>[,True
101911,Genesis 1 1,>LHJM,>LHJM/,True
101912,Genesis 1 1,>T,>T,True
...,...,...,...,...
130847,Genesis 50 26,JWFM,FJM[,False
130848,Genesis 50 26,B,B,True
130849,Genesis 50 26,>RN,>RWN/,False
130850,Genesis 50 26,B,B,True


In [59]:
SP.displaySetup(extraFeatures=["lex", "sp", "g_nme", "g_vbe","g_prs"])
MT.displaySetup(extraFeatures=["lex", "sp", "g_nme", "g_vbe","prs_gn","prs_nu","prs_ps",'g_prs','prs'])

def show(w, feat):
    print(eval(f'df[df.index == w].{feat}.item()'))
    SP.pretty(SP.api.L.u(w, 'verse')[0], highlights={w: 'gold'})
    bo, ch, ve = SP.api.T.sectionFromNode(w)
    mt_ref = MT.api.T.nodeFromSection((bo, int(ch), int(ve)))
    MT.pretty(mt_ref)

## Tense

In [58]:
vt = []

for w in SP.api.F.otype.s('word'):
    if SP.api.F.sp.v(w) == 'verb':
        
        #Participles and infinitives
        if SP.api.F.g_nme.v(w): 
            if not SP.api.F.g_pfm.v(w):
                vt.append('ptca|ptcp')
            elif SP.api.F.g_pfm.v(w) in {'!!','!H!','!>!'}:
                if SP.api.F.g_prs.v(w) or (SP.api.F.sp.v(w-1) == 'prep' and SP.api.F.trailer.v(w-1) == '') or SP.api.F.g_nme.v(w) in {'/T','/>','/WT','/WTJ','/TN'}:
                    vt.append('infc')
                elif SP.api.F.lex.v(w-1) == SP.api.F.lex.v(w) or SP.api.F.lex.v(w+1) == SP.api.F.lex.v(w):
                    vt.append('infa')
                else:
                    vt.append('infc|infa')
            else:
                vt.append('infc|ptca|ptcp')
        
        #Qatal, yiqtol and imperatives
        else:
            if SP.api.F.g_pfm.v(w):
                if SP.api.F.g_pfm.v(w) in {'!!','!H!'}:
                    vt.append('impv')
                elif SP.api.F.lex.v(w-1) == 'W' and SP.api.F.trailer.v(w-1) == '':
                    vt.append('wayq')
                else:
                    vt.append('impf')
            else:
                vt.append('perf')
    else:
        vt.append('NA')
        
df['vt'] = vt
df

Unnamed: 0,ref,g_cons,lex,mt_feat,vt
101887,Genesis 1 1,B,B,True,
101888,Genesis 1 1,R>CJT,R>CJT/,True,
101889,Genesis 1 1,BR>,BR>[,True,perf
101890,Genesis 1 1,>LHJM,>LHJM/,True,
101891,Genesis 1 1,>T,>T,True,
...,...,...,...,...,...
130814,Genesis 50 26,JWFM,FJM[,False,wayq
130815,Genesis 50 26,B,B,True,
130816,Genesis 50 26,>RN,>RWN/,False,
130817,Genesis 50 26,B,B,True,


In [60]:
df[df.vt=='ptcp']

Unnamed: 0,ref,g_cons,lex,mt_feat,vt
103325,Genesis 3 14,>RWR,>RR[,True,ptcp
103410,Genesis 3 17,>RWRH,>RR[,True,ptcp
103729,Genesis 4 11,>RWR,>RR[,True,ptcp
106346,Genesis 9 25,>RWR,>RR[,True,ptcp
106355,Genesis 9 26,BRWK,BRK[,True,ptcp
108571,Genesis 14 19,BRWK,BRK[,True,ptcp
108581,Genesis 14 20,BRWK,BRK[,True,ptcp
112585,Genesis 22 17,BRWK,BRK[,False,ptcp
113614,Genesis 24 27,BRWK,BRK[,True,ptcp
113713,Genesis 24 31,BRWK,BRK[,True,ptcp


In [59]:
def check(df):
    mismatches = []
    
    for n, row in df.iterrows():
        if row['mt_feat'] == 'True':
            bo, ch, ve = row['ref'].split()
            mt_ref = MT.api.T.nodeFromSection((bo, int(ch), int(ve)))
            mt_words = MT.api.L.d(mt_ref, 'word')
            for w in mt_words:
                if MT.api.F.g_cons.v(w) == row['g_cons'] and MT.api.F.lex.v(w) == row['lex']:
                    if MT.api.F.vt.v(w) != row['vt']: #and row['vt'] not in {'infc|infa','infc|infa|ptca|ptcp','ptca|ptcp'}:
                        mismatches.append(n)
                        break
    return mismatches

mismatches = check(df)
len(mismatches)

230

In [51]:
export_df = pd.DataFrame(df['vt'])
export_df.columns = [0]
export_df.to_csv(f'./data/vt.csv', index=0)

In [63]:
show(mismatches[n])
n+=1

infc|ptca|ptcp


## Person

In [114]:
ps = []

for w in SP.api.F.otype.s('word'):
    if SP.api.F.sp.v(w) == 'verb':
        if SP.api.F.vt.v(w) == 'impv':
            ps.append('p2')
        elif SP.api.F.vt.v(w) == 'perf':
            if SP.api.F.g_vbe.v(w) in {'[TJ','[NW'}:
                ps.append('p1')
            elif SP.api.F.g_vbe.v(w) in {'[TN','[TM','[T'}:
                ps.append('p2')
            elif SP.api.F.g_vbe.v(w) in {'[TH'}:
                ps.append('p2|p3')
            elif SP.api.F.g_vbe.v(w) in {'[W','[WN','[H','[HN','['}:
                ps.append('p3')
            else:
                ps.append('p1|p2|p3')
        elif SP.api.F.vt.v(w) in {'impf','wayq'}:
            if SP.api.F.g_pfm.v(w) in {'!>!','!N!'}:
                ps.append('p1')
            elif SP.api.F.g_pfm.v(w) in {'!T!'} and SP.api.F.g_vbe.v(w) in {'[J','[W'}:
                ps.append('p2')
            elif SP.api.F.g_pfm.v(w) in {'!T!'} and SP.api.F.g_vbe.v(w) in {'[NH','['}:
                ps.append('p2|p3')
            elif SP.api.F.g_pfm.v(w) in {'!J!'}:
                ps.append('p3')
            else:
                ps.append('p1|p2|p3')
        else:
            ps.append('unknown')
    elif SP.api.F.lex.v(w) in {'>NJ', '>NKJ', '>NXNW', 'NXNW'}:
        ps.append('p1')
    elif SP.api.F.lex.v(w) in {'>T=', '>TH', '>TM', '>TN', '>TNH'}:
        ps.append('p2')
    elif SP.api.F.lex.v(w) in {'HJ>', 'HM', 'HMH', 'HNH=', 'HW>'}:
        ps.append('p3')
    else:
        ps.append('NA')
        
len(ps)

28935

In [115]:
df['ps'] = ps
df

Unnamed: 0,ref,g_cons,lex,mt_feat,ps
101886,Genesis 1 1,B,B,True,
101887,Genesis 1 1,R>CJT,R>CJT/,True,
101888,Genesis 1 1,BR>,BR>[,True,p3
101889,Genesis 1 1,>LHJM,>LHJM/,True,
101890,Genesis 1 1,>T,>T,True,
...,...,...,...,...,...
130816,Genesis 50 26,JWFM,FJM[,False,p3
130817,Genesis 50 26,B,B,True,
130818,Genesis 50 26,>RN,>RWN/,False,
130819,Genesis 50 26,B,B,True,


In [116]:
def check(df):
    mismatches = []
    
    for n, row in df.iterrows():
        if row['mt_feat'] == 'True':
            bo, ch, ve = row['ref'].split()
            mt_ref = MT.api.T.nodeFromSection((bo, int(ch), int(ve)))
            mt_words = MT.api.L.d(mt_ref, 'word')
            for w in mt_words:
                if MT.api.F.g_cons.v(w) == row['g_cons'] and MT.api.F.lex.v(w) == row['lex']:
                    if MT.api.F.ps.v(w) != row['ps'] and row['ps'] != 'p2|p3':
                        mismatches.append(n)
                        break
    return mismatches

mismatches = check(df)
len(mismatches)

30

In [127]:
export_df = pd.DataFrame(df['ps'])
export_df.columns = [0]
export_df.to_csv(f'./data/ps.csv', index=0)

## Suffix person

In [33]:
prs_ps = []

for w in SP.api.F.otype.s('word'):
    if SP.api.F.g_prs.v(w) in {'+J','+NJ','+NW'}:
        prs_ps.append('p1')
    elif SP.api.F.g_prs.v(w) in {'+K','+KH','+KM','+KN'}:
        prs_ps.append('p2')
    elif SP.api.F.g_prs.v(w) in {'+H','+HM','+HN','+HW','+M','+MW','+NH','+W'}:
        prs_ps.append('p3')
    elif SP.api.F.g_prs.v(w) == '+':
        prs_ps.append('p1|p3')
    else:
        prs_ps.append('NA')
    
len(prs_ps)

28932

In [34]:
df['prs_ps'] = prs_ps
df

Unnamed: 0,ref,g_cons,lex,mt_feat,prs_ps
101883,Genesis 1 1,B,B,True,
101884,Genesis 1 1,R>CJT,R>CJT/,True,
101885,Genesis 1 1,BR>,BR>[,True,
101886,Genesis 1 1,>LHJM,>LHJM/,True,
101887,Genesis 1 1,>T,>T,True,
...,...,...,...,...,...
130810,Genesis 50 26,JWFM,FJM[,False,
130811,Genesis 50 26,B,B,True,
130812,Genesis 50 26,>RN,>RWN/,False,
130813,Genesis 50 26,B,B,True,


In [36]:
def check(df):
    mismatches = []
    
    for n, row in df.iterrows():
        if row['mt_feat'] == 'True':
            bo, ch, ve = row['ref'].split()
            mt_ref = MT.api.T.nodeFromSection((bo, int(ch), int(ve)))
            mt_words = MT.api.L.d(mt_ref, 'word')
            for w in mt_words:
                if MT.api.F.g_cons.v(w) == row['g_cons'] and MT.api.F.lex.v(w) == row['lex']:
                    if MT.api.F.prs_ps.v(w) != row['prs_ps']:
                        mismatches.append(n)
                        break
    return mismatches

mismatches = check(df)
len(mismatches)

109

In [37]:
export_df = pd.DataFrame(df['prs_ps'])
export_df.columns = [0]
export_df.to_csv(f'./data/prs_ps.csv', index=0)

## Number

In [7]:
nu = []

for w in SP.api.F.otype.s('word'):
    
    if SP.api.F.lex.v(w) in {'CNJM/'}:
        nu.append('du')
    
    elif SP.api.F.vt.v(w) in {'infc','infa'}:
        nu.append('unknown')
        
    elif SP.api.F.vt.v(w) in {'perf','impv'}:
        if SP.api.F.g_vbe.v(w) in {'[','[H','[J','[T','[TH','[TJ'}:
            nu.append('sg')
        elif SP.api.F.g_vbe.v(w) in {'[NW','[TM','[TN','[W','[WN'}:
            nu.append('pl')
        else:
            nu.append('sg|pl')
            
    elif SP.api.F.vt.v(w) in {'impf','wayq'}:
        if SP.api.F.g_pfm.v(w) == '!N!' or SP.api.F.g_vbe.v(w) in {'[HN','[NH','[W','[WN'}:
            nu.append('pl')
        elif SP.api.F.g_pfm.v(w) == '!>!' or SP.api.F.g_vbe.v(w) in {'[','[J','[JN','[TJ'}:
            nu.append('sg')
        else:
            nu.append('sg|pl')
                
    elif SP.api.F.sp.v(w) == 'prin':
        nu.append('unknown')
            
    elif SP.api.F.sp.v(w) in {'adjv','nmpr','subs'} or SP.api.F.vt.v(w) in {'ptca','ptcp'}:
        if SP.api.F.g_nme.v(w) in {'/','/H','/T'}:
            nu.append('sg')
        elif SP.api.F.g_nme.v(w) in {'/J','/JM','/M','/TJ','/WT','/WTJ'}:
            nu.append('pl')
        elif SP.api.F.g_nme.v(w) in {'/TJM'}:
            nu.append('du')
        else:
            nu.append('sg|pl')
        
    elif SP.api.F.sp.v(w) in {'prde','prps'}:
        if SP.api.F.lex.v(w) in {'>L===','>LH','>NXNW','>TM','>TN','>TNH','HM','HMH','HNH=','NXNW'}:
            nu.append('pl')
        elif SP.api.F.lex.v(w) in {'LZ','LZH','LZW','Z>T','ZH','ZH=','ZW=','>NJ','>NKJ','>T=','>TH','HJ>','HW>'}:
            nu.append('sg')
        else:
            nu.append('sg|pl')
        
    else:
        nu.append('NA')
        
len(nu)

28932

In [8]:
df['nu'] = nu
df

Unnamed: 0,ref,g_cons,lex,mt_feat,nu
101883,Genesis 1 1,B,B,True,
101884,Genesis 1 1,R>CJT,R>CJT/,True,sg
101885,Genesis 1 1,BR>,BR>[,True,sg
101886,Genesis 1 1,>LHJM,>LHJM/,True,pl
101887,Genesis 1 1,>T,>T,True,
...,...,...,...,...,...
130810,Genesis 50 26,JWFM,FJM[,False,sg
130811,Genesis 50 26,B,B,True,
130812,Genesis 50 26,>RN,>RWN/,False,sg
130813,Genesis 50 26,B,B,True,


In [9]:
def check(df):
    mismatches = []
    
    for n, row in df.iterrows():
        if row['mt_feat'] == 'True':
            bo, ch, ve = row['ref'].split()
            mt_ref = MT.api.T.nodeFromSection((bo, int(ch), int(ve)))
            mt_words = MT.api.L.d(mt_ref, 'word')
            for w in mt_words:
                if MT.api.F.g_cons.v(w) == row['g_cons'] and MT.api.F.lex.v(w) == row['lex']:
                    if MT.api.F.nu.v(w) != row['nu']:
                        mismatches.append(n)
                        break
    return mismatches

mismatches = check(df)
len(mismatches)

205

In [139]:
n=0

In [68]:
#show(mismatches[n])
n+=1

NameError: name 'n' is not defined

In [10]:
export_df = pd.DataFrame(df['nu'])
export_df.columns = [0]
export_df.to_csv(f'./data/nu.csv', index=0)

## Suffix number

In [88]:
prs_nu = []

for w in SP.api.F.otype.s('word'):
    if SP.api.F.g_prs.v(w) in {'+','+J','+NJ','+K','+KH','+H','+HW','+W'}:
        prs_nu.append('sg')
    elif SP.api.F.g_prs.v(w) in {'+NW','+KM','+KN','+HM','+HN','+M','+MW','+NH'}:
        prs_nu.append('pl')
    else:
        prs_nu.append('NA')
    
len(prs_nu)

28938

In [89]:
df['prs_nu'] = prs_nu
df

Unnamed: 0,ref,g_cons,lex,mt_feat,prs_nu
101908,Genesis 1 1,B,B,True,
101909,Genesis 1 1,R>CJT,R>CJT/,True,
101910,Genesis 1 1,BR>,BR>[,True,
101911,Genesis 1 1,>LHJM,>LHJM/,True,
101912,Genesis 1 1,>T,>T,True,
...,...,...,...,...,...
130841,Genesis 50 26,JWFM,FJM[,False,
130842,Genesis 50 26,B,B,True,
130843,Genesis 50 26,>RN,>RWN/,False,
130844,Genesis 50 26,B,B,True,


In [90]:
def check(df):
    mismatches = []
    
    for n, row in df.iterrows():
        if row['mt_feat'] == 'True':
            bo, ch, ve = row['ref'].split()
            mt_ref = MT.api.T.nodeFromSection((bo, int(ch), int(ve)))
            mt_words = MT.api.L.d(mt_ref, 'word')
            for w in mt_words:
                if MT.api.F.g_cons.v(w) == row['g_cons'] and MT.api.F.lex.v(w) == row['lex']:
                    if MT.api.F.prs_nu.v(w) != row['prs_nu']:
                        mismatches.append(n)
                        break
    return mismatches

mismatches = check(df)
len(mismatches)

13

In [91]:
n=0

In [109]:
show(mismatches[n], feat='prs_nu')
n+=1

IndexError: list index out of range

In [110]:
export_df = pd.DataFrame(df['prs_nu'])
export_df.columns = [0]
export_df.to_csv(f'./data/prs_nu.csv', index=0)

## Gender

In [7]:
gn = []

for w in SP.api.F.otype.s('word'):
    
    if SP.api.F.vt.v(w) in {'infc','infa'}:
        gn.append('unknown')
        
    elif SP.api.F.vt.v(w) in {'perf','impv'}:
        if SP.api.F.g_vbe.v(w) in {'[','[TM'}:
            gn.append('m')
        elif SP.api.F.vt.v(w) == 'impv' and SP.api.F.g_vbe.v(w) in {'[W','[H'}:
            gn.append('m')
        elif SP.api.F.g_vbe.v(w) in {'[H','[J','[TN'}:
            gn.append('f')
        elif SP.api.F.g_vbe.v(w) in {'[NW','[TJ','[W','[WN'}:
            gn.append('unknown')
        elif SP.api.F.g_vbe.v(w) in {'[TH'} and SP.api.F.ps.v(w) == 'p2':
            gn.append('m')
        elif SP.api.F.g_vbe.v(w) in {'[TH'} and SP.api.F.ps.v(w) == 'p3':
            gn.append('f')
        else:
            gn.append('m/f')
            
    elif SP.api.F.vt.v(w) in {'impf','wayq'}:
        
        if SP.api.F.g_pfm.v(w) in {'!>!','!N!'}:
            gn.append('unknown')
        elif SP.api.F.g_pfm.v(w) == '!J!' or SP.api.F.g_vbe.v(w) in {'[W','[WN'}:
            gn.append('m')
        elif SP.api.F.g_vbe.v(w) in {'[HN','[NH','[J','[JN'}:
            gn.append('f')
        elif SP.api.F.g_pfm.v(w) == '!T!' and SP.api.F.ps.v(w) == 'p2':
            gn.append('m')
        elif SP.api.F.g_pfm.v(w) == '!T!' and SP.api.F.ps.v(w) == 'p3':
            gn.append('f')
        else:
            gn.append('m/f')
                
    elif SP.api.F.sp.v(w) == 'prin':
        gn.append('unknown')
            
    elif SP.api.F.sp.v(w) in {'adjv','nmpr','subs'} or SP.api.F.vt.v(w) in {'ptca','ptcp'}:
        if SP.api.F.mt_feat.v(w) == 'True': #If corresponding MT-word
            bo, ch, ve = SP.api.T.sectionFromNode(w)
            mt_ref = MT.api.T.nodeFromSection((bo, int(ch), int(ve)))
            mt_words = MT.api.L.d(mt_ref, 'word')
            for w1 in mt_words:
                if MT.api.F.g_cons.v(w1) == SP.api.F.g_cons.v(w) and MT.api.F.lex.v(w1) == SP.api.F.lex.v(w):
                    gn.append(MT.api.F.gn.v(w1))
                    break
                    
        elif SP.api.F.g_nme.v(w) == '/' and SP.api.F.mt_feat.v(w) == 'False': #If no corresponding MT-word and no elaborate nominal ending
            gn.append('unknown')
        elif SP.api.F.g_nme.v(w) in {'/J','/JM','/M','/TJM'}:
            gn.append('m')
        elif SP.api.F.g_nme.v(w) in {'/H','/T','/TJ','/WT','/WTJ'}:
            gn.append('f')
        else:
            gn.append('m/f')
        
    elif SP.api.F.sp.v(w) in {'prde','prps'}:
        if SP.api.F.lex.v(w) in {'LZH','ZH','>TH','>TM','HM','HMH','HW>'}:
            gn.append('m')
        elif SP.api.F.lex.v(w) in {'LZW','Z>T','ZH=','ZW=','>T=','>TN','>TNH','HJ>','HNH='}:
            gn.append('f')
        elif SP.api.F.lex.v(w) in {'>L===','>LH','LZ','>NJ','>NKJ','>NXNW','NXNW'}:
            gn.append('unknown')
        else:
            gn.append('m/f')
        
    else:
        gn.append('NA')
        
len(gn)

28938

In [8]:
df['gn'] = gn
df

Unnamed: 0,ref,g_cons,lex,mt_feat,gn
101908,Genesis 1 1,B,B,True,
101909,Genesis 1 1,R>CJT,R>CJT/,True,f
101910,Genesis 1 1,BR>,BR>[,True,m
101911,Genesis 1 1,>LHJM,>LHJM/,True,m
101912,Genesis 1 1,>T,>T,True,
...,...,...,...,...,...
130841,Genesis 50 26,JWFM,FJM[,False,m
130842,Genesis 50 26,B,B,True,
130843,Genesis 50 26,>RN,>RWN/,False,unknown
130844,Genesis 50 26,B,B,True,


In [9]:
def check(df):
    mismatches = []
    
    for n, row in df.iterrows():
        if row['mt_feat'] == 'True':
            bo, ch, ve = row['ref'].split()
            mt_ref = MT.api.T.nodeFromSection((bo, int(ch), int(ve)))
            mt_words = MT.api.L.d(mt_ref, 'word')
            for w in mt_words:
                if MT.api.F.g_cons.v(w) == row['g_cons'] and MT.api.F.lex.v(w) == row['lex']:
                    if MT.api.F.gn.v(w) != row['gn']:
                        mismatches.append(n)
                        break
    return mismatches

mismatches = check(df)
len(mismatches)

128

In [10]:
n=0

In [136]:
show(mismatches[n], 'gn')
n+=1

m/f


In [11]:
export_df = pd.DataFrame(df['gn'])
export_df.columns = [0]
export_df.to_csv(f'./data/gn.csv', index=0)

## Suffix gender

In [11]:
MT.load('g_prs')

True

In [15]:
{MT.api.F.gn.v(w) for w in MT.api.F.otype.s('word') if MT.api.F.language.v(w) == 'Hebrew'}

{'NA', 'f', 'm', 'unknown'}

In [45]:
prs_gn = []

for w in SP.api.F.otype.s('word'):
    if SP.api.F.g_prs.v(w) in {'+KH','+KM','+HW','+W','+HM','+M','+MW'}:
        prs_gn.append('m')
    elif SP.api.F.g_prs.v(w) in {'+H','+KN','+HN','+NH'}:
        prs_gn.append('f')
    elif SP.api.F.g_prs.v(w) == '+K': #Can be both fem and masc.
        if SP.api.F.mt_feat.v(w) == 'True': #If corresponding MT-word
            bo, ch, ve = SP.api.T.sectionFromNode(w)
            mt_ref = MT.api.T.nodeFromSection((bo, int(ch), int(ve)))
            mt_words = MT.api.L.d(mt_ref, 'word')
            for w1 in mt_words:
                if MT.api.F.g_cons.v(w1) == SP.api.F.g_cons.v(w) and MT.api.F.lex.v(w1) == SP.api.F.lex.v(w):
                    prs_gn.append(MT.api.F.prs_gn.v(w1))
                    break
        else:
            prs_gn.append('m/f')       
        
    elif SP.api.F.g_prs.v(w) in {'+','+J','+NJ','+NW'}:
        prs_gn.append('unknown')
    else:
        prs_gn.append('NA')
    
len(prs_gn)

28944

In [46]:
df['prs_gn'] = prs_gn
df

Unnamed: 0,ref,g_cons,lex,mt_feat,prs_gn
101908,Genesis 1 1,B,B,True,
101909,Genesis 1 1,R>CJT,R>CJT/,True,
101910,Genesis 1 1,BR>,BR>[,True,
101911,Genesis 1 1,>LHJM,>LHJM/,True,
101912,Genesis 1 1,>T,>T,True,
...,...,...,...,...,...
130847,Genesis 50 26,JWFM,FJM[,False,
130848,Genesis 50 26,B,B,True,
130849,Genesis 50 26,>RN,>RWN/,False,
130850,Genesis 50 26,B,B,True,


In [47]:
def check(df):
    mismatches = []
    
    for n, row in df.iterrows():
        if row['mt_feat'] == 'True':
            bo, ch, ve = row['ref'].split()
            mt_ref = MT.api.T.nodeFromSection((bo, int(ch), int(ve)))
            mt_words = MT.api.L.d(mt_ref, 'word')
            for w in mt_words:
                if MT.api.F.g_cons.v(w) == row['g_cons'] and MT.api.F.lex.v(w) == row['lex']:
                    if MT.api.F.prs_gn.v(w) != row['prs_gn']:
                        mismatches.append(n)
                        break
    return mismatches

mismatches = check(df)
len(mismatches)

13

In [60]:
n=0

In [74]:
show(mismatches[n], 'prs_gn')
n+=1

IndexError: list index out of range

In [75]:
export_df = pd.DataFrame(df['prs_gn'])
export_df.columns = [0]
export_df.to_csv(f'./data/prs_gn.csv', index=0)

In [None]:
query = '''
word lex=>RY/ gn=unknown nme
'''

MT.show(MT.search(query))

  0.26s 2428 results


In [31]:
MT.pretty(MT.api.T.nodeFromSection(('Genesis',14,23)))