# Tripartite clauses in the BHSA

This notebook selects the tripartite verbless clauses and saves their features in the csv file "tripartite_bib.csv". In the ETCBC database these clauses are encoded as nominal, resumptive clauses with a third person pronoun as subject. Clauses with an EPPr phrase are not included here.

In [1]:
import sys, collections, os, csv

In [2]:
from tf.app import use
# Load the BHSA app; hoist=globals() puts the Text-Fabric API names that the
# rest of the notebook uses (F, E, L, T) into the notebook namespace.
A = use('bhsa', hoist=globals())
# Request both extra features in ONE call: a second displaySetup call with
# extraFeatures replaces the value set by the first one, so 'kind' was dropped.
A.displaySetup(extraFeatures='kind prs')

TF app is up-to-date.
Using annotation/app-bhsa commit d3cf8f0c2ab5d690a0fda14ea31c33da5c5c8483 (=latest)
  in C:\Users\geitb/text-fabric-data/__apps__/bhsa.
Using etcbc/bhsa/tf - c r1.5 in C:\Users\geitb/text-fabric-data
Using etcbc/phono/tf - c r1.2 in C:\Users\geitb/text-fabric-data
Using etcbc/parallels/tf - c r1.2 in C:\Users\geitb/text-fabric-data


**Documentation:** <a target="_blank" href="https://etcbc.github.io/bhsa" title="provenance of BHSA = Biblia Hebraica Stuttgartensia Amstelodamensis">BHSA</a> <a target="_blank" href="https://annotation.github.io/text-fabric/Writing/Hebrew" title="('Hebrew characters and transcriptions',)">Character table</a> <a target="_blank" href="https://etcbc.github.io/bhsa/features/0_home" title="BHSA feature documentation">Feature docs</a> <a target="_blank" href="https://github.com/annotation/app-bhsa" title="bhsa API documentation">bhsa API</a> <a target="_blank" href="https://annotation.github.io/text-fabric/Api/Fabric/" title="text-fabric-api">Text-Fabric API 7.4.8</a> <a target="_blank" href="https://annotation.github.io/text-fabric/Use/Search/" title="Search Templates Introduction and Reference">Search Reference</a>

In [5]:
prose = ['Genesis', 'Exodus', 'Leviticus', 'Numbers', 'Deuteronomy', 'Joshua', 'Judges', '1_Samuel', '2_Samuel', '1_Kings', '2_Kings', 'Jonah', 'Ruth', 'Esther', 'Daniel', 'Ezra', 'Nehemiah', '1_Chronicles', '2_Chronicles']
prophecy = ['Isaiah', 'Jeremiah', 'Ezekiel', 'Hosea', 'Joel', 'Obadiah', 'Micah', 'Zephaniah', 'Haggai', 'Zechariah', 'Malachi', 'Amos', 'Nahum', 'Habakkuk']
poetry = ['Song_of_songs','Proverbs','Ecclesiastes', 'Lamentations', 'Psalms', 'Job']

# Map every book name to the label of the genre list it is part of.
genre_dict = {}
for label, genre in (('prose', prose), ('prophecy', prophecy), ('poetry', poetry)):
    for book in genre:
        genre_dict[book] = label

In [6]:
ebh = ['Genesis', 'Exodus', 'Leviticus', 'Numbers', 'Deuteronomy', 'Joshua', 'Judges', '1_Samuel', '2_Samuel', '1_Kings', '2_Kings']
lbh = ['Esther', 'Daniel', 'Ezra', 'Nehemiah', '1_Chronicles', '2_Chronicles']

# Label each listed book 'ebh' or 'lbh' (presumably early / late Biblical
# Hebrew); books in neither list are simply absent from the dict.
ebh_lbh_dict = dict.fromkeys(ebh, 'ebh')
ebh_lbh_dict.update(dict.fromkeys(lbh, 'lbh'))

# Books that come in two parts, mapped to their common name.
double_books = {
    '1_Samuel': 'Samuel',
    '2_Samuel': 'Samuel',
    '2_Kings': 'Kings',
    '1_Kings': 'Kings',
    '1_Chronicles': 'Chronicles',
    '2_Chronicles': 'Chronicles',
}

In [7]:
def in_dep_calc(cl):
    """Classify a clause as main or subordinate.

    The checks run in this order; the first one that gives a non-empty
    label decides:

    1. the clause relation, through ``rela_calc``. For a resumptive clause
       (``rela == 'Resu'``, the same spelling the clause selection in this
       notebook tests for) the relation of its mother is classified instead
       of its own;
    2. a word with verbal tense ``'wayq'`` in the clause gives ``'Main'``;
    3. the code of the first clause atom, through ``carc_calc``.

    Parameters
    ----------
    cl : int
        Clause node.

    Returns
    -------
    str
        A label produced by ``rela_calc`` or ``carc_calc``, or ``'Main'``.
        Can be the empty string when ``carc_calc`` returns one.
    """
    in_dep = ''
    if F.rela.v(cl) == 'Resu':  # is the clause resumptive?
        mothers = E.mother.f(cl)
        # Without a mother there is nothing to classify here; fall through
        # to the checks below instead of raising an IndexError.
        if mothers:
            in_dep = rela_calc(mothers[0])
    else:
        in_dep = rela_calc(cl)  # does the clause have a dependent CCR?

    if in_dep == '':
        # is there a wayyiqtol? Assign (not append) so that several matching
        # words cannot produce 'MainMain'.
        if any(F.vt.v(word) == 'wayq' for word in L.d(cl, 'word')):
            in_dep = 'Main'

    if in_dep == '':  # if everything else does not give a result, we look at the CARC
        cl_atoms = L.d(cl, 'clause_atom')
        in_dep = carc_calc(cl_atoms)

    return in_dep


#####################################


def _carc_class(code):
    """Return 'SubAdv' or 'Main' for a clause atom code, or None if no range applies."""
    if 499 < code < 999 or 9 < code < 17 or 50 < code < 75:
        return 'SubAdv'
    if code in {0, 999} or 99 < code < 168 or 299 < code < 500:
        return 'Main'
    return None


def carc_calc(cl_atoms):
    """Classify a clause by the ``code`` feature of its first clause atom.

    Codes 200 and 201 are not classified directly: the chain of mothers is
    followed until an atom with another code is found (or there is no mother
    left), and that code is classified instead.

    Parameters
    ----------
    cl_atoms : sequence of int
        Clause atom nodes; only the first one is used.

    Returns
    -------
    str
        ``'Main'``, ``'SubAdv'`` or ``'Undc'``. After following mothers from a
        200/201 code the result is the empty string when the final code falls
        in none of the ranges and is not one of 220-223.
    """
    atom = cl_atoms[0]
    code = int(F.code.v(atom))

    if code not in {200, 201}:
        return _carc_class(code) or 'Undc'

    # Follow the mothers until the code is no longer 200/201. The code is
    # converted with int() here as everywhere else in this function, and the
    # climb stops instead of raising an IndexError when an atom has no mother.
    while code in {200, 201}:
        mothers = E.mother.f(atom)
        if not mothers:
            break
        atom = mothers[0]
        code = int(F.code.v(atom))

    label = _carc_class(code)
    if label is not None:
        return label
    if code in {220, 221, 222, 223}:
        return 'Undc'
    # NOTE(review): in this branch any other code yields '' rather than
    # 'Undc'; kept as it was -- confirm whether that difference is intended.
    return ''

###################################################################################################

def _rela_class(ccr):
    """Map a clause relation value to 'SubArg', 'SubMod', 'SubAdv' or ''."""
    if ccr in {'Subj', 'Objc', 'Cmpl', 'PreC', 'Voct', 'Frnt'}:
        return 'SubArg'
    if ccr in {'Attr', 'RgRc', 'Spec'}:
        return 'SubMod'
    if ccr in {'Adju', 'PrAd'}:
        return 'SubAdv'
    return ''


def rela_calc(cl):
    """Classify a node by its ``rela`` feature.

    For a relation other than ``'Coor'`` the label follows directly from the
    relation. For ``'Coor'`` the mother is inspected:

    * a word or phrase as mother gives ``'SubMod'``;
    * otherwise the chain of mothers is followed for as long as their
      relation is ``'Coor'`` too, and the relation of the node where the
      chain ends is classified;
    * if that gives nothing, a node that is not a clause gives ``'SubMod'``
      and a clause is classified by ``carc_calc`` on its clause atoms.

    Parameters
    ----------
    cl : int
        Node to classify (a clause in the calls made by this notebook).

    Returns
    -------
    str
        ``'SubArg'``, ``'SubMod'``, ``'SubAdv'``, a label from ``carc_calc``,
        or the empty string when nothing applies.
    """
    ccr = F.rela.v(cl)
    if ccr != 'Coor':
        return _rela_class(ccr)

    mothers = E.mother.f(cl)
    if not mothers:
        # No mother to inspect: report "no result" instead of an IndexError.
        return ''
    moth_obj = mothers[0]
    if F.otype.v(moth_obj) in {'word', 'phrase'}:
        return 'SubMod'

    # Climb to the node the coordination chain starts from.
    while F.rela.v(moth_obj) == 'Coor':
        mothers = E.mother.f(moth_obj)
        if not mothers:
            break
        moth_obj = mothers[0]

    # Classify the relation of the node reached by the climb. Reading the
    # relation of `cl` again here would always give 'Coor' and never match.
    in_dep_r = _rela_class(F.rela.v(moth_obj))

    if in_dep_r == '':
        if F.otype.v(moth_obj) != 'clause':
            in_dep_r = 'SubMod'
        else:
            cl_atoms = L.d(moth_obj, 'clause_atom')
            in_dep_r = carc_calc(cl_atoms)

    return in_dep_r

############################################################################################


In [8]:
def mother_tense_calc(cl):
    """Describe the mother of the first clause atom of a clause.

    Parameters
    ----------
    cl : int
        Clause node.

    Returns
    -------
    str
        * ``'no_mother'`` when the first clause atom has no mother;
        * ``'word'`` or ``'phrase'`` when the mother is of that type;
        * ``'nominal'`` / ``'no_pred'`` when the clause of the mother has
          kind ``'NC'`` / ``'WP'``;
        * otherwise the ``vt`` values of the verbs in the predicate phrase
          of that clause, concatenated (empty when there is no such phrase
          or no verb in it).
    """
    first_atom = L.d(cl, 'clause_atom')[0]
    mothers = E.mother.f(first_atom)
    if len(mothers) == 0:
        return 'no_mother'

    mother = mothers[0]
    mother_type = F.otype.v(mother)
    if mother_type in {'word', 'phrase'}:
        return mother_type

    mother_clause = L.u(mother, 'clause')[0]
    clause_kind = F.kind.v(mother_clause)
    if clause_kind == 'NC':
        return 'nominal'
    if clause_kind == 'WP':
        return 'no_pred'

    # Remember the last phrase of each family; a Pred-like phrase is
    # preferred over a PreC-like one.
    pred_phr = None
    prec_phr = None
    for phr in L.d(mother_clause, 'phrase'):
        function = F.function.v(phr)
        if function in {'Pred', 'PreS', 'PreO'}:
            pred_phr = phr
        elif function in {'PreC', 'PtcO'}:
            prec_phr = phr

    chosen = pred_phr if pred_phr is not None else prec_phr
    if chosen is None:
        return ''

    return ''.join(
        F.vt.v(word) for word in L.d(chosen, 'word') if F.sp.v(word) == 'verb'
    )

In [9]:
# Every phrase function found on a phrase node, except the three core
# functions, in sorted order and with 'Unkn' added at the end.
core_funcs = {'Subj', 'Pred', 'PreC'}
all_funcs = {F.function.v(phrase) for phrase in F.otype.s('phrase')}

extra_cl_funcs = sorted(all_funcs - core_funcs)
extra_cl_funcs.append('Unkn')

In [10]:
# Show the extra phrase functions; each of them becomes a 0/1 column in the csv file.
print(extra_cl_funcs)

['Adju', 'Cmpl', 'Conj', 'EPPr', 'ExsS', 'Exst', 'Frnt', 'IntS', 'Intj', 'Loca', 'ModS', 'Modi', 'NCoS', 'NCop', 'Nega', 'Objc', 'PrAd', 'PrcS', 'PreO', 'PreS', 'PtcO', 'Ques', 'Rela', 'Supp', 'Time', 'Voct', 'Unkn']


In [22]:
# Select the tripartite clauses and collect their features.
# trip_list keeps the selected clause nodes in the order they are visited;
# trip_clauses maps each selected clause node to its list of field values.
# The order in which values are appended to feat_list has to match the
# header written in the csv cell below.
trip_list = []
trip_clauses = {}

for cl in F.otype.s('clause'):
    
    # there must be resumption
    if F.rela.v(cl) != 'Resu':
        continue
        
    # and the clause must be nominal (kind 'NC')
    if F.kind.v(cl) != 'NC':
        continue
     
    # find subj and prec in the clause; clauses lacking either are skipped
    phrases = L.d(cl, 'phrase')
    funcs = [F.function.v(ph) for ph in phrases]
    if 'Subj' in funcs and 'PreC' in funcs:
        subj_ind = funcs.index('Subj')
        
        # clauses with an EPPr phrase are left out
        if 'EPPr' in funcs:
            continue
        
        # the (first) subject phrase must have phrase type 'PPrP'
        if F.typ.v(phrases[subj_ind]) != 'PPrP':
            continue
            
        # and its first word must be third person
        pronoun = L.d(phrases[subj_ind], 'word')[0]
        if F.ps.v(pronoun) != 'p3':
            continue
        
        # check the mother, this is the fronted element
        mo = E.mother.f(cl)[0]
        phrss = L.d(mo, 'phrase')
        types = [F.typ.v(phr) for phr in phrss]

        # skip the clause when the phrase types of the mother are exactly
        # one of these two sequences
        if types in [['CP'], ['CP', 'VP', 'NP', 'PP']]:
            continue
                
        feat_list = []
        # collect basic info about clause
        feat_list.append(str(cl)) # clause id
        bo, ch, ve = T.sectionFromNode(cl)
        feat_list.append(bo) # book
        feat_list.append(str(ch)) # chapter
        feat_list.append(str(ve)) # verse
        feat_list.append(F.typ.v(cl)) # clause type of database
        feat_list.append(F.rela.v(cl) + '_') # clause relation, with a trailing underscore
        
        # eg if book is '1_Samuel', add 'Samuel' to feat_list
        if bo in double_books: 
            feat_list.append(double_books[bo])
        else:
            feat_list.append(bo)
            
        # ebh_lbh; books in neither list get 'other'
        if bo in ebh_lbh_dict.keys():
            feat_list.append(ebh_lbh_dict[bo])
        else:
            feat_list.append('other')
            
        feat_list.append(genre_dict[bo]) #genre
        feat_list.append(F.txt.v(cl)[-1]) # last character of the txt feature: Q, D, N
            
        if funcs.index('Subj') < funcs.index('PreC'): #subj and prec order
            feat_list.append('SP')
        else:
            feat_list.append('PS')
            
        # which conjunction: the lexemes of the first CP phrase, joined with '_'
        phr_types = [F.typ.v(phr) for phr in phrases]
        if 'CP' in phr_types: 
            conj = phrases[phr_types.index('CP')]
            words = L.d(conj, 'word')
            words_lex = '_'.join([F.lex.v(wo) for wo in words])
            feat_list.append(words_lex)
        else:
            feat_list.append('no_conj')
                
        # clause length in phrases (every phrase of the clause is counted)
        feat_list.append(str(len(phrases)))
                
        subj_pos = funcs.index('Subj')
        feat_list.append(F.typ.v(phrases[subj_pos])) #phrase type of subject
        feat_list.append(F.det.v(phrases[subj_pos])) #determination of subject
        
        # length of subj in words; a word whose prs value is neither 'absent'
        # nor 'n/a' counts double (presumably because it carries a suffix)
        words_subj = L.d(phrases[subj_pos], 'word') # words of the subject phrase
        subj_len = len(words_subj)
        for wo in words_subj:                    
            if not F.prs.v(wo) in {'absent', 'n/a'}:
                subj_len += 1
        feat_list.append(str(subj_len))
                
        prec_pos = funcs.index('PreC')
        feat_list.append(F.typ.v(phrases[prec_pos])) #phrase type of prec
        feat_list.append(F.det.v(phrases[prec_pos])) #determination of prec
        
        # length of prec in words, counted in the same way as for the subject
        words_prec = L.d(phrases[prec_pos], 'word')
        prec_len = len(words_prec)
        for wo in words_prec:                    
            if not F.prs.v(wo) in {'absent', 'n/a'}:
                prec_len += 1
        feat_list.append(str(prec_len))
            
        # main or subordinate clause
        feat_list.append(in_dep_calc(cl))
                                     
        # negation in clause: is there a phrase of type NegP?
        phr_types = [F.typ.v(phr) for phr in phrases]
        if 'NegP' in phr_types:
            feat_list.append('neg')
        else:
            feat_list.append('non_neg')
                
        # aramaic or hebrew, taken from the first word of the clause
        words = L.d(cl, 'word')
        feat_list.append(F.language.v(words[0]))
                
        # collect info about other phrases in clause (present (1) or absent (0))
        for item in extra_cl_funcs:   
            if item in funcs:
                feat_list.append('1')
            else:
                feat_list.append('0')
                
        # tense of the mother of the clause_atom
        feat_list.append(mother_tense_calc(cl))
                           
        trip_list.append(cl)
        trip_clauses[cl] = feat_list

In [23]:
# Column names; their order has to match the order in which the feature
# extraction cell appends values to each feature list.
header = ['cl_id', 'book', 'chapter', 'verse', 'clause_type_etcbc', 'clause_rela', 'book2', 'ebh_lbh', 'genre', 'txt_type', 's_p_order', 'CP', 'cl_len', 'subj_type', 'subj_det', 'subj_len', 'pc_type', 'pc_det', 'pc_len', 'main_sub', 'nega', 'language']

# One 0/1 column per extra phrase function, then the mother description.
header.extend(extra_cl_funcs)
header.append('mother')

# Write through the csv module inside a context manager: the file is closed
# even when a row fails, fields containing a comma or quote are quoted, a
# None value is written as an empty field instead of raising TypeError, and
# the encoding no longer depends on the platform. Lines still end in '\n'.
with open(r"tripartite_bib.csv", "w", newline='', encoding='utf-8') as csvh:
    writer = csv.writer(csvh, lineterminator='\n')
    writer.writerow(header)
    for key in trip_list:
        writer.writerow(trip_clauses[key])