# Linking clauses with נתן and שים with data from the ETCBC database

In this notebook, all clauses containing the verbs נתן and שים are extracted from the ETCBC database, together with information about their discourse environment, their genre, and whether each clause is a main or a subordinate clause. These data are then linked to valence data for these verbs.

In [None]:
import sys, os, csv, collections

In [None]:
from tf.app import use
# Load the BHSA corpus. hoist=globals() places the Text-Fabric API handles in
# this notebook's global namespace; the cells below rely on F, E, L and T
# being available there.
A = use('bhsa', hoist=globals())

# Request all three extra display features in ONE call. Each displaySetup()
# call assigns the option it is given, so three separate calls with
# extraFeatures= would leave only the last one ('g_cons') in effect.
A.displaySetup(extraFeatures='kind prs g_cons')

In [None]:
# %load main_sub
def in_dep_calc(cl):
    """Classify a clause as main or subordinate.

    The checks below run in order; the first one that yields a non-empty
    label decides the result:

    1. the ``rela`` feature, evaluated by ``rela_calc`` -- on the first mother
       of the clause when the clause itself has ``rela == 'ReSu'``, otherwise
       on the clause itself;
    2. a word in the clause with ``vt == 'wayq'`` (wayyiqtol), which makes
       the clause ``'Main'``;
    3. the ``code`` feature of the clause's first clause atom, evaluated by
       ``carc_calc``.

    Args:
        cl: node number of a clause.

    Returns:
        The label as a string: ``'Main'`` or whatever ``rela_calc`` /
        ``carc_calc`` returned.

    Raises:
        IndexError: if a ``ReSu`` clause has no mother edge.
    """
    if F.rela.v(cl) == 'ReSu':  # is the clause resumptive?
        moth_obj = E.mother.f(cl)[0]
        in_dep = rela_calc(moth_obj)
    else:
        in_dep = rela_calc(cl)  # does the clause have a dependent CCR?

    if in_dep == '':
        # is there a wayyiqtol? any() stops at the first hit, so a clause
        # holding several wayyiqtol forms is labelled 'Main' exactly once
        # (appending per word would produce 'MainMain').
        if any(F.vt.v(word) == 'wayq' for word in L.d(cl, 'word')):
            in_dep = 'Main'

    if in_dep == '':
        # if everything else does not give a result, we look at the CARC
        in_dep = carc_calc(L.d(cl, 'clause_atom'))

    return in_dep


#####################################


def _carc_category(code):
    """Map an integer clause atom relation code (other than 200/201) to a label."""
    if code in {0, 999}:
        return 'Main'
    if 499 < code < 999 or 9 < code < 17 or 50 < code < 75:
        return 'SubAdv'
    if 99 < code < 168 or 299 < code < 500:
        return 'Main'
    # Every remaining code (this includes 220-223) is left undecided.
    return 'Undc'


def carc_calc(cl_atoms):
    """Classify a clause by the ``code`` feature (CARC) of its first clause atom.

    Codes 200 and 201 are not classified directly: the mother edge is followed
    upwards until an atom with a different code is reached, and that code is
    classified instead.

    Args:
        cl_atoms: non-empty sequence of clause atom nodes; only the first
            element is inspected.

    Returns:
        ``'Main'``, ``'SubAdv'`` or ``'Undc'``. ``'Undc'`` is returned for any
        code outside the recognised ranges, whether it was read from the atom
        itself or reached through a 200/201 chain, and also when such a chain
        ends in an atom without a mother.
    """
    atom = cl_atoms[0]
    # int() is applied on every read so the 200/201 test and the range tests
    # see the same value type.
    code = int(F.code.v(atom))

    while code in {200, 201}:
        mothers = E.mother.f(atom)
        if not mothers:
            # Nothing left to follow; indexing [0] here would raise IndexError.
            return 'Undc'
        atom = mothers[0]
        code = int(F.code.v(atom))

    return _carc_category(code)

###################################################################################################

def _rela_category(ccr):
    """Map a ``rela`` value to a subordination label, or '' if it has none."""
    if ccr in {'Subj', 'Objc', 'Cmpl', 'PreC', 'Voct', 'Frnt'}:
        return 'SubArg'
    if ccr in {'Attr', 'RgRc', 'Spec'}:
        return 'SubMod'
    if ccr in {'Adju', 'PrAd'}:
        return 'SubAdv'
    return ''


def rela_calc(cl):
    """Classify a clause by its ``rela`` feature.

    For any value other than ``'Coor'`` the label comes straight from the
    ``rela`` value. For a ``'Coor'`` clause:

    * if its first mother is a word or a phrase, the result is ``'SubMod'``;
    * otherwise the mother chain is followed for as long as the mother itself
      has ``rela == 'Coor'``, and the ``rela`` of the object reached at the
      end of that chain is classified;
    * if that still gives no label, the result is ``'SubMod'`` when the
      object reached is not a clause, and otherwise ``carc_calc`` of that
      clause's clause atoms.

    Args:
        cl: node number of the object to classify.

    Returns:
        ``'SubArg'``, ``'SubMod'``, ``'SubAdv'``, a ``carc_calc`` result, or
        ``''`` when ``rela`` is neither ``'Coor'`` nor one of the mapped values.

    Raises:
        IndexError: if an object with ``rela == 'Coor'`` has no mother edge.
    """
    ccr = F.rela.v(cl)
    if ccr != 'Coor':
        return _rela_category(ccr)

    moth_obj = E.mother.f(cl)[0]
    if F.otype.v(moth_obj) in {'word', 'phrase'}:
        return 'SubMod'

    while F.rela.v(moth_obj) == 'Coor':
        moth_obj = E.mother.f(moth_obj)[0]

    # Classify the relation of the object the chain ends at. Re-reading the
    # relation of `cl` here would always give 'Coor' and never match anything.
    in_dep_r = _rela_category(F.rela.v(moth_obj))
    if in_dep_r != '':
        return in_dep_r

    if F.otype.v(moth_obj) != 'clause':
        return 'SubMod'
    return carc_calc(L.d(moth_obj, 'clause_atom'))

In [None]:
# Per verb: the clause nodes in corpus order, and a mapping from clause node
# to the list of string fields that becomes that clause's CSV row.
ntn_list, ntn_dict = [], {}
fjm_list, fjm_dict = [], {}

# (value looked up in the `lex` feature, label written to the row,
#  node list to extend, row dict to fill)
verb_targets = (
    ('NTN[', 'NTN', ntn_list, ntn_dict),
    ('FJM[', 'FJM', fjm_list, fjm_dict),
)

for cl in F.otype.s('clause'):
    cl_words = L.d(cl, 'word')
    lexemes = [F.lex.v(w) for w in cl_words]
    consonants = [F.g_cons.v(w) for w in cl_words]
    lang_of_words = [F.language.v(w) for w in cl_words]

    # A clause containing both verbs is recorded once under each of them.
    for lexeme, label, nodes, rows in verb_targets:
        if lexeme not in lexemes:
            continue

        bo, ch, ve = T.sectionFromNode(cl)
        feat_list = [
            label,
            str(cl),
            bo,
            str(ch),
            str(ve),
            lang_of_words[0],       # language of the clause's first word
            in_dep_calc(cl),        # main or subordinate clause
            F.txt.v(cl)[-1],        # Q, D, N, ?
            " ".join(consonants),
        ]

        nodes.append(cl)
        rows[cl] = feat_list

In [None]:
# Write the collected rows to CSV: the header, then every NTN clause, then
# every FJM clause, each group in the order in which it was collected.
header = ['verb', 'cl_id', 'book', 'chapter', 'verse', 'language', 'main_sub', 'txt_type', 'clause']

# `with` closes the file even if writing a row fails. newline="" is what the
# csv module asks for on files it writes to; lineterminator="\n" keeps the
# one-"\n"-per-row layout. csv.writer quotes any field that contains a comma
# or a quote character, which a plain ','.join() would silently corrupt.
with open(r"fjm_ntn_qn_mainsub.csv", "w", newline="", encoding="utf-8") as csvh:
    writer = csv.writer(csvh, lineterminator="\n")
    writer.writerow(header)
    writer.writerows(ntn_dict[item] for item in ntn_list)
    writer.writerows(fjm_dict[item] for item in fjm_list)