# Export Text Examples

In [1]:
import sys
from pathlib import Path
# Extend the module search path before the import below; presumably this is
# the folder that contains load_dfs.py (path is relative to the working dir).
sys.path.append('../../scripts/analysis')
from load_dfs import DfLoader
# Root folder whose sub-directories are collected into verb_dirs below.
datadir = Path('../../../results/datasets')

In [2]:
# Gather the sub-directories of datadir, skipping the one named 'qtl_old'.
verb_dirs = [
    entry
    for entry in datadir.glob('*')
    if entry.name != 'qtl_old' and entry.is_dir()
]

In [3]:
import re

def add_hit_symbols(text, search, symbol='_'):
    """Wrap every regex match of `search` in `text` with `symbol`.

    Args:
        text: String to search in.
        search: Regular-expression pattern, given as a string.
        symbol: Marker inserted directly before and after each match.

    Returns:
        `text` with each match surrounded by `symbol`. `text` is returned
        unchanged when `search` is empty or is not a string.
    """
    # An empty pattern matches at every position and would scatter symbols
    # through the whole text; a non-string (e.g. a NaN from a missing cell)
    # would be formatted into a bogus pattern such as "(nan)".
    if not isinstance(search, str) or not search:
        return text

    def wrap_hit(hit):
        # Built by hand rather than with a replacement template so that
        # `symbol` is inserted literally, backslashes included.
        return f'{symbol}{hit.group(1)}{symbol}'

    return re.sub(f'({search})', wrap_hit, text)

def clean_puncts(text):
    """Fix space-separated punctuations.

    Removes the single space in front of a closing mark (one of
    , . ? ! ; : ” ’) and the single space behind an opening quote (“ or ‘).

    Args:
        text: The string to tidy.

    Returns:
        The string with those spaces removed.
    """
    # Raw strings keep \g<1> as a regex group reference instead of an
    # invalid string escape sequence.
    text = re.sub(r' ([,.?!;”:’])', r'\g<1>', text)
    text = re.sub(r'([“‘]) ', r'\g<1>', text)
    return text

def clean_roman_nums(text):
    """Swap a leading Roman numeral in the verse ref for a digit.

    Only a leading 'I ' or 'II ' is rewritten (to '1 ' and '2 ').
    """
    for roman, arabic in (('^I ', '1 '), ('^II ', '2 ')):
        text = re.sub(roman, arabic, text)
    return text

def _format_verse(verse, hit):
    """Return a translation verse with its hit marked and punctuation tidied."""
    # A missing verse arrives as a non-string (e.g. NaN); show it as empty.
    if not isinstance(verse, str):
        return ''
    # Only mark a real hit. A missing (NaN) hit would be searched for as the
    # text "nan", and an empty string would match at every position, so
    # neither may be handed to add_hit_symbols.
    if isinstance(hit, str) and hit:
        verse = add_hit_symbols(verse, hit)
    return clean_puncts(verse)


def make_text_examples(df):
    """Build text examples, one per row of `df`.

    Each example consists of three tab-separated lines: the sentence text
    followed by the verse reference, the NIV verse, and the ESV verse. The
    hit text of each row is wrapped in hit symbols within its line.

    Args:
        df: DataFrame providing the columns 'sentence', 'text_full',
            'ref_abbr', 'esv', 'niv', 'esv_verse' and 'niv_verse'.

    Returns:
        A list of example strings in the row order of `df`.
    """
    # sort out texts
    # NB that esv and niv texts might also be similarly formatted later on
    bhs_joiner = ''
    bhs_cols = ['sentence']
    bhs_text = df[bhs_cols].astype(str).agg(bhs_joiner.join, axis=1)
    exs = []

    # Pair each row with its sentence by position, so a repeated index label
    # cannot turn a single-cell lookup into a whole sub-frame.
    for (_, row), bhs in zip(df.iterrows(), bhs_text):
        ref = clean_roman_nums(row['ref_abbr'])

        # add formatting to verse texts
        bhs = bhs.strip()
        heb = row['text_full']
        if isinstance(heb, str) and heb:
            bhs = add_hit_symbols(bhs, heb)
        esv_verse = _format_verse(row['esv_verse'], row['esv'])
        niv_verse = _format_verse(row['niv_verse'], row['niv'])

        # build and add example text
        ex = f'(x)\t{bhs} ({ref})\nNIV\t{niv_verse}\nESV\t{esv_verse}\n'
        exs.append(ex)

    return exs

In [4]:
# export

# NOTE(review): machine-specific absolute path; adjust when running elsewhere.
outdir = Path('/Users/cody/Desktop/text_examples/')
# write_text does not create missing folders, so make sure the target exists.
outdir.mkdir(parents=True, exist_ok=True)

for verb_dir in verb_dirs:
    # One example file per dataset folder, built from all of its CSV files.
    df_loader = DfLoader(verb_dir.glob('*.csv'))
    df = df_loader.df
    examples = make_text_examples(df)
    doc = '\n'.join(examples)
    outfile = outdir.joinpath(verb_dir.name+'_examples_.txt')
    # The examples hold non-ASCII text (e.g. curly quotes), so do not rely
    # on the platform's default encoding.
    outfile.write_text(doc, encoding='utf-8')