# Parallels Prep

Preparing the filter that removes clauses from Chronicles whose verses closely parallel other passages, such as material copied word-for-word out of Kings.

In [5]:
from tf.fabric import Fabric
from tf.extra.bhsa import Bhsa
import collections

# Text-Fabric data directories: the BHSA corpus and the parallels module.
locations = ['~/github/etcbc/bhsa/tf', 
             '~/github/etcbc/parallels/tf']

# Load TF and BHSA data.
# modules='2017' is the same version string that is passed to Bhsa below.
TF = Fabric(locations=locations, modules='2017', silent=True)
# Only the features named in this string are requested; `crossref` is the
# edge feature the later cells query through E.crossref.
# NOTE(review): crossref presumably comes from the parallels location — confirm.
api = TF.load('''
              otype language
              book chapter verse
              function domain
              typ pdp kind
              crossref
              ''', silent=True)

# Put the API handles into the notebook's global namespace; the cells below
# rely on the names T, L and E being available this way.
api.makeAvailableIn(globals())

# NOTE(review): the second argument is an empty string and the generated
# "This notebook online" links end in '/.ipynb' — presumably this should be the
# notebook's name; confirm.
B = Bhsa(api, '', version='2017')

**Documentation:** <a target="_blank" href="https://etcbc.github.io/bhsa" title="{provenance of this corpus}">BHSA</a> <a target="_blank" href="https://etcbc.github.io/bhsa/features/hebrew/2017/0_home.html" title="{CORPUS.upper()} feature documentation">Feature docs</a> <a target="_blank" href="https://dans-labs.github.io/text-fabric/Api/Bhsa/" title="BHSA API documentation">BHSA API</a> <a target="_blank" href="https://dans-labs.github.io/text-fabric/Api/General/" title="text-fabric-api">Text-Fabric API 5.5.18</a> <a target="_blank" href="https://dans-labs.github.io/text-fabric/Api/General/#search-templates" title="Search Templates Introduction and Reference">Search Reference</a>


This notebook online:
<a target="_blank" href="http://nbviewer.jupyter.org/github/Probabilistic_Language_Change/cody_NB/blob/master/.ipynb">NBViewer</a>
<a target="_blank" href="https://github.com/Probabilistic_Language_Change/cody_NB/blob/master/.ipynb">GitHub</a>


In [20]:
# Sample the cross-references attached to the opening verses of 1 Chronicles.
one_chron = T.nodeFromSection(('1_Chronicles',))

for position, verse_node in enumerate(L.d(one_chron, 'verse')):

    # only the first 22 verses (positions 0..21) are inspected
    if position > 21:
        break

    # a verse without cross-references prints nothing
    for target, score in E.crossref.f(verse_node) or ():
        print(T.sectionFromNode(target), '-', score)
        print()

('Genesis', 10, 2) - 100

('Genesis', 10, 3) - 95

('Genesis', 10, 4) - 95

('Genesis', 10, 6) - 100

('Genesis', 10, 7) - 100

('Genesis', 10, 8) - 100

('Genesis', 10, 13) - 100

('Genesis', 10, 14) - 100

('Genesis', 10, 15) - 100

('Genesis', 10, 16) - 100

('Genesis', 15, 21) - 83

('Genesis', 10, 17) - 100

('Genesis', 15, 20) - 76

('Genesis', 10, 22) - 77

('Genesis', 10, 24) - 100

('Genesis', 10, 25) - 100

('Genesis', 10, 26) - 100

('Genesis', 10, 27) - 100

('2_Chronicles', 11, 9) - 78

('Genesis', 10, 28) - 100



## Count Non-Cross-referenced Clauses

In [45]:
# A verse is treated as a parallel, and its clauses are skipped, as soon as
# one of its cross-references scores this value or higher. Verses whose
# cross-references all score lower, or that have none at all, are kept.
MIN_PARALLEL_SCORE = 75

good_clauses = []                    # clause nodes of the verses that are kept
skipped_clauses = 0                  # number of clauses in the skipped verses
cref_books = collections.Counter()   # cross-reference targets of skipped verses, per book

for book in ('1_Chronicles', '2_Chronicles'):
    book_node = T.nodeFromSection((book,))

    for verse in L.d(book_node, 'verse'):

        # Look the edges and clauses up once per verse. The `or ()` mirrors the
        # `if cref:` guard of the exploration cell for verses without edges.
        crossrefs = E.crossref.f(verse) or ()
        clauses = L.d(verse, 'clause')

        # each cross-reference is a (target node, score) pair
        if all(score < MIN_PARALLEL_SCORE for target, score in crossrefs):
            good_clauses.extend(clauses)
        else:
            skipped_clauses += len(clauses)

            # Every cross-reference of a skipped verse is tallied under the
            # book of its target, not only the ones that reach the threshold.
            cref_books.update(
                T.sectionFromNode(target)[0] for target, score in crossrefs
            )


print(f'{len(good_clauses)} clauses kept...')
print(f'{skipped_clauses} clauses skipped...')

3750 clauses kept...
2078 clauses skipped...


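Here is the data on which books' material is skipped: the number of cross-references from the skipped Chronicles verses, tallied by the book they point to.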

In [46]:
# per-book tally of the cross-references attached to the skipped verses
cref_books

Counter({'Genesis': 44,
         '2_Chronicles': 103,
         'Exodus': 22,
         '1_Chronicles': 415,
         'Ruth': 2,
         'Nehemiah': 4,
         '2_Samuel': 109,
         'Numbers': 83,
         'Ezra': 4,
         'Joshua': 42,
         '1_Samuel': 11,
         'Psalms': 35,
         'Leviticus': 61,
         '1_Kings': 221,
         '2_Kings': 157,
         'Isaiah': 1,
         'Jeremiah': 14,
         'Ezekiel': 1,
         'Jonah': 2})