In [128]:
# Render the two sample verses. This cell carries execution count In [128]:
# it needs `pretty`, `verse1` and `verse2`, which are defined in cells that
# appear further down, to have been run first.
for verse in (verse1, verse2):
    pretty(verse)

In [1]:
from IPython.display import display, HTML
from tf.fabric import Fabric

In [82]:
# Module path of the feature files, relative to the `locations` root below
# (the load log shows them being read from <root>/BHSA/tf/2017).
BHSA = 'BHSA/tf/2017'

# Point Text-Fabric at the local data root and the module chosen above.
TF = Fabric(locations='~/github/etcbc', modules=BHSA)
# Load the two features this notebook reads explicitly: `sp` and `gloss`.
api = TF.load('''
    sp
    gloss
''')
# Inject the API's names into the notebook's global namespace. Later cells use
# F, L, T and sortNodes without defining them -- presumably they come from here.
api.makeAvailableIn(globals())

This is Text-Fabric 3.2.2
Api reference : https://github.com/Dans-labs/text-fabric/wiki/Api
Tutorial      : https://github.com/Dans-labs/text-fabric/blob/master/docs/tutorial.ipynb
Example data  : https://github.com/Dans-labs/text-fabric-data

115 features found and 0 ignored
  0.00s loading features ...
   |     0.13s B sp                   from /Users/dirk/github/etcbc/BHSA/tf/2017
   |     0.01s B gloss                from /Users/dirk/github/etcbc/BHSA/tf/2017
   |     0.00s Feature overview: 109 for nodes; 5 for edges; 1 configs; 7 computed
  4.73s All features loaded/computed - for details use loadLog()


# Research

Does any *x*`_atom` ever cross a verse boundary, where *x* is one of `sentence`, `clause`, or `phrase`?

In [3]:
# breaches[tp][side] -> set of verse nodes, where `tp` is 'sentence', 'clause'
# or 'phrase' and `side` is 'start' or 'end'. A verse is recorded when the
# `tp`_atom holding its first (resp. last) word extends before (resp. after)
# that word, i.e. the atom crosses the verse boundary.
breaches = {}
# Every verse with at least one breach, regardless of type or side.
breachSet = set()

for v in F.otype.s('verse'):
    words = L.d(v, otype='word')
    firstWord = words[0]
    lastWord = words[-1]
    for tp in ('sentence', 'clause', 'phrase'):
        atomType = f'{tp}_atom'

        # Start of the verse: does the atom of the first word begin earlier?
        startAtom = L.u(firstWord, otype=atomType)[0]
        if L.d(startAtom, otype='word')[0] < firstWord:
            breaches.setdefault(tp, {}).setdefault('start', set()).add(v)
            breachSet.add(v)

        # End of the verse: does the atom of the last word run on further?
        # This is tested independently of the start test, so a verse whose
        # first and last word coincide still gets both checks.
        endAtom = L.u(lastWord, otype=atomType)[0]
        if L.d(endAtom, otype='word')[-1] > lastWord:
            breaches.setdefault(tp, {}).setdefault('end', set()).add(v)
            breachSet.add(v)
len(breachSet)

1970

In [4]:
# One line per (atom type, verse side) combination that has breaches,
# showing how many verses are affected.
for tp in breaches:
    for boundary in breaches[tp]:
        verseCount = len(breaches[tp][boundary])
        print(f'{verseCount:>4} in {tp}-{boundary}')

1231 in sentence-end
1231 in sentence-start


In [5]:
# Section reference of the breaching verse with the lowest node number.
firstBreachVerse = sorted(breachSet)[0]
T.sectionFromNode(firstBreachVerse)

('Genesis', 1, 17)

In [118]:
def pretty(v):
    """Render verse node `v` as nested HTML boxes and display it in the notebook.

    The output consists of a label column (section reference and node number)
    followed by one box per sentence atom, each filled in by `_prettySA`.
    Nothing is returned; the HTML is shown through IPython's `display`.
    """
    (firstWord, lastWord) = _getBoundary(v)

    satoms = set(L.d(v, otype='sentence_atom'))
    # The sentence atoms of the two boundary words are added explicitly --
    # presumably because atoms that stick out of the verse are not among the
    # ones returned by L.d(v, ...); TODO confirm against the Text-Fabric docs.
    for boundaryWord in (firstWord, lastWord):
        satoms.add(L.u(boundaryWord, otype='sentence_atom')[0])

    label = '{} {}:{}'.format(*T.sectionFromNode(v))
    html = [
        '''<div class="verse">''',
        '''<div class="vl">''',
        f'''<div class="vs">{label}</div>''',
        f'''<div class="nd">{v}</div>''',
        '''</div>''',
    ]
    for sa in sortNodes(satoms):
        _prettySA(sa, html, firstWord, lastWord)
    html.append('''</div>''')

    display(HTML('\n'.join(html)))

In [119]:
def _prettySA(sa, html, firstWord, lastWord):
    """Append the HTML box for sentence atom `sa` to the list `html`.

    `firstWord` and `lastWord` are the boundary words of the verse being
    rendered. The box's class attribute gets these markers:

    * ' l' / ' r' -- the enclosing sentence starts before / ends after `sa`;
    * ' L' / ' R' -- `sa` itself starts before `firstWord` / ends after
      `lastWord`.

    Clause atoms lying completely outside firstWord..lastWord are left out;
    the others are rendered by `_prettyCA`.
    """
    catoms = L.d(sa, otype='clause_atom')
    (saStart, saEnd) = _getBoundary(sa)
    (sentenceStart, sentenceEnd) = _getSuperBoundary(sa, 'sentence')

    # (condition, marker) pairs, in the order the markers appear in the class.
    markers = (
        (sentenceStart < saStart, ' l'),
        (sentenceEnd > saEnd, ' r'),
        (saStart < firstWord, ' L'),
        (saEnd > lastWord, ' R'),
    )
    boundaryClass = ''.join(marker for (applies, marker) in markers if applies)

    html.append(f'''<div class="satom {boundaryClass}">''')
    for ca in catoms:
        (caStart, caEnd) = _getBoundary(ca)
        overlapsVerse = caEnd >= firstWord and caStart <= lastWord
        if overlapsVerse:
            _prettyCA(ca, html, caStart, caEnd)
    html.append('''</div>''')

In [120]:
def _prettyCA(ca, html, atomStart, atomEnd):
    """Append the HTML box for clause atom `ca` to the list `html`.

    `atomStart` and `atomEnd` are the first and last word of `ca`. The box is
    marked ' l' when the enclosing clause starts before `atomStart` and ' r'
    when it ends after `atomEnd`. Every phrase atom inside `ca` is rendered
    by `_prettyPA`, which receives that phrase atom's own boundary words.
    """
    (clauseStart, clauseEnd) = _getSuperBoundary(ca, 'clause')
    boundaryClass = (
        (' l' if clauseStart < atomStart else '') +
        (' r' if clauseEnd > atomEnd else '')
    )
    patoms = L.d(ca, otype='phrase_atom')

    html.append(f'''<div class="catom {boundaryClass}">''')
    for pa in patoms:
        (paStart, paEnd) = _getBoundary(pa)
        _prettyPA(pa, html, paStart, paEnd)
    html.append('''</div>''')

In [121]:
def _prettyPA(pa, html, atomStart, atomEnd):
    """Append the HTML box for phrase atom `pa` to the list `html`.

    `atomStart` and `atomEnd` are the first and last word of `pa` as seen by
    the caller. The box is marked ' l' when the enclosing phrase starts
    before `atomStart` and ' r' when it ends after `atomEnd`. Each word of
    `pa` is rendered by `_prettyW`.
    """
    words = L.d(pa, otype='word')
    (superStart, superEnd) = _getSuperBoundary(pa, 'phrase')
    boundaryClass = ''
    if superStart < atomStart:
        boundaryClass += ' l'
    if superEnd > atomEnd:
        boundaryClass += ' r'
    html.append(f'''<div class="patom {boundaryClass}">''')
    if words:
        # The boundary of `pa` is the same for all its words, so look it up
        # once instead of once per word, and keep it in its own names rather
        # than overwriting the parameters.
        # NOTE: since these are the atom's own first and last word, the range
        # filter inside _prettyW never rejects any word passed from here.
        (wordStart, wordEnd) = _getBoundary(pa)
        for w in words:
            _prettyW(w, html, wordStart, wordEnd)
    html.append('''</div>''')

In [122]:
def _prettyW(w, html, firstWord, lastWord):
    """Append the HTML box for word node `w` to the list `html`.

    Words outside the range firstWord..lastWord are skipped. The box shows
    the word's text, its `sp` feature value and the `gloss` of its lexeme.
    The gloss is HTML-escaped (`&`, `<`, `>`); a lexeme without a gloss
    yields an empty gloss line.
    """
    # The parameter `html` is the output list and hides the standard-library
    # module of the same name, so the escaper is imported by name here.
    from html import escape

    if w < firstWord or w > lastWord:
        return
    lx = L.u(w, otype='lex')[0]
    # Glosses may contain markup-significant characters such as '<' and '&';
    # escape them all, and fall back to '' when the feature has no value.
    gloss = escape(F.gloss.v(lx) or '', quote=False)
    html.append('''<div class="word">''')
    html.append(f'''<div class="h">{T.text([w])}</div>''')
    html.append(f'''<div class="sp">{F.sp.v(w)}</div>''')
    html.append(f'''<div class="gl">{gloss}</div>''')
    html.append('''</div>''')

In [123]:
def _getBoundary(n):
    """Return the pair (first word, last word) of the words under node `n`.

    Raises IndexError when `n` has no words.
    """
    wordNodes = L.d(n, otype='word')
    firstNode = wordNodes[0]
    lastNode = wordNodes[-1]
    return (firstNode, lastNode)

In [124]:
def _getSuperBoundary(n, tp):
    """Return the (first word, last word) pair of the `tp` node above `n`.

    The first node of type `tp` returned by `L.u(n, otype=tp)` is taken as
    the enclosing node.
    """
    return _getBoundary(L.u(n, otype=tp)[0])

In [125]:
# Stylesheet for the HTML built by `pretty` and its helpers.
#   .verse / .vl / .vs / .nd -- verse container, label column, section
#                               reference and node number;
#   .satom / .catom / .patom -- nested boxes for sentence, clause and phrase
#                               atoms, told apart by border colour;
#   .word / .h / .sp / .gl   -- word box with its text, `sp` value and gloss.
# The extra classes l / r draw the left / right border dotted, and L / R
# remove that border altogether.
CSS = '''
<style>
.verse {
    display: flex;
    flex-flow: row wrap;
}
.vl {
    display: flex;
    flex-flow: column nowrap;
}
.satom,.catom,.patom {
    padding: 0.5em;
    margin: 0.5em;
    border-style: solid;
    display: flex;
    flex-flow: row wrap;
}
.satom {
    border-color: #aa3333;
    border-width: 4px;
}
.catom {
    border-color: #aaaa33;
    border-width: 3px;
}
.patom {
    border-color: #33aaaa;
    border-width: 3px;
}
.word {
    padding: 0.2em;
    margin: 0.2em;
    border: 1px solid #cccccc;
    display: flex;
    flex-flow: column nowrap;
}
.satom.l,.catom.l,.patom.l {
    border-left-style: dotted
}
.satom.r,.catom.r,.patom.r {
    border-right-style: dotted
}
.satom.L,.catom.L,.patom.L {
    border-left-style: none
}
.satom.R,.catom.R,.patom.R {
    border-right-style: none
}
.h {
    font-family: "Ezra SIL", "SBL Hebrew", sans-serif;
    font-size: 14pt;
}
.sp {
    font-family: monospace;
    font-size: 12pt;
    color: #0000bb;
}
.gl {
    font-family: sans-serif;
    font-size: 9pt;
    color: #aaaaaa;
}
.vs {
    font-family: sans-serif;
    font-size: 11pt;
    font-weight: bold;
    color: #444444;
}
.nd {
    font-family: monospace;
    font-size: 11pt;
    font-style: italic;
    color: #666666;
}
</style>
'''

In [126]:
# Emit the stylesheet as this cell's rich output -- presumably so that the
# classes apply to the HTML that `pretty` displays elsewhere in the notebook.
HTML(CSS)

In [127]:
# Two sample verses from Genesis 1 to render with `pretty`; Genesis 1:17 is
# the verse reported above as the first one with a boundary breach.
(verse1, verse2) = (
    T.nodeFromSection(('Genesis', 1, verseNumber)) for verseNumber in (7, 17)
)