In [13]:
import os

from tf.fabric import Fabric
from tf.extra.bhsa import Bhsa

In [2]:
# Dataset coordinates: version 'c' of the BHSA, expressed as a module path
# that is resolved relative to the location handed to Fabric below.
VERSION = 'c'
BHSA = f'bhsa/tf/{VERSION}'

# Point Text-Fabric at the directory ~/github/etcbc and the BHSA module in it.
TF = Fabric(locations=['~/github/etcbc'], modules=[BHSA])
# The load request names no features explicitly (empty string).
api = TF.load('')
# Later cells use F, L and T without defining them; presumably this call is
# what puts those API members into the notebook namespace — TODO confirm.
api.makeAvailableIn(globals())

This is Text-Fabric 5.5.25
Api reference : https://dans-labs.github.io/text-fabric/Api/General/
Tutorial      : https://github.com/Dans-labs/text-fabric/blob/master/docs/tutorial.ipynb
Example data  : https://github.com/Dans-labs/text-fabric-data

114 features found and 0 ignored
  0.00s loading features ...
  5.44s All features loaded/computed - for details use loadLog()


In [3]:
# Set up the BHSA helper on top of the loaded API. The second argument looks
# like this notebook's name (the links printed below point to books.ipynb)
# — TODO confirm against the Bhsa API documentation.
B = Bhsa(api, 'books')

**Documentation:** <a target="_blank" href="https://etcbc.github.io/bhsa" title="{provenance of this corpus}">BHSA</a> <a target="_blank" href="https://etcbc.github.io/bhsa/features/hebrew/c/0_home.html" title="BHSA feature documentation">Feature docs</a> <a target="_blank" href="https://dans-labs.github.io/text-fabric/Api/Bhsa/" title="BHSA API documentation">BHSA API</a> <a target="_blank" href="https://dans-labs.github.io/text-fabric/Api/General/" title="text-fabric-api">Text-Fabric API 5.5.25</a> <a target="_blank" href="https://dans-labs.github.io/text-fabric/Api/General/#search-templates" title="Search Templates Introduction and Reference">Search Reference</a>


This notebook online:
<a target="_blank" href="http://nbviewer.jupyter.org/github/etcbc/lingo/blob/master/bits-and-pieces/books.ipynb">NBViewer</a>
<a target="_blank" href="https://github.com/etcbc/lingo/blob/master/bits-and-pieces/books.ipynb">GitHub</a>


In [4]:
# Remember the slot type reported by the otype feature and display it.
slotType = F.otype.slotType
slotType

'word'

In [5]:
# Section type names as a set, so later cells can do membership tests on them.
sectionTypes = set(T.sectionTypes)
sectionTypes

{'book', 'chapter', 'verse'}

In [6]:
# Sample of nodes to experiment with: for each (node type, start offset) pair
# take the 10 consecutive nodes of that type starting at the offset,
# keeping the pairs in the order listed.
testNodes = [
  node
  for (nodeType, start) in (
    ('clause', 20000),
    ('phrase', 100000),
    ('chapter', 200),
    ('word', 300000),
  )
  for node in F.otype.s(nodeType)[start:start + 10]
]
testNodes

[447553,
 447554,
 447555,
 447556,
 447557,
 447558,
 447559,
 447560,
 447561,
 447562,
 751542,
 751543,
 751544,
 751545,
 751546,
 751547,
 751548,
 751549,
 751550,
 751551,
 426824,
 426825,
 426826,
 426827,
 426828,
 426829,
 426830,
 426831,
 426832,
 426833,
 300001,
 300002,
 300003,
 300004,
 300005,
 300006,
 300007,
 300008,
 300009,
 300010]

In [7]:
# Accumulators keyed by node; they are filled by the loop in the next cell.
textInfo = {}
sectionInfo = {}

In [8]:
# For every sample node that is not itself a section node, record its text
# and the section it belongs to.
for n in testNodes:
  nType = F.otype.v(n)
  if nType in sectionTypes:
    # Section nodes are left out of both dictionaries.
    continue
  if nType == slotType:
    # A slot node is its own (single) slot.
    sns = [n]
  else:
    sns = L.d(n, otype=slotType)
  textInfo[n] = T.text(sns)
  sParts = T.sectionFromNode(n)
  nParts = len(sParts)
  # Pad the section reference with None up to a fixed width of 4 entries.
  sParts += (None,) * (4 - nParts)
  sectionInfo[n] = sParts

In [9]:
# Display the text collected per node.
textInfo

{447553: 'וְיָרְקָ֖ה בְּפָנָ֑יו ',
 447554: 'וְעָֽנְתָה֙ ',
 447555: 'וְאָ֣מְרָ֔ה ',
 447556: 'כָּ֚כָה יֵעָשֶׂ֣ה לָאִ֔ישׁ ',
 447557: 'אֲשֶׁ֥ר לֹא־יִבְנֶ֖ה אֶת־בֵּ֥ית אָחִֽיו׃ ',
 447558: 'וְנִקְרָ֥א שְׁמֹ֖ו בְּיִשְׂרָאֵ֑ל ',
 447559: 'בֵּ֖ית חֲל֥וּץ הַנָּֽעַל׃ ס ',
 447560: 'כִּֽי־יִנָּצ֨וּ אֲנָשִׁ֤ים יַחְדָּו֙ אִ֣ישׁ וְאָחִ֔יו ',
 447561: 'וְקָֽרְבָה֙ אֵ֣שֶׁת הָֽאֶחָ֔ד ',
 447562: 'לְהַצִּ֥יל אֶת־אִישָּׁ֖הּ מִיַּ֣ד מַכֵּ֑הוּ ',
 751542: 'וְ',
 751543: 'הִנֵּ֛ה ',
 751544: 'כָּל־אִ֥ישׁ יִשְׂרָאֵ֖ל ',
 751545: 'בָּאִ֣ים ',
 751546: 'אֶל־הַמֶּ֑לֶךְ ',
 751547: 'וַ',
 751548: 'יֹּאמְר֣וּ ',
 751549: 'אֶל־הַמֶּ֡לֶךְ ',
 751550: 'מַדּוּעַ֩ ',
 751551: 'גְּנָב֨וּךָ ',
 300001: 'הָיָ֥ה ',
 300002: 'מַטִּ֖יף ',
 300003: 'הָ',
 300004: 'עָ֥ם ',
 300005: 'הַ',
 300006: 'זֶּֽה׃ ',
 300007: 'אָסֹ֨ף ',
 300008: 'אֶאֱסֹ֜ף ',
 300009: 'יַעֲקֹ֣ב ',
 300010: 'כֻּלָּ֗ךְ '}

In [10]:
# Display the padded section reference collected per node.
sectionInfo

{447553: ('Deuteronomy', 25, 9, None),
 447554: ('Deuteronomy', 25, 9, None),
 447555: ('Deuteronomy', 25, 9, None),
 447556: ('Deuteronomy', 25, 9, None),
 447557: ('Deuteronomy', 25, 9, None),
 447558: ('Deuteronomy', 25, 10, None),
 447559: ('Deuteronomy', 25, 10, None),
 447560: ('Deuteronomy', 25, 11, None),
 447561: ('Deuteronomy', 25, 11, None),
 447562: ('Deuteronomy', 25, 11, None),
 751542: ('2_Samuel', 19, 42, None),
 751543: ('2_Samuel', 19, 42, None),
 751544: ('2_Samuel', 19, 42, None),
 751545: ('2_Samuel', 19, 42, None),
 751546: ('2_Samuel', 19, 42, None),
 751547: ('2_Samuel', 19, 42, None),
 751548: ('2_Samuel', 19, 42, None),
 751549: ('2_Samuel', 19, 42, None),
 751550: ('2_Samuel', 19, 42, None),
 751551: ('2_Samuel', 19, 42, None),
 300001: ('Micah', 2, 11, None),
 300002: ('Micah', 2, 11, None),
 300003: ('Micah', 2, 11, None),
 300004: ('Micah', 2, 11, None),
 300005: ('Micah', 2, 11, None),
 300006: ('Micah', 2, 11, None),
 300007: ('Micah', 2, 12, None),
 300

In [11]:
def getContext(api, nodes):
  """Tabulate section, feature values and text for a collection of nodes.

  Parameters
  ----------
  api
    Text-Fabric API object; its members `F`, `Fs`, `Fall`, `T` and `L`
    are used.
  nodes
    Iterable of nodes. They are processed in sorted order.

  Returns
  -------
  tuple of tuples
    The first row is a header: `'node'`, the section type names from
    `T.sectionTypes`, the sorted feature names from `Fall()`, `'text'`.
    Each following row describes one node: the node itself, its section
    parts padded with `None` to one entry per section type, the value of
    every feature for the node, and the node's text. Nodes whose type is
    a section type get the empty string as text.
  """
  F = api.F
  Fs = api.Fs
  Fall = api.Fall
  T = api.T
  L = api.L
  slotType = F.otype.slotType
  sectionLevels = tuple(T.sectionTypes)
  sectionTypes = set(sectionLevels)
  # Pad to the number of section levels in the header rather than a fixed 3,
  # so data rows stay aligned with the header for any corpus.
  nLevels = len(sectionLevels)

  featNames = tuple(sorted(Fall()))
  # Resolve each feature's value function once instead of once per node.
  featValues = tuple(Fs(f).v for f in featNames)

  rows = [('node',) + sectionLevels + featNames + ('text',)]
  for n in sorted(nodes):
    nType = F.otype.v(n)
    sParts = tuple(T.sectionFromNode(n))
    section = sParts + (None,) * (nLevels - len(sParts))
    if nType in sectionTypes:
      text = ''
    else:
      # A slot node is its own slot; other nodes contribute the slots below them.
      sns = [n] if nType == slotType else L.d(n, otype=slotType)
      text = T.text(sns)
    rows.append((n,) + section + tuple(v(n) for v in featValues) + (text,))
  return tuple(rows)

In [18]:
# Export the context table of the sample nodes as tab-separated text.
data = getContext(api, testNodes)
testFile = os.path.expanduser('~/Downloads/test.csv')
with open(testFile, mode='w', encoding="utf_16_le") as tf:
  # The byte-order mark is written by hand at the start of the file.
  tf.write('\ufeff')
  for row in data:
    # One line per row: None becomes an empty cell, everything else its str().
    print(*('' if r is None else str(r) for r in row), sep='\t', file=tf)