This notebook looks for all features on lexeme nodes and spreads their values over all occurrences (word nodes) of each lexeme, if that has not already been done.

In [1]:
import os
import collections

from tf.fabric import Fabric

In [2]:
# Root directory under which the local repository clone is expected.
BASE = os.path.expanduser('~/github')
# Organisation, repository and data version that together select the dataset.
ORG, REPO, VERSION = 'etcbc', 'bhsa', 'c'

# Directory of the repository clone.
REPO_PATH = '/'.join((BASE, ORG, REPO))
# Directory the existing Text-Fabric features are read from.
TF_IN = '/'.join((REPO_PATH, 'tf', VERSION))
# Scratch directory the newly generated features are written to.
TF_OUT = '/'.join((REPO_PATH, '_temp', 'lex', VERSION))

In [7]:
# Names of the features whose lexeme-level values are copied to word nodes.
lexFeatures = [
  'gloss',
  'nametype',
  'voc_lex',
  'voc_lex_utf8',
]

In [16]:
# Dataset-level metadata shared by every exported feature.
# The version is taken from the VERSION constant of the configuration cell,
# so the metadata cannot drift from the directories used for input and output.
generic = dict(
  author='Eep Talstra Centre for Bible and Computer',
  dataset='BHSA',
  datasetName='Biblia Hebraica Stuttgartensia Amstelodamensis',
  email='shebanq@ancient-data.org',
  encoders='Constantijn Sikkel (QDF), and Dirk Roorda (TF)',
  version=VERSION,
  website='https://shebanq.ancient-data.org',
)

In [17]:
# Per-feature metadata: each listed feature is declared with valueType 'str'.
# A separate dict is created per feature so the entries do not alias each other.
featureMeta = dict(
  (name, {'valueType': 'str'})
  for name in lexFeatures
)

In [18]:
# Combine the metadata: the empty-string key carries the generic dataset
# metadata, the remaining keys carry the per-feature metadata.
metaData = {'': generic, **featureMeta}
# Display the combined metadata as the cell result.
metaData

{'': {'author': 'Eep Talstra Centre for Bible and Computer',
  'dataset': 'BHSA',
  'datasetName': 'Biblia Hebraica Stuttgartensia Amstelodamensis',
  'email': 'shebanq@ancient-data.org',
  'encoders': 'Constantijn Sikkel (QDF), and Dirk Roorda (TF)',
  'version': 'c',
  'website': 'https://shebanq.ancient-data.org'},
 'gloss': {'valueType': 'str'},
 'nametype': {'valueType': 'str'},
 'voc_lex': {'valueType': 'str'},
 'voc_lex_utf8': {'valueType': 'str'}}

In [19]:
# Fabric instance bound to the input directory TF_IN, from which the existing
# features are loaded.
TFin = Fabric(locations=TF_IN)

This is Text-Fabric 7.4.4
Api reference : https://annotation.github.io/text-fabric/Api/Fabric/

114 features found and 0 ignored


In [20]:
# Load the features named in lexFeatures from the input location.
# NOTE(review): presumably load() signals failure with a falsy return value, in
# which case the next line would raise — confirm against the Text-Fabric docs.
api = TFin.load(lexFeatures)
# Make the API handles available as global names in this notebook; the
# propagation cell below uses F, Fs and L.
api.makeAvailableIn(globals())

  0.00s loading features ...
   |     0.01s B voc_lex_utf8         from /Users/dirk/github/etcbc/bhsa/tf/c
   |     0.01s B gloss                from /Users/dirk/github/etcbc/bhsa/tf/c
   |     0.00s B nametype             from /Users/dirk/github/etcbc/bhsa/tf/c
   |     0.01s B voc_lex              from /Users/dirk/github/etcbc/bhsa/tf/c
  3.58s All features loaded/computed - for details use loadLog()


[('Computed',
  'computed-data',
  ('C Computed', 'Call AllComputeds', 'Cs ComputedString')),
 ('Features', 'edge-features', ('E Edge', 'Eall AllEdges', 'Es EdgeString')),
 ('Fabric', 'loading', ('ensureLoaded', 'TF', 'ignored', 'loadLog')),
 ('Locality', 'locality', ('L Locality',)),
 ('Misc', 'messaging', ('cache', 'error', 'indent', 'info', 'reset')),
 ('Nodes',
  'navigating-nodes',
  ('N Nodes', 'sortKey', 'sortKeyTuple', 'otypeRank', 'sortNodes')),
 ('Features',
  'node-features',
  ('F Feature', 'Fall AllFeatures', 'Fs FeatureString')),
 ('Search', 'search', ('S Search',)),
 ('Text', 'text', ('T Text',))]

In [24]:
# Propagate each lexeme-level feature value to the word nodes of that lexeme.
#
# Result: nodeFeatures[feature][node] = value, holding entries for the word
# nodes as well as for the lexeme node itself. Lexemes without a value for a
# feature contribute nothing for that feature.

# The word nodes below each lexeme do not depend on the feature, so they are
# collected once instead of once per feature.
lexWords = {lx: L.d(lx, otype='word') for lx in F.otype.s('lex')}

nodeFeatures = collections.defaultdict(dict)

for feat in lexFeatures:
  print(f'{feat} ...')
  # Resolve the feature accessor once per feature rather than once per lexeme.
  valueOf = Fs(feat).v
  for (lx, words) in lexWords.items():
    value = valueOf(lx)
    if value is None:
      continue
    # Looked up only when there is something to store, so a feature without
    # any values does not get an (empty) entry in nodeFeatures.
    featValues = nodeFeatures[feat]
    for w in words:
      featValues[w] = value
    featValues[lx] = value

gloss ...
nametype ...
voc_lex ...
voc_lex_utf8 ...


In [25]:
# Second Fabric instance, bound to the output directory TF_OUT; it is the
# object through which the generated features are saved.
TFout = Fabric(locations=TF_OUT)

This is Text-Fabric 7.4.4
Api reference : https://annotation.github.io/text-fabric/Api/Fabric/

0 features found and 0 ignored


  0.00s Warp feature "otype" not found in
/Users/dirk/github/etcbc/bhsa/_temp/lex/c/
  0.00s Warp feature "oslots" not found in
/Users/dirk/github/etcbc/bhsa/_temp/lex/c/


  0.00s Warp feature "otext" not found. Working without Text-API



In [27]:
# Write the propagated node features, with no edge features, together with
# their metadata to the output location; the return value is shown as the
# cell result.
TFout.save(nodeFeatures=nodeFeatures, edgeFeatures={}, metaData=metaData)

  0.00s Exporting 4 node and 0 edge and 0 config features to /Users/dirk/github/etcbc/bhsa/_temp/lex/c:
   |     0.56s T gloss                to /Users/dirk/github/etcbc/bhsa/_temp/lex/c
   |     0.05s T nametype             to /Users/dirk/github/etcbc/bhsa/_temp/lex/c
   |     0.51s T voc_lex              to /Users/dirk/github/etcbc/bhsa/_temp/lex/c
   |     0.56s T voc_lex_utf8         to /Users/dirk/github/etcbc/bhsa/_temp/lex/c
  1.69s Exported 4 node features and 0 edge features and 0 config features to /Users/dirk/github/etcbc/bhsa/_temp/lex/c


True