In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os
import re

from tf.fabric import Fabric
from tf.convert.walker import CV

TF_DIR = os.path.expanduser('~/Downloads/banks/tf')

TF = Fabric(locations=TF_DIR)

cv = CV(TF)

This is Text-Fabric 7.4.3
Api reference : https://annotation.github.io/text-fabric/Api/Fabric/

10 features found and 0 ignored


In [3]:
source = '''

No

## 3
Yes

# Consider Phlebas
$ author=Iain M. Banks

## 1
Everything about us,

in our own terms?

## 2
Besides,
it left
such as
'''

In [6]:
slotType = 'word'

generic = {
    'name': 'Culture quotes from Iain Banks',
    'compiler': 'Dirk Roorda',
    'source': 'Good Reads',
    'url': 'https://www.goodreads.com/work/quotes/14366-consider-phlebas',
}

otext = {
    'fmt:text-orig-full': '{letters}{punc} ',
    'sectionTypes': 'book,chapter',
    'sectionFeatures': 'title,number',
}

intFeatures = {
  'number',
}

featureMeta = {
    'number': {
        'description': 'number of chapter, or sentence in chapter, or line in sentence',
    },
    'title': {
        'description': 'the title of a book',
    },
    'author': {
        'description': 'the author of a book',
    },
    'terminator': {
        'description': 'the last character of a line',
    },
    'letters': {
        'description': 'the letters of a word',
    },
    'punc': {
        'description': 'the punctuation after a word',
    },
}

def director(cv):
  counter = dict(
    sentence=0,
    line=0,
  )
  cur = dict(
    book=None,
    chapter=None,
    sentence=None,
  )

  wordRe = re.compile(r'^(.*?)([^A-Za-z0-9]*)$')
  metaRe = re.compile(r'^\$\s*([^= ]+)\s*=\s*(.*)')

  for line in source.strip().split('\n'):
    line = line.rstrip()
    if not line:
      cv.terminate(cur['sentence'])              # action
      for ntp in counter:
        counter[ntp] += 1
      cur['sentence'] = cv.node('sentence')      # action
      cv.feature(
        cur['sentence'],
        number=counter['sentence'],
      )                                          # action
      continue
      
    if line.startswith('# '):
      for ntp in ('sentence', 'chapter', 'book'):
        cv.terminate(cur[ntp])                   # action
        cur[ntp] = None         
      title = line[2:].strip()
      cur['book'] = cv.node('book')              # action
      for ntp in counter:
        counter[ntp] = 0
      cv.feature(
        cur['book'],
        title=title,
      )                                          # action
      continue

    if line.startswith('## '):
      for ntp in ('sentence', 'chapter'):
        cv.terminate(cur[ntp])                   # action
        cur[ntp] = None         
      number = line[2:].strip()
      cur['chapter'] = cv.node('chapter')        # action
      for ntp in counter:
        counter[ntp] = 0
      cv.feature(
        cur['chapter'],
        number=number,
      )                                          # action
      continue

    if line.startswith('$'):
      match = metaRe.match(line)
      if not match:
        cv.stop(f'Malformed metadata line: "{line}"') # action
        return
      name = match.group(1)
      value = match.group(2)
      cv.feature(
        cur['book'],
        **{name: value},
      )                                           # action
      continue
        
    if not cur['sentence']:
      cur['sentence'] = cv.node('sentence')       # action
      counter['sentence'] += 1
      cv.feature(
        cur['sentence'],
        number=counter['sentence'],
      )                                           # action
      
    cur['line'] = cv.node('line')                 # action
    counter['line'] += 1
    cv.feature(
      cur['line'],
      terminator=line[-1],
      number=counter['line'],
    )                                              # action
    
    gap = False
    for word in line.split():
      if word.startswith('['):
        gap = True
        cv.terminate(cur['line'])   # action
        w = cv.slot()               # action
        cv.feature(w, gap=1)        # action
        word = word[1:]
      elif word.endswith(']'):
        w = cv.slot()               # action
        cv.resume(cur['line'])      # action
        cv.feature(w, gap=1)        # action
        gap = False
        word = word[0:-1]
      else:
        w = cv.slot()
        if gap:
          cv.feature(w, gap=1)      # action

      (letters, punc) = wordRe.findall(word)[0]
      cv.feature(w, letters=letters)            # action
      if punc:
        cv.feature(w, punc=punc)                # action
    cv.terminate(cur['line'])                   # action
    curLine = None
    
  for ntp in ('sentence', 'chapter', 'book'):
    cv.terminate(cur[ntp])                      # action
    

In [22]:
good = cv.walk(
    director,
    slotType,
    otext=otext,
    generic=generic,
    intFeatures=intFeatures,
    featureMeta=featureMeta,
    warn=True,
)

good

  0.00s Importing data from walking through the source ...
   |     0.00s Preparing metadata... 
   |   SECTION TYPES:    book, chapter
   |   SECTION FEATURES: title, number
   |   TEXT    FEATURES:
   |      |   text-orig-full       letters, punc
   |     0.00s OK
   |     0.00s Following director... 
   |     0.00s "edge" actions: 0
   |     0.00s "feature" actions: 38
   |     0.00s "node" actions: 20
   |     0.00s "resume" actions: 0
   |     0.00s "slot" actions: 14
   |     0.01s "terminate" actions: 24
   |          1 x "book" node 
   |          3 x "chapter" node 
   |          7 x "line" node 
   |          9 x "sentence" node 
   |         14 x "word" node  = slot type
   |         34 nodes of all types
   |     0.01s OK
   |     0.00s Removing unlinked nodes ... 
   |      |    4h 28m 21s      4 unlinked "sentence" nodes: [2, 4, 5, 8]
   |      |    4h 28m 21s      4 unlinked nodes
   |      |    4h 28m 21s Leaving     30 nodes
   |     0.00s checking for nodes and edges 

   |      |   1




False

In [14]:
TF = Fabric(locations=TF_DIR)

allFeatures = TF.explore(silent=True, show=True)
loadableFeatures = allFeatures['nodes'] + allFeatures['edges']
loadableFeatures

api = TF.load(loadableFeatures, silent=False)

api.makeAvailableIn(globals())

This is Text-Fabric 7.4.3
Api reference : https://annotation.github.io/text-fabric/Api/Fabric/

10 features found and 0 ignored
  0.00s loading features ...
   |     0.00s T otype                from /Users/dirk/Downloads/banks/tf
   |     0.00s T oslots               from /Users/dirk/Downloads/banks/tf
   |     0.00s T title                from /Users/dirk/Downloads/banks/tf
   |     0.00s T number               from /Users/dirk/Downloads/banks/tf
   |     0.00s T letters              from /Users/dirk/Downloads/banks/tf
   |     0.00s T punc                 from /Users/dirk/Downloads/banks/tf
   |      |     0.00s C __levels__           from otype, oslots, otext
   |      |     0.00s C __order__            from otype, oslots, __levels__
   |      |     0.00s C __rank__             from otype, __order__
   |      |     0.00s C __levUp__            from otype, oslots, __levels__, __rank__
   |      |     0.00s C __levDown__          from otype, __levUp__, __rank__
   |      |     0.00s 



   |      |     0.00s C __sections__         from otype, oslots, otext, __levUp__, __levels__, title, number
   |     0.00s T author               from /Users/dirk/Downloads/banks/tf
   |     0.00s T gap                  from /Users/dirk/Downloads/banks/tf
   |     0.00s T terminator           from /Users/dirk/Downloads/banks/tf
  0.04s All features loaded/computed - for details use loadLog()


[('Computed',
  'computed-data',
  ('C Computed', 'Call AllComputeds', 'Cs ComputedString')),
 ('Features', 'edge-features', ('E Edge', 'Eall AllEdges', 'Es EdgeString')),
 ('Fabric', 'loading', ('ensureLoaded', 'TF', 'ignored', 'loadLog')),
 ('Locality', 'locality', ('L Locality',)),
 ('Misc', 'messaging', ('cache', 'error', 'indent', 'info', 'reset')),
 ('Nodes',
  'navigating-nodes',
  ('N Nodes', 'sortKey', 'sortKeyTuple', 'otypeRank', 'sortNodes')),
 ('Features',
  'node-features',
  ('F Feature', 'Fall AllFeatures', 'Fs FeatureString')),
 ('Search', 'search', ('S Search',)),
 ('Text', 'text', ('T Text',))]

In [13]:
TF.clearCache()

### otype

In [12]:
with open(f'{TF_DIR}/otype.tf') as fh:
  print(fh.read())

@node
@compiler=Dirk Roorda
@name=Culture quotes from Iain Banks
@source=Good Reads
@url=https://www.goodreads.com/work/quotes/14366-consider-phlebas
@valueType=str
@writtenBy=Text-Fabric
@dateWritten=2019-01-30T15:30:24Z

1-100	word
101	book
102-104	chapter
105-117	line
118-121	sentence



### otext

In [13]:
with open(f'{TF_DIR}/otext.tf') as fh:
  print(fh.read())

@config
@compiler=Dirk Roorda
@fmt:text-orig-full={letters}{punc} 
@name=Culture quotes from Iain Banks
@sectionFeatures=title,number
@sectionTypes=book,chapter
@source=Good Reads
@url=https://www.goodreads.com/work/quotes/14366-consider-phlebas
@writtenBy=Text-Fabric
@dateWritten=2019-01-30T15:30:24Z




### oslots

In [14]:
with open(f'{TF_DIR}/oslots.tf') as fh:
  print(fh.read())

@edge
@compiler=Dirk Roorda
@name=Culture quotes from Iain Banks
@source=Good Reads
@url=https://www.goodreads.com/work/quotes/14366-consider-phlebas
@valueType=str
@writtenBy=Text-Fabric
@dateWritten=2019-01-30T15:30:24Z

101	2-100
1
2-56
57-100
1
2-4
5-7
8-10,15-21
22-28
29-39
40-52
53-56
57
58-76
77-78,82-84
85-89
90-100
1
2-28
29-56
57-100



In [22]:
F.letters.freqList()

(('the', 8),
 ('of', 5),
 ('and', 4),
 ('in', 3),
 ('we', 3),
 ('everything', 2),
 ('know', 2),
 ('most', 2),
 ('ones', 2),
 ('patterns', 2),
 ('us', 2),
 ('Besides', 1),
 ('Culture', 1),
 ('Everything', 1),
 ('So', 1),
 ('a', 1),
 ('about', 1),
 ('aid', 1),
 ('any', 1),
 ('around', 1),
 ('as', 1),
 ('barbarian', 1),
 ('bottom', 1),
 ('can', 1),
 ('care', 1),
 ('climbing', 1),
 ('composed', 1),
 ('control', 1),
 ('dead', 1),
 ('elegant', 1),
 ('enjoyable', 1),
 ('final', 1),
 ('find', 1),
 ('free', 1),
 ('games', 1),
 ('good', 1),
 ('harness', 1),
 ('have', 1),
 ('high', 1),
 ('humans', 1),
 ('impossible', 1),
 ('is', 1),
 ('it', 1),
 ('languages', 1),
 ('left', 1),
 ('life', 1),
 ('line', 1),
 ('make', 1),
 ('mattered', 1),
 ('mountains', 1),
 ('not', 1),
 ('nothing', 1),
 ('our', 1),
 ('over', 1),
 ('own', 1),
 ('problems', 1),
 ('really', 1),
 ('romance', 1),
 ('safety', 1),
 ('societies', 1),
 ('sports', 1),
 ('studying', 1),
 ('such', 1),
 ('take', 1),
 ('terms', 1),
 ('that', 1),