In [1]:
import os
import collections
import re

from tf.fabric import Fabric

# Local environment

In [2]:
BASE = os.path.expanduser('~/github')
ORG = 'Nino-cunei'
REPO = 'oldbabylonian'
VERSION = '0.2'

REPO_PATH = f'{BASE}/{ORG}/{REPO}'
TF_PATH = f'{REPO_PATH}/tf/{VERSION}'

# Launch Text-Fabric with all features loaded

In [3]:
TF = Fabric(locations=TF_PATH)
allFeatures = TF.explore(silent=True, show=True)
loadableFeatures = allFeatures['nodes'] + allFeatures['edges']
api = TF.load(loadableFeatures, silent=True)
api.makeAvailableIn(globals())

This is Text-Fabric 7.4.2
Api reference : https://annotation.github.io/text-fabric/Api/Fabric/

25 features found and 0 ignored


[('Computed',
  'computed-data',
  ('C Computed', 'Call AllComputeds', 'Cs ComputedString')),
 ('Features', 'edge-features', ('E Edge', 'Eall AllEdges', 'Es EdgeString')),
 ('Fabric', 'loading', ('ensureLoaded', 'TF', 'ignored', 'loadLog')),
 ('Locality', 'locality', ('L Locality',)),
 ('Misc', 'messaging', ('cache', 'error', 'indent', 'info', 'reset')),
 ('Nodes',
  'navigating-nodes',
  ('N Nodes', 'sortKey', 'sortKeyTuple', 'otypeRank', 'sortNodes')),
 ('Features',
  'node-features',
  ('F Feature', 'Fall AllFeatures', 'Fs FeatureString')),
 ('Search', 'search', ('S Search',)),
 ('Text', 'text', ('T Text',))]

# Quick exercises

What are the superscripts?

In [4]:
supers = collections.Counter()

for s in F.otype.s('sign'):
  if F.super.v(s):
    supers[F.reading.v(s)] += 1

In [5]:
supers

Counter({'d': 3848,
         'disz': 1153,
         'ki': 844,
         'gesz': 247,
         'sar': 76,
         'muszen': 3,
         'mi2': 44,
         'gi': 34,
         'na4': 17,
         'lu2': 29,
         'ap': 2,
         'tug2': 25,
         'am': 2,
         'duru5': 5,
         'dug': 2,
         'ku': 1,
         'iri': 25,
         'gar': 3,
         'kusz': 12,
         'uruda': 9,
         'u2': 4,
         'i7': 1,
         'iti': 1,
         'ir': 1,
         'p': 2,
         'id2': 2,
         'urudu': 6,
         'asz': 1,
         'an': 2,
         'uzu': 1,
         'ti': 1,
         'munus': 1,
         'la': 1,
         'ku6': 4,
         'at': 1,
         'ar': 1,
         'ururdu': 1,
         'szim': 3})

What is the language use?
* 1 = Akkadian
* 2 = Sumerian

In [6]:
F.language.freqList()

((1, 171644), (2, 18178))

# Proper nouns

List of sign stretches that occur between `um-ma` and `ma`.

In [7]:
introNouns = []

for line in F.otype.s('line'):
  signs = L.d(line, otype='sign')
  
  # um-ma ...  ma
  # 0  1  >=2  >=3
  
  # first find the ma on at least position 3
  
  mas = []
  
  for i in range(3, len(signs)):
    if F.reading.v(signs[i]) == 'ma':
      mas.append(i)
      
  # if there is no ma, skip the line
  
    if not mas:
      continue
      
  # test the last ma
  # find the first um-ma in front of it, with room for something in between
    
    lastMa = mas[-1]
    between = None
    
    for i in range(lastMa - 2):
      if (
        F.reading.v(signs[i]) == 'um'
        and
        F.reading.v(signs[i + 1]) == 'ma'
      ):
        between = signs[i + 2:lastMa]
      
    if between:
      introNouns.append(between)

print(f'Found {len(introNouns)} occurrences of intro nouns')

Found 2538 occurrences of intro nouns


Let's show them and their frequencies.

In [8]:
len(introNouns)

2538

In [14]:
introNounsCount = collections.Counter()

for between in introNouns:
  form = ''.join(f'{F.reading.v(n) or F.grapheme.v(n)}{F.after.v(n)}' for n in between)
  introNounsCount[form] += 1

In [16]:
len(introNounsCount)

681

In [20]:
for (proper, amount) in sorted(
  introNounsCount.items(),
  key=lambda x: (-x[1], x[0]),
)[0:100]:
  print(f'{proper:<30} {amount:>4} x')

szu-                            209 x
szu-u2-                         184 x
a-na-ku-                        149 x
at-ta-                          121 x
at-ta-a-                        118 x
ha-am-mu-ra-bi-                 108 x
at-ti-                           60 x
szu-nu-                          60 x
a-na-ku-u2-                      55 x
szi-                             40 x
a-hu-um-                         23 x
a-wi-il-dingir-                  23 x
disz                             21 x
ni-nu-                           19 x
be-li2-                          18 x
ha-am-mu-ra-pi2-                 18 x
a-bi-e-szu-uh-                   17 x
d-marduk-mu-sza-lim-             15 x
d-iszkur-                        14 x
d-na-bi-um-                      14 x
lu2-igi-sa6-                     13 x
s,i-li2-d-utu-                   13 x
szi-i-                           13 x
ta-tu-ur-                        13 x
am-mi-s,a-du-qa2-                12 x
d-nanna-                         12 x
la-         