In [76]:
import os
import collections
import re

from tf.fabric import Fabric

# Local environment

In [77]:
BASE = os.path.expanduser('~/github')
ORG = 'Nino-cunei'
REPO = 'oldbabylonian'
VERSION = '0.2'

REPO_PATH = f'{BASE}/{ORG}/{REPO}'
TF_PATH = f'{REPO_PATH}/tf/{VERSION}'

# Launch Text-Fabric with all features loaded

In [78]:
TF = Fabric(locations=TF_PATH)
allFeatures = TF.explore(silent=True, show=True)
loadableFeatures = allFeatures['nodes'] + allFeatures['edges']
api = TF.load(loadableFeatures, silent=True)
api.makeAvailableIn(globals())

This is Text-Fabric 7.4.3
Api reference : https://annotation.github.io/text-fabric/Api/Fabric/

25 features found and 0 ignored


[('Computed',
  'computed-data',
  ('C Computed', 'Call AllComputeds', 'Cs ComputedString')),
 ('Features', 'edge-features', ('E Edge', 'Eall AllEdges', 'Es EdgeString')),
 ('Fabric', 'loading', ('ensureLoaded', 'TF', 'ignored', 'loadLog')),
 ('Locality', 'locality', ('L Locality',)),
 ('Misc', 'messaging', ('cache', 'error', 'indent', 'info', 'reset')),
 ('Nodes',
  'navigating-nodes',
  ('N Nodes', 'sortKey', 'sortKeyTuple', 'otypeRank', 'sortNodes')),
 ('Features',
  'node-features',
  ('F Feature', 'Fall AllFeatures', 'Fs FeatureString')),
 ('Search', 'search', ('S Search',)),
 ('Text', 'text', ('T Text',))]

# Quick exercises

What are the superscripts?

In [79]:
supers = collections.Counter()

for s in F.otype.s('sign'):
  if F.super.v(s):
    supers[F.reading.v(s)] += 1

In [80]:
supers

Counter({'d': 3848,
         'disz': 1153,
         'ki': 844,
         'gesz': 247,
         'sar': 76,
         'muszen': 3,
         'mi2': 44,
         'gi': 34,
         'na4': 17,
         'lu2': 29,
         'ap': 2,
         'tug2': 25,
         'am': 2,
         'duru5': 5,
         'dug': 2,
         'ku': 1,
         'iri': 25,
         'gar': 3,
         'kusz': 12,
         'uruda': 9,
         'u2': 4,
         'i7': 1,
         'iti': 1,
         'ir': 1,
         'p': 2,
         'id2': 2,
         'urudu': 6,
         'asz': 1,
         'an': 2,
         'uzu': 1,
         'ti': 1,
         'munus': 1,
         'la': 1,
         'ku6': 4,
         'at': 1,
         'ar': 1,
         'ururdu': 1,
         'szim': 3})

What is the language use?
* 1 = Akkadian
* 2 = Sumerian

In [81]:
F.language.freqList()

((1, 171644), (2, 18178))

# Proper nouns

List of sign stretches that occur between `um-ma` and `ma`.

In [82]:
introNouns = []

for line in F.otype.s('line'):
  signs = L.d(line, otype='sign')
  
  # um-ma ...  ma
  # 0  1  >=2  >=3
  
  # first find the ma on at least position 3
  
  mas = []
  
  for i in range(3, len(signs)):
    if F.reading.v(signs[i]) == 'ma':
      mas.append(i)
      
  # if there is no ma, skip the line
  
    if not mas:
      continue
      
  # test the last ma
  # find the first um-ma in front of it, with room for something in between
    
    lastMa = mas[-1]
    between = None
    
    for i in range(lastMa - 2):
      if (
        F.reading.v(signs[i]) == 'um'
        and
        F.reading.v(signs[i + 1]) == 'ma'
      ):
        between = signs[i + 2:lastMa]
      
    if between:
      introNouns.append(between)

print(f'Found {len(introNouns)} occurrences of intro nouns')

Found 2538 occurrences of intro nouns


Let's show them and their frequencies.

In [83]:
len(introNouns)

2538

In [84]:
def getReading(word):
    signs = L.d(word, otype='sign') if type(word) is int else word
    return ''.join(f'{F.atf.v(n)}{F.after.v(n)}' for n in signs).rstrip('-')

In [85]:
introNounsCount = collections.Counter()

for between in introNouns:
    introNounsCount[getReading(between)] += 1

In [86]:
len(introNounsCount)

769

In [87]:
for (proper, amount) in sorted(
  introNounsCount.items(),
  key=lambda x: (-x[1], x[0]),
)[0:100]:
  print(f'{proper:<30} {amount:>4} x')

szu                             192 x
szu-u2                          179 x
a-na-ku                         140 x
at-ta                           114 x
at-ta-a                         110 x
ha-am-mu-ra-bi                   85 x
szu-nu                           60 x
at-ti                            59 x
a-na-ku-u2                       48 x
szi                              40 x
a-wi-il-dingir                   23 x
a-hu-um                          21 x
ni-nu                            19 x
be-li2                           18 x
szu#                             17 x
a-bi-e-szu-uh                    13 x
szi-i                            13 x
ta-tu-ur                         13 x
ha-am-mu-ra-pi2                  12 x
la                               12 x
s,i-li2-{d}-utu                  12 x
sag-il2                          12 x
{d}-iszkur                       12 x
{d}-na-bi-um                     12 x
2(disz)                          10 x
am-mi-s,a-du-qa2                 10 x
lu2-igi-sa6 

Most of these are proper names or prouns, but we should try to exclude broken passages from this list and correct the representation of divine names: {d}-iszkur-szar-rum should read {d}iszkur-szar-rum. There is also a recursion issue hiding in this list: the entry with si-i, recorded five times, is probably deriving from examples like AbB 12, 077, which has um-ma i3-li2-a-si-ima in line 3.

# Making sign lists

Now we are going to produce sign lists from the Akkadian parts of the letters and the Sumerograms in two different lists.

In [88]:
signList = collections.defaultdict(lambda: collections.Counter())
for sign in F.otype.s('sign'):
    reading = F.reading.v(sign)
    grapheme = F.grapheme.v(sign)
    form = reading if reading else grapheme if grapheme else None
    if form:
        signList[F.language.v(sign)][form] += 1 

In [89]:
byFreqDesc = lambda x: (-x[1], x[0])
byFreqAsc = lambda x: (x[1], x[0])
byAlpha = lambda x: x[0]

def makeSignList(showCase, sortKey):
    for language in signList:
        print('Akk' if language == 1 else 'Sum')
        for (sign, amount) in sorted(
            signList[language].items(), 
            key=sortKey,
        )[0:showCase]:
            print(f'\t{sign:<10} {amount:>4}')

In [90]:
makeSignList(10, byAlpha)

Akk
	(a            1
	(szu          1
	(u3           1
	(x            3
	...        1413
	...szu        1
	...x          6
	A             7
	AB           21
	AD           17
Sum
	(hi           1
	...           8
	AD            1
	AG            1
	ARAD         30
	ARAD2         7
	BA            2
	BAD           3
	BUR           1
	DU            2


In [91]:
makeSignList(100, byFreqDesc)

Akk
	ma         10587
	a          9738
	x          8198
	na         7811
	i          4579
	sza        4422
	szu        4130
	li         3611
	am         3577
	d          3235
	ta         3138
	u2         3039
	ka         3023
	um         2791
	ki         2695
	ni         2656
	la         2534
	bi         2446
	u3         2367
	ti         2312
	ri         2224
	ba         2217
	ra         2132
	nu         2045
	asz        2012
	im         1934
	ia         1694
	al         1632
	disz       1549
	ku         1496
	di         1470
	qi2        1470
	mi         1436
	...        1413
	isz        1405
	an         1372
	szi        1308
	e          1289
	lu         1256
	da         1225
	szum       1131
	tu         1090
	bi2        1087
	pu         1050
	at         1041
	ul         1038
	ha          971
	mu          958
	pi2         918
	utu         895
	tim         882
	bu          866
	pa          864
	ru          827
	li2         770
	ar          754
	it          736
	il          712
	marduk  

# First steps to identify verbs

In [92]:
word = F.otype.s('word')[100]
L.d(word, otype='sign')

[222, 223, 224, 225, 226, 227]

In [93]:
T.formats

{'text-graphic-full', 'text-ling-full', 'text-orig-full'}

In [94]:
include = {'u2', 'ib', 'im'}

verbs = collections.Counter()

for word in F.otype.s('word'):
    signs = L.d(word, otype='sign')
    
    if F.reading.v(signs[0]) not in include:
        continue
    if any(F.language.v(sign) == 2 for sign in signs):
        continue
    verbs[T.text(word, fmt='text-ling-full', descend=True)] += 1
    
len(verbs)

776

In [95]:
for (item, amount) in sorted(
    verbs.items(), 
    key=byFreqDesc,
)[0:300]:
    print(f'\t{item:<30} {amount:>4}')

	u2-ul                           737
	u2-ba-al-la-t,u2-szu             62
	u2-la                            35
	u2                               34
	u2-lam-mi-da-an-ni               34
	u2-sza-bi-lam                    22
	u2-lu                            15
	u2-ba-la-t,u2-szu                11
	ib-ni-d-mar-tu                   10
	u2-lam-mi-da-an-ni                9
	ib-ni-d-mar-tu                    8
	u2-                               8
	u2-da-ab-ba-ab                    8
	u2-sza-bi-la-kum                  8
	ib-ba-szu-u2                      7
	ib-ni-d-marduk                    7
	u2-ba-al-li-t,u2-szu              7
	u2-ka-al                          7
	u2-sza-bi-la-ki-im                7
	ib-ni-d-suen                      6
	u2-ka-al-lu                       6
	u2-lam-mi-du-ni-in-ni             6
	u2-sza-ab-ba-lam                  6
	u2-sza-ba-la-kum                  6
	u2-ul                             6
	ib-ni-d-marduk                    5
	im                                5
	