Read the lectionary file.

In [1]:
import os
import re
from tf.app import use

In [2]:
# Local clone of the repository that holds the lectionary data.
BASE = os.path.expanduser('~/github')
ORG = 'etcbc'
REPO = 'linksyr'

# Input: the pericope table inside the clone.
LECTIONARY_DATA = 'data/lectionaries'
DATA_FILE = 'pericopes.csv'
DATA_PATH = '/'.join((BASE, ORG, REPO, LECTIONARY_DATA, DATA_FILE))

# Output: a scratch directory inside the clone and the files written there.
TEMP = '_temp'
TEMP_PATH = '/'.join((BASE, ORG, REPO, TEMP))
PERI_RAW_FILE = 'periraw.txt'
PERI_FILE = 'peri.txt'
ERROR_FILE = 'error.txt'
PERI_RAW_PATH = '/'.join((TEMP_PATH, PERI_RAW_FILE))
PERI_PATH = '/'.join((TEMP_PATH, PERI_FILE))
ERROR_PATH = '/'.join((TEMP_PATH, ERROR_FILE))

In [3]:
# Make sure the scratch directory exists.
# `exist_ok=True` already covers the "directory is there" case, so no separate
# existence check is needed (such a check is race-prone and would silently
# pass when the path exists but is a regular file).
os.makedirs(TEMP_PATH, exist_ok=True)

In [4]:
def readData(path=None):
  """Read the semicolon-separated pericope table.

  Parameters
  ----------
  path: string, optional
    The file to read. When omitted, `DATA_PATH` is used.

  Returns
  -------
  tuple
    `(header, lines)` where `header` maps each column position to the column
    name found on the first line of the file, and `lines` is the list of all
    following lines, each split on `;` into a list of strings.

  Raises
  ------
  StopIteration
    If the file is completely empty (there is no header line).
  """
  if path is None:
    path = DATA_PATH
  # utf-8-sig: the file starts with a byte order mark; decoded as plain UTF-8
  # that mark ends up glued to the name of the first column.
  with open(path, encoding='utf-8-sig') as fh:
    header = next(fh)
    lines = list(fh)
  header = dict(enumerate(header.rstrip('\n').split(';')))
  lines = [line.rstrip('\n').split(';') for line in lines]
  return (header, lines)

In [5]:
# Load the table once; `header` and `lines` are used by all later cells.
header, lines = readData()

In [6]:
# List every column position next to its name.
print(*(f'{n:>2} = {name}' for (n, name) in header.items()), sep='\n')

 0 = ﻿no. MS
 1 = No.
 2 = Pericope
 3 = Intro
 4 = Intro.Remarks
 5 = Intro.Transl
 6 = Thales
 7 = Thales ID
 8 = Intro.Fol.A
 9 = 
10 = Intro.Col.A
11 = Intro.Line.A
12 = Intro.Fol.Z
13 = Intro.Col.Z
14 = Intro.Line.Z
15 = Taksa
16 = Taksa.Trans
17 = Taksa.Remarks
18 = Transl.remarks
19 = Taksa.Fol.A
20 = Taksa.Col.A
21 = Taksa.Line.A
22 = Taksa.Fol.Z
23 = Taksa.Col.Z
24 = Taksa.Line.Z
25 = Taksa.Remarks
26 = Siglum
27 = Siglum
28 = Link
29 = Link
30 = MS-List.link
31 = Pericope.Fol.A
32 = Pericope.Col.A
33 = Pericope.Line.A
34 = Pericope.Fol.Z
35 = Pericope.Col.Z
36 = Pericope.Line.Z
37 = Cross reference (corpus) [to be filled in aut.]
38 = Remarks Cross reference (corpus)
39 = Abbreviation
40 = Version
41 = Version.Remarks
42 = Version.Syriac
43 = Version.Transl
44 = Denomination
45 = Literature
46 = Literature.link
47 = Codicology
48 = Pericope.remarks
49 = Ref.remarks.
50 = Cross.ref.Lect
51 = Transl.Cross.ref.
52 = Addition before
53 = Addition after
54 = Addition.Fol.A
55 = Ad

In [7]:
# Show the first data row, each value labelled with its column name.
print(*(f'{header[n]} = {value}' for (n, value) in enumerate(lines[0])), sep='\n')

﻿no. MS = 73
No. = 073-0265
Pericope = Acts#08:05-13.
Intro = ‎‏ܦܪܟܣܝܣ‏‎
Intro.Remarks = 
Intro.Transl = Acts
Thales = 
Thales ID = 
Intro.Fol.A = 161b
 = 
Intro.Col.A = 2
Intro.Line.A = 19
Intro.Fol.Z = 161b
Intro.Col.Z = 2
Intro.Line.Z = 19
Taksa = ‎‏ܬܘܒ ܛܟܣܐ ܥܠ ܚܕ̈ܒܫܒܐ ܐܚܪ̈ܢܐ ܬܡ̈ܢܝܐ ܕܩܝܡܬܐ ܡܫܲܒܚܬܐ‏‎
Taksa.Trans = Furthermore, the order on Last First (Day) of the Week, the Eighth, of the praiseworthy Resurrection
Taksa.Remarks = subrubr ‎‏ܛܟܣܐ ܩܕܡܝܐ ܕܩܝܡܬܐ ܡܲܐܚܝܢܝܬܐ‏‎ (12-13 - after a blank line) (The First Order of the live-giving Resurrection)
Transl.remarks = The First Order of the live-giving Resurrection
Taksa.Fol.A = 159b
Taksa.Col.A = 1.
Taksa.Line.A = 1.
Taksa.Fol.Z = 159b
Taksa.Col.Z = 1.
Taksa.Line.Z = 11.
Taksa.Remarks = 
Siglum = 16l1
Siglum = 16l01
Link = https://archive.org/stream/SMC1.1/SMC%201.1#page/n165/mode/1up
Link = https://archive.org/stream/SMC1.1/SMC%201.1#page/n165/mode/1up
MS-List.link = 16l01
Pericope.Fol.A = 161b
Pericope.Col.A = 2
Pericope.Line.A = 19
Per

# Pericope analysis

In [8]:
# Book acronyms as they are written in the Pericope column (the part before
# the `#`, e.g. `Acts` in `Acts#08:05-13.`), mapped to book names of a corpus.
# The mapping is grouped under two keys:
#   'P': the values are book names that occur in the `peshitta` dataset
#   'S': the values are book names that occur in the `syrnt` dataset
bookMapping = {
  'P': {
    '1Chr': 'Chr1',
    '2Chr': 'Chr2',
    '1Mc': 'Mc1_A',
    '1Rg': 'Rg1',
    '2Rg': 'Rg2',
    '1Sm': 'Sm1',
    '2Sm': 'Sm2',
    '4Ezra': 'Esr4',
    'Am': 'Am',
    'ApBar': 'ApBar',
    'Bar': 'Bar',
    'Bel_Dr': 'BelDr',
    'Ct': 'Ct',
    'Dn': 'Dn',
    'Dt': 'Dt',
    'Ec': 'Ec',
    'EpBar': 'EpBar_A',
    'Ex': 'Ex',
    'Ez': 'Ez',
    'Gn': 'Gn',
    'Hb': 'Hb',
    'Hg': 'Hg',
    'Hs': 'Hs',
    'Is': 'Is',
    'Jb': 'Jb',
    'Jd': 'Jd',
    'Jl': 'Jl',
    'Jon': 'Jon',
    'Jr': 'Jr',
    'Js': 'Jos',
    'Lm': 'Thr',
    'Lv': 'Lv',
    'Mi': 'Mi',
    'Ml': 'Ml',
    'Na': 'Na',
    'Nm': 'Nm',
    'Ob': 'Ob',
    'Pr': 'Pr',
    'Ru': 'Ru',
    'Sa': 'Sa',
    'Sap': 'Sap',
    'Sir': 'Sir',
    'Su': 'Sus',
    'Zf': 'Zf',
  },
  'S': {
    '1Cor': '1Cor',
    '2Cor': '2Cor',
    '1Joh': '1John',
    '1Petr': '1Peter',
    '2Petr': '2Peter',
    '1Thess': '1Thess',
    '2Thess': '2Thess',
    '1Tim': '1Tim',
    '2Tim': '2Tim',
    # NOTE(review): a bare `Tim` is mapped to 1Tim, presumably a spelling
    # variant in the data; confirm that it never stands for 2Tim.
    'Tim': '1Tim',
    'Acts': 'Acts',
    'Col': 'Col',
    'Eph': 'Eph',
    'Gal': 'Gal',
    'Heb': 'Heb',
    'Jas': 'James',
    'Joh': 'John',
    'Jude': 'Jude',
    'Lk': 'Luke',
    'Mat': 'Matt',
    'Mk': 'Mark',
    'Phil': 'Phil',
    'Rom': 'Rom',
    'Tit': 'Titus',
  },
}

In [9]:
# Flatten the per-corpus mapping into one lookup table:
# pericope acronym -> corpus book name.
# When an acronym occurs under more than one corpus key, the later key wins.
bookFromPeri = {}
for volume in bookMapping:
  bookFromPeri.update(bookMapping[volume])

In [14]:
# Overview of the flattened mapping, ordered by acronym.
for (periAcro, book) in sorted(bookFromPeri.items()):
  print(f'{periAcro:<10} => {book}')

1Chr       => Chr1
1Cor       => 1Cor
1Joh       => 1John
1Mc        => Mc1_A
1Petr      => 1Peter
1Rg        => Rg1
1Sm        => Sm1
1Thess     => 1Thess
1Tim       => 1Tim
2Chr       => Chr2
2Cor       => 2Cor
2Petr      => 2Peter
2Rg        => Rg2
2Sm        => Sm2
2Thess     => 2Thess
2Tim       => 2Tim
4Ezra      => Esr4
Acts       => Acts
Am         => Am
ApBar      => ApBar
Bar        => Bar
Bel_Dr     => BelDr
Col        => Col
Ct         => Ct
Dn         => Dn
Dt         => Dt
Ec         => Ec
EpBar      => EpBar_A
Eph        => Eph
Ex         => Ex
Ez         => Ez
Gal        => Gal
Gn         => Gn
Hb         => Hb
Heb        => Heb
Hg         => Hg
Hs         => Hs
Is         => Is
Jas        => James
Jb         => Jb
Jd         => Jd
Jl         => Jl
Joh        => John
Jon        => Jon
Jr         => Jr
Js         => Jos
Jude       => Jude
Lk         => Luke
Lm         => Thr
Lv         => Lv
Mat        => Matt
Mi         => Mi
Mk         => Mark
Ml         => Ml
Na      

In [48]:
# Column positions in a row of the pericope table
# (the header listing shows 2 = Pericope and 40 = Version).
VERSION_INDEX = 40
# The pericope column is available under two names with the same value.
PERICOPE_INDEX = P_INDEX = 2
# Only rows that have this value in the version column are processed.
P_VAL = 'P'

In [519]:
# Collect the untouched pericope string of every row whose version column
# equals P_VAL, keyed by row index, and dump them to PERI_RAW_PATH.
raw = {}

# Explicit UTF-8: the pericope strings contain Syriac script, so relying on
# the platform default encoding can fail or produce a differently encoded file.
with open(PERI_RAW_PATH, 'w', encoding='utf-8') as fh:
  for (ln, line) in enumerate(lines):
    if line[VERSION_INDEX] != P_VAL:
      continue
    pericopeStr = line[P_INDEX]
    raw[ln] = pericopeStr
    fh.write(f'{ln:>5} {pericopeStr}\n')

In [520]:
# Row-specific repairs, applied by `simplify` after its generic clean-up:
#   row index (position in `lines`) -> (offending substring, replacement)
# An empty replacement simply removes the offending text.
exceptions = {
  30: (')ܡܛܠ)', ''),
  57: ('ܐܚܝܕ ܟܠ', ''),
  855: ('01:03:', '01:03-'),
  1082: (' + ܡܛܠ ܐܪܘܢܗ ܕܡܪܝܐ', ''),
  1209: ('Tit#03:', '+03:'),
  1215: ('ܐܬܘ)', ''),
  1252: ('-10', ''),
  1253: (')', ''),
  1295: ('+ܘܡܢ ܐܝܕܐ ܕܟܠ ܕܩܡܘ ܥܠܘܗܝ܀', ''),
  1298: ('Is#01-01', 'Is#01:01'),
  1350: ('Pr#2109-21', 'Pr#21:09-21'),
  1425: ('Mi#04:13-05-03', 'Mi#04:13-05:03'),
  1549: ('Jr#11:18-12:05+12:09-09b-11+12:14-15', 'Jr#11:18-12:05+12:09-09b+11+12:14-15'),
  1697: ('Jr#11:18-12:05+12:09-09b-11+12:14-15', 'Jr#11:18-12:05+12:09-09b+11+12:14-15'),
  2142: ('Jr#11:18-12:05+12:09-09b-11+12:14-15', 'Jr#11:18-12:05+12:09-09b+11+12:14-15'),
  2353: ('Jr#11:18-12:05+12:09-09b-11+12:14-15', 'Jr#11:18-12:05+12:09-09b+11+12:14-15'),
  2617: ('Jr#11:18-12:05+12:09-09b-11+12:14-15', 'Jr#11:18-12:05+12:09-09b+11+12:14-15'),
  2858: ('Jr#11:18-12:05+12:09-09b-11+12:14-15', 'Jr#11:18-12:05+12:09-09b+11+12:14-15'),
  4681: ('Jr#11:18-12:05+12:09-09b-11+12:14-15', 'Jr#11:18-12:05+12:09-09b+11+12:14-15'),
  4837: ('Jr#11:18-12:05+12:09-09b-11+12:14-15', 'Jr#11:18-12:05+12:09-09b+11+12:14-15'),
  8692: ('Jr#11:18-12:05+12:09-09b-11+12:14-15', 'Jr#11:18-12:05+12:09-09b+11+12:14-15'),
  1849: ('ܕܒܝܬܐ)', ''),
  1955: (' + small fragments', ''),
  2258: ('+ܐܚܝܕ ܟܠ', ''),
  2732: ('Is#37:33-10-37', 'Is#37:33-37:37'),
  3025: ('1Joh#03:02:17', '1Joh#03:02-17'),
  3521: ('+ܡܛܠ ܕܡܪܝܐ ܡܠܠ', ''),
  3525: ('ܡܛܠ ܕܥܕܠܐ ܢܕܥ ܛܠܝܐ ܠܡܣܠܝܘ ܒܝܫܬܐ ܘܠܡܓܒܐ ܛܒܬܐ܆ ܢܣܠܐ ܒܝܫܬܐ ܘܢܓܐ ܠܗ ܛܒܬܐ+', ''),
  6791: ('ܡܛܠ ܕܥܕܠܐ ܢܕܥ ܛܠܝܐ ܠܡܣܠܝܘ ܒܝܫܬܐ ܘܠܡܓܒܐ ܛܒܬܐ܆ ܢܣܠܐ ܒܝܫܬܐ ܘܢܓܐ ܠܗ ܛܒܬܐ +', ''),
  7493: ('+ܡܛܠ ܕܥܕܠܐ ܢܕܥ ܛܠܝܐ ܠܡܣܠܝܘ ܒܝܫܬܐ ܘܠܡܓܒܐ ܛܒܬܐ܆ ܢܣܠܐ ܒܝܫܬܐ ܘܢܓܐ ܠܗ ܛܒܬܐ', ''),
  3914: ('1Cor#05:11-16-17-21', '1Cor#05:11-16+17-21'),
  3928: ('+ ܘܐܘܬܒܘܗܝ ܠܫܠܡܘܢ ܥܠ ܟܘܪܣܝܗ ܕܕܘܝܕ ܐܒܘܗܝ ', ''),
  4244: ('Is#54:17)ܕܡܬܬܩܢ)1-17', 'Is#54:1-17'),
  4263: ('1Joh#01-02+02-10', '1Joh#01:02+02:10'),
  # NOTE(review): same offending text as rows 1549 ... 8692 above, but the
  # replacement differs ('12:09-11' here, '12:09-09b+11' there); confirm
  # that this difference is intended.
  4447: ('Jr#11:18-12:05+12:09-09b-11+12:14-15', 'Jr#11:18-12:05+12:09-11+12:14-15'),
  4545: ('Nm#24:-15a+17b-18', 'Nm#24:15a+17b-18'),
  5105: (')ܡܛܠ)', ''),
  5196: ('Rom#09:10:17-18', 'Rom#10:17-18'),
  5212: ('Acts#12:25-13-03', 'Acts#12:25-13:03'),
  5261: ('Rom#07:07:16', 'Rom#07:07-16'),
  5353: ('+ܠܥܠܡ ܠܕܪ̈ܝܟܘܢ', ''),
  5719: ('ܒܝܫܬܐ ܘܛܒܐ ܠܗ ܛܒܬܐ', ''),
  6168: ('Dn#23-25', 'Dn#23:25'),
  6566: (')ܡܛܠ)', ''),
  6604: ('+ܠܥܠܡ ܠܕܪ̈ܝܟܘܢ', ''),
  7061: (' + ܡܛܠ ܐܪܘܢܗ ܕܡܪܝܐ', ''),
  8896: ('Sa#04-05+08:12+08:22-23', 'Sa#04:05+08:12+08:22-23'),
  9021: (' + ܘܐܘܬܒܘܗܝ ܠܫܠܝܡܘܢ ܥܠ ܟܘܪܣܝܐ', ''),
}
# Row indices that the parsing loop leaves out altogether.
skips = {
  887,
  1612,
  1613,
  2864,
  4568,
  4569,
  6333,
  7284,
  7430,
  7519,
}

In [530]:
# Number of rows that need manual treatment: repaired rows plus skipped rows.
len(exceptions) + len(skips)

58

In [521]:
# Debugging switch for the parsing loop: set `test` to a set of row indices
# to parse only those rows; leave it at None to parse all rows.
test = None
# test = {56}

In [522]:
# Text between round brackets or between angle brackets, brackets included.
bracketedPat = r'(\([^\)]+\))|(<[^>]+>)'
bracketedRe = re.compile(bracketedPat)

def simplify(x, ln):
  """Reduce a raw pericope string to a form that the parser can handle.

  Parameters
  ----------
  x: string
    The raw contents of the pericope column.
  ln: integer
    The row index; used to look up a row-specific repair in `exceptions`.

  Returns
  -------
  string
    The cleaned string.
  """
  # Single characters: drop the two direction marks, square brackets and
  # full stops; turn an en dash into a hyphen.
  charFixes = str.maketrans({
    '\u200E': None,
    '\u200F': None,
    '\u2013': '-',
    '[': None,
    ']': None,
    '.': None,
  })
  cleaned = bracketedRe.sub('', x.translate(charFixes))

  # Left-over noise, removed in this order: '-?' has to go before '?'.
  for junk in ('++', '-?', '?'):
    cleaned = cleaned.replace(junk, '')

  # Finally the repair that is specific to this row, if there is one.
  repair = exceptions.get(ln)
  if repair is not None:
    (offend, better) = repair
    cleaned = cleaned.replace(offend, better)
  return cleaned

In [523]:
def parsePericopes(shape, prevData):
  """Parse a `+`-separated sequence of verse ranges.

  Parameters
  ----------
  shape: string
    The simplified pericope string; surrounding white space and `+` signs
    are ignored.
  prevData: tuple or None
    A `(book, chapter, verse)` context for the first part, used when that
    part does not state its own book or chapter.

  Returns
  -------
  tuple
    `(True, ranges)` where `ranges` holds the result of `parsePericope` for
    each part, or `(False, shape)` as soon as one part cannot be parsed.
    Every part after the first gets the last verse of the part before it
    as context.
  """
  ranges = []
  context = prevData
  for piece in shape.strip().strip('+').split('+'):
    (ok, verses) = parsePericope(piece, context)
    if not ok:
      return (False, shape)
    ranges.append(verses)
    context = verses[-1]
  return (True, ranges)

In [524]:
def parsePericope(shape, prevData):
  """Parse a single verse or a `from-to` range of verses.

  Parameters
  ----------
  shape: string
    The text of the range; surrounding white space and hyphens are ignored.
  prevData: tuple or None
    A `(book, chapter, verse)` context for the first verse.

  Returns
  -------
  tuple
    `(True, verses)` where `verses` is a list of one or two results of
    `parseVerse`, or `(False, shape)` (with the trimmed shape) when there are
    more than two hyphen-separated parts or when a part cannot be parsed.
    The second verse of a range gets the first verse as context.
  """
  shape = shape.strip().strip('-')
  ends = shape.split('-')
  if len(ends) > 2:
    return (False, shape)

  # One loop serves both cases: a lone verse is a range with a single end.
  verses = []
  context = prevData
  for end in ends:
    (ok, verseData) = parseVerse(end, context)
    if not ok:
      return (False, shape)
    verses.append(verseData)
    context = verseData
  return (True, verses)

In [525]:
def parseVerse(shape, prevData):
  """Parse one verse reference of the form `book#chapter:verse`.

  The book and the chapter may be left out; they are then taken from
  `prevData`.

  Parameters
  ----------
  shape: string
    The text of the reference; surrounding white space is ignored.
  prevData: tuple or None
    A `(book, chapter, verse)` context that supplies a missing book or
    chapter.

  Returns
  -------
  tuple
    `(True, (book, chapter, verse))` on success. Chapter and verse are
    digit strings without leading zeroes; the verse is None when it was
    given as `end`.
    `(False, shape)` (with the trimmed shape) when there is more than one
    `#` or `:`, when a missing book or chapter cannot be supplied, or when
    chapter or verse is not numeric after normalization.
  """
  shape = shape.strip()
  failure = (False, shape)

  # book # rest
  pieces = [piece.strip() for piece in shape.split('#')]
  if len(pieces) > 2:
    return failure
  if len(pieces) == 2:
    (book, chvh) = pieces
  else:
    (book, chvh) = (None, shape)

  # chapter : verse
  numbers = [number.strip() for number in chvh.split(':')]
  if len(numbers) > 2:
    return failure
  if len(numbers) == 2:
    (chapter, verse) = numbers
  else:
    (chapter, verse) = (None, chvh)

  # What is not stated explicitly comes from the context.
  if book is None:
    if prevData is None:
      return failure
    book = prevData[0]
  if chapter is None:
    if prevData is None or len(prevData) < 2:
      return failure
    chapter = prevData[1]

  # Verse: drop the letters a, b and f, drop leading zeroes, and keep only
  # what precedes a slash.
  for letter in 'abf':
    verse = verse.replace(letter, '')
  verse = verse.lstrip('0').split('/', 1)[0]
  if verse == 'end':
    verse = None

  # Chapter: drop leading zeroes, but let an all-zero chapter be '0'.
  if chapter is not None:
    chapter = chapter.lstrip('0') or '0'

  chapterOk = chapter is None or chapter.isdigit()
  verseOk = verse is None or verse.isdigit()
  if not (chapterOk and verseOk):
    return failure

  return (True, (book, chapter, verse))

In [526]:
# Parse the pericope string of every relevant row.
#   pericopes: row index -> parsed structure as delivered by `parsePericopes`
#   errors:    row index -> the text that could not be parsed
pericopes = {}
errors = {}

for (ln, line) in enumerate(lines):
  if ln in skips:
    continue
  if test is not None and ln not in test:
    continue
  if line[VERSION_INDEX] != P_VAL:
    continue
  pericopeStr = simplify(line[P_INDEX], ln)
  # Every row is parsed on its own: no context is carried over from the
  # preceding row. For that reason nothing is remembered after the parse;
  # indexing into `data` here would also break on an empty error text.
  (good, data) = parsePericopes(pericopeStr, None)
  if good:
    pericopes[ln] = data
  else:
    errors[ln] = data

In [529]:
# Show at most 20 failures (raw text => what could not be parsed), followed
# by the totals.
if errors:
  for (i, peri) in sorted(errors.items())[0:20]:
    print(f'{i:>5} {raw[i]} => {peri}\n')
print(f'{len(errors):>5} errors')
print(f'{len(pericopes):>5} pericopes')

# Write all failures and all results to file.
# Explicit UTF-8: the texts may contain non-ASCII (Syriac) characters, so the
# platform default encoding must not be relied upon.
with open(ERROR_PATH, 'w', encoding='utf-8') as f:
  for (i, peri) in sorted(errors.items()):
    f.write(f'{i:>5} {peri}\n')

with open(PERI_PATH, 'w', encoding='utf-8') as f:
  for (i, peri) in sorted(pericopes.items()):
    f.write(f'{i:>5} {peri}\n')

# Show the first 20 successfully parsed rows.
if pericopes:
  for (i, peri) in sorted(pericopes.items())[0:20]:
    print(f'{i:>5} {raw[i]} => {peri}\n')

  705 ?#?:?-? => #:

    1 errors
 8266 pericopes
    0 Acts#08:05-13. => [[('Acts', '8', '5'), ('Acts', '8', '13')]]

    1 1Sm#16:01-13a. => [[('1Sm', '16', '1'), ('1Sm', '16', '13')]]

    2 Ct#01:02-14. => [[('Ct', '1', '2'), ('Ct', '1', '14')]]

    3 Jd#05:01-11. => [[('Jd', '5', '1'), ('Jd', '5', '11')]]

    4 Ex#03:01-10. => [[('Ex', '3', '1'), ('Ex', '3', '10')]]

    5 1Sm#01:09-19a.(‎‏ܠܪܡܬܐ‏‎) => [[('1Sm', '1', '9'), ('1Sm', '1', '19')]]

    6 Nm#17:16-26. => [[('Nm', '17', '16'), ('Nm', '17', '26')]]

    7 Jd#13:02-14. => [[('Jd', '13', '2'), ('Jd', '13', '14')]]

    8 ‎2Cor#03:12-04:04. => [[('2Cor', '3', '12'), ('2Cor', '4', '4')]]

    9 Gn#12:07-08.+18:01-15. => [[('Gn', '12', '7'), ('Gn', '12', '8')], [('Gn', '18', '1'), ('Gn', '18', '15')]]

   10 Gn#03:21-04:07. => [[('Gn', '3', '21'), ('Gn', '4', '7')]]

   11 Gn#17:01-09. => [[('Gn', '17', '1'), ('Gn', '17', '9')]]

   12 Pr#09:12-18. => [[('Pr', '9', '12'), ('Pr', '9', '18')]]

   13 Gn#05:01-24. => [[('Gn', '

In [364]:
# Load the Text-Fabric app for the `peshitta` dataset.
pA = use('peshitta')

TF app is up-to-date.
Using annotation/app-peshitta commit 1f3f47a5154f5be012f5c42d050baca70a6c7e48 (=latest)
  in /Users/dirk/text-fabric-data/__apps__/peshitta.
Using etcbc/peshitta/tf - 0.1 r0.4 in /Users/dirk/text-fabric-data


**Documentation:** <a target="_blank" href="https://github.com/etcbc/peshitta/blob/master/docs" title="provenance of Peshitta (Old Testament)">PESHITTA</a> <a target="_blank" href="https://annotation.github.io/text-fabric/Writing/Syriac" title="('Syriac characters and transcriptions',)">Character table</a> <a target="_blank" href="https://github.com/etcbc/peshitta/blob/master/docs/transcription-0.1.md#transcription.md" title="PESHITTA feature documentation">Feature docs</a> <a target="_blank" href="https://github.com/annotation/app-peshitta" title="peshitta API documentation">peshitta API</a> <a target="_blank" href="https://annotation.github.io/text-fabric/Api/Fabric/" title="text-fabric-api">Text-Fabric API 7.3.10</a> <a target="_blank" href="https://annotation.github.io/text-fabric/Use/Search/" title="Search Templates Introduction and Reference">Search Reference</a>

In [45]:
# Load the Text-Fabric app for the `syrnt` dataset.
sA = use('syrnt')

TF app is up-to-date.
Using annotation/app-syrnt commit d8cce973438848a1bf7e4f4ab62b2d480206ca9b (=latest)
  in /Users/dirk/text-fabric-data/__apps__/syrnt.
Using etcbc/syrnt/tf - 0.1 r0.3 in /Users/dirk/text-fabric-data


**Documentation:** <a target="_blank" href="https://github.com/etcbc/syrnt/blob/master/docs" title="provenance of SyrNT">SYRNT</a> <a target="_blank" href="https://annotation.github.io/text-fabric/Writing/Syriac" title="('Syriac characters and transcriptions',)">Character table</a> <a target="_blank" href="https://github.com/etcbc/syrnt/blob/master/docs/transcription-0.1.md#transcription.md" title="SYRNT feature documentation">Feature docs</a> <a target="_blank" href="https://github.com/annotation/app-syrnt" title="syrnt API documentation">syrnt API</a> <a target="_blank" href="https://annotation.github.io/text-fabric/Api/Fabric/" title="text-fabric-api">Text-Fabric API 7.3.10</a> <a target="_blank" href="https://annotation.github.io/text-fabric/Use/Search/" title="Search Templates Introduction and Reference">Search Reference</a>

In [46]:
# Shortcuts to the API of each dataset and to its `F` member
# (`p...` for peshitta, `s...` for syrnt).
(pApi, sApi) = (pA.api, sA.api)
(pF, sF) = (pApi.F, sApi.F)

In [71]:
# The book names of the peshitta dataset, listed in sorted order.
peshittaBooks = list(map(pF.book.v, pF.otype.s('book')))
print(*sorted(peshittaBooks), sep='\n')

Am
ApBar
ApcPs
ApcPs_A
ApcPs_B
Bar
BelDr
Chr1
Chr2
Ct
Dn
Dt
Ec
EpBar_A
EpBar_B
EpJr
Esr3
Esr4
Est
Ex
Ez
Ezr
Gn
Hb
Hg
Hs
Is
Jb
Jd
Jdt
Jl
Jon
Jos
Jr
Lv
Mc1_A
Mc1_B
Mc2
Mc3
Mc4
Mi
Ml
Na
Neh
Nm
Ob
Oda
OrM_A
OrM_B
Pr
Ps
PsS
Rg1
Rg2
Ru
Sa
Sap
Sir
Sm1
Sm2
Sus
Tb_A
Tb_B
Thr
Zf


In [72]:
# The book names of the syrnt dataset, listed in sorted order.
syrntBooks = list(map(sF.book.v, sF.otype.s('book')))
print(*sorted(syrntBooks), sep='\n')

1Cor
1John
1Peter
1Thess
1Tim
2Cor
2John
2Peter
2Thess
2Tim
3John
Acts
Col
Eph
Gal
Heb
James
John
Jude
Luke
Mark
Matt
Phil
Phlm
Rev
Rom
Titus
