forked from Freely-Given-org/BibleOrgSys
-
Notifications
You must be signed in to change notification settings - Fork 0
/
ESFMBibleBook.py
executable file
·590 lines (516 loc) · 36.9 KB
/
ESFMBibleBook.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# ESFMBibleBook.py
#
# Module handling the ESFM markers for Bible books
#
# Copyright (C) 2010-2019 Robert Hunt
# Author: Robert Hunt <Freely.Given.org@gmail.com>
# License: See gpl-3.0.txt
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
Module for defining and manipulating ESFM Bible books.
"""
from gettext import gettext as _
LastModifiedDate = '2019-02-19' # by RJH
ShortProgName = "ESFMBibleBook"
ProgName = "ESFM Bible book handler"
ProgVersion = '0.48'
ProgNameVersion = '{} v{}'.format( ShortProgName, ProgVersion )
ProgNameVersionDate = '{} {} {}'.format( ProgNameVersion, _("last modified"), LastModifiedDate )
debuggingThisModule = False
import os, logging
import BibleOrgSysGlobals
from USFM3Markers import OFTEN_IGNORED_USFM_HEADER_MARKERS
from ESFMFile import ESFMFile
from Bible import BibleBook
ESFM_SEMANTIC_TAGS = 'AGLOPQTS' # S is put last coz it must be the last tag if there are multiple tags
ESFM_STRONGS_TAGS = 'HG'
sortedNLMarkers = None
class ESFMBibleBook( BibleBook ):
"""
Class to load and manipulate a single ESFM file / book.
"""
def __init__( self, containerBibleObject, BBB ):
"""
Create the ESFM Bible book object.
"""
BibleBook.__init__( self, containerBibleObject, BBB ) # Initialise the base class
self.objectNameString = 'ESFM Bible Book object'
self.objectTypeString = 'ESFM'
global sortedNLMarkers
if sortedNLMarkers is None:
sortedNLMarkers = sorted( BibleOrgSysGlobals.USFMMarkers.getNewlineMarkersList('Combined'), key=len, reverse=True )
# end of __init__
def load( self, filename, folder=None ):
"""
Load the ESFM Bible book from a file.
Tries to combine physical lines into logical lines,
i.e., so that all lines begin with a ESFM paragraph marker.
Uses the addLine function of the base class to save the lines.
Note: the base class later on will try to break apart lines with a paragraph marker in the middle --
we don't need to worry about that here.
"""
if debuggingThisModule or BibleOrgSysGlobals.debugFlag:
print( "ESFM.load( {}, {} )".format( filename, folder ) )
def ESFMPreprocessing( BBB, C, V, marker, originalText ):
"""
Converts ESFM tagging to pseudo-USFM codes for easier handling later on.
Parameters:
BBB, C, V parameters are just for use in error messages
originalText is the text line from the file
Returns:
A string replacement to use instead of originalText
Converts:
XXX=PYYYY to \dic PXXX=YYY\dic*
e.g., "{the three lepers}=PMat6Lepers" to "the three lepers\dic Pthe_three_lepers=Mat6lepers\dic*"
i.e, braces and equal signs are removed from the text
and the information is placed in a \dic field.
Note: This DOESN'T remove the underline/underscore characters used to join translated words
which were one word in the original, e.g., went_down
"""
if (debuggingThisModule or BibleOrgSysGlobals.debugFlag) \
and len(originalText)>5: # Don't display for "blank" lines (like '\v 10 ')
print( "\n\nESFMPreprocessing( {} {}:{}, {}, {!r} )".format( BBB, C, V, marker, originalText ) )
def saveWord( BBB, C, V, word ):
"""
"""
if debuggingThisModule or BibleOrgSysGlobals.debugFlag:
print( "ESFM saveWord( {}, {}:{}, {!r} )".format( BBB, C, V, word ) )
assert word and ' ' not in word
# end of saveWord
def saveSemanticTag( BBB, C, V, word, tag ):
"""
Fills the semantic dictionary with keys:
'Tag errors': contains a list of 4-tuples (BBB,C,V,errorWord)
'Missing': contains a dictionary
'A' 'G' 'L' 'O' 'P' 'Q' entries each containing a dictionary
where the key is the name (e.g., 'Jonah')
and the entry is a list of 4-tuples (BBB,C,V,actualWord)
Returns a character SFM field to be inserted into the line
(for better compatibility with the software chain).
"""
#if C=='4' and V in ('11','12'):
if debuggingThisModule or BibleOrgSysGlobals.debugFlag:
print( "ESFM saveSemanticTag( {} {}:{}, {!r}, {!r} )".format( BBB, C, V, word, tag ) )
assert word and ' ' not in word
assert tag and tag[0]=='=' and len(tag)>=2
tagMarker, tagContent = tag[1], tag[2:]
thisDict = self.containerBibleObject.semanticDict
if tagMarker not in ESFM_SEMANTIC_TAGS:
loadErrors.append( _("{} {}:{} unknown ESFM {!r} tag content {!r}").format( self.BBB, C, V, tagMarker, tagContent ) )
logging.error( "ESFM tagging error in {} {}:{}: unknown {!r} tag in {!r}".format( BBB, C, V, tagMarker, tag ) )
self.addPriorityError( 15, C, V, _("Unknown ESFM semantic tag") )
if 'Tag errors' not in thisDict: thisDict['Tag errors'] = []
thisDict['Tag errors'].append( (BBB,C,V,tag[1:]) )
if not tagContent: tagContent = word
# Now look in the semantic database
if tagMarker in thisDict \
and tagContent in thisDict[tagMarker]:
thisDict[tagMarker][tagContent].append( (BBB,C,V,word) )
#print( "Now have {}:{}={}".format( tagMarker, tagContent, thisDict[tagMarker][tagContent] ) )
else: # couldn't find it
loadErrors.append( _("{} {}:{} unknown ESFM {!r} tag content {!r}").format( self.BBB, C, V, tagMarker, tagContent ) )
logging.error( "ESFM tagging error in {} {}:{}: unknown {!r} tag content {!r}".format( BBB, C, V, tagMarker, tagContent ) )
self.addPriorityError( 15, C, V, _("Unknown ESFM semantic tag") )
if 'Missing' not in thisDict: thisDict['Missing'] = {}
if tagMarker not in thisDict['Missing']: thisDict['Missing'][tagMarker] = {}
if tagContent not in thisDict['Missing'][tagMarker]: thisDict['Missing'][tagMarker][tagContent] = []
thisDict['Missing'][tagMarker][tagContent].append( (BBB,C,V) if word==tagContent else (BBB,C,V,word) )
if word==tagContent:
return "\\sem {} {}\\sem*".format( tagMarker, word )
return "\\sem {} {}={}\\sem*".format( tagMarker, word, tagContent )
# end of saveSemanticTag
def saveStrongsTag( BBB, C, V, word, tag ):
"""
Returns a character SFM field to be inserted into the line
(for better compatibility with the software chain).
"""
#if C=='4' and V in ('11','12'):
if debuggingThisModule or BibleOrgSysGlobals.debugFlag:
print( "ESFM saveStrongsTag( {}, {}:{}, {!r}, {!r} )".format( BBB, C, V, word, tag ) )
assert word and ' ' not in word
assert tag and tag[0]=='=' and tag[1]=='S' and len(tag)>=3
tagMarker, tagContent = tag[2], tag[3:]
thisDict = self.containerBibleObject.StrongsDict
if tagMarker not in ESFM_STRONGS_TAGS:
loadErrors.append( _("{} {}:{} unknown ESFM {!r} tag content {!r}").format( self.BBB, C, V, tagMarker, tagContent ) )
logging.error( "ESFM tagging error in {} {}:{}: unknown {!r} tag in {!r}".format( BBB, C, V, tagMarker, tag ) )
self.addPriorityError( 10, C, V, _("Unknown ESFM Strong's tag") )
if 'Tag errors' not in thisDict: thisDict['Tag errors'] = []
thisDict['Tag errors'].append( (BBB,C,V,tag[1:]) )
if not tagContent: tagContent = word
# Now look in the Strongs database
if tagMarker in thisDict \
and tagContent in thisDict[tagMarker]:
thisEntry = thisDict[tagMarker][tagContent]
if isinstance( thisEntry, str ):
thisDict[tagMarker][tagContent] = [thisEntry] # Convert from a string to a list with the string as the first list item
thisDict[tagMarker][tagContent].append( (BBB,C,V,word) )
#print( " ", tagMarker, tagContent, thisEntry )
#print( "Now have {}:{}={}".format( tagMarker, tagContent, thisDict[tagMarker][tagContent] ) )
else: # couldn't find it
loadErrors.append( _("{} {}:{} unknown ESFM {!r} tag content {!r}").format( self.BBB, C, V, tagMarker, tagContent ) )
logging.error( "ESFM tagging error in {} {}:{}: unknown {!r} tag content {!r}".format( BBB, C, V, tagMarker, tagContent ) )
self.addPriorityError( 10, C, V, _("Unknown ESFM Strong's tag") )
if 'Missing' not in thisDict: thisDict['Missing'] = {}
if tagMarker not in thisDict['Missing']: thisDict['Missing'][tagMarker] = {}
if tagContent not in thisDict['Missing'][tagMarker]: thisDict['Missing'][tagMarker][tagContent] = []
thisDict['Missing'][tagMarker][tagContent].append( (BBB,C,V) if word==tagContent else (BBB,C,V,word) )
return "\\str {} {}={}\\str*".format( tagMarker, tagContent, word )
# end of saveStrongsTag
# Main code for ESFMPreprocessing
# Analyse and collect all ESFM tags and special characters,
# and put the results into USFM type character fields
bracedGroupFlag = underlineGroupFlag = startsWithUnderline = False
word = underlineGroup = bracedGroupText = tagText = ''
# The tag is the bit starting with =, e.g., '=PJonah'
hangingUnderlineCount = 0 # Count of unclosed '…_ ' sequences
lastChar = ''
#textLen = len( originalText )
resultText = ''
firstWordFlag = True
#print( 'ESFMPreprocessing {} {}:{}'.format( BBB, C, V ) )
for j, originalChar in enumerate( originalText ):
char = originalChar
#nextChar = originalText[j+1] if j<textLen-1 else ''
#if '{' in originalText or '_' in originalText or '=' in originalText:
#if C=='4' and V=='11':
#print( " ESFMPreprocessing {}={!r} lc={!r} uGF={} hUC={} uL={!r} bGF={} bG={!r} tg={!r} \n oT={!r} \n rT={!r}" \
#.format( j, originalChar, lastChar, underlineGroupFlag, hangingUnderlineCount, underlineGroup, bracedGroupFlag, bracedGroup, tag, originalText, resultText ) )
# Handle hanging underlines, e.g., 'and_ ' or ' _then' or 'and_ they_ _were_ _not _ashamed'
if char == ' ':
if lastChar == '_':
hangingUnderlineCount += 1
assert hangingUnderlineCount < 3
#assert resultText[-1] == ' '
#resultText = resultText[:-1] # Remove the space from the underline otherwise we'll get two spaces
if lastChar != '_' and (not underlineGroupFlag) and hangingUnderlineCount!=0:
#if underlineGroup: print( "underlineGroup was: {!r}".format( underlineGroup ) )
underlineGroup = ''
#if lastChar == ' ':
#startsWithUnderline = char == '_'
#if char == ' ': hangingUnderlineCount += 1
elif char == '_':
if lastChar == ' ':
hangingUnderlineCount -= 1
if hangingUnderlineCount < 0:
loadErrors.append( _("{} {}:{} missing first part of ESFM underline group at position {}").format( self.BBB, C, V, j ) )
logging.error( "ESFM underlining error at {} in {} {}:{}".format( j, BBB, C, V ) )
self.addPriorityError( 10, C, V, _("Missing first part of ESFM underline group") )
hangingUnderlineCount = 0 # recover
if bracedGroupFlag:
if char == '}': bracedGroupFlag = False
else: bracedGroupText += '_' if char==' ' else char
# Handle formation of output string but with tagged text converted into internal SFM fields
# e.g., 'And_ Elohim=G=SH430 _said=SH559:'
# becomes 'And_ Elohim\sem G Elohim\sem*\str H 430=Elohim\str* _said\str H 559=said\str*:'
if tagText:
if BibleOrgSysGlobals.strictCheckingFlag or BibleOrgSysGlobals.debugFlag: assert tagText[0] == '='
if char in ' _=' or char in BibleOrgSysGlobals.ALL_WORD_PUNCT_CHARS: # Note: A forward slash is permitted
if underlineGroupFlag:
underlineGroup += word
if char == '_': underlineGroup += char
else: underlineGroupFlag = False
if len(tagText) > 1:
if tagText[1]=='S':
resultText += saveStrongsTag( BBB, C, V, underlineGroup if underlineGroup else word, tagText )
underlineGroup = ''
underlineGroupFlag = hangingUnderlineFlag = False
elif bracedGroupText or word:
resultText += saveSemanticTag( BBB, C, V, bracedGroupText if bracedGroupText else word, tagText )
else: # WEB Luke 16:7 contains a footnote: \f + \ft 100 cors = about 2,110 liters or 600 bushels.\f*
logging.critical( "Something funny with special symbol {!r} at {} {}:{}".format( char, BBB, C, V ) )
if BibleOrgSysGlobals.debugFlag or debuggingThisModule: halt
if char == '_':
if not underlineGroupFlag: # it's just starting now
underlineGroup += word + char
underlineGroupFlag = True
char = ' ' # to go into resultText
elif char != '=': underlineGroupFlag = False
if char == '=': tagText = char # Started a new consecutive tag
else:
if word:
saveWord( BBB, C, V, word )
firstWordFlag = False
word = bracedGroupText = tagText = ''
if char!='}': resultText += char
else:
loadErrors.append( _("{} {}:{} unexpected short ESFM tag at {}={!r} in {!r}").format( self.BBB, C, V, j, originalChar, originalText ) )
logging.error( "ESFM tagging error in {} {}:{}: unexpected short tag at {}={!r} in {!r}".format( BBB, C, V, j, originalChar, originalText ) )
self.addPriorityError( 21, C, V, _("Unexpected ESFM short tag") )
else: # still in tag
tagText += char
else: # not in tag
if char == '=':
assert not tagText
tagText = char
else: # still not in tag
if char == '{':
if (lastChar and lastChar!=' ') or tagText or bracedGroupFlag or bracedGroupText:
loadErrors.append( _("{} {}:{} unexpected ESFM opening brace at {}={!r} in {!r}").format( self.BBB, C, V, j, originalChar, originalText ) )
logging.error( "ESFM tagging error in {} {}:{}: unexpected opening brace at {}={!r} in {!r}".format( BBB, C, V, j, originalChar, originalText ) )
self.addPriorityError( 20, C, V, _("Unexpected ESFM opening brace") )
bracedGroupFlag = True
char = '' # nothing to go into resultText
elif char in ' _' or char in BibleOrgSysGlobals.DASH_CHARS:
if underlineGroupFlag:
underlineGroup += word
if char == '_':
underlineGroup += char
#char = ' ' # to go into resultText
else: underlineGroupFlag = False
elif char == ' ':
underlineGroupFlag = False
if startsWithUnderline:
underlineGroup += word
startsWithUnderline = False
elif char == '_':
if hangingUnderlineCount > 0:
#char = '' # nothing to go into resultText
#hangingUnderlineCount -= 1# underlineGroupFlag will be set instead below
pass
else: # not hanging underline
underlineGroup += word + char
#char = ' ' # to go into resultText
underlineGroupFlag = True
if word:
if marker == 'v' and not firstWordFlag:
saveWord( BBB, C, V, word )
firstWordFlag = False
word = ''
elif char!='}': word += char
if char!='}': resultText += char
lastChar = originalChar
#else: # TEMP: just remove all ESFM tags and special characters
#inTag = False
#for char in originalText:
#if inTag:
#if char in ' _' or char in BibleOrgSysGlobals.ALL_WORD_PUNCT_CHARS: # Note: A forward slash is permitted
#inTag = False
#resultText += char
#else: # not in tag
#if char == '=': inTag = True; continue
#resultText += char
#resultText = resultText.replace('{','').replace('}','').replace('_(',' ').replace(')_',' ').replace('_',' ')
if debuggingThisModule and resultText != originalText:
print( "from: {!r}".format( originalText ) )
print( " got: {!r}".format( resultText ) )
#assert originalText.count('_') == resultText.count('_') Not necessarily true
elif BibleOrgSysGlobals.strictCheckingFlag or (BibleOrgSysGlobals.debugFlag and debuggingThisModule) \
and ('{' in originalText or '}' in originalText or '=' in originalText):
print( "original:", repr(originalText) )
print( "returned:", repr(resultText) )
return resultText
# end of ESFMBibleBook.ESFMPreprocessing
def doaddLine( originalMarker, originalText ):
"""
Check for newLine markers within the line (if so, break the line)
and save the information in our database.
Also checks for matching underlines.
Also convert ~ to a proper non-break space.
"""
#if (debuggingThisModule or BibleOrgSysGlobals.verbosityLevel > 1) \
#and (originalMarker not in ('c','v') or len(originalText)>5): # Don't display for "blank" lines (like '\v 10 ')
#print( "ESFM doaddLine( {!r}, {!r} )".format( originalMarker, originalText ) )
marker, text = originalMarker, originalText.replace( '~', ' ' )
marker = BibleOrgSysGlobals.USFMMarkers.toStandardMarker( originalMarker )
if marker != originalMarker:
loadErrors.append( _("{} {}:{} ESFM doesn't allow unnumbered marker \\{}: {!r}").format( self.BBB, C, V, originalMarker, originalText ) )
logging.error( _("ESFM doesn't allow the unnumbered marker after {} {}:{} in \\{}: {!r}").format( self.BBB, C, V, originalMarker, originalText ) )
self.addPriorityError( 90, C, V, _("ESFM doesn't allow unnumbered markers") )
if '\\' in text: # Check markers inside the lines
markerList = BibleOrgSysGlobals.USFMMarkers.getMarkerListFromText( text )
ix = 0
for insideMarker, iMIndex, nextSignificantChar, fullMarker, characterContext, endIndex, markerField in markerList: # check paragraph markers
if insideMarker == '\\': # it's a free-standing backspace
loadErrors.append( _("{} {}:{} Improper free-standing backspace character within line in \\{}: {!r}").format( self.BBB, C, V, marker, text ) )
logging.error( _("Improper free-standing backspace character within line after {} {}:{} in \\{}: {!r}").format( self.BBB, C, V, marker, text ) ) # Only log the first error in the line
self.addPriorityError( 100, C, V, _("Improper free-standing backspace character inside a line") )
elif BibleOrgSysGlobals.USFMMarkers.isNewlineMarker(insideMarker): # Need to split the line for everything else to work properly
if ix==0:
loadErrors.append( _("{} {}:{} NewLine marker {!r} shouldn't appear within line in \\{}: {!r}").format( self.BBB, C, V, insideMarker, marker, text ) )
logging.error( _("NewLine marker {!r} shouldn't appear within line after {} {}:{} in \\{}: {!r}").format( insideMarker, self.BBB, C, V, marker, text ) ) # Only log the first error in the line
self.addPriorityError( 96, C, V, _("NewLine marker \\{} shouldn't be inside a line").format( insideMarker ) )
thisText = text[ix:iMIndex].rstrip()
self.addLine( marker, thisText )
ix = iMIndex + 1 + len(insideMarker) + len(nextSignificantChar) # Get the start of the next text -- the 1 is for the backslash
#print( "Did a split from {}:{!r} to {}:{!r} leaving {}:{!r}".format( originalMarker, originalText, marker, thisText, insideMarker, text[ix:] ) )
marker = BibleOrgSysGlobals.USFMMarkers.toStandardMarker( insideMarker ) # setup for the next line
if marker != insideMarker:
loadErrors.append( _("{} {}:{} ESFM doesn't allow unnumbered marker within line \\{}: {!r}").format( self.BBB, C, V, insideMarker, originalText ) )
logging.error( _("ESFM doesn't allow the unnumbered marker within line after {} {}:{} in \\{}: {!r}").format( self.BBB, C, V, insideMarker, originalText ) )
self.addPriorityError( 90, C, V, _("ESFM doesn't allow unnumbered markers") )
if ix != 0: # We must have separated multiple lines
text = text[ix:] # Get the final bit of the line
if '_' in text:
# Should this code be somewhere more general, e.g., in InternalBibleBook.py ???
leftCount, rightCount = text.count( '_ ' ), text.count( ' _' )
if leftCount > rightCount:
loadErrors.append( _("{} {}:{} Too many '_ ' sequences in {} text: {}").format( self.BBB, C, V, marker, text ) )
logging.warning( _("Too many '_ ' sequences in {} line after {} {}:{} at beginning of line with text: {!r}").format( marker, self.BBB, C, V, text ) )
elif leftCount < rightCount:
loadErrors.append( _("{} {}:{} Too many ' _' sequences in {} text: {}").format( self.BBB, C, V, marker, text ) )
logging.warning( _("Too many ' _' sequences in {} line after {} {}:{} at beginning of line with text: {!r}").format( marker, self.BBB, C, V, text ) )
self.addLine( marker, text ) # Call the function in the base class to save the line (or the remainder of the line if we split it above)
# end of ESFMBibleBook.doaddLine
# Main code for ESFMBibleBook.load
if BibleOrgSysGlobals.verbosityLevel > 2: print( " " + _("Loading {}…").format( filename ) )
#self.BBB = BBB
#self.isSingleChapterBook = BibleOrgSysGlobals.BibleBooksCodes.isSingleChapterBook( BBB )
self.sourceFilename = filename
self.sourceFolder = folder
self.sourceFilepath = os.path.join( folder, filename ) if folder else filename
originalBook = ESFMFile()
originalBook.read( self.sourceFilepath )
# Do some important cleaning up before we save the data
C, V = '-1', '-1' # So first/id line starts at -1:0
lastMarker = lastText = ''
loadErrors = []
for marker,originalText in originalBook.lines: # Always process a line behind in case we have to combine lines
#print( "After {} {}:{} \\{} {!r}".format( self.BBB, C, V, marker, originalText ) )
# Keep track of where we are for more helpful error messages
if marker=='c' and originalText: C, V = originalText.split()[0], '0'
elif marker=='v' and originalText:
V = originalText.split()[0]
if C == '-1': C = '1' # Some single chapter books don't have an explicit chapter 1 marker
elif C == '-1' and marker!='intro': V = str( int(V) + 1 )
elif marker=='restore': continue # Ignore these lines completely
# Now load the actual Bible book data
if marker in OFTEN_IGNORED_USFM_HEADER_MARKERS:
text = originalText
else:
text = ESFMPreprocessing( self.BBB, C, V, marker, originalText ) # Convert ESFM encoding to pseudo-USFM
if BibleOrgSysGlobals.USFMMarkers.isNewlineMarker( marker ):
if lastMarker: doaddLine( lastMarker, lastText )
lastMarker, lastText = marker, text
elif BibleOrgSysGlobals.USFMMarkers.isInternalMarker( marker ) \
or marker.endswith('*') and BibleOrgSysGlobals.USFMMarkers.isInternalMarker( marker[:-1] ): # the line begins with an internal marker -- append it to the previous line
if text:
loadErrors.append( _("{} {}:{} Found '\\{}' internal marker at beginning of line with text: {!r}").format( self.BBB, C, V, marker, text ) )
logging.warning( _("Found '\\{}' internal marker after {} {}:{} at beginning of line with text: {!r}").format( marker, self.BBB, C, V, text ) )
else: # no text
loadErrors.append( _("{} {}:{} Found '\\{}' internal marker at beginning of line (with no text)").format( self.BBB, C, V, marker ) )
logging.warning( _("Found '\\{}' internal marker after {} {}:{} at beginning of line (with no text)").format( marker, self.BBB, C, V ) )
self.addPriorityError( 27, C, V, _("Found \\{} internal marker on new line in file").format( marker ) )
if not lastText.endswith(' '): lastText += ' ' # Not always good to add a space, but it's their fault!
lastText += '\\' + marker + ' ' + text
if BibleOrgSysGlobals.verbosityLevel > 3: print( "{} {} {} Appended {}:{!r} to get combined line {}:{!r}".format( self.BBB, C, V, marker, text, lastMarker, lastText ) )
elif BibleOrgSysGlobals.USFMMarkers.isNoteMarker( marker ) \
or marker.endswith('*') and BibleOrgSysGlobals.USFMMarkers.isNoteMarker( marker[:-1] ): # the line begins with a note marker -- append it to the previous line
if text:
loadErrors.append( _("{} {}:{} Found '\\{}' note marker at beginning of line with text: {!r}").format( self.BBB, C, V, marker, text ) )
logging.warning( _("Found '\\{}' note marker after {} {}:{} at beginning of line with text: {!r}").format( marker, self.BBB, C, V, text ) )
else: # no text
loadErrors.append( _("{} {}:{} Found '\\{}' note marker at beginning of line (with no text)").format( self.BBB, C, V, marker ) )
logging.warning( _("Found '\\{}' note marker after {} {}:{} at beginning of line (with no text)").format( marker, self.BBB, C, V ) )
self.addPriorityError( 26, C, V, _("Found \\{} note marker on new line in file").format( marker ) )
if not lastText.endswith(' ') and marker!='f': lastText += ' ' # Not always good to add a space, but it's their fault! Don't do it for footnotes, though.
lastText += '\\' + marker + ' ' + text
if BibleOrgSysGlobals.verbosityLevel > 3: print( "{} {} {} Appended {}:{!r} to get combined line {}:{!r}".format( self.BBB, C, V, marker, text, lastMarker, lastText ) )
else: # the line begins with an unknown marker (ESFM doesn't allow custom markers)
if text:
loadErrors.append( _("{} {}:{} Found '\\{}' unknown marker at beginning of line with text: {!r}").format( self.BBB, C, V, marker, text ) )
logging.error( _("Found '\\{}' unknown marker after {} {}:{} at beginning of line with text: {!r}").format( marker, self.BBB, C, V, text ) )
else: # no text
loadErrors.append( _("{} {}:{} Found '\\{}' unknown marker at beginning of line (with no text)").format( self.BBB, C, V, marker ) )
logging.error( _("Found '\\{}' unknown marker after {} {}:{} at beginning of line (with no text)").format( marker, self.BBB, C, V ) )
self.addPriorityError( 100, C, V, _("Found \\{} unknown marker on new line in file").format( marker ) )
for tryMarker in sortedNLMarkers: # Try to do something intelligent here -- it might be just a missing space
if marker.startswith( tryMarker ): # Let's try changing it
if lastMarker: doaddLine( lastMarker, lastText )
lastMarker, lastText = tryMarker, marker[len(tryMarker):] + ' ' + text
loadErrors.append( _("{} {}:{} Changed '\\{}' unknown marker to {!r} at beginning of line: {}").format( self.BBB, C, V, marker, tryMarker, text ) )
logging.warning( _("Changed '\\{}' unknown marker to {!r} after {} {}:{} at beginning of line: {}").format( marker, tryMarker, self.BBB, C, V, text ) )
break
# Otherwise, don't bother processing this line -- it'll just cause more problems later on
if lastMarker: doaddLine( lastMarker, lastText ) # Process the final line
if not originalBook.lines: # There were no lines!!!
loadErrors.append( _("{} This ESFM file was totally empty: {}").format( self.BBB, self.sourceFilename ) )
logging.error( _("ESFM file for {} was totally empty: {}").format( self.BBB, self.sourceFilename ) )
lastMarker, lastText = 'rem', 'This (ESFM) file was completely empty' # Save something since we had a file at least
if loadErrors: self.errorDictionary['Load Errors'] = loadErrors
if 0 and BibleOrgSysGlobals.debugFlag and self.BBB=='JNA':
for name,thisDict in ( ('SEM',self.containerBibleObject.semanticDict), ('STR',self.containerBibleObject.StrongsDict) ):
if 'Tag errors' in thisDict:
print( "\n{} Tag errors: {}".format( name, thisDict['Tag errors'] ) )
if 'Missing' in thisDict:
print( "\n{} Missing: {}".format( name, thisDict['Missing'] ) )
if thisDict == self.containerBibleObject.semanticDict:
for tag in ESFM_SEMANTIC_TAGS:
if tag in thisDict:
print( "\n{} Found {}: {}".format( name, tag, thisDict[tag] ) )
elif thisDict == self.containerBibleObject.StrongsDict:
for tag in ESFM_STRONGS_TAGS:
for num in thisDict[tag]:
if isinstance( thisDict[tag][num], list ):
print( "\n{} Found {} {}: {}".format( name, tag, num, thisDict[tag][num] ) )
halt
#if debugging: print( self._rawLines ); halt
# end of ESFMBibleBook.load
# end of class ESFMBibleBook
def demo():
"""
Demonstrate reading and processing some ESFM Bible databases.
"""
if BibleOrgSysGlobals.verbosityLevel > 0: print( ProgNameVersion )
def demoFile( name, filename, folder, BBB ):
if BibleOrgSysGlobals.verbosityLevel > 1: print( _("Loading {} from {}…").format( BBB, filename ) )
EBB = ESFMBibleBook( name, BBB )
EBB.load( filename, folder )
if BibleOrgSysGlobals.verbosityLevel > 1: print( " ID is {!r}".format( EBB.getField( 'id' ) ) )
if BibleOrgSysGlobals.verbosityLevel > 1: print( " Header is {!r}".format( EBB.getField( 'h' ) ) )
if BibleOrgSysGlobals.verbosityLevel > 1: print( " Main titles are {!r} and {!r}".format( EBB.getField( 'mt1' ), EBB.getField( 'mt2' ) ) )
#if BibleOrgSysGlobals.verbosityLevel > 0: print( EBB )
EBB.validateMarkers()
EBBVersification = EBB.getVersification ()
if BibleOrgSysGlobals.verbosityLevel > 2: print( EBBVersification )
UBBAddedUnits = EBB.getAddedUnits ()
if BibleOrgSysGlobals.verbosityLevel > 2: print( UBBAddedUnits )
discoveryDict = EBB._discover()
#print( "discoveryDict", discoveryDict )
EBB.check()
EBErrors = EBB.getErrors()
if BibleOrgSysGlobals.verbosityLevel > 2: print( EBErrors )
# end of demoFile
import USFMFilenames
if 1: # Test individual files
#name, testFolder, filename, BBB = "WEB", "../../../../../Data/Work/Bibles/English translations/WEB (World English Bible)/2012-06-23 eng-web_usfm/", "06-JOS.usfm", "JOS" # You can put your test file here
#name, testFolder, filename, BBB = "WEB", "../../../../../Data/Work/Bibles/English translations/WEB (World English Bible)/2012-06-23 eng-web_usfm/", "44-SIR.usfm", "SIR" # You can put your test file here
#name, testFolder, filename, BBB = "Matigsalug", "../../../../../Data/Work/Matigsalug/Bible/MBTV/", "MBT102SA.SCP", "SA2" # You can put your test file here
#name, testFolder, filename, BBB = "Matigsalug", "../../../../../Data/Work/Matigsalug/Bible/MBTV/", "MBT15EZR.SCP", "EZR" # You can put your test file here
name, testFolder, filename, BBB = "Matigsalug", "../../../../../Data/Work/Matigsalug/Bible/MBTV/", "MBT41MAT.SCP", "MAT" # You can put your test file here
#name, testFolder, filename, BBB = "Matigsalug", "../../../../../Data/Work/Matigsalug/Bible/MBTV/", "MBT67REV.SCP", "REV" # You can put your test file here
if os.access( testFolder, os.R_OK ):
demoFile( name, filename, testFolder, BBB )
else: print( "Sorry, test folder {!r} doesn't exist on this computer.".format( testFolder ) )
if 1: # Test a whole folder full of files
name, testFolder = "Matigsalug", "../../../../../Data/Work/Matigsalug/Bible/MBTV/" # You can put your test folder here
#name, testFolder = "WEB", "../../../../../Data/Work/Bibles/English translations/WEB (World English Bible)/2012-06-23 eng-web_usfm/" # You can put your test folder here
if os.access( testFolder, os.R_OK ):
if BibleOrgSysGlobals.verbosityLevel > 1: print( _("Scanning {} from {}…").format( name, testFolder ) )
fileList = USFMFilenames.USFMFilenames( testFolder ).getMaximumPossibleFilenameTuples()
for BBB,filename in fileList:
demoFile( name, filename, testFolder, BBB )
else: print( "Sorry, test folder {!r} doesn't exist on this computer.".format( testFolder ) )
# end of demo
if __name__ == '__main__':
# Configure basic set-up
parser = BibleOrgSysGlobals.setup( ProgName, ProgVersion )
BibleOrgSysGlobals.addStandardOptionsAndProcess( parser )
demo()
BibleOrgSysGlobals.closedown( ProgName, ProgVersion )
# end of ESFMBibleBook.py