/
BibleWriter.py
executable file
·9819 lines (8876 loc) · 656 KB
/
BibleWriter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# BibleWriter.py
#
# Module writing out InternalBibles in various formats.
#
# Copyright (C) 2010-2017 Robert Hunt
# Author: Robert Hunt <Freely.Given.org@gmail.com>
# License: See gpl-3.0.txt
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
EARLY PROTOTYPE ONLY AT THIS STAGE! (Developmental code not very well structured yet.)
Module for exporting Bibles in various formats listed below.
A class which extends InternalBible.
This is intended to be a virtual class, i.e., to be extended further
by classes which load particular kinds of Bibles (e.g., OSIS, USFM, USX, etc.)
Contains functions:
toPickle( self, outputFolder=None )
makeLists( outputFolder=None )
toBOSBCV( self, outputFolder=None ) -- one file per verse using our internal Bible format
toPseudoUSFM( outputFolder=None ) -- this is our internal Bible format -- exportable for debugging purposes
For more details see InternalBible.py, InternalBibleBook.py, InternalBibleInternals.py
toUSFM( outputFolder=None, removeVerseBridges=False )
toESFM( outputFolder=None )
toText( outputFolder=None )
toVPL( outputFolder=None )
toMarkdown( outputFolder=None )
toDoor43( outputFolder=None, controlDict=None, validationSchema=None )
toHTML5( outputFolder=None, controlDict=None, validationSchema=None, humanReadable=True )
toCustomBible( outputFolder=None, removeVerseBridges=False )
toUSXXML( outputFolder=None, controlDict=None, validationSchema=None )
toUSFXXML( outputFolder=None, controlDict=None, validationSchema=None )
toOSISXML( outputFolder=None, controlDict=None, validationSchema=None )
toZefaniaXML( outputFolder=None, controlDict=None, validationSchema=None )
toHaggaiXML( outputFolder=None, controlDict=None, validationSchema=None )
toOpenSongXML( outputFolder=None, controlDict=None, validationSchema=None )
toSwordModule( outputFolder=None, controlDict=None, validationSchema=None )
totheWord( outputFolder=None )
toMySword( outputFolder=None )
toESword( outputFolder=None )
toMyBible( outputFolder=None )
toSwordSearcher( outputFolder=None )
toDrupalBible( outputFolder=None )
toPhotoBible( outputFolder=None )
toODF( outputFolder=None ) for LibreOffice/OpenOffice exports
toTeX( outputFolder=None ) and thence to PDF
doAllExports( givenOutputFolderName=None, wantPhotoBible=False, wantODFs=False, wantPDFs=False )
Note that not all exports export all books.
Some formats only handle subsets, e.g. may not handle front or back matter, glossaries, or deuterocanonical books.
"""
from gettext import gettext as _
# Program identification strings
LastModifiedDate = '2017-11-17' # by RJH
ShortProgName = "BibleWriter"
ProgName = "Bible writer"
ProgVersion = '0.94'
ProgNameVersion = '{} v{}'.format( ShortProgName, ProgVersion )
ProgNameVersionDate = '{} {} {}'.format( ProgNameVersion, _("last modified"), LastModifiedDate )
# Set to True to make exp() prepend the module name even when the global debug flag is off
debuggingThisModule = False
# OSIS XML namespace and schema location URLs
#   (presumably for the OSIS export which is not in this part of the file -- TODO confirm)
OSISNameSpace = "http://www.bibletechnologies.net/2003/OSIS/namespace"
OSISSchemaLocation = "http://www.bibletechnologies.net/osisCore.2.1.1.xsd"
import sys, os, shutil, logging
from datetime import datetime
from collections import OrderedDict
import re, json
import zipfile, tarfile
import subprocess, multiprocessing
import BibleOrgSysGlobals, ControlFiles
from InternalBibleInternals import BOS_ADDED_NESTING_MARKERS, BOS_NESTING_MARKERS
from InternalBible import InternalBible
from BibleOrganizationalSystems import BibleOrganizationalSystem
from BibleReferences import BibleReferenceList
from USFMMarkers import OFTEN_IGNORED_USFM_HEADER_MARKERS, USFM_INTRODUCTION_MARKERS, \
USFM_PRECHAPTER_MARKERS, USFM_BIBLE_PARAGRAPH_MARKERS
from MLWriter import MLWriter
def exp( messageString ):
    """
    Return the given error or warning message, expanded when in debug mode.

    Anything before the first ': ' in messageString is treated as the
        name portion and the remainder as the message proper.
        (If there's no ': ' then the name portion is empty.)

    If the global debug flag or debuggingThisModule is set,
        the short program name is prepended to the name portion
        (separated from it by a period if the name portion isn't empty).

    Returns the new string.
    """
    nameBit, separator, errorBit = messageString.partition( ': ' )
    if not separator: # there was no name portion at all
        nameBit, errorBit = '', messageString
    if BibleOrgSysGlobals.debugFlag or debuggingThisModule:
        nameBit = ShortProgName + ('.' if nameBit else '') + nameBit
    prefix = nameBit + ': ' if nameBit else ''
    return prefix + errorBit
# end of exp
defaultControlFolder = 'ControlFiles/' # Relative to the current working directory

def setDefaultControlFolder( newFolderName ):
    """
    Change the module-wide default folder for control files to newFolderName.

    The change (old and new values) is reported on the console
        if the verbosity level is greater than one.
    """
    global defaultControlFolder
    if BibleOrgSysGlobals.verbosityLevel > 1:
        changeMessage = "defaultControlFolder changed from {} to {}"
        print( changeMessage.format( defaultControlFolder, newFolderName ) )
    defaultControlFolder = newFolderName
# end of BibleWriter.setDefaultControlFolder
ALL_CHAR_MARKERS = None # Filled in (once only) by BibleWriter.__init__ from BibleOrgSysGlobals.USFMMarkers
class BibleWriter( InternalBible ):
"""
Class to export Bibles.
The Bible class is based on this class.
"""
def __init__( self ):
    """
    Create the BibleWriter object.

    Initialises the InternalBible base class, notes that the generic setup
        hasn't been done yet, and fills in the module-level ALL_CHAR_MARKERS
        list if no earlier instance has already done that.
    """
    global ALL_CHAR_MARKERS

    InternalBible.__init__( self ) # Initialise the base class

    # genericBOS and genericBRL aren't created here -- that's left for __setupWriter()
    self.doneSetupGeneric = False

    if ALL_CHAR_MARKERS is None: # then this is the first BibleWriter to be created
        ALL_CHAR_MARKERS = BibleOrgSysGlobals.USFMMarkers.getCharacterMarkersList( expandNumberableMarkers=True )
# end of BibleWriter.__init__
def toPickle( self, outputFolder=None ):
    """
    Saves this Python object as a pickle file (plus a zipped version for downloading).

    outputFolder is the folder to write into (created if necessary).
        If it's None or empty, 'OutputFiles/BOS_Bible_Object_Pickle/' is used.

    The pickle itself is written by self.pickle().
        The zip file is written alongside it with '.zip' appended to the pickle filename.

    Returns True if self.pickle() reported success (and the zip file was then written),
        or False (after printing a failure message) if it didn't.
    """
    if BibleOrgSysGlobals.debugFlag: print( "toPickle( {}, {} )".format( self.abbreviation, outputFolder ) )
    if BibleOrgSysGlobals.verbosityLevel > 1: print( "Running BibleWriter:toPickle…" )

    if not outputFolder: outputFolder = 'OutputFiles/BOS_Bible_Object_Pickle/'
    os.makedirs( outputFolder, exist_ok=True ) # Make the empty folder if there wasn't already one there

    if not self.pickle( folder=outputFolder ):
        print( " BibleWriter.toPickle failed." )
        return False

    # Now create a zipped version
    filename = self.abbreviation if self.abbreviation else self.name
    if filename is None:
        filename = self.objectTypeString
    if BibleOrgSysGlobals.debugFlag: assert filename
    filename = BibleOrgSysGlobals.makeSafeFilename( filename+'.pickle' ) # Same as in InternalBible.pickle()
    filepath = os.path.join( outputFolder, filename )
    if BibleOrgSysGlobals.verbosityLevel > 2: print( " Zipping {} pickle file…".format( filename ) )
    # Use a context manager so that the archive is always closed, even if the write fails
    with zipfile.ZipFile( filepath+'.zip', 'w', compression=zipfile.ZIP_DEFLATED ) as zf:
        zf.write( filepath, filename ) # Save in the archive without the path

    if BibleOrgSysGlobals.verbosityLevel > 0 and BibleOrgSysGlobals.maxProcesses > 1:
        print( " BibleWriter.toPickle finished successfully." )
    return True
# end of BibleWriter.toPickle
def __setupWriter( self ):
    """
    Do the generic system setup that's needed before exporting.

    Creates self.genericBOS (the "GENERIC-KJV-81" organisational system)
        and self.genericBRL (a reference list built on it and on this Bible object)
        and then sets self.doneSetupGeneric.

    This isn't done in __init__ because it needs self
        (which isn't actualised yet at that point).

    In debug mode, asserts that the setup hasn't already been done.
        Otherwise, simply does nothing if it has.
    """
    if BibleOrgSysGlobals.debugFlag: assert not self.doneSetupGeneric
    if self.doneSetupGeneric: return # Nothing left for us to do

    organisationalSystem = BibleOrganizationalSystem( "GENERIC-KJV-81" )
    self.genericBOS = organisationalSystem
    # NOTE: This next line prevents pickling!
    #   because unfortunately it causes a recursive linking of objects
    self.genericBRL = BibleReferenceList( organisationalSystem, BibleObject=self )
    self.doneSetupGeneric = True
# end of BibleWriter.__setupWriter
def __adjustControlDict( self, existingControlDict ):
    """
    Do the global name replacements in the given control dictionary.

    Every value in existingControlDict has any '__PROJECT_NAME__' placeholder
        replaced by self.projectName.
    The dictionary is adjusted in place -- nothing is returned.

    Logs a warning if the dictionary is empty.
    """
    if BibleOrgSysGlobals.debugFlag: assert isinstance( existingControlDict, dict )
    if not existingControlDict:
        logging.warning( exp("adjustControlDict: The control dictionary is empty!") )

    # Only existing keys are reassigned, so the dictionary doesn't change size as we go through it
    for controlKey, controlValue in existingControlDict.items():
        existingControlDict[controlKey] = controlValue.replace( '__PROJECT_NAME__', self.projectName )
# end of BibleWriter.__adjustControlDict
def makeLists( self, outputFolder=None ):
    """
    Write the word-count lists for this Bible out as text, CSV, XML, and HTML files.

    The counts themselves are not calculated here -- they are taken from
        self.discoveryResults['ALL'], i.e., from the
            allWordCounts, mainTextWordCounts,
            allCaseInsensitiveWordCounts, and mainTextCaseInsensitiveWordCounts entries.
        (self.discover() is called first if there's no discoveryResults attribute yet.)

    Each of those four sets of counts is written twice
        (once sorted by word, and once sorted by count)
        in each of the four file formats.

    outputFolder is the folder to write into (created if necessary).
        If it's None or empty, 'OutputFiles/BOS_Lists/' is used.
        The files go into TXT/, CSV/, XML/, and HTML/ sub-folders of it.

    Returns True.
    """
    if BibleOrgSysGlobals.verbosityLevel > 1: print( "Running BibleWriter:makeLists…" )
    if BibleOrgSysGlobals.debugFlag: assert self.books

    if not self.doneSetupGeneric: self.__setupWriter()
    if 'discoveryResults' not in dir(self): self.discover()
    if not outputFolder: outputFolder = 'OutputFiles/BOS_Lists/'

    # Make the main folder and the separate sub-folders (if they're not already there)
    txtOutputFolder = os.path.join( outputFolder, 'TXT/' )
    csvOutputFolder = os.path.join( outputFolder, 'CSV/' )
    xmlOutputFolder = os.path.join( outputFolder, 'XML/' )
    htmlOutputFolder = os.path.join( outputFolder, 'HTML/' )
    for folder in (outputFolder, txtOutputFolder, csvOutputFolder, xmlOutputFolder, htmlOutputFolder):
        os.makedirs( folder, exist_ok=True )


    def writeWordList( typeString, sortName, orderedWords, dictionary ):
        """
        Write the counts for the given words (in the given order)
            to one text file, one csv file, one xml file, and one html file.

        sortName ('word' or 'count') is only used for the title and the filenames.
        """
        title = BibleOrgSysGlobals.makeSafeXML( typeString.replace('_',' ') + " sorted by " + sortName )
        filenamePortion = BibleOrgSysGlobals.makeSafeFilename( typeString + "_sorted_by_" + sortName + "." )
        if BibleOrgSysGlobals.verbosityLevel > 2: print( " " + _("Writing '{}*'…").format( filenamePortion ) )
        with open( os.path.join( txtOutputFolder, filenamePortion )+'txt', 'wt', encoding='utf-8' ) as txtFile, \
                open( os.path.join( csvOutputFolder, filenamePortion )+'csv', 'wt', encoding='utf-8' ) as csvFile, \
                open( os.path.join( xmlOutputFolder, filenamePortion )+'xml', 'wt', encoding='utf-8' ) as xmlFile, \
                open( os.path.join( htmlOutputFolder, filenamePortion )+'html', 'wt', encoding='utf-8' ) as htmlFile:
            xmlFile.write( '<?xml version="1.0" encoding="utf-8"?>\n' ) # Write the xml header
            xmlFile.write( '<entries>\n' ) # root element
            # NOTE: The title has to be inside a <head> element (not <header>) for the html to be valid
            htmlFile.write( '<html><head><title>{}</title></head>\n'.format( title ) ) # Write the html header
            htmlFile.write( '<body><h1>{}</h1>\n'.format( title ) ) # Write the header
            htmlFile.write( '<table><tr><th>Word</th><th>Count</th></tr>\n' )
            for word in orderedWords:
                if BibleOrgSysGlobals.debugFlag: assert ' ' not in word
                count = dictionary[word]
                safeWord = BibleOrgSysGlobals.makeSafeXML( word ) # for both xml and html
                txtFile.write( "{} {}\n".format( word, count ) )
                csvFile.write( "{},{}\n".format( repr(word) if ',' in word else word, count ) )
                xmlFile.write( "<entry><word>{}</word><count>{}</count></entry>\n".format( safeWord, count ) )
                htmlFile.write( "<tr><td>{}</td><td>{}</td></tr>\n".format( safeWord, count ) )
            xmlFile.write( '</entries>' ) # close root element
            htmlFile.write( '</table></body></html>' ) # close open elements
    # end of writeWordList


    def printWordCounts( typeString, dictionary ):
        """
        Given a description and a dictionary (mapping words to their counts),
            sorts and writes the word count data to text, csv, xml, and html files
            -- firstly sorted by word and then sorted by (ascending) count.
        """
        sortedWords = sorted( dictionary )
        writeWordList( typeString, 'word', sortedWords, dictionary )
        # The sort is stable, so words with equal counts stay in word order
        writeWordList( typeString, 'count', sorted( sortedWords, key=dictionary.get ), dictionary )
    # end of printWordCounts


    # Now sort the lists and write them each twice (sorted by word and sorted by count)
    allResults = self.discoveryResults['ALL']
    printWordCounts( "All_wordcounts", allResults['allWordCounts'] )
    printWordCounts( "Main_text_wordcounts", allResults['mainTextWordCounts'] )
    printWordCounts( "All_wordcounts_case_insensitive", allResults['allCaseInsensitiveWordCounts'] )
    printWordCounts( "Main_text_wordcounts_case_insensitive", allResults['mainTextCaseInsensitiveWordCounts'] )

    if BibleOrgSysGlobals.verbosityLevel > 0 and BibleOrgSysGlobals.maxProcesses > 1:
        print( " BibleWriter.makeLists finished successfully." )
    return True
# end of BibleWriter.makeLists
def toBOSBCV( self, outputFolder=None ):
    """
    Write the internal pseudoUSFM out directly with one file per verse.

    outputFolder is the folder to write into.
        If it's None or empty, 'OutputFiles/BOS_BCV_Export/' is used.
        NOTE: Any existing folder of that name is deleted first
            so that we always start with an empty folder.

    The files themselves are written by self.writeBOSBCVFiles().
        Everything in the folder is then also zipped into AllFiles.zip
            in that same folder.

    Returns True.
    """
    if BibleOrgSysGlobals.verbosityLevel > 1: print( "Running BibleWriter:toBOSBCV…" )
    if BibleOrgSysGlobals.debugFlag: assert self.books

    if not self.doneSetupGeneric: self.__setupWriter()
    if not outputFolder: outputFolder = 'OutputFiles/BOS_BCV_Export/'
    if os.access( outputFolder, os.F_OK ): # We need to delete it
        shutil.rmtree( outputFolder, ignore_errors=True )
    os.makedirs( outputFolder ) # Make the empty folder

    self.writeBOSBCVFiles( outputFolder ) # This function is part of InternalBible

    # Now create a zipped collection (for easier download)
    if BibleOrgSysGlobals.verbosityLevel > 2: print( " Zipping BCV files…" )
    # Use a context manager so that the archive is always closed, even if a write fails
    with zipfile.ZipFile( os.path.join( outputFolder, 'AllFiles.zip' ), 'w', compression=zipfile.ZIP_DEFLATED ) as zf:
        for filename in os.listdir( outputFolder ):
            if not filename.endswith( '.zip' ): # This also skips the archive that we're busy writing
                filepath = os.path.join( outputFolder, filename )
                zf.write( filepath, filename ) # Save in the archive without the path

    if BibleOrgSysGlobals.verbosityLevel > 0 and BibleOrgSysGlobals.maxProcesses > 1:
        print( " BibleWriter.toBOSBCV finished successfully." )
    return True
# end of BibleWriter.toBOSBCV
def toPseudoUSFM( self, outputFolder=None ):
    """
    Write the pseudo USFM out directly (for debugging, etc.).

    For each book:
        writes the (marker,text) pairs from _rawLines to a .rSFM file
            (but only if the book still has a non-empty _rawLines attribute), and
        always writes the entries from _processedLines to a .pSFM file
            (with one line per entry, indented according to the nesting markers).

    outputFolder is the folder to write into (created if necessary).
        If it's None or empty, 'OutputFiles/BOS_PseudoUSFM_Export/' is used.
    Everything in the folder is then also zipped into AllFiles.zip
        in that same folder.

    Returns True.
    """
    if BibleOrgSysGlobals.verbosityLevel > 1: print( "Running BibleWriter:toPseudoUSFM…" )
    if BibleOrgSysGlobals.debugFlag: assert self.books

    if not self.doneSetupGeneric: self.__setupWriter()
    if not outputFolder: outputFolder = 'OutputFiles/BOS_PseudoUSFM_Export/'
    os.makedirs( outputFolder, exist_ok=True ) # Make the empty folder if there wasn't already one there

    indent = 3 # Number of spaces per nesting level in the .pSFM files

    # Write the raw and pseudo-USFM files
    for j, (BBB,bookObject) in enumerate( self.books.items() ):
        try: rawUSFMData = bookObject._rawLines
        except AttributeError: rawUSFMData = None # it's been deleted :-(
        if rawUSFMData:
            filename = "{:02}_{}_BibleWriter.rSFM".format( j, BBB )
            filepath = os.path.join( outputFolder, BibleOrgSysGlobals.makeSafeFilename( filename ) )
            if BibleOrgSysGlobals.verbosityLevel > 2: print( " " + _("Writing {!r}…").format( filepath ) )
            with open( filepath, 'wt', encoding='utf-8' ) as myFile:
                for marker,text in rawUSFMData:
                    myFile.write( "{}: {!r}\n".format( marker, text ) )

        pseudoESFMData = bookObject._processedLines
        filename = "{:02}_{}_BibleWriter.pSFM".format( j, BBB )
        filepath = os.path.join( outputFolder, BibleOrgSysGlobals.makeSafeFilename( filename ) )
        if BibleOrgSysGlobals.verbosityLevel > 2: print( " " + _("Writing {!r}…").format( filepath ) )
        indentLevel = 0
        C, V = '0', '-1' # So id line starts at 0:0
        with open( filepath, 'wt', encoding='utf-8' ) as myFile:
            for entry in pseudoESFMData:
                marker, adjText, cleanText, extras = entry.getMarker(), entry.getAdjustedText(), entry.getCleanText(), entry.getExtras()

                # Keep track of where we are (only used for the messages below)
                if marker in USFM_PRECHAPTER_MARKERS:
                    if BibleOrgSysGlobals.debugFlag or BibleOrgSysGlobals.strictCheckingFlag: assert C == '0'
                    V = str( int(V) + 1 ) # Lines before chapter one are counted as "verses" of chapter zero
                if marker == 'c': C, V = adjText, '0'
                elif marker == 'v': V = adjText

                myFile.write( "{}{}{} = {} {} {}\n".format( ' '*indent*indentLevel, ' ' if len(marker)<2 else '',
                                    marker, repr(adjText) if adjText is not None else '',
                                    repr(cleanText) if cleanText and cleanText!=adjText else '',
                                    entry.getExtras().summary() if extras else '' ) )

                # Nesting markers open a new level and markers starting with ¬ close one
                if marker in BOS_NESTING_MARKERS:
                    indentLevel += 1
                elif indentLevel and marker[0]=='¬': indentLevel -= 1
                if indentLevel > 7: print( "BibleWriter.toPseudoUSFM: {} {}:{} indentLevel={} marker={}".format( BBB, C, V, indentLevel, marker ) )
                if BibleOrgSysGlobals.debugFlag: assert indentLevel <= 7 # Should only be 7: e.g., chapters c s1 p v list li1
        if BibleOrgSysGlobals.debugFlag: assert indentLevel == 0 # Everything should be closed again by the end of the book

    # Now create a zipped collection
    if BibleOrgSysGlobals.verbosityLevel > 2: print( " Zipping PseudoUSFM files…" )
    # Use a context manager so that the archive is always closed, even if a write fails
    with zipfile.ZipFile( os.path.join( outputFolder, 'AllFiles.zip' ), 'w', compression=zipfile.ZIP_DEFLATED ) as zf:
        for filename in os.listdir( outputFolder ):
            if not filename.endswith( '.zip' ): # This also skips the archive that we're busy writing
                filepath = os.path.join( outputFolder, filename )
                zf.write( filepath, filename ) # Save in the archive without the path

    if BibleOrgSysGlobals.verbosityLevel > 0 and BibleOrgSysGlobals.maxProcesses > 1:
        print( " BibleWriter.toPseudoUSFM finished successfully." )
    return True
# end of BibleWriter.toPseudoUSFM
def toUSFM( self, outputFolder=None, removeVerseBridges=False ):
"""
Adjust the pseudo USFM and write the USFM files.
NOTE: We use Windows \r\n line endings for writing USFM files.
"""
if BibleOrgSysGlobals.verbosityLevel > 1: print( "Running BibleWriter:toUSFM…" )
if BibleOrgSysGlobals.debugFlag: assert self.books
includeEmptyVersesFlag = True
if not self.doneSetupGeneric: self.__setupWriter()
if not outputFolder: outputFolder = 'OutputFiles/BOS_USFM_' + ('Reexport/' if self.objectTypeString=="USFM" else 'Export/')
if not os.access( outputFolder, os.F_OK ): os.makedirs( outputFolder ) # Make the empty folder if there wasn't already one there
#if not controlDict: controlDict = {}; ControlFiles.readControlFile( 'ControlFiles', "To_XXX_controls.txt", controlDict )
#assert controlDict and isinstance( controlDict, dict )
ignoredMarkers = set()
# Adjust the extracted outputs
for BBB,bookObject in self.books.items():
pseudoESFMData = bookObject._processedLines
#print( "\pseudoESFMData", pseudoESFMData[:50] ); halt
USFMAbbreviation = BibleOrgSysGlobals.BibleBooksCodes.getUSFMAbbreviation( BBB )
USFMNumber = BibleOrgSysGlobals.BibleBooksCodes.getUSFMNumber( BBB )
if includeEmptyVersesFlag:
try:
verseList = self.genericBOS.getNumVersesList( BBB )
numC, numV = len(verseList), verseList[0]
except KeyError:
if BibleOrgSysGlobals.debugFlag: assert BBB in ('FRT','GLS',)
numC = numV = 0
USFM = ''
# Prepend any important missing (header/title) fields
if pseudoESFMData.contains( 'id', 1 ) is None:
USFM += '\\id {} -- BibleOrgSys USFM export v{}'.format( USFMAbbreviation.upper(), ProgVersion )
if pseudoESFMData.contains( 'h', 8 ) is None:
try:
h = self.suppliedMetadata['File'][BBB+'ShortName']
if h: USFM += '\n\\h {}'.format( h )
except (KeyError,TypeError): pass # ok, we've got nothing to add
if pseudoESFMData.contains( 'mt1', 12 ) is None:
try:
mt = self.suppliedMetadata['File'][BBB+'LongName']
if mt: USFM += '\n\\mt1 {}'.format( mt )
except (KeyError,TypeError): pass # ok, we've got nothing to add
inField = None
vBridgeStartInt = vBridgeEndInt = None # For printing missing (bridged) verse numbers
if BibleOrgSysGlobals.verbosityLevel > 2: print( " " + _("Adjusting USFM output…" ) )
for processedBibleEntry in pseudoESFMData:
pseudoMarker, value = processedBibleEntry.getMarker(), processedBibleEntry.getFullText()
#print( BBB, pseudoMarker, repr(value) )
#if (not USFM) and pseudoMarker!='id': # We need to create an initial id line
#USFM += '\\id {} -- BibleOrgSys USFM export v{}'.format( USFMAbbreviation.upper(), ProgVersion )
if '¬' in pseudoMarker or pseudoMarker in BOS_ADDED_NESTING_MARKERS: continue # Just ignore added markers -- not needed here
if pseudoMarker in ('c#','vp#',):
ignoredMarkers.add( pseudoMarker )
continue
#value = cleanText # (temp)
#if BibleOrgSysGlobals.debugFlag and debuggingThisModule: print( "toUSFM: pseudoMarker = {!r} value = {!r}".format( pseudoMarker, value ) )
if removeVerseBridges and pseudoMarker in ('v','c',):
if vBridgeStartInt and vBridgeEndInt:
for vNum in range( vBridgeStartInt+1, vBridgeEndInt+1 ): # Fill in missing verse numbers
USFM += '\n\\v {}'.format( vNum )
vBridgeStartInt = vBridgeEndInt = None
if pseudoMarker in ('v','f','fr','x','xo',): # These fields should always end with a space but the processing will have removed them
#if BibleOrgSysGlobals.debugFlag: assert value
if pseudoMarker=='v' and removeVerseBridges:
vString = value
for bridgeChar in ('-', '–', '—'): # hyphen, endash, emdash
ix = vString.find( bridgeChar )
if ix != -1:
value = vString[:ix] # Remove verse bridges
vEnd = vString[ix+1:]
#print( BBB, repr(value), repr(vEnd) )
try: vBridgeStartInt, vBridgeEndInt = int( value ), int( vEnd )
except ValueError:
print( "toUSFM: bridge doesn't seem to be integers in {} {!r}".format( BBB, vString ) )
vBridgeStartInt = vBridgeEndInt = None # One of them isn't an integer
#print( ' ', BBB, repr(vBridgeStartInt), repr(vBridgeEndInt) )
break
if value and value[-1]!=' ': value += ' ' # Append a space since it didn't have one
elif pseudoMarker[-1]=='~' or BibleOrgSysGlobals.USFMMarkers.isNewlineMarker(pseudoMarker): # Have a continuation field
if inField is not None:
USFM += '\\{}*'.format( inField ) # Do a close marker for footnotes and cross-references
inField = None
if pseudoMarker[-1] == '~':
#print( "psMarker ends with squiggle: {!r}={!r}".format( pseudoMarker, value ) )
if BibleOrgSysGlobals.debugFlag: assert pseudoMarker[:-1] in ('v','p','c')
USFM += (' ' if USFM and USFM[-1]!=' ' else '') + value
else: # not a continuation marker
adjValue = value
#if pseudoMarker in ('it','bk','ca','nd',): # Character markers to be closed -- had to remove ft and xt from this list for complex footnotes with f fr fq ft fq ft f*
if pseudoMarker in ALL_CHAR_MARKERS: # Character markers to be closed
#if (USFM[-2]=='\\' or USFM[-3]=='\\') and USFM[-1]!=' ':
if USFM[-1] != ' ':
USFM += ' ' # Separate markers by a space e.g., \p\bk Revelation
if BibleOrgSysGlobals.debugFlag: print( "toUSFM: Added space to {!r} before {!r}".format( USFM[-2], pseudoMarker ) )
adjValue += '\\{}*'.format( pseudoMarker ) # Do a close marker
elif pseudoMarker in ('f','x',): inField = pseudoMarker # Remember these so we can close them later
elif pseudoMarker in ('fr','fq','ft','xo',): USFM += ' ' # These go on the same line just separated by spaces and don't get closed
elif USFM: USFM += '\n' # paragraph markers go on a new line
if not value: USFM += '\\{}'.format( pseudoMarker )
else: USFM += '\\{} {}'.format( pseudoMarker,adjValue )
#print( pseudoMarker, USFM[-200:] )
# Write the USFM output
#print( "\nUSFM", USFM[:3000] )
filename = "{}{}BibleWriter.SFM".format( USFMNumber, USFMAbbreviation.upper() ) # This seems to be the undocumented standard filename format even though it's so ugly with digits running into each other, e.g., 102SA…
#if not os.path.exists( USFMOutputFolder ): os.makedirs( USFMOutputFolder )
filepath = os.path.join( outputFolder, BibleOrgSysGlobals.makeSafeFilename( filename ) )
if BibleOrgSysGlobals.verbosityLevel > 2: print( " " + _("Writing {!r}…").format( filepath ) )
with open( filepath, 'wt', newline='\r\n', encoding='utf-8' ) as myFile: # Use Windows newline endings for USFM
myFile.write( USFM )
if ignoredMarkers:
logging.info( "toUSFM: Ignored markers were {}".format( ignoredMarkers ) )
if BibleOrgSysGlobals.verbosityLevel > 2:
print( " " + _("WARNING: Ignored toUSFM markers were {}").format( ignoredMarkers ) )
# Now create a zipped collection
if BibleOrgSysGlobals.verbosityLevel > 2: print( " Zipping USFM files…" )
zf = zipfile.ZipFile( os.path.join( outputFolder, 'AllUSFMFiles.zip' ), 'w', compression=zipfile.ZIP_DEFLATED )
for filename in os.listdir( outputFolder ):
if not filename.endswith( '.zip' ):
filepath = os.path.join( outputFolder, filename )
zf.write( filepath, filename ) # Save in the archive without the path
zf.close()
if BibleOrgSysGlobals.verbosityLevel > 0 and BibleOrgSysGlobals.maxProcesses > 1:
print( " BibleWriter.toUSFM finished successfully." )
return True
# end of BibleWriter.toUSFM
def toESFM( self, outputFolder=None ): #, removeVerseBridges=False ):
    """
    Adjust the pseudo ESFM and write the ESFM files.

    One '<USFMNumber><USFMABBREVIATION>BibleWriter.ESFM' file is written for each
        book in self.books. Each output line is indented by two spaces per
        nesting level. Afterwards every entry in the output folder that
        doesn't end with '.zip' is added to 'AllESFMFiles.zip' in that folder.

    The first three lines are normalised: missing id / ide / rem lines are
        supplied, an ide line at index 1 is rewritten as 'UTF-8',
        and a rem line at index 2 is rewritten as 'ESFM v0.5 <BBB>'.

    If outputFolder is not given, 'OutputFiles/BOS_ESFM_Reexport/' is used when
        self.objectTypeString is "ESFM", otherwise 'OutputFiles/BOS_ESFM_Export/'.
        The folder is created if it isn't already there.

    Returns True.
    """
    if BibleOrgSysGlobals.verbosityLevel > 1: print( "Running BibleWriter:toESFM…" )
    if BibleOrgSysGlobals.debugFlag: assert self.books

    if not self.doneSetupGeneric: self.__setupWriter()
    if not outputFolder: outputFolder = 'OutputFiles/BOS_ESFM_' + ('Reexport/' if self.objectTypeString=="ESFM" else 'Export/')
    if not os.access( outputFolder, os.F_OK ): os.makedirs( outputFolder ) # Make the empty folder if there wasn't already one there

    ignoredMarkers = set()

    # Adjust the extracted outputs
    for BBB,bookObject in self.books.items():
        pseudoESFMData = bookObject._processedLines

        USFMAbbreviation = BibleOrgSysGlobals.BibleBooksCodes.getUSFMAbbreviation( BBB )
        USFMNumber = BibleOrgSysGlobals.BibleBooksCodes.getUSFMNumber( BBB )

        filename = "{}{}BibleWriter.ESFM".format( USFMNumber, USFMAbbreviation.upper() )
        filepath = os.path.join( outputFolder, BibleOrgSysGlobals.makeSafeFilename( filename ) )
        if BibleOrgSysGlobals.verbosityLevel > 2: print( " " + _("Writing {!r}…").format( filepath ) )

        indentLevel, indentSize = 0, 2
        inField = None # Set to 'f' or 'x' while a footnote or cross-reference is still open
        initialMarkers = [processedBibleEntry.getMarker() for processedBibleEntry in pseudoESFMData[:4]]
        if BibleOrgSysGlobals.verbosityLevel > 2: print( " " + _("Adjusting ESFM output…" ) )
        with open( filepath, 'wt', encoding='utf-8' ) as myFile:
            # Supply any header lines that the book doesn't already start with
            if 'id' not in initialMarkers:
                myFile.write( '\\id {} -- BibleOrgSys ESFM export v{}\n'.format( USFMAbbreviation.upper(), ProgVersion ) )
            if 'ide' not in initialMarkers:
                myFile.write( '\\ide UTF-8\n' )
                if 'rem' not in initialMarkers:
                    myFile.write( '\\rem ESFM v0.5 {}\n'.format( BBB ) )

            for j, processedBibleEntry in enumerate( pseudoESFMData ):
                pseudoMarker, value = processedBibleEntry.getMarker(), processedBibleEntry.getFullText()
                if debuggingThisModule: print( "writeESFM", indentLevel, "now", BBB, j, pseudoMarker, repr(value) )

                if j==1 and pseudoMarker=='ide':
                    # Always declare UTF-8 (which is how the file is written), whatever the original said
                    myFile.write( '\\ide UTF-8\n' )
                    if 'rem' not in initialMarkers:
                        myFile.write( '\\rem ESFM v0.5 {}\n'.format( BBB ) )
                    ESFMLine = '' # Already written directly above
                elif j==2 and pseudoMarker=='rem':
                    if value != 'ESFM v0.5 {}'.format( BBB ):
                        logging.info( "Updating {} ESFM rem line from {!r} to v0.5".format( BBB, value ) )
                    ESFMLine = '\\rem ESFM v0.5 {}'.format( BBB )
                else:
                    if '¬' in pseudoMarker: # An end marker closes one nesting level
                        if indentLevel > 0:
                            indentLevel -= 1
                        else:
                            logging.error( "toESFM: Indent level can't go negative at {} {} {} {!r}".format( BBB, j, pseudoMarker, value ) )
                            if BibleOrgSysGlobals.debugFlag and debuggingThisModule:
                                print( "toESFM: Indent level can't go negative at {} {} {} {!r}".format( BBB, j, pseudoMarker, value ) )
                                halt # NOTE(review): not defined in the visible code -- presumably a deliberate abort via NameError
                    ESFMLine = ' ' * indentLevel * indentSize

                    if pseudoMarker in ('c#','vp#',):
                        ignoredMarkers.add( pseudoMarker )
                        continue # Nothing is written for these (and they never affect nesting below)

                    if pseudoMarker in ('v','f','fr','x','xo',): # These fields should always end with a space but the processing will have removed them
                        if value and value[-1] != ' ': value += ' ' # Append a space since it didn't have one
                    elif pseudoMarker.endswith( '~' ) or BibleOrgSysGlobals.USFMMarkers.isNewlineMarker(pseudoMarker): # Have a continuation field
                        if inField is not None:
                            ESFMLine += '\\{}*'.format( inField ) # Do a close marker for footnotes and cross-references
                            inField = None

                    if pseudoMarker.endswith( '~' ):
                        if BibleOrgSysGlobals.debugFlag: assert pseudoMarker[:-1] in ('v','p','c')
                        ESFMLine += (' ' if ESFMLine and ESFMLine[-1]!=' ' else '') + value
                    else: # not a continuation marker
                        adjValue = value
                        if pseudoMarker in ALL_CHAR_MARKERS: # Character markers to be closed
                            # ESFMLine is empty at indent level zero, so check before indexing it
                            if ESFMLine and ESFMLine[-1] != ' ':
                                ESFMLine += ' ' # Separate markers by a space e.g., \p\bk Revelation
                                if BibleOrgSysGlobals.debugFlag: print( "toESFM: Added space to {!r} before {!r}".format( ESFMLine[-2], pseudoMarker ) )
                            adjValue += '\\{}*'.format( pseudoMarker ) # Do a close marker
                        elif pseudoMarker in ('f','x',): inField = pseudoMarker # Remember these so we can close them later
                        elif pseudoMarker in ('fr','fq','ft','xo',): ESFMLine += ' ' # These go on the same line just separated by spaces and don't get closed
                        if not value: ESFMLine += '\\{}'.format( pseudoMarker )
                        else: ESFMLine += '\\{} {}'.format( pseudoMarker,adjValue )

                if ESFMLine: myFile.write( '{}\n'.format( ESFMLine ) )
                if pseudoMarker in BOS_NESTING_MARKERS:
                    indentLevel += 1 # Everything up to the matching end marker gets indented further

        if indentLevel != 0: # Every nesting marker should have been closed again by the end of the book
            logging.error( "toESFM: Ended with wrong indent level of {}".format( indentLevel ) ); halt # NOTE(review): halt is not defined in the visible code -- presumably a deliberate abort

    if ignoredMarkers:
        logging.info( "toESFM: Ignored markers were {}".format( ignoredMarkers ) )
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print( " " + _("WARNING: Ignored toESFM markers were {}").format( ignoredMarkers ) )

    # Now create a zipped collection
    if BibleOrgSysGlobals.verbosityLevel > 2: print( " Zipping ESFM files…" )
    # The with statement makes sure that the archive gets closed even if a write fails
    with zipfile.ZipFile( os.path.join( outputFolder, 'AllESFMFiles.zip' ), 'w', compression=zipfile.ZIP_DEFLATED ) as zf:
        for filename in os.listdir( outputFolder ):
            if not filename.endswith( '.zip' ):
                filepath = os.path.join( outputFolder, filename )
                zf.write( filepath, filename ) # Save in the archive without the path

    if BibleOrgSysGlobals.verbosityLevel > 0 and BibleOrgSysGlobals.maxProcesses > 1:
        print( " BibleWriter.toESFM finished successfully." )
    return True
# end of BibleWriter.toESFM
def toText( self, outputFolder=None ):
    """
    Write the pseudo USFM out into a simple plain-text format.

    One 'BOS-BibleWriter-<BBB>.txt' file is written for each book in self.books.
        Headers and main titles are written out (titles centred in 80 columns),
        each chapter gets a 'Chapter N' line, and each verse starts a new line
        beginning with its number (or with the preceding vp# text if there was one).
        Header, introduction, paragraph and section-heading markers are dropped
        (and remembered in order to be reported at the end).
    Afterwards every entry in the output folder that doesn't end with '.zip'
        is added to 'AllTextFiles.zip' in that folder.

    If outputFolder is not given, 'OutputFiles/BOS_PlainText_Export/' is used.
        The folder is created if it isn't already there.

    Returns True.
    """
    if BibleOrgSysGlobals.verbosityLevel > 1: print( "Running BibleWriter:toText…" )
    if BibleOrgSysGlobals.debugFlag: assert self.books

    if not self.doneSetupGeneric: self.__setupWriter()
    if not outputFolder: outputFolder = 'OutputFiles/BOS_PlainText_Export/'
    if not os.access( outputFolder, os.F_OK ): os.makedirs( outputFolder ) # Make the empty folder if there wasn't already one there

    ignoredMarkers = set()

    columnWidth = 80 # Titles get centred within this width

    # Write the plain text files
    for BBB,bookObject in self.books.items():
        pseudoESFMData = bookObject._processedLines

        filename = "BOS-BibleWriter-{}.txt".format( BBB )
        filepath = os.path.join( outputFolder, BibleOrgSysGlobals.makeSafeFilename( filename ) )
        if BibleOrgSysGlobals.verbosityLevel > 2: print( " " + _("Writing {!r}…").format( filepath ) )

        textBuffer = '' # Collects running text until the next structural marker flushes it
        with open( filepath, 'wt', encoding='utf-8' ) as myFile:
            gotVP = None
            for entry in pseudoESFMData:
                marker, text = entry.getMarker(), entry.getCleanText()
                # NOTE(review): unlike toVPL, 'c#' and '¬' entries aren't filtered out here,
                #   so any text which they carry reaches the final branch -- confirm that's intended
                if marker in OFTEN_IGNORED_USFM_HEADER_MARKERS or marker in ('ie',): # Just ignore these lines
                    ignoredMarkers.add( marker )
                elif marker == 'h':
                    if textBuffer:
                        myFile.write( textBuffer ); textBuffer = ''
                    myFile.write( "{}\n\n".format( text ) )
                elif marker in USFM_INTRODUCTION_MARKERS: # Drop the introduction
                    ignoredMarkers.add( marker )
                elif marker in ('mt1','mt2','mt3','mt4', 'imt1','imt2','imt3','imt4',):
                    if textBuffer:
                        myFile.write( textBuffer ); textBuffer = ''
                    myFile.write( "\n{}{}\n".format( ' '*((columnWidth-len(text))//2), text ) )
                elif marker in ('mte1','mte2','mte3','mte4', 'imte1','imte2','imte3','imte4',):
                    if textBuffer:
                        myFile.write( textBuffer ); textBuffer = ''
                    myFile.write( "\n{}{}\n\n".format( ' '*((columnWidth-len(text))//2), text ) )
                elif marker == 'c':
                    if textBuffer:
                        myFile.write( textBuffer ); textBuffer = ''
                    myFile.write( "\n\nChapter {}".format( text ) )
                elif marker == 'vp#': # This precedes a v field and has the verse number to be printed
                    gotVP = text # Just remember it for now
                elif marker == 'v':
                    if gotVP: # this is the verse number to be published
                        text = gotVP
                        gotVP = None
                    if textBuffer:
                        myFile.write( textBuffer ); textBuffer = ''
                    myFile.write( "\n{} ".format( text ) )
                elif marker in ('p','pi1','pi2','pi3','pi4', 's1','s2','s3','s4', 'ms1','ms2','ms3','ms4',): # Drop out these fields
                    ignoredMarkers.add( marker )
                elif text:
                    textBuffer += (' ' if textBuffer else '') + text
            if textBuffer: myFile.write( "{}\n".format( textBuffer ) ) # Write the last bit

    if ignoredMarkers:
        logging.info( "toText: Ignored markers were {}".format( ignoredMarkers ) )
        if BibleOrgSysGlobals.verbosityLevel > 2:
            print( " " + _("WARNING: Ignored toText markers were {}").format( ignoredMarkers ) )

    # Now create a zipped collection
    if BibleOrgSysGlobals.verbosityLevel > 2: print( " Zipping text files…" )
    # The with statement makes sure that the archive gets closed even if a write fails
    with zipfile.ZipFile( os.path.join( outputFolder, 'AllTextFiles.zip' ), 'w', compression=zipfile.ZIP_DEFLATED ) as zf:
        for filename in os.listdir( outputFolder ):
            if not filename.endswith( '.zip' ):
                filepath = os.path.join( outputFolder, filename )
                zf.write( filepath, filename ) # Save in the archive without the path

    if BibleOrgSysGlobals.verbosityLevel > 0 and BibleOrgSysGlobals.maxProcesses > 1:
        print( " BibleWriter.toText finished successfully." )
    return True
# end of BibleWriter.toText
def toVPL( self, outputFolder=None ):
    """
    Write the pseudo USFM out into some simple verse-per-line formats.

    Currently only the 'Forge' format is written (into a 'Forge/' subfolder).
        One 'BOS-BibleWriter-<bookName>.txt' file is written for each book in self.books,
        starting with a BOM and five '; ' header lines.
        Each verse gets a '$$ <bookName> <C>:<V>' line followed by its text,
        which is prefixed with '¶' if a p marker came before the verse.
        Header, introduction, title and section-heading markers are dropped
        (and remembered in order to be reported afterwards).
    Then every entry in the format subfolder that doesn't end with '.zip'
        is added to 'AllTextFiles.zip' in that subfolder.

    If outputFolder is not given, 'OutputFiles/BOS_VersePerLine_Export/' is used.
        The folders are created if they aren't already there.

    Returns True.
    """
    if BibleOrgSysGlobals.verbosityLevel > 1: print( "Running BibleWriter:toVPL…" )
    if BibleOrgSysGlobals.debugFlag: assert self.books

    if not self.doneSetupGeneric: self.__setupWriter()
    if not outputFolder: outputFolder = 'OutputFiles/BOS_VersePerLine_Export/'
    if not os.access( outputFolder, os.F_OK ): os.makedirs( outputFolder ) # Make the empty folder if there wasn't already one there

    ignoredMarkers = set()

    # Markers (besides the header and introduction ones) whose lines are dropped from the output
    droppedMarkers = ( 'ie', 'h',
                        'mt1','mt2','mt3','mt4', 'imt1','imt2','imt3','imt4',
                        'mte1','mte2','mte3','mte4', 'imte1','imte2','imte3','imte4',
                        'pi1','pi2','pi3','pi4', 's1','s2','s3','s4', 'ms1','ms2','ms3','ms4', )

    # First determine our format
    for VPLFormat in ('Forge',):
        thisOutputFolder = os.path.join( outputFolder, VPLFormat+'/' )
        if not os.access( thisOutputFolder, os.F_OK ): os.makedirs( thisOutputFolder ) # Make the empty folder if there wasn't already one there

        abbreviation = self.abbreviation if self.abbreviation else 'Unknown'
        title = self.getAName()

        # Books not listed here just use their BBB code
        ForgeBookNames = { 'GEN':'Ge', 'LEV':'Le', 'LAM':'La',
                            'MAT':'Mt', 'JDE':'Jude' }

        # Write the plain text files
        for BBB,bookObject in self.books.items():
            bookName = ForgeBookNames.get( BBB, BBB )
            pseudoESFMData = bookObject._processedLines

            filename = "BOS-BibleWriter-{}.txt".format( bookName )
            filepath = os.path.join( thisOutputFolder, BibleOrgSysGlobals.makeSafeFilename( filename ) )
            if BibleOrgSysGlobals.verbosityLevel > 2: print( " " + _("Writing {!r}…").format( filepath ) )

            textBuffer = '' # Collects the text of the current verse until the next c or v flushes it
            with open( filepath, 'wt', encoding='utf-8' ) as myFile:
                try: myFile.write('\ufeff') # Forge for SwordSearcher needs the BOM
                except UnicodeEncodeError: # why does this fail on Windows???
                    logging.critical( exp("toForgeForSwordSearcher: Unable to write BOM to file") )

                # Write the intro stuff
                myFile.write( '; TITLE: {}\n'.format( title ) )
                myFile.write( '; ABBREVIATION: {}\n'.format( abbreviation ) )
                myFile.write( '; HAS ITALICS\n' )
                myFile.write( '; HAS FOOTNOTES\n' )
                myFile.write( '; HAS REDLETTER\n' )

                haveP = False # Set by a p marker so that the next verse can be flagged with a pilcrow
                for entry in pseudoESFMData:
                    marker, text = entry.getMarker(), entry.getCleanText()
                    if '¬' in marker or marker in BOS_ADDED_NESTING_MARKERS:
                        continue # Just ignore added markers -- not needed here
                    if marker in ('c#','vp#',):
                        ignoredMarkers.add( marker )
                        continue

                    if ( marker in OFTEN_IGNORED_USFM_HEADER_MARKERS
                    or marker in USFM_INTRODUCTION_MARKERS
                    or marker in droppedMarkers ): # Just ignore these lines
                        ignoredMarkers.add( marker )
                    elif marker == 'c':
                        if textBuffer:
                            myFile.write( "{}\n".format( textBuffer ) ); textBuffer = ''
                        # NOTE(review): C only gets set here, so a v field before the first c field of a book
                        #   would use the value left over from the previous book (or fail for the first book)
                        C = text
                    elif marker == 'v':
                        V = text
                        if textBuffer:
                            myFile.write( "{}\n".format( textBuffer ) ); textBuffer = ''
                        myFile.write( "\n$$ {} {}:{}\n".format( bookName, C, V ) )
                        if haveP: textBuffer = '¶'; haveP = False
                    elif marker == 'p':
                        haveP = True
                    elif text:
                        textBuffer += (' ' if textBuffer else '') + text
                if textBuffer: myFile.write( "{}\n".format( textBuffer ) ) # Write the last bit

        if ignoredMarkers:
            logging.info( "toVPL: Ignored markers were {}".format( ignoredMarkers ) )
            if BibleOrgSysGlobals.verbosityLevel > 2:
                print( " " + _("WARNING: Ignored toVPL markers were {}").format( ignoredMarkers ) )

        # Now create a zipped collection
        if BibleOrgSysGlobals.verbosityLevel > 2: print( " Zipping text files…" )
        # The with statement makes sure that the archive gets closed even if a write fails
        with zipfile.ZipFile( os.path.join( thisOutputFolder, 'AllTextFiles.zip' ), 'w', compression=zipfile.ZIP_DEFLATED ) as zf:
            for filename in os.listdir( thisOutputFolder ):
                if not filename.endswith( '.zip' ):
                    filepath = os.path.join( thisOutputFolder, filename )
                    zf.write( filepath, filename ) # Save in the archive without the path

    if BibleOrgSysGlobals.verbosityLevel > 0 and BibleOrgSysGlobals.maxProcesses > 1:
        print( " BibleWriter.toVPL finished successfully." )
    return True
# end of BibleWriter.toVPL
def toMarkdown( self, outputFolder=None ):
"""
Write the Bible data out into GFM markdown format.
The format varies, depending on whether or not there are paragraph markers in the text.
"""
if BibleOrgSysGlobals.verbosityLevel > 1: print( "Running BibleWriter:toMarkdown…" )
if BibleOrgSysGlobals.debugFlag: assert self.books
if not self.doneSetupGeneric: self.__setupWriter()
if not outputFolder: outputFolder = 'OutputFiles/BOS_Markdown_Export/'
if not os.access( outputFolder, os.F_OK ): os.makedirs( outputFolder ) # Make the empty folder if there wasn't already one there