-
Notifications
You must be signed in to change notification settings - Fork 0
/
txt2translog_proj.py
81 lines (65 loc) · 3.56 KB
/
txt2translog_proj.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
"""Extract text from a given text file and use it to generate a Translog project file while
also preserving the style that was provided through an RTF file.
Make sure to back up your data before using this script!"""
from copy import deepcopy
from pathlib import Path
import xml.etree.ElementTree as ET
def process_file(pfin, pdout, tree_base, style):
    """Convert a text file into a Translog XML file by using a base XML template (tree_base),
    and a given string (RTF).

    The template is modified in place: its FileName, SourceText, TargetText and SourceTextUTF8
    elements are overwritten, after which the tree is written to
    ``<pdout>/<stem of pfin>.project``. Pass a copy if the template must be reused.

    :param pfin: input file (Path) whose contents will be placed on the source side of the XML file
    :param pdout: the output directory (Path) to write the resulting XML file to. It is created
           when it does not exist yet
    :param tree_base: the XML tree (ElementTree) that contains the XML Translog template
    :param style: the RTF text string. It must have "[{CONTENT}]" in it (without quotes).
           That part will be replaced by the actual contents of the input file on the source side.
    :raises ValueError: if the template lacks one of the elements that must be filled in
    """
    root = tree_base.getroot()
    pfout = pdout.joinpath(pfin.with_suffix(".project").name)

    src_text = pfin.read_text(encoding="utf-8")

    # The input is plain text, so characters that are RTF syntax (backslash and braces) must be
    # escaped before the text is spliced into the RTF style; otherwise they corrupt the document.
    # Newlines become RTF paragraph breaks. A single translate() pass avoids re-escaping the
    # backslashes introduced by the other replacements.
    # NOTE(review): non-ASCII characters are still inserted as-is - confirm Translog accepts that.
    rtf_escapes = str.maketrans({"\\": "\\\\", "{": "\\{", "}": "\\}", "\n": "\\par\n"})
    rtf_text = src_text.translate(rtf_escapes)

    new_texts = (
        # FileName must hold the path of the project file itself (the output path)
        ("./FileName", str(pfout)),
        (".//SourceText", style.replace("[{CONTENT}]", rtf_text)),
        # The target side starts out as an empty paragraph in the same style
        (".//TargetText", style.replace("[{CONTENT}]", r"\par")),
        (".//SourceTextUTF8", src_text),
    )
    for xpath, text in new_texts:
        node = root.find(xpath)
        if node is None:
            raise ValueError(f"The XML template does not contain the required element '{xpath}'")
        node.text = text

    pfout.parent.mkdir(parents=True, exist_ok=True)
    tree_base.write(pfout, encoding="utf-8")
def main(fin, dout, fbase, fstyle, extension=None, recursive=False):
    """Main entry point to convert a text file, or all files with a given extension in a given directory,
    into a Translog-compatible XML file.

    :param fin: input file or directory
    :param dout: output directory to write the Translog projects to
    :param fbase: path to base XML file
    :param fstyle: path to RTF file. It must have "[{CONTENT}]" in it (without quotes).
           That part will be replaced by the actual contents of the input file on the source side.
    :param extension: only files whose name ends with this string will be processed when using a
           directory in 'fin'. When empty or None, all files are processed
    :param recursive: recursively process files when using a directory in 'fin'
    :raises ValueError: if 'fin' is neither an existing directory nor an existing file
    """
    pin = Path(fin).resolve()
    pdout = Path(dout).resolve()
    style = Path(fstyle).read_text(encoding="utf-8")
    tree_base = ET.parse(fbase)

    if pin.is_dir():
        pattern = "*" + (extension or "")
        candidates = pin.rglob(pattern) if recursive else pin.glob(pattern)
        # The pattern can also match directories (always so when no extension is given), which
        # cannot be read as text: keep regular files only. The list is built before anything is
        # written so that freshly created project files are never picked up as input when the
        # output directory lies inside the input directory. Sorting makes the order deterministic.
        files = sorted(p for p in candidates if p.is_file())
    elif pin.is_file():
        files = [pin]
    else:
        raise ValueError(f"Not a valid directory or file: {fin}")

    for pfin in files:
        # Every file gets its own copy of the template because processing modifies the tree
        process_file(pfin, pdout, deepcopy(tree_base), style)
if __name__ == "__main__":
    import argparse

    # Command-line interface: four required positional paths followed by two optional
    # switches that only matter when the input is a directory.
    parser = argparse.ArgumentParser(description=__doc__)

    positionals = (
        ("inp", "Input text file or directory to process."),
        ("dout", "Path to output directory."),
        ("fbase", "The XML Translog template to use as a base."),
        ("fstyle", "A file containing the RTF instructions regarding style."),
    )
    for arg_name, arg_help in positionals:
        parser.add_argument(arg_name, help=arg_help)

    parser.add_argument(
        "-e",
        "--extension",
        default="",
        help="Only files with this extension will be processed.",
    )
    parser.add_argument(
        "-r",
        "--recursive",
        action="store_true",
        help="If 'inp' is a directory, traverse it recursively.",
    )

    options = parser.parse_args()
    main(
        fin=options.inp,
        dout=options.dout,
        fbase=options.fbase,
        fstyle=options.fstyle,
        extension=options.extension,
        recursive=options.recursive,
    )