Skip to content

Commit

Permalink
mvp-HXLTM (#16), EticaAI/HXL-Data-Science-file-formats#19: hxltm2xlif…
Browse files Browse the repository at this point in the history
…f, ...
  • Loading branch information
fititnt committed Jun 28, 2021
1 parent 5bd50b7 commit d164727
Showing 1 changed file with 61 additions and 8 deletions.
69 changes: 61 additions & 8 deletions _systema/programma/hxltm2xliff.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,15 +176,16 @@ def execute_cli(self, args,

try:
temp = tempfile.NamedTemporaryFile()
temp_csv4xliff = tempfile.NamedTemporaryFile()
args.outfile = temp.name

# print(temp_csv4xliff)
# print(temp_csv4xliff.name)

with self.hxlhelper.make_source(args, stdin) as source, \
self.hxlhelper.make_output(args, stdout) as output:
source.without_columns('#meta')
source.with_columns('#item')

# print(source)

# Save the HXL TM locally. It will be used by either hxltm2csv
# or hxltm2csv + hxltm2xliff
hxl.io.write_hxl(output.output, source,
show_tags=not args.strip_tags)

Expand All @@ -200,22 +201,32 @@ def execute_cli(self, args,
self.hxltm2csv(args.outfile, self.original_outfile,
self.original_outfile_is_stdout, args)
else:
self.hxl2tab(args.outfile, self.original_outfile,
self.original_outfile_is_stdout, args)
# self.hxl2tab(args.outfile, self.original_outfile,
# self.original_outfile_is_stdout, args)

self.hxltm2csv(args.outfile, temp_csv4xliff.name,
False, args)
self.hxltm2xliff(temp_csv4xliff.name, self.original_outfile,
self.original_outfile_is_stdout, args)

finally:
temp.close()
temp_csv4xliff.close()

return self.EXIT_OK

def hxltm2csv(self, hxlated_input, tab_output, is_stdout, args):
"""
hxl2tab is is the main method to de facto make the conversion.
hxltm2csv pre-processes the initial HXL TM into an intermediate format
that can be used alone or as a prerequisite of the hxltm2xliff exporter.
"""

with open(hxlated_input, 'r') as csv_file:
csv_reader = csv.reader(csv_file)

# TODO: fix problem if input data already only have HXL hashtags
# but no extra headings (Emerson Rocha, 2021-06-28 01:27 UTC)

# Hotfix: skip the first (non-HXL) header row. Ideally the exported
# HXLated file would already be saved without that extra header.
next(csv_reader)
Expand Down Expand Up @@ -280,6 +291,48 @@ def hxl2tab(self, hxlated_input, tab_output, is_stdout, args):
for line in csv_reader:
txt_writer.writerow(line)

def hxltm2xliff(self, hxlated_input, tab_output, is_stdout, args):
    """Export the intermediate HXL TM CSV (work in progress, 2021-06-28).

    No XLIFF is generated yet: the rows of ``hxlated_input`` are copied
    as tab-separated values, with the first row replaced by the result
    of ``self.hxltm2csv_header``.

    Args:
        hxlated_input: path of the CSV file to read.
        tab_output: path of the file to (over)write; not used when
            ``is_stdout`` is true.
        is_stdout: when true, write to ``sys.stdout`` instead of
            ``tab_output``.
        args: parsed CLI arguments; ``args.fontem_linguam`` and
            ``args.objectivum_linguam`` are forwarded to
            ``hxltm2csv_header``.

    Raises:
        StopIteration: if ``hxlated_input`` contains no header row.
    """

    # TODO: implement something like internal structure of a object,
    #       maybe csv.DictReader?
    # @see https://www.geeksforgeeks.org/convert-csv-to-json-using-python/
    # @see https://docs.python.org/3/library/csv.html#csv.DictReader

    # The csv module requires files to be opened with newline='';
    # otherwise line endings embedded in quoted fields are rewritten on
    # read and an extra '\r' may be emitted on write on some platforms.
    with open(hxlated_input, 'r', newline='') as csv_file:
        csv_reader = csv.reader(csv_file)

        # The header is read before the output is touched, so an empty
        # input fails without truncating an existing output file.
        header_original = next(csv_reader)
        header_new = self.hxltm2csv_header(
            header_original,
            fontem_linguam=args.fontem_linguam,
            objectivum_linguam=args.objectivum_linguam,
        )

        def _emit(stream):
            # Write the translated header followed by the remaining rows.
            txt_writer = csv.writer(stream, delimiter='\t')
            txt_writer.writerow(header_new)
            txt_writer.writerows(csv_reader)

        if is_stdout:
            _emit(sys.stdout)
        else:
            # Mode 'w' already truncates, so a separate truncate pass
            # followed by an append-mode reopen is unnecessary.
            with open(tab_output, 'w', newline='') as new_txt:
                _emit(new_txt)

# def hxl2tab_header(self, hxlated_header):
def hxltm2csv_header(self, hxlated_header, fontem_linguam, objectivum_linguam):
"""
Expand Down

0 comments on commit d164727

Please sign in to comment.