Some refactoring to get translation and other functionality to share the same codepath, logging of issues encountered while extracting translations, and general clarification of code.
CensoredUsername committed May 2, 2024
1 parent 08e3710 commit cc858ec
Showing 1 changed file with 63 additions and 65 deletions: unrpyc.py
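In short: translation extraction remains its own first pass (worker_tl), but the actual translating now happens inside the same decompile codepath as everything else (worker_common), driven by a Translator handed around through args. A condensed sketch of the resulting flow in main(), assembled from the hunks below (not verbatim code; error handling and logging omitted):

args.translator = None
if args.translate:
    # Pass 1: gather translation data from every rpyc file in parallel.
    results = run_workers(worker_tl, args, worklist, args.processes)
    # Merge the per-file results into a single shared Translator.
    tl_dialogue, tl_strings = {}, {}
    for entry in results:
        if entry.value:
            tl_dialogue.update(entry.value[0])
            tl_strings.update(entry.value[1])
    translator = translate.Translator(None)
    translator.dialogue = tl_dialogue
    translator.strings = tl_strings
    args.translator = translator

# Pass 2 (or the only pass): decompile; worker_common translates on the
# fly whenever args.translator is set.
results = run_workers(worker_common, args, worklist, args.processes)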
@@ -44,8 +44,7 @@ def cpu_count():
import decompiler
import deobfuscate
from decompiler import astdump, translate
from decompiler.renpycompat import (pickle_safe_loads, pickle_safe_dumps, pickle_loads,
pickle_detect_python2)
from decompiler.renpycompat import pickle_safe_loads, pickle_detect_python2


class Context:
@@ -155,8 +154,9 @@ def read_ast_from_file(in_file, context):

def get_ast(in_file, try_harder, context):
"""
Decides whether its tried to aquired the ast the basic way or with special deobfucation
methods.
Opens the rpyc file at path in_file to load the contained AST.
If try_harder is True, an attempt will be made to work around obfuscation techniques.
Else, it is loaded as a normal rpyc file.
"""
with in_file.open('rb') as in_file:
if try_harder:
@@ -200,58 +200,48 @@ def decompile_rpyc(input_filename, context, overwrite=False, try_harder=False, d
context.set_state('ok')


def tl_worker(arg_tup):
def worker_tl(arg_tup):
"""
This func manages the translation feature related work. It takes two steps to complete,
because the tl-data is stored in different files as the specific file a mp-process works
on when decompiling.
Step1: Browses all rpy(m)c files and extracts the tl_data
Step2: Decompiles and replaces at the same time all translation strings
This function implements the first pass of the translation feature. It gathers TL-data from the
given rpyc files, to be used by the common worker to translate while decompiling.
arg_tup is (args, filename). Returns the gathered TL data in the context.
"""
args, filename = arg_tup
context = Context()

if not args.translate2:
try:
context.log(f'Extracting translations from {filename}...')
ast = get_ast(filename, args.try_harder, context)

tl_inst = translate.Translator(args.translate, True)
tl_inst.translate_dialogue(ast)
# we pickle and unpickle this manually because the regular unpickler will choke on it
tl_data = pickle_safe_dumps(tl_inst.dialogue), tl_inst.strings

@madeddy (Contributor) commented on May 2, 2024:

(Read this first or this makes no sense.)
Mind the note from J.McBarn above. I guess he found out the hard way back then too. (As myself this year...)

Somehow the basic pickler used in multiprocessing does not like the data in dialogue. Maybe you know another, easier way around it than the double pickling we had so far.

@CensoredUsername (Author, Owner) replied on May 2, 2024:

Ah damnit. I thought it was a residue from having to serialize it to a file.
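For context on this exchange: return values from a multiprocessing Pool worker are serialized with the stock pickler, which cannot handle the objects stored in the translator's dialogue table. The code being removed here worked around that by pre-pickling the field with the safe pickler so only plain bytes crossed the process boundary, then unpickling it again in the parent. A minimal sketch of that workaround, using only calls visible in this diff:

# Worker side: serialize the problematic field by hand so the Pool's own
# pickler only ever sees a bytes blob plus an ordinary dict.
tl_data = pickle_safe_dumps(tl_inst.dialogue), tl_inst.strings
context.set_result(tl_data)

# Parent side: reverse the manual step once the result has crossed the
# process boundary.
tl_dialogue.update(pickle_loads(entry.value[0]))
tl_strings.update(entry.value[1])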

context.set_result(tl_data)
except Exception as e:
context.set_error(e)
context.log(f'Error while extracting translations from {filename}:')
context.log(traceback.format_exc())
else:
try:
tl_inst = translate.Translator(args.translate)
tl_inst.dialogue, tl_inst.strings = args.translate2
decompile_rpyc(
filename, context, overwrite=args.clobber, try_harder=args.try_harder,
translator=tl_inst, init_offset=args.init_offset,
sl_custom_names=args.sl_custom_names
)
except Exception as e:
context.set_error(e)
context.log(f'Error while translating {filename}:')
context.log(traceback.format_exc())
try:
context.log(f'Extracting translations from {filename}...')
ast = get_ast(filename, args.try_harder, context)

tl_inst = translate.Translator(args.translate, True)
tl_inst.translate_dialogue(ast)
context.set_result((tl_inst.dialogue, tl_inst.strings))
context.set_state("ok")

except Exception as e:
context.set_error(e)
context.log(f'Error while extracting translations from {filename}:')
context.log(traceback.format_exc())

return context
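Both workers report through a Context instead of printing or raising directly, so per-file logs survive the trip back from a Pool process. Below is a sketch of the interface they rely on; the real class is defined near the top of unrpyc.py and only partially shown in this diff, so anything beyond these members is an assumption:

class Context:  # sketch: only the members the workers above actually use
    def __init__(self):
        self.log_contents = []  # lines the parent prints once the worker returns
        self.state = None       # e.g. "ok", "skip", "error"
        self.value = None       # worker result, e.g. (dialogue, strings)
        self.error = None

    def log(self, message):
        self.log_contents.append(message)

    def set_state(self, state):
        self.state = state

    def set_result(self, value):
        self.value = value

    def set_error(self, error):
        # assumption: recording an error also moves state off "ok"
        self.error = error
        self.state = "error"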


def worker(arg_tup):
def worker_common(arg_tup):
"""
The core of unrpyc. arg_tup is (args, filename). This worker will unpack the file at filename,
decompile it, and write the output to its corresponding rpy file.
"""

args, filename = arg_tup
context = Context()

try:
decompile_rpyc(
filename, context, overwrite=args.clobber, try_harder=args.try_harder,
dump=args.dump, no_pyexpr=args.no_pyexpr, comparable=args.comparable,
init_offset=args.init_offset, sl_custom_names=args.sl_custom_names
)
init_offset=args.init_offset, sl_custom_names=args.sl_custom_names,
translator=args.translator)

except Exception as e:
context.set_error(e)
context.log(f'Error while decompiling {filename}:')
@@ -260,16 +250,19 @@ def worker(arg_tup):
return context


def mp_runner(worker_type, baselist, args):
def run_workers(worker, common_args, private_args, parallelism):
"""
Runs worker in parallel using multiprocessing, with a max of `parallelism` processes.
Workers are called as worker((common_args, private_args[i])).
Workers should return an instance of `Context` as return value.
"""

# When TL-Extract is run, the list must be assembled after it to include the TL data
worklist = []
worklist = [(args, x) for x in baselist]
worker_args = ((common_args, x) for x in private_args)

results = []
if args.processes > 1:
with Pool(args.processes) as pool:
for result in pool.imap(worker_type, worklist, 1):
if parallelism > 1:
with Pool(parallelism) as pool:
for result in pool.imap(worker, worker_args, 1):
results.append(result)

for line in result.log_contents:
@@ -278,7 +271,7 @@ def mp_runner(worker_type, baselist, args):
print("")

else:
for result in map(worker_type, worklist):
for result in map(worker, worker_args):
results.append(result)

for line in result.log_contents:
@@ -423,10 +416,6 @@ def main():
help="Changes the dialogue language in the decompiled script files, using a translation "
"already present in the tl dir.")

ap.add_argument(
'--translate2',
help=argparse.SUPPRESS)

ap.add_argument(
'--version',
action='version',
@@ -437,10 +426,10 @@ def main():
# Catch impossible arg combinations so they don't produce strange errors or fail silently
if (args.no_pyexpr or args.comparable) and not args.dump:
ap.error(
"Options 'comparable' and 'no_pyexpr' are not usable without 'dump'.")
"Options '--comparable' and '--no_pyexpr' require '--dump'.")

if args.dump and args.translate:
ap.error("Options 'translate' and 'dump' can only be used separately.")
ap.error("Options '--translate' and '--dump' cannot be used together.")

if args.sl_custom_names is not None:
try:
@@ -490,23 +479,32 @@ def traverse(inpath):
# If a big file starts near the end, there could be a long time with only one thread running,
# which is inefficient. Avoid this by starting big files first.
worklist.sort(key=lambda x: x.stat().st_size, reverse=True)
results = []

translation_errors = 0
args.translator = None
if args.translate:
results = mp_runner(tl_worker, worklist, args)
print("Step 1: analysing files for translations.")
results = run_workers(worker_tl, args, worklist, args.processes)

print('Sorting extracted translations.')
tl_dialogue = {}
tl_strings = {}
for entry in results:
if entry.state != "ok":
translation_errors += 1
if not entry.value:
continue
tl_dialogue.update(pickle_loads(entry.value[0]))
tl_dialogue.update(entry.value[0])
tl_strings.update(entry.value[1])
args.translate2 = (tl_dialogue, tl_strings)
results = mp_runner(tl_worker, worklist, args)
else:
results = mp_runner(worker, worklist, args)

translator = translate.Translator(None)
translator.dialogue = tl_dialogue
translator.strings = tl_strings
args.translator = translator

print("Step 2: decompiling.")

results = run_workers(worker_common, args, worklist, args.processes)

success = sum(result.state == "ok" for result in results)
skipped = sum(result.state == "skip" for result in results)
@@ -519,10 +517,7 @@ def traverse(inpath):
print(f"{55 * '-'}")
print(f"Processed {plural_s(len(results), 'file')}.")

if args.translate:
print(f"> {plural_s(success, 'file')} were successfully analyzed.")
else:
print(f"> {plural_s(success, 'file')} were successfully decompiled.")
print(f"> {plural_s(success, 'file')} were successfully decompiled.")

if broken:
print(f"> {plural_s(broken, 'file')} did not have the correct header, "
@@ -534,6 +529,9 @@ def traverse(inpath):
if skipped:
print(f"> {plural_s(skipped, 'file')} were skipped as the output file already existed.")

if translation_errors:
print(f"> {plural_s(translation_errors, 'file')} failed translation extraction.")


if skipped:
print("")
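As a usage note: after this commit the whole two-pass flow is triggered by the existing --translate option; a hypothetical invocation (the language name and game path are placeholders, not from this diff):

python unrpyc.py --translate french path/to/game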
