Skip to content

Commit

Permalink
Some refactoring to get translation and other functionality to share …
Browse files Browse the repository at this point in the history
…the same codepath, logging of issues encountered while extracting translations, and general clarification of code.
  • Loading branch information
CensoredUsername committed May 3, 2024
1 parent 08e3710 commit 9236e8c
Showing 1 changed file with 82 additions and 68 deletions.
150 changes: 82 additions & 68 deletions unrpyc.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,8 +155,9 @@ def read_ast_from_file(in_file, context):

def get_ast(in_file, try_harder, context):
"""
Decides whether its tried to aquired the ast the basic way or with special deobfucation
methods.
Opens the rpyc file at path in_file to load the contained AST.
If try_harder is True, an attempt will be made to work around obfuscation techniques.
Else, it is loaded as a normal rpyc file.
"""
with in_file.open('rb') as in_file:
if try_harder:
Expand Down Expand Up @@ -200,58 +201,54 @@ def decompile_rpyc(input_filename, context, overwrite=False, try_harder=False, d
context.set_state('ok')


def tl_worker(arg_tup):
def worker_tl(arg_tup):
"""
This func manages the translation feature related work. It takes two steps to complete,
because the tl-data is stored in different files as the specific file a mp-process works
on when decompiling.
Step1: Browses all rpy(m)c files and extracts the tl_data
Step2: Decompiles and replaces at the same time all translation strings
This file implements the first pass of the translation feature. It gathers TL-data from the
given rpyc files, to be used by the common worker to translate while decompiling.
arg_tup is (args, filename). Returns the gathered TL data in the context.
"""
args, filename = arg_tup
context = Context()

if not args.translate2:
try:
context.log(f'Extracting translations from {filename}...')
ast = get_ast(filename, args.try_harder, context)

tl_inst = translate.Translator(args.translate, True)
tl_inst.translate_dialogue(ast)
# we pickle and unpickle this manually because the regular unpickler will choke on it
tl_data = pickle_safe_dumps(tl_inst.dialogue), tl_inst.strings
context.set_result(tl_data)
except Exception as e:
context.set_error(e)
context.log(f'Error while extracting translations from {filename}:')
context.log(traceback.format_exc())
else:
try:
tl_inst = translate.Translator(args.translate)
tl_inst.dialogue, tl_inst.strings = args.translate2
decompile_rpyc(
filename, context, overwrite=args.clobber, try_harder=args.try_harder,
translator=tl_inst, init_offset=args.init_offset,
sl_custom_names=args.sl_custom_names
)
except Exception as e:
context.set_error(e)
context.log(f'Error while translating {filename}:')
context.log(traceback.format_exc())
try:
context.log(f'Extracting translations from {filename}...')
ast = get_ast(filename, args.try_harder, context)

tl_inst = translate.Translator(args.translate, True)
tl_inst.translate_dialogue(ast)

# this object has to be sent back to the main process, for which it needs to be pickled.
# the default pickler cannot pickle fake classes correctly, so manually handle that here.
context.set_result(pickle_safe_dumps((tl_inst.dialogue, tl_inst.strings)))
context.set_state("ok")

except Exception as e:
context.set_error(e)
context.log(f'Error while extracting translations from {filename}:')
context.log(traceback.format_exc())

return context


def worker(arg_tup):
def worker_common(arg_tup):
"""
The core of unrpyc. arg_tup is (args, filename). This worker will unpack the file at filename,
decompile it, and write the output to it's corresponding rpy file.
"""

args, filename = arg_tup
context = Context()

if args.translator:

This comment has been minimized.

Copy link
@madeddy

madeddy May 4, 2024

Contributor

Without the pickling at L519, this block can logically be removed.

args.translator = pickle_loads(args.translator)

try:
decompile_rpyc(
filename, context, overwrite=args.clobber, try_harder=args.try_harder,
dump=args.dump, no_pyexpr=args.no_pyexpr, comparable=args.comparable,
init_offset=args.init_offset, sl_custom_names=args.sl_custom_names
)
init_offset=args.init_offset, sl_custom_names=args.sl_custom_names,
translator=args.translator)

except Exception as e:
context.set_error(e)
context.log(f'Error while decompiling {filename}:')
Expand All @@ -260,16 +257,19 @@ def worker(arg_tup):
return context


def mp_runner(worker_type, baselist, args):
def run_workers(worker, common_args, private_args, parallelism):
"""
Runs worker in parallel using multiprocessing, with a max of `parallelism` processes.
Workers are called as worker((common_args, private_args[i])).
Workers should return an instance of `Context` as return value.
"""

# When TL-Extract is run, the list must be assembled after it to include the TL data
worklist = []
worklist = [(args, x) for x in baselist]
worker_args = ((common_args, x) for x in private_args)

results = []
if args.processes > 1:
with Pool(args.processes) as pool:
for result in pool.imap(worker_type, worklist, 1):
if parallelism > 1:
with Pool(parallelism) as pool:
for result in pool.imap(worker, worker_args, 1):
results.append(result)

for line in result.log_contents:
Expand All @@ -278,7 +278,7 @@ def mp_runner(worker_type, baselist, args):
print("")

else:
for result in map(worker_type, worklist):
for result in map(worker, worker_args):
results.append(result)

for line in result.log_contents:
Expand Down Expand Up @@ -423,10 +423,6 @@ def main():
help="Changes the dialogue language in the decompiled script files, using a translation "
"already present in the tl dir.")

ap.add_argument(
'--translate2',
help=argparse.SUPPRESS)

ap.add_argument(
'--version',
action='version',
Expand All @@ -437,10 +433,10 @@ def main():
# Catch impossible arg combinations so they don't produce strange errors or fail silently
if (args.no_pyexpr or args.comparable) and not args.dump:
ap.error(
"Options 'comparable' and 'no_pyexpr' are not usable without 'dump'.")
"Options '--comparable' and '--no_pyexpr' require '--dump'.")

if args.dump and args.translate:
ap.error("Options 'translate' and 'dump' can only be used separately.")
ap.error("Options '--translate' and '--dump' cannot be used together.")

if args.sl_custom_names is not None:
try:
Expand Down Expand Up @@ -490,23 +486,41 @@ def traverse(inpath):
# If a big file starts near the end, there could be a long time with only one thread running,
# which is inefficient. Avoid this by starting big files first.
worklist.sort(key=lambda x: x.stat().st_size, reverse=True)
results = []

translation_errors = 0
args.translator = None
if args.translate:
results = mp_runner(tl_worker, worklist, args)

print('Sorting extracted translations.')
# For translation, we first need to analyse all files for translation data.
# We then collect all of these back into the main process, and build a
# datastructure of all of them. This datastructure is then passed to
# all decompiling processes.
# Note: because this data contains some FakeClasses, Multiprocessing cannot
# pass it between processes (it pickles them, and pickle will complain about
# these). Therefore, we need to manually pickle and unpickle it.

print("Step 1: analysing files for translations.")
results = run_workers(worker_tl, args, worklist, args.processes)

print('Compiling extracted translations.')
tl_dialogue = {}
tl_strings = {}
for entry in results:
if not entry.value:
continue
tl_dialogue.update(pickle_loads(entry.value[0]))
tl_strings.update(entry.value[1])
args.translate2 = (tl_dialogue, tl_strings)
results = mp_runner(tl_worker, worklist, args)
else:
results = mp_runner(worker, worklist, args)
if entry.state != "ok":
translation_errors += 1

if entry.value:
new_dialogue, new_strings = pickle_loads(entry.value)
tl_dialogue.update(new_dialogue)
tl_strings.update(new_strings)

translator = translate.Translator(None)
translator.dialogue = tl_dialogue
translator.strings = tl_strings
args.translator = pickle_safe_dumps(translator)

This comment has been minimized.

Copy link
@madeddy

madeddy May 4, 2024

Contributor

Sending this pickled back into the decompiler is not necessary. This is all we need:
args.translator = translator

Tested!

This comment has been minimized.

Copy link
@CensoredUsername

CensoredUsername May 4, 2024

Author Owner

That is odd. When I was working on it I originally did not do that, but then it failed during testing with errors in multiprocessing when calling the workers in the second step.

I just tested it by translating the tutorial game into Japanese. I also don't understand why pickling wouldn't be necessary this time, as multiprocessing pickles items the same way when starting and when returning.

This comment has been minimized.

Copy link
@madeddy

madeddy May 4, 2024

Contributor

Well, as I said, there is some strange stuff going on with TL. I never encountered issues (on Linux) without pickling the ingoing data. It was also originally like this.

Maybe how data goes into MP and how it comes out are, for some reason, two different kettles of fish. I'm not privy to how that's handled inside multiprocessing. The code in "pool.py" is a bit complicated IMO.

Anyway, whatever you do with the pickling, both ways work for me. The branch can go into dev if you think it's ready.

This comment has been minimized.

Copy link
@CensoredUsername

CensoredUsername May 5, 2024

Author Owner

Well, as I said, there is some strange stuff going on with TL. I never encountered issues (on Linux) without pickling the ingoing data. It was also originally like this.

Ah, linux. That explains it.

So on Linux, multiprocessing spawns child processes via forking; the forked child process already has all the data, so no pickling is necessary. On Windows, it spawns a new process and sends any required data to that process. For that, the data needs to be pickled.

So to maintain Windows compatibility, we should pickle both ways.

This comment has been minimized.

Copy link
@madeddy

madeddy May 5, 2024

Contributor

to maintain Windows compatibility

In this case, absolutely! I didn't know the different context methods have such an impact on code behavior.


print("Step 2: decompiling.")

This comment has been minimized.

Copy link
@madeddy

madeddy May 4, 2024

Contributor

Decompiling is also what the "normal" way of using unrpyc does, so this print() should maybe be "Decompiling and translating scripts." instead.

This comment has been minimized.

Copy link
@CensoredUsername

CensoredUsername May 5, 2024

Author Owner

Could rename it, but it is mostly there to distinguish between the different passes in the log.

This comment has been minimized.

Copy link
@madeddy

madeddy May 5, 2024

Contributor

Ah, ok then. Was just a thought.


results = run_workers(worker_common, args, worklist, args.processes)

success = sum(result.state == "ok" for result in results)
skipped = sum(result.state == "skip" for result in results)
Expand All @@ -519,10 +533,7 @@ def traverse(inpath):
print(f"{55 * '-'}")
print(f"Processed {plural_s(len(results), 'file')}.")

if args.translate:
print(f"> {plural_s(success, 'file')} were successfully analyzed.")
else:
print(f"> {plural_s(success, 'file')} were successfully decompiled.")
print(f"> {plural_s(success, 'file')} were successfully decompiled.")

if broken:
print(f"> {plural_s(broken, 'file')} did not have the correct header, "
Expand All @@ -534,6 +545,9 @@ def traverse(inpath):
if skipped:
print(f"> {plural_s(skipped, 'file')} were skipped as the output file already existed.")

if translation_errors:
print(f"> {plural_s(translation_errors, 'file')} failed translation extraction.")


if skipped:
print("")
Expand Down

0 comments on commit 9236e8c

Please sign in to comment.