CensoredUsername · CensoredUsername · Jun 26, 2024 · Apr 27, 2024 · Apr 27, 2024 · May 2, 2024
diff --git a/README.md b/README.md
@@ -18,7 +18,28 @@ legacy-dev (python 2):[![Build Status](https://github.com/CensoredUsername/unrpy
 
 ## Usage
 
-This tool can either be ran as a command line tool, as a library, or injected into the game itself. To use it as a command line tool, a local python installation is required.
+This tool can either be run as a command line tool, as a library, or injected into the game itself. To use it as a command line tool, a local python installation is required. To use it for its default function (decompiling),
+you can simply pass it the files you want to decompile as arguments, or pass it the folder
+containing them. For example, `python unrpyc.py file1.rpyc file2.rpyc` or `python unrpyc.py folder/`.
+
+### Additional features
+
+#### Translation:
+For easier reading of decompiled script files, unrpyc can use translation data contained in a game
+to automatically convert the emitted script files to another language. You can find the supported
+languages for a game by looking in the `game/tl` folder of said game (`None` being the default).
+
+To use this feature, simply pass the name of the target language (which has to match the name found
+in the tl folder) with the `-t`/`--translate` option. For example, if a game has a folder
+`/path/to/renpyapp/game/tl/french`, then you can run the command:
+`python unrpyc.py /path/to/renpyapp/ -t french`
+
+#### Raw AST view:
+Instead of decompiling, the tool can simply show the contents of an rpyc file. This is mainly useful
+for bug reports and the development of unrpyc. You can pass the `-d`/`--dump` flag to activate this
+feature.
+
+Note: this generates a _lot_ of output.
 
 ## Compatibility
 

diff --git a/unrpyc.py b/unrpyc.py
@@ -44,8 +44,8 @@ def cpu_count():
 import decompiler
 import deobfuscate
 from decompiler import astdump, translate
-from decompiler.renpycompat import (pickle_safe_loads, pickle_safe_dumps, pickle_safe_dump,
-                                    pickle_loads, pickle_detect_python2)
+from decompiler.renpycompat import (pickle_safe_loads, pickle_safe_dumps, pickle_loads,
+                                    pickle_detect_python2)
 
 
 class Context:
@@ -153,6 +153,20 @@ def read_ast_from_file(in_file, context):
     return stmts
 
 
+def get_ast(in_file, try_harder, context):
+    """
+    Opens the rpyc file at path in_file to load the contained AST.
+    If try_harder is True, deobfuscate.read_ast is used to work around obfuscation
+    techniques. Else, it is loaded as a normal rpyc file. Returns the loaded AST.
+    """
+    with in_file.open('rb') as rpyc_file:
+        if try_harder:
+            ast = deobfuscate.read_ast(rpyc_file, context)
+        else:
+            ast = read_ast_from_file(rpyc_file, context)
+    return ast
+
+
 def decompile_rpyc(input_filename, context, overwrite=False, try_harder=False, dump=False,
                    comparable=False, no_pyexpr=False, translator=None, init_offset=False,
                    sl_custom_names=None):
@@ -173,12 +187,7 @@ def decompile_rpyc(input_filename, context, overwrite=False, try_harder=False, d
         return
 
     context.log(f'Decompiling {input_filename} to {out_filename.name} ...')
-
-    with input_filename.open('rb') as in_file:
-        if try_harder:
-            ast = deobfuscate.read_ast(in_file, context)
-        else:
-            ast = read_ast_from_file(in_file, context)
+    ast = get_ast(input_filename, try_harder, context)
 
     with out_filename.open('w', encoding='utf-8') as out_file:
         if dump:
@@ -191,40 +200,54 @@ def decompile_rpyc(input_filename, context, overwrite=False, try_harder=False, d
 
     context.set_state('ok')
 
-def extract_translations(input_filename, language, context):
-    context.log(f'Extracting translations from {input_filename}...')
 
-    with input_filename.open('rb') as in_file:
-        ast = read_ast_from_file(in_file)
+def worker_tl(arg_tup):
+    """
+    This function implements the first pass of the translation feature. It gathers TL-data
+    from the given rpyc file, to be used by the common worker to translate while decompiling.
+    arg_tup is (args, filename). Returns the Context, with the pickled TL data as its result.
+    """
+    args, filename = arg_tup
+    context = Context()
+
+    try:
+        context.log(f'Extracting translations from {filename}...')
+        ast = get_ast(filename, args.try_harder, context)
+
+        tl_inst = translate.Translator(args.translate, True)
+        tl_inst.translate_dialogue(ast)
+
+        # this object has to be sent back to the main process, for which it needs to be pickled.
+        # the default pickler cannot pickle fake classes correctly, so manually handle that here.
+        context.set_result(pickle_safe_dumps((tl_inst.dialogue, tl_inst.strings)))
+        context.set_state("ok")
 
-    translator = translate.Translator(language, True)
-    translator.translate_dialogue(ast)
-    # we pickle and unpickle this manually because the regular unpickler will choke on it
-    return pickle_safe_dumps(translator.dialogue), translator.strings
+    except Exception as e:
+        context.set_error(e)
+        context.log(f'Error while extracting translations from {filename}:')
+        context.log(traceback.format_exc())
+
+    return context
 
 
-def worker(arg_tup):
+def worker_common(arg_tup):
+    """
+    The core of unrpyc. arg_tup is (args, filename). This worker will unpack the file at filename,
+    decompile it, and write the output to its corresponding rpy file. args.translator is either
+    None or a pickled Translator; it is unpickled locally and args itself is left untouched.
+    """
     args, filename = arg_tup
     context = Context()
 
-    try:
-        if args.write_translation_file:
-            result = extract_translations(filename, args.language, context)
-            context.set_result(result)
-
-        else:
-            if args.translation_file is not None:
-                translator = translate.Translator(None)
-                translator.language, translator.dialogue, translator.strings = (
-                    pickle_loads(args.translations))
-            else:
-                translator = None
+    # Unpickle into a local: without multiprocessing `args` is shared by every call.
+    translator = pickle_loads(args.translator) if args.translator else None
 
-            decompile_rpyc(
-                filename, context, args.clobber, try_harder=args.try_harder, dump=args.dump,
-                no_pyexpr=args.no_pyexpr, comparable=args.comparable, translator=translator,
-                init_offset=args.init_offset, sl_custom_names=args.sl_custom_names
-                )
+    try:
+        decompile_rpyc(
+            filename, context, overwrite=args.clobber, try_harder=args.try_harder,
+            dump=args.dump, no_pyexpr=args.no_pyexpr, comparable=args.comparable,
+            init_offset=args.init_offset, sl_custom_names=args.sl_custom_names,
+            translator=translator)
 
     except Exception as e:
         context.set_error(e)
@@ -234,6 +257,38 @@ def worker(arg_tup):
     return context
 
 
+def run_workers(worker, common_args, private_args, parallelism):
+    """
+    Calls worker((common_args, x)) for each x in private_args and returns the results as a
+    list. Every result should be a `Context`; its log_contents are printed line by line,
+    followed by an empty line, as the results are received.
+    If parallelism > 1 a multiprocessing Pool with that many processes does the work,
+    otherwise the workers are run one after another in the current process.
+    """
+
+    def drain(outcomes):
+        # Shared by both execution modes: collect every outcome and echo its log.
+        gathered = []
+        for outcome in outcomes:
+            gathered.append(outcome)
+
+            for entry in outcome.log_contents:
+                print(entry)
+
+            print("")
+
+        return gathered
+
+    jobs = ((common_args, item) for item in private_args)
+
+    if parallelism > 1:
+        # Drain inside the with-block: the pool must stay alive while results come in.
+        with Pool(parallelism) as pool:
+            return drain(pool.imap(worker, jobs, 1))
+
+    return drain(map(worker, jobs))
+
+
 def parse_sl_custom_names(unparsed_arguments):
     # parse a list of strings in the format
     # classname=name-nchildren into {classname: (name, nchildren)}
@@ -323,32 +378,6 @@ def main():
         "Defaults to the amount of hw threads available minus one, disabled when muliprocessing "
         "unavailable is.")
 
-    ap.add_argument(
-        '-t',
-        '--translation-file',
-        dest='translation_file',
-        type=Path,
-        action='store',
-        default=None,
-        help="Use the specified file to translate during decompilation")
-
-    ap.add_argument(
-        '-T',
-        '--write-translation-file',
-        dest='write_translation_file',
-        type=Path,
-        action='store',
-        default=None,
-        help="Store translations in the specified file instead of decompiling")
-
-    ap.add_argument(
-        '-l',
-        '--language',
-        dest='language',
-        action='store',
-        default=None,
-        help="If writing a translation file, the language of the translations to write")
-
     ap.add_argument(
         '--comparable',
         dest='comparable',
@@ -385,6 +414,15 @@ def main():
         "potentially followed by a '-', and the amount of children the displayable takes"
         "(valid options are '0', '1' or 'many', with 'many' being the default)")
 
+    ap.add_argument(
+        '-t',
+        '--translate',
+        dest='translate',
+        type=str,
+        action='store',
+        help="Changes the dialogue language in the decompiled script files, using a translation "
+        "already present in the tl dir.")
+
     ap.add_argument(
         '--version',
         action='version',
@@ -395,24 +433,10 @@ def main():
     # Catch impossible arg combinations so they don't produce strange errors or fail silently
     if (args.no_pyexpr or args.comparable) and not args.dump:
         ap.error(
-            "Arguments 'comparable' and 'no_pyexpr' are not usable without 'dump'.")
-
-    if ((args.try_harder or args.dump)
-            and (args.write_translation_file or args.translation_file or args.language)):
-        ap.error(
-            "Arguments 'try_harder' and/or 'dump' are not usable with the translation "
-            "feature.")
-
-    # Fail early to avoid wasting time going through the files
-    if (args.write_translation_file
-            and not args.clobber
-            and args.write_translation_file.exists()):
-        ap.error(
-            "Output translation file already exists. Pass --clobber to overwrite.")
+            "Options '--comparable' and '--no_pyexpr' require '--dump'.")
 
-    if args.translation_file:
-        with args.translation_file.open('rb') as in_file:
-            args.translations = in_file.read()
+    if args.dump and args.translate:
+        ap.error("Options '--translate' and '--dump' cannot be used together.")
 
     if args.sl_custom_names is not None:
         try:
@@ -462,39 +486,41 @@ def traverse(inpath):
     # If a big file starts near the end, there could be a long time with only one thread running,
     # which is inefficient. Avoid this by starting big files first.
     worklist.sort(key=lambda x: x.stat().st_size, reverse=True)
-    worklist = [(args, x) for x in worklist]
 
-    results = []
-    if args.processes > 1:
-        with Pool(args.processes) as pool:
-            for result in pool.imap(worker, worklist, 1):
-                results.append(result)
-
-                for line in result.log_contents:
-                    print(line)
-
-                print("")
-
-    else:
-        for result in map(worker, worklist):
-            results.append(result)
-
-            for line in result.log_contents:
-                print(line)
-
-            print("")
-
-    if args.write_translation_file:
-        print(f'Writing translations to {args.write_translation_file}...')
-        translated_dialogue = {}
-        translated_strings = {}
-        for result in results:
-            if not result.value:
-                continue
-            translated_dialogue.update(pickle_loads(result.value[0]))
-            translated_strings.update(result.value[1])
-        with args.write_translation_file.open('wb') as out_file:
-            pickle_safe_dump((args.language, translated_dialogue, translated_strings), out_file)
+    translation_errors = 0
+    args.translator = None
+    if args.translate:
+        # For translation, we first need to analyse all files for translation data.
+        # We then collect all of these back into the main process, and build a
+        # data structure of all of them. This data structure is then passed to
+        # all decompiling processes.
+        # Note: because this data contains some FakeClasses, multiprocessing cannot
+        # pass it between processes (it pickles them, and pickle will complain about
+        # these). Therefore, we need to manually pickle and unpickle it.
+
+        print("Step 1: analysing files for translations.")
+        results = run_workers(worker_tl, args, worklist, args.processes)
+
+        print('Compiling extracted translations.')
+        tl_dialogue = {}
+        tl_strings = {}
+        for entry in results:
+            if entry.state != "ok":
+                translation_errors += 1
+
+            if entry.value:
+                new_dialogue, new_strings = pickle_loads(entry.value)
+                tl_dialogue.update(new_dialogue)
+                tl_strings.update(new_strings)
+
+        translator = translate.Translator(None)
+        translator.dialogue = tl_dialogue
+        translator.strings = tl_strings
+        args.translator = pickle_safe_dumps(translator)
+
+        print("Step 2: decompiling.")
+
+    results = run_workers(worker_common, args, worklist, args.processes)
 
     success = sum(result.state == "ok" for result in results)
     skipped = sum(result.state == "skip" for result in results)
@@ -507,10 +533,7 @@ def traverse(inpath):
     print(f"{55 * '-'}")
     print(f"Processed {plural_s(len(results), 'file')}.")
 
-    if args.write_translation_file:
-        print(f"> {plural_s(success, 'file')} were successfully analyzed.")
-    else:
-        print(f"> {plural_s(success, 'file')} were successfully decompiled.")
+    print(f"> {plural_s(success, 'file')} were successfully decompiled.")
 
     if broken:
         print(f"> {plural_s(broken, 'file')} did not have the correct header, "
@@ -522,6 +545,9 @@ def traverse(inpath):
     if skipped:
         print(f"> {plural_s(skipped, 'file')} were skipped as the output file already existed.")
 
+    if translation_errors:
+        print(f"> {plural_s(translation_errors, 'file')} failed translation extraction.")
+
 
     if skipped:
         print("")