Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Dev: translation refactoring #226

Merged
merged 4 commits into from
Jun 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 22 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,28 @@ legacy-dev (python 2):[![Build Status](https://github.com/CensoredUsername/unrpy

## Usage

This tool can be run as a command line tool, as a library, or injected into the game itself. To use it as a command line tool, a local python installation is required.
This tool can be run as a command line tool, as a library, or injected into the game itself. To use it as a command line tool, a local python installation is required. To use it for its default function (decompiling),
you can simply pass it the files you want to decompile as arguments, or pass it the folder
containing them. For example, `python unrpyc.py file1.rpyc file2.rpyc` or `python unrpyc.py folder/`

### Additional features

#### Translation:
For easier reading of decompiled script files, unrpyc can use translation data contained in a game
to automatically convert the emitted script files to another language. You can find the supported
languages for a game by looking in the `game/tl` folder of said game (`None` being the default).

To use this feature, simply pass the name of the target language (which has to match the name found
in the tl folder) with the `-t`/`--translate` option. For example, if a game has a folder
`path/to/renpyapp/game/tl/french`, then you can run the command:
`python unrpyc.py /path/to/renpyapp/ -t french`

#### Raw ast view:
Instead of decompiling, the tool can simply show the contents of an rpyc file. This is mainly useful
for bug reports and the development of unrpyc. You can pass the `-d`/`--dump` flag to activate this
feature.

Note: this generates a _lot_ of output.

## Compatibility

Expand Down
252 changes: 139 additions & 113 deletions unrpyc.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,8 @@ def cpu_count():
import decompiler
import deobfuscate
from decompiler import astdump, translate
from decompiler.renpycompat import (pickle_safe_loads, pickle_safe_dumps, pickle_safe_dump,
pickle_loads, pickle_detect_python2)
from decompiler.renpycompat import (pickle_safe_loads, pickle_safe_dumps, pickle_loads,
pickle_detect_python2)


class Context:
Expand Down Expand Up @@ -153,6 +153,20 @@ def read_ast_from_file(in_file, context):
return stmts


def get_ast(in_file, try_harder, context):
    """
    Loads the AST contained in the rpyc file found at path in_file.
    With try_harder set, the file is read through deobfuscate.read_ast in an attempt to work
    around obfuscation techniques. Without it, the file is read as a normal rpyc file.
    Returns the loaded AST.
    """
    with in_file.open('rb') as rpyc_file:
        # Both readers take the opened file and the context, so just pick the right one.
        reader = deobfuscate.read_ast if try_harder else read_ast_from_file
        return reader(rpyc_file, context)


def decompile_rpyc(input_filename, context, overwrite=False, try_harder=False, dump=False,
comparable=False, no_pyexpr=False, translator=None, init_offset=False,
sl_custom_names=None):
Expand All @@ -173,12 +187,7 @@ def decompile_rpyc(input_filename, context, overwrite=False, try_harder=False, d
return

context.log(f'Decompiling {input_filename} to {out_filename.name} ...')

with input_filename.open('rb') as in_file:
if try_harder:
ast = deobfuscate.read_ast(in_file, context)
else:
ast = read_ast_from_file(in_file, context)
ast = get_ast(input_filename, try_harder, context)

with out_filename.open('w', encoding='utf-8') as out_file:
if dump:
Expand All @@ -191,40 +200,54 @@ def decompile_rpyc(input_filename, context, overwrite=False, try_harder=False, d

context.set_state('ok')

def extract_translations(input_filename, language, context):
context.log(f'Extracting translations from {input_filename}...')

with input_filename.open('rb') as in_file:
ast = read_ast_from_file(in_file)
def worker_tl(arg_tup):
"""
This function implements the first pass of the translation feature. It gathers TL-data from
the given rpyc file, to be used by the common worker to translate while decompiling.
arg_tup is (args, filename). Returns the Context used for this file: on success its result is
the pickled (dialogue, strings) tuple and its state is 'ok', on failure the exception is
recorded through set_error and the traceback is written to its log.
"""
args, filename = arg_tup
context = Context()

try:
context.log(f'Extracting translations from {filename}...')
ast = get_ast(filename, args.try_harder, context)

tl_inst = translate.Translator(args.translate, True)
tl_inst.translate_dialogue(ast)

# this object has to be sent back to the main process, for which it needs to be pickled.
# the default pickler cannot pickle fake classes correctly, so manually handle that here.
context.set_result(pickle_safe_dumps((tl_inst.dialogue, tl_inst.strings)))
context.set_state("ok")

translator = translate.Translator(language, True)
translator.translate_dialogue(ast)
# we pickle and unpickle this manually because the regular unpickler will choke on it
return pickle_safe_dumps(translator.dialogue), translator.strings
except Exception as e:
context.set_error(e)
context.log(f'Error while extracting translations from {filename}:')
context.log(traceback.format_exc())

return context


def worker(arg_tup):
def worker_common(arg_tup):
"""
The core of unrpyc. arg_tup is (args, filename). This worker will unpack the file at filename,
decompile it, and write the output to its corresponding rpy file.
Returns the Context used for this file.
"""

args, filename = arg_tup
context = Context()

try:
if args.write_translation_file:
result = extract_translations(filename, args.language, context)
context.set_result(result)

else:
if args.translation_file is not None:
translator = translate.Translator(None)
translator.language, translator.dialogue, translator.strings = (
pickle_loads(args.translations))
else:
translator = None
if args.translator:
args.translator = pickle_loads(args.translator)

decompile_rpyc(
filename, context, args.clobber, try_harder=args.try_harder, dump=args.dump,
no_pyexpr=args.no_pyexpr, comparable=args.comparable, translator=translator,
init_offset=args.init_offset, sl_custom_names=args.sl_custom_names
)
try:
decompile_rpyc(
filename, context, overwrite=args.clobber, try_harder=args.try_harder,
dump=args.dump, no_pyexpr=args.no_pyexpr, comparable=args.comparable,
init_offset=args.init_offset, sl_custom_names=args.sl_custom_names,
translator=args.translator)

except Exception as e:
context.set_error(e)
Expand All @@ -234,6 +257,38 @@ def worker(arg_tup):
return context


def _collect_results(result_iter):
    """
    Drains result_iter, printing the log lines of every result followed by an empty line as
    soon as that result is yielded. Returns the results as a list, in the order they were
    yielded.
    """
    results = []
    for result in result_iter:
        results.append(result)

        for line in result.log_contents:
            print(line)

        print("")

    return results


def run_workers(worker, common_args, private_args, parallelism):
    """
    Runs worker once for every entry of private_args. When `parallelism` is greater than one
    this happens in a multiprocessing pool with a max of `parallelism` processes, otherwise the
    workers are run one after another in the current process.
    Workers are called as worker((common_args, private_args[i])).
    Workers should return an instance of `Context` as return value. The log contents of each
    result are printed as the results come in.
    Returns the list of worker results, in the same order as private_args.
    """

    worker_args = ((common_args, x) for x in private_args)

    if parallelism > 1:
        with Pool(parallelism) as pool:
            # A chunksize of 1 hands out the work items to the pool one at a time.
            # The results have to be consumed while the pool is still alive.
            return _collect_results(pool.imap(worker, worker_args, 1))

    return _collect_results(map(worker, worker_args))


def parse_sl_custom_names(unparsed_arguments):
# parse a list of strings in the format
# classname=name-nchildren into {classname: (name, nchildren)}
Expand Down Expand Up @@ -323,32 +378,6 @@ def main():
"Defaults to the amount of hw threads available minus one, disabled when muliprocessing "
"unavailable is.")

ap.add_argument(
'-t',
'--translation-file',
dest='translation_file',
type=Path,
action='store',
default=None,
help="Use the specified file to translate during decompilation")

ap.add_argument(
'-T',
'--write-translation-file',
dest='write_translation_file',
type=Path,
action='store',
default=None,
help="Store translations in the specified file instead of decompiling")

ap.add_argument(
'-l',
'--language',
dest='language',
action='store',
default=None,
help="If writing a translation file, the language of the translations to write")

ap.add_argument(
'--comparable',
dest='comparable',
Expand Down Expand Up @@ -385,6 +414,15 @@ def main():
"potentially followed by a '-', and the amount of children the displayable takes"
"(valid options are '0', '1' or 'many', with 'many' being the default)")

ap.add_argument(
'-t',
'--translate',
dest='translate',
type=str,
action='store',
help="Changes the dialogue language in the decompiled script files, using a translation "
"already present in the tl dir.")

ap.add_argument(
'--version',
action='version',
Expand All @@ -395,24 +433,10 @@ def main():
# Catch impossible arg combinations so they don't produce strange errors or fail silently
if (args.no_pyexpr or args.comparable) and not args.dump:
ap.error(
"Arguments 'comparable' and 'no_pyexpr' are not usable without 'dump'.")

if ((args.try_harder or args.dump)
and (args.write_translation_file or args.translation_file or args.language)):
ap.error(
"Arguments 'try_harder' and/or 'dump' are not usable with the translation "
"feature.")

# Fail early to avoid wasting time going through the files
if (args.write_translation_file
and not args.clobber
and args.write_translation_file.exists()):
ap.error(
"Output translation file already exists. Pass --clobber to overwrite.")
"Options '--comparable' and '--no_pyexpr' require '--dump'.")

if args.translation_file:
with args.translation_file.open('rb') as in_file:
args.translations = in_file.read()
if args.dump and args.translate:
ap.error("Options '--translate' and '--dump' cannot be used together.")

if args.sl_custom_names is not None:
try:
Expand Down Expand Up @@ -462,39 +486,41 @@ def traverse(inpath):
# If a big file starts near the end, there could be a long time with only one thread running,
# which is inefficient. Avoid this by starting big files first.
worklist.sort(key=lambda x: x.stat().st_size, reverse=True)
worklist = [(args, x) for x in worklist]

results = []
if args.processes > 1:
with Pool(args.processes) as pool:
for result in pool.imap(worker, worklist, 1):
results.append(result)

for line in result.log_contents:
print(line)

print("")

else:
for result in map(worker, worklist):
results.append(result)

for line in result.log_contents:
print(line)

print("")

if args.write_translation_file:
print(f'Writing translations to {args.write_translation_file}...')
translated_dialogue = {}
translated_strings = {}
for result in results:
if not result.value:
continue
translated_dialogue.update(pickle_loads(result.value[0]))
translated_strings.update(result.value[1])
with args.write_translation_file.open('wb') as out_file:
pickle_safe_dump((args.language, translated_dialogue, translated_strings), out_file)
translation_errors = 0
args.translator = None
if args.translate:
# For translation, we first need to analyse all files for translation data.
# We then collect all of these back into the main process, and build a
# datastructure of all of them. This datastructure is then passed to
# all decompiling processes.
# Note: because this data contains some FakeClasses, Multiprocessing cannot
# pass it between processes (it pickles them, and pickle will complain about
# these). Therefore, we need to manually pickle and unpickle it.

print("Step 1: analysing files for translations.")
results = run_workers(worker_tl, args, worklist, args.processes)

print('Compiling extracted translations.')
tl_dialogue = {}
tl_strings = {}
for entry in results:
if entry.state != "ok":
translation_errors += 1

if entry.value:
new_dialogue, new_strings = pickle_loads(entry.value)
tl_dialogue.update(new_dialogue)
tl_strings.update(new_strings)

translator = translate.Translator(None)
translator.dialogue = tl_dialogue
translator.strings = tl_strings
args.translator = pickle_safe_dumps(translator)

print("Step 2: decompiling.")

results = run_workers(worker_common, args, worklist, args.processes)

success = sum(result.state == "ok" for result in results)
skipped = sum(result.state == "skip" for result in results)
Expand All @@ -507,10 +533,7 @@ def traverse(inpath):
print(f"{55 * '-'}")
print(f"Processed {plural_s(len(results), 'file')}.")

if args.write_translation_file:
print(f"> {plural_s(success, 'file')} were successfully analyzed.")
else:
print(f"> {plural_s(success, 'file')} were successfully decompiled.")
print(f"> {plural_s(success, 'file')} were successfully decompiled.")

if broken:
print(f"> {plural_s(broken, 'file')} did not have the correct header, "
Expand All @@ -522,6 +545,9 @@ def traverse(inpath):
if skipped:
print(f"> {plural_s(skipped, 'file')} were skipped as the output file already existed.")

if translation_errors:
print(f"> {plural_s(translation_errors, 'file')} failed translation extraction.")


if skipped:
print("")
Expand Down