Skip to content

Commit

Permalink
Fix #24 in screen language 1 too by doing analysis on the decompiled …
Browse files Browse the repository at this point in the history
…python screen code keyword arguments to see if they need to be guarded by parenthesis to be parsed correctly
  • Loading branch information
CensoredUsername committed Jan 2, 2015
1 parent 665e4b0 commit f983608
Show file tree
Hide file tree
Showing 2 changed files with 167 additions and 8 deletions.
13 changes: 6 additions & 7 deletions decompiler/screendecompiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@

import re

from util import DecompilerBase, reconstruct_paraminfo
from util import DecompilerBase, reconstruct_paraminfo, simple_expression_guard
import codegen

# Main API
Expand Down Expand Up @@ -78,13 +78,13 @@ def print_screen(self, ast):
self.write("tag %s" % ast.tag)
if ast.zorder and ast.zorder != '0':
self.indent()
self.write("zorder %s" % ast.zorder)
self.write("zorder %s" % simple_expression_guard(ast.zorder))
if ast.modal:
self.indent()
self.write("modal %s" % ast.modal)
self.write("modal %s" % simple_expression_guard(ast.modal))
if ast.variant and ast.variant != "None":
self.indent()
self.write("variant %s" % ast.variant)
self.write("variant %s" % simple_expression_guard(ast.variant))

if not self.decompile_python:
self.indent()
Expand Down Expand Up @@ -151,7 +151,7 @@ def print_node(self, header, code):

def print_arguments(self, args, kwargs, multiline=True):
if args:
self.write(" " + " ".join(['(%s)' % i if ' ' in i else i for i in args]))
self.write(" " + " ".join([simple_expression_guard(i) for i in args]))
kwargs = dict(kwargs)

# remove renpy-internal kwargs
Expand All @@ -161,8 +161,7 @@ def print_arguments(self, args, kwargs, multiline=True):
del kwargs['scope']

for key, value in kwargs.iteritems():
if ' ' in value:
kwargs[key] = '(%s)' % value
value = simple_expression_guard(value)
if multiline or (self.force_multiline_kwargs and kwargs):
self.write(":")
self.indent_level += 1
Expand Down
162 changes: 161 additions & 1 deletion decompiler/util.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import unicode_literals
import sys
import re

class DecompilerBase(object):
def __init__(self, out_file=None, indentation = ' '):
Expand Down Expand Up @@ -122,4 +123,163 @@ def string_escape(s):
s = s.replace('"', '\\"')
s = s.replace('\n', '\\n')
s = s.replace('\t', '\\t')
return s
return s

# Note that the way or/and/not/in/is are parsed are completely broken
# due to re.escape not escaping \b to \\b, instead to \\\x08
# but since it's broken, we have to parse it broken too
OPERATORS = ['or\b', 'and\b', 'not\b', 'in\b', 'is\b',
'<', '<=', '>', '>=', '<>', '!=', '==',
'|', '^', '&', '<<', '>>', '+', '-',
'*', '/', '//', '%', '~', '**', ]
# keywords used by ren'py's parser
KEYWORDS = set(['$', 'as', 'at', 'behind', 'call', 'expression', 'hide',
'if', 'in', 'image', 'init', 'jump', 'menu', 'onlayer',
'python', 'return', 'scene', 'set', 'show', 'with',
'while', 'zorder', 'transform'])

operator_regexp = "|".join(re.escape(i) for i in OPERATORS)

word_regexp = ur'[a-zA-Z_\u00a0-\ufffd][0-9a-zA-Z_\u00a0-\ufffd]*'

def simple_expression_guard(s):
# Some things we deal with are supposed to be parsed by
# ren'py's Lexer.simple_expression but actually cannot
# be parsed by it. figure out if this is the case
# a slightly more naive approach woudl be to check
# for spaces in it and surround it with () if necessary
# but we're not naive
s = s.strip()

if Lexer(s).simple_expression():
return s
else:
return "(%s)" % s

class Lexer(object):
# special lexer for simple_expressions the ren'py way
# false negatives aren't dangerous. but false positives are
def __init__(self, string):
self.pos = 0
self.length = len(string)
self.string = string

def re(self, regexp):
# see if regexp matches at self.string[self.pos].
# if it does, increment self.pos
if self.length == self.pos:
return None

match = re.compile(regexp, re.DOTALL).match(self.string, self.pos)
if not match:
return None

self.pos = match.end()
return match.group(0)

def eol(self):
# eat the next whitespace and check for the end of this simple_expression
self.re(ur"(\s+|\\\n)+")
return self.pos >= self.length

def match(self, regexp):
# strip whitespace and match regexp
self.re(ur"(\s+|\\\n)+")
return self.re(regexp)

def python_string(self):
# parse strings the ren'py way (don't parse docstrings, no b/r in front allowed)
return self.match(ur"""(u?(?P<a>"|').*?(?<=[^\\])(?:\\\\)*(?P=a))""")

def container(self):
# parses something enclosed by [], () or {}'s. keyword something
containers = {"{": "}", "[": "]", "(": ")"}
if self.eol():
return None

c = self.string[self.pos]
if c not in containers:
return None
self.pos += 1

c = containers[c]

while not self.eol():
if c == self.string[self.pos]:
self.pos += 1
return True

if self.python_string() or self.container():
continue

self.pos += 1

return None

def number(self):
# parses a number, float or int (but not forced long)
return self.match(r'(\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?')

def word(self):
# parses a word
return self.match(word_regexp)

def name(self):
# parses a word unless it's in KEYWORDS.
pos = self.pos
word = self.word()

if word in KEYWORDS:
self.pos = pos
return None

return word

def simple_expression(self):
# test if the start string was a simple expression
start = self.pos

while True:

# consume any amount of operators
while self.match(operator_regexp):
pass

# no end of line allowed after operators
if self.eol():
break

# parse anything which can be called or have attributes requested
if not(self.python_string() or
self.number() or
self.container() or
self.name()):
break

while True:
# skip whitespace
if self.eol():
break

# if the previous was followed by a dot, there should be a word after it
if self.match(r'\.'):
if not self.word():
# ren'py errors here. I just stop caring
return False

continue

# parses slices, function calls, and postfix {}
if self.container():
continue

break

# if we're not followed by an operator, this is the end
if self.match(operator_regexp):
continue

break

# are we at the end of the simple expression?
return self.eol()

0 comments on commit f983608

Please sign in to comment.