Skip to content

Commit

Permalink
Fix and improve expression parsing in jsinterp, thx dirkf
Browse files Browse the repository at this point in the history
Improve BODMAS (fix player 590f65a6)
Support more weird expressions with multiple unary ops
Treat Infinity like NaN
Cache operator list

From: ytdl-org/youtube-dl@fd8242e
      ytdl-org/youtube-dl@e0094e6
  • Loading branch information
Taapat committed Jun 27, 2024
1 parent f0c008e commit db3434f
Showing 1 changed file with 37 additions and 15 deletions.
52 changes: 37 additions & 15 deletions src/jsinterp.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,15 +154,16 @@ def remove_quotes(s):

# NB In principle NaN cannot be checked by membership.
# Here all NaN values are actually this one, so _NaN is _NaN,
# although _NaN != _NaN.
# although _NaN != _NaN. Ditto Infinity.

_NaN = float('nan')
_Infinity = float('inf')


def _js_bit_op(op):

def zeroise(x):
return 0 if x in (None, JSUndefined, _NaN) else x
return 0 if x in (None, JSUndefined, _NaN, _Infinity) else x

def wrapped(a, b):
return op(zeroise(a), zeroise(b)) & 0xffffffff
Expand All @@ -183,7 +184,7 @@ def wrapped(a, b):
def _js_div(a, b):
if JSUndefined in (a, b) or not (a or b):
return _NaN
return operator.truediv(a or 0, b) if b else float('inf')
return operator.truediv(a or 0, b) if b else _Infinity


def _js_mod(a, b):
Expand Down Expand Up @@ -310,9 +311,20 @@ def __setitem__(self, key, value):
class JSInterpreter(object):
__named_object_counter = 0

OP_CHARS = None

def __init__(self, code, objects=None):
self.code, self._functions = code, {}
self._objects = {} if objects is None else objects
if type(self).OP_CHARS is None:
type(self).OP_CHARS = self.OP_CHARS = self.__op_chars()

@classmethod
def __op_chars(cls):
op_chars = set(';,[')
for op in cls._all_operators():
op_chars.update(op[0])
return op_chars

def _named_object(self, namespace, obj):
self.__named_object_counter += 1
Expand All @@ -322,15 +334,12 @@ def _named_object(self, namespace, obj):

@classmethod
def _separate(cls, expr, delim=',', max_split=None, skip_delims=None):
OP_CHARS = '+-*/%&|^=<>!,;{}:['
if not expr:
return
# collections.Counter() is ~10% slower in both 2.7 and 3.9
counters = {k: 0 for k in _MATCHING_PARENS.values()}
start, splits, pos, delim_len = 0, 0, 0, len(delim) - 1
in_quote, escaping = None, False
after_op, in_regex_char_group = True, False

in_quote, escaping, after_op, in_regex_char_group = None, False, True, False
for idx, char in enumerate(expr):
paren_delta = 0
if not in_quote:
Expand All @@ -348,7 +357,7 @@ def _separate(cls, expr, delim=',', max_split=None, skip_delims=None):
in_regex_char_group = char == '['
escaping = not escaping and in_quote and char == '\\'
in_unary_op = (not in_quote and not in_regex_char_group and after_op not in (True, False) and char in '-+')
after_op = not in_quote and (char in OP_CHARS or paren_delta > 0 or (after_op and char.isspace()))
after_op = not in_quote and (char in cls.OP_CHARS or paren_delta > 0 or (after_op and char.isspace()))

if char != delim[pos] or any(counters.values()) or in_quote or in_unary_op:
pos = 0
Expand All @@ -374,10 +383,12 @@ def _separate_at_paren(cls, expr, delim=None):
return separated[0][1:].strip(), separated[1].strip()

@staticmethod
def _all_operators():
return chain(
# Ref: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/Operator_Precedence
_SC_OPERATORS, _LOG_OPERATORS, _COMP_OPERATORS, _OPERATORS)
def _all_operators(_cached=[]):
if not _cached:
_cached.extend(chain(
# Ref: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/Operator_Precedence
_SC_OPERATORS, _LOG_OPERATORS, _COMP_OPERATORS, _OPERATORS))
return _cached

def _operator(self, op, left_val, right_expr, local_vars, allow_recursion):
if op in ('||', '&&'):
Expand Down Expand Up @@ -496,7 +507,6 @@ def dict_item(key, val):
expr = self._dump(inner, local_vars) + outer

if expr.startswith('('):

m = re.match(r'\((?P<d>[a-z])%(?P<e>[a-z])\.length\+(?P=e)\.length\)%(?P=e)\.length', expr)
if m:
# short-cut eval of frequently used `(d%e.length+e.length)%e.length`, worth ~6% on `pytest -k test_nsig`
Expand Down Expand Up @@ -656,7 +666,7 @@ def dict_item(key, val):
(?P<op>{_OPERATOR_RE})?
=(?!=)(?P<expr>.*)$
)|(?P<return>
(?!if|return|true|false|null|undefined)(?P<name>{_NAME_RE})$
(?!if|return|true|false|null|undefined|NaN|Infinity)(?P<name>{_NAME_RE})$
)|(?P<indexing>
(?P<in>{_NAME_RE})\[(?P<idx>.+)\]$
)|(?P<attribute>
Expand Down Expand Up @@ -695,6 +705,8 @@ def dict_item(key, val):
return JSUndefined, should_return
elif expr == 'NaN':
return _NaN, should_return
elif expr == 'Infinity':
return _Infinity, should_return

elif md.get('return'):
return local_vars[m.group('name')], should_return
Expand Down Expand Up @@ -723,18 +735,28 @@ def dict_item(key, val):
right_expr = separated.pop()
# handle operators that are both unary and binary, minimal BODMAS
if op in ('+', '-'):
# simplify/adjust consecutive instances of these operators
undone = 0
while len(separated) > 1 and not separated[-1].strip():
undone += 1
separated.pop()
if op == '-' and undone % 2 != 0:
right_expr = op + right_expr
elif op == '+':
while len(separated) > 1 and separated[-1].strip() in self.OP_CHARS:
right_expr = separated.pop() + right_expr
# hanging op at end of left => unary + (strip) or - (push right)
left_val = separated[-1]
for dm_op in ('*', '%', '/', '**'):
bodmas = tuple(self._separate(left_val, dm_op, skip_delims=skip_delim))
if len(bodmas) > 1 and not bodmas[-1].strip():
expr = op.join(separated) + op + right_expr
right_expr = None
if len(separated) > 1:
separated.pop()
right_expr = op.join((left_val, right_expr))
else:
separated = [op.join((left_val, right_expr))]
right_expr = None
break
if right_expr is None:
continue
Expand Down

0 comments on commit db3434f

Please sign in to comment.