Skip to content

Commit

Permalink
Merge pull request #65 from vstinner/ext_arg
Browse files Browse the repository at this point in the history
Prevent EXTENDED_ARG to propagate past ConcreteBytecode
  • Loading branch information
MatthieuDartiailh committed Aug 7, 2020
2 parents f9bc295 + 056ec38 commit ccb7ef9
Show file tree
Hide file tree
Showing 8 changed files with 465 additions and 67 deletions.
4 changes: 2 additions & 2 deletions bytecode/bytecode.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,10 +178,10 @@ def __iter__(self):
yield instr

def _check_instr(self, instr):
if not isinstance(instr, (Label, SetLineno, Instr, _bytecode.ConcreteInstr)):
if not isinstance(instr, (Label, SetLineno, Instr)):
raise ValueError(
"Bytecode must only contain Label, "
"SetLineno, Instr and ConcreteInstr objects, "
"SetLineno, and Instr objects, "
"but %s was found" % type(instr).__name__
)

Expand Down
9 changes: 4 additions & 5 deletions bytecode/cfg.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,9 @@ def __iter__(self):
instr = self[index]
index += 1

if not isinstance(instr, (SetLineno, Instr, ConcreteInstr)):
if not isinstance(instr, (SetLineno, Instr)):
raise ValueError(
"BasicBlock must only contain SetLineno, "
"Instr and ConcreteInstr objects, "
"BasicBlock must only contain SetLineno and Instr objects, "
"but %s was found" % instr.__class__.__name__
)

Expand Down Expand Up @@ -386,7 +385,7 @@ def from_bytecode(bytecode):
continue

# don't copy SetLineno objects
if isinstance(instr, (Instr, ConcreteInstr)):
if isinstance(instr, Instr):
instr = instr.copy()
if isinstance(instr.arg, Label):
jumps.append(instr)
Expand Down Expand Up @@ -419,7 +418,7 @@ def to_bytecode(self):

for instr in block:
# don't copy SetLineno objects
if isinstance(instr, (Instr, ConcreteInstr)):
if isinstance(instr, Instr):
instr = instr.copy()
if isinstance(instr.arg, BasicBlock):
jumps.append(instr)
Expand Down
113 changes: 68 additions & 45 deletions bytecode/concrete.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,53 +210,18 @@ def from_code(code, *, extended_arg=False):
instructions.append(instr)
offset += instr.size

bytecode = ConcreteBytecode()

# replace jump targets with blocks
# HINT : in some cases Python generate useless EXTENDED_ARG opcode
# with a value of zero. Such opcodes do not increases the size of the
# following opcode the way a normal EXTENDED_ARG does. As a
# consequence, they need to be tracked manually as otherwise the
# offsets in jump targets can end up being wrong.
if not extended_arg:
nb_extended_args = 0
extended_arg = None
index = 0
while index < len(instructions):
instr = instructions[index]

if instr.name == "EXTENDED_ARG":
nb_extended_args += 1
if extended_arg is not None:
if not _WORDCODE:
raise ValueError("EXTENDED_ARG followed " "by EXTENDED_ARG")
extended_arg = (extended_arg << 8) + instr.arg
else:
extended_arg = instr.arg

del instructions[index]
continue

if extended_arg is not None:
if _WORDCODE:
arg = (extended_arg << 8) + instr.arg
else:
arg = (extended_arg << 16) + instr.arg
extended_arg = None

instr = ConcreteInstr(
instr.name,
arg,
lineno=instr.lineno,
extended_args=nb_extended_args,
)
instructions[index] = instr
nb_extended_args = 0

index += 1

if extended_arg is not None:
raise ValueError("EXTENDED_ARG at the end of the code")
# The list is modified in place
bytecode._remove_extended_args(instructions)

bytecode = ConcreteBytecode()
bytecode.name = code.co_name
bytecode.filename = code.co_filename
bytecode.flags = code.co_flags
Expand All @@ -275,9 +240,10 @@ def from_code(code, *, extended_arg=False):
bytecode[:] = instructions
return bytecode

def _normalize_lineno(self):
lineno = self.first_lineno
for instr in self:
@staticmethod
def _normalize_lineno(instructions, first_lineno):
lineno = first_lineno
for instr in instructions:
# if instr.lineno is not set, it's inherited from the previous
# instruction, or from self.first_lineno
if instr.lineno is not None:
Expand All @@ -290,7 +256,7 @@ def _assemble_code(self):
offset = 0
code_str = []
linenos = []
for lineno, instr in self._normalize_lineno():
for lineno, instr in self._normalize_lineno(self, self.first_lineno):
code_str.append(instr.assemble())
linenos.append((offset, lineno))
offset += instr.size
Expand Down Expand Up @@ -335,6 +301,58 @@ def _assemble_lnotab(first_lineno, linenos):

return b"".join(lnotab)

@staticmethod
def _remove_extended_args(instructions):
# replace jump targets with blocks
# HINT : in some cases Python generate useless EXTENDED_ARG opcode
# with a value of zero. Such opcodes do not increases the size of the
# following opcode the way a normal EXTENDED_ARG does. As a
# consequence, they need to be tracked manually as otherwise the
# offsets in jump targets can end up being wrong.
nb_extended_args = 0
extended_arg = None
index = 0
while index < len(instructions):
instr = instructions[index]

# Skip SetLineno meta instruction
if isinstance(instr, SetLineno):
index += 1
continue

if instr.name == "EXTENDED_ARG":
nb_extended_args += 1
if extended_arg is not None:
if not _WORDCODE:
raise ValueError("EXTENDED_ARG followed " "by EXTENDED_ARG")
extended_arg = (extended_arg << 8) + instr.arg
else:
extended_arg = instr.arg

del instructions[index]
continue

if extended_arg is not None:
if _WORDCODE:
arg = (extended_arg << 8) + instr.arg
else:
arg = (extended_arg << 16) + instr.arg
extended_arg = None

instr = ConcreteInstr(
instr.name,
arg,
lineno=instr.lineno,
extended_args=nb_extended_args,
)
instructions[index] = instr
nb_extended_args = 0

index += 1

if extended_arg is not None:
raise ValueError("EXTENDED_ARG at the end of the code")

def compute_stacksize(self):
bytecode = self.to_bytecode()
cfg = _bytecode.ControlFlowGraph.from_bytecode(bytecode)
Expand Down Expand Up @@ -386,10 +404,15 @@ def to_code(self, stacksize=None):
)

def to_bytecode(self):

# Copy instruction and remove extended args if any (in-place)
c_instructions = self[:]
self._remove_extended_args(c_instructions)

# find jump targets
jump_targets = set()
offset = 0
for instr in self:
for instr in c_instructions:
if isinstance(instr, SetLineno):
continue
target = instr.get_jump_target(offset)
Expand All @@ -404,7 +427,7 @@ def to_bytecode(self):
offset = 0
ncells = len(self.cellvars)

for lineno, instr in self._normalize_lineno():
for lineno, instr in self._normalize_lineno(c_instructions, self.first_lineno):
if offset in jump_targets:
label = Label()
labels[offset] = label
Expand Down
7 changes: 6 additions & 1 deletion bytecode/instr.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,12 @@ def __init__(self, name, arg=UNSET, *, lineno=None):
self._set(name, arg, lineno)

def _check_arg(self, name, opcode, arg):
if name == "EXTENDED_ARG":
raise ValueError(
"only concrete instruction can contain EXTENDED_ARG, "
"highlevel instruction can represent arbitrary argument without it"
)

if opcode >= _opcode.HAVE_ARGUMENT:
if arg is UNSET:
raise ValueError("operation %s requires an argument" % name)
Expand Down Expand Up @@ -200,7 +206,6 @@ def _set(self, name, arg, lineno):

self._check_arg(name, opcode, arg)

opcode = _opcode.opmap[name]
self._name = name
self._opcode = opcode
self._arg = arg
Expand Down
14 changes: 0 additions & 14 deletions bytecode/tests/test_cfg.py
Original file line number Diff line number Diff line change
Expand Up @@ -643,20 +643,6 @@ def test_handling_of_set_lineno(self):
)
self.assertEqual(code.compute_stacksize(), 1)

def test_handling_of_extended_arg(self):
code = Bytecode()
code.first_lineno = 3
code.extend(
[
Instr("LOAD_CONST", 7),
Instr("STORE_NAME", "x"),
Instr("EXTENDED_ARG", 1),
Instr("LOAD_CONST", 8),
Instr("STORE_NAME", "y"),
]
)
self.assertEqual(code.compute_stacksize(), 1)

def test_invalid_stacksize(self):
code = Bytecode()
code.extend([Instr("STORE_NAME", "x")])
Expand Down

0 comments on commit ccb7ef9

Please sign in to comment.