# An exploration to Python VM

Reference to https://leanpub.com/insidethepythonvirtualmachine/read#leanpub-auto-code-objects

## PyCodeObject

In [1]:
def foo(name, age):
    ''' Get the information of a person. '''
    born = 2023 - age
    return f"hello {name}, born at {born}!"

foo.__code__

<code object foo at 0x7f126c907b30, file "/tmp/ipykernel_3378401/73030821.py", line 1>

In [2]:
dir(foo.__code__)

['__class__',
 '__delattr__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 'co_argcount',
 'co_cellvars',
 'co_code',
 'co_consts',
 'co_filename',
 'co_firstlineno',
 'co_flags',
 'co_freevars',
 'co_kwonlyargcount',
 'co_lnotab',
 'co_name',
 'co_names',
 'co_nlocals',
 'co_posonlyargcount',
 'co_stacksize',
 'co_varnames',
 'replace']

In [3]:
print(foo.__code__.__doc__)

code(argcount, posonlyargcount, kwonlyargcount, nlocals, stacksize,
      flags, codestring, constants, names, varnames, filename, name,
      firstlineno, lnotab[, freevars[, cellvars]])

Create a code object.  Not for the faint of heart.


In [4]:
import dis

In [5]:
dis.dis(foo)

  3           0 LOAD_CONST               1 (2023)
              2 LOAD_FAST                1 (age)
              4 BINARY_SUBTRACT
              6 STORE_FAST               2 (born)

  4           8 LOAD_CONST               2 ('hello ')
             10 LOAD_FAST                0 (name)
             12 FORMAT_VALUE             0
             14 LOAD_CONST               3 (', born at ')
             16 LOAD_FAST                2 (born)
             18 FORMAT_VALUE             0
             20 LOAD_CONST               4 ('!')
             22 BUILD_STRING             5
             24 RETURN_VALUE


In [6]:
def call_foo():
    def foo(name):
        return f"hello {name}!"
    return foo("world")

dis.dis(call_foo)

  2           0 LOAD_CONST               1 (<code object foo at 0x7f126c9075b0, file "/tmp/ipykernel_3378401/2804291229.py", line 2>)
              2 LOAD_CONST               2 ('call_foo.<locals>.foo')
              4 MAKE_FUNCTION            0
              6 STORE_FAST               0 (foo)

  4           8 LOAD_FAST                0 (foo)
             10 LOAD_CONST               3 ('world')
             12 CALL_FUNCTION            1
             14 RETURN_VALUE

Disassembly of <code object foo at 0x7f126c9075b0, file "/tmp/ipykernel_3378401/2804291229.py", line 2>:
  3           0 LOAD_CONST               1 ('hello ')
              2 LOAD_FAST                0 (name)
              4 FORMAT_VALUE             0
              6 LOAD_CONST               2 ('!')
              8 BUILD_STRING             3
             10 RETURN_VALUE


Let's explore some essential attributes of the code object.

**co_names** is a tuple containing global attributes and methods used inside the scope.

In [7]:
call_foo.__code__.co_names

()

**co_varname** is the tuple containing local variable names used in function.

In [8]:
call_foo.__code__.co_varnames

('foo',)

**co_consts** returns the literals used by bytecode.

The global `foo` method should display richer details

In [9]:
call_foo.__code__.co_consts

(None,
 <code object foo at 0x7f126c9075b0, file "/tmp/ipykernel_3378401/2804291229.py", line 2>,
 'call_foo.<locals>.foo',
 'world')

In [10]:
foo.__code__.co_names

()

In [11]:
foo.__code__.co_varnames

('name', 'age', 'born')

In [12]:
foo.__code__.co_consts

(' Get the information of a person. ', 2023, 'hello ', ', born at ', '!')

## Alter the Code Object

Let's define a naive function and get bytecode first.

Reference https://stackoverflow.com/questions/33348067/modifying-python-bytecode

In [13]:
def fact():
    a = 0
    a = 10
    return a
dis.dis(fact)

  2           0 LOAD_CONST               1 (0)
              2 STORE_FAST               0 (a)

  3           4 LOAD_CONST               2 (10)
              6 STORE_FAST               0 (a)

  4           8 LOAD_FAST                0 (a)
             10 RETURN_VALUE


In [14]:
fact.__code__.co_code

b'd\x01}\x00d\x02}\x00|\x00S\x00'

By default, the `fact()` should get 10

In [15]:
fact()

10

Let's alter the code by removing the `a=1` line and make simply return 0

In [16]:
fact_co_code = fact.__code__.co_code
new_co_code = fact_co_code[:2] + fact_co_code[8:]
dis.dis(new_co_code)

          0 LOAD_CONST               1 (1)
          2 LOAD_FAST                0 (0)
          4 RETURN_VALUE


In [17]:
from typing import *
import types
def patch(frame_or_func: Union[types.FrameType, types.FunctionType]):
    fact_code = frame_or_func.f_code if isinstance(frame_or_func, types.FrameType) else frame_or_func.__code__
    new_co_code = fact_code.co_code[:2] + fact_code.co_code[8:]
    print('dis', dis.dis(new_co_code))
    new_code = types.CodeType( 
        fact_code.co_argcount,
        fact_code.co_kwonlyargcount, 
        fact_code.co_posonlyargcount, 
        fact_code.co_nlocals,
        fact_code.co_stacksize,
        fact_code.co_flags,
        new_co_code,  # fn_code.co_code: this you changed
        fact_code.co_consts,
        fact_code.co_names,
        fact_code.co_varnames,
        fact_code.co_filename,
        fact_code.co_name,
        fact_code.co_firstlineno,
        fact_code.co_lnotab,   # In general, You should adjust this
        fact_code.co_freevars,
        fact_code.co_cellvars
    )
    return new_code

Here we reuse PyTorch's `set_frame` directly to alter the frame behavior. 

In [18]:
from torch._dynamo.eval_frame import set_eval_frame

In [19]:
def fact_dummy():
    a = 1
    a = 10
    return a

In [32]:
from dataclasses import dataclass
from torch._dynamo.types import GuardedCode
from torch._dynamo.convert_frame import CheckFunctionManager
check_fn = CheckFunctionManager(None, locals(), globals(), None).check_fn
check_fn.args = []

def callback(frame:types.FrameType, cache:int):
    if frame.f_code.co_name == 'fact_dummy':
        patched_code = patch(frame)
        new_fn = types.FunctionType(patched_code, globals())
        print("fact_dummy res:", new_fn())
        return GuardedCode(patched_code, check_fn=check_fn)
    return
    if "/usr" in frame.f_code.co_filename:
        return GuardedCode(frame.f_code)
    print('name', frame.f_code.co_name)
    if True or frame.f_code.co_name == 'fact_dummy':
        #patched_code = patch(frame)
        #return GuardedCode(patched_code)
        return GuardedCode(frame.f_code)
    return

set_eval_frame(callback)

In [33]:
fact_dummy()

ERROR RUNNING GUARDS fact_dummy /tmp/ipykernel_3378401/2630368416.py:1


AttributeError: 'function' object has no attribute 'args'

In [22]:
set_eval_frame(None)

<function __main__.callback(frame: frame, cache: int)>