In [260]:
# Это пример кода, который может быть переведён в байт код
# Ссылка на документацию - https://docs.python.org/3/library/dis.html
# Поддерживается: 
#   присваивание переменных: числа,строки и другие переменные
#   арифметические и логические операции
#   if elif else
#   пустые инструкции (обычные числа/строки посреди кода)
#   функция print()
#   однострочные комментарии - они будут просто проигнорированы

In [261]:
import dis
import re

### Infix Notation Code Example

In [262]:
from sympy import parse_expr, srepr
postfix = srepr(parse_expr("1+2*a == 5", evaluate=False))
print(postfix)

Equality(Add(Mul(Integer(2), Symbol('a')), Integer(1)), Integer(5))


In [263]:
from pyparsing import infixNotation, opAssoc, Word, nums, alphas
variable = Word(alphas)
integer = Word(nums)
expr = infixNotation(integer | variable, [("*", 2, opAssoc.LEFT), ("+", 2, opAssoc.LEFT)])
result = expr.parseString("1+2*a")  # Gives parse tree you can convert to postfix
print(result)

[['1', '+', ['2', '*', 'a']]]


### Byte Code Conversion

In [264]:
program = """
a = 1
if a == 120:
    a = 200
elif a > 122:
    if a < 132:
        if a == 123:
            a = 300
        else:
            a = 400
else:
    a = 500 + a + 10

print(a)
"""

In [265]:
print('RESUME           0') # main

RESUME           0


In [266]:
# print()
# TODO POP_TOP -> bonded with RETURN_VALUE
    # если программа - однострочный print, то POP_TOP не будет
pattern = r'\bprint\s*\(\s*([^\)]*)\s*\)'
# pattern = r'\bprint\b'
byte_code = '''LOAD_NAME         ? (print)
PUSH_NULL
__X__'''

def replace_print(match):
    args = match.group(1).strip()
    return byte_code.replace("__X__", f"__({args})__\
\nCALL                {args.count(',')+1}\n\
POP_TOP")

program = re.sub(pattern, replace_print, program)
print(program)


a = 1
if a == 120:
    a = 200
elif a > 122:
    if a < 132:
        if a == 123:
            a = 300
        else:
            a = 400
else:
    a = 500 + a + 10

LOAD_NAME         ? (print)
PUSH_NULL
__(a)__
CALL                1
POP_TOP



In [267]:
# else
pattern = r'^\s*else\s*:'

def replace_print(match):
    return "JUMP_FORWARD      ? (to ?)\n"

program = re.sub(pattern, replace_print, program, flags=re.MULTILINE)
print(program)


a = 1
if a == 120:
    a = 200
elif a > 122:
    if a < 132:
        if a == 123:
            a = 300
JUMP_FORWARD      ? (to ?)

            a = 400
JUMP_FORWARD      ? (to ?)

    a = 500 + a + 10

LOAD_NAME         ? (print)
PUSH_NULL
__(a)__
CALL                1
POP_TOP



In [268]:
# if elif
# TODO TO_BOOL
pattern = r'\n\s*(if|elif)\s*([^\:]*)\s*\:'
# pattern = r'\bprint\b'
byte_code = '''__X__
POP_JUMP_IF_FALSE ? (to ?)
NOT_TAKEN\n'''

def replace_print(match):
    args = match.group(2).strip()
    new_byte_code = byte_code

    if match.group(1).strip() == "elif":
        new_byte_code = "JUMP_FORWARD      ? (to ?)\n\n" + byte_code
    new_byte_code = '\n' + new_byte_code
    
    return new_byte_code.replace("__X__", f"__({args})__")

program = re.sub(pattern, replace_print, program)
print(program)


a = 1
__(a == 120)__
POP_JUMP_IF_FALSE ? (to ?)
NOT_TAKEN

    a = 200
JUMP_FORWARD      ? (to ?)

__(a > 122)__
POP_JUMP_IF_FALSE ? (to ?)
NOT_TAKEN

__(a < 132)__
POP_JUMP_IF_FALSE ? (to ?)
NOT_TAKEN

__(a == 123)__
POP_JUMP_IF_FALSE ? (to ?)
NOT_TAKEN

            a = 300
JUMP_FORWARD      ? (to ?)

            a = 400
JUMP_FORWARD      ? (to ?)

    a = 500 + a + 10

LOAD_NAME         ? (print)
PUSH_NULL
__(a)__
CALL                1
POP_TOP



In [None]:
srepr(parse_expr('500 + a + 10', evaluate=False)) # TODO binary +

"Add(Symbol('a'), Integer(10), Integer(500))"

In [275]:
from sympy import Add, Mul, Pow, Eq, StrictGreaterThan, StrictLessThan, \
    Rational, Integer, Float, Symbol

def infix_parser(expr: str) -> str:
    if any(c in expr for c in '\'\"'):
        return expr # не работаем со строками

    try:
        num = parse_expr(expr, evaluate=True)
        if num.is_number: 
            return f"LOAD_?            ? {num}\n"
    except:
        pass

    def analyze_expression(expr):
        expr = parse_expr(expr, evaluate=False)
        # print(srepr(expr))
        elements = []
        def walk(node):
            # Операторы (например, Add, Mul, Pow)
            if isinstance(node, Add):
                elements.append(('Operator', 'Add', '+'))
            elif isinstance(node, Mul):
                elements.append(('Operator', 'Mul', '*'))
            elif isinstance(node, Pow):
                elements.append(('Operator', 'Pow', '**'))
            # elif isinstance(node, Subtract): # there is some issues
                # elements.append(('Operator', 'Subtract', '-'))
            elif isinstance(node, Eq):
                elements.append(('Operator', 'Eq', '=='))
            elif isinstance(node, StrictGreaterThan):
                elements.append(('Operator', 'StrictGreaterThan', '>'))
            elif isinstance(node, StrictLessThan):
                elements.append(('Operator', 'StrictLessThan', '<'))
            elif isinstance(node, Rational):
                elements.append(('Number', 'Rational', str(node)))
            elif isinstance(node, Integer):
                elements.append(('Number', 'Integer', str(node)))
            elif isinstance(node, Float):
                elements.append(('Number', 'Float', str(node)))
            elif isinstance(node, Symbol):
                elements.append(('Variable', 'Symbol', str(node)))

            # for arg in node.args:
            for arg in reversed(node.args):
                walk(arg)
        walk(expr)
        return elements

    # Add(Mul(Float('3.0', precision=53), Symbol('a')), Integer(1))
    result_string = ""
    for type, _, object in reversed(analyze_expression(expr)): # немного не тот порядок
        if type == 'Operator':
            result_string += f'?_OP              ? ({object})\n'
        elif type == 'Variable':
            result_string += f'LOAD_NAME         ? ({object})\n'
        elif type == 'Number':
            result_string += f'LOAD_?            ? ({object})\n'

    return result_string

print(infix_parser('500 + a + 10'))

LOAD_?            ? (500)
LOAD_NAME         ? (a)
LOAD_?            ? (10)
?_OP              ? (+)



In [271]:
# if conditions __(a == 10)__ # TODO
pattern = r'__\(\s*([^\)]*)\s*\)__\n'

byte_code = '''__X__'''

def replace_print(match):
    args = match.group(1).strip()
    return byte_code.replace("__X__", infix_parser(args))

program = re.sub(pattern, replace_print, program)
print(program)


a = 1
LOAD_NAME         ? (a)
LOAD_?            ? (120)
?_OP              ? (==)
POP_JUMP_IF_FALSE ? (to ?)
NOT_TAKEN

    a = 200
JUMP_FORWARD      ? (to ?)

LOAD_NAME         ? (a)
LOAD_?            ? (122)
?_OP              ? (>)
POP_JUMP_IF_FALSE ? (to ?)
NOT_TAKEN

LOAD_NAME         ? (a)
LOAD_?            ? (132)
?_OP              ? (<)
POP_JUMP_IF_FALSE ? (to ?)
NOT_TAKEN

LOAD_NAME         ? (a)
LOAD_?            ? (123)
?_OP              ? (==)
POP_JUMP_IF_FALSE ? (to ?)
NOT_TAKEN

            a = 300
JUMP_FORWARD      ? (to ?)

            a = 400
JUMP_FORWARD      ? (to ?)

    a = 500 + a + 10

LOAD_NAME         ? (print)
PUSH_NULL
LOAD_NAME         ? (a)
CALL                1
POP_TOP



In [272]:
# variables a = 10
pattern = r'\n\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*(=[^==])\s*(.+)'
# pattern = r'\bprint\b'
byte_code = '''__X__STORE_NAME        __Y__\n'''

def replace_print(match):
    var = match.group(1).strip()
    r_expr = infix_parser(match.group(3).strip())
    # print(str(match))
    # print(str(match).count('\\n'))
    
    return (str(match).count('\\n')*'\n')+byte_code \
        .replace("__X__", f"{r_expr}") \
        .replace("__Y__", f"? ({var})")

program = re.sub(pattern, replace_print, program)
print(program)


LOAD_?            ? 1
STORE_NAME        ? (a)

LOAD_NAME         ? (a)
LOAD_?            ? (120)
?_OP              ? (==)
POP_JUMP_IF_FALSE ? (to ?)
NOT_TAKEN

LOAD_?            ? 200
STORE_NAME        ? (a)

JUMP_FORWARD      ? (to ?)

LOAD_NAME         ? (a)
LOAD_?            ? (122)
?_OP              ? (>)
POP_JUMP_IF_FALSE ? (to ?)
NOT_TAKEN

LOAD_NAME         ? (a)
LOAD_?            ? (132)
?_OP              ? (<)
POP_JUMP_IF_FALSE ? (to ?)
NOT_TAKEN

LOAD_NAME         ? (a)
LOAD_?            ? (123)
?_OP              ? (==)
POP_JUMP_IF_FALSE ? (to ?)
NOT_TAKEN

LOAD_?            ? 300
STORE_NAME        ? (a)

JUMP_FORWARD      ? (to ?)

LOAD_?            ? 400
STORE_NAME        ? (a)

JUMP_FORWARD      ? (to ?)

LOAD_?            ? (500)
LOAD_NAME         ? (a)
LOAD_?            ? (10)
?_OP              ? (+)
STORE_NAME        ? (a)


LOAD_NAME         ? (print)
PUSH_NULL
LOAD_NAME         ? (a)
CALL                1
POP_TOP



In [273]:
print('LOAD_CONST           ? (None)') # TODO main return value
print('RETURN_VALUE') # TODO apply to every if branch instead of JUMP_FORWARD

LOAD_CONST           ? (None)
RETURN_VALUE
