In [156]:
from lark import Lark, Tree
from lexer import Lexer as Lexer_

In [157]:
grammar = """
start: statements

statements: statement+
statement: print | declare | exception_handling | return | control | expression_statement

print: PRINT_KEYWORD ROUND_OPEN print_args ROUND_CLOSE END_OF_STATEMENT
print_args: expression print_args_temp | 
print_args_temp: COMMA expression print_args_temp |

expression_statement: expression END_OF_STATEMENT | assignment END_OF_STATEMENT

expression: expression binary_op expression | unary_expression | function_call | IDENTIFIER index | ROUND_OPEN expression ROUND_CLOSE | literal | IDENTIFIER COMPOUND_OPERATOR expression | IDENTIFIER DOT_OPERATOR IDENTIFIER expression
binary_op: OPERATOR | COMPARATOR
unary_expression: UNARY_OPERATOR IDENTIFIER | IDENTIFIER UNARY_OPERATOR | NOT_OPERATOR IDENTIFIER | NOT_OPERATOR ROUND_OPEN expression ROUND_CLOSE

assignment: IDENTIFIER ASSIGNMENT_OPERATOR expression 

index: index SQUARE_OPEN expression SQUARE_CLOSE | 

control: function | if_else | while | do_while | for_loop | break_continue
function: FUNCTION_DECLARATION IDENTIFIER ROUND_OPEN parameters ROUND_CLOSE block
if_else: IF_ELIF ROUND_OPEN expression ROUND_CLOSE block else_temp 
else_temp: ELSE_KEYWORD block | 
while: WHILE_KEYWORD ROUND_OPEN expression ROUND_CLOSE block
do_while: DO_KEYWORD block WHILE_KEYWORD ROUND_OPEN expression ROUND_CLOSE
for_loop: FOR_KEYWORD ROUND_OPEN dec_control_flow END_OF_STATEMENT expression END_OF_STATEMENT for_update ROUND_CLOSE block
for_update: expression | assignment
break_continue: BREAK_CONTINUE END_OF_STATEMENT

dec_control_flow: VARIABLE_DECLARATION IDENTIFIER ASSIGNMENT_OPERATOR expression

declare: tuple_declaration | list_declaration | arr_declaration | exception_declaration | variable_declaration 
tuple_declaration: TUPLE_DECLARATION IDENTIFIER ASSIGNMENT_OPERATOR matrix END_OF_STATEMENT
list_declaration: LIST_DECLARATION IDENTIFIER ASSIGNMENT_OPERATOR matrix END_OF_STATEMENT
arr_declaration: ARR_DECLARATION IDENTIFIER ASSIGNMENT_OPERATOR matrix END_OF_STATEMENT
exception_declaration: EXCEPTION_TYPE IDENTIFIER ASSIGNMENT_OPERATOR IDENTIFIER END_OF_STATEMENT
variable_declaration: VARIABLE_DECLARATION IDENTIFIER variable_declaration_temp ASSIGNMENT_OPERATOR expression variable_declaration_expression_temp END_OF_STATEMENT
variable_declaration_temp: COMMA IDENTIFIER variable_declaration_temp | 
variable_declaration_expression_temp: COMMA expression variable_declaration_expression_temp | 

matrix: matrix_temp | list_content
matrix_temp: SQUARE_OPEN matrix matrix_temp_comma SQUARE_CLOSE | 
matrix_temp_comma: COMMA matrix matrix_temp_comma |
list_content: SQUARE_OPEN expression list_content_temp SQUARE_CLOSE | SQUARE_OPEN SQUARE_CLOSE
list_content_temp: COMMA expression list_content_temp |

exception_handling: try_catch_finally | throw
try_catch_finally: TRY_KEYWORD block CATCH_KEYWORD ROUND_OPEN EXCEPTION_TYPE IDENTIFIER ROUND_CLOSE block FINALLY_KEYWORD block
throw: THROW_KEYWORD EXCEPTION_TYPE ROUND_OPEN print_args ROUND_CLOSE END_OF_STATEMENT

block: CURLY_OPEN statements CURLY_CLOSE | CURLY_OPEN CURLY_CLOSE

function_call: IDENTIFIER ROUND_OPEN argument_temp ROUND_CLOSE | IDENTIFIER DOT_OPERATOR IDENTIFIER ROUND_OPEN argument_temp ROUND_CLOSE

return: RETURN_KEYWORD expression? END_OF_STATEMENT

literal: INTEGER_CONSTANT | DECIMAL_CONSTANT | STRING_LITERAL | BOOLEAN_VALUE | NULL_KEYWORD

argument_temp: COMMA argument_temp | expression argument_temp |

parameters: parameter parameters_temp |
parameter: VARIABLE_DECLARATION IDENTIFIER | LIST_DECLARATION IDENTIFIER | ARR_DECLARATION IDENTIFIER | TUPLE_DECLARATION IDENTIFIER
parameters_temp: COMMA parameter parameters_temp | 

%declare STRING_LITERAL BOOLEAN_VALUE COMMA FUNCTION_DECLARATION BREAK_CONTINUE IF_ELIF ELSE_KEYWORD WHILE_KEYWORD DO_KEYWORD FOR_KEYWORD PRINT_KEYWORD RETURN_KEYWORD VARIABLE_DECLARATION LIST_DECLARATION ARR_DECLARATION TUPLE_DECLARATION EXCEPTION_TYPE NULL_KEYWORD TRY_KEYWORD CATCH_KEYWORD FINALLY_KEYWORD THROW_KEYWORD KEYWORD NOT_OPERATOR ASSIGNMENT_OPERATOR OPERATOR COMPOUND_OPERATOR UNARY_OPERATOR COMPARATOR DOT_OPERATOR PUNCTUATION END_OF_STATEMENT ROUND_OPEN ROUND_CLOSE CURLY_OPEN CURLY_CLOSE SQUARE_OPEN SQUARE_CLOSE DECIMAL_CONSTANT INTEGER_CONSTANT IDENTIFIER QUOTATION ERROR
%import common.WS
%ignore WS
"""

In [158]:
from lark.lexer import Lexer, Token

class MyLexer(Lexer):
    def __init__(self, lexer_conf):
        pass

    def lex(self, data):
        lexer = Lexer_(source_code=data)
        lexer.tokenize()
        tokens = lexer.get_tokens()
        for type, value in tokens:
            yield Token(type, value)

In [159]:
parser = Lark(grammar, start='start', lexer=MyLexer, parser='lalr')

input_string = """
a = c;
"""

def visualize_tree(tree, depth=0):
    if isinstance(tree, Tree):
        print("  " * depth + "+-" + str(tree.data))
        for child in tree.children[:-1]:
            print("  " * (depth + 1) + "|")
            visualize_tree(child, depth + 1)
        if tree.children:
            print("  " * (depth + 1) + "|")
            visualize_tree(tree.children[-1], depth + 1)
    else:
        print("  " * depth + "+-" + str(tree))

try:
    tree = parser.parse(input_string)
    visualize_tree(tree)
    print("Parsing successful.")
except Exception as e:
    print("Parsing failed:", e)

['a', '=', 'c', ';']
+-start
  |
  +-statements
    |
    +-statement
      |
      +-expression_statement
        |
        +-assignment
          |
          +-a
          |
          +-=
          |
          +-expression
            |
            +-c
            |
            +-index
        |
        +-;
Parsing successful.


In [168]:
import logging
from typing import List
logging.basicConfig()
log = logging.getLogger(__name__)
log.setLevel(logging.DEBUG)

class ASTNode:
    """Abstract base class for abstract sequence of sequence of sums"""
    def __init__(self):
        """This is an abstract class and should not be instantiated"""
        this_class = self.__class__.__name__
        if this_class == "ASTNode":
            raise NotImplementedError("ASTNode is an abstract class and should not be instantiated")
        else:
            raise NotImplementedError(f"{this_class} is missing a constructor method")

In [169]:
class Statement(ASTNode):
    pass

class Statements(ASTNode):
    def __init__(self):
        self.statements: List[Statement] = []
    
    def append(self, statement: Statement):
        self.statements.append(statement)

    def __str__(self) -> str:
        el_strs = ", ".join(str(e) for e in self.statements)
        return f"[{el_strs}]"

    def __repr__(self):
        return f"statements({repr(self.statements)})"

In [175]:
from dataclasses import dataclass

In [396]:
@dataclass
class Declare(Statement):
    pass

@dataclass
class TupleDeclaration(Declare):
    def __init__(self):
        self.name: str
        self.value: List[Expression]

@dataclass
class ListDeclaration(Declare):
    pass

@dataclass
class ArrDeclaration(Declare):
    pass

@dataclass
class ExceptionDeclaration(Declare):
    pass

@dataclass
class VariableDeclaration(Declare):
    pass

@dataclass
class ExceptionHandling(Statement):
    pass

@dataclass
class TryCatchFinally(ExceptionHandling):
    pass

@dataclass
class Throw(ExceptionHandling):
    pass

@dataclass
class Return(Statement):
    pass    

@dataclass
class Control(Statement):
    pass

@dataclass
class Function(Control):
    pass

@dataclass
class IfElse(Control):
    pass

@dataclass
class While(Control):
    pass

@dataclass
class DoWhile(Control):
    pass

@dataclass
class ForLoop(Control):
    pass

@dataclass
class BreakContinue(Control):
    pass

@dataclass
class ExpressionStatement(Statement):
    pass

class Expression(ASTNode):
    def __init__(self, value):
        self.value = value

    def __str__(self):
        return str(f'expression({self.value})')

    def __repr__(self):
        return repr(f'expression({self.value})')

@dataclass
class Print(Statement):
    def __init__(self, *args):
        self.args = List[Expression]

    def __str__(self) -> str:
        el_strs = ", ".join(str(e) for e in self.args)
        return f"print({el_strs})"
    
    def __repr__(self):
        return f"print({repr(self.args)})"

@dataclass
class BinaryOp(Expression):
    pass

@dataclass
class UnaryExpression(Expression):
    pass

@dataclass
class FunctionCall(Expression):
    pass

class Literal(Expression):
    def __init__(self, value):
        self.value = value

    def __str__(self) -> str:
        return str(self.value)
    
    def __repr__(self):
        return f"literal({repr(self.value)})"

In [397]:
import lark

import logging
logging.basicConfig()
log = logging.getLogger(__name__)
log.setLevel(logging.DEBUG)

In [398]:
class Transformer(lark.Transformer):

    def __init__(self):
        self.temp_args = []

    def print(self, children):
        log.debug(f"Processing 'print' with {children}")
        args = children[2]
        return Print(args)

    def print_args(self, children):
        log.debug(f"Processing 'print_args' with {children}")
        return children[0]

    def print_args_temp(self, children):
        log.debug(f"Processing 'print_args_temp' with {children}")
        if len(children) > 1:
            self.temp_args.append(children[1])
        if len(children) > 2:
            return children[2]
        else:
            return []

    def expression(self, children):
        log.debug(f"Processing 'expression' with {children}")
        value = children[0].value
        return Expression(value)

    def statement(self, children):
        log.debug(f"Processing 'statement' with {children}")
        return children[0]

    def statements(self, children):
        log.debug(f"Processing 'statements' with {children}")
        stmts = Statements()
        for child in children:
            stmts.append(child)
        return stmts
    
    def tuple_declaration(self, args):
        return TupleDeclaration()
    
    def list_declaration(self, args):
        return ListDeclaration()
    
    def arr_declaration(self, args):
        return ArrDeclaration()
    
    def exception_declaration(self, args):
        return ExceptionDeclaration()

    def variable_declaration(self, args):
        log.debug(f"Processing Variable Declaration with {args}")
        return VariableDeclaration()
    
    def exception_handling(self, args):
        return ExceptionHandling()
    
    def try_catch_finally(self, args):
        return TryCatchFinally()
    
    def throw(self, args):
        return Throw()
    
    def return_(self, args):
        return Return()
    
    def function(self, args):
        return Function()
    
    def if_else(self, args):
        return IfElse()
    
    def while_(self, args):
        return While()
    
    def do_while(self, args):
        return DoWhile()
    
    def for_loop(self, args):
        return ForLoop()
    
    def break_continue(self, args):
        return BreakContinue()
    
    def binary_op(self, args):
        return BinaryOp()
    
    def unary_expression(self, args):
        return UnaryExpression()

    def function_call(self, args):
        return FunctionCall()
    
    def INTEGER_CONSTANT(self, args):
        log.debug(f'INTEGER_CONSTANT - {args}')
        return Literal(int(args))
    
    def DECIMAL_CONSTANT(self, args):
        return Literal(float(args[0]))
    
    def STRING_LITERAL(self, args):
        return Literal(str(args[0]))
    
    def BOOLEAN_VALUE(self, args):
        return Literal(bool(args[0]))
    
    def NULL_KEYWORD(self, args):
        return Literal(args[0])
    
    def literal(self, args):
        log.debug(f'literal - {args}')
        return args[0]
    
    def print_args(self, args):
        log.debug(f'print_args - {args}')
        return args


In [399]:
tree = parser.parse("""
print(3);
print(4);
""")

['print', '(', '3', ')', ';', 'print', '(', '4', ')', ';']


In [400]:
tree

Tree(Token('RULE', 'start'), [Tree(Token('RULE', 'statements'), [Tree(Token('RULE', 'statement'), [Tree(Token('RULE', 'print'), [Token('PRINT_KEYWORD', 'print'), Token('ROUND_OPEN', '('), Tree(Token('RULE', 'print_args'), [Tree(Token('RULE', 'expression'), [Tree(Token('RULE', 'literal'), [Token('INTEGER_CONSTANT', '3')])]), Tree(Token('RULE', 'print_args_temp'), [])]), Token('ROUND_CLOSE', ')'), Token('END_OF_STATEMENT', ';')])]), Tree(Token('RULE', 'statement'), [Tree(Token('RULE', 'print'), [Token('PRINT_KEYWORD', 'print'), Token('ROUND_OPEN', '('), Tree(Token('RULE', 'print_args'), [Tree(Token('RULE', 'expression'), [Tree(Token('RULE', 'literal'), [Token('INTEGER_CONSTANT', '4')])]), Tree(Token('RULE', 'print_args_temp'), [])]), Token('ROUND_CLOSE', ')'), Token('END_OF_STATEMENT', ';')])])])])

In [401]:
transformer = Transformer()
ast = transformer.transform(tree)

DEBUG:__main__:INTEGER_CONSTANT - 3
DEBUG:__main__:literal - [literal(3)]
DEBUG:__main__:Processing 'expression' with [literal(3)]
DEBUG:__main__:Processing 'print_args_temp' with []
DEBUG:__main__:print_args - ['expression(3)', []]
DEBUG:__main__:Processing 'print' with [Token('PRINT_KEYWORD', 'print'), Token('ROUND_OPEN', '('), ['expression(3)', []], Token('ROUND_CLOSE', ')'), Token('END_OF_STATEMENT', ';')]
DEBUG:__main__:Processing 'statement' with [print(typing.List[__main__.Expression])]
DEBUG:__main__:INTEGER_CONSTANT - 4
DEBUG:__main__:literal - [literal(4)]
DEBUG:__main__:Processing 'expression' with [literal(4)]
DEBUG:__main__:Processing 'print_args_temp' with []
DEBUG:__main__:print_args - ['expression(4)', []]
DEBUG:__main__:Processing 'print' with [Token('PRINT_KEYWORD', 'print'), Token('ROUND_OPEN', '('), ['expression(4)', []], Token('ROUND_CLOSE', ')'), Token('END_OF_STATEMENT', ';')]
DEBUG:__main__:Processing 'statement' with [print(typing.List[__main__.Expression])]
DE

In [402]:
tree

Tree(Token('RULE', 'start'), [Tree(Token('RULE', 'statements'), [Tree(Token('RULE', 'statement'), [Tree(Token('RULE', 'print'), [Token('PRINT_KEYWORD', 'print'), Token('ROUND_OPEN', '('), Tree(Token('RULE', 'print_args'), [Tree(Token('RULE', 'expression'), [Tree(Token('RULE', 'literal'), [Token('INTEGER_CONSTANT', '3')])]), Tree(Token('RULE', 'print_args_temp'), [])]), Token('ROUND_CLOSE', ')'), Token('END_OF_STATEMENT', ';')])]), Tree(Token('RULE', 'statement'), [Tree(Token('RULE', 'print'), [Token('PRINT_KEYWORD', 'print'), Token('ROUND_OPEN', '('), Tree(Token('RULE', 'print_args'), [Tree(Token('RULE', 'expression'), [Tree(Token('RULE', 'literal'), [Token('INTEGER_CONSTANT', '4')])]), Tree(Token('RULE', 'print_args_temp'), [])]), Token('ROUND_CLOSE', ')'), Token('END_OF_STATEMENT', ';')])])])])

In [404]:
ast

Tree(Token('RULE', 'start'), [statements([print(typing.List[__main__.Expression]), print(typing.List[__main__.Expression])])])

In [380]:
visualize_tree(ast)

+-start
  |
  +-[print([3, []]), print([4, []])]


In [373]:
repr(ast)

"Tree(Token('RULE', 'start'), [statements([print(([3, []],)), print(([3, []],))])])"

In [374]:
visualize_tree(tree)

+-start
  |
  +-statements
    |
    +-statement
      |
      +-print
        |
        +-print
        |
        +-(
        |
        +-print_args
          |
          +-expression
            |
            +-literal
              |
              +-3
          |
          +-print_args_temp
        |
        +-)
        |
        +-;
    |
    +-statement
      |
      +-print
        |
        +-print
        |
        +-(
        |
        +-print_args
          |
          +-expression
            |
            +-literal
              |
              +-3
          |
          +-print_args_temp
            |
            +-,
            |
            +-expression
              |
              +-literal
                |
                +-4
            |
            +-print_args_temp
        |
        +-)
        |
        +-;


In [230]:
tree = parser.parse("""
var a = 3;
""")

['var', 'a', '=', '3', ';']


In [231]:
ast = transformer.transform(tree)
visualize_tree(ast)

DEBUG:__main__:Processing Variable Declaration with [Token('VARIABLE_DECLARATION', 'var'), Token('IDENTIFIER', 'a'), Tree(Token('RULE', 'variable_declaration_temp'), []), Token('ASSIGNMENT_OPERATOR', '='), Tree(Token('RULE', 'expression'), [Tree(Token('RULE', 'literal'), [literal(3)])]), Tree(Token('RULE', 'variable_declaration_expression_temp'), []), Token('END_OF_STATEMENT', ';')]


+-start
  |
  +-statements
    |
    +-statement
      |
      +-declare
        |
        +-VariableDeclaration()
