In [1]:
from lark import Lark, Tree
from lexer import Lexer as Lexer_

In [2]:
grammar = """
    start: statements

    statements: statement+

    statement: print_statement END_OF_STATEMENT
             | declaration END_OF_STATEMENT
             | exception_handling
             | return_statement END_OF_STATEMENT
             | control_flow
             | expression_statement

    print_statement: PRINT_KEYWORD ROUND_OPEN print_args ROUND_CLOSE

    print_args: (expression) (COMMA print_args)?

    expression_statement: expression END_OF_STATEMENT
                        | assignment END_OF_STATEMENT

    expression: expression (OPERATOR|COMPARATOR) expression
              | unary_expression
              | IDENTIFIER
              | function_call
              | IDENTIFIER index
              | ROUND_OPEN expression ROUND_CLOSE
              | literal
              | IDENTIFIER COMPOUND_OPERATOR expression
              | IDENTIFIER DOT_OPERATOR IDENTIFIER expression

    unary_expression: UNARY_OPERATOR IDENTIFIER | IDENTIFIER UNARY_OPERATOR
                    | NOT_OPERATOR (IDENTIFIER | ROUND_OPEN expression ROUND_CLOSE)

    assignment: IDENTIFIER ASSIGNMENT_OPERATOR expression
              | IDENTIFIER ASSIGNMENT_OPERATOR assignment_list

    assignment_list: (literal|IDENTIFIER) COMMA (literal|IDENTIFIER) (COMMA (literal|IDENTIFIER))*

    index: (index?) SQUARE_OPEN expression SQUARE_CLOSE

    control_flow: FUNCTION_DECLARATION IDENTIFIER ROUND_OPEN parameters ROUND_CLOSE block
                | IF_ELIF ROUND_OPEN expression ROUND_CLOSE block (ELSE_KEYWORD block)?
                | WHILE_KEYWORD ROUND_OPEN expression ROUND_CLOSE block
                | DO_KEYWORD block WHILE_KEYWORD ROUND_OPEN expression ROUND_CLOSE
                | FOR_KEYWORD ROUND_OPEN dec_control_flow END_OF_STATEMENT expression END_OF_STATEMENT (expression | assignment) ROUND_CLOSE block
                | BREAK_CONTINUE END_OF_STATEMENT

    dec_control_flow: VARIABLE_DECLARATION IDENTIFIER ASSIGNMENT_OPERATOR expression

    declaration: TUPLE_DECLARATION IDENTIFIER ASSIGNMENT_OPERATOR SQUARE_OPEN expression (COMMA expression)* SQUARE_CLOSE
                | LIST_DECLARATION IDENTIFIER ASSIGNMENT_OPERATOR list_content
                | ARR_DECLARATION IDENTIFIER ASSIGNMENT_OPERATOR SQUARE_OPEN literal (COMMA literal)* SQUARE_CLOSE
                | EXCEPTION_TYPE IDENTIFIER ASSIGNMENT_OPERATOR IDENTIFIER
                | LIST_DECLARATION IDENTIFIER ASSIGNMENT_OPERATOR matrix
                | ARR_DECLARATION IDENTIFIER ASSIGNMENT_OPERATOR matrix
                | VARIABLE_DECLARATION IDENTIFIER (COMMA IDENTIFIER)* ASSIGNMENT_OPERATOR expression (COMMA (expression))*

    list_content: SQUARE_OPEN expression (COMMA expression)* SQUARE_CLOSE
                | SQUARE_OPEN SQUARE_CLOSE

    matrix: SQUARE_OPEN items SQUARE_CLOSE

    items: matrix (COMMA matrix)*

    exception_handling: TRY_KEYWORD block CATCH_KEYWORD ROUND_OPEN EXCEPTION_TYPE IDENTIFIER ROUND_CLOSE block FINALLY_KEYWORD block
                      | THROW_KEYWORD EXCEPTION_TYPE ROUND_OPEN print_args ROUND_CLOSE END_OF_STATEMENT

    block: CURLY_OPEN statements CURLY_CLOSE | CURLY_OPEN CURLY_CLOSE

    function_call: IDENTIFIER ROUND_OPEN arguments ROUND_CLOSE
                 | IDENTIFIER DOT_OPERATOR IDENTIFIER ROUND_OPEN arguments ROUND_CLOSE

    return_statement: RETURN_KEYWORD expression?

    literal: INTEGER_CONSTANT
           | DECIMAL_CONSTANT
           | STRING_LITERAL
           | BOOLEAN_VALUE 
           | NULL_KEYWORD

    arguments: (COMMA | expression)*

    parameters: parameter (COMMA parameter)*
              | (COMMA expression)*

    parameter: (VARIABLE_DECLARATION | LIST_DECLARATION | ARR_DECLARATION | TUPLE_DECLARATION) IDENTIFIER
    %declare STRING_LITERAL BOOLEAN_VALUE COMMA FUNCTION_DECLARATION BREAK_CONTINUE IF_ELIF ELSE_KEYWORD WHILE_KEYWORD DO_KEYWORD FOR_KEYWORD PRINT_KEYWORD RETURN_KEYWORD VARIABLE_DECLARATION LIST_DECLARATION ARR_DECLARATION TUPLE_DECLARATION EXCEPTION_TYPE NULL_KEYWORD TRY_KEYWORD CATCH_KEYWORD FINALLY_KEYWORD THROW_KEYWORD KEYWORD NOT_OPERATOR ASSIGNMENT_OPERATOR OPERATOR COMPOUND_OPERATOR UNARY_OPERATOR COMPARATOR DOT_OPERATOR PUNCTUATION END_OF_STATEMENT ROUND_OPEN ROUND_CLOSE CURLY_OPEN CURLY_CLOSE SQUARE_OPEN SQUARE_CLOSE DECIMAL_CONSTANT INTEGER_CONSTANT IDENTIFIER QUOTATION ERROR
    %import common.WS
    %ignore WS
"""

In [3]:
from lark.lexer import Lexer, Token

class MyLexer(Lexer):
    def __init__(self, lexer_conf):
        pass

    def lex(self, data):
        lexer = Lexer_(source_code=data)
        lexer.tokenize()
        tokens = lexer.get_tokens()
        for type, value in tokens:
            yield Token(type, value)

In [4]:
parser = Lark(grammar, start='start', lexer=MyLexer, parser='lalr')

input_string = """
print(10);
var x = 5;
var y = true;
x = 10;
x = y;
"""

def visualize_tree(tree, depth=0):
    if isinstance(tree, Tree):
        print("  " * depth + "+-" + str(tree.data))
        for child in tree.children[:-1]:
            print("  " * (depth + 1) + "|")
            visualize_tree(child, depth + 1)
        if tree.children:
            print("  " * (depth + 1) + "|")
            visualize_tree(tree.children[-1], depth + 1)
    else:
        print("  " * depth + "+-" + str(tree))

try:
    tree = parser.parse(input_string)
    visualize_tree(tree)
    print("Parsing successful.")
except Exception as e:
    print("Parsing failed:", e)

['print', '(', '10', ')', ';', 'var', 'x', '=', '5', ';', 'var', 'y', '=', 'true', ';', 'x', '=', '10', ';', 'x', '=', 'y', ';']
+-start
  |
  +-statements
    |
    +-statement
      |
      +-print_statement
        |
        +-print
        |
        +-(
        |
        +-print_args
          |
          +-expression
            |
            +-literal
              |
              +-10
        |
        +-)
      |
      +-;
    |
    +-statement
      |
      +-declaration
        |
        +-var
        |
        +-x
        |
        +-=
        |
        +-expression
          |
          +-literal
            |
            +-5
      |
      +-;
    |
    +-statement
      |
      +-declaration
        |
        +-var
        |
        +-y
        |
        +-=
        |
        +-expression
          |
          +-literal
            |
            +-true
      |
      +-;
    |
    +-statement
      |
      +-expression_statement
        |
        +-assignment
          |
  

In [8]:
import logging
import lark

import logging

# Configure the logging module to write debug messages to a file
logging.basicConfig(filename='debug.log', level=logging.DEBUG)
log = logging.getLogger(__name__)
log.setLevel(logging.DEBUG)

# Rest of your code...


In [33]:
import sys
from typing import List
from dataclasses import dataclass

from lark import Lark, ast_utils, Transformer, v_args
from lark.tree import Meta

this_module = sys.modules[__name__]

In [34]:
class ASTNode:
    """Abstract base class for abstract sequence of sequence of sums"""
    def __init__(self):
        """This is an abstract class and should not be instantiated"""
        this_class = self.__class__.__name__
        if this_class == "ASTNode":
            raise NotImplementedError("ASTNode is an abstract class and should not be instantiated")
        else:
            raise NotImplementedError(f"{this_class} is missing a constructor method")

In [35]:
class Statement(ASTNode):
    pass

class Statements(ASTNode):
    def __init__(self):
        self.statements: List[Statement] = []
    
    def append(self, statement: Statement):
        self.statements.append(statement)

    def __str__(self) -> str:
        el_strs = ", ".join(str(e) for e in self.statements)
        return f"[{el_strs}]"
    
    def __repr__(self) -> str:
        return f"statements({repr(self.statements)})"

class Declare(Statement):
    pass

class ExceptionHandling(Statement):
    pass

class Return(Statement):
    pass

class Control(Statement):
    pass

class Expressions(Statement):
    pass

class Expression(Expressions):
    pass

class Assignment(Expression):
    pass 

class Print(Statement):
    def __init__(self):
        self.print_args: List[Expression] = []

    def append(self, expression: Expression):
        self.print_args.append(expression)
    
    def __str__(self) -> str:
        el_strs = ", ".join(str(e) for e in self.print_args)
        return f"print({el_strs})"
    
    def __repr__(self) -> str:
        return f"print({repr(self.print_args)})"

class Literal(Expression):
    def __init__(self, value):
        self.value = value

    def __str__(self) -> str:
        return f"literal({self.value})"
    
    def __repr__(self) -> str:
        return f"literal({repr(self.value)})"

In [36]:
class Transformer(lark.Transformer):
    def STRING_LITERAL(self, data):
        log.debug(f"Processing token STRING_LITERAL with {data}")
        val = data.value
        ast_node = Literal(val)
        log.debug(f"Processed token into value {ast_node}")
        return ast_node
    
    def INTEGER_CONSTANT(self, *data):
        log.debug(f"Processing token INTEGER_CONSTANT with {data}")
        val = int(str(data[0]))
        ast_node = Literal(val)
        log.debug(f"Processed token into value {ast_node}")
        return ast_node
    
    def DECIMAL_CONSTANT(self, data):
        log.debug(f"Processing token DECIMAL_CONSTANT with {data}")
        val = float(data.value)
        ast_node = Literal(val)
        log.debug(f"Processed token into value {ast_node}")
        return ast_node
    
    def BOOLEAN_VALUE(self, data):
        log.debug(f"Processing token BOOLEAN_VALUE with {data}")
        val = data.value.lower() == "true"
        ast_node = Literal(val)
        log.debug(f"Processed token into value {ast_node}")
        return ast_node
    
    def NULL_KEYWORD(self, data):
        log.debug(f"Processing token NULL_KEYWORD with {data}")
        ast_node = Literal(data.value)
        log.debug(f"Processed token into value {ast_node}")
        return ast_node
    
    def print(self, data):
        log.debug(f"Processing rule print with {data}")
        ast_node = Print(data)
        log.debug(f"Processed rule into value {ast_node}")
        return ast_node
    
    def tuple_declaration(self, data):
        log.debug(f"Processing rule tuple_declaration with {data}")
        ast_node = Declare()
        log.debug(f"Processed rule into value {ast_node}")
        return ast_node
    
    def list_declaration(self, data):
        log.debug(f"Processing rule list_declaration with {data}")
        ast_node = Declare()
        log.debug(f"Processed rule into value {ast_node}")
        return ast_node
    
    def arr_declaration(self, data):
        log.debug(f"Processing rule arr_declaration with {data}")
        ast_node = Declare()
        log.debug(f"Processed rule into value {ast_node}")
        return ast_node
    
    def exception_declaration(self, data):
        log.debug(f"Processing rule exception_declaration with {data}")
        ast_node = Declare()
        log.debug(f"Processed rule into value {ast_node}")
        return ast_node
    
    def variable_declaration(self, data):
        log.debug(f"Processing rule variable_declaration with {data}")
        ast_node = Declare()
        log.debug(f"Processed rule into value {ast_node}")
        return ast_node
    

In [37]:
transformer = ast_utils.create_transformer(this_module, Transformer())
tree = parser.parse("print(a);")
ast = transformer.transform(tree)

['print', '(', 'a', ')', ';']


In [41]:
repr(ast)

"Tree(Token('RULE', 'start'), [Tree(Token('RULE', 'statements'), [Tree(Token('RULE', 'statement'), [Tree(Token('RULE', 'print_statement'), [Token('PRINT_KEYWORD', 'print'), Token('ROUND_OPEN', '('), Tree(Token('RULE', 'print_args'), [Tree(Token('RULE', 'expression'), [Token('IDENTIFIER', 'a')])]), Token('ROUND_CLOSE', ')')]), Token('END_OF_STATEMENT', ';')])])])"

In [42]:
visualize_tree(ast)

+-start
  |
  +-statements
    |
    +-statement
      |
      +-print_statement
        |
        +-print
        |
        +-(
        |
        +-print_args
          |
          +-expression
            |
            +-a
        |
        +-)
      |
      +-;


In [40]:
visualize_tree(tree)

+-start
  |
  +-statements
    |
    +-statement
      |
      +-print_statement
        |
        +-print
        |
        +-(
        |
        +-print_args
          |
          +-expression
            |
            +-a
        |
        +-)
      |
      +-;


In [150]:
def visualize_tree(tree, depth=0):
    if isinstance(tree, Tree):
        print("  " * depth + "+-" + str(tree.data))
        for child in tree.children[:-1]:
            print("  " * (depth + 1) + "|")
            visualize_tree(child, depth + 1)
        if tree.children:
            print("  " * (depth + 1) + "|")
            visualize_tree(tree.children[-1], depth + 1)
    else:
        print(tree)
        print("  " * depth + "+-" + str(tree))

In [151]:
visualize_tree(tree)

+-start
  |
  +-statements
    |
    +-statement
      |
      +-expression_statement
        |
        +-expression
          |
          +-literal
            |
2
            +-2
        |
;
        +-;


In [152]:
tree

Tree(Token('RULE', 'start'), [Tree(Token('RULE', 'statements'), [Tree(Token('RULE', 'statement'), [Tree(Token('RULE', 'expression_statement'), [Tree(Token('RULE', 'expression'), [Tree(Token('RULE', 'literal'), [Token('INTEGER_CONSTANT', '2')])]), Token('END_OF_STATEMENT', ';')])])])])

In [160]:
str(tree.data)

'start'

In [None]:
grammar = """
?start: line_temp END_OF_STATEMENT

line: 
"""