In [1]:
from lark import Lark, Tree
from lexer import Lexer as Lexer_

In [2]:
grammar = """
    start: statements

    statements: statement+

    statement: print_statement END_OF_STATEMENT
             | declaration END_OF_STATEMENT
             | exception_handling
             | return_statement END_OF_STATEMENT
             | control_flow
             | expression_statement

    print_statement: PRINT_KEYWORD ROUND_OPEN print_args ROUND_CLOSE

    print_args: (expression) (COMMA print_args)?

    expression_statement: expression END_OF_STATEMENT
                        | assignment END_OF_STATEMENT

    expression: expression (operator|comparator) expression
              | unary_expression
              | identifier
              | function_call
              | identifier index
              | ROUND_OPEN expression ROUND_CLOSE
              | literal
              | identifier COMPOUND_OPERATOR expression
              | identifier DOT_OPERATOR identifier expression

    unary_expression: unary_operator identifier | identifier unary_operator
                    | NOT_OPERATOR (identifier | ROUND_OPEN expression ROUND_CLOSE)

    assignment: identifier ASSIGNMENT_OPERATOR expression
              | identifier ASSIGNMENT_OPERATOR assignment_list

    assignment_list: (literal|identifier) COMMA (literal|identifier) (COMMA (literal|identifier))*

    index: (index?) SQUARE_OPEN expression SQUARE_CLOSE

    control_flow: FUNCTION_DECLARATION identifier ROUND_OPEN parameters ROUND_CLOSE block
                | IF_ELIF ROUND_OPEN expression ROUND_CLOSE block (ELSE_KEYWORD block)?
                | WHILE_KEYWORD ROUND_OPEN expression ROUND_CLOSE block
                | DO_KEYWORD block WHILE_KEYWORD ROUND_OPEN expression ROUND_CLOSE
                | FOR_KEYWORD ROUND_OPEN dec_control_flow END_OF_STATEMENT expression END_OF_STATEMENT (expression | assignment) ROUND_CLOSE block
                | BREAK_CONTINUE END_OF_STATEMENT

    dec_control_flow: VARIABLE_DECLARATION identifier ASSIGNMENT_OPERATOR expression

    declaration: TUPLE_DECLARATION identifier ASSIGNMENT_OPERATOR SQUARE_OPEN expression (COMMA expression)* SQUARE_CLOSE
                | LIST_DECLARATION identifier ASSIGNMENT_OPERATOR list_content
                | ARR_DECLARATION identifier ASSIGNMENT_OPERATOR SQUARE_OPEN literal (COMMA literal)* SQUARE_CLOSE
                | EXCEPTION_TYPE identifier ASSIGNMENT_OPERATOR identifier
                | LIST_DECLARATION identifier ASSIGNMENT_OPERATOR matrix
                | ARR_DECLARATION identifier ASSIGNMENT_OPERATOR matrix
                | VARIABLE_DECLARATION identifier (COMMA identifier)* ASSIGNMENT_OPERATOR expression (COMMA (expression))*

    list_content: SQUARE_OPEN expression (COMMA expression)* SQUARE_CLOSE
                | SQUARE_OPEN SQUARE_CLOSE

    matrix: SQUARE_OPEN items SQUARE_CLOSE

    items: matrix (COMMA matrix)*

    exception_handling: TRY_KEYWORD block CATCH_KEYWORD ROUND_OPEN EXCEPTION_TYPE identifier ROUND_CLOSE block FINALLY_KEYWORD block
                      | THROW_KEYWORD EXCEPTION_TYPE ROUND_OPEN print_args ROUND_CLOSE END_OF_STATEMENT

    block: CURLY_OPEN statements CURLY_CLOSE | CURLY_OPEN CURLY_CLOSE

    function_call: identifier ROUND_OPEN arguments ROUND_CLOSE
                 | identifier DOT_OPERATOR identifier ROUND_OPEN arguments ROUND_CLOSE

    return_statement: RETURN_KEYWORD expression?

    operator: OPERATOR

    compound_operator: COMPOUND_OPERATOR

    unary_operator: UNARY_OPERATOR

    comparator: COMPARATOR

    identifier: IDENTIFIER

    literal: integer_constant
           | decimal_constant
           | string_literal
           | BOOLEAN_VALUE 
           | NULL_KEYWORD

    keywords: KEYWORD

    integer_constant: INTEGER_CONSTANT

    decimal_constant: DECIMAL_CONSTANT

    string_literal: STRING_LITERAL

    arguments: (COMMA | expression)*

    parameters: parameter (COMMA parameter)*
              | (COMMA expression)*

    parameter: (VARIABLE_DECLARATION | LIST_DECLARATION | ARR_DECLARATION | TUPLE_DECLARATION) identifier
    %declare STRING_LITERAL BOOLEAN_VALUE COMMA FUNCTION_DECLARATION BREAK_CONTINUE IF_ELIF ELSE_KEYWORD WHILE_KEYWORD DO_KEYWORD FOR_KEYWORD PRINT_KEYWORD RETURN_KEYWORD VARIABLE_DECLARATION LIST_DECLARATION ARR_DECLARATION TUPLE_DECLARATION EXCEPTION_TYPE NULL_KEYWORD TRY_KEYWORD CATCH_KEYWORD FINALLY_KEYWORD THROW_KEYWORD KEYWORD NOT_OPERATOR ASSIGNMENT_OPERATOR OPERATOR COMPOUND_OPERATOR UNARY_OPERATOR COMPARATOR DOT_OPERATOR PUNCTUATION END_OF_STATEMENT ROUND_OPEN ROUND_CLOSE CURLY_OPEN CURLY_CLOSE SQUARE_OPEN SQUARE_CLOSE DECIMAL_CONSTANT INTEGER_CONSTANT IDENTIFIER QUOTATION ERROR
    %import common.WS
    %ignore WS
"""

In [3]:
from lark.lexer import Lexer, Token

class MyLexer(Lexer):
    def __init__(self, lexer_conf):
        pass

    def lex(self, data):
        lexer = Lexer_(source_code=data)
        lexer.tokenize()
        tokens = lexer.get_tokens()
        for type, value in tokens:
            yield Token(type, value)

In [4]:
parser = Lark(grammar, start='start', lexer=MyLexer, parser='lalr')

input_string = """
print(10);
var x = 5;
var y = true;
x = 10;
x = y;
"""

def visualize_tree(tree, depth=0):
    if isinstance(tree, Tree):
        print("  " * depth + "+-" + str(tree.data))
        for child in tree.children[:-1]:
            print("  " * (depth + 1) + "|")
            visualize_tree(child, depth + 1)
        if tree.children:
            print("  " * (depth + 1) + "|")
            visualize_tree(tree.children[-1], depth + 1)
    else:
        print("  " * depth + "+-" + str(tree))

try:
    tree = parser.parse(input_string)
    visualize_tree(tree)
    print("Parsing successful.")
except Exception as e:
    print("Parsing failed:", e)

['print', '(', '10', ')', ';', 'var', 'x', '=', '5', ';', 'var', 'y', '=', 'true', ';', 'x', '=', '10', ';', 'x', '=', 'y', ';']
+-start
  |
  +-statements
    |
    +-statement
      |
      +-print_statement
        |
        +-print
        |
        +-(
        |
        +-print_args
          |
          +-expression
            |
            +-literal
              |
              +-integer_constant
                |
                +-10
        |
        +-)
      |
      +-;
    |
    +-statement
      |
      +-declaration
        |
        +-var
        |
        +-identifier
          |
          +-x
        |
        +-=
        |
        +-expression
          |
          +-literal
            |
            +-integer_constant
              |
              +-5
      |
      +-;
    |
    +-statement
      |
      +-declaration
        |
        +-var
        |
        +-identifier
          |
          +-y
        |
        +-=
        |
        +-expression
          |
   

In [6]:
from lark import Transformer

class AstTransformer(Transformer):
    # Define AST classes
    class Statements(list):
        pass

    class Statement:
        pass

    class PrintStatement(Statement):
        def __init__(self, print_args):
            self.print_args = print_args

    class Expression:
        pass

    class BinaryExpression(Expression):
        def __init__(self, left, operator, right):
            self.left = left
            self.operator = operator
            self.right = right

    class UnaryExpression(Expression):
        def __init__(self, operator, expression):
            self.operator = operator
            self.expression = expression

    class Identifier(Expression):
        def __init__(self, name):
            self.name = name

    class FunctionCall(Expression):
        def __init__(self, name, arguments):
            self.name = name
            self.arguments = arguments

    class Literal(Expression):
        def __init__(self, value):
            self.value = value

    class Assignment(Statement):
        def __init__(self, identifier, expression):
            self.identifier = identifier
            self.expression = expression

    # Define transformation rules
    def statements(self, statements):
        return AstTransformer.Statements(statements)

    def print_statement(self, args):
        print_args = args[0]
        return AstTransformer.PrintStatement(print_args)

    def expression_statement(self, expr):
        return expr

    def expression(self, data):
        if len(data) == 1:
            return data[0]
        elif len(data) == 3:
            operator = data[1]
            if isinstance(operator, str):
                left = data[0]
                right = data[2]
                return AstTransformer.BinaryExpression(left, operator, right)

    def unary_expression(self, data):
        if len(data) == 2:
            operator, expression = data
            return AstTransformer.UnaryExpression(operator, expression)

    def identifier(self, name):
        return AstTransformer.Identifier(name[0])

    def function_call(self, data):
        name = data[0]
        arguments = data[1]
        return AstTransformer.FunctionCall(name, arguments)

    def literal(self, value):
        return AstTransformer.Literal(value[0])

    def assignment(self, data):
        identifier = data[0]
        expression = data[1]
        return AstTransformer.Assignment(identifier, expression)

transformer = AstTransformer()

def parse(code):
    tree = parser.parse(code)
    visualize_tree(tree)
    return transformer.transform(tree)

In [20]:
tree = parser.parse(input_string)
tree

['var', 'a', '=', '"Hello"', ';', 'var', 'b', '=', '"World"', ';', 'var', 'c', '=', 'a', '+', '" "', '+', 'b', ';', 'print', '(', 'c', ')', ';', 'var', 'a', '=', '"Hello World"', ';', 'var', 'c', '=', 'a', '.', 'slice', '(', '0', ',', '5', ')', ';', 'print', '(', 'c', ')', ';']


Tree(Token('RULE', 'start'), [Tree(Token('RULE', 'statements'), [Tree(Token('RULE', 'statement'), [Tree(Token('RULE', 'declaration'), [Token('VARIABLE_DECLARATION', 'var'), Tree(Token('RULE', 'identifier'), [Token('IDENTIFIER', 'a')]), Token('ASSIGNMENT_OPERATOR', '='), Tree(Token('RULE', 'expression'), [Tree(Token('RULE', 'literal'), [Tree(Token('RULE', 'string_literal'), [Token('STRING_LITERAL', '"Hello"')])])])]), Token('END_OF_STATEMENT', ';')]), Tree(Token('RULE', 'statement'), [Tree(Token('RULE', 'declaration'), [Token('VARIABLE_DECLARATION', 'var'), Tree(Token('RULE', 'identifier'), [Token('IDENTIFIER', 'b')]), Token('ASSIGNMENT_OPERATOR', '='), Tree(Token('RULE', 'expression'), [Tree(Token('RULE', 'literal'), [Tree(Token('RULE', 'string_literal'), [Token('STRING_LITERAL', '"World"')])])])]), Token('END_OF_STATEMENT', ';')]), Tree(Token('RULE', 'statement'), [Tree(Token('RULE', 'declaration'), [Token('VARIABLE_DECLARATION', 'var'), Tree(Token('RULE', 'identifier'), [Token('IDENT

In [17]:
input_string = """
/* Concatenation */
var a = "Hello";
var b = "World";
var c = a + " " + b;
print(c); /* Outputs "Hello World" */

/* Slicing */
var a = "Hello World";
var c = a.slice(0, 5);

print(c); /* Outputs "Hello" */
"""

ast = parse(input_string)

['var', 'a', '=', '"Hello"', ';', 'var', 'b', '=', '"World"', ';', 'var', 'c', '=', 'a', '+', '" "', '+', 'b', ';', 'print', '(', 'c', ')', ';', 'var', 'a', '=', '"Hello World"', ';', 'var', 'c', '=', 'a', '.', 'slice', '(', '0', ',', '5', ')', ';', 'print', '(', 'c', ')', ';']
+-start
  |
  +-statements
    |
    +-statement
      |
      +-declaration
        |
        +-var
        |
        +-identifier
          |
          +-a
        |
        +-=
        |
        +-expression
          |
          +-literal
            |
            +-string_literal
              |
              +-"Hello"
      |
      +-;
    |
    +-statement
      |
      +-declaration
        |
        +-var
        |
        +-identifier
          |
          +-b
        |
        +-=
        |
        +-expression
          |
          +-literal
            |
            +-string_literal
              |
              +-"World"
      |
      +-;
    |
    +-statement
      |
      +-declaration
        |


In [27]:
from lark import Tree

def visualize_tree(tree, depth=0):
    if isinstance(tree, Tree):
        print('\t' * depth + tree.data)
        for child in tree.children:
            if depth == 0:
                for children in child:
                    visualize_tree(children, depth + 1)
            else:
                visualize_tree(child, depth + 1)
    else:
        print('\t' * depth + str(tree))

# Example AST visualization function call
visualize_tree(ast)


start
	statement
		declaration
			var
			<__main__.AstTransformer.Identifier object at 0x114ab5ad0>
			=
			<__main__.AstTransformer.Literal object at 0x114370190>
		;
	statement
		declaration
			var
			<__main__.AstTransformer.Identifier object at 0x114ab5d50>
			=
			<__main__.AstTransformer.Literal object at 0x114ab53d0>
		;
	statement
		declaration
			var
			<__main__.AstTransformer.Identifier object at 0x114b11290>
			=
			None
		;
	statement
		<__main__.AstTransformer.PrintStatement object at 0x114b10090>
		;
	statement
		declaration
			var
			<__main__.AstTransformer.Identifier object at 0x114b10510>
			=
			<__main__.AstTransformer.Literal object at 0x114b10450>
		;
	statement
		declaration
			var
			<__main__.AstTransformer.Identifier object at 0x114b109d0>
			=
			<__main__.AstTransformer.FunctionCall object at 0x114b11c10>
		;
	statement
		<__main__.AstTransformer.PrintStatement object at 0x114b11a50>
		;


In [25]:
ast

Tree(Token('RULE', 'start'), [[Tree(Token('RULE', 'statement'), [Tree(Token('RULE', 'declaration'), [Token('VARIABLE_DECLARATION', 'var'), <__main__.AstTransformer.Identifier object at 0x114ab5ad0>, Token('ASSIGNMENT_OPERATOR', '='), <__main__.AstTransformer.Literal object at 0x114370190>]), Token('END_OF_STATEMENT', ';')]), Tree(Token('RULE', 'statement'), [Tree(Token('RULE', 'declaration'), [Token('VARIABLE_DECLARATION', 'var'), <__main__.AstTransformer.Identifier object at 0x114ab5d50>, Token('ASSIGNMENT_OPERATOR', '='), <__main__.AstTransformer.Literal object at 0x114ab53d0>]), Token('END_OF_STATEMENT', ';')]), Tree(Token('RULE', 'statement'), [Tree(Token('RULE', 'declaration'), [Token('VARIABLE_DECLARATION', 'var'), <__main__.AstTransformer.Identifier object at 0x114b11290>, Token('ASSIGNMENT_OPERATOR', '='), None]), Token('END_OF_STATEMENT', ';')]), Tree(Token('RULE', 'statement'), [<__main__.AstTransformer.PrintStatement object at 0x114b10090>, Token('END_OF_STATEMENT', ';')]), 

In [24]:
tree

Tree(Token('RULE', 'start'), [Tree(Token('RULE', 'statements'), [Tree(Token('RULE', 'statement'), [Tree(Token('RULE', 'declaration'), [Token('VARIABLE_DECLARATION', 'var'), Tree(Token('RULE', 'identifier'), [Token('IDENTIFIER', 'a')]), Token('ASSIGNMENT_OPERATOR', '='), Tree(Token('RULE', 'expression'), [Tree(Token('RULE', 'literal'), [Tree(Token('RULE', 'string_literal'), [Token('STRING_LITERAL', '"Hello"')])])])]), Token('END_OF_STATEMENT', ';')]), Tree(Token('RULE', 'statement'), [Tree(Token('RULE', 'declaration'), [Token('VARIABLE_DECLARATION', 'var'), Tree(Token('RULE', 'identifier'), [Token('IDENTIFIER', 'b')]), Token('ASSIGNMENT_OPERATOR', '='), Tree(Token('RULE', 'expression'), [Tree(Token('RULE', 'literal'), [Tree(Token('RULE', 'string_literal'), [Token('STRING_LITERAL', '"World"')])])])]), Token('END_OF_STATEMENT', ';')]), Tree(Token('RULE', 'statement'), [Tree(Token('RULE', 'declaration'), [Token('VARIABLE_DECLARATION', 'var'), Tree(Token('RULE', 'identifier'), [Token('IDENT

In [13]:
from lark import Lark, Transformer, v_args
from lark.visitors import Visitor, visit_children_decor

class AstVisitor(Visitor):
    def __init__(self):
        self.indent = 0
        self.output = ""

    def visit(self, tree):
        if isinstance(tree, AstTransformer.Statements):
            self.visit_statements(tree)
        elif isinstance(tree, AstTransformer.PrintStatement):
            self.visit_print_statement(tree)
        elif isinstance(tree, AstTransformer.Assignment):
            self.visit_assignment(tree)
        elif isinstance(tree, AstTransformer.Identifier):
            self.visit_identifier(tree)
        elif isinstance(tree, AstTransformer.Literal):
            self.visit_literal(tree)
        # Add more cases for other AST node classes

    def visit_statements(self, tree):
        self.output += " " * self.indent + "Statements:\n"
        self.indent += 2
        for stmt in tree:
            self.visit(stmt)
        self.indent -= 2

    def visit_print_statement(self, tree):
        self.output += " " * self.indent + "PrintStatement:\n"
        self.indent += 2
        self.output += " " * self.indent + "Arguments:\n"
        self.indent += 2
        for arg in tree.print_args:
            self.visit(arg)
        self.indent -= 4

    def visit_assignment(self, tree):
        self.output += " " * self.indent + "Assignment:\n"
        self.indent += 2
        self.output += " " * self.indent + "Identifier: " + tree.identifier.name + "\n"
        self.output += " " * self.indent + "Expression:\n"
        self.indent += 2
        self.visit(tree.expression)
        self.indent -= 4

    def visit_identifier(self, tree):
        self.output += " " * self.indent + "Identifier: " + tree.name + "\n"

    def visit_literal(self, tree):
        self.output += " " * self.indent + "Literal: " + str(tree.value) + "\n"

def visualize_ast(ast):
    visitor = AstVisitor()
    visitor.visit(ast)
    print("Hello")
    print(visitor.output)

In [14]:
visualize_ast(ast)

Hello

