In [77]:
from lark import Lark, Tree
from lexer import Lexer as Lexer_

In [222]:
grammar = """
    start: statements

    statements: statement+

    statement: print_statement END_OF_STATEMENT
             | declaration END_OF_STATEMENT
             | exception_handling
             | return_statement END_OF_STATEMENT
             | control_flow
             | expression_statement

    print_statement: PRINT_KEYWORD ROUND_OPEN print_args ROUND_CLOSE

    print_args: (expression) (COMMA print_args)?

    expression_statement: expression END_OF_STATEMENT
                        | assignment END_OF_STATEMENT

    expression: expression (operator|comparator) expression
              | unary_expression
              | identifier
              | function_call
              | identifier index
              | ROUND_OPEN expression ROUND_CLOSE
              | literal
              | identifier COMPOUND_OPERATOR expression
              | identifier DOT_OPERATOR identifier expression

    unary_expression: unary_operator identifier | identifier unary_operator
                    | NOT_OPERATOR (identifier | ROUND_OPEN expression ROUND_CLOSE)

    assignment: identifier ASSIGNMENT_OPERATOR expression
              | identifier ASSIGNMENT_OPERATOR assignment_list

    assignment_list: (literal|identifier) COMMA (literal|identifier) (COMMA (literal|identifier))*

    index: (index?) SQUARE_OPEN expression SQUARE_CLOSE

    control_flow: FUNCTION_DECLARATION identifier ROUND_OPEN parameters ROUND_CLOSE block
                | IF_ELIF ROUND_OPEN expression ROUND_CLOSE block (ELSE_KEYWORD block)?
                | WHILE_KEYWORD ROUND_OPEN expression ROUND_CLOSE block
                | DO_KEYWORD block WHILE_KEYWORD ROUND_OPEN expression ROUND_CLOSE
                | FOR_KEYWORD ROUND_OPEN dec_control_flow END_OF_STATEMENT expression END_OF_STATEMENT (expression | assignment) ROUND_CLOSE block
                | BREAK_CONTINUE END_OF_STATEMENT

    dec_control_flow: VARIABLE_DECLARATION identifier ASSIGNMENT_OPERATOR expression

    declaration: TUPLE_DECLARATION identifier ASSIGNMENT_OPERATOR SQUARE_OPEN expression (COMMA expression)* SQUARE_CLOSE
                | LIST_DECLARATION identifier ASSIGNMENT_OPERATOR list_content
                | ARR_DECLARATION identifier ASSIGNMENT_OPERATOR SQUARE_OPEN literal (COMMA literal)* SQUARE_CLOSE
                | EXCEPTION_TYPE identifier ASSIGNMENT_OPERATOR identifier
                | LIST_DECLARATION identifier ASSIGNMENT_OPERATOR matrix
                | ARR_DECLARATION identifier ASSIGNMENT_OPERATOR matrix
                | VARIABLE_DECLARATION identifier (COMMA identifier)* ASSIGNMENT_OPERATOR expression (COMMA (expression))*

    list_content: SQUARE_OPEN expression (COMMA expression)* SQUARE_CLOSE
                | SQUARE_OPEN SQUARE_CLOSE

    matrix: SQUARE_OPEN items SQUARE_CLOSE

    items: matrix (COMMA matrix)*

    exception_handling: TRY_KEYWORD block CATCH_KEYWORD ROUND_OPEN EXCEPTION_TYPE identifier ROUND_CLOSE block FINALLY_KEYWORD block
                      | THROW_KEYWORD EXCEPTION_TYPE ROUND_OPEN print_args ROUND_CLOSE END_OF_STATEMENT

    block: CURLY_OPEN statements CURLY_CLOSE | CURLY_OPEN CURLY_CLOSE

    function_call: identifier ROUND_OPEN arguments ROUND_CLOSE
                 | identifier DOT_OPERATOR identifier ROUND_OPEN arguments ROUND_CLOSE

    return_statement: RETURN_KEYWORD expression?

    operator: OPERATOR

    compound_operator: COMPOUND_OPERATOR

    unary_operator: UNARY_OPERATOR

    comparator: COMPARATOR

    identifier: IDENTIFIER

    literal: integer_constant
           | decimal_constant
           | string_literal
           | BOOLEAN_VALUE 
           | NULL_KEYWORD

    keywords: KEYWORD

    integer_constant: INTEGER_CONSTANT

    decimal_constant: DECIMAL_CONSTANT

    string_literal: STRING_LITERAL

    arguments: (COMMA | expression)*

    parameters: parameter (COMMA parameter)*
              | (COMMA expression)*

    parameter: (VARIABLE_DECLARATION | LIST_DECLARATION | ARR_DECLARATION | TUPLE_DECLARATION) identifier
    %declare STRING_LITERAL BOOLEAN_VALUE COMMA FUNCTION_DECLARATION BREAK_CONTINUE IF_ELIF ELSE_KEYWORD WHILE_KEYWORD DO_KEYWORD FOR_KEYWORD PRINT_KEYWORD RETURN_KEYWORD VARIABLE_DECLARATION LIST_DECLARATION ARR_DECLARATION TUPLE_DECLARATION EXCEPTION_TYPE NULL_KEYWORD TRY_KEYWORD CATCH_KEYWORD FINALLY_KEYWORD THROW_KEYWORD KEYWORD NOT_OPERATOR ASSIGNMENT_OPERATOR OPERATOR COMPOUND_OPERATOR UNARY_OPERATOR COMPARATOR DOT_OPERATOR PUNCTUATION END_OF_STATEMENT ROUND_OPEN ROUND_CLOSE CURLY_OPEN CURLY_CLOSE SQUARE_OPEN SQUARE_CLOSE DECIMAL_CONSTANT INTEGER_CONSTANT IDENTIFIER QUOTATION ERROR
    %import common.WS
    %ignore WS
"""

In [223]:
from lark.lexer import Lexer, Token

class MyLexer(Lexer):
    def __init__(self, lexer_conf):
        pass

    def lex(self, data):
        lexer = Lexer_(source_code=data)
        lexer.tokenize()
        tokens = lexer.get_tokens()
        for type, value in tokens:
            yield Token(type, value)

In [242]:
parser = Lark(grammar, start='start', lexer=MyLexer, parser='lalr')

input_string = """
"""

def visualize_tree(tree, depth=0):
    if isinstance(tree, Tree):
        print("  " * depth + "+-" + str(tree.data))
        for child in tree.children[:-1]:
            print("  " * (depth + 1) + "|")
            visualize_tree(child, depth + 1)
        if tree.children:
            print("  " * (depth + 1) + "|")
            visualize_tree(tree.children[-1], depth + 1)
    else:
        print("  " * depth + "+-" + str(tree))

try:
    tree = parser.parse(input_string)
    visualize_tree(tree)
    print("Parsing successful.")
except Exception as e:
    print("Parsing failed:", e)

['const', 'a', '=', '5', ';', 'var', 'b', '=', '5', ';']
+-start
  |
  +-statements
    |
    +-statement
      |
      +-declaration
        |
        +-const
        |
        +-identifier
          |
          +-a
        |
        +-=
        |
        +-expression
          |
          +-literal
            |
            +-integer_constant
              |
              +-5
      |
      +-;
    |
    +-statement
      |
      +-declaration
        |
        +-var
        |
        +-identifier
          |
          +-b
        |
        +-=
        |
        +-expression
          |
          +-literal
            |
            +-integer_constant
              |
              +-5
      |
      +-;
Parsing successful.


In [123]:
from lark import Lark, ast_utils, Transformer, v_args
from dataclasses import dataclass
from lark.tree import Meta
import sys

this_module = sys.modules[__name__]

# Define AST classes
class _Ast(ast_utils.Ast):
    pass

class _Statement(_Ast):
    pass

@dataclass
class Value(_Ast, ast_utils.WithMeta):
    meta: Meta
    value: object

@dataclass
class Name(_Ast):
    name: str

@dataclass
class CodeBlock(_Ast, ast_utils.AsList):
    statements: list[_Statement]

@dataclass
class PrintStatement(_Statement):
    expression: Value

@dataclass
class Declaration(_Statement):
    identifier: str
    value: Value

@dataclass
class ExceptionHandling(_Statement):
    try_block: CodeBlock
    catch_block: CodeBlock
    finally_block: CodeBlock

@dataclass
class ReturnStatement(_Statement):
    expression: Value

@dataclass
class ControlFlow(_Statement):
    condition: Value
    block: CodeBlock

@dataclass
class Assignment(_Statement):
    identifier: str
    index: Value
    value: Value

@dataclass
class Expression(_Ast):
    left: Value
    operator: str
    right: Value

class ToAst(Transformer):
    def start(self, items):
        return items[0]

    def statements(self, items):
        return CodeBlock(statements=items)

    def print_statement(self, items):
        return PrintStatement(expression=items[0])

    def declaration(self, items):
        return Declaration(identifier=items[0], value=items[1])

    def exception_handling(self, items):
        return ExceptionHandling(try_block=items[0], catch_block=items[1], finally_block=items[2])

    def return_statement(self, items):
        return ReturnStatement(expression=items[0])

    def control_flow(self, items):
        return ControlFlow(condition=items[0], block=items[1])

    def assignment(self, items):
        return Assignment(identifier=items[0], index=items[1], value=items[2])

    def expression(self, items):
        return Expression(left=items[0], operator=items[1], right=items[2])

    def STRING(self, value):
        return value[1:-1]

    def INTEGER_CONSTANT(self, value):
        return int(value)

    def DECIMAL_CONSTANT(self, value):
        return float(value)

    def IDENTIFIER(self, value):
        return Name(name=value)

    def BOOLEAN_VALUE(self, value):
        return Value(meta=None, value=value.lower() == 'true')

parser = Lark(grammar, start='start', transformer=ToAst(), lexer=MyLexer, parser='lalr')
transformer = ast_utils.create_transformer(this_module, ToAst())

def parse(text):
    tree = parser.parse(text)
    return transformer.transform(tree)

# Example usage
source_code = """
print 10;
var x = 5;
var y = true;
x = 10;
x = y;
"""

ast = parse(source_code)
print(ast)

['print', '10', ';', 'var', 'x', '=', '5', ';', 'var', 'y', '=', 'true', ';', 'x', '=', '10', ';', 'x', '=', 'y', ';']


UnexpectedToken: Unexpected token Token('INTEGER_CONSTANT', '10') at line None, column None.
Expected one of: 
	* ROUND_OPEN
