In [1]:
from lark import Lark, Tree

In [4]:
# Lark grammar for a small C-like toy language: `;`-terminated statements,
# `/* ... */` comments, print, expressions, assignments (including indexed
# and compound forms), control flow, declarations, and try/catch/finally.
#
# The string is a regular (non-raw) Python literal, so every backslash that
# must reach Lark is doubled: `\\n` and `\\.` arrive as `\n` and `\.`.
#
# `decimal_constant` uses a plain regex with an escaped dot. The earlier form
# `/[0-9]+ '.' [0-9]+/` required literal spaces and quote characters inside
# the number, so an input such as `3.14` could never match it.
grammar = """
    start: statements

    statements: statement+

    statement: print_statement ";"
             | declaration ";" 
             | exception_handling 
             | return_statement ";"
             | control_flow 
             | expression ";" 
             | assignment ";"               
             | comments 

    comments: "/*" comment_text? "*/"

    comment_text: comment_char*

    comment_char: /[^\\n]/ | /\\n/

    print_statement: "print" "(" (expression | literal | identifier) (","(expression | literal | identifier))* ")"

    expression: expression operator operand
              | expression operator identifier "[" (identifier | integer_constant | expression) "]"
              | expression comparator expression
              | operand unary_operator
              | operand
              | function_call
              | "!" (BOOLEAN_VAR | identifier | expression)
              | BOOLEAN_VAR
              | identifier "[" (identifier | integer_constant| expression)"]"
              | "(" (expression) ")"

    assignment: identifier "[" (integer_constant | identifier | expression) "]" "=" expression
              | identifier ("," identifier)* "=" (literal | identifier | expression) ("," (literal | identifier | expression))*
              | identifier index "=" expression
              | identifier compound_operator expression

    index:     "[" (expression | integer_constant) "]" 
              | index "[" (expression |integer_constant) "]"
              
    control_flow: "func" identifier "(" parameters ")" block

                |"if" "(" expression ")" block
                | "elif" "(" expression ")" block
                | "else" block
                | "while" "(" expression ")" block
                | "do" block "while" "(" expression ")"
                | "for" "(" assignment ";" expression ";" (expression | assignment) ")" block
                | "break" ";"
                | "continue" ";"

    declaration: "tuple" identifier "=" "[" expression ("," expression)* "]"
                | "list" identifier "=" "[" expression ("," expression)* "]"
                | "list" identifier "=" "[" ("," | (expression))* "]"
                | "arr" identifier "=" "[" literal ("," literal)* "]"
                | "ExceptionType" identifier "=" identifier
                | "null" 
                | "list" identifier "=" matrix
                | "arr"  identifier "=" matrix
                |"var" identifier ("," identifier)* "=" (literal | identifier | expression) ("," (literal | identifier | expression))* 
                | "const" identifier ("," identifier)* "=" (literal | identifier | expression) ("," (literal | identifier | expression))* 
    
    matrix : "[" [items]"]"

    items: value ("," value)*
    value: matrix | literal

    exception_handling: "try" block "catch" "(" "ExceptionType" identifier ")" block "finally" block
                       | "throw" "ExceptionType" "(" comment_text ")" ";"


    block: "{" statements "}" | "{" "}"

    function_call: identifier "(" arguments ")"
    | identifier "." identifier "(" arguments ")" 


    operand: identifier
           | literal

    return_statement: "return" (expression)

    operator: "+"
            | "-"
            | "*"
            | "/"
            | "%"
            | "&"
            | "|"

    compound_operator: "+="
                      | "-="
                      | "*="
                      | "/="
                      | "%="
                      | "<<="
                      | ">>="
                      | "&="
                      | "|="
                      | "^="

    unary_operator: "++"
                    | "--"

    comparator: "=="
              | "!="
              | "<=" 
              | ">="
              | "<"
              | ">"
              | "||"
              | "&&"

    
    identifier: /[a-zA-Z_][a-zA-Z0-9_]*/

    literal: integer_constant
           | decimal_constant
           | string_literal
           | BOOLEAN_VAR

    keywords: "func" | "if" | "elif" | "else" | "while" | "do" | "for" | "break" | "continue" | "try" | "catch" | "finally" | "throw" | "var" | "const" | "true" | "false" | "null" | "return" | "print"

    BOOLEAN_VAR: "true" | "false"

    integer_constant: /[0-9]+/

    decimal_constant: /[0-9]+\\.[0-9]+/

    string_literal: /"[^"]*"/

    arguments: ("," | expression)*

    parameters: parameter ("," parameter)*
                | ("," expression)*

    parameter: "var" identifier | "const" identifier

    %import common.WS
    %ignore WS
"""

# Build the parser from the grammar string, with `start` as the entry rule.
parser = Lark(grammar, start="start")

# Sample program fed to the parser: an assignment through two index levels,
# surrounded by blank lines.
input_string = "\n\nmat[2][3]=2;\n"

def visualize_tree(tree, depth=0):
    """Print an indented ASCII outline of a parse tree to stdout.

    Each node is printed as ``+-<label>``, indented two spaces per level.
    A ``Tree`` node is labelled with its ``data`` attribute and followed by
    its children, each preceded by a ``|`` connector line at the child's
    indentation. Anything that is not a ``Tree`` is printed as ``str(tree)``.

    Args:
        tree: A ``Tree`` node or a leaf value.
        depth: Nesting level of ``tree``; controls the indentation.
    """
    indent = "  " * depth

    # Leaves have no children: print them and stop.
    if not isinstance(tree, Tree):
        print(f"{indent}+-{tree!s}")
        return

    print(f"{indent}+-{tree.data!s}")

    # The connector line is the same for every child of this node.
    connector = "  " * (depth + 1) + "|"
    for child in tree.children:
        print(connector)
        visualize_tree(child, depth + 1)

# Parse the sample input and print the resulting tree.
try:
    tree = parser.parse(input_string)
except Exception as e:
    # Top-level boundary of the demo: report the failure instead of a traceback.
    print("Parsing failed:", e)
else:
    # Kept outside the `try` so that an error raised while rendering the tree
    # is not misreported as a parse failure.
    visualize_tree(tree)
    print("Parsing successful.")
    
    
# change to `literal`: removed the alternative      | "!" (BOOLEAN_VAR | identifier )
# change to `expression`: removed the alternative   | identifier "[" (identifier | integer_constant| expression)"]" operand expression

    

# Known issues
    # still cannot prevent identifiers or function-call names from matching keywords
    # a++c still accepted                                                                        hopefully solved
    # a= a+=c                                                                                    hopefully solved
    # take another look at the expression rule
    # the if(expression) rule is too permissive, as it also accepts if(sum(2,3))
    # note: the expression rule does not differentiate between expressions that evaluate to boolean values and those that evaluate to an integer or string value


+-start
  |
  +-statements
    |
    +-statement
      |
      +-assignment
        |
        +-identifier
          |
          +-mat
        |
        +-index
          |
          +-index
            |
            +-expression
              |
              +-operand
                |
                +-literal
                  |
                  +-integer_constant
                    |
                    +-2
          |
          +-expression
            |
            +-operand
              |
              +-literal
                |
                +-integer_constant
                  |
                  +-3
        |
        +-expression
          |
          +-operand
            |
            +-literal
              |
              +-integer_constant
                |
                +-2
Parsing successful.


In [3]:
# Truthiness demo: the non-empty string "hello" is truthy, so the first
# alternative ("j") is printed rather than 2.
print("j" if "hello" else 2)

j
