In [1]:
!antlr4 -Dlanguage=Python3 BraKet.g4 -visitor -no-listener

In [2]:
import sys
from antlr4 import *
from BraKetLexer import BraKetLexer
from BraKetParser import BraKetParser
from antlr4.error.ErrorListener import ErrorListener

class CollectingErrorListener(ErrorListener):
    """Error listener that records reported syntax errors as formatted strings.

    Attach one instance to the lexer and another to the parser; after the run,
    ``errors`` holds one ``"line <line>:<column> <msg>"`` entry per reported
    error, in the order they were reported.
    """

    def __init__(self):
        # Initialise the base listener first so this class keeps working if the
        # runtime's ErrorListener ever gains state of its own.
        super().__init__()
        self.errors = []  # formatted error messages, oldest first

    def syntaxError(self, recognizer, offendingSymbol, line, column, msg, e):
        """Store one syntax error; only ``line``, ``column`` and ``msg`` are used."""
        self.errors.append(f"line {line}:{column} {msg}")

def _format_token_table(tokens):
    """Return the token dump as text lines: header, separator, one row per token."""
    lines = [
        f"{'INDEX':<6} | {'TEXT':<10} | {'TYPE':<15} | {'LINE':<5} | {'COL':<5}",
        "-" * 50,
    ]
    for i, token in enumerate(tokens):
        # Map the integer type ID to the name defined in the .g4 file; type -1 is
        # shown as "EOF" instead of being looked up in the symbolic-name table.
        t_type = BraKetParser.symbolicNames[token.type] if token.type != -1 else "EOF"
        lines.append(
            f"{i:<6} | {token.text:<10} | {t_type:<15} | {token.line:<5} | {token.column:<5}"
        )
    return lines


def _format_error_group(title, errors):
    """Return ``[title, '  <err>', ...]`` for a non-empty error list, else ``[]``."""
    if not errors:
        return []
    return [title] + [f"  {err}" for err in errors]


def main(show_tokens=True, code="", file_dump=False):
    """Lex and parse BraKet source text and report the tree, tokens and errors.

    Args:
        show_tokens: When true, also emit a table of every token in the stream.
        code: The source text to analyse.
        file_dump: When true, write the results to ``ast.txt``, ``tokens.txt``
            and ``errors.txt`` (relative to the working directory) and print
            only short confirmations; when false, print everything to stdout.

    Returns:
        None. All results are printed or written to the files named above.
    """
    # 1. Provide the input
    input_stream = InputStream(code)

    # 2. Lexical Analysis
    # The existing listeners are swapped for a collecting one so that errors
    # can be reported together in step 6 instead of as they occur.
    lexer = BraKetLexer(input_stream)
    lexer.removeErrorListeners()
    lex_errors = CollectingErrorListener()
    lexer.addErrorListener(lex_errors)
    token_stream = CommonTokenStream(lexer)

    # 3. Parsing
    parser = BraKetParser(token_stream)
    parser.removeErrorListeners()
    parse_errors = CollectingErrorListener()
    parser.addErrorListener(parse_errors)
    tree = parser.program()  # `program` must match the start rule in BraKet.g4

    # 4. Print the Tree
    print("--- Parse Tree ---")
    str_tree = tree.toStringTree(recog=parser)
    if file_dump:
        with open('ast.txt', mode='w', encoding='utf-8') as file:
            file.write(str_tree)
        print('AST saved to ast.txt.')
    else:
        print(str_tree)

    # 5. Output Tokens (Optional)
    if show_tokens:
        print("\n--- Tokens ---")
        # Fill the stream so every token is loaded, even ones the parser never
        # requested. (The original referenced `fill` without calling it.)
        token_stream.fill()
        table = "\n".join(_format_token_table(token_stream.tokens))

        if file_dump:
            with open('tokens.txt', mode='w', encoding='utf-8') as file:
                file.write(table + "\n")
            print('Tokens saved to tokens.txt.')
        else:
            print(table)

    # 6. Print Errors (after tokens)
    print("\n--- Errors ---")
    lexer_lines = _format_error_group("Lexer Errors:", lex_errors.errors)
    parser_lines = _format_error_group("Parser Errors:", parse_errors.errors)

    if file_dump:
        with open('errors.txt', mode='w', encoding='utf-8') as file:
            if lexer_lines:
                # The lexer group is followed by one blank line in the file.
                file.write("\n".join(lexer_lines) + "\n\n")
            if parser_lines:
                file.write("\n".join(parser_lines) + "\n")
            if not (lexer_lines or parser_lines):
                # NOTE(review): the header is written to the file only in the
                # no-error case; kept as-is to preserve the existing file format.
                file.write("--- Errors ---\n")
                file.write("No errors.")
        print("Errors saved to errors.txt.")
    else:
        if lexer_lines or parser_lines:
            for line in lexer_lines + parser_lines:
                print(line)
        else:
            print("No errors.")

# This guard is true when the cell is executed in the notebook, so main() runs
# here with its defaults (empty source text, token table shown, output to
# stdout) -- which is why the cell output shows a bare `program` tree and a
# single <EOF> token.
if __name__ == '__main__':
    main()

--- Parse Tree ---
program

--- Tokens ---
INDEX  | TEXT       | TYPE            | LINE  | COL  
--------------------------------------------------
0      | <EOF>      | EOF             | 1     | 0    

--- Errors ---
No errors.


In [3]:
# Sample: top-level declarations only -- `from ... import` / `import` lines,
# `const` declarations (numeric literals with an `i` suffix, an array, ket and
# bra names, nested tuples, a boolean expression) and two empty `func` bodies.
code='''
from _file1_ import _, test, test
from test import test
import filename

const i = 0
const x = t+(-1+9i)
const arr = [x, i, 1+1i, 1+0i]
const |t> = (1,1i)
const <t0| = (0,1) @ (0,1)
const not_op = (|t>)
const op = ((1i, 2), (1i, 2))
const booltest = !1 > (2 < 2) || (3 && 0) == 1

func test_function () {}

func test2 () {}
'''

In [4]:
main(code=code, file_dump=False)

--- Parse Tree ---
(program (import_list (import_statement from _file1_ import (func_list _ , test , test)) (import_statement from test import (func_list test)) (import_statement import filename)) (const_decl_list (const_decl const (var_decl i = (expression (num_expression (num_term (num_factor 0)))))) (const_decl const (var_decl x = (expression (num_expression (num_term (num_factor (dirac_expression t))) + (num_expression (num_term (num_factor ( (num_expression (num_term (num_factor -1+9i))) )))))))) (const_decl const (var_decl arr = (expression (array [ (expression x) , (expression i) , (expression (num_expression (num_term (num_factor 1)) + (num_expression (num_term (num_factor 1i))))) , (expression (num_expression (num_term (num_factor 1)) + (num_expression (num_term (num_factor 0i))))) ])))) (const_decl const (var_decl |t> = (num_expression (num_term (num_factor (dirac_expression (braket_vector ( (braket_value 1) , (braket_value 1i) )))))))) (const_decl const (var_decl <t0| = (num

In [14]:
# Sample: function declarations with positional and default parameters, a ket
# assignment, an if / elif / else chain, a function call, `return`, and an
# empty `main`.
# NOTE(review): `x` appears twice in test2's parameter list (`x, y=1, x=2`);
# the recorded parse tree places both in the param_list -- confirm that this
# duplicate is intentional.
code='''
func testfunc() {
    return 8
}
func test2(x, y=1, x=2) {
|_0> = (1,0)
if ( x == 0 ) {
test = 1
} elif ( x == 1 ) {
test = 2
} elif ( x == 2 ) {
test = testfunc()
} else {
return |_0>*<_0|
} 
}
main() {}
'''

In [15]:
main(code=code, file_dump=False)

--- Parse Tree ---
(program (func_decl_list (func_decl func testfunc ( ) { (statement_list (statement (return_statement return (expression (num_expression (num_term (num_factor 8))))))) }) (func_decl func test2 ( (param_list (identifier_list x) , (default_list (assign_statement (var_decl y = (expression (num_expression (num_term (num_factor 1)))))) , (assign_statement (var_decl x = (expression (num_expression (num_term (num_factor 2)))))))) ) { (statement_list (statement (assign_statement (var_decl |_0> = (num_expression (num_term (num_factor (dirac_expression (braket_vector ( (braket_value 1) , (braket_value 0) ))))))))) (statement (if_statement if ( (bool_expression (bool_or (bool_and (bool_cmp (num_expression (num_term (num_factor (dirac_expression x)))) (num_comp (eq_comp ==)) (num_expression (num_term (num_factor 0))))))) ) { (statement_list (statement (assign_statement (var_decl test = (expression (num_expression (num_term (num_factor 1)))))))) } (elif elif ( (bool_expression (bo

In [61]:
# Sample (negative case): `y` is assigned a string literal with no closing
# quote. The recorded run reports a lexer "token recognition error" at line
# 4:8 and the parse tree ends with `<missing '}'>`.
code='''
main() {
    x = ''
    y = "test string

    print(x, y)
}
'''

In [62]:
main(code=code, file_dump=False)

--- Parse Tree ---
(program (main_function main ( ) { (statement_list (statement (assign_statement (var_decl x = (expression (num_expression (num_term (num_factor ''))))))) (statement (assign_statement (var_decl y = expression)))) <missing '}'>))

--- Tokens ---
INDEX  | TEXT       | TYPE            | LINE  | COL  
--------------------------------------------------
0      | main       | MAIN            | 2     | 0    
1      | (          | LPAREN          | 2     | 4    
2      | )          | RPAREN          | 2     | 5    
3      | {          | LCURLY          | 2     | 7    
4      | x          | IDENTIFIER      | 3     | 4    
5      | =          | ASSIGN          | 3     | 6    
6      | ''         | CHAR            | 3     | 8    
7      | y          | IDENTIFIER      | 4     | 4    
8      | =          | ASSIGN          | 4     | 6    
9      | <EOF>      | EOF             | 8     | 0    

--- Errors ---
Lexer Errors:
  line 4:8 token recognition error at: '"test string\n\n    pr

In [63]:
# Sample: struct literals holding fields and a nested `func` (once as a
# `const`, once as a local in `main`), member access (`struct.x`), a nested
# array literal, and a call that passes a struct as an argument.
code='''
const _0 = {
 x=0,
 func test() {
     print(x)
 },
 y=1
}
func testFun(struct) {
    print(struct.x)
}
main() {
    struct_test2 = {
        x = 1,
        y = 1,
        func test() {
            print(y, x)
        }
    }
    array = [[0], [1], [2]]
    testFun(struct_test2)
}
'''

In [64]:
main(code=code, file_dump=False)

--- Parse Tree ---
(program (const_decl_list (const_decl const (var_decl _0 = (expression (struct { (struct_value (var_decl x = (expression (num_expression (num_term (num_factor 0))))) , (struct_value (func_decl func test ( ) { (statement_list (statement (func_call_statement print ( (arg_list (arg x)) )))) }) , (struct_value (var_decl y = (expression (num_expression (num_term (num_factor 1)))))))) }))))) (func_decl_list (func_decl func testFun ( (param_list (identifier_list struct)) ) { (statement_list (statement (func_call_statement print ( (arg_list (arg (struct_access struct . x))) )))) })) (main_function main ( ) { (statement_list (statement (assign_statement (var_decl struct_test2 = (expression (struct { (struct_value (var_decl x = (expression (num_expression (num_term (num_factor 1))))) , (struct_value (var_decl y = (expression (num_expression (num_term (num_factor 1))))) , (struct_value (func_decl func test ( ) { (statement_list (statement (func_call_statement print ( (arg_list 

In [55]:
# Sample: ket constants, tuple-of-tuples constants (one scaled by `1/2`), and
# a `main` that combines bra/ket operands with `@` and `*`, evaluates a
# boolean expression, does arithmetic on `i`-suffixed literals, then calls
# `print` and `return`.
code='''
const |ket0> = (1,0)
const |ket1> = (0,1)
const op = ((1,0), (0,1))
const op2 = 1/2 * ((1i,1i), (1i,1i)) 

main() {
    x = <ket1|@<ket1|*|ket0>@|ket0>
    y = ((1 < 2) || true) && 3 > 4
    z = (1+2i) * -1
    print(x)
    return x 
}
'''

In [56]:
main(code=code, file_dump=False)

--- Parse Tree ---
(program (const_decl_list (const_decl const (var_decl |ket0> = (num_expression (num_term (num_factor (dirac_expression (braket_vector ( (braket_value 1) , (braket_value 0) )))))))) (const_decl const (var_decl |ket1> = (num_expression (num_term (num_factor (dirac_expression (braket_vector ( (braket_value 0) , (braket_value 1) )))))))) (const_decl const (var_decl op = (expression (num_expression (num_term (num_factor (dirac_expression (op ( (braket_vector ( (braket_value 1) , (braket_value 0) )) , (braket_vector ( (braket_value 0) , (braket_value 1) )) ))))))))) (const_decl const (var_decl op2 = (expression (num_expression (num_term (num_factor 1) / (num_term (num_factor 2) * (num_term (num_factor (dirac_expression (op ( (braket_vector ( (braket_value 1i) , (braket_value 1i) )) , (braket_vector ( (braket_value 1i) , (braket_value 1i) )) )))))))))))) (main_function main ( ) { (statement_list (statement (assign_statement (var_decl x = (expression (num_expression (num_ter