In [1]:
# Run the ANTLR tool on BraKet.g4 targeting Python 3, emitting a visitor and no listener.
!antlr4 -Dlanguage=Python3 BraKet.g4 -visitor -no-listener

In [193]:
import sys
from antlr4 import *
from BraKetLexer import BraKetLexer
from BraKetParser import BraKetParser
from antlr4.error.ErrorListener import ErrorListener

class CollectingErrorListener(ErrorListener):
    """Error listener that stores every reported syntax error in a list.

    Attach one instance to a lexer or parser; afterwards ``errors`` holds one
    formatted message per ``syntaxError`` callback, in the order received.
    """

    def __init__(self):
        # Formatted messages, oldest first.
        self.errors = []

    def syntaxError(self, recognizer, offendingSymbol, line, column, msg, e):
        """Append the error as ``"line <line>:<column> <msg>"``.

        Only ``line``, ``column`` and ``msg`` are used; the remaining
        arguments are accepted to match the listener callback signature.
        """
        position = f"{line}:{column}"
        self.errors.append(f"line {position} {msg}")

def _format_token_table(tokens):
    """Return the token listing as text lines: header, rule, then one row per token.

    Each row holds the token's position in ``tokens``, its text, the symbolic
    name looked up in ``BraKetParser.symbolicNames`` (``"EOF"`` for type
    ``-1``), and its line and column.
    """
    rows = [
        f"{'INDEX':<6} | {'TEXT':<10} | {'TYPE':<15} | {'LINE':<5} | {'COL':<5}",
        "-" * 50,
    ]
    for index, token in enumerate(tokens):
        # Map the integer type ID to the name defined in the .g4 file.
        type_name = BraKetParser.symbolicNames[token.type] if token.type != -1 else "EOF"
        rows.append(
            f"{index:<6} | {token.text:<10} | {type_name:<15} | {token.line:<5} | {token.column:<5}"
        )
    return rows


def main(show_tokens=True, code="", file_dump=False):
    """Lex and parse ``code`` with the BraKet grammar and report the results.

    Args:
        show_tokens: When true, also emit a table of every token in the stream.
        code: Source text to parse. Defaults to the empty string.
        file_dump: When true, write the parse tree, token table and errors to
            ``ast.txt``, ``tokens.txt`` and ``errors.txt`` (UTF-8, relative to
            the current working directory) and print only a confirmation line
            for each; when false, print everything to stdout.

    Returns:
        None. All results are printed or written to files.
    """
    # 1. Provide the input
    input_stream = InputStream(code)

    # 2. Lexical analysis: swap the default listeners for a collecting one so
    #    errors are reported together at the end.
    lexer = BraKetLexer(input_stream)
    lexer.removeErrorListeners()
    lex_errors = CollectingErrorListener()
    lexer.addErrorListener(lex_errors)
    token_stream = CommonTokenStream(lexer)

    # 3. Parsing
    parser = BraKetParser(token_stream)
    parser.removeErrorListeners()
    parse_errors = CollectingErrorListener()
    parser.addErrorListener(parse_errors)
    tree = parser.program()  # 'program' must match the start rule of the .g4 grammar

    # 4. Print the tree
    print("--- Parse Tree ---")
    str_tree = tree.toStringTree(recog=parser)
    if file_dump:
        with open('ast.txt', mode='w', encoding='utf-8') as file:
            file.write(str_tree)
        print('AST saved to ast.txt.')
    else:
        print(str_tree)

    # 5. Output tokens (optional)
    if show_tokens:
        print("\n--- Tokens ---")
        # Actually call fill() so every token up to EOF is buffered, even if
        # the parser stopped consuming input early.
        token_stream.fill()
        table = _format_token_table(token_stream.tokens)

        if file_dump:
            with open('tokens.txt', mode='w', encoding='utf-8') as file:
                file.writelines(f"{row}\n" for row in table)
            print('Tokens saved to tokens.txt.')
        else:
            for row in table:
                print(row)

    # 6. Report errors (after tokens)
    print("\n--- Errors ---")
    has_errors = bool(lex_errors.errors or parse_errors.errors)
    if file_dump:
        with open('errors.txt', mode='w', encoding='utf-8') as file:
            if has_errors:
                if lex_errors.errors:
                    file.write("Lexer Errors:\n")
                    file.writelines(f"  {err}\n" for err in lex_errors.errors)
                    file.write("\n")

                if parse_errors.errors:
                    file.write("Parser Errors:\n")
                    file.writelines(f"  {err}\n" for err in parse_errors.errors)
            else:
                file.write("--- Errors ---\n")
                file.write("No errors.")
        print("Errors saved to errors.txt.")
    else:
        if has_errors:
            if lex_errors.errors:
                print("Lexer Errors:")
                for err in lex_errors.errors:
                    print("  " + err)

            if parse_errors.errors:
                print("Parser Errors:")
                for err in parse_errors.errors:
                    print("  " + err)
        else:
            print("No errors.")

# Calls main() with its defaults (empty source, token table shown, no file dump).
# In a notebook cell __name__ is '__main__', so this runs whenever the cell is executed.
if __name__ == '__main__':
    main()

--- Parse Tree ---
program

--- Tokens ---
INDEX  | TEXT       | TYPE            | LINE  | COL  
--------------------------------------------------
0      | <EOF>      | EOF             | 1     | 0    

--- Errors ---
No errors.


In [194]:
# Sample input: import statements, `const` declarations (numbers, an array,
# |ket>/<bra| names, tuples, a boolean expression) and two empty functions.
# NOTE: later cells rebind `code`; only the most recently executed assignment is parsed.
code='''
from _mamamo_ import _puke,test, test
from test import test
import filename

const i = 0
const x = t+(-1+9i)
const arr = [x, i, 1+1i, 1+0i]
const |t> = (1,1i)
const <t0| = (0,1) @ (0,1)
const not_op = (|t>)
const op = ((1i, 2), (1i, 2))
const booltest = !1 > (2 < 2) || (3 && 0) == 1

func test_function () {}

func test2 () {}
'''

In [195]:
# Sample input: function definitions, parameters with defaults (`x` appears
# twice in test2's parameter list) and an if / elif / else chain.
# NOTE: this rebinds `code`, replacing any sample assigned by an earlier cell.
code='''
func testfunc() {}
func test2(x, y=1, x=2) {
if ( x == 0 ) {
test = 1
} elif ( x == 1 ) {
test = 2
} elif ( x == 2 ) {
test = 4
} else {
test = 0
} 
}
'''

In [196]:
# Sample input: `for`, `while` and `do ... while` loops, a nested
# if / elif / else, and a `main()` block that calls test2.
# NOTE: this rebinds `code`, replacing any sample assigned by an earlier cell.
code='''
func testfunc() {}
func test2(x) {
    test = 0
    for (i = 0; i < x; x = x + 1) {
        test = test + 1
    }
    test = 0
    while (test < 4) {
        test = test + 1
    }
    x = 0
    do {
        x = x + 1
        if (x == 1) {
            test = 1
        } elif (x == 2) {
            test = 2
        } else {
            test = 3
        }
    } while (x < 10)
}
main() {
    test2(2)
}
'''

In [197]:
# Sample input: brace-delimited struct literals with fields and member
# functions, member access (`struct.x`), and a `main()` block.
# NOTE: this rebinds `code`, replacing any sample assigned by an earlier cell.
code='''
const struct_test = {
 x=0,
 func test() {
     print(x)
 },
 y=1
}
func testFun(struct) {
    print(struct.x)
}
main() {
    struct_test2 = {
        x = 1,
        y = 1,
        func test() {
            print(y, x)
        }
    }
    testFun(struct_test2)
}
'''

In [198]:
# Sample input: two ket constants and a `main()` whose opening `{` is omitted;
# the parse tree recorded for this sample contains `<missing '{'>`.
# This is the last assignment to `code` before the main(code=code, ...) call.
code='''
const |ket0> = (1,0)
const |ket1> = (0,1)

main() 
    x = <ket1|@<ket1|*|ket0>@|ket0>
    print(x)
    return
}
'''

In [200]:
# Parse the sample currently bound to `code` and print the tree, tokens and errors inline.
main(code=code, file_dump=False)

--- Parse Tree ---
(program (const_decl_list (const_decl const (var_decl |ket0> = (num_expression (num_term (num_factor (dirac_expression (braket_vector ( (braket_value 1) , (braket_value 0) )))))))) (const_decl const (var_decl |ket1> = (num_expression (num_term (num_factor (dirac_expression (braket_vector ( (braket_value 0) , (braket_value 1) ))))))))) (main_function main ( ) <missing '{'> (statement_list (statement (assign_statement (var_decl x = (expression (num_expression (num_term (num_factor (dirac_expression (dirac_expression <ket1|) @ (dirac_expression <ket1|))) * (num_term (num_factor (dirac_expression (dirac_expression |ket0>) @ (dirac_expression |ket0>)))))))))) (statement (func_call_statement print ( (arg_list (arg x)) ))) (statement (return_statement return expression))) }))

--- Tokens ---
INDEX  | TEXT       | TYPE            | LINE  | COL  
--------------------------------------------------
0      | const      | CONST           | 2     | 0    
1      | |ket0>     | KET_