# BSCS-A-6th

# Muhammad Hadeed Tariq (1175)
# Fahad Ali (1154)
# Syed Junaid Jaffery (1167)


---
---
---

# **EngC to C++ Transpiler**

# Importing in the Lexer


In [6]:
from Lexer_Hadeed.token_definition import token_defs
from Lexer_Hadeed.tokenize_regex import tokenize_regex
from Lexer_Hadeed.postfix_conversion import to_postfix
from Lexer_Hadeed.postfix_to_nfa import postfix_to_nfa,combine_nfas,print_nfa
from Lexer_Hadeed.nfa_to_dfa import nfa_to_dfa
from Lexer_Hadeed.plot_nfa import plot_nfa_with_graphviz
from Lexer_Hadeed.optimize_dfa import DFAOptimizer
from Lexer_Hadeed.dfa_table import DFATable
from Lexer_Hadeed.scanner import LexicalScanner
from Lexer_Hadeed.Token_class import Token
from Parser_Junaid.LR1Parser import green_text, normal_text_start

import io
import sys

# Create a context manager to suppress all output
class SuppressOutput:
    def __enter__(self):
        self._original_stdout = sys.stdout
        sys.stdout = io.StringIO()
        return self
    
    def __exit__(self, exc_type, exc_val, exc_tb):
        sys.stdout = self._original_stdout


# print("Processing Lexer Token Definitions...")
# print("Processing Lexer Token Definitions...silently")
# print("="*80)

# comment out the following line and then Dedent the rest of the code below to see the output, the print statements are still there, I just suppressed them for cleaner output
# with SuppressOutput():
results = []
for token_name, regex in token_defs:
    print(f"\n📝 Processing: {token_name} -> {regex}")

    # Tokenize and convert to postfix
    tokens = tokenize_regex(regex)
    print(f"   Tokens: {tokens}")

    postfix = to_postfix(tokens)
    print(f"   Postfix: {postfix}")

    # Convert to NFA and display
    nfa = postfix_to_nfa(postfix)
    if nfa:
        # print_nfa(nfa)
        filename = f"lexer_{token_name.lower()}"
        display_result = plot_nfa_with_graphviz(nfa, f"{token_name}: {regex}")
        results.append((token_name, regex, nfa, None))
    else:
        print(f"❌ Failed to create NFA for {token_name}")

    print("-" * 60)

#Combine all NFAs into a single NFA and display

final_nfa = combine_nfas(results)
# if final_nfa:
    # print("\n Combined Final NFA:")
    # print_nfa(final_nfa)
    # plot_nfa_with_graphviz(final_nfa, "Combined Final NFA", "final_combined_nfa")

#Convert the final NFA to a DFA
print("\n🔄 Converting NFA to DFA...")
start_dfa,all_dfa_states = nfa_to_dfa(final_nfa)

# print("\n✅ DFA States and Transitions:")
# for state in all_dfa_states:
#     acc = "Accepting" if state.is_accepting else "Non-Accepting"
#     token = f"Token: {state.token_type}" if state.token_type else ""
#     print(f"{state} ({acc}) {token}")
#     for symbol, target in state.transitions.items():
#         print(f"   {state} --{symbol}--> {target}")

#Optimize DFA
optimizer = DFAOptimizer(start_dfa, all_dfa_states)
min_start, min_states = optimizer.minimize_dfa()

# Build transition table
dfa_table = DFATable(min_start, min_states)
dfa_table.print_table_stats()
dfa_table.print_table()
dfa_table.export_to_excel() 

# Create scanner
scanner = LexicalScanner(dfa_table, skip_terminators=True)    
print(f"✅ {green_text}Scanner ready for use!{normal_text_start}")
print("="*80)


📝 Processing: KEYWORD -> if|else|for|while|main|do
   Tokens: ['(\\bif\\b|\\belse\\b|\\bfor\\b|\\bwhile\\b|\\bmain\\b|\\bdo\\b)']
   Postfix: ['(\\bif\\b|\\belse\\b|\\bfor\\b|\\bwhile\\b|\\bmain\\b|\\bdo\\b)']
✅ NFA graph saved to: NFAs\KEYWORD_if_else_for_while_main_do.png
------------------------------------------------------------

📝 Processing: TYPE -> integer|float|string|boolean
   Tokens: ['(\\binteger\\b|\\bfloat\\b|\\bstring\\b|\\bboolean\\b)']
   Postfix: ['(\\binteger\\b|\\bfloat\\b|\\bstring\\b|\\bboolean\\b)']
✅ NFA graph saved to: NFAs\TYPE_integer_float_string_boolean.png
------------------------------------------------------------

📝 Processing: BOOL -> true|false
   Tokens: ['(\\btrue\\b|\\bfalse\\b)']
   Postfix: ['(\\btrue\\b|\\bfalse\\b)']
✅ NFA graph saved to: NFAs\BOOL_true_false.png
------------------------------------------------------------

📝 Processing: ARITHMETIC_OP -> add|subtract|multiply|divide|remainder|power
   Tokens: ['(\\badd\\b|\\bsubtract\\b|\\bmu

# Importing in the Parser


In [8]:
from Parser_Junaid.LR1Parser import LR1Parser, green_text, normal_text_start

parser = LR1Parser()
parser.define_grammar()
parser.augment_grammar()
parser.compute_first_sets()
parser.build_canonical_collection()
parser.build_parsing_tables()


print(f"✅ {green_text}Parser is ready!{normal_text_start}")
print("="*80)

----- PHASE 0: GRAMMAR DEFINITION -----
Terminals count: 43
Terminals: ['if', 'else', 'while', 'for', 'main', 'function', 'return', 'integer', 'float', 'string', 'boolean', 'add', 'subtract', 'multiply', 'divide', 'remainder', 'power', 'is equal to', 'is not equal to', 'is greater than', 'is less than', 'is greater than or equal to', 'is less than or equal to', 'and', 'or', 'not', 'equals to', 'IDENTIFIER', 'NUMBER', 'FLOAT', 'STRING', 'BOOL', 'COMMA', 'DOT', 'COLON', 'semicolon', '(', ')', '{', '}', '[', ']', '$']
Non-terminals count: 19
Non-terminals: ['Program', 'MainFunction', 'StatementList', 'Statement', 'Declaration', 'Assignment', 'IfStatement', 'WhileStatement', 'ForStatement', 'Expression', 'LogicalExpr', 'RelationalExpr', 'ArithmeticExpr', 'Term', 'PowerExpr', 'Factor', 'Condition', 'Type', 'ReturnStatement']
Start Symbol: Program
Productions count: 51
Program -> MainFunction
MainFunction -> Type main ( ) { StatementList }
StatementList -> Statement
StatementList -> Statemen

# Importing in the Code Generator


In [9]:
from Optimizer_Assembly_CPP_Fahad.optimizer_assembly_cpp import complete_compiler_pipeline



print(f"✅ {green_text}Code Generator and Optimizer is ready!{normal_text_start}")
print("="*80)

✅ [32mCode Generator and Optimizer is ready![0m


---
---

---


# Test-1: Example-1 from Lexer.ipynb


## Tokenization


In [10]:
test_code_1 = '''
integer main () {
    integer x equals to 5 semicolon 
    integer y equals to 10 semicolon
    boolean result equals to x is less than y semicolon
    integer sum equals to 0 semicolon
    if (result) {
        sum equals to x add y semicolon
        }
    return sum semicolon
}
'''
    
print(f"\n🧪 Testing scanner with sample code:")
print(f"Input: {repr(test_code_1)}")
print(f"Length: {len(test_code_1)} characters")
    
tokens_1 = scanner.scan(test_code_1)
    
print(f"\n📝 Generated tokens:")
for i, token in enumerate(tokens_1):
    print(f"  {i+1:2d}. {token}")



🧪 Testing scanner with sample code:
Input: '\ninteger main () {\n    integer x equals to 5 semicolon \n    integer y equals to 10 semicolon\n    boolean result equals to x is less than y semicolon\n    integer sum equals to 0 semicolon\n    if (result) {\n        sum equals to x add y semicolon\n        }\n    return sum semicolon\n}\n'
Length: 282 characters
🔍 Scanning input: 282 characters
✅ Scanning complete: 44 tokens generated

📝 Generated tokens:
   1. TYPE(integer)
   2. KEYWORD(main)
   3. LPAREN(()
   4. RPAREN())
   5. LBRACE({)
   6. TYPE(integer)
   7. IDENTIFIER(x)
   8. ASSIGN_OP(equals to)
   9. NUMBER(5)
  10. SEMI(semicolon)
  11. TYPE(integer)
  12. IDENTIFIER(y)
  13. ASSIGN_OP(equals to)
  14. NUMBER(10)
  15. SEMI(semicolon)
  16. TYPE(boolean)
  17. IDENTIFIER(result)
  18. ASSIGN_OP(equals to)
  19. IDENTIFIER(x)
  20. RELATIONAL_OP(is less than)
  21. IDENTIFIER(y)
  22. SEMI(semicolon)
  23. TYPE(integer)
  24. IDENTIFIER(sum)
  25. ASSIGN_OP(equals to)
  26. 

## Parsing


In [11]:
ast_1, tac_1 = parser.test_program(
    tokens_1, 
    "Declaration, addition, and an if statement", 
    """integer main () {
    integer x equals to 5 semicolon 
    integer y equals to 10 semicolon
    boolean result equals to x is less than y semicolon
    integer sum equals to 0 semicolon
    if (result) {
        sum equals to x add y semicolon
        }
    return sum semicolon
}""",
    create_visualization=True
)



[35m=== TEST: Declaration, addition, and an if statement ===
Testing: integer main () {
    integer x equals to 5 semicolon 
    integer y equals to 10 semicolon
    boolean result equals to x is less than y semicolon
    integer sum equals to 0 semicolon
    if (result) {
        sum equals to x add y semicolon
        }
    return sum semicolon
}
[0m
Parsing log saved to 'test-declaration,-addition,-and-an-if-statement-log.txt'

[34m=== SEMANTIC ANALYSIS ===
[32m✓ No semantic errors found![37m

[32mParsing: SUCCESS[0m
[32mSemantic Analysis: SUCCESS[0m

[36mParse Tree Structure:[0m
[36mProgram
[37m[36m  MainFunction
[37m[36m    Type
[37m[36m      TYPE: integer
[37m[36m    KEYWORD: main
[37m[36m    LPAREN: (
[37m[36m    RPAREN: )
[37m[36m    LBRACE: {
[37m[36m    StatementList
[37m[36m      StatementList
[37m[36m        StatementList
[37m[36m          StatementList
[37m[36m            StatementList
[37m[36m              StatementList
[37m[36m   

## Code Generation


In [None]:
# code-gen
complete_compiler_pipeline("test_code_1", tac_1)


🔥 COMPLETE COMPILER PIPELINE: test_code_1

📋 STAGE 1: TAC Optimization
----------------------------------------
[Enhanced] 🚀 Starting enhanced TAC optimization with proper type inference...
[Enhanced] 🔍 Extracting type declarations...
[Enhanced]    📝 Declared x as int
[Enhanced]    📝 Declared y as int
[Enhanced]    📝 Declared result as bool
[Enhanced]    📝 Declared sum as int
[Enhanced] 🔍 Inferring temporary variable types...
[Enhanced]    🎯 Inferred t0 as int from: 5
[Enhanced]    🎯 Inferred t1 as int from: 10
[Enhanced]    🎯 Inferred t2 as bool from: x is less than y
[Enhanced]    🎯 Inferred t3 as int from: 0
[Enhanced]    🎯 Inferred t4 as int from: x add y
[Enhanced] 📋 Detected code type: conditional
[Enhanced] 🧠 Optimizing conditional code with type support...
[Enhanced]    📌 Variable: t0 (int) = 5 -> 5
[Enhanced]    🔄 Copy: x (int) = t0(5) -> 5
[Enhanced]    📌 Variable: t1 (int) = 10 -> 10
[Enhanced]    🔄 Copy: y (int) = t1(10) -> 10
[Enhanced]    📌 Variable: t3 (int) = 0 -> 0
[E

{'success': True,
 'stages_completed': 5,
 'optimization_stats': {'constants_propagated': 3,
  'expressions_folded': 1,
  'branches_evaluated': 1,
  'instructions_eliminated': 19,
  'smart_returns_fixed': 0,
  'control_flow_optimized': 1,
  'type_conversions': 0,
  'reduction_percentage': 90.47619047619048},
 'return_code': 15,
 'files_generated': ['pipeline_test_code_1_optimization.txt',
  'pipeline_test_code_1_generated.cpp',
  'pipeline_test_code_1_assembly.s'],
 'errors': [],
 'assembly_code': ['.global main',
  '.text',
  '',
  'main:',
  '    # Function prologue',
  '    pushq %rbp',
  '    movq %rsp, %rbp',
  '    subq $16, %rsp',
  '',
  '    # return 15',
  '    movq $15, %rax',
  '',
  '    # Function epilogue',
  '    movq %rbp, %rsp',
  '    popq %rbp',
  '    ret']}

---
---

---


# Test-2: Example-1 from tester.ipynb


## Tokenization


In [21]:
test_code_2 = """
integer main () {
    integer a equals to 10 semicolon
    integer b equals to 20 semicolon
    integer c equals to b subtract a semicolon
    boolean is_greater equals to b is greater than a semicolon
    if (is_greater) {
        c equals to c add 5 semicolon
    }
}"""


print(f"\n🧪 Testing scanner with sample code:")
print(f"Input: {repr(test_code_2)}")
print(f"Length: {len(test_code_2)} characters")
    
tokens_2 = scanner.scan(test_code_2)
    
print(f"\n📝 Generated tokens:")
for i, token in enumerate(tokens_2):
    print(f"  {i+1:2d}. {token}")


🧪 Testing scanner with sample code:
Input: '\ninteger main () {\n    integer a equals to 10 semicolon\n    integer b equals to 20 semicolon\n    integer c equals to b subtract a semicolon\n    boolean is_greater equals to b is greater than a semicolon\n    if (is_greater) {\n        c equals to c add 5 semicolon\n    }\n}'
Length: 270 characters
🔍 Scanning input: 270 characters
✅ Scanning complete: 43 tokens generated

📝 Generated tokens:
   1. TYPE(integer)
   2. KEYWORD(main)
   3. LPAREN(()
   4. RPAREN())
   5. LBRACE({)
   6. TYPE(integer)
   7. IDENTIFIER(a)
   8. ASSIGN_OP(equals to)
   9. NUMBER(10)
  10. SEMI(semicolon)
  11. TYPE(integer)
  12. IDENTIFIER(b)
  13. ASSIGN_OP(equals to)
  14. NUMBER(20)
  15. SEMI(semicolon)
  16. TYPE(integer)
  17. IDENTIFIER(c)
  18. ASSIGN_OP(equals to)
  19. IDENTIFIER(b)
  20. ARITHMETIC_OP(subtract)
  21. IDENTIFIER(a)
  22. SEMI(semicolon)
  23. TYPE(boolean)
  24. IDENTIFIER(is_greater)
  25. ASSIGN_OP(equals to)
  26. IDENTIFIER(b)
 

## Parsing


In [22]:
ast_2, tac_2 = parser.test_program(
    tokens_2, 
    "Declaration and return statement", 
    """integer main () {
    integer a equals to 10 semicolon
    return a semicolon
}""",
    create_visualization=True
)


[35m=== TEST: Declaration and return statement ===
Testing: integer main () {
    integer a equals to 10 semicolon
    return a semicolon
}
[0m
Parsing log saved to 'test-declaration-and-return-statement-log.txt'

[34m=== SEMANTIC ANALYSIS ===
[32m✓ No semantic errors found![37m

[32mParsing: SUCCESS[0m
[32mSemantic Analysis: SUCCESS[0m

[36mParse Tree Structure:[0m
[36mProgram
[37m[36m  MainFunction
[37m[36m    Type
[37m[36m      TYPE: integer
[37m[36m    KEYWORD: main
[37m[36m    LPAREN: (
[37m[36m    RPAREN: )
[37m[36m    LBRACE: {
[37m[36m    StatementList
[37m[36m      StatementList
[37m[36m        StatementList
[37m[36m          StatementList
[37m[36m            StatementList
[37m[36m              Statement
[37m[36m                Declaration
[37m[36m                  Type
[37m[36m                    TYPE: integer
[37m[36m                  IDENTIFIER: a
[37m[36m                  ASSIGN_OP: equals to
[37m[36m                  Expr

## Code Generation


In [23]:
# code gen
complete_compiler_pipeline("test_code_2", tac_2)


🔥 COMPLETE COMPILER PIPELINE: test_code_2

📋 STAGE 1: TAC Optimization
----------------------------------------
[Enhanced] 🚀 Starting enhanced TAC optimization with proper type inference...
[Enhanced] 🔍 Extracting type declarations...
[Enhanced]    📝 Declared a as int
[Enhanced]    📝 Declared b as int
[Enhanced]    📝 Declared c as int
[Enhanced]    📝 Declared is_greater as bool
[Enhanced] 🔍 Inferring temporary variable types...
[Enhanced]    🎯 Inferred t0 as int from: 10
[Enhanced]    🎯 Inferred t1 as int from: 20
[Enhanced]    🎯 Inferred t2 as int from: b subtract a
[Enhanced]    🎯 Inferred t3 as bool from: b is greater than a
[Enhanced]    🎯 Inferred t4 as int from: 5
[Enhanced]    🎯 Inferred t5 as int from: c add t4
[Enhanced] 📋 Detected code type: conditional
[Enhanced] 🧠 Optimizing conditional code with type support...
[Enhanced]    📌 Variable: t0 (int) = 10 -> 10
[Enhanced]    🔄 Copy: a (int) = t0(10) -> 10
[Enhanced]    📌 Variable: t1 (int) = 20 -> 20
[Enhanced]    🔄 Copy: b (i

{'success': True,
 'stages_completed': 5,
 'optimization_stats': {'constants_propagated': 3,
  'expressions_folded': 2,
  'branches_evaluated': 1,
  'instructions_eliminated': 19,
  'smart_returns_fixed': 0,
  'control_flow_optimized': 1,
  'type_conversions': 0,
  'reduction_percentage': 90.47619047619048},
 'return_code': 5,
 'files_generated': ['pipeline_test_code_2_optimization.txt',
  'pipeline_test_code_2_generated.cpp',
  'pipeline_test_code_2_assembly.s'],
 'errors': [],
 'assembly_code': ['.global main',
  '.text',
  '',
  'main:',
  '    # Function prologue',
  '    pushq %rbp',
  '    movq %rsp, %rbp',
  '    subq $16, %rsp',
  '',
  '    # return 5',
  '    movq $5, %rax',
  '',
  '    # Function epilogue',
  '    movq %rbp, %rsp',
  '    popq %rbp',
  '    ret']}

---
---

---


# Test-3: Area Calculation


## Tokenization


In [17]:
test_code_3 = """
integer main () {
    float radius equals to 5 semicolon
    float pi equals to 3.14 semicolon
    float area equals to pi multiply radius multiply radius semicolon
    return area semicolon
}
"""


print(f"\n🧪 Testing scanner with sample code:")
print(f"Input: {repr(test_code_3)}")
print(f"Length: {len(test_code_3)} characters")
    
tokens_3 = scanner.scan(test_code_3)
    
print(f"\n📝 Generated tokens:")
for i, token in enumerate(tokens_3):
    print(f"  {i+1:2d}. {token}")


🧪 Testing scanner with sample code:
Input: '\ninteger main () {\n    float radius equals to 5 semicolon\n    float pi equals to 3.14 semicolon\n    float area equals to pi multiply radius multiply radius semicolon\n    return area semicolon\n}\n'
Length: 194 characters
🔍 Scanning input: 194 characters
✅ Scanning complete: 29 tokens generated

📝 Generated tokens:
   1. TYPE(integer)
   2. KEYWORD(main)
   3. LPAREN(()
   4. RPAREN())
   5. LBRACE({)
   6. TYPE(float)
   7. IDENTIFIER(radius)
   8. ASSIGN_OP(equals to)
   9. NUMBER(5)
  10. SEMI(semicolon)
  11. TYPE(float)
  12. IDENTIFIER(pi)
  13. ASSIGN_OP(equals to)
  14. FLOAT(3.14)
  15. SEMI(semicolon)
  16. TYPE(float)
  17. IDENTIFIER(area)
  18. ASSIGN_OP(equals to)
  19. IDENTIFIER(pi)
  20. ARITHMETIC_OP(multiply)
  21. IDENTIFIER(radius)
  22. ARITHMETIC_OP(multiply)
  23. IDENTIFIER(radius)
  24. SEMI(semicolon)
  25. KEYWORD(return)
  26. IDENTIFIER(area)
  27. SEMI(semicolon)
  28. RBRACE(})
  29. $($)


## Parsing


In [18]:
ast_3, tac_3 = parser.test_program(
    tokens_3,
    "area calculation",
    """integer main () {
    float radius equals to 5 semicolon
    float pi equals to 3.14 semicolon
    float area equals to pi multiply radius multiply radius semicolon
    return area semicolon
}""",
    create_visualization=True
)


[35m=== TEST: area calculation ===
Testing: integer main () {
    float radius equals to 5 semicolon
    float pi equals to 3.14 semicolon
    float area equals to pi multiply radius multiply radius semicolon
    return area semicolon
}
[0m
Parsing log saved to 'test-area-calculation-log.txt'

[34m=== SEMANTIC ANALYSIS ===
[32m✓ No semantic errors found![37m

[32mParsing: SUCCESS[0m
[32mSemantic Analysis: SUCCESS[0m

[36mParse Tree Structure:[0m
[36mProgram
[37m[36m  MainFunction
[37m[36m    Type
[37m[36m      TYPE: integer
[37m[36m    KEYWORD: main
[37m[36m    LPAREN: (
[37m[36m    RPAREN: )
[37m[36m    LBRACE: {
[37m[36m    StatementList
[37m[36m      StatementList
[37m[36m        StatementList
[37m[36m          StatementList
[37m[36m            Statement
[37m[36m              Declaration
[37m[36m                Type
[37m[36m                  TYPE: float
[37m[36m                IDENTIFIER: radius
[37m[36m                ASSIGN_OP: equals

## Code Generation


In [20]:
# placeholder
complete_compiler_pipeline("test_code_3", tac_3)


🔥 COMPLETE COMPILER PIPELINE: test_code_3

📋 STAGE 1: TAC Optimization
----------------------------------------
[Enhanced] 🚀 Starting enhanced TAC optimization with proper type inference...
[Enhanced] 🔍 Extracting type declarations...
[Enhanced]    📝 Declared radius as float
[Enhanced]    📝 Declared pi as float
[Enhanced]    📝 Declared area as float
[Enhanced] 🔍 Inferring temporary variable types...
[Enhanced]    🎯 Inferred t0 as int from: 5
[Enhanced]    🎯 Inferred t1 as float from: 3.14
[Enhanced]    🎯 Inferred t2 as float from: pi multiply radius
[Enhanced]    🎯 Inferred t3 as float from: t2 multiply radius
[Enhanced] 📋 Detected code type: sequential
[Enhanced] 🎯 Optimizing sequential code with type support...
[Enhanced]    🔍 Analyzing computation graph with type information...
[Enhanced]       📝 Assignment: t0 (int) = 5
[Enhanced]       📝 Assignment: radius (float) = t0
[Enhanced]       📝 Assignment: t1 (float) = 3.14
[Enhanced]       📝 Assignment: pi (float) = t1
[Enhanced]      

{'success': True,
 'stages_completed': 5,
 'optimization_stats': {'constants_propagated': 5,
  'expressions_folded': 2,
  'branches_evaluated': 0,
  'instructions_eliminated': 10,
  'smart_returns_fixed': 0,
  'control_flow_optimized': 0,
  'type_conversions': 0,
  'reduction_percentage': 83.33333333333334},
 'return_code': 78,
 'files_generated': ['pipeline_test_code_3_optimization.txt',
  'pipeline_test_code_3_generated.cpp',
  'pipeline_test_code_3_assembly.s'],
 'errors': [],
 'assembly_code': ['.global main',
  '.text',
  '',
  'main:',
  '    # Function prologue',
  '    pushq %rbp',
  '    movq %rsp, %rbp',
  '    subq $16, %rsp',
  '',
  '    # return 78.5',
  '',
  '    # Function epilogue',
  '    movq %rbp, %rsp',
  '    popq %rbp',
  '    ret']}

---
---

---


# Test-4: Error Detection (Syntax Error/Parsing Error)


## Tokenization


In [31]:
test_code_4 = """
integer main () {
    integer a equals to 10
    return a semicolon
}"""


print(f"\n🧪 Testing scanner with sample code:")
print(f"Input: {repr(test_code_4)}")
print(f"Length: {len(test_code_4)} characters")
    
tokens_4 = scanner.scan(test_code_4)
    
print(f"\n📝 Generated tokens:")
for i, token in enumerate(tokens_4):
    print(f"  {i+1:2d}. {token}")


🧪 Testing scanner with sample code:
Input: '\ninteger main () {\n    integer a equals to 10\n    return a semicolon\n}'
Length: 70 characters
🔍 Scanning input: 70 characters
✅ Scanning complete: 14 tokens generated

📝 Generated tokens:
   1. TYPE(integer)
   2. KEYWORD(main)
   3. LPAREN(()
   4. RPAREN())
   5. LBRACE({)
   6. TYPE(integer)
   7. IDENTIFIER(a)
   8. ASSIGN_OP(equals to)
   9. NUMBER(10)
  10. KEYWORD(return)
  11. IDENTIFIER(a)
  12. SEMI(semicolon)
  13. RBRACE(})
  14. $($)


## Parsing


In [32]:
ast_4, tac_4 = parser.test_program(
    tokens_4,
    "buggy code with missing semicolon",
    """integer main () {
    integer a equals to 10
    return a semicolon
}""",
    create_visualization=True
)


[35m=== TEST: buggy code with missing semicolon ===
Testing: integer main () {
    integer a equals to 10
    return a semicolon
}
[0m
Parsing log saved to 'test-buggy-code-with-missing-semicolon-log.txt'

[31mParsing: FAILED[0m
[33mError details:[0m
 - Syntax error: Missing 1 semicolon or closing brace(s) or parenthesis/parentheses

[33mContext around error:[0m
...
  4: LBRACE({)
  5: TYPE(integer)
  6: IDENTIFIER(a)
  7: ASSIGN_OP(equals to)
  8: NUMBER(10)
[31m→ 9: KEYWORD(return)[0m
  10: IDENTIFIER(a)
  11: SEMI(semicolon)
  12: RBRACE(})
  13: $($)


[32mSuggestion:[0m Check if there should be a semicolon after '10'



## Code Generation will not be done since the code had syntax errors


# Test-5: Error Detection (Semantic Errors)


## Tokenization


In [33]:
test_code_5 = """
integer main () {
    integer a semicolon
    boolean b equals to 20 semicolon
    integer c equals to 20 add a semicolon
}"""


print(f"\n🧪 Testing scanner with sample code:")
print(f"Input: {repr(test_code_5)}")
print(f"Length: {len(test_code_5)} characters")
    
tokens_5 = scanner.scan(test_code_5)
    
print(f"\n📝 Generated tokens:")
for i, token in enumerate(tokens_5):
    print(f"  {i+1:2d}. {token}")


🧪 Testing scanner with sample code:
Input: '\ninteger main () {\n    integer a semicolon\n    boolean b equals to 20 semicolon\n    integer c equals to 20 add a semicolon\n}'
Length: 124 characters
🔍 Scanning input: 124 characters
✅ Scanning complete: 22 tokens generated

📝 Generated tokens:
   1. TYPE(integer)
   2. KEYWORD(main)
   3. LPAREN(()
   4. RPAREN())
   5. LBRACE({)
   6. TYPE(integer)
   7. IDENTIFIER(a)
   8. SEMI(semicolon)
   9. TYPE(boolean)
  10. IDENTIFIER(b)
  11. ASSIGN_OP(equals to)
  12. NUMBER(20)
  13. SEMI(semicolon)
  14. TYPE(integer)
  15. IDENTIFIER(c)
  16. ASSIGN_OP(equals to)
  17. NUMBER(20)
  18. ARITHMETIC_OP(add)
  19. IDENTIFIER(a)
  20. SEMI(semicolon)
  21. RBRACE(})
  22. $($)


## Parsing


In [34]:
ast_5, tac_5 = parser.test_program(
    tokens_5,
    "buggy code with semantic error",
    """integer main () {
    integer a semicolon
    boolean b equals to 20 semicolon
    integer c equals to 20 add a semicolon
}""",
    create_visualization=True
)


[35m=== TEST: buggy code with semantic error ===
Testing: integer main () {
    integer a semicolon
    boolean b equals to 20 semicolon
    integer c equals to 20 add a semicolon
}
[0m
Parsing log saved to 'test-buggy-code-with-semantic-error-log.txt'

[34m=== SEMANTIC ANALYSIS ===
[31mFound 3 semantic errors:
- Variable 'a' used before initialization
- Type mismatch: cannot assign integer to boolean variable 'b'
- Variable 'a' used before initialization
[37m
[32mParsing: SUCCESS[0m
[31mSemantic Analysis: FAILED[0m

[36mParse Tree Structure:[0m
[36mProgram
[37m[36m  MainFunction
[37m[36m    Type
[37m[36m      TYPE: integer
[37m[36m    KEYWORD: main
[37m[36m    LPAREN: (
[37m[36m    RPAREN: )
[37m[36m    LBRACE: {
[37m[36m    StatementList
[37m[36m      StatementList
[37m[36m        StatementList
[37m[36m          Statement
[37m[36m            Declaration
[37m[36m              Type
[37m[36m                TYPE: integer
[37m[36m              ID

## Again, Code Generation will not be done since the code had a semantic error
