# **Lexer Construction**

In [None]:
from token_definition import token_defs
from tokenize_regex import tokenize_regex
from postfix_conversion import to_postfix
from postfix_to_nfa import postfix_to_nfa,combine_nfas,print_nfa
from nfa_to_dfa import nfa_to_dfa
from plot_nfa import plot_nfa_with_graphviz
from optimize_dfa import DFAOptimizer
from dfa_table import DFATable
from scanner import LexicalScanner


# Build one NFA per token definition: each regex is tokenized, converted to
# postfix, turned into an NFA and plotted.
print("Processing Lexer Token Definitions...")
print("=" * 80)

# One (token_name, regex, nfa, None) tuple per successfully built NFA; this
# list is what gets handed to combine_nfas afterwards. The fourth slot is
# always None here (its purpose is not visible in this notebook).
results = []
for token_name, regex in token_defs:
    print(f"\n📝 Processing: {token_name} -> {regex}")

    # Tokenize the regex and convert it to postfix
    tokens = tokenize_regex(regex)
    print(f"   Tokens: {tokens}")

    postfix = to_postfix(tokens)
    print(f"   Postfix: {postfix}")

    # Convert to NFA and display
    nfa = postfix_to_nfa(postfix)
    if nfa:
        # Debug aid: print_nfa(nfa) dumps the automaton as text.
        # NOTE(review): no output filename is passed to the plotter; confirm
        # whether the per-token diagrams should also be saved to disk.
        plot_nfa_with_graphviz(nfa, f"{token_name}: {regex}")
        results.append((token_name, regex, nfa, None))
    else:
        # A failed token is reported and skipped; the remaining definitions
        # are still processed.
        print(f"❌ Failed to create NFA for {token_name}")

    print("-" * 60)
    
# Step 1: combine all per-token NFAs into a single NFA.
final_nfa = combine_nfas(results)
# Debug aid: print_nfa(final_nfa) or
# plot_nfa_with_graphviz(final_nfa, "Combined Final NFA", "final_combined_nfa")
# can be run here to inspect the combined automaton.

# Step 2: convert the combined NFA to a DFA.
print("\n🔄 Converting NFA to DFA...")
start_dfa, all_dfa_states = nfa_to_dfa(final_nfa)
# Debug aid: loop over all_dfa_states and print each state's is_accepting,
# token_type and transitions to inspect the resulting DFA.

# Step 3: optimize (minimize) the DFA.
optimizer = DFAOptimizer(start_dfa, all_dfa_states)
min_start, min_states = optimizer.minimize_dfa()

# Step 4: build the transition table from the minimized DFA, print its
# statistics and contents, then export it (presumably to a spreadsheet file;
# see DFATable.export_to_excel for the destination).
dfa_table = DFATable(min_start, min_states)
dfa_table.print_table_stats()
dfa_table.print_table()
dfa_table.export_to_excel()

# Step 5: create the scanner on top of the transition table. The later test
# cells call scanner.scan(...).
scanner = LexicalScanner(dfa_table, skip_terminators=True)
print("✅ Scanner ready for use!")




In [None]:
"""Test the scanner with sample code"""
# Sample program: variable declarations, a comparison stored in a boolean,
# and an if-block that uses it.
test_code = '''integer main () {
    integer x equals to 5 semicolon
    integer y equals to 10 semicolon
    boolean result equals to x is less than y semicolon
    if (result) {
        integer sum equals to x add y semicolon
        }
}'''

# Echo the input (as a repr so whitespace and newlines are visible) and its size.
print("\n🧪 Testing scanner with sample code:")
print(f"Input: {test_code!r}")
print(f"Length: {len(test_code)} characters")

# Run the scanner built in the construction cell over the sample.
tokens = scanner.scan(test_code)

# List the produced tokens, numbered from 1.
print("\n📝 Generated tokens:")
for i, token in enumerate(tokens, start=1):
    print(f"  {i:2d}. {token}")
    



In [None]:
# 1) Function with float arithmetic and a return statement.
# NOTE: despite the variable name test_code_loop, this sample contains no loop.
test_code_loop = '''
float compute_area ( float radius ) {
    float pi equals to 3.14 semicolon
    float area equals to pi multiply radius multiply radius semicolon
    return area semicolon
}
'''

# Echo the input (as a repr so whitespace and newlines are visible) and its size.
print("\n🧪 Testing scanner with sample code:")
print(f"Input: {test_code_loop!r}")
print(f"Length: {len(test_code_loop)} characters")

# Run the scanner built in the construction cell over the sample.
tokens = scanner.scan(test_code_loop)

# List the produced tokens, numbered from 1.
print("\n📝 Generated tokens:")
for i, token in enumerate(tokens, start=1):
    print(f"  {i:2d}. {token}")
    

In [None]:
# 2) Conditional with relational and logical ops, booleans, and strings
test_code_cond = '''
boolean check_user ( string name , integer age ) {
    boolean is_adult equals to age is greater than or equal to 18 semicolon
    boolean valid_name equals to name dot length is greater than 0 semicolon
    if ( is_adult and valid_name ) {
        string msg equals to "Access granted" semicolon
    } else {
        string msg equals to "Access denied" semicolon
    }
    return is_adult semicolon
}
'''

# Echo the input (as a repr so whitespace and newlines are visible) and its size.
print("\n🧪 Testing scanner with sample code:")
print(f"Input: {test_code_cond!r}")
print(f"Length: {len(test_code_cond)} characters")

# Run the scanner built in the construction cell over the sample.
tokens = scanner.scan(test_code_cond)

# List the produced tokens, numbered from 1.
print("\n📝 Generated tokens:")
for i, token in enumerate(tokens, start=1):
    print(f"  {i:2d}. {token}")
    