# In [None]:
import tkinter as tk
from tkinter import ttk, font
import re
# Regular expression for each token category, keyed by token type name.
# Insertion order matters: the scanner walks this mapping in order when it
# collects keywords, identifiers, numbers, operators and punctuators.
TOKEN_TYPES = {
    # C reserved words, matched as whole words only.
    "KEYWORD": (
        r"\b(auto|break|case|char|const|continue|default|do"
        r"|double|else|enum|extern|float|for|goto|if"
        r"|inline|int|long|register|restrict|return|short"
        r"|signed|sizeof|static|struct|switch|typedef|union"
        r"|unsigned|void|volatile|while|_Bool|_Complex|_Imaginary)\b"
    ),
    # Only the "#<directive> <header>" form is recognised; group 1 is the
    # directive name and group 2 is the text between the angle brackets.
    "PREPROCESSOR_DIRECTIVE": (
        r"#\s*(include|define|undef|if|ifdef|ifndef|else|elif|endif|error|pragma)"
        r"\s*<([a-zA-Z0-9_.]+)>"
    ),
    # A letter or underscore followed by letters, digits or underscores.
    "IDENTIFIER": r"\b[a-zA-Z_][a-zA-Z0-9_]*\b",
    # Integer or simple decimal literal.
    "NUMBER": r"\b\d+(\.\d+)?\b",
    # One or more consecutive operator characters form a single token.
    "OPERATOR": r"[\+\-\*/=<>!]+",
    # Single-character separators.
    "PUNCTUATOR": r"[;,\(\)\{\}]",
    # "//" to end of line, or "/* ... */" (needs MULTILINE and DOTALL flags
    # to handle line ends and multi-line block comments).
    "COMMENT": r"//.*?$|/\*.*?\*/",
}

# Reserved words of the C language. The scanner uses this set to keep a
# reserved word from also being reported as an identifier.
RESERVED_WORDS = set(
    """
    auto break case char const continue default do
    double else enum extern float for goto if
    inline int long register restrict return short
    signed sizeof static struct switch typedef union
    unsigned void volatile while _Bool _Complex _Imaginary
    """.split()
)


#  function to scan the text and classify it into tokens 
def scan_code(code):
    """Scan C source text and classify it into tokens.

    Tokens are grouped by category rather than returned in source order:
    all comments come first, then all preprocessor directives, then the
    remaining categories in the order they appear in ``TOKEN_TYPES``.

    Args:
        code: The C source text to scan.

    Returns:
        A list of ``(token_text, token_type)`` tuples.
    """
    comment_pattern = TOKEN_TYPES['COMMENT']
    directive_pattern = TOKEN_TYPES['PREPROCESSOR_DIRECTIVE']
    comment_flags = re.DOTALL | re.MULTILINE

    # Comments are collected first, with surrounding whitespace stripped.
    tokens = [
        (comment.strip(), 'COMMENT')
        for comment in re.findall(comment_pattern, code, comment_flags)
    ]

    # Remove comments so their contents cannot match any other pattern.
    # Each comment is replaced by a single space (as the C language itself
    # does) instead of by nothing: otherwise "int/**/x" would collapse to
    # "intx" and be reported as one identifier.
    code = re.sub(comment_pattern, ' ', code, flags=comment_flags)

    # Each directive is reported as one normalised "#name <header>" token.
    for directive, header in re.findall(directive_pattern, code, re.MULTILINE):
        tokens.append((f"#{directive} <{header}>", 'PREPROCESSOR_DIRECTIVE'))

    # Remove directives so their parts are not matched again below; a space
    # is left behind so the surrounding text is never glued together.
    code = re.sub(directive_pattern, ' ', code)

    # Scan the remaining categories, one full pass over the code per category.
    for token_type, pattern in TOKEN_TYPES.items():
        if token_type in ('COMMENT', 'PREPROCESSOR_DIRECTIVE'):
            continue  # already handled above
        for match in re.finditer(pattern, code):
            token_value = match.group()
            # The identifier pattern also matches reserved words; skip them
            # here so they are only reported by the KEYWORD pass.
            if token_type == 'IDENTIFIER' and token_value in RESERVED_WORDS:
                continue
            tokens.append((token_value, token_type))

    return tokens

# function to display tokens 
def display_tokens(tokens):
    """Show the scanned tokens in the output text widget.

    The module-level ``t2`` widget is cleared, then one line of the form
    ``"<token> : <TYPE>"`` is appended per token, tagged so that each token
    category can be given its own colour. Tokens whose type is not listed
    below are not displayed.

    Args:
        tokens: Iterable of ``(token_text, token_type)`` pairs.
    """
    # Token type -> name of the text tag used to colour its output line.
    tag_for_type = {
        'KEYWORD': "keyword",
        'PREPROCESSOR_DIRECTIVE': "preprocessor",
        'IDENTIFIER': "identifier",
        'NUMBER': "number",
        'OPERATOR': "operator",
        'PUNCTUATOR': "punctuator",
        'COMMENT': "comment",
    }

    # Drop whatever a previous scan left in the output area.
    t2.delete('1.0', tk.END)

    for value, kind in tokens:
        tag = tag_for_type.get(kind)
        if tag is None:
            continue  # unknown category: nothing is shown for it
        t2.insert(tk.END, f"{value} : {kind}\n", tag)

# ---- GUI setup (tkinter) ----
root = tk.Tk()
root.title("C language scanner")

# Single container frame holding the input box, the button and the output box.
frame = ttk.Frame(root)
frame.pack(pady=10)

# Input: the C source code to scan.
t1 = tk.Text(frame, width=60, height=20, wrap='word')
t1.pack()


def _run_scan():
    """Scan the text currently in the input box and display its tokens."""
    display_tokens(scan_code(t1.get("1.0", tk.END)))


# Button that triggers the scan.
scan_button = ttk.Button(frame, text="Scan Code", command=_run_scan)
scan_button.pack(pady=5)

# Output: the list of tokens found.
t2 = tk.Text(frame, width=60, height=20, wrap='word')
t2.pack()

# One text tag per token category, each with its own foreground colour.
for _tag, _colour in (
    ("keyword", "blue"),
    ("preprocessor", "purple"),
    ("identifier", "orange"),
    ("number", "red"),
    ("operator", "brown"),
    ("punctuator", "black"),
    ("comment", "grey"),
):
    t2.tag_configure(_tag, foreground=_colour)

# Hand control to the Tkinter event loop.
root.mainloop()
