In [None]:
import re

def tokenize_line(line, line_number, token_patterns):
    tokens = []
    position = 0

    while position < len(line):
        match = None
        for token_name, pattern in token_patterns.items():
            regex = re.compile(pattern)
            match = regex.match(line, position)
            if match:
                tokens.append((match.group(), token_name))
                position = match.end()
                break

        if not match:
            position += 1

    return tokens

def tokenize_file(filename):
    token_patterns = {
        "identifier": r"[a-zA-Z_][a-zA-Z0-9_]*",
        "op_asign": r"=",
        "op_addition": r"\+",
        "op_subtraction": r"-",
        "op_multiplication": r"\*",
        "op_div": r"/",
        "op_power": r"\^",
        "op_less": r"<",
        "op_greater": r">",
        "op_less_equal": r"<=",
        "op_greater_equal": r">=",
        "op_not_equal": r"!=",
        "num_integer": r"\b[0-9]+\b",
        "num_float": r"\b[0-9]+\.[0-9]+\b",
        "num_real": r"\b[0-9]+\.?[0-9]*[eE][-+]?[0-9]+\b",
        "string": r'".*?"',
        "keyword": r"\b(for|if|else|while|def|return)\b",
        "comment": r"//.*",
        "left_paren": r"\(",
        "right_paren": r"\)",
        "left_brace": r"\{",
        "right_brace": r"\}",
        "left_bracket": r"\[",
        "right_bracket": r"\]",
    }

    tokenized_lines = []

    with open(filename, 'r') as file:
        for line_number, line in enumerate(file, start=1):
            tokens = tokenize_line(line.strip(), line_number, token_patterns)
            tokenized_lines.append(tokens)

    return tokenized_lines

def main():
    filename = "entrada.txt"
    tokenized_result = tokenize_file(filename)

    for line_tokens in tokenized_result:
        for token, token_type in line_tokens:
            print(f"{token} -> {token_type}")
        print("-")

if __name__ == "__main__":
    main()


b -> identifier
= -> op_asign
7 -> num_integer
-
-
a -> identifier
= -> op_asign
32 -> num_integer
4 -> num_integer
* -> op_multiplication
( -> left_paren
- -> op_subtraction
8 -> num_integer
6 -> num_integer
- -> op_subtraction
b -> identifier
) -> right_paren
/ -> op_div
6 -> num_integer
1E-8 -> num_real
-
-
d -> identifier
= -> op_asign
a -> identifier
^ -> op_power
b -> identifier
/ -> op_div
/ -> op_div
Esto -> identifier
es -> identifier
un -> identifier
comentario -> identifier
-
