No terminales (NT):
Program, ModuleDecl, QualID, QualIDTail, ImportList, ImportDecl, AsOpt,
TopList, TopDecl, TypeDecl, StructDecl, FieldList, FieldListTail, Field,
ConstDecl, LetDecl, LetTail, FunDecl, ParamListOpt, ParamList, ParamListTail,
Param, RetType, Type, ArrOrFunType, SimpleType, Block, StmtList, Stmt,
ExprStmt, IfStmt, ElseOpt, WhileStmt, ReturnStmt, Expr,
Assign, AssignTail, Or, OrTail, And, AndTail, Eq, EqTail, Rel, RelTail,
Add, AddTail, Mul, MulTail, Unary, Postfix, PostfixTail,
Primary, ArgListOpt, ArgList, ArgListTail


Terminales (T):
module, import, as, type, struct, const, let, fn,
int, bool, string,
if, else, while, return,
true, false,
ID, NUM, STRING, EOF,
'=', '->', '.', ':', ';', '{', '}', '(', ')', '[', ']',
'||', '&&', '==', '!=', '<', '<=', '>', '>=',
'+', '-', '*', '/', '%', '!'


In [2]:

#Lexer

import re
from collections import namedtuple

Token = namedtuple("Token", ["type", "lexeme", "line", "col"])

# --------------------------
# Definición de tokens
# --------------------------
TOKEN_SPEC = [
    ("WHITESPACE", r"[ \t]+"),
    ("NEWLINE", r"\n"),
    ("COMMENT", r"//[^\n]*"),

    ("NUM", r"\d+(\.\d+)?"),            # números
    ("STRING", r'"([^"\\]|\\.)*"'),     # cadenas

    # Operadores múltiples (orden importante: de más largos a más cortos)
    ("OR", r"\|\|"),
    ("AND", r"&&"),
    ("EQEQ", r"=="),
    ("NEQ", r"!="),
    ("LE", r"<="),
    ("GE", r">="),
    ("ARROW", r"->"),

    # Operadores simples
    ("EQ", r"="),
    ("LT", r"<"),
    ("GT", r">"),
    ("PLUS", r"\+"),
    ("MINUS", r"-"),
    ("TIMES", r"\*"),
    ("DIV", r"/"),
    ("MOD", r"%"),
    ("NOT", r"!"),

    # Símbolos de agrupación
    ("DOT", r"\."),
    ("COLON", r":"),
    ("SEMI", r";"),
    ("LBRACE", r"\{"),
    ("RBRACE", r"\}"),
    ("LPAREN", r"\("),
    ("RPAREN", r"\)"),
    ("LBRACK", r"\["),
    ("RBRACK", r"\]"),

    # Identificadores
    ("ID", r"[A-Za-z_][A-Za-z0-9_]*"),
]

# Palabras reservadas
KEYWORDS = {
    "module": "MODULE",
    "import": "IMPORT",
    "as": "AS",
    "type": "TYPE",
    "struct": "STRUCT",
    "const": "CONST",
    "let": "LET",
    "fn": "FN",
    "int": "INT",
    "bool": "BOOL",
    "string": "STRING",
    "if": "IF",
    "else": "ELSE",
    "while": "WHILE",
    "return": "RETURN",
    "true": "TRUE",
    "false": "FALSE",
}

master_pat = re.compile("|".join(f"(?P<{name}>{pat})" for name, pat in TOKEN_SPEC))

class LexerError(Exception):
    pass

def tokenize(code):
    line, col, pos = 1, 1, 0
    while pos < len(code):
        m = master_pat.match(code, pos)
        if not m:
            raise LexerError(f"Error léxico en línea {line}, col {col}: '{code[pos:pos+10]}'")
        kind = m.lastgroup
        lexeme = m.group(kind)
        if kind == "NEWLINE":
            line += 1
            col = 1
        elif kind == "WHITESPACE" or kind == "COMMENT":
            col += len(lexeme)
        else:
            tok_type = KEYWORDS.get(lexeme, kind)
            yield Token(tok_type, lexeme, line, col)
            col += len(lexeme)
        pos = m.end()

if __name__ == "__main__":
    code = """
    module ejemplo;
    let x:int = 10;
    fn suma(a:int, b:int) -> int {
        if (a > b && true) {
            return a + b;
        } else {
            return 0;
        }
    }
    """
    try:
        for tok in tokenize(code):
            print(tok)
    except LexerError as e:
        print(e)


Token(type='MODULE', lexeme='module', line=2, col=5)
Token(type='ID', lexeme='ejemplo', line=2, col=12)
Token(type='SEMI', lexeme=';', line=2, col=19)
Token(type='LET', lexeme='let', line=3, col=5)
Token(type='ID', lexeme='x', line=3, col=9)
Token(type='COLON', lexeme=':', line=3, col=10)
Token(type='INT', lexeme='int', line=3, col=11)
Token(type='EQ', lexeme='=', line=3, col=15)
Token(type='NUM', lexeme='10', line=3, col=17)
Token(type='SEMI', lexeme=';', line=3, col=19)
Token(type='FN', lexeme='fn', line=4, col=5)
Token(type='ID', lexeme='suma', line=4, col=8)
Token(type='LPAREN', lexeme='(', line=4, col=12)
Token(type='ID', lexeme='a', line=4, col=13)
Token(type='COLON', lexeme=':', line=4, col=14)
Token(type='INT', lexeme='int', line=4, col=15)
Error léxico en línea 4, col 18: ', b:int) -'


In [10]:
digraph AFD {
  rankdir=LR;

  node [shape=circle];
  q0 [label="Inicio"];

  # Identificadores / palabras clave
  q0 -> qID [label="letter/_"];
  qID -> qID [label="letter/digit/_"];
  qID [shape=doublecircle, label="ID/Keyword"];

  # Números
  q0 -> qNUM [label="digit"];
  qNUM -> qNUM [label="digit"];
  qNUM [shape=doublecircle, label="NUM"];

  # Strings
  q0 -> qSTR [label="\""];
  qSTR -> qSTR [label="caracter"];
  qSTR -> qSTRend [label="\""];
  qSTRend [shape=doublecircle, label="STRING"];

  # Operadores
  q0 -> qEQ [label="="];
  qEQ [shape=doublecircle];
  qEQ -> qEQEQ [label="="];
  qEQEQ [shape=doublecircle, label="=="];

  q0 -> qAND1 [label="&"];
  qAND1 -> qAND2 [label="&"];
  qAND2 [shape=doublecircle, label="&&"];

  q0 -> qOR1 [label="|"];
  qOR1 -> qOR2 [label="|"];
  qOR2 [shape=doublecircle, label="||"];

  q0 -> qLT [label="<"];
  qLT [shape=doublecircle, label="<"];
  qLT -> qLE [label="="];
  qLE [shape=doublecircle, label="<="];

  q0 -> qGT [label=">"];
  qGT [shape=doublecircle, label=">"];
  qGT -> qGE [label="="];
  qGE [shape=doublecircle, label=">="];

  # Puntuación
  q0 -> qSYM [label=". ; : { } ( ) [ ] + - * / % !"];
  qSYM [shape=doublecircle];

  # Error
  q0 -> qERR [label="otro"];
  qERR [shape=doublecircle, color=red, label="ERROR"];
}


SyntaxError: invalid syntax (3238895455.py, line 1)