In [197]:
#import

import re
import json

In [198]:
#class token

class Token:
    """A single lexical token: its lexeme, its category and its source line."""

    def __init__(self, value, type, line):
        # `type` shadows the builtin inside this method only; the name is kept
        # because it is part of the constructor's public signature.
        self.value, self.type, self.line = value, type, line

    def __str__(self):
        """Return a one-line, human-readable rendering of the token."""
        return "Token({}, '{}', line {})".format(self.type, self.value, self.line)

    def to_dict(self):
        """Return the token as a dict with the keys "token", "linha" and "lexema"."""
        return dict(token=self.type, linha=self.line, lexema=self.value)

In [199]:
#class afd
class AFD:
    """Table-driven deterministic finite automaton that recognises one lexeme.

    The transition table maps ``state -> {symbol: next_state}``. A symbol is
    either a literal character or one of the character classes ``'LETTER'``
    (``str.isalpha``), ``'DIGIT'`` (``str.isdecimal``) and ``'ANY'`` (every
    character except a newline). Literal characters are always tried before
    the classes, so an explicit edge such as a closing quote takes priority
    over ``'ANY'`` in the same state.
    """

    def __init__(self, name, transitions, start, accepts):
        """Store the automaton definition and put it in its start state.

        Args:
            name: Label for this automaton (stored, not otherwise used here).
            transitions: Dict ``state -> {symbol: next_state}``.
            start: The initial state.
            accepts: Dict ``accepting_state -> token type``.
        """
        self.name = name
        self.transitions = transitions
        self.start = start
        self.accepts = accepts
        self.reset()

    def reset(self):
        """Return to the start state and discard the lexeme read so far."""
        self.state = self.start
        self.lexeme = ""

    def step(self, c):
        """Try to consume the character ``c``.

        Returns:
            True if a transition existed (state and lexeme are updated),
            False otherwise (state and lexeme are left untouched).
        """
        trans = self.transitions.get(self.state, {})
        if c in trans:
            target = trans[c]
        elif 'LETTER' in trans and c.isalpha():
            target = trans['LETTER']
        elif 'DIGIT' in trans and c.isdecimal():
            # isdecimal() rather than isdigit(): isdigit() also accepts
            # characters such as superscript digits, which int()/float()
            # cannot parse.
            target = trans['DIGIT']
        elif 'ANY' in trans and c != '\n':
            # Quotes are allowed here. Excluding them made it impossible to
            # recognise a string containing an apostrophe or a char literal
            # holding a double quote; a state that must stop at a quote
            # declares it as an explicit edge, which is matched first.
            target = trans['ANY']
        else:
            return False
        self.state = target
        self.lexeme += c
        return True

    def is_accepting(self):
        """Return True if the current state is an accepting state."""
        return self.state in self.accepts

    def token_type(self):
        """Return the token type of the current state, or None if not accepting."""
        return self.accepts.get(self.state)

In [None]:
#make afd
def make_afd():
    """Build the automaton that recognises every token of the language.

    State 0 is the start state. The returned AFD is named "global" and uses
    the symbol classes 'LETTER', 'DIGIT' and 'ANY' in addition to literal
    characters as transition labels.
    """
    # --- Start state (0) -------------------------------------------------
    # Operators and delimiters each get their own state: the i-th symbol of
    # this string leads to state 3 + i ('=' -> 3, ..., '/' -> 10, ..., ':' -> 17).
    punctuation = "=!><-+*/(){};,:"
    initial = {'LETTER': 1, 'DIGIT': 2, '_': 1}
    initial.update((symbol, 3 + offset) for offset, symbol in enumerate(punctuation))
    initial.update({'"': 18, "'": 30})

    table = {0: initial}

    # --- Identifiers (1): letters, digits and underscores loop ------------
    table[1] = dict.fromkeys(('LETTER', 'DIGIT', '_'), 1)

    # --- Numbers: integer part (2), decimal point (19), fraction (20) -----
    table[2] = {'DIGIT': 2, '.': 19}
    table[19] = {'DIGIT': 20}
    table[20] = {'DIGIT': 20}

    # --- Two-character operators: ==, !=, >=, <=, -> ----------------------
    for first_state, second_char, target in (
        (3, '=', 21),
        (4, '=', 22),
        (5, '=', 23),
        (6, '=', 24),
        (7, '>', 25),
    ):
        table[first_state] = {second_char: target}

    # --- String literals (18 -> 26 on the closing double quote) -----------
    table[18] = {'ANY': 18, '"': 26}

    # --- Char literals: opening quote (30), one char (31), closing (32) ---
    table[30] = {'ANY': 31}
    table[31] = {"'": 32}

    # --- Line comments: a second '/' after '/' (10) enters state 33 -------
    # State 34 (reached on the newline) has no outgoing edges and is not
    # an accepting state.
    table[10] = {'/': 33}
    table[33] = {'ANY': 33, '\n': 34}

    # --- Accepting states and the token type each one yields --------------
    identifiers_and_numbers = [(1, "ID"), (2, "INT_CONST"), (20, "FLOAT_CONST")]
    simple_operators = [
        (3, "ASSIGN"), (7, "MINUS"), (8, "PLUS"), (9, "MULT"), (10, "DIV"),
    ]
    relational_operators = [
        (21, "EQ"), (22, "NE"), (23, "GE"), (24, "LE"), (25, "ARROW"),
        (5, "GT"), (6, "LT"),
    ]
    delimiters = [
        (11, "LBRACKET"), (12, "RBRACKET"), (13, "LBRACE"), (14, "RBRACE"),
        (15, "SEMICOLON"), (16, "COMMA"), (17, "COLON"),
    ]
    literals = [(26, "FMT_STRING"), (32, "CHAR_LITERAL")]

    token_by_state = dict(
        identifiers_and_numbers
        + simple_operators
        + relational_operators
        + delimiters
        + literals
    )

    return AFD("global", table, 0, token_by_state)

In [201]:
#class lexer
class LexerAFD:
    """Lexer that splits source text into Token objects using the AFD from make_afd()."""

    # Identifiers that are really keywords, mapped to their token type.
    RESERVED = {
        "fn": "FUNCTION", "main": "MAIN", "let": "LET",
        "int": "INT", "float": "FLOAT", "char": "CHAR",
        "if": "IF", "else": "ELSE", "while": "WHILE",
        "println": "PRINTLN", "return": "RETURN",
    }

    # State the automaton enters after reading "//" (see `10: {'/': 33}` in
    # make_afd). The previous check against state 29 could never match, so
    # comments were tokenised as an error, a DIV and a run of identifiers.
    COMMENT_STATE = 33

    def __init__(self, source):
        """Keep the source text and build the automaton used by tokenize()."""
        self.source = source
        self.tokens = []
        self.afd = make_afd()

    def tokenize(self):
        """Scan the whole source and return the list of recognised tokens.

        Whitespace and ``//`` line comments are skipped. Each token is the
        longest prefix the automaton accepts at the current position; if no
        prefix is accepted, a lexical error is printed for the current
        character and scanning resumes at the next one.

        The token list is rebuilt on every call, so calling this method more
        than once does not duplicate tokens.

        Returns:
            The list of Token objects (also stored in ``self.tokens``).
        """
        self.tokens = []
        source = self.source
        size = len(source)
        afd = self.afd
        i = 0
        line = 1

        while i < size:
            c = source[i]

            # Line tracking and whitespace skipping.
            if c == '\n':
                line += 1
                i += 1
                continue
            if c.isspace():
                i += 1
                continue

            # Run the automaton as far as it goes, remembering the last
            # accepting configuration so that e.g. "3.x" still yields the
            # integer 3 instead of an error.
            afd.reset()
            j = i
            last_accept = None
            found_comment = False
            while j < size and afd.step(source[j]):
                j += 1
                if afd.state == self.COMMENT_STATE:
                    found_comment = True
                    break
                if afd.is_accepting():
                    last_accept = (afd.lexeme, afd.token_type())

            if found_comment:
                # Jump to the end of the line; the newline itself is left for
                # the main loop so the line counter is updated in one place.
                newline_at = source.find('\n', j)
                i = size if newline_at == -1 else newline_at
                continue

            if last_accept is None:
                print(f"Erro léxico: caractere inesperado '{self.source[i]}' na linha {line}")
                i += 1
                continue

            lex, tipo = last_accept
            if tipo == "ID":
                # Keywords are recognised as identifiers first, then reclassified.
                tipo = self.RESERVED.get(lex, tipo)

            self.tokens.append(Token(lex, tipo, line))
            i += len(lex)

        return self.tokens