In [29]:
#import

import re
import json

In [30]:
#class token

class Token:
    """One lexical token: the matched text, its category and its source line."""

    def __init__(self, value, type, line):
        # `type` shadows the builtin only inside this method; the parameter
        # name is kept because it is part of the constructor's interface.
        self.value, self.type, self.line = value, type, line

    def __str__(self):
        """Return a one-line, human-readable description of the token."""
        return f"Token({self.type}, '{self.value}', line {self.line})"

    def to_dict(self):
        """Return the token as a plain dict with keys 'token', 'linha', 'lexema'."""
        record = {}
        record["token"] = self.type
        record["linha"] = self.line
        record["lexema"] = self.value
        return record

In [31]:
#class afd
class AFD:
    """Table-driven deterministic finite automaton fed one character at a time.

    transitions: mapping of state -> {symbol: next_state}. A symbol is either
        a literal character or one of the class labels 'LETTER', 'DIGIT', 'ANY'.
    start: the state the automaton returns to on reset().
    accepts: mapping of accepting state -> token type name.
    """

    def __init__(self, name, transitions, start, accepts):
        self.name = name
        self.transitions = transitions
        self.start = start
        self.accepts = accepts
        self.reset()

    def reset(self):
        """Go back to the start state and clear the accumulated lexeme."""
        self.lexeme = ""
        self.state = self.start

    def _symbol_for(self, row, c):
        """Return the key of `row` that matches `c`, or None when nothing matches.

        A literal entry for the character wins over the class labels, which
        are then tried in the order LETTER, DIGIT, ANY.
        """
        if c in row:
            return c
        if 'LETTER' in row and c.isalpha():
            return 'LETTER'
        if 'DIGIT' in row and c.isdigit():
            return 'DIGIT'
        # 'ANY' never matches a quote character or a newline.
        if 'ANY' in row and c not in ('"', "'", '\n'):
            return 'ANY'
        return None

    def step(self, c):
        """Consume `c` if the current state has a transition for it.

        Returns True and appends `c` to the lexeme on success; returns False
        and leaves state and lexeme untouched otherwise.
        """
        row = self.transitions.get(self.state, {})
        symbol = self._symbol_for(row, c)
        if symbol is None:
            return False
        self.state = row[symbol]
        self.lexeme += c
        return True

    def is_accepting(self):
        """Tell whether the current state is one of the accepting states."""
        return self.state in self.accepts

    def token_type(self):
        """Return the token type for the current state, or None if not accepting."""
        return self.accepts.get(self.state)

In [32]:
#make afd
def make_afd():
    """Build the automaton that recognises every token of the language.

    Returns an AFD named "global" whose start state is 0. States are plain
    integers; the transition table and the accepting-state map are assembled
    below group by group.
    """
    transitions = {}

    # Start state: dispatch on the first character of the lexeme.
    # '/' is only the division operator here; comments are not recognised.
    transitions[0] = {
        'LETTER': 1, 'DIGIT': 2, '_': 1,
        '=': 3, '!': 4, '>': 5, '<': 6, '-': 7,
        '+': 8, '*': 9, '/': 10,
        '(': 11, ')': 12, '{': 13, '}': 14,
        ';': 15, ',': 16, ':': 17,
        '"': 18, "'": 19,
    }

    # Identifiers: stay in state 1 on letters, digits and underscores.
    transitions[1] = dict.fromkeys(('LETTER', 'DIGIT', '_'), 1)

    # Integers (2), then an optional '.' (20) followed by at least one digit (21).
    transitions[2] = {'DIGIT': 2, '.': 20}
    transitions[20] = {'DIGIT': 21}
    transitions[21] = {'DIGIT': 21}

    # Two-character operators: (state after first char, second char, final state).
    two_char_operators = (
        (3, '=', 22),   # =  then =  ->  ==
        (4, '=', 23),   # !  then =  ->  !=
        (5, '=', 24),   # >  then =  ->  >=
        (6, '=', 25),   # <  then =  ->  <=
        (7, '>', 26),   # -  then >  ->  ->
    )
    for after_first, second_char, final_state in two_char_operators:
        transitions[after_first] = {second_char: final_state}

    # Double-quoted strings: loop in 18 until the closing quote.
    transitions[18] = {'ANY': 18, '"': 27}

    # Character literals: exactly one character (28) and a closing quote (29).
    transitions[19] = {'ANY': 28}
    transitions[28] = {"'": 29}

    # Accepting states, grouped by kind. State 4 ('!' alone) and the
    # intermediate states 18, 19, 20 and 28 are intentionally absent.
    names_and_numbers = {1: "ID", 2: "INT_CONST", 21: "FLOAT_CONST"}
    operators = {
        3: "ASSIGN",
        7: "MINUS",
        8: "PLUS",
        9: "MULT",
        10: "DIV",
        22: "EQ",
        23: "NE",
        24: "GE",
        25: "LE",
        26: "ARROW",
        5: "GT",
        6: "LT",
    }
    punctuation = {
        11: "LBRACKET",
        12: "RBRACKET",
        13: "LBRACE",
        14: "RBRACE",
        15: "SEMICOLON",
        16: "COMMA",
        17: "COLON",
    }
    quoted = {27: "FMT_STRING", 29: "CHAR_LITERAL"}
    accepts = {**names_and_numbers, **operators, **punctuation, **quoted}

    return AFD("global", transitions, 0, accepts)

In [None]:
#class lexer
class Lexer:
    """Splits a source string into Token objects using the automaton from make_afd().

    Attributes:
        source: the text being tokenized.
        tokens: tokens produced by the most recent tokenize() call.
        errors: lexical error messages from the most recent tokenize() call.
        afd: the automaton instance, reset before every token.
    """

    # Identifier lexemes that are promoted to keyword token types.
    RESERVED = {
        "fn": "FUNCTION", "main": "MAIN", "let": "LET",
        "int": "INT", "float": "FLOAT", "char": "CHAR",
        "if": "IF", "else": "ELSE", "while": "WHILE",
        "println": "PRINTLN", "return": "RETURN",
    }

    def __init__(self, source):
        self.source = source
        self.tokens = []
        self.errors = []
        self.afd = make_afd()

    def tokenize(self):
        """Scan `self.source` and return the list of recognised tokens.

        Whitespace is skipped and newlines advance the line counter. Each
        token is the longest prefix that leaves the automaton in an accepting
        state. When no prefix is accepted, an error message is printed and
        recorded in `self.errors`, and scanning resumes one character later.

        Returns:
            The list stored in `self.tokens`, rebuilt from scratch on every call.
        """
        source = self.source
        size = len(source)
        afd = self.afd

        # Start from a clean slate so that calling tokenize() twice does not
        # return every token duplicated.
        self.tokens = []
        self.errors = []

        i = 0
        line = 1
        while i < size:
            c = source[i]

            # Line tracking.
            if c == '\n':
                line += 1
                i += 1
                continue

            # Skip any other whitespace.
            if c.isspace():
                i += 1
                continue

            afd.reset()
            j = i
            match_end = None    # index just past the longest accepted prefix
            match_type = None   # token type reported at that point

            # Run the automaton as far as it goes, remembering the last
            # accepting position. Without this, input such as "1.x" ends in a
            # non-accepting state and the valid integer prefix would be lost.
            while j < size and afd.step(source[j]):
                j += 1
                if afd.is_accepting():
                    match_end = j
                    match_type = afd.token_type()

            if match_end is None:
                message = f"Erro léxico: caractere inesperado '{source[i]}' na linha {line}"
                self.errors.append(message)
                print(message)
                i += 1
                continue

            lex = source[i:match_end]
            # Reserved words override the type reported by the automaton.
            tipo = self.RESERVED.get(lex, match_type)
            self.tokens.append(Token(lex, tipo, line))
            i = match_end

        return self.tokens