<h3>Token</h3>
<h4>Analisador Lexico - AFD </h4>

In [90]:
T_KEYWORD = "<keyword %s>"
T_OP = "<op %s>"
T_INT = "<int %s>"
T_STRING = "<string %s>"
T_IDENTIF = "<id %s>"
T_SPECIAL = "<special %s>"
T_PUNCT = "<punct %s>"
T_DOT = "<dot>"
T_CONDITIONAL_OP = "<conditional_op %s>"
T_COMMENT = "<comment>"

class Token():
    def __init__(self, tipo, valor=None):
        self.tipo = tipo
        self.valor = valor

    def __str__(self):
        return f"{self.tipo}({self.valor})" if self.valor else self.tipo

class StopExecution(Exception):
    def _render_traceback_(self):
        pass

def tokenize_line(line, line_number):
    tokens = []
    i = 0
    n = len(line)

    while i < n:
        if line[i].isspace():
            i += 1
        elif line[i] == '#':  # Comment
            tokens.append(Token("T_COMMENT"))
            break 
        elif line[i].isalpha():
            start = i
            while i < n and (line[i].isalnum() or line[i] == '_'):
                i += 1
            word = line[start:i]
            if word in ["var", "func", "if", "elif", "else", "return", "object", "init"]:
                tokens.append(Token("T_KEYWORD", word))
            elif word in ["true", "false", "null", "end", "main"]:
                tokens.append(Token("T_SPECIAL", word))
            else:
                tokens.append(Token("T_IDENTIF", word))
        elif line[i].isdigit():
            start = i
            while i < n and line[i].isdigit():
                i += 1
            tokens.append(Token("T_INT", line[start:i]))
        elif line[i] == '"':
            start = i
            i += 1
            while i < n and line[i] != '"':
                i += 1
            if i >= n:
                print(f"Erro: String não fechada na linha {line_number}")
                raise StopExecution
            i += 1
            tokens.append(Token("T_STRING", line[start:i]))
        elif line[i] in "=<>!+-*/":
            start = i
            i += 1
            if i < n and line[i] == "=":
                i += 1
            tokens.append(Token("T_OP", line[start:i]))
        elif line[i] in "(),[]{}":
            tokens.append(Token("T_PUNCT", line[i]))
            i += 1
        elif line[i] in "?:":
            tokens.append(Token("T_CONDITIONAL_OP", line[i]))
            i += 1
        elif line[i] == '.':
            tokens.append(Token("T_DOT"))
            i += 1
        else:
            print(f"Erro: Caractere não reconhecido '{line[i]}' na linha {line_number}")
            raise StopExecution
    return tokens

def tokenize():

    try:
        token_total = [];
        with open('codigo.x', 'r') as f:
            lines = f.read().splitlines()

        line_number = 0
        for line in lines:
            line_number += 1
            tokens = tokenize_line(line, line_number)
            token_total.extend(tokens)
            #print(tokens)

        return token_total

    except StopExecution:
        print("Execução parada devido a erro.")

<h3>Analisador Sinatico - Parser </h3>

In [91]:
class Parser:
    def __init__(self, tokens):
        self.tokens = tokens
        self.current_idx = 0  
        self.current_token = self.tokens[0] if tokens else None
        self.symbol_table = {}

    def get_next_token(self):
        self.current_idx += 1
        if self.current_idx < len(self.tokens):
            self.current_token = self.tokens[self.current_idx]
        else:
            self.current_token = None

    def eat(self, token_type):
        if self.current_token.tipo == token_type:
            self.get_next_token()
        else:
            raise Exception(f"Erro de sintaxe. Esperado: {token_type}. Recebido: {self.current_token.tipo} - Valor: {self.current_token.valor}")

    def factor(self):
        """factor : INT | IDENTIF | ( expr )"""
        token = self.current_token
        if token.tipo == 'T_INT':
            self.eat('T_INT')
            return int(token.valor)
        elif token.tipo == 'T_IDENTIF':
            self.eat('T_IDENTIF')
            return self.symbol_table.get(token.valor, None) 
        elif token.tipo == 'T_PUNCT' and token.valor == '(':
            self.eat('T_PUNCT')
            result = self.expr()
            self.eat('T_PUNCT')
            return result

    def term(self):
        """term : factor ((MUL | DIV) factor)*"""
        result = self.factor()

        while self.current_token is not None and self.current_token.tipo in ('T_OP') and self.current_token.valor in ['*', '/']:
            token = self.current_token
            if token.valor == '*':
                self.eat('T_OP')
                result *= self.factor()
            elif token.valor == '/':
                self.eat('T_OP')
                result /= self.factor()

        return result

    def expr(self):
        """expr : term ((PLUS | MINUS) term)*"""
        result = self.term()

        while self.current_token is not None and self.current_token.tipo in ('T_OP') and self.current_token.valor in ['+', '-']:
            token = self.current_token
            if token.valor == '+':
                self.eat('T_OP')
                result += self.term()
            elif token.valor == '-':
                self.eat('T_OP')
                result -= self.term()

        return result

    def assignment(self):
        """IDENTIF EQUALS expr"""
        var_name = self.current_token.valor
        self.eat('T_IDENTIF')
        self.eat('T_OP')
        var_value = self.expr()
        self.symbol_table[var_name] = var_value
    
    def conditional_statement(self):
        """ if/elif/else statement """
        self.eat('T_KEYWORD')  # Consumir o "if" ou "elif"
        condition = self.expr()  # Avaliar a expressão condicional

        if condition:
            self.eat('T_PUNCT')  # Consumir o '{'
            self.start()  # Processar o bloco de código dentro do if/elif
            self.eat('T_PUNCT')  # Consumir o '}'
        else:
            # pular o bloco de código e possivelmente ir para um "elif" ou "else"
            while self.current_token.tipo != 'T_PUNCT' or self.current_token.valor != '}':
                self.eat(self.current_token.tipo)

            self.eat('T_PUNCT')  # Consumir o '}'

            if self.current_token and self.current_token.valor == "elif":
                self.conditional_statement()
            elif self.current_token and self.current_token.valor == "else":
                self.eat('T_KEYWORD')  # Consumir o "else"
                self.eat('T_PUNCT')  # Consumir o '{'
                self.start()  # Processar o bloco de código dentro do else
                self.eat('T_PUNCT')  # Consumir o '}'

    def loop_statement(self):
        """ while statement"""
        """TODO Implementar outros loops como o for..."""
        self.eat('T_KEYWORD')  # Consumir o "while"
        condition = self.expr()  # Avaliar a expressão condicional

        while condition:
            self.eat('T_PUNCT')  # Consumir o '{'
            self.start()  # Processar o bloco de código dentro do loop
            self.eat('T_PUNCT')  # Consumir o '}'

    def function_definition(self):
        """ func IDENTIFIER (...) """
        self.eat('T_KEYWORD')  # Consumir o "func"
        func_name = self.current_token.valor
        self.eat('T_IDENTIF')
        self.eat('T_PUNCT')  # Consumir o '('

        # Lista de parâmetros (opcional)
        parameters = []
        while self.current_token.tipo != 'T_PUNCT' or self.current_token.valor != ')':
            parameters.append(self.current_token.valor)
            self.eat('T_IDENTIF')

            # Se houver uma vírgula, é porque há mais parâmetros
            if self.current_token.valor == ',':
                self.eat('T_PUNCT')  # Consumir a ','

        self.eat('T_PUNCT')  # Consumir o ')'

        # Adicionar função à tabela de símbolos (para este exemplo, vamos apenas armazenar o nome)
        self.symbol_table[func_name] = {'type': 'function', 'parameters': parameters}

        self.eat('T_PUNCT')  # Consumir o '{'
        self.start()  # Processar o corpo da função
        self.eat('T_PUNCT')  # Consumir o '}'

    def object_definition(self):
        """TODO expandir isso para incluir membros de objeto, métodos, etc."""
        self.eat('T_KEYWORD')  # Consumir o "object"
        object_name = self.current_token.valor
        self.eat('T_IDENTIF')

        self.symbol_table[object_name] = {'type': 'object'}


    def print_command(self):
        """ Trata o comando print """
        self.eat('T_KEYWORD')  # Consumir "print"
        self.eat('T_PUNCT')    # Consumir '('
        
        # Aqui podemos melhorar para tratar expressões mais complexas dentro do print.
        while self.current_token.tipo != 'T_PUNCT' or self.current_token.valor != ')':
            if self.current_token.tipo == 'T_STRING':
                # Supondo que esteja imprimindo uma string, podemos apenas consumir o token.
                self.eat('T_STRING')
            else:
                self.expr()

            if self.current_token.valor == ',':
                self.eat('T_PUNCT')  # Consumir ','

        self.eat('T_PUNCT')    # Consumir ')'

    def variable_declaration(self):
        """ Trata a declaração de variáveis: var IDENTIF = expr """
        self.eat('T_KEYWORD')  # Consumir "var"
        var_name = self.current_token.valor
        self.eat('T_IDENTIF')
        
        # Aceitação opcional de espaços ou '=' diretamente
        if self.current_token.tipo == 'T_OP' and self.current_token.valor == '=':
            self.eat('T_OP')  # Consumir '='
            var_value = self.expr()
            self.symbol_table[var_name] = var_value
    
    def string_literal(self):
        """ Trata string literais """
        return self.current_token.valor 

    def end_command(self):
        """ Trata o comando end """
        self.eat('T_KEYWORD')  

    def block(self):
        """ Trata blocos de código dentro de chaves """
        self.eat('T_PUNCT')  # Consumir "{"
        while self.current_token.valor != '}':
            self.start()
        self.eat('T_PUNCT')  # Consumir "}"

    def function_call(self):
        """ Trata chamadas de funções """
        func_name = self.current_token.valor
        self.eat('T_IDENTIF')
        self.eat('T_PUNCT')  # Consumir "("
        
        args = []
        while self.current_token.valor != ')':
            args.append(self.expr())
            if self.current_token.valor == ',':
                self.eat('T_PUNCT')  # Consumir ","
        
        self.eat('T_PUNCT')  # Consumir ")"
        # Aqui você pode, por exemplo, tratar a chamada da função.
        # No exemplo, estamos apenas passando por ela.

    def special_function_definition(self):
        """ Trata funções especiais como 'main' e 'init' """
        func_name = self.current_token.valor
        self.eat('T_SPECIAL')  # Consumir o nome especial (main, init, etc.)
        
        self.eat('T_PUNCT')  # Consumir o '('

        # Para simplicidade, estamos supondo que funções especiais não possuem parâmetros.
        # Se elas puderem ter parâmetros, é necessário expandir esta parte.

        self.eat('T_PUNCT')  # Consumir o ')'
        
        # Adicionar função à tabela de símbolos (para este exemplo, vamos apenas armazenar o nome)
        self.symbol_table[func_name] = {'type': 'function', 'parameters': []}

        self.eat('T_PUNCT')  # Consumir o '{'
        self.start()  # Processar o corpo da função
        self.eat('T_PUNCT')  # Consumir o '}'

    def start(self):
        """ entrada do parser """
        while self.current_token is not None:
            print("Valor atual: ", self.current_token.valor, "Tipo: ", self.current_token.tipo, "Idx: ", self.current_idx)
            if self.current_token.tipo == 'T_IDENTIF':
                lookahead_idx = self.current_idx + 1
                if lookahead_idx < len(self.tokens) and self.tokens[lookahead_idx].valor == '(':
                    self.function_call()
                else:
                    self.assignment()
            elif self.current_token.tipo == 'T_KEYWORD':
                if self.current_token.valor == "var":
                    self.variable_declaration()
                elif self.current_token.valor == "if":
                    self.conditional_statement()
                elif self.current_token.valor == "while":
                    self.loop_statement()
                elif self.current_token.valor == "func":
                    self.function_definition()
                elif self.current_token.valor == "end":
                    self.end_command()
                elif self.current_token.valor == "object":
                    self.object_definition()
            elif self.current_token.tipo == 'T_SPECIAL':
                if self.current_token.valor in ["main", "init"]:
                    self.special_function_definition() 
            elif self.current_token.tipo == 'T_STRING':
                self.string_literal()
                self.eat('T_STRING')  # Consumir a string literal
            elif self.current_token.tipo == 'T_PUNCT' and self.current_token.valor == '{':
                self.block()
            else:
                self.get_next_token()

    # def start(self):
    #     """ entrada do parser """
    #     while self.current_token is not None:
    #         print("Valor atual: ", self.current_token.valor, "Tipo: ", self.current_token.tipo, "Idx: ", self.current_idx)
    #         if self.current_token.tipo == 'T_IDENTIF':
    #             lookahead_idx = self.current_idx + 1
    #             if lookahead_idx < len(self.tokens) and self.tokens[lookahead_idx].valor == '(':
    #                 self.function_call()
    #             else:
    #                 self.assignment()
    #         elif self.current_token.tipo == 'T_KEYWORD':
    #             if self.current_token.valor == "var":
    #                 self.variable_declaration()
    #             elif self.current_token.valor == "if":
    #                 self.conditional_statement()
    #             elif self.current_token.valor == "while":
    #                 self.loop_statement()
    #             elif self.current_token.valor == "func":
    #                 lookahead_idx = self.current_idx + 1
    #                 if lookahead_idx < len(self.tokens) and self.tokens[lookahead_idx].valor in ["main", "init"]:
    #                     self.special_function_definition()  
    #                 else:
    #                     self.function_definition()
    #             elif self.current_token.valor == "end":
    #                 self.end_command()
    #             elif self.current_token.valor == "object":
    #                 self.object_definition()
    #         elif self.current_token.tipo == 'T_STRING':
    #             self.string_literal()
    #             self.eat('T_STRING')  # Consumir a string literal
    #         elif self.current_token.tipo == 'T_PUNCT' and self.current_token.valor == '{':
    #             self.block()
    #         else:
    #             self.get_next_token()




<h3>Compilador</h3>

In [92]:
def main():
    filename = 'codigo2.x'
    print(f"Lendo o arquivo {filename} ...")

    # Tokenização
    print("\nTokenização:")
    tokens = tokenize()
    print("Tokens:", tokens)    
    
    # Análise Sintática (Parser)
    print("\nAnálise Sintática:")
    parser = Parser(tokens)
    try:
        parser.start()
        print("Análise sintática concluída com sucesso!")
    except Exception as e:
        print(f"Erro no parser: {e}")

    states = {
        'tokens': tokens,
        'symbol_table': parser.symbol_table
    }
    print("\nEstados salvos:", states)

if __name__ == '__main__':
    main()

Lendo o arquivo codigo2.x ...

Tokenização:
Tokens: [<__main__.Token object at 0x00000180F54AC850>, <__main__.Token object at 0x00000180F54AF850>, <__main__.Token object at 0x00000180F54ACB90>, <__main__.Token object at 0x00000180F54AF810>, <__main__.Token object at 0x00000180F54AF750>, <__main__.Token object at 0x00000180F54AED50>, <__main__.Token object at 0x00000180F54AD750>, <__main__.Token object at 0x00000180F54AE490>, <__main__.Token object at 0x00000180F54AE890>, <__main__.Token object at 0x00000180F54AE410>, <__main__.Token object at 0x00000180F54AFF10>, <__main__.Token object at 0x00000180F54ACD50>, <__main__.Token object at 0x00000180F54AD0D0>, <__main__.Token object at 0x00000180F54AE690>, <__main__.Token object at 0x00000180F54AF250>, <__main__.Token object at 0x00000180F54ADD50>, <__main__.Token object at 0x00000180F54AD950>, <__main__.Token object at 0x00000180F54AD2D0>, <__main__.Token object at 0x00000180F54AFFD0>, <__main__.Token object at 0x00000180F54ACDD0>, <__main