<h3>Token</h3>
<h4>Analisador Lexico - AFD </h4>

In [23]:
T_KEYWORD = "<keyword %s>"
T_OP = "<op %s>"
T_INT = "<int %s>"
T_STRING = "<string %s>"
T_IDENTIF = "<id %s>"
T_SPECIAL = "<special %s>"
T_PUNCT = "<punct %s>"
T_DOT = "<dot>"
T_CONDITIONAL_OP = "<conditional_op %s>"
T_COMMENT = "<comment>"

class Token():
    def __init__(self, tipo, valor=None):
        self.tipo = tipo
        self.valor = valor

    def __str__(self):
        return f"{self.tipo}({self.valor})" if self.valor else self.tipo

class StopExecution(Exception):
    def _render_traceback_(self):
        pass

def tokenize_line(line, line_number):
    tokens = []
    i = 0
    n = len(line)

    while i < n:
        if line[i].isspace():
            i += 1
        elif line[i] == '#':  # Comment
            tokens.append(Token("T_COMMENT"))
            break 
        elif line[i].isalpha():
            start = i
            while i < n and (line[i].isalnum() or line[i] == '_'):
                i += 1
            word = line[start:i]
            if word in ["var", "func", "if", "elif", "else", "return", "object", "init"]:
                tokens.append(Token("T_KEYWORD", word))
            elif word in ["true", "false", "null", "end", "main"]:
                tokens.append(Token("T_SPECIAL", word))
            else:
                tokens.append(Token("T_IDENTIF", word))
        elif line[i].isdigit():
            start = i
            while i < n and line[i].isdigit():
                i += 1
            tokens.append(Token("T_INT", line[start:i]))
        elif line[i] == '"':
            start = i
            i += 1
            while i < n and line[i] != '"':
                i += 1
            if i >= n:
                print(f"Erro: String não fechada na linha {line_number}")
                raise StopExecution
            i += 1
            tokens.append(Token("T_STRING", line[start:i]))
        elif line[i] in "=<>!+-*/":
            start = i
            i += 1
            if i < n and line[i] == "=":
                i += 1
            tokens.append(Token("T_OP", line[start:i]))
        elif line[i] in "(),[]{}":
            tokens.append(Token("T_PUNCT", line[i]))
            i += 1
        elif line[i] in "?:":
            tokens.append(Token("T_CONDITIONAL_OP", line[i]))
            i += 1
        elif line[i] == '.':
            tokens.append(Token("T_DOT"))
            i += 1
        else:
            print(f"Erro: Caractere não reconhecido '{line[i]}' na linha {line_number}")
            raise StopExecution
    return tokens

def tokenize():

    try:
        token_total = [];
        with open('codigo.x', 'r') as f:
            lines = f.read().splitlines()

        line_number = 0
        for line in lines:
            line_number += 1
            tokens = tokenize_line(line, line_number)
            token_total.extend(tokens)
            #print(tokens)

        return token_total

    except StopExecution:
        print("Execução parada devido a erro.")

<h3>Analisador Sinatico - Parser </h3>

In [24]:
class Parser:
    def __init__(self, tokens):
        self.tokens = tokens
        self.current_idx = 0  
        self.current_token = self.tokens[0] if tokens else None
        self.symbol_table = {}

    def get_next_token(self):
        self.current_idx += 1
        if self.current_idx < len(self.tokens):
            self.current_token = self.tokens[self.current_idx]
        else:
            self.current_token = None

    def eat(self, token_type):
        if self.current_token.tipo == token_type:
            self.current_token = self.get_next_token()
        else:
            raise Exception(f"Erro de sintaxe. Esperado: {token_type}. Recebido: {self.current_token.tipo}")

    def factor(self):
        """factor : INT | IDENTIF | ( expr )"""
        token = self.current_token
        if token.tipo == 'T_INT':
            self.eat('T_INT')
            return int(token.valor)
        elif token.tipo == 'T_IDENTIF':
            self.eat('T_IDENTIF')
            return self.symbol_table.get(token.valor, None) 
        elif token.tipo == 'T_PUNCT' and token.valor == '(':
            self.eat('T_PUNCT')
            result = self.expr()
            self.eat('T_PUNCT')
            return result

    def term(self):
        """term : factor ((MUL | DIV) factor)*"""
        result = self.factor()

        while self.current_token is not None and self.current_token.tipo in ('T_OP') and self.current_token.valor in ['*', '/']:
            token = self.current_token
            if token.valor == '*':
                self.eat('T_OP')
                result *= self.factor()
            elif token.valor == '/':
                self.eat('T_OP')
                result /= self.factor()

        return result

    def expr(self):
        """expr : term ((PLUS | MINUS) term)*"""
        result = self.term()

        while self.current_token is not None and self.current_token.tipo in ('T_OP') and self.current_token.valor in ['+', '-']:
            token = self.current_token
            if token.valor == '+':
                self.eat('T_OP')
                result += self.term()
            elif token.valor == '-':
                self.eat('T_OP')
                result -= self.term()

        return result

    def assignment(self):
        """IDENTIF EQUALS expr"""
        var_name = self.current_token.valor
        self.eat('T_IDENTIF')
        self.eat('T_OP')
        var_value = self.expr()
        self.symbol_table[var_name] = var_value
    
    def conditional_statement(self):
        """ if/elif/else statement """
        self.eat('T_KEYWORD')  # Consumir o "if" ou "elif"
        condition = self.expr()  # Avaliar a expressão condicional

        if condition:
            self.eat('T_PUNCT')  # Consumir o '{'
            self.start()  # Processar o bloco de código dentro do if/elif
            self.eat('T_PUNCT')  # Consumir o '}'
        else:
            # pular o bloco de código e possivelmente ir para um "elif" ou "else"
            while self.current_token.tipo != 'T_PUNCT' or self.current_token.valor != '}':
                self.eat(self.current_token.tipo)

            self.eat('T_PUNCT')  # Consumir o '}'

            if self.current_token and self.current_token.valor == "elif":
                self.conditional_statement()
            elif self.current_token and self.current_token.valor == "else":
                self.eat('T_KEYWORD')  # Consumir o "else"
                self.eat('T_PUNCT')  # Consumir o '{'
                self.start()  # Processar o bloco de código dentro do else
                self.eat('T_PUNCT')  # Consumir o '}'

    def loop_statement(self):
        """ while statement"""
        """TODO Implementar outros loops como o for..."""
        self.eat('T_KEYWORD')  # Consumir o "while"
        condition = self.expr()  # Avaliar a expressão condicional

        while condition:
            self.eat('T_PUNCT')  # Consumir o '{'
            self.start()  # Processar o bloco de código dentro do loop
            self.eat('T_PUNCT')  # Consumir o '}'

    def function_definition(self):
        """ func IDENTIFIER (...) """
        self.eat('T_KEYWORD')  # Consumir o "func"
        func_name = self.current_token.valor
        self.eat('T_IDENTIF')
        self.eat('T_PUNCT')  # Consumir o '('

        # Lista de parâmetros (opcional)
        parameters = []
        while self.current_token.tipo != 'T_PUNCT' or self.current_token.valor != ')':
            parameters.append(self.current_token.valor)
            self.eat('T_IDENTIF')

            # Se houver uma vírgula, é porque há mais parâmetros
            if self.current_token.valor == ',':
                self.eat('T_PUNCT')  # Consumir a ','

        self.eat('T_PUNCT')  # Consumir o ')'

        # Adicionar função à tabela de símbolos (para este exemplo, vamos apenas armazenar o nome)
        self.symbol_table[func_name] = {'type': 'function', 'parameters': parameters}

        self.eat('T_PUNCT')  # Consumir o '{'
        self.start()  # Processar o corpo da função
        self.eat('T_PUNCT')  # Consumir o '}'

    def object_definition(self):
        """ object IDENTIFIER """
        """TODO expandir isso para incluir membros de objeto, métodos, etc."""
        self.eat('T_KEYWORD')  # Consumir o "object"
        object_name = self.current_token.valor
        self.eat('T_IDENTIF')

        self.symbol_table[object_name] = {'type': 'object'}

    def start(self):
        """ point of entry for our parser """
        while self.current_token is not None:
            if self.current_token.tipo == 'T_IDENTIF':
                self.assignment()
            elif self.current_token.tipo == 'T_KEYWORD':
                if self.current_token.valor == "if":
                    self.conditional_statement()
                elif self.current_token.valor == "while":
                    self.loop_statement()
                elif self.current_token.valor == "func":
                    self.function_definition()
                elif self.current_token.valor == "object":
                    self.object_definition()
            else:
                self.expr()

        if self.current_token is not None:
            raise Exception('Syntax error: unexpected token {}'.format(self.current_token))

        return self.symbol_table



<h3>Compilador</h3>

In [25]:
def main():
    filename = 'codigo.x'
    print(f"Lendo o arquivo {filename} ...")

    # Tokenização
    print("\nTokenização:")
    tokens = tokenize()
    print("Tokens:", tokens)    
    
    # Análise Sintática (Parser)
    print("\nAnálise Sintática:")
    parser = Parser(tokens)
    try:
        parser.start()
        print("Análise sintática concluída com sucesso!")
    except Exception as e:
        print(f"Erro no parser: {e}")

    states = {
        'tokens': tokens,
        'symbol_table': parser.symbol_table
    }
    print("\nEstados salvos:", states)

if __name__ == '__main__':
    main()

Lendo o arquivo codigo.x ...

Tokenização:
Tokens: [<__main__.Token object at 0x00000180F5365B90>, <__main__.Token object at 0x00000180F53669D0>, <__main__.Token object at 0x00000180F5364690>, <__main__.Token object at 0x00000180F5367A90>, <__main__.Token object at 0x00000180F53654D0>, <__main__.Token object at 0x00000180F5365DD0>, <__main__.Token object at 0x00000180F53648D0>, <__main__.Token object at 0x00000180F5365110>, <__main__.Token object at 0x00000180F5364DD0>, <__main__.Token object at 0x00000180F53644D0>, <__main__.Token object at 0x00000180F5364ED0>, <__main__.Token object at 0x00000180F5367E90>, <__main__.Token object at 0x00000180F53655D0>, <__main__.Token object at 0x00000180F5364D50>, <__main__.Token object at 0x00000180F5364F90>, <__main__.Token object at 0x00000180F5364FD0>, <__main__.Token object at 0x00000180F5365B10>, <__main__.Token object at 0x00000180F5365D90>, <__main__.Token object at 0x00000180F5364790>, <__main__.Token object at 0x00000180F5365F90>, <__main_

KeyboardInterrupt: 