<h3>Token</h3>
<h4>Analisador Lexico - AFD </h4>

In [73]:
T_KEYWORD = "<keyword %s>"
T_OP = "<op %s>"
T_INT = "<int %s>"
T_STRING = "<string %s>"
T_IDENTIF = "<id %s>"
T_SPECIAL = "<special %s>"
T_PUNCT = "<punct %s>"
T_DOT = "<dot>"
T_CONDITIONAL_OP = "<conditional_op %s>"
T_COMMENT = "<comment>"

class Token():
    def __init__(self, tipo, valor=None):
        self.tipo = tipo
        self.valor = valor

    def __str__(self):
        return f"Token(tipo={self.tipo}, valor={self.valor})"

    def __repr__(self):
        return self.__str__()

class StopExecution(Exception):
    def _render_traceback_(self):
        pass

def tokenize_line(line, line_number):
    tokens = []
    i = 0
    n = len(line)

    while i < n:
        if line[i].isspace():
            i += 1
        elif line[i] == '#':  # Comment
            while i < n:
                i += 1
            break
            # start = i
            # while i < n:
            #     i += 1
            # tokens.append(Token("T_COMMENT", line[start:i]))
            break
        elif line[i].isalpha():
            start = i
            while i < n and (line[i].isalnum() or line[i] == '_'):
                i += 1
            word = line[start:i]
            if word in ["var", "func", "if", "elif", "else", "return", "object", "init"]:
                tokens.append(Token("T_KEYWORD", word))
            elif word in ["true", "false", "null", "end", "main"]:
                tokens.append(Token("T_SPECIAL", word))
            else:
                tokens.append(Token("T_IDENTIF", word))
        elif line[i].isdigit():
            start = i
            while i < n and line[i].isdigit():
                i += 1
            tokens.append(Token("T_INT", line[start:i]))
        elif line[i] == '"':
            start = i
            i += 1
            while i < n and line[i] != '"':
                i += 1
            if i >= n:
                print(f"Erro: String não fechada na linha {line_number}")
                raise StopExecution
            i += 1
            tokens.append(Token("T_STRING", line[start:i]))
        # elif line[i] in "=<>!+-*/":
        #     start = i
        #     i += 1
        #     if i < n and line[i] == "=":
        #         i += 1
        #     tokens.append(Token("T_OP", line[start:i]))
        elif line[i] in "=<>!":
            start = i
            i += 1
            if i < n and line[i] == "=":
                i += 1
                tokens.append(Token("T_OP", line[start:i]))
            else:
                tokens.append(Token("T_OP", line[start:i]))
        elif line[i] in "+-*/":
            tokens.append(Token("T_OP", line[i]))
            i += 1
        elif line[i] in "(),[]{}":
            tokens.append(Token("T_PUNCT", line[i]))
            i += 1
        elif line[i] in "?:":
            tokens.append(Token("T_CONDITIONAL_OP", line[i]))
            i += 1
        elif line[i] == '.':
            tokens.append(Token("T_DOT"))
            i += 1
        else:
            print(f"Erro: Caractere não reconhecido '{line[i]}' na linha {line_number}")
            raise StopExecution
    return tokens

def tokenize():

    try:
        token_total = [];
        with open('codigo2.x', 'r') as f:
            lines = f.read().splitlines()

        line_number = 0
        for line in lines:
            line_number += 1
            tokens = tokenize_line(line, line_number)
            token_total.extend(tokens)
            #print(tokens)

        return token_total

    except StopExecution:
        print("Execução parada devido a erro.")

    try:
        token_total = [];
        with open('codigo2.x', 'r') as f:
            lines = f.read().splitlines()

        line_number = 0
        for line in lines:
            line_number += 1
            tokens = tokenize_line(line, line_number)
            token_total.extend(tokens)
            #print(tokens)

        return token_total

    except StopExecution:
        print("Execução parada devido a erro.")

<h3>Analisador Sinatico - Parser </h3>

In [74]:
class Parser:
    def __init__(self, tokens):
        self.tokens = tokens
        self.current_idx = 0
        self.current_token = self.tokens[0] if tokens else None
        self.symbol_table = {}

    def get_next_token(self):
        self.current_idx += 1
        if self.current_idx < len(self.tokens):
            self.current_token = self.tokens[self.current_idx]
        else:
            self.current_token = None

    def look_ahead(self):
        if self.current_idx + 1 < len(self.tokens):
            return self.tokens[self.current_idx + 1]
        return None

    def eat(self, token_type, specific_value=None):
        if self.current_token.tipo == token_type and (specific_value is None or self.current_token.valor == specific_value):
            self.get_next_token()
        else:
            raise Exception(f"Erro de sintaxe. Esperado: {token_type} - Valor: {specific_value if specific_value else ''}. Recebido: {self.current_token.tipo} - Valor: {self.current_token.valor}")

    def factor(self):
        token = self.current_token
        if token.tipo == 'T_INT':
            self.eat('T_INT')
            return int(token.valor)
        elif token.tipo == 'T_IDENTIF':
            # avança o para o proximo token
            next_token = self.look_ahead()
            if next_token and next_token.tipo == 'T_PUNCT' and next_token.valor == '(':
                return self.call_function()  # É uma chamada de função
            else:
                self.eat('T_IDENTIF')
                if token.valor not in self.symbol_table:
                    raise Exception(f"Variável {token.valor} não definida.")
                return self.symbol_table.get(token.valor, None)
        elif token.tipo == 'T_STRING':
            self.eat('T_STRING')
            return token.valor[1:-1]
        elif token.tipo == 'T_PUNCT' and token.valor == '(':
            self.eat('T_PUNCT', '(')
            result = self.expr()
            self.eat('T_PUNCT', ')')
            return result
        elif token.tipo == "T_SPECIAL" and token.valor in ["true", "false"]:
            self.eat("T_SPECIAL")
            return token.valor == "true"
        elif token.tipo == 'T_OP' and token.valor == '?':
            return self.ternary_expr()
        else:
            raise Exception(f"Erro de sintaxe. Token inesperado: {token.tipo} - Valor: {token.valor}")

    def term(self):
        result = self.factor()
        while self.current_token is not None and self.current_token.tipo == 'T_OP' and self.current_token.valor in ['*', '/']:
            token = self.current_token
            if token.valor == '*':
                self.eat('T_OP')
                result *= self.factor()
            elif token.valor == '/':
                self.eat('T_OP')
                result /= self.factor()
        return result
    
    def binary_expr(self):
        left = self.factor()
        if self.current_token.tipo == 'T_OP':
            op = self.current_token.valor
            self.eat('T_OP')
            right = self.factor()
            if op == '+':
                return left + right
            elif op == '-':
                return left - right
            elif op == '*':
                return left * right
            elif op == '/':
                return left / right
            elif op == '==':
                return left == right
            elif op == '!=':
                return left != right
            elif op == '>':
                return left > right
            elif op == '<':
                return left < right
            elif op == '>=':
                return left >= right
            elif op == '<=':
                return left <= right
            else:
                raise Exception(f"Operador desconhecido: {op}")
        else:
            return left

    def expr(self):
        left = self.ternary_expr()
        while self.current_token and self.current_token.tipo == 'T_OP' and self.current_token.valor in ['+', '-', '*', '/', '>', '<', '==', '!=', '>=', '<=']:
            op = self.current_token.valor
            self.eat('T_OP')
            right = self.ternary_expr()  # Chamada para ternary_expr aqui.
            if op == '+':
                left += right
            elif op == '-':
                left -= right
            elif op == '*':
                left *= right
            elif op == '/':
                left /= right
            elif op == '==':
                left = left == right
            elif op == '!=':
                left = left != right
            elif op == '>':
                left = left > right
            elif op == '<':
                left = left < right
            elif op == '>=':
                left = left >= right
            elif op == '<=':
                left = left <= right
        return left


    def simple_expr(self):
        result = self.term()
        while self.current_token is not None and self.current_token.tipo == 'T_OP' and self.current_token.valor in ['+', '-']:
            token = self.current_token
            if token.valor == '+':
                self.eat('T_OP')
                result += self.term()
            elif token.valor == '-':
                self.eat('T_OP')
                result -= self.term()
        return result

    def assignment(self):
        if self.look_ahead() and self.look_ahead().valor == '=':
            var_name = self.current_token.valor
            self.eat('T_IDENTIF')
            self.eat('T_OP', '=')
            var_value = self.expr()
            self.symbol_table[var_name] = var_value
        elif self.look_ahead() and self.look_ahead().valor == '(':
            self.call_function()
        elif self.look_ahead() and self.look_ahead().valor == '==':
            self.relational_expr()
        else:
            raise Exception(f"Erro de sintaxe ao tentar atribuir ou chamar função: {self.current_token.valor}")

    def block(self):
        self.eat('T_PUNCT', '{')
        while self.current_token and (self.current_token.tipo != 'T_PUNCT' or self.current_token.valor != '}'):
            self.statement()
        self.eat('T_PUNCT', '}')

    def conditional_statement(self):
        if self.current_token.valor == "if":
            self.eat('T_KEYWORD')
            self.eat('T_PUNCT', '(')
            condition = self.relational_expr()
            self.eat('T_PUNCT', ')')
            self.block()
        elif self.current_token.valor == "elif":
            self.eat('T_KEYWORD')
            self.eat('T_PUNCT', '(')
            condition = self.relational_expr()
            self.eat('T_PUNCT', ')')
            self.block()
        else:
            self.eat('T_KEYWORD')
            self.block()

    def return_statement(self):
        self.eat('T_KEYWORD')  # Consume 'return'
        if self.current_token and self.look_ahead() and self.look_ahead().tipo == 'T_OP' and self.look_ahead().valor == '?':
            return_value = self.ternary_expr()
        elif self.current_token and self.look_ahead() and self.look_ahead().tipo == 'T_OP':
            return_value = self.binary_expr()
        else:
            return_value = self.factor()
        return return_value

    def ternary_expr(self):
        condition = self.simple_expr()
        if self.current_token and self.current_token.tipo == 'T_OP' and self.current_token.valor == '?':
            self.eat('T_OP', '?')
            true_expr = self.expr()
            self.eat('T_OP', ':')
            false_expr = self.expr()
            return true_expr if condition else false_expr
        return condition

    def loop_statement(self):
        self.eat('T_KEYWORD')  # Consumir "while"
        self.eat('T_PUNCT', '(')
        condition = self.relational_expr()
        self.eat('T_PUNCT', ')')
        while condition:
            self.block()
            condition = self.relational_expr()

    def function_definition(self):
        self.eat('T_KEYWORD')  # Consume "func"
        func_name = self.current_token.valor
        self.eat('T_IDENTIF')
        self.eat('T_PUNCT', '(')

        parameters = []

        while self.current_token.tipo == 'T_IDENTIF':
          param_name = self.current_token.valor
          parameters.append(param_name)
          self.eat('T_IDENTIF')

          # Se houver mais parâmetros, coma a vírgula
          if self.current_token.tipo == 'T_PUNCT' and self.current_token.valor == ',':
              self.eat('T_PUNCT', ',')
        # Handle function arguments as needed
        self.eat('T_PUNCT', ')')
        self.block()

        self.symbol_table[func_name] = {
            'parameters': parameters,
        }

    def var_declaration(self):
        self.eat('T_KEYWORD', 'var')
        var_name = self.current_token.valor
        self.eat('T_IDENTIF')
        self.eat('T_OP', '=')
        if self.current_token.tipo == 'T_IDENTIF' and self.current_token.valor in self.symbol_table:
            var_value = self.call_function()
        else:
            var_value = self.expr()
        self.symbol_table[var_name] = var_value

    def call_function(self):
        func_name = self.current_token.valor
        self.eat('T_IDENTIF')
        self.eat('T_PUNCT', '(')

        arguments = []

        while self.current_token.tipo != 'T_PUNCT' or self.current_token.valor != ')':
          arg_value = self.expr()
          arguments.append(arg_value)

          # Se houver mais argumentos, coma a vírgula
          if self.current_token.tipo == 'T_PUNCT' and self.current_token.valor == ',':
              self.eat('T_PUNCT', ',')

        # Handle function arguments as needed
        self.eat('T_PUNCT', ')')

        if func_name in self.symbol_table:
          parameters = self.symbol_table[func_name]['parameters']
          if len(parameters) != len(arguments):
              raise Exception("Número incorreto de argumentos para a função")
        
        elif func_name == "print":  # Tratando função integrada 'print'
            return self.builtin_print(*arguments)
          
        func_impl = self.symbol_table[func_name]['implementation']
        return func_impl(*arguments)
    
    def builtin_print(self, *args):
        print(" ".join(map(str, args)))

    def relational_expr(self):  
        left = self.simple_expr()
        if left is None:
            left = self.simple_expr()
        if self.current_token.valor == '>':
            self.eat('T_OP')
            right = self.simple_expr()
            return left > right
        elif self.current_token.valor == '<':
            self.eat('T_OP')
            right = self.simple_expr()
            return left < right
        elif self.current_token.valor == '==':
            self.eat('T_OP', '==')
            right = self.simple_expr()
            return left == right
        elif self.current_token.valor == '!=':
            self.eat('T_OP', '!=')
            right = self.simple_expr()
            return left != right
        elif self.current_token.valor == '>=':
            self.eat('T_OP', '>=')
            right = self.simple_expr()
            return left >= right
        elif self.current_token.valor == '<=':
            self.eat('T_OP', '<=')
            right = self.simple_expr()
            return left <= right
        else:
            return left

    def statement(self):
      print(f"Processando token: {self.current_token.tipo}, {self.current_token.valor}") 
      if self.current_token.valor == "var":
          self.var_declaration()
      elif self.current_token.valor in ["if", "elif", "else"]:
          self.conditional_statement()
      elif self.current_token.valor == "while":
          self.loop_statement()
      elif self.current_token.valor == "func":
          self.function_definition()
      elif self.current_token.tipo == 'T_IDENTIF':
          if self.look_ahead() and self.look_ahead().valor == '=':
              self.assignment()
          else:
              # Trate a expressão como uma chamada de função
              self.call_function()
      elif self.current_token.valor == "return":
          self.return_statement()
      else:
          raise Exception("Erro de sintaxe: Instrução desconhecida")

    def parse(self):
        while self.current_token is not None:
          self.statement()

<h3>Compilador</h3>

In [75]:
import traceback
import sys

def main():
    filename = 'codigo2.x'
    print(f"Lendo o arquivo {filename} ...")

    arquivo = open(filename)
    for l in arquivo.readlines():
        l = l.replace('\n','') # remove a quebra de linha
        print(l)

    # Tokenização
    print("\nTokenização:")
    tokens = tokenize()
    print("Tokens:")
    print("\nAnálise Sintática:")
    parser = Parser(tokens)
    try:
        parser.parse()  # Use the parse method instead of start
        print("Análise sintática concluída com sucesso!")
    except Exception as e:
        print(traceback.format_exc())
        print(f"Erro no parser: {e}")

    states = {
        'tokens': tokens,
        'symbol_table': parser.symbol_table
    }
    print("\nEstados salvos:", states)

if __name__ == '__main__':
    main()

Lendo o arquivo codigo2.x ...
# ANT
func home(){
    var a = 1
    var b = 2
    var c = 3

    if(c>a){
        print(c," maior que ", a)
    }
    elif(c<b){
        print(c," menor que ", b)
    }
    else{
        print("error")
    }

    var soma = add(a,b)

}

func add(a,b){
    return a + b
}

Tokenização:
Tokens:

Análise Sintática:
Processando token: T_KEYWORD, func
Processando token: T_KEYWORD, var
Processando token: T_KEYWORD, var
Processando token: T_KEYWORD, var
Processando token: T_KEYWORD, if
Processando token: T_IDENTIF, print
3  maior que  1
Processando token: T_KEYWORD, elif
Processando token: T_IDENTIF, print
3  menor que  2
Processando token: T_KEYWORD, else
Processando token: T_IDENTIF, print
error
Processando token: T_KEYWORD, var
Traceback (most recent call last):
  File "C:\Users\gusta\AppData\Local\Temp\ipykernel_1144\466364762.py", line 24, in main
    parser.parse()  # Use the parse method instead of start
    ^^^^^^^^^^^^^^
  File "C:\Users\gusta\AppData\Lo