In [188]:
#import

import re
import json
import glob
import os

<h2>AFD e Lexer</h2>
<h4>Nessa parte, definimos a classe de tokens, a classe de AFDs, criamos o afd e por fim definimos a classe lexer</h4>

In [189]:
#class token

class Token:
    """A lexical token: the lexeme, its category and the line it was read on."""

    def __init__(self, value, type, line):
        # `type` shadows the builtin only inside this constructor; the name is
        # kept because it is part of the constructor's signature.
        self.value, self.type, self.line = value, type, line

    def __str__(self):
        """Human-readable form used in debug and recovery messages."""
        return f"Token({self.type}, '{self.value}', line {self.line})"

    def to_dict(self):
        """Return the JSON-ready record with keys token, linha and lexema."""
        return dict(token=self.type, linha=self.line, lexema=self.value)

In [190]:
#class afd
class AFD:
    """Table-driven deterministic finite automaton.

    `transitions` maps a state to a row {symbol: next_state}. A symbol is
    either a literal character or one of the class labels 'LETTER', 'DIGIT'
    and 'ANY'. `accepts` maps each accepting state to a token type.
    """

    def __init__(self, name, transitions, start, accepts):
        self.name = name
        self.transitions = transitions
        self.start = start
        self.accepts = accepts
        self.reset()

    def reset(self):
        """Return to the start state and clear the accumulated lexeme."""
        self.lexeme = ""
        self.state = self.start

    def step(self, c):
        """Try to consume `c`; return True if a transition was taken.

        A literal entry for `c` wins over the class labels, which are tried
        in the order LETTER, DIGIT, ANY. ANY never matches a double quote,
        a single quote or a newline. On failure nothing is modified.
        """
        row = self.transitions.get(self.state, {})
        if c in row:
            label = c
        elif 'LETTER' in row and c.isalpha():
            label = 'LETTER'
        elif 'DIGIT' in row and c.isdigit():
            label = 'DIGIT'
        elif 'ANY' in row and c not in ('"', "'", '\n'):
            label = 'ANY'
        else:
            return False
        self.state = row[label]
        self.lexeme += c
        return True

    def is_accepting(self):
        """True when the current state is one of the accepting states."""
        return self.state in self.accepts

    def token_type(self):
        """Token type of the current state, or None if it is not accepting."""
        return self.accepts.get(self.state)

In [191]:
#make afd
def make_afd():
    """Build the single automaton that recognises every token of the language.

    State 0 is the start state. The returned AFD is named "global".
    """
    # Start state: dispatch on the first character of the lexeme.
    start_row = {
        'LETTER': 1, 'DIGIT': 2, '_': 1,
        '=': 3, '!': 4, '>': 5, '<': 6, '-': 7,
        '+': 8, '*': 9, '/': 10,
        '(': 11, ')': 12, '{': 13, '}': 14,
        ';': 15, ',': 16, ':': 17,
        '"': 18, "'": 19,
    }
    transitions = {0: start_row}

    # Identifiers (1): letters, digits and underscores keep the state.
    transitions[1] = dict.fromkeys(('LETTER', 'DIGIT', '_'), 1)

    # Integers (2); a '.' moves to 20, which needs at least one digit to
    # reach the float state 21.
    transitions[2] = {'DIGIT': 2, '.': 20}
    transitions[20] = {'DIGIT': 21}
    transitions[21] = {'DIGIT': 21}

    # Two-character operators: '==', '!=', '>=', '<=' and '->'.
    for state, follow_char, target in [
        (3, '=', 22), (4, '=', 23), (5, '=', 24), (6, '=', 25), (7, '>', 26),
    ]:
        transitions[state] = {follow_char: target}

    # Double-quoted strings (18): loop on ANY until the closing quote (27).
    transitions[18] = {'ANY': 18, '"': 27}

    # Character literals: opening quote (19), one character (28), closing (29).
    transitions[19] = {'ANY': 28}
    transitions[28] = {"'": 29}

    # Accepting states and the token type each one yields. State 4 ('!' alone)
    # and the intermediate states 18, 19, 20 and 28 are deliberately absent.
    accepts = dict([
        (1, "ID"),
        (2, "INT_CONST"),
        (21, "FLOAT_CONST"),
        (3, "ASSIGN"),
        (7, "MINUS"),
        (8, "PLUS"),
        (9, "MULT"),
        (10, "DIV"),
        (22, "EQ"),
        (23, "NE"),
        (24, "GE"),
        (25, "LE"),
        (26, "ARROW"),
        (5, "GT"),
        (6, "LT"),
        (11, "LBRACKET"),
        (12, "RBRACKET"),
        (13, "LBRACE"),
        (14, "RBRACE"),
        (15, "SEMICOLON"),
        (16, "COMMA"),
        (17, "COLON"),
        (27, "FMT_STRING"),
        (29, "CHAR_LITERAL"),
    ])

    return AFD("global", transitions, 0, accepts)

In [192]:
#class lexer
class Lexer:
    """Splits source text into Token objects using the automaton from make_afd()."""

    # Identifier lexemes that are promoted to keyword token types.
    RESERVED = {
        "fn": "FUNCTION", "main": "MAIN", "let": "LET",
        "int": "INT", "float": "FLOAT", "char": "CHAR",
        "if": "IF", "else": "ELSE", "while": "WHILE",
        "println": "PRINTLN", "return": "RETURN"
    }

    def __init__(self, source):
        self.source = source
        self.tokens = []
        self.afd = make_afd()

    def tokenize(self):
        """Scan `self.source` and return the token list, ending with an EOF token.

        Whitespace is skipped and newlines advance the line counter. For each
        lexeme the automaton runs as far as it can and the scanner then falls
        back to the last accepting position (longest match). When no prefix
        is accepted, a lexical error is printed for the first character and
        scanning resumes one character later.
        """
        source = self.source
        size = len(source)
        afd = self.afd
        # Start from a fresh list so that calling tokenize() twice does not
        # return the tokens (and the EOF marker) duplicated.
        self.tokens = []
        i = 0
        line = 1

        while i < size:
            c = source[i]

            # Line tracking
            if c == '\n':
                line += 1
                i += 1
                continue

            # Skip any other whitespace
            if c.isspace():
                i += 1
                continue

            afd.reset()
            j = i
            accept_end = None   # index just past the longest accepted prefix
            accept_type = None  # token type of that prefix

            while j < size and afd.step(source[j]):
                j += 1
                if afd.is_accepting():
                    accept_end = j
                    accept_type = afd.token_type()

            if accept_end is None:
                print(f"Erro l√©xico: caractere inesperado '{source[i]}' na linha {line}")
                i += 1
                continue

            # The automaton may have run past the accepted prefix (e.g. "1."
            # followed by a non-digit); only the accepted part is emitted.
            lex = source[i:accept_end]
            tipo = self.RESERVED.get(lex, accept_type)
            self.tokens.append(Token(lex, tipo, line))
            i = accept_end

        self.tokens.append(Token("", "EOF", line))
        return self.tokens

<h2>Testes do Lexer</h2>
<h4>Aqui executamos os testes referentes à primeira entrega, onde são usados arquivos de teste da pasta "entradas de tokens reconhecidos" e é gerado um JSON para cada teste, que é salvo na pasta "saidas de tokens reconhecidos"</h4>

In [193]:
# Test: tokenize calculadora.p and dump the recognised tokens as JSON
entrada = "codigos base da linguagem p/calculadora.p"
saida = "saidas de tokens reconhecidos/calculadora_tokens.json"

# Read inside a context manager so the input handle is closed right away.
with open(entrada, "r", encoding="utf-8") as f:
    codigo = f.read()

lexer = Lexer(codigo)
tokens = lexer.tokenize()

tokens_json = [token.to_dict() for token in tokens]

# Make sure the output folder exists on a fresh checkout.
os.makedirs(os.path.dirname(saida), exist_ok=True)
with open(saida, "w", encoding="utf-8") as f:
    json.dump(tokens_json, f, indent=2, ensure_ascii=False)

In [194]:
# Test: tokenize lexical_error.p (expected to print lexical errors) and dump the tokens as JSON
entrada = "codigos base da linguagem p/lexical_error.p"
saida = "saidas de tokens reconhecidos/lexical_error_tokens.json"

# Read inside a context manager so the input handle is closed right away.
with open(entrada, "r", encoding="utf-8") as f:
    codigo = f.read()

lexer = Lexer(codigo)
tokens = lexer.tokenize()

tokens_json = [token.to_dict() for token in tokens]

# Make sure the output folder exists on a fresh checkout.
os.makedirs(os.path.dirname(saida), exist_ok=True)
with open(saida, "w", encoding="utf-8") as f:
    json.dump(tokens_json, f, indent=2, ensure_ascii=False)

Erro l√©xico: caractere inesperado '[' na linha 3
Erro l√©xico: caractere inesperado ']' na linha 3
Erro l√©xico: caractere inesperado '$' na linha 5
Erro l√©xico: caractere inesperado '&' na linha 6


In [195]:
# Test: tokenize loop_simples.p and dump the recognised tokens as JSON
entrada = "codigos base da linguagem p/loop_simples.p"
saida = "saidas de tokens reconhecidos/loop_simples_tokens.json"

# Read inside a context manager so the input handle is closed right away.
with open(entrada, "r", encoding="utf-8") as f:
    codigo = f.read()

lexer = Lexer(codigo)
tokens = lexer.tokenize()

tokens_json = [token.to_dict() for token in tokens]

# Make sure the output folder exists on a fresh checkout.
os.makedirs(os.path.dirname(saida), exist_ok=True)
with open(saida, "w", encoding="utf-8") as f:
    json.dump(tokens_json, f, indent=2, ensure_ascii=False)

In [196]:
# Test: tokenize media.p and dump the recognised tokens as JSON
entrada = "codigos base da linguagem p/media.p"
saida = "saidas de tokens reconhecidos/media_tokens.json"

# Read inside a context manager so the input handle is closed right away.
with open(entrada, "r", encoding="utf-8") as f:
    codigo = f.read()

lexer = Lexer(codigo)
tokens = lexer.tokenize()

tokens_json = [token.to_dict() for token in tokens]

# Make sure the output folder exists on a fresh checkout.
os.makedirs(os.path.dirname(saida), exist_ok=True)
with open(saida, "w", encoding="utf-8") as f:
    json.dump(tokens_json, f, indent=2, ensure_ascii=False)

In [197]:
# Test: tokenize soma.p and dump the recognised tokens as JSON
entrada = "codigos base da linguagem p/soma.p"
saida = "saidas de tokens reconhecidos/soma_tokens.json"

# Read inside a context manager so the input handle is closed right away.
with open(entrada, "r", encoding="utf-8") as f:
    codigo = f.read()

lexer = Lexer(codigo)
tokens = lexer.tokenize()

tokens_json = [token.to_dict() for token in tokens]

# Make sure the output folder exists on a fresh checkout.
os.makedirs(os.path.dirname(saida), exist_ok=True)
with open(saida, "w", encoding="utf-8") as f:
    json.dump(tokens_json, f, indent=2, ensure_ascii=False)

In [198]:
# Test: tokenize tokens.p and dump the recognised tokens as JSON
entrada = "codigos base da linguagem p/tokens.p"
saida = "saidas de tokens reconhecidos/tokens_tokens.json"

# Read inside a context manager so the input handle is closed right away.
with open(entrada, "r", encoding="utf-8") as f:
    codigo = f.read()

lexer = Lexer(codigo)
tokens = lexer.tokenize()

tokens_json = [token.to_dict() for token in tokens]

# Make sure the output folder exists on a fresh checkout.
os.makedirs(os.path.dirname(saida), exist_ok=True)
with open(saida, "w", encoding="utf-8") as f:
    json.dump(tokens_json, f, indent=2, ensure_ascii=False)

<h2>Parser e Tabela de Simbolos</h2>

In [199]:
#class parser
class Parser:
    def __init__(self, tokens):
        """Set up a parser over `tokens`, the list produced by the lexer."""
        # Semantic bookkeeping.
        self.symbol_table_manager = SymbolTableManager()
        self.current_function = None
        self.param_position = 0  # position of the next parameter to declare
        self.errors = []
        # Cursor over the token stream; current_token is None for an empty list.
        self.tokens = tokens
        self.pos = 0
        self.current_token = None if not tokens else tokens[0]

    def parse(self, nome_arquivo=None):
        try:
            self.parse_programa()
            if self.current_token and self.current_token.type != "EOF":
                self.error("Esperado fim de arquivo")
        except Exception as e:
            self.error(f"Erro de sintaxe: {str(e)}")
        
        # DEBUG: Mostrar estado das tabelas de s√≠mbolos
        print("\n=== DEBUG TABELAS DE S√çMBOLOS ===")
        for i, table in enumerate(self.symbol_table_manager.tables):
            print(f"Tabela {i+1}: {table.scope_name}")
            for symbol_name, symbol in table.symbols.items():
                categoria = "par√¢metro" if symbol.is_param else "vari√°vel"
                pos_info = f" (pos: {symbol.pos_param})" if symbol.is_param else ""
                print(f"  - {symbol_name} ({categoria}{pos_info}, {symbol.datatype}, linha {symbol.line})")
        
        self.generate_output_files(nome_arquivo)

    def advance(self):
        self.pos += 1
        if self.pos < len(self.tokens):
            self.current_token = self.tokens[self.pos]
        else:
            self.current_token = None
    
    def match(self, expected_type):
        if self.current_token is None:
            self.error(f"Esperado '{expected_type}', encontrado 'EOF'")
            return None
        if self.current_token.type == expected_type:
            token = self.current_token
            self.advance()
            return token
        else:
            found = self.current_token.type 
            self.error(f"Esperado '{expected_type}', encontrado '{found}'")
            return None
    
    def error(self, message):
        if self.current_token is not None:
            line = self.current_token.line
        elif self.tokens:
            line = self.tokens[-1].line
        else:
            line = 1
        
        error_message = f"Linha {line}: {message}"
        self.errors.append(error_message)
        print(f"Erro de sintaxe: {error_message}")
    
    # ========== MÉTODOS DE RECUPERAÇÃO DE ERROS ==========
    def sync_to(self, sync_tokens):
        """Recupera√ß√£o de erro - pula tokens at√© encontrar um dos sync_tokens"""
        while (self.current_token and 
               self.current_token.type not in sync_tokens and
               self.current_token.type != "EOF"):
            print(f"  [RECUPERA√á√ÉO] Pulando token: {self.current_token}")
            self.advance()
        
        if self.current_token and self.current_token.type in sync_tokens:
            print(f"  [RECUPERA√á√ÉO] Sincronizado em: {self.current_token}")
            return True
        return False

    def match_with_recovery(self, expected_type, sync_tokens=None):
        """Match com recupera√ß√£o de erro"""
        if sync_tokens is None:
            sync_tokens = ["SEMICOLON", "RBRACE", "FUNCTION", "EOF"]
            
        if self.current_token is None:
            self.error(f"Esperado '{expected_type}', encontrado 'EOF'")
            return None
            
        if self.current_token.type == expected_type:
            token = self.current_token
            self.advance()
            return token
        else:
            found = self.current_token.type 
            self.error(f"Esperado '{expected_type}', encontrado '{found}'")
            # Recupera√ß√£o
            self.sync_to(sync_tokens)
            return None
    
    # ========== PRODUÇÕES DA GRAMÁTICA ==========
    
    def parse_programa(self):
        self.parse_funcao()
        self.parse_funcao_seq()
    
    def parse_funcao_seq(self):
        while self.current_token and self.current_token.type == "FUNCTION":
            self.parse_funcao()

    def parse_funcao(self):
        self.match_with_recovery("FUNCTION")
        
        nome_funcao_token = self.parse_nome_funcao()
        if nome_funcao_token:
            nome_funcao = nome_funcao_token.value
            self.current_function = nome_funcao
            self.symbol_table_manager.enter_scope(nome_funcao)
            
            # Adicionar fun√ß√£o como s√≠mbolo
            func_symbol = Symbol(nome_funcao, "function", False, nome_funcao_token.line, nome_funcao)
            self.symbol_table_manager.current_table.add_symbol(func_symbol)
        
        self.match_with_recovery("LBRACKET")
        self.param_position = 0  # Reset contador de par√¢metros
        self.parse_lista_params()
        self.match_with_recovery("RBRACKET")
        
        # Processar tipo de retorno
        return_type = self.parse_tipo_retorno_funcao()
        if return_type:
            self.symbol_table_manager.current_table.set_return_type(return_type)
        
        self.parse_bloco()
        
        self.symbol_table_manager.exit_scope()
        self.current_function = None
    
    def parse_nome_funcao(self):
        if self.current_token and self.current_token.type in ["MAIN", "ID"]:
            return self.match_with_recovery(self.current_token.type)
        else:
            self.error("Esperado nome de fun√ß√£o (ID ou 'main')")
            return None
    
    def parse_lista_params(self):
        if self.current_token and self.current_token.type == "ID":
            param_name_token = self.match_with_recovery("ID")
            self.match_with_recovery("COLON")
            type_token = self.parse_type()

            if param_name_token and type_token and self.symbol_table_manager.current_table:
                # ADICIONAR par√¢metro com posi√ß√£o correta
                param_symbol = Symbol(
                    param_name_token.value, 
                    type_token.value.lower(), 
                    True,  # is_param = True
                    param_name_token.line,
                    self.current_function,
                    self.param_position  # posi√ß√£o atual
                )
                if not self.symbol_table_manager.current_table.add_symbol(param_symbol):
                    self.error(f"Par√¢metro '{param_name_token.value}' j√° declarado")
                
                self.param_position += 1  # Incrementar posi√ß√£o
            
            self.parse_lista_params2()
    
    def parse_lista_params2(self):
        while self.current_token and self.current_token.type == "COMMA":
            self.match_with_recovery("COMMA")
            param_name_token = self.match_with_recovery("ID")
            self.match_with_recovery("COLON")
            type_token = self.parse_type()
            
            if param_name_token and type_token and self.symbol_table_manager.current_table:
                param_symbol = Symbol(
                    param_name_token.value, 
                    type_token.value.lower(), 
                    True,  # is_param = True
                    param_name_token.line,
                    self.current_function,
                    self.param_position  # posi√ß√£o atual
                )
                if not self.symbol_table_manager.current_table.add_symbol(param_symbol):
                    self.error(f"Par√¢metro '{param_name_token.value}' j√° declarado")
                
                self.param_position += 1  # Incrementar posi√ß√£o
    
    def parse_tipo_retorno_funcao(self):
        if self.current_token and self.current_token.type == "ARROW":
            self.match_with_recovery("ARROW")
            type_token = self.parse_type()
            if type_token:
                return type_token.value.lower()
        return None

    def parse_type(self):
        if self.current_token and self.current_token.type in ["INT", "FLOAT", "CHAR"]:
            return self.match_with_recovery(self.current_token.type)
        else:
            self.error("Esperado tipo de dado (int, float ou char)")
            return None
    
    def parse_bloco(self):
        self.match_with_recovery("LBRACE")
        self.parse_sequencia()
        self.match_with_recovery("RBRACE")
    
    def parse_sequencia(self):
        while self.current_token and self.current_token.type in self._first_declaracao_comando():
            if self._is_declaracao():
                self.parse_declaracao()
            else:
                self.parse_comando()
    
    def _first_declaracao_comando(self):
        return ["LET", "ID", "IF", "WHILE", "PRINTLN", "RETURN"]
    
    def _is_declaracao(self):
        return self.current_token and self.current_token.type == "LET"
    
    # ========== SISTEMA DE SAÍDA ==========
    def generate_output_files(self, nome_arquivo=None):
        """Gera arquivos de sa√≠da unificados"""
        self._generate_syntax_errors(nome_arquivo)
        self._generate_symbol_tables(nome_arquivo)

    def _generate_syntax_errors(self, nome_arquivo=None): 
        try:
            os.makedirs("saidas do analisador sintatico", exist_ok=True)
            
            # Se n√£o tem nome_arquivo, usa o padr√£o
            if nome_arquivo is None:
                nome_base = "syntax_errors"
            else:
                # Remove extens√£o .p se existir
                nome_base = os.path.splitext(nome_arquivo)[0]
            
            # Arquivo JSON espec√≠fico para este arquivo
            json_path = f"saidas do analisador sintatico/{nome_base}_syntax.json"
            
            with open(json_path, "w", encoding="utf-8") as f:
                resultado = {
                    "arquivo": nome_arquivo or "desconhecido",
                    "valido": len(self.errors) == 0,
                    "total_erros": len(self.errors),
                    "erros": self.errors
                }
                json.dump(resultado, f, indent=2, ensure_ascii=False)
                        
        except Exception as e:
            print(f"Erro ao gerar arquivo de erros de sintaxe: {str(e)}")
    
    def _generate_symbol_tables(self, nome_arquivo=None):
        try:
            os.makedirs("tabelas de simbolos", exist_ok=True)
            
            if nome_arquivo is None:
                nome_base = "symbol_tables"
            else:
                nome_base = os.path.splitext(nome_arquivo)[0]
            
            json_path = f"tabelas de simbolos/{nome_base}_tabelas.json"
            
            with open(json_path, "w", encoding="utf-8") as f:
                symbol_data = self.symbol_table_manager.to_dict()
                symbol_data["arquivo"] = nome_arquivo or "desconhecido"
                json.dump(symbol_data, f, indent=2, ensure_ascii=False)
                        
        except Exception as e:
            print(f"Erro ao gerar arquivo de tabelas de s√≠mbolos: {str(e)}")
    
    def parse_declaracao(self):
        let_token = self.match_with_recovery("LET")
        if not let_token:
            return
            
        var_list = self.parse_var_list()
        self.match_with_recovery("COLON")
        type_token = self.parse_type()
        self.match_with_recovery("SEMICOLON")
    
        # ADICIONAR vari√°veis locais
        if type_token and self.symbol_table_manager.current_table:
            for var_name in var_list:
                var_symbol = Symbol(
                    var_name,
                    type_token.value.lower(),
                    False,  # is_param = False
                    let_token.line,
                    self.current_function,
                    -1  # pos_param = -1 para vari√°veis
                )
                success = self.symbol_table_manager.current_table.add_symbol(var_symbol)
                if not success:
                    self.error(f"Vari√°vel '{var_name}' j√° declarada")
    
    def parse_var_list(self):
        var_name_token = self.match_with_recovery("ID")
        var_names = []
        if var_name_token:
            var_names.append(var_name_token.value)
            # Processar vari√°veis adicionais
            while self.current_token and self.current_token.type == "COMMA":
                self.match_with_recovery("COMMA")
                next_var = self.match_with_recovery("ID")
                if next_var:
                    var_names.append(next_var.value)
        return var_names
    
    def parse_comando(self):
        if self.current_token and self.current_token.type == "ID":
            var_name = self.current_token.value
            
            # Lookahead para determinar se √© atribui√ß√£o ou chamada
            next_pos = self.pos + 1
            if next_pos < len(self.tokens) and self.tokens[next_pos].type == "ASSIGN":
                # √â atribui√ß√£o
                if not self._is_declared(var_name):
                    self.error(f"Vari√°vel '{var_name}' n√£o declarada")
                self.match_with_recovery("ID")
                self.match_with_recovery("ASSIGN")
                self.parse_expr()
                self.match_with_recovery("SEMICOLON")
                
            elif next_pos < len(self.tokens) and self.tokens[next_pos].type == "LBRACKET":
                # √â chamada de fun√ß√£o
                self.match_with_recovery("ID")
                self.match_with_recovery("LBRACKET")
                args = self.parse_lista_args()  # Agora retorna lista de argumentos
                self.match_with_recovery("RBRACKET")
                self.match_with_recovery("SEMICOLON")
                
                # REGISTRAR chamada de fun√ß√£o
                self._register_function_call(var_name, len(args), args)
                
            else:
                self.error("Esperado '=' ou '(' ap√≥s identificador")
                self.sync_to(["SEMICOLON"])
        elif self.current_token and self.current_token.type == "IF":
            self.parse_comando_se()
        elif self.current_token and self.current_token.type == "WHILE":
            self.parse_comando_while()
        elif self.current_token and self.current_token.type == "PRINTLN":
            self.parse_comando_println()
        elif self.current_token and self.current_token.type == "RETURN":
            self.parse_comando_return()
        else:
            self.error("Comando inv√°lido")
            self.sync_to(["SEMICOLON", "RBRACE"])
    
    def _is_declared(self, name):
        """Verifica se um identificador foi declarado em qualquer escopo acess√≠vel"""
        # Na linguagem P, s√≥ temos escopo de fun√ß√£o, ent√£o verificamos na tabela atual
        if self.symbol_table_manager.current_table:
            return self.symbol_table_manager.current_table.lookup(name) is not None
        return False
    
    # NOVO MÉTODO: Registrar chamadas de função
    def _register_function_call(self, function_name, num_args, args):
        """Registra uma chamada de fun√ß√£o na tabela de s√≠mbolos"""
        if not self.symbol_table_manager.current_table:
            return
            
        # Verifica se a fun√ß√£o j√° est√° na tabela
        symbol = self.symbol_table_manager.current_table.lookup(function_name)
        if not symbol:
            # Cria novo s√≠mbolo para fun√ß√£o chamada
            symbol = Symbol(function_name, "function", False, 
                          self.current_token.line if self.current_token else 0,
                          self.current_function)
            self.symbol_table_manager.current_table.add_symbol(symbol)
        
        # Cria registro de chamada
        call_register = FunctionRegister(function_name, num_args, args)
        symbol.add_call_ref(call_register)
    
    def parse_comando_se(self):
        """ComandoSe ‚Üí if Expr Bloco ComandoSenao"""
        self.match_with_recovery("IF")
        self.parse_expr()  
        self.parse_bloco()
        self.parse_comando_senao()
    
    def parse_comando_senao(self):
        """ComandoSenao ‚Üí else ComandoSe | Œµ"""
        if self.current_token and self.current_token.type == "ELSE":
            self.match_with_recovery("ELSE")
            # Pode ser outro if ou um bloco simples
            if self.current_token and self.current_token.type == "IF":
                self.parse_comando_se()
            else:
                self.parse_bloco()
        # Sen√£o, √© Œµ - n√£o faz nada
    
    def parse_comando_while(self):
        """ComandoWhile ‚Üí while Expr Bloco"""
        self.match_with_recovery("WHILE")
        self.parse_expr()
        self.parse_bloco()
        
    def parse_comando_println(self):
        self.match_with_recovery("PRINTLN")
        self.match_with_recovery("LBRACKET")
        self.match_with_recovery("FMT_STRING")
        self.match_with_recovery("COMMA")
        self.parse_lista_args()
        self.match_with_recovery("RBRACKET")
        self.match_with_recovery("SEMICOLON")
    
    def parse_comando_return(self):
        self.match_with_recovery("RETURN")
        self.parse_expr()
        self.match_with_recovery("SEMICOLON")
    
    def parse_lista_args(self):
        args = []
        if self.current_token and self.current_token.type in ["ID", "INT_CONST", "FLOAT_CONST", "CHAR_LITERAL"]:
            arg_value = self._parse_arg_value()
            if arg_value:
                args.append(arg_value)
            args.extend(self.parse_lista_args2())
        return args

    def parse_lista_args2(self):
        args = []
        while self.current_token and self.current_token.type == "COMMA":
            self.match_with_recovery("COMMA")
            arg_value = self._parse_arg_value()
            if arg_value:
                args.append(arg_value)
        return args

    def _parse_arg_value(self):
        """Extrai o valor do argumento (lexema)"""
        if self.current_token and self.current_token.type == "ID":
            token = self.match_with_recovery("ID")
            return token.value if token else None
        elif self.current_token and self.current_token.type in ["INT_CONST", "FLOAT_CONST", "CHAR_LITERAL"]:
            token = self.match_with_recovery(self.current_token.type)
            return token.value if token else None
        return None
    
    def parse_arg(self):
        if self.current_token and self.current_token.type == "ID":
            # Lookahead para ver se √© chamada de fun√ß√£o
            next_pos = self.pos + 1
            if next_pos < len(self.tokens) and self.tokens[next_pos].type == "LBRACKET":
                self.match_with_recovery("ID")
                self.match_with_recovery("LBRACKET")
                self.parse_lista_args()
                self.match_with_recovery("RBRACKET")
            else:
                # Apenas uma vari√°vel
                self.match_with_recovery("ID")
        elif self.current_token and self.current_token.type in ["INT_CONST", "FLOAT_CONST", "CHAR_LITERAL"]:
            self.match_with_recovery(self.current_token.type)
        else:
            self.error("Esperado ID, constante ou literal")
            # Recupera√ß√£o: avan√ßa para evitar loop
            if self.current_token:
                self.advance()
    
    def parse_chamada_funcao(self):
        if self.current_token and self.current_token.type == "LBRACKET":
            self.match_with_recovery("LBRACKET")
            self.parse_lista_args()
            self.match_with_recovery("RBRACKET")
        # Sen√£o, √© Œµ - n√£o faz nada
    
    # ========== EXPRESSÕES ==========
    def parse_expr(self):
        """Expr ‚Üí Rel ExprOpc"""
        self.parse_rel()
        self.parse_expr_opc()

    def parse_expr_opc(self):
        """ExprOpc ‚Üí OpIgual Rel ExprOpc | Œµ"""
        if self.current_token and self.current_token.type in ["EQ", "NE"]:
            self.parse_op_igual()
            self.parse_rel()
            self.parse_expr_opc()
        # Sen√£o, √© Œµ

    def parse_op_igual(self):
        if self.current_token and self.current_token.type in ["EQ", "NE"]:
            self.match_with_recovery(self.current_token.type)
        else:
            self.error("Esperado '==' ou '!='")

    def parse_rel(self):
        """Rel ‚Üí Adicao RelOpc"""
        self.parse_adicao()
        self.parse_rel_opc()

    def parse_rel_opc(self):
        """RelOpc ‚Üí OpRel Adicao RelOpc | Œµ"""
        if self.current_token and self.current_token.type in ["LT", "LE", "GT", "GE"]:
            self.parse_op_rel()
            self.parse_adicao()
            self.parse_rel_opc()
        # Sen√£o, √© Œµ

    def parse_op_rel(self):
        if self.current_token and self.current_token.type in ["LT", "LE", "GT", "GE"]:
            self.match_with_recovery(self.current_token.type)
        else:
            self.error("Esperado operador relacional")

    def parse_adicao(self):
        """Adicao ‚Üí Termo AdicaoOpc"""
        self.parse_termo()
        self.parse_adicao_opc()

    def parse_adicao_opc(self):
        """AdicaoOpc ‚Üí OpAdicao Termo AdicaoOpc | Œµ"""
        while self.current_token and self.current_token.type in ["PLUS", "MINUS"]:
            self.parse_op_adicao()
            self.parse_termo()

    def parse_op_adicao(self):
        if self.current_token and self.current_token.type in ["PLUS", "MINUS"]:
            self.match_with_recovery(self.current_token.type)
        else:
            self.error("Esperado '+' ou '-'")

    def parse_termo(self):
        """Termo ‚Üí Fator TermoOpc"""
        self.parse_fator()
        self.parse_termo_opc()

    def parse_termo_opc(self):
        """TermoOpc ‚Üí OpMult Fator TermoOpc | Œµ"""
        while self.current_token and self.current_token.type in ["MULT", "DIV"]:
            self.parse_op_mult()
            self.parse_fator()

    def parse_op_mult(self):
        if self.current_token and self.current_token.type in ["MULT", "DIV"]:
            self.match_with_recovery(self.current_token.type)
        else:
            self.error("Esperado '*' ou '/'")

    def parse_fator(self):
        """Fator ‚Üí ID ChamadaFuncao | CONST | LITERAL | ( Expr )"""
        if self.current_token and self.current_token.type == "ID":
            self.match_with_recovery("ID")
            self.parse_chamada_funcao()
        elif self.current_token and self.current_token.type in ["INT_CONST", "FLOAT_CONST", "CHAR_LITERAL"]:
            self.match_with_recovery(self.current_token.type)
        elif self.current_token and self.current_token.type == "LBRACKET":
            self.match_with_recovery("LBRACKET")
            self.parse_expr()
            self.match_with_recovery("RBRACKET")
        else:
            self.error("Esperado ID, constante, literal ou '('")
            # Recupera√ß√£o: avan√ßa para evitar loop
            if self.current_token:
                self.advance()

In [200]:
#class symbol

class Symbol:
    """One entry of a symbol table: a variable, a parameter or a function."""

    def __init__(self, name, datatype, is_param, line, scope, pos_param=-1):
        # identifier and its type ('int', 'float', 'char' or 'function')
        self.name, self.datatype = name, datatype
        # True for parameters, False for everything else
        self.is_param = is_param
        # line stored for the symbol and the scope/function it belongs to
        self.line, self.scope = line, scope
        # -1 for non-parameters; 0, 1, 2... for parameters
        self.pos_param = pos_param
        # call records attached to this symbol
        self.call_refs = []

    def add_call_ref(self, call_register):
        """Attach one more call record to this symbol."""
        self.call_refs += [call_register]

In [201]:
#class functionregister

class FunctionRegister:
    """Record of one function call: callee name, argument count and argument lexemes."""

    def __init__(self, name, num_args, args):
        self.name, self.num_args, self.args = name, num_args, args

In [202]:
#class symbol table

class SymbolTable:
    """Symbols declared inside one function scope, keyed by name."""

    def __init__(self, scope_name):
        self.symbols = {}             # name -> Symbol
        self.return_type = "void"     # default until set_return_type() is called
        self.scope_name = scope_name  # name of the function that owns the scope

    def add_symbol(self, symbol):
        """Insert `symbol`; return False, leaving the table unchanged, if the name is taken."""
        already_there = symbol.name in self.symbols
        if not already_there:
            self.symbols[symbol.name] = symbol
        return not already_there

    def lookup(self, name):
        """Return the symbol called `name`, or None when it is not declared."""
        return self.symbols.get(name)

    def set_return_type(self, return_type):
        """Store the return type of the function that owns this scope."""
        self.return_type = return_type

In [203]:
#class symbol table manager

class SymbolTableManager:
    """Holds every scope's symbol table and tracks which one is current.

    Attributes:
        tables: all tables created so far, in creation order.
        current_table: table of the scope being processed, or None.
    """

    def __init__(self):
        """Start with no tables and no current scope."""
        self.tables = []
        self.current_table = None

    def enter_scope(self, function_name):
        """Create a table for function_name, record it and make it current.

        Returns the newly created table.
        """
        table = SymbolTable(function_name)
        self.tables.append(table)
        self.current_table = table
        return table

    def exit_scope(self):
        """Leave the current scope; its table remains stored in self.tables."""
        self.current_table = None

    def to_dict(self):
        """Export all tables as a plain dict of lists and dicts.

        The result has the shape {"tabelas": [...]}, one entry per table with
        its scope name, return type and symbols; each symbol entry carries the
        call records attached to it.
        """
        exported_tables = []
        for table in self.tables:
            exported_symbols = []
            for sym in table.symbols.values():
                if sym.is_param:
                    category = "parametro"
                else:
                    category = "variavel"
                entry = {
                    "nome": sym.name,
                    "tipo": sym.datatype,
                    "categoria": category,
                    "linha": sym.line,
                    "pos_param": sym.pos_param,
                }
                # Added last so the key order matches the exported layout.
                entry["chamadas"] = [
                    {"nome": call.name, "num_args": call.num_args, "args": call.args}
                    for call in sym.call_refs
                ]
                exported_symbols.append(entry)
            exported_tables.append({
                "escopo": table.scope_name,
                "ret_type": table.return_type,
                "simbolos": exported_symbols,
            })
        return {"tabelas": exported_tables}

<h2>Testes do Parser e da Tabela de Símbolos</h2>
<h4>Aqui executamos os testes referentes a cada arquivo base, verificando os erros sintáticos e as tabelas de símbolos geradas.</h4>

In [204]:
%%capture 
# Test for calculadora.p - syntax analysis and symbol tables.
# NOTE(review): this cell runs under `%%capture` with no output variable, so
# every print below -- including the error report in the `except` branch -- is
# captured and never displayed. Name a variable (e.g. `%%capture saida`) to
# be able to inspect the result.
entrada = "codigos base da linguagem p/calculadora.p"
saida_syntax = "saidas do analisador sintatico/calculadora_syntax.json"
saida_tabelas = "tabelas de simbolos/calculadora_tabelas.json"

print("🧪 TESTANDO: calculadora.p")

try:
    with open(entrada, "r", encoding="utf-8") as f:
        codigo = f.read()

    # Tokenize the source, then hand the tokens to the parser.
    lexer = Lexer(codigo)
    tokens = lexer.tokenize()
    parser = Parser(tokens)
    parser.parse("calculadora.p")

    # Report the outcome of the syntax analysis.
    erros_count = len(parser.errors)
    if erros_count == 0:
        print("✅ ANÁLISE SINTÁTICA: VÁLIDA")
    else:
        print(f"❌ ANÁLISE SINTÁTICA: {erros_count} erro(s)")
        for erro in parser.errors:
            print(f"   {erro}")

    # List every symbol of every scope table held by the parser.
    print("\n📊 TABELAS DE SÍMBOLOS:")
    for table in parser.symbol_table_manager.tables:
        print(f"   Escopo: {table.scope_name}")
        for symbol in table.symbols.values():
            categoria = "parâmetro" if symbol.is_param else "variável"
            pos_info = f" (pos: {symbol.pos_param})" if symbol.is_param else ""
            print(f"     - {symbol.name} ({categoria}{pos_info}, {symbol.datatype}, linha {symbol.line})")

    # NOTE(review): nothing in this cell writes these files; presumably
    # Parser.parse() produces them -- confirm.
    print("💾 Arquivos gerados:")
    print(f"   - {saida_syntax}")
    print(f"   - {saida_tabelas}")

except Exception as e:
    # Best-effort: report the failure instead of aborting the notebook run.
    print(f"💥 ERRO: {e}")

In [205]:
%%capture 
# Test for lexical_error.p - syntax analysis and symbol tables.
# NOTE(review): this cell runs under `%%capture` with no output variable, so
# every print below -- including the error report in the `except` branch -- is
# captured and never displayed. Name a variable (e.g. `%%capture saida`) to
# be able to inspect the result.
entrada = "codigos base da linguagem p/lexical_error.p"
saida_syntax = "saidas do analisador sintatico/lexical_error_syntax.json"
saida_tabelas = "tabelas de simbolos/lexical_error_tabelas.json"

print("🧪 TESTANDO: lexical_error.p")

try:
    with open(entrada, "r", encoding="utf-8") as f:
        codigo = f.read()

    # Tokenize the source, then hand the tokens to the parser.
    lexer = Lexer(codigo)
    tokens = lexer.tokenize()
    parser = Parser(tokens)
    parser.parse("lexical_error.p")

    # Report the outcome of the syntax analysis.
    erros_count = len(parser.errors)
    if erros_count == 0:
        print("✅ ANÁLISE SINTÁTICA: VÁLIDA")
    else:
        print(f"❌ ANÁLISE SINTÁTICA: {erros_count} erro(s)")
        for erro in parser.errors:
            print(f"   {erro}")

    # List every symbol of every scope table held by the parser.
    print("\n📊 TABELAS DE SÍMBOLOS:")
    for table in parser.symbol_table_manager.tables:
        print(f"   Escopo: {table.scope_name}")
        for symbol in table.symbols.values():
            categoria = "parâmetro" if symbol.is_param else "variável"
            pos_info = f" (pos: {symbol.pos_param})" if symbol.is_param else ""
            print(f"     - {symbol.name} ({categoria}{pos_info}, {symbol.datatype}, linha {symbol.line})")

    # NOTE(review): nothing in this cell writes these files; presumably
    # Parser.parse() produces them -- confirm.
    print("💾 Arquivos gerados:")
    print(f"   - {saida_syntax}")
    print(f"   - {saida_tabelas}")

except Exception as e:
    # Best-effort: report the failure instead of aborting the notebook run.
    print(f"💥 ERRO: {e}")

In [206]:
%%capture 
# Test for loop_simples.p - syntax analysis and symbol tables.
# NOTE(review): this cell runs under `%%capture` with no output variable, so
# every print below -- including the error report in the `except` branch -- is
# captured and never displayed. Name a variable (e.g. `%%capture saida`) to
# be able to inspect the result.
entrada = "codigos base da linguagem p/loop_simples.p"
saida_syntax = "saidas do analisador sintatico/loop_simples_syntax.json"
saida_tabelas = "tabelas de simbolos/loop_simples_tabelas.json"

print("🧪 TESTANDO: loop_simples.p")

try:
    with open(entrada, "r", encoding="utf-8") as f:
        codigo = f.read()

    # Tokenize the source, then hand the tokens to the parser.
    lexer = Lexer(codigo)
    tokens = lexer.tokenize()
    parser = Parser(tokens)
    parser.parse("loop_simples.p")

    # Report the outcome of the syntax analysis.
    erros_count = len(parser.errors)
    if erros_count == 0:
        print("✅ ANÁLISE SINTÁTICA: VÁLIDA")
    else:
        print(f"❌ ANÁLISE SINTÁTICA: {erros_count} erro(s)")
        for erro in parser.errors:
            print(f"   {erro}")

    # List every symbol of every scope table held by the parser.
    print("\n📊 TABELAS DE SÍMBOLOS:")
    for table in parser.symbol_table_manager.tables:
        print(f"   Escopo: {table.scope_name}")
        for symbol in table.symbols.values():
            categoria = "parâmetro" if symbol.is_param else "variável"
            pos_info = f" (pos: {symbol.pos_param})" if symbol.is_param else ""
            print(f"     - {symbol.name} ({categoria}{pos_info}, {symbol.datatype}, linha {symbol.line})")

    # NOTE(review): nothing in this cell writes these files; presumably
    # Parser.parse() produces them -- confirm.
    print("💾 Arquivos gerados:")
    print(f"   - {saida_syntax}")
    print(f"   - {saida_tabelas}")

except Exception as e:
    # Best-effort: report the failure instead of aborting the notebook run.
    print(f"💥 ERRO: {e}")

In [207]:
%%capture 
# Test for media.p - syntax analysis and symbol tables.
# NOTE(review): this cell runs under `%%capture` with no output variable, so
# every print below -- including the error report in the `except` branch -- is
# captured and never displayed. Name a variable (e.g. `%%capture saida`) to
# be able to inspect the result.
entrada = "codigos base da linguagem p/media.p"
saida_syntax = "saidas do analisador sintatico/media_syntax.json"
saida_tabelas = "tabelas de simbolos/media_tabelas.json"

print("🧪 TESTANDO: media.p")

try:
    with open(entrada, "r", encoding="utf-8") as f:
        codigo = f.read()

    # Tokenize the source, then hand the tokens to the parser.
    lexer = Lexer(codigo)
    tokens = lexer.tokenize()
    parser = Parser(tokens)
    parser.parse("media.p")

    # Report the outcome of the syntax analysis.
    erros_count = len(parser.errors)
    if erros_count == 0:
        print("✅ ANÁLISE SINTÁTICA: VÁLIDA")
    else:
        print(f"❌ ANÁLISE SINTÁTICA: {erros_count} erro(s)")
        for erro in parser.errors:
            print(f"   {erro}")

    # List every symbol of every scope table held by the parser.
    print("\n📊 TABELAS DE SÍMBOLOS:")
    for table in parser.symbol_table_manager.tables:
        print(f"   Escopo: {table.scope_name}")
        for symbol in table.symbols.values():
            categoria = "parâmetro" if symbol.is_param else "variável"
            pos_info = f" (pos: {symbol.pos_param})" if symbol.is_param else ""
            print(f"     - {symbol.name} ({categoria}{pos_info}, {symbol.datatype}, linha {symbol.line})")

    # NOTE(review): nothing in this cell writes these files; presumably
    # Parser.parse() produces them -- confirm.
    print("💾 Arquivos gerados:")
    print(f"   - {saida_syntax}")
    print(f"   - {saida_tabelas}")

except Exception as e:
    # Best-effort: report the failure instead of aborting the notebook run.
    print(f"💥 ERRO: {e}")

In [208]:
%%capture 
# Test for soma.p - syntax analysis and symbol tables.
# NOTE(review): this cell runs under `%%capture` with no output variable, so
# every print below -- including the error report in the `except` branch -- is
# captured and never displayed. Name a variable (e.g. `%%capture saida`) to
# be able to inspect the result.
entrada = "codigos base da linguagem p/soma.p"
saida_syntax = "saidas do analisador sintatico/soma_syntax.json"
saida_tabelas = "tabelas de simbolos/soma_tabelas.json"

print("🧪 TESTANDO: soma.p")

try:
    with open(entrada, "r", encoding="utf-8") as f:
        codigo = f.read()

    # Tokenize the source, then hand the tokens to the parser.
    lexer = Lexer(codigo)
    tokens = lexer.tokenize()
    parser = Parser(tokens)
    parser.parse("soma.p")

    # Report the outcome of the syntax analysis.
    erros_count = len(parser.errors)
    if erros_count == 0:
        print("✅ ANÁLISE SINTÁTICA: VÁLIDA")
    else:
        print(f"❌ ANÁLISE SINTÁTICA: {erros_count} erro(s)")
        for erro in parser.errors:
            print(f"   {erro}")

    # List every symbol of every scope table held by the parser.
    print("\n📊 TABELAS DE SÍMBOLOS:")
    for table in parser.symbol_table_manager.tables:
        print(f"   Escopo: {table.scope_name}")
        for symbol in table.symbols.values():
            categoria = "parâmetro" if symbol.is_param else "variável"
            pos_info = f" (pos: {symbol.pos_param})" if symbol.is_param else ""
            print(f"     - {symbol.name} ({categoria}{pos_info}, {symbol.datatype}, linha {symbol.line})")

    # NOTE(review): nothing in this cell writes these files; presumably
    # Parser.parse() produces them -- confirm.
    print("💾 Arquivos gerados:")
    print(f"   - {saida_syntax}")
    print(f"   - {saida_tabelas}")

except Exception as e:
    # Best-effort: report the failure instead of aborting the notebook run.
    print(f"💥 ERRO: {e}")