# In [7]:
import re
import math

# Token type tags. Each Token carries one of these strings in its `tipo`
# attribute; the lexer (afd_principal) assigns them and the Parser dispatches
# on them.

# Literals, identifiers and generic categories.
T_KEYWORD = "keyword"      # produced for the `init` keyword
T_OP = "op"                # one of = + - * / ^
T_INT = "int"
T_FLOAT = "float"
T_STRING = "string"        # double-quoted literal
T_ID = "id"
T_EOF = "eof"              # synthesized by Parser.proximo past the last token
T_DELIMITER = "delimiter"  # ;
T_BLOCK = "block"          # { or }
T_PARA = "parenthesis"     # ( or )
T_VAR_TYPE = "var_type"    # var

# Control flow and built-in statements.
T_IF = "if"
T_ELIF = "elif"
T_ELSE = "else"
T_OP_COMP = 'T_OP_COMP'    # == != < > <= >=
T_PRINT = "print"
T_WHILE = 'while'
T_FOR = 'for'

# `for` loop range syntax and miscellaneous punctuation.
T_ARROW_UP = "arrow_up"      # ->
T_ARROW_DOWN = "arrow_down"  # <-
T_COMMA = "comma"
T_FUNC = "func"

class Token():
    """A lexical token: a type tag plus the optional source text it came from."""

    def __init__(self, tipo, valor=None):
        """Store the token type (``tipo``) and its lexeme (``valor``)."""
        self.tipo, self.valor = tipo, valor

    def __str__(self):
        """Render the token as ``<tipo valor>``."""
        return f'<{self.tipo!s} {self.valor!s}>'

    def __repr__(self):
        """Use the same rendering as ``str`` so token lists print readably."""
        return str(self)


class StopExecution(Exception):
    """Exception raised to abort the run after a lexical error was reported."""

    def _render_traceback_(self):
        """Return nothing, so no traceback text is produced by this hook."""
        return None

    
def afd_int(token):
    """Return True when ``token`` is an optionally negative integer literal.

    ``token`` is converted with ``str`` first, so numeric objects are accepted
    too. The whole text must match: ``re.fullmatch`` is used because ``$``
    would also accept a trailing newline.
    """
    return re.fullmatch(r'-?\d+', str(token)) is not None

def afd_float(token):
    """Return True when ``token`` is an optionally negative decimal literal.

    At least one digit is required on both sides of the dot. ``token`` is
    converted with ``str`` first. The whole text must match: ``re.fullmatch``
    is used because ``$`` would also accept a trailing newline.
    """
    return re.fullmatch(r'-?\d+\.\d+', str(token)) is not None
    
def afd_string(token):
    """Return True when ``token`` is a double-quoted string literal.

    Raises:
        ValueError: if the token is delimited by quotes but contains another
            quote character between them.
    """
    # A literal needs an opening AND a closing quote. Requiring two characters
    # rejects a lone `"` (which is its own first and last character) and keeps
    # the empty string from raising IndexError.
    if len(token) < 2 or token[0] != '"' or token[-1] != '"':
        return False
    if '"' in token[1:-1]:
        raise ValueError('Aspas em um local inesperado.')
    return True
    
def afd_identificador(token):
    """Return True when ``token`` consists only of letters, digits and ``_``."""
    whole_match = re.fullmatch('[a-zA-Z0-9_]+', token)
    return whole_match is not None
    
def afd_print(token):
    """Return whether ``token`` is the ``print`` keyword."""
    is_print = (token == "print")
    return is_print
    
def afd_if(token):
    """Return whether ``token`` is the ``if`` keyword."""
    is_if = (token == "if")
    return is_if

def afd_elif(token):
    """Return whether ``token`` is the ``elif`` keyword."""
    is_elif = (token == "elif")
    return is_elif

def afd_else(token):
    """Return whether ``token`` is the ``else`` keyword."""
    is_else = (token == "else")
    return is_else

def afd_delimiter(token):
    """Return whether ``token`` is the statement delimiter ``;``."""
    is_delimiter = (token == ";")
    return is_delimiter

def afd_block(token):
    """Return whether ``token`` is an opening or closing brace."""
    braces = ("{", "}")
    return token in braces

def afd_var_type(token):
    """Return whether ``token`` is a variable-declaration keyword (``var``)."""
    declaration_keywords = ("var",)
    return token in declaration_keywords

def afd_para(token):
    """Return whether ``token`` is an opening or closing parenthesis."""
    parentheses = ("(", ")")
    return token in parentheses
    
def afd_principal(token):
    """Classify one whitespace-delimited lexeme and wrap it in a ``Token``.

    The checks run in a fixed order and the first match wins, so keywords and
    punctuation take precedence over the catch-all identifier rule at the end.

    Args:
        token: the lexeme text.

    Returns:
        A ``Token`` whose ``tipo`` is one of the ``T_*`` tags and whose
        ``valor`` is the lexeme.

    Raises:
        ValueError: if the lexeme is empty or matches no token class, or (from
            ``afd_string``) if a quoted literal contains a stray quote.
    """
    if not token:
        # Nothing to classify; reject before the helpers index into the text.
        raise ValueError('Valor inesperado')

    if token == "init":
        return Token(T_KEYWORD, 'init')

    # Exact membership on purpose: a substring test against "=+-*/^" would
    # also accept "", "+-", "*/" or "=+" as operators.
    if token in ("=", "+", "-", "*", "/", "^"):
        return Token(T_OP, token)

    if token in ("==", "!=", "<", ">", "<=", ">="):
        return Token(T_OP_COMP, token)

    if token == "func":
        return Token(T_FUNC, token)

    if afd_int(token):
        return Token(T_INT, token)

    if afd_float(token):
        return Token(T_FLOAT, token)

    if afd_string(token):
        return Token(T_STRING, token)

    if afd_if(token):
        return Token(T_IF, token)

    if afd_elif(token):
        return Token(T_ELIF, token)

    if afd_else(token):
        return Token(T_ELSE, token)

    if token == 'while':
        return Token(T_WHILE, token)

    if token == 'for':
        return Token(T_FOR, token)

    if token == "->":
        return Token(T_ARROW_UP, token)

    if token == "<-":
        return Token(T_ARROW_DOWN, token)

    if token == ",":
        return Token(T_COMMA, token)

    if afd_print(token):
        return Token(T_PRINT, token)

    if afd_delimiter(token):
        return Token(T_DELIMITER, token)

    if afd_block(token):
        return Token(T_BLOCK, token)

    if afd_var_type(token):
        return Token(T_VAR_TYPE, token)

    if afd_para(token):
        return Token(T_PARA, token)

    # Must stay last: every keyword above also looks like an identifier.
    if afd_identificador(token):
        return Token(T_ID, token)

    raise ValueError('Valor inesperado')

class Parser():
    """Recursive-descent parser that translates a token list into C source.

    The parser walks ``tokens`` once. Declared variables live in
    ``symbol_table`` (name -> last value computed while parsing, ``None`` until
    the first assignment) and every recognised construct appends generated C
    lines to ``ir``. Expression methods return ``(value, text)`` pairs: the
    value computed at parse time and the C text of the expression.

    Args:
        tokens: sequence of ``Token`` objects produced by the lexer.
        debug: when true, print a trace of every token consumed and every
            grammar rule entered. Defaults to ``False``.
    """

    def __init__(self, tokens, debug=False):
        self.tokens = tokens
        self.pos = -1
        self.token_atual = None
        # name -> last value computed at parse time (None until assigned)
        self.symbol_table = {}
        # generated C lines, in emission order
        self.ir = []
        self.debug = debug

        self.proximo()

    def _trace(self, *parts):
        """Print a debug trace line when ``debug`` is enabled."""
        if self.debug:
            print(*parts)

    def generate_ir(self):
        """Return the generated C program as one newline-joined string."""
        return "\n".join(self.ir)

    def proximo(self):
        """Advance to the next token and return it.

        Past the end of the input a synthetic ``T_EOF`` token is returned.
        """
        self.pos += 1

        if self.pos >= len(self.tokens):
            self.token_atual = Token(T_EOF)
        else:
            self.token_atual = self.tokens[self.pos]

        self._trace(f"Token atual: {self.token_atual}, Posição: {self.pos}")
        return self.token_atual

    def erro(self, expected=None, message=None):
        """Raise a syntax error describing the expected and the found token.

        Raises:
            Exception: always.
        """
        error_message = f'Erro de sintaxe. Esperado: {expected}, Encontrado: {self.token_atual}'
        if message:
            error_message += f" {message}"
        raise Exception(error_message)

    def use(self, tipo, valor=None):
        """Consume the current token if it has the given type (and value).

        Raises:
            Exception: if the current token does not match.
        """
        if self.token_atual.tipo != tipo or (valor is not None and self.token_atual.valor != valor):
            self.erro(expected=f"{tipo}{' ' + valor if valor else ''}")
        else:
            self.proximo()

    def _parenthesized_condition(self):
        """Parse ``( expr )`` and return the C text of the expression."""
        self.use(T_PARA, "(")
        _, condition_str = self.expr()
        self.use(T_PARA, ")")
        return condition_str

    def _braced_block(self, header):
        """Parse ``{ instructions }`` and emit it under the C line ``header``."""
        self.use(T_BLOCK, "{")
        self.ir.append(header)
        self.instructions()
        self.use(T_BLOCK, "}")
        self.ir.append("}")

    def instructions(self):
        """
        instructions ::= <instruction> <instructions> | epsilon
        """
        self._trace("Parsing instructions...")
        # Stops at the first token that cannot start an instruction, which
        # includes the closing brace of the enclosing block.
        while self.token_atual.tipo in [T_VAR_TYPE, T_ID, T_IF, T_ELIF, T_ELSE, T_PRINT, T_WHILE, T_FOR]:
            self.instruction()

    def instruction(self):
        """
        instruction ::= var <id> ;
        instruction ::= statement ;

        The trailing ``;`` of a statement may be omitted when the next token
        is a brace.

        Raises:
            Exception: on a syntax error or when a variable is redeclared.
        """
        self._trace("Parsing instruction...")
        if self.token_atual.tipo == T_VAR_TYPE:
            self.use(T_VAR_TYPE, "var")
            name = self.token_atual.valor
            self.use(T_ID)
            self.use(T_DELIMITER, ";")
            if name in self.symbol_table:
                raise Exception(f"Erro semantico. Simbolo {name} foi declarado mais de uma vez.")
            self.symbol_table[name] = None
            # Expressions are evaluated as floats and printed with %f, so the
            # C variable must be a floating-point type: printing an `int`
            # with %f is undefined behaviour.
            self.ir.append(f"float {name};")
        else:
            self.statement()
            if self.token_atual.tipo != T_BLOCK:
                self.use(T_DELIMITER, ";")

    def start(self):
        """
        start ::= { instructions }

        Tokens before the first ``{`` are skipped.

        Raises:
            Exception: if no ``{`` is found or the block is malformed.
        """
        self.ir.append("#include <stdio.h>")
        self.ir.append("#include <math.h>")
        self.ir.append("int main() {")

        # Look for the start of the instruction block.
        while not (self.token_atual.tipo == T_BLOCK and self.token_atual.valor == "{"):
            self.proximo()
            if self.token_atual.tipo == T_EOF:
                raise Exception("Erro de sintaxe. Esperado: {")

        # Once the '{' has been found, start processing the instructions.
        self.use(T_BLOCK, "{")
        self.instructions()
        self.use(T_BLOCK, "}")
        self.ir.append("}")

    def statement(self):
        """
        statement ::= <id> = expr | control_structure | print | while | for

        Raises:
            Exception: on a syntax error or assignment to an undeclared name.
        """
        self._trace("Parsing statement...")
        if self.token_atual.tipo == T_IF:
            self.control_structure()
        elif self.token_atual.tipo == T_ID:
            name = self.token_atual.valor
            self.use(T_ID)
            self.use(T_OP, '=')
            value, str_expr = self.expr()
            if name not in self.symbol_table:
                raise Exception(f"Erro semantico. Simbolo {name} nao foi declarado.")
            self.symbol_table[name] = value
            self.ir.append(f"{name} = {str_expr};")
        elif self.token_atual.tipo == T_PRINT:
            self.parse_print()
        elif self.token_atual.tipo == T_WHILE:
            self.parse_while()
        elif self.token_atual.tipo == T_FOR:
            self.parse_for()
        else:
            self.erro("Instrução inesperada.")

    def parse_while(self):
        """
        while ::= while ( expr ) { instructions }
        """
        self._trace("Parsing while loop...")
        self.use(T_WHILE)
        condition_str = self._parenthesized_condition()
        self._braced_block(f"while ({condition_str}) {{")

    def parse_for(self):
        """
        Parses a for loop.
        for ( a -> b , c ) {
            print ( a )
        }

        ``a`` is the (already declared) loop variable, ``b`` the bound and
        ``c`` the step; ``->`` counts up and ``<-`` counts down.

        Raises:
            Exception: on a syntax error or when ``a`` or ``b`` is undeclared.
        """
        self._trace("Parsing for loop...")
        self.use(T_FOR)
        self.use(T_PARA, "(")

        start_variable = self.token_atual.valor
        self.use(T_ID)

        if self.token_atual.tipo == T_ARROW_UP:
            direction = 'increment'
            self.use(T_ARROW_UP)
        elif self.token_atual.tipo == T_ARROW_DOWN:
            direction = 'decrement'
            self.use(T_ARROW_DOWN)
        else:
            # Report a syntax error instead of failing later on an unbound
            # `direction`.
            self.erro(expected="-> ou <-")

        end_variable = self.token_atual.valor
        self.use(T_ID)
        self.use(T_COMMA)

        _, step_expression = self.expr()

        self.use(T_PARA, ")")

        for name in (start_variable, end_variable):
            if name not in self.symbol_table:
                raise Exception(f"Erro semantico. Simbolo {name} nao foi declarado.")

        # The loop variable already exists in the generated program, so the
        # init clause stays empty: `int a = a` would declare a shadowing
        # variable initialised from itself.
        comparison = '<=' if direction == 'increment' else '>='
        update = '+=' if direction == 'increment' else '-='
        for_cond = f"{start_variable} {comparison} {end_variable};"
        for_incr = f"{start_variable} {update} {step_expression.strip()}"
        self._braced_block(f"for (; {for_cond} {for_incr}) {{")

    def parse_print(self):
        """
        print ::= print ( item [,] item ... )
        item  ::= <string> | expr

        Emits one ``printf`` call: string items become literal text, every
        expression item becomes a ``%f`` conversion plus an argument.

        Raises:
            Exception: on a syntax error, including a missing ``)``.
        """
        self._trace("Parsing print statement...")
        self.use(T_PRINT)
        self.use(T_PARA, "(")
        format_strings = []
        print_arguments = []

        while not (self.token_atual.tipo == T_PARA and self.token_atual.valor == ")"):
            current = self.token_atual
            if current.tipo == T_STRING:
                # Escape '%' so literal text is never read as a conversion.
                format_strings.append(current.valor.strip('"').replace('%', '%%'))
                self.use(T_STRING)
            elif current.tipo in (T_ID, T_INT, T_FLOAT) or (current.tipo == T_PARA and current.valor == "("):
                _, expr_str = self.expr()
                print_arguments.append(expr_str.strip())
                # Every value is floating point: variables are declared as
                # `float` and literals are emitted in float form.
                format_strings.append("%f")
            elif current.tipo == T_COMMA:
                # The lexer tags ',' as T_COMMA, not T_DELIMITER.
                self.use(T_COMMA)
            else:
                # Anything else (including EOF) can never reach ')': fail
                # instead of looping forever.
                self.erro(expected=f"{T_PARA} )")

        self.use(T_PARA, ")")

        # Join format and arguments together so a print without arguments
        # does not leave a dangling comma in the call.
        call_arguments = ['"' + ' '.join(format_strings) + '\\n"'] + print_arguments
        self.ir.append('printf(' + ', '.join(call_arguments) + ');')

    def control_structure(self):
        """
        control_structure ::= if ( expr ) { instructions } [ elif ( expr ) { instructions } ]* [ else { instructions } ]
        """
        self._trace("Parsing control structure...")
        if self.token_atual.tipo == T_IF:
            self.use(T_IF)
            condition_str = self._parenthesized_condition()
            self._braced_block(f"if ({condition_str}) {{")

        # Any number of elif branches may follow.
        while self.token_atual.tipo == T_ELIF:
            self.use(T_ELIF)
            condition_str = self._parenthesized_condition()
            self._braced_block(f"else if ({condition_str}) {{")

        if self.token_atual.tipo == T_ELSE:
            self.use(T_ELSE)
            # The else body is parsed and closed like every other block.
            self._braced_block("else {")

    def compare_expr(self):
        """
        compare_expr ::= arith_expr <op_comp> arith_expr

        Returns only the C text of the comparison. ``expr`` does not call
        this method; comparisons inside expressions are consumed by
        ``expr_t_line``.
        """
        _, left_str = self.arith_expr()
        op = self.token_atual.valor
        self.use(T_OP_COMP)
        _, right_str = self.arith_expr()
        return f"{left_str} {op} {right_str}"

    def expr(self):
        """
        expr ::= arith_expr

        Returns:
            ``(value, text)`` for the parsed expression.

        Raises:
            Exception: if the current token cannot start an expression.
        """
        self._trace("Parsing expression...")
        starts_with_paren = self.token_atual.tipo == T_PARA and self.token_atual.valor == "("
        # `factor` handles a parenthesised sub-expression, so '(' is a valid
        # first token as well.
        if starts_with_paren or self.token_atual.tipo in [T_INT, T_FLOAT, T_ID]:
            return self.arith_expr()
        self.erro("Invalid expression")

    def arith_expr(self):
        """
        arith_expr ::= term expr_e_line
        """
        self._trace("Parsing arithmetic expression...")
        term_value, term_str = self.term()
        expr_line_value, expr_line_str = self.expr_e_line(term_value)
        return expr_line_value, f"{term_str} {expr_line_str}"

    def term(self):
        """
        term ::= factor expr_t_line
        """
        self._trace("Parsing term...")
        factor_value, factor_str = self.factor()
        term_line_value, term_line_str = self.expr_t_line(factor_value)
        return term_line_value, f"{factor_str} {term_line_str}"

    def factor(self):
        """
        factor ::= ( expr ) | <id> | <int> | <float>

        Numeric literals are emitted in float form (``5`` becomes ``5.0``).

        Raises:
            Exception: on a syntax error or an undeclared variable.
        """
        if self.token_atual.tipo == T_PARA and self.token_atual.valor == "(":
            self.use(T_PARA, "(")
            inner_value, expr_str = self.expr()
            self.use(T_PARA, ")")
            return inner_value, f"({expr_str})"
        elif self.token_atual.tipo == T_ID:
            var_name = self.token_atual.valor
            self.use(T_ID)
            if var_name not in self.symbol_table:
                raise Exception(f"Variable {var_name} not declared")
            return self.symbol_table[var_name], var_name
        elif self.token_atual.tipo in [T_INT, T_FLOAT]:
            literal_value = float(self.token_atual.valor)
            self.use(T_INT if self.token_atual.tipo == T_INT else T_FLOAT)
            return literal_value, str(literal_value)
        else:
            self.erro("Expected a factor")

    def expr_t(self):
        """
        expr_t ::= expr_f expr_t_line
        """
        self._trace("Parsing expression t...")
        r, s = self.expr_f()
        r2, s2 = self.expr_t_line(r)
        return r2, f"{s} {s2}"

    def expr_e_line(self, inherited_t):
        """
        expr_e_line ::= <op +> expr_t expr_e_line |
                        <op -> expr_t expr_e_line | epsilon

        ``inherited_t`` is the value accumulated to the left of this tail.
        Only ``+`` and ``-`` are handled here: this method always runs right
        after ``expr_t_line`` returned, and that method has already consumed
        every ``*``, ``/`` and comparison operator.
        """
        self._trace("Parsing expression e line...")
        if self.token_atual.tipo == T_OP:
            if self.token_atual.valor == "+":
                self.use(T_OP, "+")
                a, s = self.expr_t()
                res = a + inherited_t
                a2, s2 = self.expr_e_line(res)
                return a2, f"+ {s} {s2}"
            elif self.token_atual.valor == "-":
                self.use(T_OP, "-")
                a, s = self.expr_t()
                res = inherited_t - a
                a2, s2 = self.expr_e_line(res)
                return a2, f"- {s} {s2}"

        # Empty production
        return 0 + inherited_t, ""

    def expr_t_line(self, inherited_t):
        """
        expr_t_line ::= <op *> expr_f expr_t_line |
                        <op /> expr_f expr_t_line |
                        <op_comp> expr_f expr_t_line | epsilon

        ``inherited_t`` is the value accumulated to the left of this tail. A
        comparison contributes only to the generated text; the accumulated
        value is passed through unchanged.

        Raises:
            Exception: when the parse-time value of a divisor is zero.
        """
        self._trace("Parsing expression t line...")
        if self.token_atual.tipo == T_OP:
            if self.token_atual.valor == "*":
                self.use(T_OP, "*")
                a, s = self.expr_f()
                res = a * inherited_t
                a2, s2 = self.expr_t_line(res)
                return a2, f"* {s} {s2}"
            elif self.token_atual.valor == "/":
                self.use(T_OP, "/")
                a, s = self.expr_f()
                if a == 0:
                    raise Exception("Erro semantico. Divisão por zero.")
                res = inherited_t / a
                a2, s2 = self.expr_t_line(res)
                return a2, f"/ {s} {s2}"
        elif self.token_atual.tipo == T_OP_COMP:
            comp = self.token_atual
            self.use(T_OP_COMP)
            _, s = self.expr_f()
            a2, s2 = self.expr_t_line(inherited_t)
            return a2, f"{comp.valor} {s} {s2}"

        # Empty production
        return 1 * inherited_t, ""

    def expr_f(self):
        """
        expr_f ::= ( expr ) | <id> | <int> | <float>

        Unlike ``factor``, numeric literals keep their source spelling in the
        generated text.

        Raises:
            Exception: on a syntax error or an undeclared variable.
        """
        self._trace("Parsing expression f...")
        str_expr = ""
        if self.token_atual.tipo == T_PARA:
            self.use(T_PARA, "(")
            res, s = self.expr()
            self.use(T_PARA, ")")
            str_expr = f"({s})"
        elif self.token_atual.tipo == T_ID:
            if self.token_atual.valor not in self.symbol_table:
                raise Exception(f"Erro Semantico, variavel {self.token_atual.valor} nao foi declarada")
            res = self.symbol_table[self.token_atual.valor]
            str_expr = self.token_atual.valor
            self.use(T_ID)
        elif self.token_atual.tipo == T_INT:
            # Convert the int to float before returning the value.
            res = float(self.token_atual.valor)
            str_expr = str(self.token_atual.valor)
            self.use(T_INT)
        elif self.token_atual.tipo == T_FLOAT:
            str_expr = str(self.token_atual.valor)
            res = float(self.token_atual.valor)
            self.use(T_FLOAT)
        else:
            self.erro()

        return res, str_expr

##############################################################################

# test_token = afd_principal("print")
# print(test_token)

# Lexical analysis: split every line of the source program on whitespace and
# classify each lexeme.
tokens = []

# `with` guarantees the input file is closed, including on a lexical error.
with open('codigo.x', 'r') as arquivo:
    for ln, linha in enumerate(arquivo, start=1):
        # Iterating over regex matches yields the same lexemes as
        # `linha.split()` plus each one's real column; searching the line for
        # the lexeme text would report the first occurrence of that text,
        # not the failing token.
        for lexema in re.finditer(r'\S+', linha):
            token = lexema.group()
            try:
                tokens.append(afd_principal(token))
            except ValueError as e:
                print(tokens)
                print(str(e) + " na posição %i da linha %i - %s" % (lexema.start(), ln, token))
                raise StopExecution from e

# print([str(t) for t in tokens])

# Syntactic analysis and C code generation.
parser = Parser(tokens)
parser.start()
code = parser.generate_ir()

with open("codigo.c", "w") as out_f:
    out_f.write(code)

['<block {>', '<var_type var>', '<id result>', '<delimiter ;>', '<var_type var>', '<id a>', '<delimiter ;>', '<var_type var>', '<id b>', '<delimiter ;>', '<var_type var>', '<id c>', '<delimiter ;>', '<id teste>', '<parenthesis (>', '<id a>', '<parenthesis )>', '<delimiter ;>', '<block }>', '<func func>', '<parenthesis (>', '<id a>', '<parenthesis )>', '<block {>', '<print print>', '<parenthesis (>', '<id a>', '<parenthesis )>', '<block }>']
