In [25]:
import re
import math

# Token categories produced by the lexer; each value is stored in `Token.tipo`.
T_KEYWORD = "keyword"
T_OP = "op"
T_INT = "int"
T_FLOAT = "float"
T_STRING = "string"
T_ID = "id"
T_EOF = "eof"
T_DELIMITER = "delimiter"
T_BLOCK = "block"
T_PARA = "parenthesis"
T_VAR_TYPE = "var_type"
# The categories below (and T_KEYWORD / T_STRING above) are emitted by the
# lexer, but the Parser visible in this file never consumes them.
T_IF = "if"
T_ELIF = "elif"
T_ELSE = "else"
# NOTE(review): unlike the others, this value repeats the constant name rather
# than using a lowercase word -- confirm whether that is intentional.
T_OP_COMP = 'T_OP_COMP' 

class Token():
    """A lexical token: a category tag (`tipo`) plus an optional lexeme (`valor`)."""

    def __init__(self, tipo, valor=None):
        self.tipo, self.valor = tipo, valor

    def __str__(self):
        # Rendered as "<tipo valor>", e.g. "<int 3>" or "<eof None>".
        return f'<{self.tipo} {self.valor}>'

    def __repr__(self):
        # The debug representation is the same text as the display form.
        return str(self)


class StopExecution(Exception):
    """Exception raised by the lexer driver to abort processing after an error.

    NOTE(review): `_render_traceback_` is presumably the IPython hook for
    customising how the traceback is shown; returning None would then hide
    it -- confirm against the IPython documentation.
    """

    def _render_traceback_(self):
        return None

    
def afd_int(token):
    """Return True when `token` is an optionally negative decimal integer.

    The token is converted with `str()` first, so non-string values such as
    `7` are accepted as well.

    `re.fullmatch` is used instead of `re.match` with `^...$` because `$`
    also matches just before a trailing newline, which made "12\\n" look
    like a valid integer.
    """
    return re.fullmatch(r'-?\d+', str(token)) is not None

def afd_float(token):
    """Return True when `token` is an optionally negative decimal number
    with digits on both sides of a single dot (e.g. "1.5", "-0.25").

    The token is converted with `str()` first.

    `re.fullmatch` is used instead of `re.match` with `^...$` because `$`
    also matches just before a trailing newline, which made "1.5\\n" look
    like a valid float.
    """
    return re.fullmatch(r'-?\d+\.\d+', str(token)) is not None
    
def afd_string(token):
    """Return True when `token` is a double-quoted string literal.

    A literal needs an opening and a closing quote, so anything shorter than
    two characters (including the empty string and a lone `"`) is rejected
    instead of raising IndexError or being accepted as a complete string.

    Raises:
        ValueError: if the token is quote-delimited but contains another
            double quote between the delimiters.
    """
    if len(token) < 2 or token[0] != '"' or token[-1] != '"':
        return False
    if '"' in token[1:-1]:
        raise ValueError('Aspas em um local inesperado.')
    return True
    
def afd_identificador(token):
    """Return True when `token` is a valid identifier.

    An identifier is a letter or underscore followed by any number of
    letters, digits or underscores. A leading digit is rejected: the parser
    copies identifiers verbatim into the generated C source, where a name
    such as "3abc" would not compile.
    """
    return re.fullmatch(r'[A-Za-z_][A-Za-z0-9_]*', token) is not None
    
def afd_if(token: str) -> bool:
    """Return True when `token` is exactly the `if` keyword."""
    return token == "if"

def afd_elif(token: str) -> bool:
    """Return True when `token` is exactly the `elif` keyword."""
    return token == "elif"

def afd_else(token: str) -> bool:
    """Return True when `token` is exactly the `else` keyword."""
    return token == "else"

def afd_delimiter(token: str) -> bool:
    """Return True when `token` is the statement delimiter `;`."""
    return token == ";"

def afd_block(token):
    """Return True when `token` is a block brace, `{` or `}`."""
    block_braces = ("{", "}")
    return token in block_braces

def afd_var_type(token):
    """Return True when `token` is a variable-declaration keyword (only `var`)."""
    declaration_keywords = ("var",)
    return token in declaration_keywords

def afd_para(token):
    """Return True when `token` is a parenthesis, `(` or `)`."""
    parentheses = ("(", ")")
    return token in parentheses
    
def afd_principal(token):
    """Classify one whitespace-delimited lexeme and wrap it in a `Token`.

    The checks run in a fixed order: the `init` keyword, arithmetic and
    assignment operators, comparison operators, then the `afd_*` recognisers
    (numbers, strings, reserved words, punctuation) with the identifier
    recogniser last, so reserved words are never classified as identifiers.

    Args:
        token: the lexeme text.

    Returns:
        A `Token` whose `tipo` is the matched category and whose `valor` is
        the lexeme.

    Raises:
        ValueError: if the lexeme is empty or matches no category, or (from
            `afd_string`) if a quoted lexeme contains an inner quote.
    """
    if not token:
        # Reject the empty lexeme up front; no recogniser is meant to see it.
        raise ValueError('Valor inesperado')

    if token == "init":
        return Token(T_KEYWORD, 'init')

    # Exact membership, not a substring test: `token in "=+-*/^"` would also
    # accept multi-character slices such as "=+", "+-" or "*/".
    if token in ("=", "+", "-", "*", "/", "^"):
        return Token(T_OP, token)

    if token in ("==", "!=", "<", ">", "<=", ">="):
        return Token(T_OP_COMP, token)

    # Order matters: first match wins, and the identifier check must be last.
    recognisers = (
        (afd_int, T_INT),
        (afd_float, T_FLOAT),
        (afd_string, T_STRING),
        (afd_if, T_IF),
        (afd_elif, T_ELIF),
        (afd_else, T_ELSE),
        (afd_delimiter, T_DELIMITER),
        (afd_block, T_BLOCK),
        (afd_var_type, T_VAR_TYPE),
        (afd_para, T_PARA),
        (afd_identificador, T_ID),
    )
    for matches, tipo in recognisers:
        if matches(token):
            return Token(tipo, token)

    raise ValueError('Valor inesperado')

class Parser():
    """Recursive-descent parser that evaluates the program while emitting C.

    The parser walks a list of `Token` objects. While parsing it both
    computes the value of every expression (stored per variable in
    `symbol_table`) and accumulates lines of C source in `ir`.

    NOTE(review): expression values are computed as Python floats, while
    every variable is declared as `int` in the emitted C, so the C program
    may produce different results (e.g. for division) -- confirm intent.
    """

    def __init__(self, tokens):
        """Store the token list and load the first token."""
        self.tokens = tokens
        self.pos = -1
        self.token_atual = None
        # Variable name -> last computed value (None until first assignment).
        self.symbol_table = {}
        # Lines of generated C source, in emission order.
        self.ir = []

        self.proximo()

    def generate_ir(self):
        """Return the generated C source as a single newline-joined string."""
        return "\n".join(self.ir)

    def proximo(self):
        """Advance to the next token and return it.

        Past the end of the list the current token becomes an EOF token.
        Every token is printed as it becomes current (parse trace).
        """
        self.pos += 1

        if self.pos >= len(self.tokens):
            self.token_atual = Token(T_EOF)
        else:
            self.token_atual = self.tokens[self.pos]

        print(self.token_atual)
        return self.token_atual

    def erro(self, message=None):
        """Raise a syntax error that names the current token.

        Raises:
            Exception: always; `message`, when given, is appended to the text.
        """
        error_message = f'Erro de sintaxe. {self.token_atual}'
        if message:
            error_message += f" {message}"
        raise Exception(error_message)

    def use(self, tipo, valor=None):
        """Consume the current token if it has the expected type (and value).

        Raises:
            Exception: (via `erro`) when the type differs, or when `valor` is
                given and the token's value differs.
        """
        if self.token_atual.tipo != tipo:
            self.erro()
        if valor is not None and self.token_atual.valor != valor:
            self.erro()
        self.proximo()

    def instructions(self):
        """
        instructions ::= instruction instruction*

        At least one instruction is required; parsing continues while the
        current token can start another one (`var` or an identifier).
        """
        self.instruction()
        while self.token_atual.tipo in [T_VAR_TYPE, T_ID]:
            self.instruction()

    def instruction(self):
        """
        instruction ::= var <id> ;
        instruction ::= statement ;

        A declaration registers the name in the symbol table (value None) and
        emits `int <id>;`.

        Raises:
            Exception: on a syntax error or when a name is declared twice.
        """
        if self.token_atual.tipo == T_VAR_TYPE:
            self.use(T_VAR_TYPE, "var")
            name = self.token_atual.valor
            self.use(T_ID)
            self.use(T_DELIMITER, ";")
            if name in self.symbol_table:
                raise Exception(f"Erro semantico. Simbolo {name} foi declarado mais de uma vez.")
            self.symbol_table[name] = None
            self.ir.append(f"int {name};")
        else:
            self.statement()
            self.use(T_DELIMITER, ";")

    def start(self):
        """
        start ::= { instructions } <eof>

        Emits the C prologue, parses the single top-level block and closes
        `main`.

        Raises:
            Exception: on a syntax/semantic error, or when tokens remain
                after the closing brace.
        """
        self.ir.append("#include <stdio.h>")
        self.ir.append("#include <math.h>")
        # Function declarations would be inserted here.
        self.ir.append("int main() {")
        self.use(T_BLOCK, "{")
        self.instructions()
        self.use(T_BLOCK, "}")
        # Checked directly (not via `use`) so the cursor is not advanced again.
        if self.token_atual.tipo != T_EOF:
            self.erro("Fim de arquivo esperado apos '}'.")
        self.ir.append("}")

    def statement(self):
        """
        statement ::= <id> <op => expr

        Evaluates the expression, emits `<id> = <expr>;` and records the
        value in the symbol table.

        Raises:
            Exception: on a syntax error or when the target was not declared.
        """
        name = self.token_atual.valor
        self.use(T_ID)

        self.use(T_OP, '=')

        value, str_expr = self.expr()

        # Validate before emitting so a failed statement leaves no IR behind.
        if name not in self.symbol_table:
            raise Exception(f"Erro semantico. Simbolo {name} nao foi declarado.")

        self.ir.append(f"{name} = {str_expr};")
        self.symbol_table[name] = value

    def expr(self) -> tuple:
        """
        expr ::= expr_t expr_e_line

        Returns:
            (value, text): the computed value and the expression's C text.
        """
        t, s = self.expr_t()
        res, s2 = self.expr_e_line(t)
        return res, f"{s} {s2}"

    def expr_t(self) -> tuple:
        """
        expr_t ::= expr_f expr_t_line

        Returns:
            (value, text): the computed value and the term's C text.
        """
        r, s = self.expr_f()
        r2, s2 = self.expr_t_line(r)
        return r2, f"{s} {s2}"

    def expr_e_line(self, inherited_t: float) -> tuple:
        """
        expr_e_line ::= <op +> expr_t expr_e_line |
                        <op -> expr_t expr_e_line | epsilon

        `inherited_t` is the value accumulated to the left of this tail, so
        the operators associate to the left.

        Returns:
            (value, text): the accumulated value and the tail's C text
            ("" for the empty production).
        """
        if self.token_atual.tipo == T_OP and self.token_atual.valor in ("+", "-"):
            op = self.token_atual.valor
            self.use(T_OP, op)
            a, s = self.expr_t()
            res = inherited_t + a if op == "+" else inherited_t - a
            a2, s2 = self.expr_e_line(res)
            return a2, f"{op} {s} {s2}"
        # Empty production
        return inherited_t, ""

    def expr_t_line(self, inherited_t: float) -> tuple:
        """
        expr_t_line ::= <op *> expr_f expr_t_line |
                        <op /> expr_f expr_t_line | epsilon

        `inherited_t` is the value accumulated to the left of this tail, so
        the operators associate to the left.

        Returns:
            (value, text): the accumulated value and the tail's C text
            ("" for the empty production).

        Raises:
            Exception: when the right operand of `/` evaluates to zero.
        """
        if self.token_atual.tipo == T_OP and self.token_atual.valor in ("*", "/"):
            op = self.token_atual.valor
            self.use(T_OP, op)
            a, s = self.expr_f()
            if op == "*":
                res = inherited_t * a
            else:
                if a == 0:
                    raise Exception("Erro semantico. Divisão por zero.")
                res = inherited_t / a
            a2, s2 = self.expr_t_line(res)
            return a2, f"{op} {s} {s2}"
        # Empty production
        return inherited_t, ""

    def expr_f(self) -> tuple:
        """
        expr_f ::= ( expr ) | <id> | <int> | <float>

        Returns:
            (value, text): the factor's value as a float and its C text.

        Raises:
            Exception: on a syntax error, when an identifier was not
                declared, or when it is read before being assigned.
        """
        token = self.token_atual

        if token.tipo == T_PARA:
            self.use(T_PARA, "(")
            res, s = self.expr()
            self.use(T_PARA, ")")
            return res, f"({s})"

        if token.tipo == T_ID:
            name = token.valor
            if name not in self.symbol_table:
                raise Exception(f"Erro Semantico, variavel {name} nao foi declarada")
            res = self.symbol_table[name]
            if res is None:
                # Declared but never assigned: fail with a clear message
                # instead of a TypeError from arithmetic on None.
                raise Exception(f"Erro semantico. Variavel {name} foi usada antes de receber um valor.")
            self.use(T_ID)
            return res, name

        if token.tipo in (T_INT, T_FLOAT):
            # Integer literals are evaluated as floats as well.
            res = float(token.valor)
            self.use(token.tipo)
            return res, str(token.valor)

        self.erro()

##############################################################################
    
# Lexical analysis: turn every whitespace-delimited lexeme of the source file
# into a Token.
tokens = []

# `with` guarantees the input file is closed, even when lexing aborts.
with open('codigo.x', 'r') as arquivo:
    for ln, linha in enumerate(arquivo, start=1):
        # r'\S+' yields the same lexemes as str.split() but keeps each one's
        # start column, so a repeated lexeme reports its own position rather
        # than that of its first occurrence in the line.
        for lexema in re.finditer(r'\S+', linha):
            token = lexema.group()
            try:
                tokens.append(afd_principal(token))
            except Exception as e:
                print(tokens)
                print(str(e) + " na posição %i da linha %i - %s" % (lexema.start(), ln, token))
                raise StopExecution

print([str(t) for t in tokens])

# Syntax analysis and C code generation

parser = Parser(tokens)
parser.start()
code = parser.generate_ir()

with open("codigo.c", "w") as out_f:
    out_f.write(code)

['<block {>', '<var_type var>', '<id result>', '<delimiter ;>', '<var_type var>', '<id a>', '<delimiter ;>', '<var_type var>', '<id b>', '<delimiter ;>', '<id a>', '<op =>', '<int 3>', '<delimiter ;>', '<id b>', '<op =>', '<int 3>', '<delimiter ;>', '<id result>', '<op =>', '<id a>', '<op +>', '<id b>', '<delimiter ;>', '<id result>', '<op =>', '<id a>', '<op ->', '<id b>', '<delimiter ;>', '<id result>', '<op =>', '<id a>', '<op *>', '<id b>', '<delimiter ;>', '<id result>', '<op =>', '<id a>', '<op />', '<id b>', '<delimiter ;>', '<block }>']
<block {>
<var_type var>
<id result>
<delimiter ;>
<var_type var>
<id a>
<delimiter ;>
<var_type var>
<id b>
<delimiter ;>
<id a>
<op =>
<int 3>
<delimiter ;>
<id b>
<op =>
<int 3>
<delimiter ;>
<id result>
<op =>
<id a>
<op +>
<id b>
<delimiter ;>
<id result>
<op =>
<id a>
<op ->
<id b>
<delimiter ;>
<id result>
<op =>
<id a>
<op *>
<id b>
<delimiter ;>
<id result>
<op =>
<id a>
<op />
<id b>
<delimiter ;>
<block }>
<eof None>
