In [1]:
import re
nomeArquivo = 'codigo.js'

In [9]:
T_INIT = "init"
T_KEYWORD = "keyword"
T_OP = "op"
T_INT = "int"
T_STRING = "string"
T_ID = "id"
T_EOF = "eof"
T_DELIMITER = "delimiter"
T_BLOCK = "block"
T_PARA = "parenthesis"
T_VAR_TYPE = "var_type"

In [3]:
def preprocessamento_linha(line_code):
    line_code = re.sub("[\\(\\[\\]\\)\\{\\}\\;\\?\\:\\,]", " \\g<0> ", line_code)#processa caracteres especiais
    line_code = re.sub(r"\.(?!\d)", " . ", line_code)#processa pontos quando não tem 1 número após ele

    return line_code

In [4]:
class Token():

    def __init__(self, tipo, valor=None):
        self.tipo = tipo
        self.valor = valor

    def __str__(self):
        return '<%s %s>' % (self.tipo, self.valor)

In [6]:
def afd_keyword(token):
    keyword_list = ["this", ".", "constructor", "=>", "map", "slice", "reduce", "some", "sort", "toString", "String", "Boolean", "parseInt", "keys", "push", "pop", "forEach", "split", "join", "map","class","if", "else", "return", "function", "then", "try", "catch", "(", ")", "{","}","[","]", ";", ","]
    return token in keyword_list

def afd_init(token):
    init_list = ['let', 'var', 'const']

    return token in init_list

def afd_int(token):
    try:
        token = float(token)
        return True
    except:
        return False
    
def afd_string(token):
    if token[0] == '"' and token[-1] == '"':
        if '"' not in token[1:-1]:
            return True
        else:
            raise ValueError('Aspas em um local inesperado.')
    else:
        return False
    
def afd_identificador(token):
    regex = re.compile('[a-zA-Z0-9_]+')
    r = regex.match(token)
    if r is not None:
        if r.group() == token:
            return True
        else:
            return False
    else:
        return False

def afd_operador(token):
    return token in "=+-*\\%"

def afd_delimiter(token):
    return token == ";"

def afd_block(token):
    return token in ["{", "}"]

def afd_var_type(token):
    return token in ["int", "float", "string"]

def afd_para(token):
    return token in ["(", ")"]
    
def afd_principal(token):
    
    if afd_init(token):
        return Token(T_INIT, token)
    
    if afd_int(token):
        return Token(T_INT, token)
    
    if afd_keyword(token):
        return Token(T_KEYWORD, token)
    
    if afd_operador(token):
        return Token(T_OP, token)
    
    if afd_string(token):
        return Token(T_STRING, token)

    if afd_delimiter(token):
        return Token(T_DELIMITER, token)
    
    if afd_block(token):
        return Token(T_BLOCK, token)
    
    if afd_var_type(token):
        return Token(T_VAR_TYPE, token)

    if afd_para(token):
        return Token(T_PARA, token)
    
    if afd_identificador(token):
        return Token(T_ID, token)
    
    raise ValueError('Valor inesperado')


In [7]:


class Parser():

    def __init__(self, tokens):
        self.tokens = tokens
        self.pos = -1
        self.token_atual = None
        self.symbol_table = {}
        self.ir = []

        self.proximo()

    def generate_ir(self):
        return "\n".join(self.ir)

    def proximo(self):
        self.pos += 1

        if self.pos >= len(self.tokens):
            self.token_atual = Token(T_EOF)
        else:
            self.token_atual = self.tokens[self.pos]

        print(self.token_atual)
        return self.token_atual

    def erro(self):
        raise Exception('Erro de sintaxe. %s' % (self.token_atual))

    def use(self, tipo, valor=None):

        if self.token_atual.tipo != tipo:
            self.erro()
        elif valor is not None and self.token_atual.valor != valor:
            self.erro()
        else:
            self.proximo()

    def instructions(self):
        """
        statements ::= <instruction> <instructions>
        """
        self.instruction()
        while self.token_atual.tipo in [T_VAR_TYPE, T_ID]:
            self.instruction()
    
    def instruction(self):
        """
        instruction ::= int <id> ;
        instruction ::= statement ;
        """
        if self.token_atual.tipo == T_VAR_TYPE:
            type = self.token_atual.valor
            self.use(T_VAR_TYPE, "int")
            name = self.token_atual.valor
            self.use(T_ID)
            self.use(T_DELIMITER, ";")
            if name in self.symbol_table:
                raise Exception(f"Erro semantico. Simbolo {name} foi declarado mais de uma vez.")
            self.symbol_table[name] = None
            self.ir.append(f"int {name};")
        else:
            self.statement()
            self.use(T_DELIMITER, ";")

    def start(self):
        """
        start ::= { instructions }
        """
        self.ir.append("#include <stdio.h>")
        # Inserir as declarações de funções
        self.ir.append("int main() {")
        self.use(T_BLOCK, "{")
        self.instructions()
        self.use(T_BLOCK, "}")
        self.ir.append("}")

    def statement(self):
        """
        <statement> ::= <id> <op => <expr>
        """

        name = self.token_atual.valor
        self.use(T_ID)
        
        self.use(T_OP, '=')
        
        value, str_expr = self.expr()

        self.ir.append(f"{name} = {str_expr};")

        if name not in self.symbol_table:
            raise Exception(f"Erro semantico. Simbolo {name} nao foi declarado.")
        
        self.symbol_table[name] = value

    def expr(self):
        """
        expr ::= term ( <op +> | <op -> | <op *> | <op /> | <op %> term )*
        """

        self.term()
        while self.token_atual.tipo == T_OP and self.token_atual.valor in ['+','-','*','/' '%']:
            self.use(T_OP)
            self.term()


    def term(self):
        """
        term ::= <id> | <int> | <string>
        """

        if self.token_atual.tipo == T_INT:
            self.use(T_INT)
        if self.token_atual.tipo == T_STRING:
            self.use(T_STRING)
        elif self.token_atual.tipo == T_ID:
            self.use(T_ID)
        else:
            self.erro()

In [None]:
    # def expr(self) -> int:
    #     """
    #     expr ::= term ( <op +> | <op -> term )*
    #     """

    #     t, s = self.expr_t()
    #     res, s2 = self.expr_e_line(t)
    #     return res, f"{s} {s2}"
    
    # def expr_t(self) -> int:
    #     """
    #     expr_t ::= expr_f expr_t_line
    #     """
    #     r, s = self.expr_f()
    #     r2, s2 = self.expr_t_line(r)
    #     return r2, f"{s} {s2}"

    # def expr_e_line(self, inherited_t: int):
    #     """
    #     expr_e_line ::= <op +> expr_t expr_e_line | epsilon
    #     """
    #     if self.token_atual.tipo == T_OP and self.token_atual.valor == "+":
    #         self.use(T_OP, "+")
    #         a, s = self.expr_t()
    #         res = a + inherited_t
    #         a2, s2 = self.expr_e_line(res)
    #         str_expr = f"+ {s} {s2}"
    #         return a2, str_expr
    #     # Prod vazia
    #     return 0 + inherited_t, ""

    # def expr_t_line(self, inherited_t: int):
    #     """
    #     expr_e_line ::= <op *> expr_f expr_t_line | epsilon
    #     """
    #     if self.token_atual.tipo == T_OP and self.token_atual.valor == "*":
    #         self.use(T_OP, "*")
    #         a, s = self.expr_f()
    #         res = a * inherited_t
    #         a2, s2 = self.expr_t_line(res)
    #         str_expr = f"* {s} {s2}"
    #         return a2, str_expr
    #     # Prod vazia
    #     return 1 * inherited_t, ""

    # def expr_f(self):
    #     """
    #     expr_f ::= ( expr ) | <id> | <int>
    #     """
    #     str_expr = ""
    #     if self.token_atual.tipo == T_PARA:
    #         self.use(T_PARA, "(")
    #         res, s = self.expr()
    #         self.use(T_PARA, ")")
    #         str_expr = f"({s})"
    #     elif self.token_atual.tipo == T_ID:
    #         if self.token_atual.valor not in self.symbol_table:
    #             raise Exception(f"Erro Semantico, variavel {self.token_atual.valor} nao foi declarada")
    #         res = self.symbol_table[self.token_atual.valor]
    #         str_expr = self.token_atual.valor
    #         self.use(T_ID)
    #     elif self.token_atual.tipo == T_INT:
    #         str_expr = str(self.token_atual.valor)
    #         res = int(self.token_atual.valor)
    #         self.use(T_INT)
    #     else:
    #         self.erro()

    #     return res, str_expr

In [8]:

class StopExecution(Exception):
    def _render_traceback_(self):
        pass

##############################################################################

arquivo = open(nomeArquivo,'r')
ln = 1

tokens = []

for l in arquivo.readlines():

    # analisador lexico

    l = preprocessamento_linha(l)


    for token in l.split():
        try:
            tokens.append(afd_principal(token))
        except Exception as e:
            print(tokens)
            print(str(e) + " na posição %i da linha %i" % (l.index(token), ln))
            raise StopExecution
    ln += 1

print([str(t) for t in tokens])

# analisador sintatico

# analisador sintatico

parser = Parser(tokens)
parser.start()
code = parser.generate_ir()

with open("codigo.c", "w") as out_f:
    out_f.write(code)

['<init var>', '<id a>', '<op =>', '<int 1>', '<keyword ;>', '<init var>', '<id b>', '<op =>', '<int 2>', '<keyword ;>', '<init var>', '<id c>', '<keyword ;>', '<id c>', '<op =>', '<int 1>', '<op +>', '<int 2>', '<keyword ;>']
<init var>
