In [12]:
import re

# Token type tags. Each Token carries one of these strings in its `tipo` field;
# the lexer (afd_principal) assigns them and the Parser compares against them.
T_INIT = "init"  # variable initializer words: let / var / const
T_KEYWORD = "keyword"  # reserved words and symbols listed in afd_keyword
T_OP = "op"  # operator characters recognised by afd_operador
T_INT = "int"  # text accepted by int()
T_FLOAT = "float"  # text accepted by float() (checked after T_INT)
T_STRING = "string"  # double-quoted literal
T_ID = "id"  # identifier
T_EOF = "eof"  # synthesised by Parser.proximo once the token list is exhausted
T_DELIMITER = "delimiter"  # the ";" statement terminator
T_BLOCK = "block"  # "{" or "}"
T_PARA = "parenthesis"  # "(" or ")"
T_VAR_var_initiable = "var_var_initiable"  # NOTE(review): not referenced anywhere in the visible code

def preprocessamento_linha(line_code):
    """Surround punctuation with spaces so `str.split()` isolates each symbol.

    Args:
        line_code: One line of source text.

    Returns:
        The line with every ``( ) [ ] { } ; ? + - : ,`` wrapped in single
        spaces, and every ``.`` that is not immediately followed by a digit
        replaced by ``" . "``.
    """
    special_chars = re.compile("[\\(\\[\\]\\)\\{\\}\\;\\?\\+\\-\\:\\,]")
    # A dot directly followed by a digit is left alone so that numeric
    # literals such as 3.5 stay in one piece.
    lone_dot = re.compile(r"\.(?!\d)")

    padded = special_chars.sub(" \\g<0> ", line_code)
    return lone_dot.sub(" . ", padded)

class Token():
    """A lexical token: a type tag plus the optional source text it came from."""

    def __init__(self, tipo, valor=None):
        """Store the token type (`tipo`) and its text (`valor`, default None)."""
        self.tipo, self.valor = tipo, valor

    def __str__(self):
        """Render as ``<tipo valor>``."""
        fields = (self.tipo, self.valor)
        return '<%s %s>' % fields

    def __repr__(self):
        """Use the same rendering as ``__str__`` so lists of tokens print readably."""
        return self.__str__()


class StopExecution(Exception):
    """Exception raised by the lexer loop to abort execution after an error was printed."""

    def _render_traceback_(self):
        """Return None (no traceback text).

        NOTE(review): presumably the IPython/Jupyter hook for custom traceback
        rendering, used here to stop the cell without a traceback - confirm.
        """
        return None

def afd_keyword(token):
    """Return True when `token` is one of the reserved words or reserved symbols."""
    reserved = (
        # language words
        "this", "constructor", "class", "if", "else", "return",
        "function", "then", "try", "catch",
        # built-in names treated as reserved
        "map", "slice", "reduce", "some", "sort", "toString", "String",
        "Boolean", "parseInt", "keys", "push", "pop", "forEach", "split",
        "join",
        # symbols classified as keywords
        ".", "=>", "[", "]", ",",
    )
    return token in reserved

def afd_init(token):
    """Return True when `token` is a variable-initializer word (let, var or const)."""
    return token in ('let', 'var', 'const')
    
def afd_int(token):
    """Return True when `token` can be converted with ``int()``.

    Only conversion failures are treated as "not an integer"; unrelated
    exceptions (e.g. KeyboardInterrupt) are no longer swallowed.
    """
    try:
        int(token)
    except (TypeError, ValueError, OverflowError):
        return False
    return True
    
def afd_float(token):
    """Return True when `token` can be converted with ``float()``.

    Only conversion failures are treated as "not a float"; unrelated
    exceptions (e.g. KeyboardInterrupt) are no longer swallowed.
    """
    try:
        float(token)
    except (TypeError, ValueError, OverflowError):
        return False
    return True
    
def afd_string(token):
    """Return True when `token` is a double-quoted string literal.

    Args:
        token: Candidate lexeme.

    Returns:
        True for text that starts and ends with ``"`` and has no other ``"``
        in between; False for anything not delimited by two quotes.

    Raises:
        ValueError: The token is quote-delimited but contains another ``"``
            inside.
    """
    # At least two characters are required: an empty token has no first
    # character, and a lone '"' would otherwise be its own opening and
    # closing quote.
    if len(token) < 2 or token[0] != '"' or token[-1] != '"':
        return False
    if '"' in token[1:-1]:
        raise ValueError('Aspas em um local inesperado.')
    return True
    
def afd_identificador(token):
    """Return True when the whole `token` is a valid identifier.

    An identifier is a letter or underscore followed by any number of
    letters, digits or underscores. A leading digit is rejected, so malformed
    lexemes such as ``2x`` or ``0x10`` are not silently classified as names.
    """
    return re.fullmatch('[a-zA-Z_][a-zA-Z0-9_]*', token) is not None

def afd_delimiter(token):
    """Return True when `token` is the statement terminator ``;``."""
    is_semicolon = token == ";"
    return is_semicolon

def afd_operador(token):
    """Return True when `token` is exactly one of the single-character operators.

    Membership is tested against a tuple of characters rather than a string:
    ``x in "..."`` is a substring test, which accepted the empty token and
    multi-character runs such as ``=+`` or ``<>``.
    """
    # NOTE(review): the backslash is kept from the original character set;
    # it may have been intended to be "/" - confirm.
    operators = ("=", "+", "-", "*", "\\", "%", "<", ">")
    return token in operators

def afd_block(token):
    """Return True when `token` opens or closes a block (``{`` or ``}``)."""
    braces = ("{", "}")
    return token in braces

def afd_var_type(token):
    """Return True when `token` is a variable declaration word (let, var or const)."""
    declaration_words = ("let", "var", "const")
    return token in declaration_words

def afd_para(token):
    """Return True when `token` is an opening or closing parenthesis."""
    parentheses = ("(", ")")
    return token in parentheses
    
def afd_principal(token):
    """Classify one lexeme and wrap it in a Token.

    The recognisers are tried in a fixed order and the first one that accepts
    the lexeme decides the token type.

    Args:
        token: A single whitespace-delimited lexeme.

    Returns:
        A Token whose `tipo` is the matching T_* tag and whose `valor` is the
        lexeme itself.

    Raises:
        ValueError: No recogniser accepts the lexeme ('Valor inesperado'), or
            afd_string rejects it for containing a misplaced quote.
    """
    # Order matters: keywords and initializer words must win over the
    # identifier check, and int must be tried before float because every
    # integer literal is also accepted by float().
    # The former afd_var_type branch is omitted: it accepted the same words as
    # afd_init, which runs earlier, so it could never be reached.
    classifiers = (
        (afd_keyword, T_KEYWORD),
        (afd_init, T_INIT),
        (afd_operador, T_OP),
        (afd_int, T_INT),
        (afd_float, T_FLOAT),
        (afd_string, T_STRING),
        (afd_delimiter, T_DELIMITER),
        (afd_block, T_BLOCK),
        (afd_para, T_PARA),
        (afd_identificador, T_ID),
    )

    for accepts, tipo in classifiers:
        if accepts(token):
            return Token(tipo, token)

    raise ValueError('Valor inesperado')

class Parser():
    """Recursive-descent parser over a list of Token objects.

    While parsing it evaluates expressions, keeps the current value of every
    declared variable in `symbol_table`, and collects one output line per
    declaration/assignment in `ir`.
    """

    def __init__(self, tokens):
        """Store the token list and load the first token into `token_atual`."""
        self.tokens = tokens
        self.pos = -1  # advanced to 0 by the proximo() call below
        self.token_atual = None
        self.symbol_table = {}  # variable name -> current value (None until assigned)
        self.ir = []  # generated output lines, in source order

        self.proximo()

    def generate_ir(self):
        """Return the generated lines joined with newlines."""
        return "\n".join(self.ir)

    def proximo(self):
        """Advance to the next token, print it, and return it.

        Once the list is exhausted the current token becomes a T_EOF token.
        """
        self.pos += 1

        if self.pos >= len(self.tokens):
            self.token_atual = Token(T_EOF)
        else:
            self.token_atual = self.tokens[self.pos]

        # Trace of every token as it becomes current.
        print(self.token_atual)
        return self.token_atual

    def erro(self):
        """Raise a syntax error that names the current token."""
        raise Exception('Erro de sintaxe. %s' % (self.token_atual))

    def use(self, tipo, valor=None):
        """Consume the current token if it has the expected type (and value).

        Args:
            tipo: Required token type tag.
            valor: Required token text; None accepts any text.

        Raises:
            Exception: The current token does not match (via erro()).
        """
        atual = self.token_atual
        type_mismatch = atual.tipo != tipo
        value_mismatch = valor is not None and atual.valor != valor

        if type_mismatch or value_mismatch:
            self.erro()
        else:
            self.proximo()

    def instructions(self):
        """
        instructions ::= <instruction> <instructions>

        Parses one instruction, then keeps going while the current token can
        start another one (an initializer word or an identifier).
        """
        self.instruction()
        while self.token_atual.tipo in [T_INIT, T_ID]:
            self.instruction()

    def instruction(self):
        """
        instruction ::= <T_INIT> <id> ;
        instruction ::= statement ;

        A declaration registers the name in the symbol table with value None
        and emits "<initializer> <name>;". The ";" after a declaration is
        consumed only when present; after a statement it is required.

        Raises:
            Exception: Syntax error, or a name declared more than once.
        """
        if self.token_atual.tipo != T_INIT:
            self.statement()
            self.use(T_DELIMITER, ";")
            return

        var_initiable = self.token_atual.valor
        if var_initiable not in ['let', 'const', 'var', ]:
            raise Exception(f"Erro semantico. Simbolo {var_initiable} não é um inicializador de variável.")
        self.use(T_INIT, var_initiable)

        name = self.token_atual.valor
        self.use(T_ID)

        if self.token_atual.valor == ";":
            self.use(T_DELIMITER, ";")

        if name in self.symbol_table:
            raise Exception(f"Erro semantico. Simbolo {name} foi declarado mais de uma vez.")

        self.symbol_table[name] = None
        self.ir.append(f"{var_initiable} {name};")

    def start(self):
        """
        start ::= instructions

        Emits the fixed import header and parses the instruction list.
        """
        self.ir.append("import React from 'react'")
        # TODO: insert the function declarations here.

        # NOTE(review): instructions() returns at the first token that is
        # neither an initializer nor an identifier; nothing here verifies that
        # the end of input was reached, so trailing tokens go unreported.
        self.instructions()

    def statement(self):
        """
        statement ::= <id> <op => expr

        Evaluates the right-hand side, stores the value for the target
        variable and emits "<name> = <expr>;".

        Raises:
            Exception: Syntax error, or the target variable was not declared.
        """
        name = self.token_atual.valor
        self.use(T_ID)
        self.use(T_OP, '=')

        value, str_expr = self.expr()

        # Validate before emitting so a rejected assignment leaves no line
        # behind in the generated output.
        if name not in self.symbol_table:
            raise Exception(f"Erro semantico. Simbolo {name} nao foi declarado.")

        self.ir.append(f"{name} = {str_expr};")
        self.symbol_table[name] = value

    def expr(self) -> tuple:
        """
        expr ::= expr_t expr_e_line

        Returns:
            (value, text): the evaluated value and the expression's source text.
        """
        t, s = self.expr_t()
        res, s2 = self.expr_e_line(t)
        return res, f"{s} {s2}"

    def expr_t(self) -> tuple:
        """
        expr_t ::= expr_f expr_t_line

        Returns:
            (value, text) for a product of factors.
        """
        r, s = self.expr_f()
        r2, s2 = self.expr_t_line(r)
        return r2, f"{s} {s2}"

    def expr_e_line(self, inherited_t):
        """
        expr_e_line ::= <op +> expr_t expr_e_line | epsilon

        Args:
            inherited_t: Value accumulated so far (the left operand).

        Returns:
            (value, text): the accumulated sum and the text of the consumed
            "+ term" tail ("" for the empty production).
        """
        if self.token_atual.tipo == T_OP and self.token_atual.valor == "+":
            self.use(T_OP, "+")
            a, s = self.expr_t()
            # Left operand first: "+" is not commutative for strings, so the
            # accumulated value must stay on the left-hand side.
            res = inherited_t + a
            a2, s2 = self.expr_e_line(res)
            str_expr = f"+ {s} {s2}"
            return a2, str_expr
        # Empty production.
        return inherited_t, ""

    def expr_t_line(self, inherited_t):
        """
        expr_t_line ::= <op *> expr_f expr_t_line | epsilon

        Args:
            inherited_t: Value accumulated so far (the left operand).

        Returns:
            (value, text): the accumulated product and the text of the
            consumed "* factor" tail ("" for the empty production).
        """
        if self.token_atual.tipo == T_OP and self.token_atual.valor == "*":
            self.use(T_OP, "*")
            a, s = self.expr_f()
            res = inherited_t * a
            a2, s2 = self.expr_t_line(res)
            str_expr = f"* {s} {s2}"
            return a2, str_expr
        # Empty production.
        return inherited_t, ""

    def expr_f(self) -> tuple:
        """
        expr_f ::= ( expr ) | <id> | <int> | <float> | <string>

        Returns:
            (value, text) for a single factor. An identifier evaluates to its
            current symbol-table value; a string keeps its surrounding quotes.

        Raises:
            Exception: Syntax error, or an identifier that was not declared.
        """
        atual = self.token_atual
        tipo = atual.tipo

        if tipo == T_PARA:
            self.use(T_PARA, "(")
            res, s = self.expr()
            self.use(T_PARA, ")")
            return res, f"({s})"

        if tipo == T_ID:
            if atual.valor not in self.symbol_table:
                raise Exception(f"Erro Semantico, variavel {atual.valor} nao foi declarada")
            res = self.symbol_table[atual.valor]
            self.use(T_ID)
            return res, atual.valor

        if tipo == T_INT:
            res = int(atual.valor)
            self.use(T_INT)
            return res, str(atual.valor)

        if tipo == T_FLOAT:
            res = float(atual.valor)
            self.use(T_FLOAT)
            return res, str(atual.valor)

        if tipo == T_STRING:
            self.use(T_STRING)
            return atual.valor, atual.valor

        self.erro()


# Lexical analysis: read codigo.js line by line and turn it into Token objects.
tokens = []

# The context manager closes the input file on every exit path, including the
# StopExecution raised on a lexical error.
with open('codigo.js', 'r') as arquivo:
    for ln, linha in enumerate(arquivo, start=1):
        linha = linha.replace('\n', '')  # drop the line break
        linha = preprocessamento_linha(linha)

        for token in linha.split():
            try:
                tokens.append(afd_principal(token))
            except Exception as e:
                # Show what was recognised so far plus where the bad lexeme is.
                # The position is relative to the preprocessed (space-padded)
                # line and points at the first occurrence of the lexeme.
                print(tokens)
                print(str(e) + " na posição %i da linha %i - %s" % (linha.index(token), ln, token))
                raise StopExecution

print([str(t) for t in tokens])

# Syntactic analysis: parse the tokens and collect the generated output.
parser = Parser(tokens)
parser.start()
code = parser.generate_ir()

with open("codigo.jsx", "w") as out_f:
    out_f.write(code)

['<init let>', '<id a>', '<delimiter ;>', '<init let>', '<id b>', '<delimiter ;>', '<init let>', '<id c>', '<delimiter ;>', '<id a>', '<op =>', '<int 5>', '<delimiter ;>', '<id b>', '<op =>', '<float 3.5>', '<delimiter ;>', '<id c>', '<op =>', '<string "12">', '<delimiter ;>', '<keyword function>', '<id teste>', '<parenthesis (>', '<id a>', '<keyword ,>', '<id b>', '<parenthesis )>', '<block {>', '<keyword return>', '<id a>', '<op +>', '<id b>', '<delimiter ;>', '<block }>', '<init let>', '<id result>', '<delimiter ;>', '<id result>', '<op =>', '<id teste>', '<parenthesis (>', '<id a>', '<keyword ,>', '<id b>', '<parenthesis )>', '<delimiter ;>']
<init let>
<id a>
<delimiter ;>
<init let>
<id b>
<delimiter ;>
<init let>
<id c>
<delimiter ;>
<id a>
<op =>
<int 5>
<delimiter ;>
<id b>
<op =>
<float 3.5>
<delimiter ;>
<id c>
<op =>
<string "12">
<delimiter ;>
<keyword function>
