In [236]:
import re
nomeArquivo = 'codigo.js'

In [237]:
T_KEYWORD = "keyword"
T_INIT = "init"
T_OP = "op"
T_INT = "int"
T_STRING = "string"
T_ID = "id"
T_EOF = "eof"

In [238]:
def preprocessamento_linha(line_code):
    line_code = re.sub("[\\(\\[\\]\\)\\{\\}\\;\\?\\:\\,]", " \\g<0> ", line_code)#processa caracteres especiais
    line_code = re.sub(r"\.(?!\d)", " . ", line_code)#processa pontos quando não tem 1 número após ele

    return line_code

In [239]:
class Token():

    def __init__(self, tipo, valor=None):
        self.tipo = tipo
        self.valor = valor

    def __str__(self):
        return '<%s %s>' % (self.tipo, self.valor)

In [240]:
def afd_init(token):
    init_list = ['let', 'var', 'const']

    return token in init_list



def afd_int(token):
    try:
        token = float(token)
        return True
    except:
        return False
    
def afd_string(token):
    if token[0] == '"' and token[-1] == '"':
        if '"' not in token[1:-1]:
            return True
        else:
            raise ValueError('Aspas em um local inesperado.')
    else:
        return False
    
def afd_identificador(token):
    regex = re.compile('[a-zA-Z0-9_]+')
    r = regex.match(token)
    if r is not None:
        if r.group() == token:
            return True
        else:
            return False
    else:
        return False
    
def afd_keyword(token):
    keyword_list = ["this", ".", "constructor", "=>", "map", "slice", "reduce", "some", "sort", "toString", "String", "Boolean", "parseInt", "keys", "push", "pop", "forEach", "split", "join", "map","class","if", "else", "return", "function", "then", "try", "catch", "(", ")", "{","}","[","]", ";", ","]
    return token in keyword_list

def afd_operador(token):
    return token in "=+-*\\%"
    
def afd_principal(token):
    
    if afd_init(token):
        return Token(T_INIT, token)
    
    if afd_int(token):
        return Token(T_INT, token)
    
    if afd_keyword(token):
        return Token(T_KEYWORD, token)
    
    if afd_operador(token):
        return Token(T_OP, token)
    
    if afd_string(token):
        return Token(T_STRING, token)
    
    if afd_identificador(token):
        return Token(T_ID, token)
    
    raise ValueError('Valor inesperado')


In [241]:


class Parser():

    def __init__(self, tokens):
        self.tokens = tokens
        self.pos = -1
        self.token_atual = None

        self.proximo()


    def proximo(self):
        self.pos += 1

        if self.pos >= len(self.tokens):
            self.token_atual = Token(T_EOF)
        else:
            self.token_atual = self.tokens[self.pos]

        print(self.token_atual)
        return self.token_atual

    def erro(self):
        raise Exception('Erro de sintaxe.')

    def use(self, tipo, valor=None):

        if self.token_atual.tipo != tipo:
            self.erro()
        elif valor is not None and self.token_atual.valor != valor:
            self.erro()
        else:
            self.proximo()


    def statement(self):
        """
        <statement> ::= <id> <op => <expr>
        """
        
        self.use(T_ID)
        self.use(T_OP, '=')
        self.expr()


    def expr(self):
        """
        expr ::= term ( <op +> | <op -> | <op *> | <op /> | <op %> term )*
        """

        self.term()
        while self.token_atual.tipo == T_OP and self.token_atual.valor in ['+','-','*','/' '%']:
            self.use(T_OP)
            self.term()


    def term(self):
        """
        term ::= <id> | <int> | <string>
        """

        if self.token_atual.tipo == T_INT:
            self.use(T_INT)
        if self.token_atual.tipo == T_STRING:
            self.use(T_STRING)
        elif self.token_atual.tipo == T_ID:
            self.use(T_ID)
        else:
            self.erro()

In [242]:

class StopExecution(Exception):
    def _render_traceback_(self):
        pass

##############################################################################

arquivo = open(nomeArquivo,'r')
ln = 1

tokens = []

for l in arquivo.readlines():

    # analisador lexico

    l = preprocessamento_linha(l)


    for token in l.split():
        try:
            tokens.append(afd_principal(token))
        except Exception as e:
            print(tokens)
            print(str(e) + " na posição %i da linha %i" % (l.index(token), ln))
            raise StopExecution
    ln += 1

print([str(t) for t in tokens])

# analisador sintatico

parser = Parser(tokens)
#parser.statement()

[<__main__.Token object at 0x7f79d7a775e0>, <__main__.Token object at 0x7f79d7a77160>, <__main__.Token object at 0x7f79d7a77d30>, <__main__.Token object at 0x7f79d7a7b3a0>, <__main__.Token object at 0x7f79d7a7b5b0>, <__main__.Token object at 0x7f79d7a7bb50>, <__main__.Token object at 0x7f79d7a7b4f0>, <__main__.Token object at 0x7f79d8b07cd0>, <__main__.Token object at 0x7f79d8b07f70>, <__main__.Token object at 0x7f79d8b07310>, <__main__.Token object at 0x7f79d8b07880>, <__main__.Token object at 0x7f79d8b073a0>, <__main__.Token object at 0x7f79d8b07af0>, <__main__.Token object at 0x7f79d8b072e0>, <__main__.Token object at 0x7f79d8b23220>, <__main__.Token object at 0x7f79d8b23280>, <__main__.Token object at 0x7f79d8b23670>, <__main__.Token object at 0x7f79d8b236d0>, <__main__.Token object at 0x7f79d8b23370>]
Aspas em um local inesperado. na posição 4 da linha 7
