# Analizador Léxico - Teoría de la Computación

## Separar los tokens del documento "prueba.mio" en una lista

In [2]:
# Path of the source program that the following cells open and split into tokens.
filename = "prueba.mio"

In [3]:
# Read the source program and split every non-comment line into its
# space-separated tokens. The result is a list with one token list per line.
token_list = []
with open(filename) as fp:
    for line in fp:
        # Lines whose first character is "#" are comments and are skipped.
        if line[0] == "#":
            continue

        tokens = line.split(" ")

        # Remove the line terminator: it is either a token of its own (the
        # line ended with a space) or the last character of the last token.
        if tokens[-1] == "\n":
            tokens.pop()
        elif tokens[-1].endswith("\n"):
            tokens[-1] = tokens[-1][:-1]

        token_list.append(tokens)
# No explicit close(): the "with" statement already closes the file.

In [4]:
token_list

[['PROGRAMA', 'prueba'],
 ['VarX', '=', '100'],
 ['VarY', '=', '0xA1'],
 ['LEE', 'Num'],
 ['VarZ', '=', 'VarY', '+', '0x1D'],
 ['VarX', '=', 'VarX', '*', 'Num'],
 ['IMPRIME', '"Variable', 'X', 'es"'],
 ['IMPRIME', 'VarX'],
 ['IMPRIME', '"Varaible', 'Z', 'es"'],
 ['IMPRIME', 'VarZ'],
 ['FINPROG']]

## Clasificando los tokens en token_list

In [5]:
# Symbol tables filled during the lexical analysis; each one is a list of
# [token, value] rows.
IDS = []  # identifiers
TXT = []  # string literals
VAL = []  # numeric constants

In [20]:
def esDecimal(token):
    """Return True if *token* is a non-empty run of ASCII decimal digits.

    Only the characters 0-9 are accepted. ``str.isdigit`` is deliberately not
    used because it also accepts Unicode digits such as "²", which ``int()``
    cannot convert.
    """
    DIGITOS = "0123456789"
    return len(token) > 0 and all(char in DIGITOS for char in token)


def esHexadecimal(token):
    """Return True if *token* is a hexadecimal constant such as ``0x1D``.

    A valid constant starts with the lowercase prefix ``0x`` followed by at
    least one character, each of them a decimal digit or an uppercase letter
    A-F. Always returns a bool (never None).
    """
    DIGITOS_HEX = "0123456789ABCDEF"
    # The prefix alone ("0x") is not a number, so at least 3 characters are needed.
    if len(token) <= 2 or token[0:2] != "0x":
        return False
    return all(char in DIGITOS_HEX for char in token[2:])

def esVariable(token):
    """Return True if *token* is a valid identifier.

    An identifier is non-empty, at most 16 characters long, made only of
    alphanumeric characters, and does not start with a digit.
    """
    # The emptiness check must come first: token[0] would raise IndexError.
    if not token or len(token) > 16 or token[0].isdigit():
        return False
    return token.isalnum()

def esString(token):
    """Return True if *token* is a complete string literal.

    A complete literal starts and ends with a double quote and contains no
    other double quote in between (exactly two quotes in total).
    """
    # A literal needs at least the two delimiters; this also rejects the
    # empty token without indexing into it.
    if len(token) < 2:
        return False
    return token[0] == '"' and token[-1] == '"' and token.count('"') == 2
        
def esInicioString(token):
    """Return True if *token* opens a string literal without closing it.

    That is, the token starts with a double quote and has no other double
    quote after it. An empty token returns False.
    """
    sub = '"'
    # startswith() is safe on the empty string, unlike token[0].
    return token.startswith(sub) and sub not in token[1:]

def esFinalString(token):
    """Return True if *token* closes a string literal without opening one.

    That is, the token ends with a double quote and has no other double
    quote before it. An empty token returns False.
    """
    sub = '"'
    # endswith() is safe on the empty string, unlike token[-1].
    return token.endswith(sub) and sub not in token[:-1]

def imprimeError(indiceLinea, token):
    """Print a lexical-error message naming the line index and the token."""
    mensaje = f"Error en la linea {indiceLinea}: {token}"
    print(mensaje)

In [21]:
def registrarIdentificador(token):
    """Add *token* to the IDS table unless it is already registered.

    New rows have the form [token, "idNN"], where NN is the 1-based position
    of the row in the table, zero-padded to at least two digits.
    """
    ya_registrado = any(fila[0] == token for fila in IDS)
    if ya_registrado:
        return
    IDS.append([token, f"id{len(IDS) + 1:02d}"])



def registrarString(token: str):
    """Add the string literal *token* to the TXT table unless already present.

    New rows have the form [token, "txtNN"], where NN is the 1-based position
    of the row in the table, zero-padded to at least two digits.
    """
    ya_registrado = any(fila[0] == token for fila in TXT)
    if ya_registrado:
        return
    TXT.append([token, f"txt{len(TXT) + 1:02d}"])

def registrarNum(token: str):
    """Append [token, integer value] to the VAL table.

    Tokens starting with "0x" are converted as base 16; every other token is
    converted as base 10. Repeated constants are appended again.
    """
    base = 16 if token[0:2] == "0x" else 10
    valor = int(token, base)
    VAL.append([token, valor])



In [25]:
def analex():
    """Classify every token in ``token_list`` and return the lexeme list.

    Reserved words and operators are emitted verbatim; numeric constants,
    identifiers and string literals are emitted as the tags ``[valorn]``,
    ``[id]`` and ``[litalfanum]`` and are registered in the VAL, IDS and TXT
    tables respectively. A string literal split across several tokens (it
    contained spaces) is rebuilt before being registered.

    Returns:
        The list of emitted lexemes, or None if an unrecognized token was
        found (an error message is printed in that case).
    """
    PALABRAS_RESERVADAS = ["PROGRAMA", "FINPROG", "IMPRIME", "LEE"]
    OPERADORES_ARITMETICOS = ["+", "-", "*", "/", "="]
    SALIDA = []
    for indiceLinea, line in enumerate(token_list):
        indiceToken = 0
        while indiceToken < len(line):
            token = line[indiceToken]

            # Consecutive spaces in the source produce empty tokens when the
            # line is split on " "; outside a string they are just whitespace.
            if token == "":
                indiceToken += 1
                continue

            finalizado = False
            if token in PALABRAS_RESERVADAS:
                SALIDA.append(token)
                finalizado = True
            elif token in OPERADORES_ARITMETICOS:
                SALIDA.append(token)
                finalizado = True
            elif esDecimal(token) or esHexadecimal(token):  # numeric constant
                SALIDA.append("[valorn]")
                registrarNum(token)
                finalizado = True
            elif esVariable(token):  # identifier
                SALIDA.append("[id]")
                registrarIdentificador(token)
                finalizado = True
            elif esString(token):  # string literal contained in a single token
                # Same tag and same registration as the multi-token case below.
                SALIDA.append("[litalfanum]")
                registrarString(token)
                finalizado = True
            elif esInicioString(token):  # string literal spanning several tokens
                # Re-join the following tokens with the spaces removed by the
                # split until the accumulated text is a complete literal.
                stringReconstruido = token + " "
                while indiceToken + 1 < len(line):
                    indiceToken += 1
                    stringReconstruido += line[indiceToken]
                    if esString(stringReconstruido):
                        finalizado = True
                        SALIDA.append("[litalfanum]")
                        registrarString(stringReconstruido)
                        break
                    stringReconstruido += " "

            if not finalizado:
                # Unrecognized token (or unterminated string): report and abort.
                imprimeError(indiceLinea, token)
                return
            else:
                indiceToken += 1

    return SALIDA

In [26]:
# Run the lexical analysis. analex() returns the list of lexemes, or None
# after printing an error message when it finds an unrecognized token.
SALIDA = analex()

In [10]:
# Write the lexeme list to "prueba.lex", one lexeme per line.
# No explicit close(): the "with" statement already closes the file.
with open("prueba.lex", "w") as fp:
    fp.writelines(f"{lexema}\n" for lexema in SALIDA)

In [11]:
# Write the three symbol tables (IDS, TXT, VAL) to the symbol file. Each
# section is a header line followed by one "<token> <value>" line per row,
# with a blank line between sections.
# NOTE(review): the output is named "factorial.sim" although the input and
# the lexeme file are named "prueba.*" -- confirm this name is intended.
# No explicit close(): the "with" statement already closes the file.
with open("factorial.sim", "w") as fp:
    fp.write("IDS\n")
    fp.writelines(f"{fila[0]} {fila[1]}\n" for fila in IDS)

    fp.write("\nTXT\n")
    fp.writelines(f"{fila[0]} {fila[1]}\n" for fila in TXT)

    fp.write("\nVAL\n")
    fp.writelines(f"{fila[0]} {fila[1]}\n" for fila in VAL)

In [24]:
SALIDA

['PROGRAMA',
 '[id]',
 '[id]',
 '=',
 '[valorn]',
 '[id]',
 '=',
 '[valorn]',
 'LEE',
 '[id]',
 '[id]',
 '=',
 '[id]',
 '+',
 '[valorn]',
 '[id]',
 '=',
 '[id]',
 '*',
 '[id]',
 'IMPRIME',
 '[litalfanum]',
 'IMPRIME',
 '[id]',
 'IMPRIME',
 '[litalfanum]',
 'IMPRIME',
 '[id]',
 'FINPROG']

In [28]:
TXT

[['"Variable X es"', 'txt01'], ['"Varaible Z es"', 'txt02']]