In [1]:
import pandas as pd
import re

In [2]:
# Opcode table: mnemonic -> opcode.
# Entries come in two shapes, and the rest of the notebook tells them apart
# by checking whether the entry is a list:
#   * plain two-digit hex string  -> instruction that occupies three bytes
#   * [hex string, 1]             -> instruction that occupies one byte
#     (the trailing 1 presumably records that size; it is never read)
instructions = {
    'ADD': '18',
    'AND': '40',
    'COMP': '28',
    'DIV': '24',
    'J': '3C',
    'JEQ': '30',
    'JGT': '34',
    'JLT': '38',
    'JSUB': '48',
    'LDA': '00',
    'LDCH': '50',
    'LDL': '08',
    'LDX': '04',
    'MUL': '20',
    'OR': '44',
    'RD': 'D8',
    'RSUB': '4C',
    'STA': '0C',
    'STCH': '54',
    'STL': '14',
    'STSW': 'E8',
    'STX': '10',
    'SUB': '1C',
    'TD': 'E0',
    'TIX': '2C',
    'WD': 'DC',
    'FIX': ['C4', 1],
    'FLOAT': ['C0', 1],  # was 'CO' (letter O), which is not a valid hex opcode
    'HIO': ['F4', 1],
    'NORM': ['C8', 1],
    'SIO': ['F0', 1],
    'TIO': ['F8', 1],
}

# Assembler directives accepted by the parser; they are not in the opcode table.
directives = ['START', 'END', 'BYTE', 'WORD', 'RESB', 'RESW']

In [3]:
def is_instruction_directive(string):
    """Return True when ``string`` is a known mnemonic or an assembler directive.

    Membership is checked first against the notebook-level ``instructions``
    table and then against the ``directives`` list.
    """
    if string in instructions:
        return True
    if string in directives:
        return True
    return False

In [4]:
def open_file(file):
    """Read ``file`` and return its lines normalised for parsing.

    For every line, each run of whitespace is collapsed to a single space,
    leading/trailing whitespace is removed and the text is upper-cased.
    Blank lines are kept as empty strings.
    """
    whitespace_run = re.compile(r'\s+')
    normalised = []
    with open(file) as handle:
        for raw_line in handle:
            collapsed = whitespace_run.sub(' ', raw_line)
            normalised.append(collapsed.strip().upper())
    return normalised

In [5]:
def helper_1(frame=None, path="zeee.txt"):
    """Write the Label / Mnemonic / Value columns to a tab-separated text file.

    Parameters
    ----------
    frame : pandas.DataFrame, optional
        Table with string columns ``Label``, ``Mnemonic`` and ``Value``.
        When omitted, the notebook-level ``df`` is used, which is what the
        original zero-argument call relied on.
    path : str, optional
        Output file; ``"zeee.txt"`` by default.

    Every field is left-justified to eight characters.  Rows whose label is
    the single-space placeholder are written with two leading tabs instead
    of a label column.  Returns ``None``.
    """
    if frame is None:
        # Fall back to the notebook global; it must exist before a bare call.
        frame = df

    # `with` guarantees the file is closed even if a row fails to format.
    with open(path, "w", encoding='utf-8') as fout:
        for ind in frame.index:
            mnemonic = frame.Mnemonic[ind].ljust(8, ' ')
            # Pad the value itself; the original padded the whole formatted
            # line because of a misplaced parenthesis, which was a no-op.
            value = frame.Value[ind].ljust(8, ' ')
            if frame.Label[ind] == ' ':
                fout.write('\t\t{0}\t{1}\n'.format(mnemonic, value))
            else:
                label = frame.Label[ind].ljust(8, ' ')
                fout.write('{0}\t{1}\t{2}\n'.format(label, mnemonic, value))
    return

In [8]:
def prog_name(df):
    """Return the first row's label, right-padded with ``'x'`` to six characters.

    Labels that are already six characters or longer are returned unchanged.
    """
    first_row = df.iloc[0]
    name = first_row.Label
    return name.ljust(6, 'x')

In [9]:
def return_df(program):
    """Parse normalised listing lines into a Label / Mnemonic / Value table.

    Each line is split on single spaces.  The first token is skipped
    (presumably a line-number column -- confirm against the input files) and
    at most the next three tokens are read as ``[label] mnemonic [operand]``.
    Lines with no remaining tokens and lines whose first field is ``.`` are
    ignored.  A missing label or operand is stored as a single space ``' '``.

    If the last parsed statement is not ``END``, an ``END`` row is appended
    whose operand is the Value of the first row.

    Parameters
    ----------
    program : iterable of str
        Lines as produced by ``open_file``.

    Returns
    -------
    pandas.DataFrame or None
        The parsed table (empty, with the three columns, when nothing could
        be parsed), or ``None`` after printing a message when neither of a
        line's first two fields is a known instruction or directive.
    """
    rows = []
    for line in program:
        fields = line.split(" ")[1: 4]
        if not fields or fields[0] == '.':
            continue
        if fields[0] in instructions:
            # Unlabeled instruction: anything after the operand is ignored.
            fields = fields[: 2]

        # Pad to three positions so short lines cannot raise IndexError.
        first = fields[0]
        second = fields[1] if len(fields) > 1 else ' '
        third = fields[2] if len(fields) > 2 else ' '

        if not is_instruction_directive(first) and not is_instruction_directive(second):
            print('{} or {} is not found'.format(first, second))
            return
            #quit()

        if first == 'RSUB':
            label, mnemonic, value = ' ', first, ' '
        elif first == 'END':
            label, mnemonic, value = ' ', first, second
        elif second == 'RSUB':
            label, mnemonic, value = first, second, ' '
        elif first in instructions or not is_instruction_directive(second):
            # No label.  When the second field is not a mnemonic, the check
            # above guarantees the first one is (an unlabeled directive);
            # previously this case reused the previous line's values.
            label, mnemonic, value = ' ', first, second
        else:
            label, mnemonic, value = first, second, third

        rows.append({
            'Label': label,
            'Mnemonic': mnemonic,
            'Value': value
        })

    listing = pd.DataFrame(rows, columns=['Label', 'Mnemonic', 'Value'])
    if listing.empty:
        # Nothing to terminate; avoid indexing into an empty frame.
        return listing

    if listing.iloc[-1].Mnemonic != 'END':
        end_row = pd.DataFrame([[' ', 'END', listing.iloc[0].Value]], columns=listing.columns)
        listing = pd.concat([listing, end_row], ignore_index = True)
    return listing

In [10]:
# Read the listing from 'in2.txt' and parse it into the
# Label / Mnemonic / Value table used by the rest of the notebook.
# NOTE(review): return_df returns None after printing a message when a line
# has no known mnemonic, so `df` can be None here.
program = open_file('in2.txt')
df = return_df(program)
df

Unnamed: 0,Label,Mnemonic,Value
0,PROG3,START,0030
1,WRREC,LDX,ZERO
2,WLOOP,TD,OUTPUT
3,,JEQ,WLOOP
4,,LDCH,"RECORD,X"
5,,WD,OUTPUT
6,,TIX,LENGTH
7,,JLT,WLOOP
8,ZERO,WORD,0
9,LENGTH,WORD,1


In [11]:

def location_counter(df, opcodes=None):
    """Insert a ``Location_Counter`` column holding each statement's address.

    Row 0 gets ``' '``.  Row 1 gets ``df.Value[0]`` exactly as written (it is
    read as hexadecimal).  Every later row gets the running address as
    upper-case hex, zero-padded to four digits.

    Storage sizes used to advance the counter:

    * instruction: 1 byte when its opcode-table entry is a list, else 3
    * ``WORD``: 3 bytes
    * ``BYTE C'...'``: one byte per character between the quotes
    * any other ``BYTE`` form: half the characters between the quotes
    * ``RESW n``: ``3 * n`` bytes (``n`` decimal);  ``RESB n``: ``n`` bytes
    * anything else (e.g. ``START`` / ``END``): 0 bytes

    Parameters
    ----------
    df : pandas.DataFrame
        Table with ``Mnemonic`` and ``Value`` columns and a default
        ``RangeIndex``.  It is modified in place and also returned.
    opcodes : mapping, optional
        Opcode table; defaults to the notebook-level ``instructions``.

    Calling the function again on the same frame replaces the existing
    column instead of failing.
    """
    if opcodes is None:
        opcodes = instructions

    start_index = df.Value[0]
    addresses = [" ", start_index]
    counter = start_index
    for i in range(1, df.index.stop):
        mnemonic = df.Mnemonic[i]
        operand = df.Value[i]

        # Default to zero so statements that allocate nothing never leave the
        # size undefined (previously an UnboundLocalError when such a
        # statement directly followed START).
        size = 0
        if mnemonic in opcodes:
            size = 1 if isinstance(opcodes[mnemonic], list) else 3
        elif mnemonic == 'WORD':
            size = 3
        elif mnemonic == 'BYTE':
            parts = operand.split('\'')
            length = len(parts[1])
            size = length if parts[0] == 'C' else length // 2
        elif mnemonic == 'RESW':
            size = int(operand) * 3
        elif mnemonic == 'RESB':
            size = int(operand)

        counter = format(int(counter, 16) + size, '04X')
        addresses.append(counter)

    # Drop a column left by an earlier run so the insert below cannot fail.
    if 'Location_Counter' in df.columns:
        del df['Location_Counter']
    # The last entry is the address after the final row; it has no row.
    df.insert(0, 'Location_Counter', addresses[:-1])
    return df

In [12]:
# Add the Location_Counter column; location_counter inserts it into the
# frame it is given and returns that same frame.
df = location_counter(df)

In [13]:
# Display the table, now including the Location_Counter column.
df

Unnamed: 0,Location_Counter,Label,Mnemonic,Value
0,,PROG3,START,0030
1,0030,WRREC,LDX,ZERO
2,0033,WLOOP,TD,OUTPUT
3,0036,,JEQ,WLOOP
4,0039,,LDCH,"RECORD,X"
5,003C,,WD,OUTPUT
6,003F,,TIX,LENGTH
7,0042,,JLT,WLOOP
8,0045,ZERO,WORD,0
9,0048,LENGTH,WORD,1


In [14]:
def symbol_table(df):
    """Map every label to the location counter of the row that defines it.

    Row 0 is skipped, as are rows whose label is the single-space
    placeholder.  When a label occurs more than once, the address of its
    last occurrence is kept.
    """
    return {
        df.Label[row]: df.Location_Counter[row]
        for row in range(1, df.index.stop)  # row 0 is deliberately ignored
        if df.Label[row] != ' '
    }
# Build the label -> address table from the frame that now carries Location_Counter.
sym_table = symbol_table(df)

In [15]:
# Display the symbol table (label -> hexadecimal address string).
sym_table

{'WRREC': '0030',
 'WLOOP': '0033',
 'ZERO': '0045',
 'LENGTH': '0048',
 'OUTPUT': '004B',
 'RECORD': '004C'}

In [16]:
def object_code(df, sym_table, opcodes=None):
    """Insert an ``Object_code`` column with the hex object code of each row.

    Row 0 always gets ``' '``.  For the remaining rows:

    * ``RSUB``                   -> ``"4C0000"``
    * one-byte instruction (opcode-table entry is a list) -> its opcode
    * three-byte instruction     -> 24 bits laid out as
      ``7 high opcode bits | immediate flag | index flag | 15-bit address``
      The immediate flag is set when the operand starts with ``#``, the
      index flag when the operand contains a comma (``SYMBOL,X``).
    * ``WORD n``                 -> ``n`` as six hex digits; negative values
      are encoded as 24-bit two's complement
    * ``BYTE X'..'``             -> the hex digits between the quotes
    * any other ``BYTE`` form    -> two hex digits per ASCII character
    * anything else              -> ``' '``

    Parameters
    ----------
    df : pandas.DataFrame
        Table with ``Mnemonic`` and ``Value`` columns and a default
        ``RangeIndex``.  It is modified in place and also returned.
    sym_table : mapping
        Label -> address as a hexadecimal string.
    opcodes : mapping, optional
        Opcode table; defaults to the notebook-level ``instructions``.

    Returns
    -------
    pandas.DataFrame or None
        ``df`` with the new column, or ``None`` (after printing a message,
        leaving ``df`` untouched) when an operand cannot be resolved.
    """
    if opcodes is None:
        opcodes = instructions

    codes = [' ']
    for i in range(1, df.index.stop):
        mnemonic = df.Mnemonic[i]
        operand = df.Value[i]

        if mnemonic in opcodes:
            entry = opcodes[mnemonic]
            if mnemonic == 'RSUB':
                code = "4C0000"

            elif isinstance(entry, list):
                # One-byte instruction: the object code is just the opcode.
                # (Previously this row silently reused the previous row's code.)
                code = entry[0].upper()

            else:
                immediate = operand.startswith('#')
                operand_parts = operand.split(',')
                indexed = len(operand_parts) > 1

                # Drop the last opcode bit; that position carries the
                # immediate flag instead.
                bits = bin(int(entry, 16))[2: -1].rjust(7, '0')
                bits += '1' if immediate else '0'
                bits += '1' if indexed else '0'

                # Look the symbol up without the '#' marker, which made every
                # immediate operand fail the lookup before.
                target = operand_parts[0][1:] if immediate else operand_parts[0]
                if target in sym_table:
                    address = int(sym_table[target], 16)
                elif immediate and target.isdigit():
                    # Literal immediate value, written in decimal.
                    address = int(target)
                else:
                    print('VARIABLE {0} DOES NOT EXIST'.format(operand_parts[0]))
                    return
                    #quit()

                # NOTE: assumes the address fits in 15 bits.
                bits += bin(address)[2: ].rjust(15, '0')
                code = hex(int(bits, 2))[2: ].rjust(6, '0').upper()

        elif mnemonic == 'WORD':
            number = int(operand)
            if number < 0:
                # hex() of a negative int yields '-0x..'; use two's complement.
                number &= 0xFFFFFF
            code = format(number, '06X')

        elif mnemonic == 'BYTE':
            parts = operand.split('\'')
            if parts[0] == 'X':
                code = parts[1].upper()
            else:
                # Two hex digits per character; the original concatenated the
                # decimal byte values, which is not valid object code.
                code = ''.join(format(byte, '02X') for byte in parts[1].encode('ascii'))
        else:
            code = ' '
        codes.append(code)

    # Drop a column left by an earlier run so the insert below cannot fail.
    if 'Object_code' in df.columns:
        del df['Object_code']
    # Position 4 follows Location_Counter/Label/Mnemonic/Value; clamp it so a
    # narrower frame still gets the column appended at the end.
    df.insert(min(4, len(df.columns)), 'Object_code', codes)
    return df

In [17]:
# Add the Object_code column and display the finished listing.
# NOTE(review): object_code returns None after printing a message when an
# operand is not in sym_table, so `df` can be None here.
df = object_code(df, sym_table)
df

Unnamed: 0,Location_Counter,Label,Mnemonic,Value,Object_code
0,,PROG3,START,0030,
1,0030,WRREC,LDX,ZERO,040045
2,0033,WLOOP,TD,OUTPUT,E0004B
3,0036,,JEQ,WLOOP,300033
4,0039,,LDCH,"RECORD,X",50804C
5,003C,,WD,OUTPUT,DC004B
6,003F,,TIX,LENGTH,2C0048
7,0042,,JLT,WLOOP,380033
8,0045,ZERO,WORD,0,000000
9,0048,LENGTH,WORD,1,000001
