In [184]:
import numpy as np
import re
import math
from pprint import pprint

#### GLOBAL VARIABLES

In [153]:
## Instruction mnemonic -> opcode, stored as a two-digit hexadecimal string.
## PASS TWO concatenates this string with the operand address to form the object code.
INSTRUCTIONS = {'LDA':'00', 'LDX':'04', 'LDCH':'50', 'STA':'0C', 'JEQ':'30', 'WD':'DC', 'TD':'E0', 'TIX':'2C', 'JLT':'38'}
## Data-definition keywords recognised in the data segment (despite the name, these are not machine opcodes).
OPCODES = ['WORD', 'BYTE', 'RESW', 'RESB']
## Prefix combined with the line index to key unlabeled instructions in the location table.
INST_LABEL = '###'
## Program name used when the header line starts directly with 'START'.
DEFAULT_CODE_NAME = 'ASSEMBLER_CODE'
## Placeholder for lines that have no location or no object code.
BLANK = '___'
## Overwritten by Create_SYMTABLE (declared `global` there) with the assembled program's name.
PROG_NAME= ''

### Helper Methods (for PASS ONE)

#### - **Count the number of bytes a BYTE operand occupies**

In [29]:
def Count_byte(operand):
    """Return the number of bytes a BYTE operand occupies.

    Parameters
    ----------
    operand : str
        The operand text starting with its type prefix: 'C' for a character
        literal or 'X' for a hexadecimal literal. The literal may be wrapped
        in single quotes and separated from the prefix by whitespace
        (e.g. "C'EOF'", "X '05'", or the unquoted "X05").

    Returns
    -------
    int
        'C': one byte per character between the quotes (punctuation and
        spaces are data and are counted).
        'X': one byte per pair of hex digits, rounded up.

    Raises
    ------
    ValueError
        If the operand starts with neither 'C' nor 'X'.
    """
    operand = operand.strip()
    kind, literal = operand[:1], operand[1:].strip()
    ## Drop only the surrounding quotes; whatever sits between them is data.
    if len(literal) >= 2 and literal[0] == literal[-1] == "'":
        literal = literal[1:-1]
    if kind == 'C':
        return len(literal)
    if kind == 'X':
        ## separators inside a hex literal carry no data, keep word characters only
        digits = re.sub(r'[^\w]', '', literal)
        return math.ceil(len(digits) / 2)
    ## Fail here instead of returning None and breaking later inside add_X.
    raise ValueError(f'INVALID BYTE OPERAND ==> {operand}')

assert Count_byte('X05') == 1
assert Count_byte('X05F3') == 2
assert Count_byte('X054') == 2
assert Count_byte('X05F305F3') == 4
assert Count_byte('C05F305F3') == 8

#### - Add a decimal amount to a hexadecimal address

In [30]:
def add_X(prev, amount):
    """Advance a hexadecimal address by a decimal amount.

    prev   : address as a hexadecimal string (either letter case).
    amount : number of bytes to add; anything `int()` accepts (int or decimal string).

    Returns the sum as an uppercase hexadecimal string, zero-padded to at
    least four digits.
    """
    total = int(prev, 16) + int(amount)
    return f'{total:04X}'

assert add_X('100f', 11) == '101A'
assert add_X('101A', 11) == '1025'

### Read program

In [126]:
## Tokenize the source program: one list of whitespace-separated tokens per line.
with open('./Prog_1.txt', 'r') as f:
    tokenized_lines = [line.split() for line in f]

## Blank lines yield no tokens; both passes index v[0] on every entry, so they
## are dropped here and the remaining lines are numbered consecutively from 0.
content = dict(enumerate(tokens for tokens in tokenized_lines if tokens))

content

{0: ['HWA', 'START', '1010'],
 1: ['LDA', 'ALPHA'],
 2: ['STA', 'GAMMA'],
 3: ['LDA', 'BETA'],
 4: ['STA', 'ALPHA'],
 5: ['LDA', 'GAMMA'],
 6: ['STA', 'BETA'],
 7: ['ALPHA', 'WORD', '100'],
 8: ['BETA', 'WORD', '333'],
 9: ['GAMMA', 'RESW', '1'],
 10: ['END', '1010']}

### Main Method (PASS ONE)

#### Create Symbol Table

In [162]:
def Create_SYMTABLE(content):
    """PASS ONE: assign a location to every source line and collect the symbols.

    Parameters
    ----------
    content : dict
        Maps a line index to the whitespace-split tokens of that line.
        Entry 0 is the header line: its last token is the starting address
        (hexadecimal string) and its first token is the program name, unless
        that token is 'START', in which case DEFAULT_CODE_NAME is used.

    Returns
    -------
    (LOC, LenOfProg, STARTADD)
        LOC       : dict with one entry per processed line, in source order.
                    Labeled lines are keyed by their label, unlabeled
                    instructions by INST_LABEL + line index; the header and
                    the END line map to BLANK.
        LenOfProg : program length as an uppercase hex string (>= 2 digits).
        STARTADD  : starting address exactly as written in the header.

    Side effects
    ------------
    Sets the module-level PROG_NAME.

    Raises
    ------
    Exception
        On an unknown operation, a missing operand, or a symbol that is
        defined more than once.
    """
    global PROG_NAME
    header = content[0]
    STARTADD = LOCCTR = header[-1]  ## LOCCTR counts in hexadecimal
    PROG_NAME = DEFAULT_CODE_NAME if header[0] == 'START' else header[0]

    LOC = {PROG_NAME: BLANK}  ## label : location (the header line has no location)

    def define(symbol, location):
        ## A redefinition would silently overwrite the first entry and drop a
        ## row from the per-line listing, so it is reported instead.
        if symbol in LOC:
            raise Exception(f'DUPLICATE SYMBOL ==> {symbol}')
        LOC[symbol] = location

    for i, v in enumerate(content.values()):
        if i == 0: continue  ## skip the header line
        if str.upper(v[0]) == 'END':
            LOC['END'] = BLANK
            break

        if v[0] in INSTRUCTIONS:                     ## instruction without a label
            define(INST_LABEL + str(i), LOCCTR)
            LOCCTR = add_X(LOCCTR, 3)
        elif len(v) > 1 and v[1] in INSTRUCTIONS:    ## labeled instruction (loop, ...etc)
            define(v[0], LOCCTR)
            LOCCTR = add_X(LOCCTR, 3)
        elif len(v) < 2 or (v[0] not in OPCODES and v[1] not in OPCODES):
            raise Exception(f'INVALID OPERATION CODE ==> {v[0]}')
        else:
            ## DATA segment: <label> <directive> <operand>
            label, OpCode = v[0], v[1]
            if OpCode not in OPCODES:
                raise Exception(f'INVALID OPERATION CODE ==> {OpCode}')
            if len(v) < 3:
                raise Exception(f'MISSING OPERAND ==> {label} {OpCode}')
            operand = v[2]

            if OpCode == 'WORD':      ## WORD -> 3 bytes
                size = 3
            elif OpCode == 'BYTE':    ## BYTE -> length of the literal; ['X', '05'] => 'X05'
                size = Count_byte(''.join(v[2:]))
            elif OpCode == 'RESB':    ## RESB -> operand bytes
                size = int(operand)
            else:                     ## RESW -> operand * 3 bytes
                size = int(operand) * 3

            define(label, LOCCTR)
            LOCCTR = add_X(LOCCTR, size)

    ## Computed after the loop so the length is a hex string even when the
    ## source has no END line.
    LenOfProg = format(int(LOCCTR, 16) - int(STARTADD, 16), '02X')
    return LOC, LenOfProg, STARTADD

In [163]:
LOCTBLE, lenPg, STRT = Create_SYMTABLE(content)

In [164]:
LOCTBLE

{'HWA': '___',
 '###1': '1010',
 '###2': '1013',
 '###3': '1016',
 '###4': '1019',
 '###5': '101C',
 '###6': '101F',
 'ALPHA': '1022',
 'BETA': '1025',
 'GAMMA': '1028',
 'END': '___'}

In [165]:
lenPg

'1B'

In [166]:
STRT

'1010'

In [167]:
PROG_NAME

'HWA'

### Helper Methods (for PASS TWO)

In [116]:
def hex_to_binary(hexVal):
    """Convert a hexadecimal string into a binary string.

    The result is left-padded with zeros to at least 16 digits; wider values
    are returned at their natural width.
    """
    bits = bin(int(hexVal, 16))[2:]  ## drop the '0b' prefix
    return bits.rjust(16, '0')

assert hex_to_binary('0045') == '0000000001000101'

In [117]:
def add_B_X(A):
    """Add the index (x) flag to an address given as a binary string.

    The flag is binary '1' followed by fifteen '0's, i.e. 0x8000, so for a
    four-hex-digit address it raises the leading hex digit by 8:
        004C ==> 804C   (0 --> 0000, after the addition 1000 = 8)
        104C ==> 904C   (1 --> 0001, after the addition 1001 = 9)

    Returns the sum as an uppercase hexadecimal string (no padding applied).
    """
    INDEX_FLAG = 0x8000
    return f'{int(A, 2) + INDEX_FLAG:X}'

## Sanity checks: adding the flag turns a leading hex digit 0 into 8 and 2 into A.
assert add_B_X( hex_to_binary('004C') ) == '804C'
assert add_B_X( hex_to_binary('204C') ) == 'A04C'

In [118]:
def make_OBJ_Code(OpCode, address, x = 0):
    """Build an instruction's object code from its opcode and operand address.

    OpCode  : opcode as a hexadecimal string.
    address : operand address as a hexadecimal string.
    x       : 0 for direct addressing; any other value adds the index flag
              to the address (via add_B_X) before concatenation.

    Returns the opcode string followed by the (possibly flagged) address.
    """
    indexed = x != 0
    target = add_B_X(hex_to_binary(address)) if indexed else address
    return OpCode + target

assert make_OBJ_Code('00','004C', 1) == '00804C'
assert make_OBJ_Code('00','004C')    == '00004C'

In [119]:
def x_exist(labelX):
    """Tell whether an instruction operand uses indexed addressing.

    Space characters are removed first, so 'RECORD, X' and 'RECORD,X' are
    treated alike; the operand is indexed when what remains ends with ',X'.
    """
    compact = ''.join(labelX.split(' '))
    return compact.endswith(',X')


assert x_exist('RECORD, X') == True

In [120]:
def x_exist_data(labelX):
    """Tell whether a BYTE operand is a hexadecimal literal, i.e. begins with 'X'."""
    return labelX[:1] == 'X'

assert x_exist_data("X '04'") == True
assert x_exist_data("C '04'") == False


In [121]:
## BYTE OBJCODE
def get_BYTE_OBCODE(value, x='X'):
    if x =='X':
        return value
    return ''.join(format(ord(ch), 'X') for ch in value)

assert get_BYTE_OBCODE('TEST STRING','C') == '5445535420535452494E47'
assert get_BYTE_OBCODE('05','X') == '05'
assert get_BYTE_OBCODE('05') == '05'
assert get_BYTE_OBCODE('05','C') == '3035'

In [122]:
def get_WORD_OBCODE(value):
    """Return the object code of a WORD constant as six uppercase hex digits.

    value : the constant in decimal, as an int or a decimal string.

    A word is three bytes, so negative values are encoded in 24-bit two's
    complement (-1 -> 'FFFFFF').

    Raises
    ------
    ValueError
        If the value does not fit in 24 bits (below -0x800000 or above 0xFFFFFF).
    """
    number = int(value)
    if not -0x800000 <= number <= 0xFFFFFF:
        raise ValueError(f'WORD VALUE OUT OF RANGE ==> {value}')
    ## masking maps negative numbers onto their two's-complement bit pattern
    return format(number & 0xFFFFFF, '06X')

assert get_WORD_OBCODE(333) == '00014D'

### Main Method (PASS TWO)

####  Create Object Code

In [185]:
def Create_OBJ_CODES(SymTable, content):
    """PASS TWO: produce one object-code entry per source line.

    Parameters
    ----------
    SymTable : dict
        Label -> address (hexadecimal string), as built by Create_SYMTABLE.
    content : dict
        Line index -> whitespace-split tokens of that line; entry 0 is the
        header line.

    Returns
    -------
    list of str
        One entry per line up to and including the END line. The header,
        RESB/RESW and END lines get BLANK. Processing stops at END, as in
        PASS ONE, so this list lines up with the PASS ONE location table.

    Raises
    ------
    Exception
        On an unknown operation, a missing operand or an undefined symbol.
    """
    def instruction_code(mnemonic, operand_tokens):
        ## object code of one instruction; operand_tokens is everything after the mnemonic
        if not operand_tokens:
            raise Exception(f'MISSING OPERAND ==> {mnemonic}')
        ## the symbol is whatever precedes the comma: 'BUFFER,' and 'BUFFER,X' both give 'BUFFER'
        symbol = operand_tokens[0].split(',')[0]
        if symbol not in SymTable:
            raise Exception(f'UNDEFINED SYMBOL ==> {symbol}')
        indexed = x_exist(''.join(operand_tokens))
        return make_OBJ_Code(INSTRUCTIONS[mnemonic], SymTable[symbol], x = 1 if indexed else 0)

    def byte_code(operand_tokens):
        ## object code of a BYTE literal; operand_tokens is everything after 'BYTE'
        if not operand_tokens:
            raise Exception('MISSING OPERAND ==> BYTE')
        operand = ''.join(operand_tokens)  ## same text PASS ONE measures: ['X', "'05'"] => "X'05'"
        if x_exist_data(operand):
            kind = 'X'
        elif operand.startswith('C'):
            kind = 'C'
        else:
            ## no type prefix: keep encoding the last token as characters
            return get_BYTE_OBCODE(operand_tokens[-1], 'C')
        ## only the literal's content belongs in the object code, not its prefix or quotes
        literal = operand[1:]
        if len(literal) >= 2 and literal[0] == literal[-1] == "'":
            literal = literal[1:-1]
        return get_BYTE_OBCODE(literal, kind)

    ObjCds = [BLANK]  ## the header (start address) line has no object code
    for i, v in enumerate(content.values()):
        if i == 0:  continue
        if str.upper(v[0]) == 'END':
            ObjCds.append(BLANK)
            break

        if v[0] in INSTRUCTIONS:                     ## normal instruction
            ObjCds.append(instruction_code(v[0], v[1:]))
        elif len(v) > 1 and v[1] in INSTRUCTIONS:    ## labeled instruction (loop, jump, ... etc)
            ObjCds.append(instruction_code(v[1], v[2:]))
        elif len(v) < 2:
            raise Exception(f'INVALID INPUT CODE ==> {v[0]}')
        elif v[1] == 'WORD':
            if len(v) < 3:
                raise Exception(f'MISSING OPERAND ==> {v[0]} {v[1]}')
            ObjCds.append(get_WORD_OBCODE(v[2]))
        elif v[1] == 'BYTE':
            ObjCds.append(byte_code(v[2:]))
        elif v[1] == 'RESB' or v[1] == 'RESW':
            ObjCds.append(BLANK)                     ## reserved storage has no object code
        else:
            raise Exception(f'INVALID INPUT CODE ==> {v[1]}')
    return ObjCds

In [144]:
OBJECTS = Create_OBJ_CODES(LOCTBLE, content)

In [145]:
## pprint is already imported in the notebook's import cell
pprint(OBJECTS)

['___',
 '001022',
 '0C1028',
 '001025',
 '0C1022',
 '001028',
 '0C1025',
 '000064',
 '00014D',
 '___',
 '___']


#### Writing Symbol-table and Object Codes into file

In [183]:
## Write the listing: one row per source line with its location and object code.
## The `with` block closes the file even if one of the writes raises.
with open('Progg_'+PROG_NAME+".txt", "w") as f:
    f.write('='*30 + '\n')
    f.write('LOC \t\t OBJ_CODE\n')
    f.write('='*30 + '\n')
    ## both sequences are in source-line order, so zip pairs them row by row
    for loc, obcod in zip(LOCTBLE.values(),OBJECTS):
        f.write(loc+ " \t\t "+ obcod + "\n")
    f.write('='*30 + '\n')

###### additional method

In [87]:
def bin_to_hex(binVal):
    """Convert a binary string to uppercase hexadecimal, zero-padded to at least four digits."""
    number = int(binVal, 2)
    return f'{number:04X}'

assert bin_to_hex('1000000001000101') == '8045'