In [None]:
import os
from typing import List, Dict, Tuple
import sys
sys.path.append('../src')
import instructions as inst
import exceptions as exc
from constants import ALU_OPS

In [None]:
# Relative directory that holds the assembly programs.
prog_dir = '../programs'
# Path of the sample program that is read and assembled in the following cells.
test_prog_path = os.path.join(prog_dir, 'test.asm')

In [None]:
def read_asm(path:str) -> List[str]:
    '''
        Function to read an assembly source file.

        Parameters:
            path: Location of the file to read.

        Returns:
            List with one entry per line of the file; line endings are kept.
    '''
    with open(path, 'r') as source_file:
        raw_lines = source_file.readlines()

    return raw_lines

In [None]:
# Read the raw lines of the sample program.
test_prog = read_asm(test_prog_path)

In [None]:
# Show the raw lines as the cell output.
test_prog

In [None]:
def clean_program(program:List[str], prefix:str='//') -> List[str]:
    '''
        Function to strip comments and blank lines from a raw program.

        Parameters:
            program: Raw program lines, e.g. as returned by readlines().
            prefix: Marker that starts a comment. Everything from the first
                occurrence of the marker to the end of the line is discarded.

        Returns:
            List of the remaining lines, whitespace-trimmed and upper-cased.
            Blank lines and lines holding only a comment are left out.
    '''
    cleaned_program = []

    for line in program:
        # Keep only the code in front of the comment marker (if there is one).
        # Slice up to idx, not idx - 1: the character right before the marker
        # belongs to the instruction when no space separates the two.
        idx = line.find(prefix)
        code = line if idx == -1 else line[:idx]

        code = code.strip().upper()

        # Nothing left means a blank line or a (possibly indented) comment-only line
        if code:
            cleaned_program.append(code)

    return cleaned_program

In [None]:
# Remove comments and blank lines from the raw program, then show the result.
cleaned_program = clean_program(test_prog)
cleaned_program

In [None]:
def get_labels(program:List[str], suffix:str=':') -> Tuple[List[str], Dict[int, str]]:
    '''
        Function to extract labels from program.

        A labelled line has the form "<LABEL><suffix> <INSTRUCTION>".

        Parameters:
            program: Program lines to scan.
            suffix: Marker that terminates a label.

        Returns:
            Tuple of
              - the program with every label (and its suffix) removed, and
              - a dictionary mapping the index of a labelled line within
                program to the label name found on that line.

        Raises:
            InvalidLabelException if the text in front of the suffix is empty
            or if nothing but whitespace follows the suffix.

        NOTE(review): the dictionary is keyed by line index, not by label name;
        code that resolves a label by name has to invert it - confirm intent.
    '''
    label_dict = dict()
    stripped_program = []

    for label_idx, line in enumerate(program):
        idx = line.find(suffix)

        # Line carries no label: pass it through untouched
        if idx == -1:
            stripped_program.append(line)
            continue

        # Trim both parts so "END : IDLE" yields label "END" and instruction "IDLE"
        label = line[:idx].strip()
        instruction = line[idx + len(suffix):].strip()

        if label == '':
            raise exc.InvalidLabelException(f'Label suffixed by "{suffix}" can\'t be empty in line "{line}"')

        if instruction == '':
            raise exc.InvalidLabelException(f'Label suffixed by "{suffix}" must be followed by instruction in line "{line}"')

        stripped_program.append(instruction)
        label_dict[label_idx] = label

    return stripped_program, label_dict

In [None]:
# Split the cleaned program into label-free lines and the labels that were found.
stripped_program, label_dict = get_labels(cleaned_program)

In [None]:
# Show the label-free program as the cell output.
stripped_program

In [None]:
# Registers accepted as instruction operands
REGISTERS = {'A', 'B'}

# Instruction type mapping: for every type the number of operands that follow
# the mnemonic, and the set of mnemonics belonging to the type.

# S-Type, memory operation: <OP> {A, B} <ADDRESS>
S_ARG_COUNT = 2
S_TYPE = {'LB', 'SB'}

# R-Type, simple register operation: <OP> {A, B}
R_ARG_COUNT = 1
R_TYPE = {
    'ADD', 'SUB', 'MUL',
    'SLA', 'SRA',
    'SEQ', 'SGT', 'SLT',
    'AND', 'OR', 'XOR', 'NOT',
    'COPY',
    'DEREF',
}

# RR-Type, complex register operation: <OP> {A, B} {A, B}
RR_ARG_COUNT = 2
RR_TYPE = {'INC', 'DEC'}

# B-Type, branch like operation: <OP> <LABEL>
B_ARG_COUNT = 1
B_TYPE = {'BEQ', 'BGT', 'BLT', 'JUMP', 'FUNC'}

# D-Type, direct operation: <OP> (no additional operand)
D_ARG_COUNT = 0
D_TYPE = {'IDLE', 'RETURN'}

# Every supported mnemonic: the union of all instruction types above
TOKENS = S_TYPE | R_TYPE | RR_TYPE | B_TYPE | D_TYPE

In [None]:
def parse_S(line:str) -> str:
    '''
        Function to parse preprocessed S-Type instructions, i.e. lines of the
        form "<OP> {A, B} <ADDRESS>".

        Parameters:
            line: Instruction line to parse.

        Returns:
            Result of the matching memory read/write builder in the
            instructions module, called with the address and check=False.

        Raises:
            InvalidArgumentException if the operand count is not S_ARG_COUNT.
            InvalidRegisterException if the register is not in REGISTERS.
            KeyError if the token is neither LB nor SB.
    '''
    args = line.split()
    if len(args) - 1 != S_ARG_COUNT:
        raise exc.InvalidArgumentException(f'S-Type instruction expects {S_ARG_COUNT} arguments but got {len(args) - 1} in line "{line}".')

    token, reg, addr = args

    if reg not in REGISTERS:
        raise exc.InvalidRegisterException(f'Register {reg} provided for {token} is not supported. Use one of {REGISTERS} instead.')

    # Builder names per token: (variant for register A, variant for register B)
    builder_names = {
        'LB' : ('read_mem_to_A', 'read_mem_to_B'),
        'SB' : ('write_A_to_mem', 'write_B_to_mem')
    }
    name_for_A, name_for_B = builder_names[token]

    # Only the selected builder is looked up on the instructions module
    builder = getattr(inst, name_for_A if reg == 'A' else name_for_B)
    return builder(addr, check=False)
    
def parse_R(line:str) -> str:
    '''
        Function to parse R-Type instructions, i.e. lines of the form
        "<OP> {A, B}".

        Parameters:
            line: Instruction line to parse.

        Returns:
            Result of the matching builder in the instructions module:
            deref_A/deref_B for DEREF, otherwise alu_to_A/alu_to_B called with
            the ALU operation that belongs to the token.

        Raises:
            InvalidArgumentException if the operand count is not R_ARG_COUNT.
            InvalidRegisterException if the register is not in REGISTERS.
            KeyError if the token has no R-Type mapping.
    '''
    args = line.split()
    if len(args) - 1 != R_ARG_COUNT:
        raise exc.InvalidArgumentException(f'R-Type instruction expects {R_ARG_COUNT} arguments but got {len(args) - 1} in line "{line}".')

    token, reg = args

    if reg not in REGISTERS:
        raise exc.InvalidRegisterException(f'Register {reg} provided for {token} is not supported. Use one of {REGISTERS} instead.')

    into_A = reg == 'A'

    # DEREF is the only R-Type token that does not go through the ALU
    if token == 'DEREF':
        return inst.deref_A() if into_A else inst.deref_B()

    # Token -> name of the ALU_OPS member; the same operation is used for both
    # registers, the register only selects where the result is written.
    alu_op_names = {
        'ADD'  : 'ADD',
        'SUB'  : 'SUB',
        'MUL'  : 'MUL',
        'SLA'  : 'SL_A',
        'SRA'  : 'SR_A',
        'SEQ'  : 'EQ',
        'SGT'  : 'GT',
        'SLT'  : 'LT',
        'AND'  : 'AND',
        'OR'   : 'OR',
        'XOR'  : 'XOR',
        'NOT'  : 'NOT_A',
        'COPY' : 'OUT_A'
    }
    alu_op_name = alu_op_names[token]

    builder = inst.alu_to_A if into_A else inst.alu_to_B
    return builder(getattr(ALU_OPS, alu_op_name))
    
    
def parse_RR(token:str, target_reg:str=None, source_reg:str=None) -> str:
    '''
        Function to parse RR-Type instructions, i.e. "<OP> {A, B} {A, B}".

        Two call styles are supported:
            parse_RR(line)                          - a complete instruction line,
                                                      like the other parse_* functions
            parse_RR(token, target_reg, source_reg) - already separated fields

        Parameters:
            token: Token of RR-Type instruction to parse, or the complete
                instruction line when the registers are not passed separately.
            target_reg: Register to store result of instruction.
            source_reg: Register to operate on.

        Returns:
            Result of alu_to_A/alu_to_B from the instructions module (selected
            by the target register), called with the ALU operation that belongs
            to the token and the source register.

        Raises:
            InvalidArgumentException if the operand count is not RR_ARG_COUNT.
            InvalidRegisterException if the target or source register is not supported.
            KeyError if the token is neither INC nor DEC.
    '''
    # Rebuild one instruction line from whatever was passed, so that both call
    # styles share a single validation path below.
    line = ' '.join(part for part in (token, target_reg, source_reg) if part is not None)

    args = line.split()
    if len(args) != RR_ARG_COUNT + 1:
        raise exc.InvalidArgumentException(f'RR-Type instruction expects {RR_ARG_COUNT} arguments but got {len(args) - 1} in line "{line}".')

    # Parse line
    token, target_reg, source_reg = args

    if target_reg not in REGISTERS:
        raise exc.InvalidRegisterException(f'Target register {target_reg} provided for {token} is not supported. Use one of {REGISTERS} instead.')
    if source_reg not in REGISTERS:
        raise exc.InvalidRegisterException(f'Source register {source_reg} provided for {token} is not supported. Use one of {REGISTERS} instead.')

    # Token -> source register -> name of the ALU_OPS member
    alu_op_names = {
        'INC' : {'A' : 'INC_A', 'B' : 'INC_B'},
        'DEC' : {'A' : 'DEC_A', 'B' : 'DEC_B'}
    }
    alu_op_name = alu_op_names[token][source_reg]

    # The target register selects where the ALU result is written
    builder = inst.alu_to_A if target_reg == 'A' else inst.alu_to_B
    return builder(getattr(ALU_OPS, alu_op_name))

def parse_B(line:str):
    '''
        Function to parse B-Type instructions, i.e. lines of the form
        "<OP> <LABEL>".

        Only the operand count is validated so far; translating the
        instruction is not implemented yet.

        Parameters:
            line: Instruction line to parse.

        Raises:
            InvalidArgumentException if the operand count is not B_ARG_COUNT.
            NotImplementedError for every line that passes the operand check.
    '''
    args = line.split()
    if len(args) != B_ARG_COUNT + 1:
        raise exc.InvalidArgumentException(f'B-Type instruction expects {B_ARG_COUNT} arguments but got {len(args) - 1} in line "{line}".')

    # TODO: translate args[0] (token) and args[1] (label) into an instruction.
    # NotImplementedError is a subclass of Exception, so existing handlers
    # that catch Exception keep working.
    raise NotImplementedError('Not implemented')
    
def parse_D(line:str):
    '''
        Function to parse D-Type instructions, i.e. a token without operands.

        Parameters:
            line: Instruction line to parse.

        Returns:
            Result of goto_idle (IDLE) or func_return (RETURN) from the
            instructions module.

        Raises:
            InvalidArgumentException if the operand count is not D_ARG_COUNT.
            KeyError if the token is neither IDLE nor RETURN.
    '''
    args = line.split()
    if len(args) - 1 != D_ARG_COUNT:
        raise exc.InvalidArgumentException(f'D-Type instruction expects {D_ARG_COUNT} arguments but got {len(args) - 1} in line "{line}".')

    token = args[0]

    # Token -> name of the builder in the instructions module
    builder_names = {
        'IDLE': 'goto_idle',
        'RETURN': 'func_return'
    }
    builder = getattr(inst, builder_names[token])

    return builder()

In [None]:
def parse_tokens(program:List[str]) -> List[str]:
    '''
        Function to translate a preprocessed program line by line.

        The first word of each line selects the instruction type and with it
        the parse_* function that translates the line.

        Parameters:
            program: Program lines without comments and labels.

        Returns:
            List with the result of the type specific parser for every
            non-blank line, in program order.

        Raises:
            InvalidTokenException if a line starts with a token not in TOKENS.
            Any exception raised by the type specific parser.
    '''
    # (tokens of the type, parser, amount the ROM index advances), checked in this order
    dispatch = (
        (S_TYPE, parse_S, 2),
        (R_TYPE, parse_R, 1),
        (RR_TYPE, parse_RR, 1),
        (B_TYPE, parse_B, 2),
        (D_TYPE, parse_D, 1),
    )

    parsed_program = []

    # Index to keep track of location in ROM
    # NOTE(review): idx is advanced but never read or returned - presumably
    # meant for resolving label addresses; confirm before relying on it.
    idx = 0

    for line in program:
        fields = line.split()

        # A blank line holds no instruction; skip it instead of failing on fields[0]
        if not fields:
            continue

        token = fields[0]

        if token not in TOKENS:
            raise exc.InvalidTokenException(f'Unsupported token {token} found in line "{line}"')

        for type_tokens, parser, rom_step in dispatch:
            if token in type_tokens:
                parsed_program.append(parser(line))
                idx += rom_step
                break

    return parsed_program

In [None]:
# Translate every line of the label-free program.
p = parse_tokens(stripped_program)

In [None]:
# Print the translated program, one entry per line.
print('\n'.join(p))