In [1]:
import typing
import re
from pathlib import Path
import pdb
import ipdb

In [2]:
#consts:

## parser

In [3]:
class Command:
    ''' keep track of commands and process their type:
    check for null, or A, C and L. 
    for each type, find key values.
    '''
    def __init__(self, command: str) -> None:
        self.command = command
        self.symbol = None
        self.comp = None
        self.dest = None
        self.jump = None
        self.command_type = None
        if command is None or command == '':
            self.command = None
            return

        self.command_type = self.check_type() # so that always has some value
        self.process_c_command()
        self.process_symbol()

    def check_type(self):
        first_char = self.command[0]
        if first_char == "(":
             return "L_COMMAND"
        if first_char == "@":
             return "A_COMMAND"
        return "C_COMMAND"
    def process_symbol(self):
        '''
        if
        '''
        if self.command_type != 'A_COMMAND' and self.command_type != 'L_COMMAND': return
        #remove @ or () from command
        self.symbol = self.command[1:]
        if self.command_type == 'L_COMMAND':
            self.symbol = self.symbol[:-1]

    def process_c_command(self):
        '''process D=M+1, or D;JEQ
        '''
        if self.command_type != 'C_COMMAND': return
        eq_sign_ind = self.command.find('=')
        if eq_sign_ind != -1: 
            self.dest = self.command[:eq_sign_ind]
            self.comp = self.command[1+eq_sign_ind:]
        jump_comp_ind = self.command.find(';')
        if jump_comp_ind != -1: 
            self.jump = self.command[1+jump_comp_ind:]
            self.comp=self.command[:jump_comp_ind]

    def get_type(self):
        #with getattr return None or value
        return self.command_type
    def get_symbol(self):
        return self.symbol
    def get_dest(self):
        return self.dest
    def get_comp(self):
        return self.comp
    def get_jump(self):
        return self.jump
            
    def __repr__(self) -> str:
        if self.command is None or self.command == '':
            return "null"
        details = [f"Command Type: {self.command_type}"]
        details.append(self.command)
        if self.symbol is not None:
            details.append(f"Symbol: {self.symbol}")
        if self.dest is not None:
            details.append(f"Dest: {self.dest}")
        if self.comp is not None:
            details.append(f"Comp: {self.comp}")
        if self.jump is not None:
            details.append(f"Jump: {self.jump}")
        return ", ".join(details)

In [35]:
class Parser:
    """Encapsulates access to the input code. Reads an assembly program
    by reading each command line-by-line, parses the current command,
    and provides convenient access to the commands components (fields
    and symbols). In addition, removes all white space and comments.

    to deal with multi line comments, don't read line by line
    
    """

    def __init__(self, input_file: typing.TextIO) -> None:
        """Opens the input file and gets ready to parse it.
        

        Args:
            input_file (typing.TextIO): input file.
        """
        self.file = input_file
        self.file_lines = input_file.readlines()
        self.processed_lines = [] #save processed lines for second pass
        self.curr_line_counter = 0
        self.current_line = None
        self.curr_command = None
        self.ROM_command_count = 0 #if starts whitespace
        self.mlc = False #flag for multi line comment
        self.symbol_table = SymbolTable()
        self.first_pass = True
        self.second_pass = False
        self.coder = Code()
        # Your code goes here!
        # A good place to start is to read all the lines of the input:
        # input_lines = input_file.read().splitlines()
        pass

    def has_more_commands(self) -> bool:
        """Are there more commands in the input?
        although, the first and second will have same num lines

        Returns:
            bool: True if there are more commands, False otherwise.
        """
        if self.first_pass:
            return self.curr_line_counter < len(self.file_lines)
        if self.second_pass:
            return self.curr_line_counter < len(self.processed_lines)
        
    def check_mlc(self, file_line: str):
        ''' check if in multi line comment
        '''
        if file_line.startswith("/*"):
            self.mlc = True
        if file_line.startswith("*/"):
            self.mlc = False

    def rem_non_code(self, file_line: str) -> str:
        ''' remove all comments to white space from a 'line',
        as selected by the _ module.
        '''
        if self.mlc: file_line = ''
        file_line = file_line.replace(' ', '')
        file_line = file_line.replace('\n', '')
        file_line = file_line.replace('\t', '')
        file_line = re.sub(r'^.*\*/.*|(?://[^\n]*|/\*(?:(?!\*/).)*\*/)|[\s]', '', file_line)
        return file_line

    def pre_process_line(self,file_line) -> None:
        file_line = file_line.strip()
        self.check_mlc(file_line)
        file_line = self.rem_non_code(file_line)
        return file_line

    def add_label_to_symbol_table(self):
        '''add label to symbol table
        take into account encountering label before decleration.
        only add label if being declared: (xxx)'''
        if self.curr_command.command is not None and self.curr_command.get_type() == 'L_COMMAND':
            # check if label is being declared
            if self.curr_command.command.startswith("(") and self.curr_command.command.endswith(")"):
                print(f'adding label  {self.symbol()} to symbol table with value {self.ROM_command_count}')
                #check if symbol already in table
                if not self.symbol_table.contains(self.symbol()):
                    self.symbol_table.add_entry(self.symbol(), self.ROM_command_count)
    def add_variable_to_symbol_table(self):
        if self.curr_command.command is None: return
        command_type = self.command_type()
        if command_type == 'A_COMMAND' and not self.symbol_table.contains(self.symbol()):
            self.symbol_table.add_entry(self.symbol(), self.symbol_table.get_next_free_ind())
            self.symbol_table.next_free_ram_ind += 1
            
    def update_rom_count(self):
        if self.curr_command.get_type() in ['A_COMMAND', 'C_COMMAND']:
            self.ROM_command_count += 1
            
    def advance(self) -> None:
        """Reads the next command from the input and makes it the current command.
        Should be called only if has_more_commands() is true.
        """
        if self.first_pass:
            file_line = self.file_lines[self.curr_line_counter]
            file_line = self.pre_process_line(file_line)
            self.curr_command = Command(file_line)
            #add labels to symbol table (with check for L_COMMAND)
            self.add_label_to_symbol_table()
            #after cretaing symbol, update ROM count
            self.update_rom_count()
            self.processed_lines.append(file_line)
        if self.second_pass:
            file_line = self.processed_lines[self.curr_line_counter]
            if file_line == '':
                self.curr_line_counter += 1
                return None
            self.curr_command = Command(file_line)
            # add variables to symbol table
            self.add_variable_to_symbol_table()
            binary_command = self.get_command_binary()
            self.curr_line_counter += 1
            return binary_command
            #write code
        self.curr_line_counter += 1 #after reading
        return None

    def get_command_binary(self):
        if self.curr_command.get_type() == 'A_COMMAND':
            return self.binarise_a()
        if self.curr_command.get_type() == 'C_COMMAND':
            return self.binarise_c()
        return None

    def binarise_a(self):
        '''@Xxx where Xxx is either a symbol or a decimal number
        '''
        symbol = self.symbol()
        #if numeric:
        if symbol.isnumeric():
            return str('{0:016b}'.format(int(symbol)))
        #if variable:
        return str('{0:016b}'.format(self.symbol_table.get_address(symbol)))

    def binarise_c(self):
        '''dest=comp;jump
        '''
        dest = self.coder.dest(self.curr_command.get_dest())
        comp = self.coder.comp(self.curr_command.get_comp())
        jump = self.coder.jump(self.curr_command.get_jump())
        type = self.curr_command.get_type()
        return f"{self.coder.start(type)}{comp}{dest}{jump}"

    def prep_for_second_pass(self):
        self.first_pass = False
        self.second_pass = True
        self.curr_line_counter = 0
        self.curr_command = None
        return

    def command_type(self) -> str:
        """
        Returns:
            str: the type of the current command:
            "A_COMMAND" for @Xxx where Xxx is either a symbol or a decimal number
            "C_COMMAND" for dest=comp;jump
            "L_COMMAND" (actually, pseudo-command) for (Xxx) where Xxx is a symbol
        """
        # Your code goes here!
        return self.curr_command.get_type()

    def symbol(self) -> str:
        """
        Returns:
            str: the symbol or decimal Xxx of the current command @Xxx or
            (Xxx). Should be called only when command_type() is "A_COMMAND" or
            "L_COMMAND".
        """
        return self.curr_command.get_symbol()


    def dest(self) -> str:
        """
        Returns:
            str: the dest mnemonic in the current C-command. Should be called
            only when commandType() is "C_COMMAND".
        """
        # Your code goes here!
        return self.curr_command.get_dest()

    def comp(self) -> str:
        """
        Returns:
            str: the comp mnemonic in the current C-command. Should be called
            only when commandType() is "C_COMMAND".
        """
        # Your code goes here!
        return self.curr_command.get_comp()

    def jump(self) -> str:
        """
        Returns:
            str: the jump mnemonic in the current C-command. Should be called
            only when commandType() is "C_COMMAND".
        """
        # Your code goes here!
        return self.curr_command.get_jump()

## symbol table

In [26]:
'8192'.isnumeric()

True

In [5]:
VIRTUAL_REGISTERS = {
    'R0': 0,
    'R1': 1,
    'R2': 2,
    'R3': 3,
    'R4': 4,
    'R5': 5,
    'R6': 6,
    'R7': 7,
    'R8': 8,
    'R9': 9,
    'R10': 10,
    'R11': 11,
    'R12': 12,
    'R13': 13,
    'R14': 14,
    'R15': 15
}

NAMED_ADDRESSES = {
    "SCREEN": 16384,
    "KBD": 24576,
    "SP": 0,
    "LCL": 1,
    "ARG": 2,
    "THIS": 3,
    "THAT": 4,
    }


In [6]:
"""
This file is part of nand2tetris, as taught in The Hebrew University, and
was written by Aviv Yaish. It is an extension to the specifications given
[here](https://www.nand2tetris.org) (Shimon Schocken and Noam Nisan, 2017),
as allowed by the Creative Common Attribution-NonCommercial-ShareAlike 3.0
Unported [License](https://creativecommons.org/licenses/by-nc-sa/3.0/).


Usualy done by saving the translated code in memory 0-x (say 1024), and saving variables from x (1024):
Then each new varibale gets the next assigned the next address in the symbol table.
"""


class SymbolTable:
    """
    A symbol table that keeps a correspondence between symbolic labels and
    numeric addresses.
    note that labels may be mapped to the same numbers as varaibles, but they refer to different mem devices (not problem of the symbol table)
    the table tracks RAM addreses used, the parser counts ROM commands
    """

    def __init__(self) -> None:
        """Creates a new symbol table initialized with all the predefined symbols
        and their pre-allocated RAM addresses, according to section 6.2.3 of the
        book.
        """
        # Your code goes here!
        def_table = VIRTUAL_REGISTERS
        def_table.update(NAMED_ADDRESSES)
        self.mapping_table = def_table
        self.next_free_ram_ind = len(VIRTUAL_REGISTERS.keys())

    def get_next_free_ind(self):
        return self.next_free_ram_ind


    def add_entry(self, symbol: str, address: int) -> None:
        """Adds the pair (symbol, address) to the table.

        Args:
            symbol (str): the symbol to add.
            address (int): the address corresponding to the symbol.
        """
        self.mapping_table[symbol] = address

    def contains(self, symbol: str) -> bool:
        """Does the symbol table contain the given symbol?

        Args:
            symbol (str): a symbol.

        Returns:
            bool: True if the symbol is contained, False otherwise.
        """
        return symbol in self.mapping_table.keys()

    def get_address(self, symbol: str) -> int:
        """Returns the address associated with the symbol.

        Args:
            symbol (str): a symbol.

        Returns:
            int: the address associated with the symbol.
        """

        return self.mapping_table[symbol]


## code writer

In [15]:
# create dicts mapping:
'''
comp (when a=0)	c1	c2	c3	c4	c5	c6	comp (when a=1)
            0	1	0	1	0	1	0
            1	1	1	1	1	1	1
            -1	1	1	1	0	1	0
            D	0	0	1	1	0	0
            A	1	1	0	0	0	0	M
            !D	0	0	1	1	0	1
            !A	1	1	0	0	0	1	!M
            -D	0	0	1	1	1	1
            -A	1	1	0	0	1	1	-M
            D+1	0	1	1	1	1	1
            A+1	1	1	0	1	1	1	M+1
            D-1	0	0	1	1	1	0
            A-1	1	1	0	0	1	0	M-1
            D+A	0	0	0	0	1	0	D+M
            D-A	0	1	0	0	1	1	D-M
            A-D	0	0	0	1	1	1	M-D
            D&A	0	0	0	0	0	0	D&M
            D|A	0	1	0	1	0	1	D|M

and shift:
Instruction	    15	14	13	a	c1	c2	c3	c4	c5	c6
dest=A<<;jump	1	0	1	0	1	0	0	0	0	0
dest=D<<;jump	1	0	1	0	1	1	0	0	0	0
dest=M<<;jump	1	0	1	1	1	0	0	0	0	0
dest=A>>;jump	1	0	1	0	0	0	0	0	0	0
dest=D>>;jump	1	0	1	0	0	1	0	0	0	0
dest=M>>;jump	1	0	1	1	0	0	0	0	0	0
'''

COMP_BITS = {'':'0000000','null':'0000000','0':'0101010', '1':'0111111', '-1':'0111010', 'D':'0001100', 'A':'0110000', '!D':'0001101', '!A':'0110001', '-D':'0001111', '-A':'0110011', 'D+1':'0011111','1+D':'0011111', '1+A':'0110111','A+1':'0110111', 'D-1':'0001110', 'A-1':'0110010', 'D+A':'0000010','A+D':'0000010', 'D-A':'0010011', 'A-D':'0000111', 'D&A':'0000000', 'A&D':'0000000','D|A':'0010101', 'A|D':'0010101',
  'M':'1110000', '!M':'1110001', '-M':'1110011', 'M+1':'1110111', '1+M':'1110111','M-1':'1110010', 'D+M':'1000010', 'M+D':'1000010','D-M':'1010011', 'M-D':'1000111', 'D&M':'1000000', 'M&D':'1000000', 'D|M':'1010101', 'M|D':'1010101',   "A<<": "0100000", "D<<": "0110000", "M<<": "1100000",
    "A>>": "0000000", "D>>": "0010000", "M>>": "1000000"}

DEST_BITS = {None:'000','null':'000', 'M':'001',
             'D':'010', 'MD':'011',
             'A':'100', 'AM':'101',
             'AD':'110', 'AMD':'111'}

JUMP_BITS = {None:'000','null':'000', 'JGT':'001',
                'JEQ':'010', 'JGE':'011',
                'JLT':'100', 'JNE':'101',
                'JLE':'110', 'JMP':'111'}

START_BITS = {'A_COMMAND':'000', 'C_COMMAND':'111', 'L_COMMAND':'000', 'shift':'101'}

In [17]:

class Code:
    """Translates Hack assembly language mnemonics into binary codes."""

    @staticmethod
    def dest(mnemonic: str) -> str:
        """
        Args:
            mnemonic (str): a dest mnemonic string.

        Returns:
            str: 3-bit long binary code of the given mnemonic.
        """
        # Your code goes here!
        return DEST_BITS[mnemonic]

    @staticmethod
    def comp(mnemonic: str) -> str:
        """
        Args:
            mnemonic (str): a comp mnemonic string.

        Returns:
            str: the binary code of the given mnemonic.
        """
        # Your code goes here!
        return COMP_BITS[mnemonic]

    @staticmethod
    def jump(mnemonic: str) -> str:
        """
        Args:
            mnemonic (str): a jump mnemonic string.

        Returns:
            str: 3-bit long binary code of the given mnemonic.
        """
        # Your code goes here!
        return JUMP_BITS[mnemonic]

    @staticmethod
    def start(command_type: str) -> str:
        """
        Args:
            mnemonic (str): a jump mnemonic string.

        Returns:
            str: 3-bit long binary code of the given mnemonic.
        """
        # Your code goes here!
        return START_BITS[command_type]



## main

In [9]:
def assemble_file(
        input_file: typing.TextIO, output_file: typing.TextIO) -> None:
    """Assembles a single file.

    Args:
        input_file (typing.TextIO): the file to assemble.
        output_file (typing.TextIO): writes all output to this file.
    """
    pass

In [38]:
# content of assemble file func
# init parser: while file open, lines get saved in self.file_lines
test_file_path = './../04/fill/Fill.asm'
output_file = Path('./test_fill').with_suffix('.hack')
with open(test_file_path, 'r') as input_file:
    parser = Parser(input_file)
processed_lines = []
while parser.has_more_commands():
    parser.advance()
    #print(parser.curr_command)
# do first pass: populate symbol table with labels, and remove non code
# do second pass: write code
parser.prep_for_second_pass()
binary_lines = []
#while writing to output:
with open(output_file, 'w') as output_file:
    while parser.has_more_commands():
        binary = parser.advance()
        binary_lines.append(binary)
        if binary is not None: 
            output_file.write(f"{binary}\n")
        #print(parser.curr_command)
        #write to output file
        #output_file.write(f"{parser.curr_command}\n")




adding label  loop to symbol table with value 10
adding label  no_key to symbol table with value 18
adding label  update_screen to symbol table with value 20
adding label  reset_index to symbol table with value 35


In [37]:
for i in range(len(binary_lines)):
    print(f'command: {parser.processed_lines[i]}; binary: {binary_lines[i]}')

command: ; binary: None
command: ; binary: None
command: ; binary: None
command: ; binary: None
command: ; binary: None
command: ; binary: None
command: ; binary: None
command: ; binary: None
command: ; binary: None
command: ; binary: None
command: ; binary: None
command: ; binary: None
command: ; binary: None
command: ; binary: None
command: ; binary: None
command: ; binary: None
command: ; binary: None
command: ; binary: None
command: ; binary: None
command: ; binary: None
command: ; binary: None
command: ; binary: None
command: ; binary: None
command: ; binary: None
command: ; binary: None
command: ; binary: None
command: ; binary: None
command: ; binary: None
command: ; binary: None
command: ; binary: None
command: ; binary: None
command: ; binary: None
command: ; binary: None
command: ; binary: None
command: ; binary: None
command: ; binary: None
command: ; binary: None
command: ; binary: None
command: ; binary: None
command: ; binary: None
command: @SCREEN; binary: 01000000000000

In [51]:
parser.processed_lines[3] == ''

True

In [68]:
parser.symbol_table.mapping_table

{'R0': 0,
 'R1': 1,
 'R2': 2,
 'R3': 3,
 'R4': 4,
 'R5': 5,
 'R6': 6,
 'R7': 7,
 'R8': 8,
 'R9': 9,
 'R10': 10,
 'R11': 11,
 'R12': 12,
 'R13': 13,
 'R14': 14,
 'R15': 15,
 'SCREEN': 16384,
 'KBD': 24576,
 'SP': 0,
 'LCL': 1,
 'ARG': 2,
 'THIS': 3,
 'THAT': 4,
 'loop': 10,
 'no_key': 18,
 'update_screen': 20,
 'reset_index': 35,
 '8192': 27,
 'screen_end_p1': 28,
 'index': 29,
 'color_vlaue': 30}

## test

In [19]:
test_file_path = './Mult.asm'
with open(test_file_path, 'r') as file:
    parser = Parser(file)


for i in range(len(parser.file_lines)):
    parser.advance()
    print(parser.curr_command)

null
null
null
null
null
null
null
null
null
null
null
null
null
null
null
null
null
null
null
null
null
null
null
Command Type: A_COMMAND, @R2, Symbol: R2
Command Type: C_COMMAND, M=0, Dest: M, Comp: 0
null
Command Type: A_COMMAND, @R0, Symbol: R0
Command Type: C_COMMAND, D=M, Dest: D, Comp: M
Command Type: A_COMMAND, @END, Symbol: END
Command Type: C_COMMAND, D;JEQ, Comp: D, Jump: JEQ
null
Command Type: A_COMMAND, @R1, Symbol: R1
Command Type: C_COMMAND, D=M, Dest: D, Comp: M
Command Type: A_COMMAND, @END, Symbol: END
Command Type: C_COMMAND, D;JEQ, Comp: D, Jump: JEQ
null
Command Type: A_COMMAND, @R1, Symbol: R1
Command Type: C_COMMAND, D=M, Dest: D, Comp: M
Command Type: A_COMMAND, @i, Symbol: i
Command Type: C_COMMAND, M=D, Dest: M, Comp: D
null
Command Type: L_COMMAND, (LOOP), Symbol: LOOP
null
Command Type: A_COMMAND, @i, Symbol: i
Command Type: C_COMMAND, D=M, Dest: D, Comp: M
Command Type: A_COMMAND, @END, Symbol: END
Command Type: C_COMMAND, D;JEQ, Comp: D, Jump: JEQ
null
Com