In [7]:
import typing
import re
from pathlib import Path
import pdb
import ipdb

In [2]:
#consts:

## parser

In [29]:
class Command:
    """One line of Hack assembly, classified and split into its fields.

    ``None`` or the empty string is stored as a "null" command: ``command``
    is ``None`` and no type or field is set.  Any other text is typed by its
    first character and then parsed for the fields that type carries
    (symbol for A/L commands; dest, comp and jump for C commands).
    """

    # dest=comp;jump, where both "dest=" and ";jump" are optional.
    _C_PATTERN = re.compile(r'(?:(?P<dest>[^=]+)=)?(?P<comp>[^;]+)(?:;(?P<jump>.+))?')

    # Leading character -> command type; every other character means C.
    _TYPE_BY_PREFIX = {"(": "L_COMMAND", "@": "A_COMMAND"}

    def __init__(self, command: str) -> None:
        self.command = command
        self._symbol = self._comp = self._dest = self._jump = None
        self.command_type = None
        if command is None or command == '':
            # Null command: nothing to classify or parse.
            self.command = None
            return

        # The type must be known before the field parsers run, since each
        # parser is a no-op for the types it does not apply to.
        self.command_type = self.check_type()
        self.process_c_command()
        self.process_symbol()

    def check_type(self):
        """Classify the command by its first character.

        Returns:
            str: 'L_COMMAND' for a leading '(', 'A_COMMAND' for a leading
            '@', and 'C_COMMAND' for anything else.
        """
        return self._TYPE_BY_PREFIX.get(self.command[0], "C_COMMAND")

    def process_symbol(self):
        """Store the symbol of an A or L command; do nothing for C commands."""
        kind = self.command_type
        if kind == 'A_COMMAND':
            # Everything after the '@'.
            self._symbol = self.command[1:]
        elif kind == 'L_COMMAND':
            # Drop the first character and the last one (the parentheses).
            self._symbol = self.command[1:-1]

    def process_c_command(self):
        """Split a C command into dest, comp and jump; do nothing otherwise.

        Parts that are absent from the text stay ``None``.
        """
        if self.command_type == 'C_COMMAND':
            parsed = self._C_PATTERN.match(self.command)
            if parsed is not None:
                self._dest, self._comp, self._jump = parsed.group('dest', 'comp', 'jump')

    @property
    def type(self):
        """str: the command type, or ``None`` for a null command."""
        return self.command_type

    @property
    def symbol(self):
        """str: the symbol of an A/L command, ``None`` otherwise."""
        return self._symbol

    @property
    def dest(self):
        """str: the dest mnemonic of a C command, ``None`` if absent."""
        return self._dest

    @property
    def comp(self):
        """str: the comp mnemonic of a C command, ``None`` if absent."""
        return self._comp

    @property
    def jump(self):
        """str: the jump mnemonic of a C command, ``None`` if absent."""
        return self._jump

    def __repr__(self) -> str:
        """Return a comma-separated debugging summary, or "null" if empty.

        Returns:
            str: the type, the raw text, then every field that is set.
        """
        if self.command is None or self.command == '':
            return "null"
        parts = [f"Command Type: {self.command_type}", self.command]
        labelled = (
            ("Symbol", self._symbol),
            ("Dest", self._dest),
            ("Comp", self._comp),
            ("Jump", self._jump),
        )
        parts.extend(f"{label}: {value}" for label, value in labelled if value is not None)
        return ", ".join(parts)

In [35]:
class Parser:
    """Two-pass reader and translator for a Hack assembly program.

    The whole input is read and cleaned once: block comments, ``//``
    comments, all whitespace and empty lines are removed.  The cleaned lines
    are then walked twice with ``advance()``:

    * first pass - every ``(label)`` is bound to the ROM address of the
      instruction that follows it;
    * second pass (after ``prep_for_second_pass()``) - unknown ``@name``
      symbols are allocated as variables and each A/C command is translated
      to its 16-character binary string.

    Block comments may span several lines, which is why the file is cleaned
    as a whole instead of line by line.
    """

    def __init__(self, input_file: typing.TextIO) -> None:
        """Read and preprocess the whole input.

        The file is consumed here, so the caller may close it right after
        construction.

        Args:
            input_file (typing.TextIO): readable handle on the .asm source.
        """
        self.file_content = input_file.read()
        self.processed_lines = []  # cleaned lines, reused by both passes
        self.curr_line_counter = 0  # index of the next line advance() reads
        # current_line and mlc are not used by any method of this class; they
        # are kept so existing attribute access keeps working.
        self.current_line = None
        self.curr_command = None
        self.ROM_command_count = 0  # A/C commands seen so far in pass one
        self.mlc = False
        self.symbol_table = SymbolTable()
        self.first_pass = True
        self.second_pass = False
        self.coder = Code()
        self.remove_comments_and_whitespace()

    def remove_comments_and_whitespace(self):
        """Fill ``processed_lines`` with the comment- and whitespace-free code.

        ``/* ... */`` comments are removed first (they may cross lines), then
        ``//`` comments, then every whitespace character.  Lines left empty
        are dropped.
        """
        content = re.sub(r'/\*.*?\*/', '', self.file_content, flags=re.DOTALL)
        cleaned = []
        for raw_line in content.split('\n'):
            code = re.sub(r'//.*', '', raw_line)
            # Hack symbols and mnemonics never contain whitespace, so removing
            # all of it lets "D = M ; JGT" parse exactly like "D=M;JGT".
            code = ''.join(code.split())
            if code:
                cleaned.append(code)
        self.processed_lines = cleaned

    def has_more_commands(self) -> bool:
        """Are there more commands left in the current pass?

        Returns:
            bool: True if ``advance()`` has not yet consumed every line.
        """
        return self.curr_line_counter < len(self.processed_lines)

    def add_label_to_symbol_table(self):
        """Bind the label declared by the current ``(xxx)`` command.

        The label maps to the current ROM count, i.e. the address of the next
        real instruction.  A label that is already in the table is left
        untouched.
        """
        text = self.curr_command.command
        if text.startswith("(") and text.endswith(")"):
            label = self.symbol()
            if not self.symbol_table.contains(label):
                self.symbol_table.add_label(label, self.ROM_command_count)

    @staticmethod
    def _is_constant(symbol: str) -> bool:
        """Return True when an A-command operand is a decimal literal."""
        # isdecimal() only accepts characters that int() can parse.
        return symbol.isdecimal()

    def add_variable_to_symbol_table(self):
        """Allocate a RAM address for a not-yet-known ``@name`` symbol.

        Decimal constants such as ``@8192`` are not variables and must not
        consume a RAM slot, so they are skipped.
        """
        if self.curr_command.command is None:
            return
        if self.command_type() != 'A_COMMAND':
            return
        symbol = self.symbol()
        if self._is_constant(symbol):
            return
        if not self.symbol_table.contains(symbol):
            self.symbol_table.add_variable(symbol)

    def update_rom_count(self):
        """Count the current command if it occupies a ROM word (A or C)."""
        if self.curr_command.type in ['A_COMMAND', 'C_COMMAND']:
            self.ROM_command_count += 1

    def advance(self) -> typing.Optional[str]:
        """Read the next command and process it for the active pass.

        Should be called only if ``has_more_commands()`` is true; otherwise
        it does nothing.

        Returns:
            The binary string of the command during the second pass (``None``
            for label lines); always ``None`` during the first pass.
        """
        if not self.has_more_commands():
            return None

        file_line = self.processed_lines[self.curr_line_counter]
        self.curr_line_counter += 1
        # Progress trace, kept as-is for parity with the existing output.
        print(f'processing line: {file_line}')
        self.curr_command = Command(file_line)

        if self.first_pass:
            if self.command_type() == 'L_COMMAND':
                self.add_label_to_symbol_table()
            # Counted after the label is bound so the label points at the
            # instruction that follows it.
            self.update_rom_count()
            return None
        if self.second_pass:
            self.add_variable_to_symbol_table()
            return self.get_command_binary()
        return None

    def get_command_binary(self):
        """Translate the current command; labels produce ``None``."""
        kind = self.command_type()
        if kind == 'A_COMMAND':
            return self.binarise_a()
        if kind == 'C_COMMAND':
            return self.binarise_c()
        return None

    def binarise_a(self):
        """Encode ``@Xxx`` where Xxx is a decimal number or a known symbol.

        Returns:
            str: the value as a zero-padded 16-digit binary string.
        """
        symbol = self.symbol()
        if self._is_constant(symbol):
            value = int(symbol)
        else:
            value = self.symbol_table.get_address(symbol)
        return '{0:016b}'.format(value)

    def binarise_c(self):
        """Encode ``dest=comp;jump`` as prefix + comp + dest + jump bits.

        Shift computations (``<<`` / ``>>``) use the dedicated 'shift' prefix
        instead of the regular C-command prefix; their comp bits overlap with
        ordinary computations, so the prefix is what tells them apart.
        """
        command = self.curr_command
        comp_mnemonic = command.comp
        dest_bits = self.coder.dest(command.dest)
        comp_bits = self.coder.comp(comp_mnemonic)
        jump_bits = self.coder.jump(command.jump)
        is_shift = comp_mnemonic is not None and (
            '<<' in comp_mnemonic or '>>' in comp_mnemonic)
        prefix = self.coder.start('shift' if is_shift else command.type)
        return f"{prefix}{comp_bits}{dest_bits}{jump_bits}"

    def prep_for_second_pass(self):
        """Rewind to the first line and switch from pass one to pass two."""
        self.first_pass = False
        self.second_pass = True
        self.curr_line_counter = 0
        self.curr_command = None

    def command_type(self) -> str:
        """
        Returns:
            str: the type of the current command:
            "A_COMMAND" for @Xxx where Xxx is either a symbol or a decimal number
            "C_COMMAND" for dest=comp;jump
            "L_COMMAND" (actually, pseudo-command) for (Xxx) where Xxx is a symbol
        """
        return self.curr_command.type

    def symbol(self) -> str:
        """
        Returns:
            str: the symbol or decimal Xxx of the current command @Xxx or
            (Xxx). Should be called only when command_type() is "A_COMMAND" or
            "L_COMMAND".
        """
        return self.curr_command.symbol

    def dest(self) -> str:
        """
        Returns:
            str: the dest mnemonic in the current C-command. Should be called
            only when command_type() is "C_COMMAND".
        """
        return self.curr_command.dest

    def comp(self) -> str:
        """
        Returns:
            str: the comp mnemonic in the current C-command. Should be called
            only when command_type() is "C_COMMAND".
        """
        return self.curr_command.comp

    def jump(self) -> str:
        """
        Returns:
            str: the jump mnemonic in the current C-command. Should be called
            only when command_type() is "C_COMMAND".
        """
        return self.curr_command.jump

## symbol table

In [10]:
# R0..R15: each register name maps to the RAM address equal to its number.
VIRTUAL_REGISTERS = {f'R{index}': index for index in range(16)}

# Remaining predefined symbols and the addresses they stand for.
NAMED_ADDRESSES = dict(
    SCREEN=16384,
    KBD=24576,
    SP=0,
    LCL=1,
    ARG=2,
    THIS=3,
    THAT=4,
)


In [11]:
"""
This file is part of nand2tetris, as taught in The Hebrew University, and
was written by Aviv Yaish. It is an extension to the specifications given
[here](https://www.nand2tetris.org) (Shimon Schocken and Noam Nisan, 2017),
as allowed by the Creative Common Attribution-NonCommercial-ShareAlike 3.0
Unported [License](https://creativecommons.org/licenses/by-nc-sa/3.0/).


Usually done by saving the translated code in memory addresses 0-x (say 1024), and saving variables from x (1024) onward:
each new variable is then assigned the next free address in the symbol table.
"""


class SymbolTable:
    """
    A symbol table that keeps a correspondence between symbolic labels and
    numeric addresses.

    Labels and variables may map to the same number; they refer to different
    memories (ROM vs. RAM), which is not the table's concern.  The table
    tracks the next free RAM address for variables; the parser counts ROM
    commands and supplies label addresses.
    """

    # R0-R15 occupy RAM 0-15, so user variables are allocated from 16 upward.
    FIRST_VARIABLE_ADDRESS = 16

    def __init__(self) -> None:
        """Creates a new symbol table initialized with all the predefined
        symbols and their pre-allocated RAM addresses, according to section
        6.2.3 of the book.
        """
        # Build a fresh dict per instance.  Aliasing VIRTUAL_REGISTERS and
        # updating it in place would modify the module-level constant, share
        # one table between all instances and inflate the first free address.
        self.mapping_table = {**VIRTUAL_REGISTERS, **NAMED_ADDRESSES}
        self.next_free_ram_ind = self.FIRST_VARIABLE_ADDRESS

    def get_next_free_ind(self):
        """Return the RAM address the next new variable will receive."""
        return self.next_free_ram_ind

    def add_label(self, symbol: str, rom_count: int) -> None:
        """Adds a label and the ROM address it points to.

        Args:
            symbol (str): the label to add.
            rom_count (int): the ROM address corresponding to the label.
        """
        self.add_entry(symbol, rom_count)

    def add_variable(self, symbol: str) -> None:
        """Adds a variable at the next free RAM address and advances it.

        Args:
            symbol (str): the variable name to add.
        """
        self.add_entry(symbol, self.next_free_ram_ind)
        self.next_free_ram_ind += 1

    def add_entry(self, symbol: str, address: int) -> None:
        """Adds the pair (symbol, address) to the table.

        An existing entry for the same symbol is overwritten.

        Args:
            symbol (str): the symbol to add.
            address (int): the address corresponding to the symbol.
        """
        self.mapping_table[symbol] = address

    def contains(self, symbol: str) -> bool:
        """Does the symbol table contain the given symbol?

        Args:
            symbol (str): a symbol.

        Returns:
            bool: True if the symbol is contained, False otherwise.
        """
        return symbol in self.mapping_table

    def get_address(self, symbol: str) -> int:
        """Returns the address associated with the symbol.

        Args:
            symbol (str): a symbol.

        Returns:
            int: the address associated with the symbol.

        Raises:
            KeyError: if the symbol is not in the table.
        """
        return self.mapping_table[symbol]


## code writer

In [12]:
# create dicts mapping:
'''
comp (when a=0)	c1	c2	c3	c4	c5	c6	comp (when a=1)
            0	1	0	1	0	1	0
            1	1	1	1	1	1	1
            -1	1	1	1	0	1	0
            D	0	0	1	1	0	0
            A	1	1	0	0	0	0	M
            !D	0	0	1	1	0	1
            !A	1	1	0	0	0	1	!M
            -D	0	0	1	1	1	1
            -A	1	1	0	0	1	1	-M
            D+1	0	1	1	1	1	1
            A+1	1	1	0	1	1	1	M+1
            D-1	0	0	1	1	1	0
            A-1	1	1	0	0	1	0	M-1
            D+A	0	0	0	0	1	0	D+M
            D-A	0	1	0	0	1	1	D-M
            A-D	0	0	0	1	1	1	M-D
            D&A	0	0	0	0	0	0	D&M
            D|A	0	1	0	1	0	1	D|M

and shift:
Instruction	    15	14	13	a	c1	c2	c3	c4	c5	c6
dest=A<<;jump	1	0	1	0	1	0	0	0	0	0
dest=D<<;jump	1	0	1	0	1	1	0	0	0	0
dest=M<<;jump	1	0	1	1	1	0	0	0	0	0
dest=A>>;jump	1	0	1	0	0	0	0	0	0	0
dest=D>>;jump	1	0	1	0	0	1	0	0	0	0
dest=M>>;jump	1	0	1	1	0	0	0	0	0	0
'''

# comp mnemonic -> 7 bits (a, c1..c6).  Commutative forms appear in both
# operand orders so either spelling is accepted.
COMP_BITS = {
    # missing computation
    '': '0000000',
    'null': '0000000',
    # a = 0: constants and A/D computations
    '0': '0101010',
    '1': '0111111',
    '-1': '0111010',
    'D': '0001100',
    'A': '0110000',
    '!D': '0001101',
    '!A': '0110001',
    '-D': '0001111',
    '-A': '0110011',
    'D+1': '0011111',
    '1+D': '0011111',
    '1+A': '0110111',
    'A+1': '0110111',
    'D-1': '0001110',
    'A-1': '0110010',
    'D+A': '0000010',
    'A+D': '0000010',
    'D-A': '0010011',
    'A-D': '0000111',
    'D&A': '0000000',
    'A&D': '0000000',
    'D|A': '0010101',
    'A|D': '0010101',
    # a = 1: the same computations with M in place of A
    'M': '1110000',
    '!M': '1110001',
    '-M': '1110011',
    'M+1': '1110111',
    '1+M': '1110111',
    'M-1': '1110010',
    'D+M': '1000010',
    'M+D': '1000010',
    'D-M': '1010011',
    'M-D': '1000111',
    'D&M': '1000000',
    'M&D': '1000000',
    'D|M': '1010101',
    'M|D': '1010101',
    # shift computations
    'A<<': '0100000',
    'D<<': '0110000',
    'M<<': '1100000',
    'A>>': '0000000',
    'D>>': '0010000',
    'M>>': '1000000',
}

# dest mnemonic -> 3 bits; the code of each named destination is its
# 1-based position in the tuple, written in binary.
DEST_BITS = {
    None: '000',
    'null': '000',
    **{name: format(code, '03b')
       for code, name in enumerate(('M', 'D', 'MD', 'A', 'AM', 'AD', 'AMD'), start=1)},
}

# jump mnemonic -> 3 bits, built the same way as DEST_BITS.
JUMP_BITS = {
    None: '000',
    'null': '000',
    **{name: format(code, '03b')
       for code, name in enumerate(('JGT', 'JEQ', 'JGE', 'JLT', 'JNE', 'JLE', 'JMP'), start=1)},
}

# Leading three bits of an instruction, keyed by command kind.
START_BITS = {
    'A_COMMAND': '000',
    'C_COMMAND': '111',
    'L_COMMAND': '000',
    'shift': '101',
}

In [14]:

class Code:
    """Table lookups from Hack mnemonics to their bit strings.

    Every method raises ``KeyError`` when the key is missing from the
    corresponding module-level table.
    """

    @staticmethod
    def dest(mnemonic: str) -> str:
        """Look up the dest field of a C-command.

        Args:
            mnemonic (str): a dest mnemonic, as keyed in ``DEST_BITS``.

        Returns:
            str: the ``DEST_BITS`` entry for the mnemonic.
        """
        dest_bits = DEST_BITS[mnemonic]
        return dest_bits

    @staticmethod
    def comp(mnemonic: str) -> str:
        """Look up the comp field of a C-command.

        Args:
            mnemonic (str): a comp mnemonic, as keyed in ``COMP_BITS``.

        Returns:
            str: the ``COMP_BITS`` entry for the mnemonic.
        """
        comp_bits = COMP_BITS[mnemonic]
        return comp_bits

    @staticmethod
    def jump(mnemonic: str) -> str:
        """Look up the jump field of a C-command.

        Args:
            mnemonic (str): a jump mnemonic, as keyed in ``JUMP_BITS``.

        Returns:
            str: the ``JUMP_BITS`` entry for the mnemonic.
        """
        jump_bits = JUMP_BITS[mnemonic]
        return jump_bits

    @staticmethod
    def start(command_type: str) -> str:
        """Look up the leading bits of an instruction.

        Args:
            command_type (str): a key of ``START_BITS`` (a command type such
                as 'C_COMMAND', or 'shift').

        Returns:
            str: the ``START_BITS`` entry for the command type.
        """
        prefix_bits = START_BITS[command_type]
        return prefix_bits



## main

In [15]:
def assemble_file(
        input_file: typing.TextIO, output_file: typing.TextIO) -> None:
    """Assembles a single file.

    Runs the parser twice over the input: the first pass records the label
    declarations, the second pass translates each command and writes one
    binary line per A/C command.  Label lines yield no output.

    Args:
        input_file (typing.TextIO): the file to assemble.
        output_file (typing.TextIO): writes all output to this file.
    """
    parser = Parser(input_file)

    # First pass: advance() only collects labels and counts ROM commands.
    while parser.has_more_commands():
        parser.advance()

    # Second pass: advance() now returns the binary text, or None for lines
    # that emit nothing.
    parser.prep_for_second_pass()
    while parser.has_more_commands():
        binary = parser.advance()
        if binary is not None:
            output_file.write(f"{binary}\n")

In [36]:
# Prototype of the assemble_file() logic, run against a sample program.
# Parser reads and preprocesses the whole input in its constructor, so the
# input file can be closed as soon as the parser has been built.
test_file_path = './../04/fill/Fill.asm'
output_file = Path('./test_fill').with_suffix('.hack')
with open(test_file_path, 'r') as input_file:
    parser = Parser(input_file)
# NOTE(review): processed_lines is not used anywhere else in this cell.
processed_lines = []
# First pass: advance() records the label declarations and returns None.
while parser.has_more_commands():
    parser.advance()
# Second pass: rewind the parser, then translate and write the code.
parser.prep_for_second_pass()
# Every advance() result is collected here, including the None entries.
binary_lines = []
# NOTE(review): `as output_file` rebinds the name from the Path to the open
# file handle; after this block output_file is a closed file, not a path.
with open(output_file, 'w') as output_file:
    while parser.has_more_commands():
        binary = parser.advance()
        binary_lines.append(binary)
        # Only lines that produced a binary string are written out.
        if binary is not None: 
            output_file.write(f"{binary}\n")




processing line: @SCREEN
processing line: D=A
processing line: @8192
processing line: D=D+A
processing line: @screen_end_p1
processing line: M=M+D
processing line: @SCREEN
processing line: D=A
processing line: @index
processing line: M=D
processing line: (loop)
processing line: @KBD
processing line: D=M
processing line: @no_key
processing line: D;JEQ
processing line: @color_vlaue
processing line: M=-1
processing line: @update_screen
processing line: 0;JMP
processing line: (no_key)
processing line: @color_vlaue
processing line: M=0
processing line: (update_screen)
processing line: @color_vlaue
processing line: D=M
processing line: @index
processing line: A=M
processing line: M=D
processing line: @index
processing line: M=M+1
processing line: @screen_end_p1
processing line: D=M
processing line: @index
processing line: D=D-M
processing line: @reset_index
processing line: D;JEQ
processing line: @loop
processing line: 0;JMP
processing line: (reset_index)
processing line: @SCREEN
processing 

## test