In [7]:
class State:
    """Machine snapshot: eight registers (R0-R7) plus instruction and data lists."""

    def __init__(self, instructions=None, data=None):
        """Create a state with every register cleared.

        Args:
            instructions: Optional list used as instruction memory; stored as
                given. A new empty list is used when omitted.
            data: Optional list used as data memory; stored as given. A new
                empty list is used when omitted.
        """
        # Registers R0..R7 are plain attributes, all starting at zero.
        for index in range(8):
            setattr(self, f"R{index}", 0)

        # Fresh lists per instance so two states never share a default.
        self.instructions = [] if instructions is None else instructions
        self.data = [] if data is None else data

    def __repr__(self):
        """Return ``State(R0=..., ..., R7=..., instructions=..., data=...)``."""
        names = [f"R{index}" for index in range(8)]
        registers = ", ".join(f"{name}={getattr(self, name)}" for name in names)
        return f"State({registers}, instructions={self.instructions}, data={self.data})"

In [8]:
def to_twos_complement(value: int, bits: int) -> str:
    """Encode ``value`` as a two's complement bit string exactly ``bits`` wide.

    Args:
        value: Signed integer to encode.
        bits: Width of the resulting field.

    Returns:
        A string of ``bits`` characters, each '0' or '1'.

    Raises:
        ValueError: If ``value`` lies outside the signed range of the field.
    """
    half_range = 1 << (bits - 1)
    if value > half_range - 1 or value < -half_range:
        raise ValueError(f"Immediate value {value} out of range for {bits}-bit two's complement")
    # Negative numbers are stored as their value modulo 2**bits.
    unsigned = value + (1 << bits) if value < 0 else value
    return format(unsigned, f'0{bits}b')

def decode_instruction(instr: str) -> str:
    """Encode one assembly instruction (as a string) into its 16-bit binary encoding.

    The text is lower-cased first, so mnemonics and register names are
    case-insensitive. Operands are comma-separated and empty fields are
    ignored. A register may be written either as a bare index ("3") or with
    an "r" prefix ("r3"), matching the register syntax used by the sample
    programs elsewhere in this notebook.

    Formats (fields listed most-significant first):

      ada/adz/adc/awc/aca/acz/acc/acw rc, ra, rb -> 0001 | RA | RB | RC | func
      ndu/ndz/ndc/ncu/ncz/ncc         rc, ra, rb -> 0010 | RA | RB | RC | func
      adi                             rb, ra, imm6 -> 0000 | RA | RB | IMM6
      lw/sw/beq/blt/ble               ra, rb, imm6 -> opcode | RA | RB | IMM6
      lli/jal/jri                     ra, imm9     -> opcode | RA | IMM9
      lm/sm                           ra, imm8     -> opcode | RA | 0 | IMM8
      jlr                             ra, rb       -> 1101 | RA | RB | 000000

    imm6 and imm9 are decimal integers encoded in two's complement; imm8 is
    copied verbatim and must be exactly eight binary digits.

    Args:
        instr: One line of assembly, e.g. "ada r3, r1, r2" or "ada 3, 1, 2".

    Returns:
        A 16-character string of '0' and '1'.

    Raises:
        ValueError: If the instruction is empty, the mnemonic is unknown, the
            operand count is wrong, an operand cannot be parsed, a register is
            outside 0-7, or an immediate does not fit its field.
    """
    tokens = instr.lower().split()
    if not tokens:
        raise ValueError("Empty instruction")
    mnemonic = tokens[0]
    # Re-join the remainder so "r1 , r2" and "r1,r2" split the same way.
    args = [arg.strip() for arg in " ".join(tokens[1:]).split(",") if arg.strip()]

    # Function codes of the three-register add and nand families.
    add_funcs = {
        "ada": "000", "adz": "001", "adc": "010", "awc": "011",
        "aca": "100", "acz": "101", "acc": "110", "acw": "111",
    }
    nand_funcs = {
        "ndu": "000", "ndz": "001", "ndc": "010",
        "ncu": "100", "ncz": "101", "ncc": "110",
    }
    # Opcodes of the remaining formats.
    imm6_opcodes = {
        "adi": "0000", "lw": "0100", "sw": "0101",
        "beq": "1000", "blt": "1001", "ble": "1010",
    }
    imm9_opcodes = {"lli": "0011", "jal": "1100", "jri": "1111"}
    imm8_opcodes = {"lm": "0110", "sm": "0111"}

    def reg_number(token: str) -> int:
        """Parse a register written as '3' or 'r3' into its index."""
        return int(token[1:] if token.startswith("r") else token)

    def reg_field(reg: int) -> str:
        """Return the 3-bit field for a register index, checking its range."""
        if reg < 0 or reg > 7:
            raise ValueError("Register out of range (must be 0-7)")
        return format(reg, '03b')

    def expect(count: int, names: str) -> None:
        """Reject the instruction unless exactly ``count`` operands were given."""
        if len(args) != count:
            raise ValueError(f"{mnemonic} requires {count} operands ({names})")

    def parse(converters, message: str) -> list:
        """Convert the leading operands pairwise, reporting ``message`` on failure."""
        try:
            return [convert(arg) for convert, arg in zip(converters, args)]
        except ValueError:
            raise ValueError(message) from None

    # Three-register formats: assembly order is rc, ra, rb.
    if mnemonic in add_funcs or mnemonic in nand_funcs:
        expect(3, "rc, ra, rb")
        if mnemonic in add_funcs:
            opcode, func = "0001", add_funcs[mnemonic]
            message = "Invalid register operand; must be an integer 0-7"
        else:
            opcode, func = "0010", nand_funcs[mnemonic]
            message = "Invalid register operand"
        rc, ra, rb = parse([reg_number] * 3, message)
        return opcode + reg_field(ra) + reg_field(rb) + reg_field(rc) + func

    # Two registers plus a signed 6-bit immediate.
    if mnemonic in imm6_opcodes:
        # adi names its destination (RB) first; every other mnemonic lists RA first.
        rb_first = mnemonic == "adi"
        expect(3, "rb, ra, imm6" if rb_first else "ra, rb, imm6")
        first, second, imm = parse(
            [reg_number, reg_number, int], f"Invalid operand for {mnemonic}"
        )
        ra, rb = (second, first) if rb_first else (first, second)
        return (
            imm6_opcodes[mnemonic]
            + reg_field(ra)
            + reg_field(rb)
            + to_twos_complement(imm, 6)
        )

    # One register plus a signed 9-bit immediate.
    if mnemonic in imm9_opcodes:
        expect(2, "ra, imm9")
        ra, imm = parse([reg_number, int], f"Invalid operand for {mnemonic}")
        return imm9_opcodes[mnemonic] + reg_field(ra) + to_twos_complement(imm, 9)

    # One register plus an 8-bit literal bit pattern.
    if mnemonic in imm8_opcodes:
        expect(2, "ra, imm8")
        (ra,) = parse([reg_number], f"Invalid register for {mnemonic}")
        imm8 = args[1]
        if len(imm8) != 8 or any(ch not in "01" for ch in imm8):
            raise ValueError("imm8 must be an 8-bit binary string")
        return imm8_opcodes[mnemonic] + reg_field(ra) + "0" + imm8

    # jlr ra, rb: two registers followed by six zero bits.
    if mnemonic == "jlr":
        expect(2, "ra, rb")
        ra, rb = parse([reg_number] * 2, "Invalid operand for jlr")
        return "1101" + reg_field(ra) + reg_field(rb) + "000000"

    raise ValueError(f"Unknown mnemonic '{mnemonic}'")


In [9]:
def reg_to_bin(reg: str) -> str:
    """Convert a register like 'r3' to its 3-bit binary representation.

    Args:
        reg: Register name; case and surrounding whitespace are ignored.

    Returns:
        A three-character string of '0' and '1'.

    Raises:
        ValueError: If the name is not 'r' followed by ASCII digits, or the
            index is outside 0-7.
    """
    reg = reg.lower().strip()
    digits = reg[1:]
    # str.isdigit() also accepts characters such as superscript digits that
    # int() rejects, so only plain ASCII digits are allowed through.
    is_index = bool(digits) and all(ch in "0123456789" for ch in digits)
    if not reg.startswith("r") or not is_index:
        raise ValueError(f"Invalid register: {reg}")
    num = int(digits)
    if not 0 <= num <= 7:
        raise ValueError(f"Register out of range: {reg}")
    return format(num, "03b")

def imm_to_bin(value: int, bits: int) -> str:
    """
    Render a signed integer as a two's complement bit string of width ``bits``.

    Example: imm_to_bin(-3, 6) gives "111101".

    Raises:
        ValueError: If ``value`` does not fit in a signed ``bits``-bit field.
    """
    max_val = (1 << (bits - 1)) - 1
    min_val = -max_val - 1
    if value > max_val or value < min_val:
        raise ValueError(f"Immediate {value} out of range for {bits}-bit two's complement (range {min_val} to {max_val}).")
    # A negative number is represented by its value modulo 2**bits.
    encoded = value + (1 << bits) if value < 0 else value
    return format(encoded, f"0{bits}b")

def decode_instruction(instr: str) -> str:
    """
    Translate one line of assembly into its 16-bit machine word.

    The whole line is lower-cased before parsing, so mnemonics and register
    names are case-insensitive. Registers are converted with reg_to_bin and
    signed immediates with imm_to_bin.

    Encodings (fields listed most-significant first):

      Three registers -- syntax "mnemonic rc, ra, rb" (destination first),
      encoded as opcode RA RB RC func:
        opcode 0001: ada=000 adz=001 adc=010 awc=011
                     aca=100 acz=101 acc=110 acw=111
        opcode 0010: ndu=000 ndz=001 ndc=010
                     ncu=100 ncz=101 ncc=110
        Example: "ada r3, r1, r2" -> "0001" + reg(r1) + reg(r2) + reg(r3) + "000"

      Two registers and imm6 -- syntax "mnemonic ra, rb, imm6",
      encoded as opcode RA RB IMM6:
        lw=0100 sw=0101 beq=1000 blt=1001 ble=1010
        adi=0000 is written "adi rb, ra, imm6": RB is taken from the first
        operand and RA from the second.

      Register and imm9 -- syntax "mnemonic ra, imm9", encoded as opcode RA IMM9:
        lli=0011 jal=1100 jri=1111

      Register and imm8 -- syntax "mnemonic ra, imm8", encoded as opcode RA 0 IMM8:
        lm=0110 sm=0111; imm8 must be written as exactly eight 0/1 characters
        and is copied verbatim.

      Two registers -- syntax "jlr ra, rb", encoded as 1101 RA RB 000000.

    imm6 and imm9 are parsed with int(text, 0), so prefixed literals such as
    0x1F are accepted, and are encoded in two's complement
    (imm6: -32..31, imm9: -256..255).

    Returns:
        A 16-character string of 0s and 1s.

    Raises:
        ValueError: For an empty line, an unknown mnemonic, a wrong operand
            count, an unparsable immediate, or a malformed imm8; errors from
            reg_to_bin and imm_to_bin are not caught here.
    """
    instr = instr.lower().strip()
    if not instr:
        raise ValueError("Empty instruction")

    # First whitespace-delimited word is the mnemonic; the rest, if any, is
    # the comma-separated operand list.
    head, *tail = instr.split(None, 1)
    mnemonic = head.lower()
    operands = [field.strip() for field in tail[0].split(',')] if tail else []

    # mnemonic -> (format tag, opcode[, function code for type1])
    instr_info = {
        # Type1: three registers.
        "ada": ("type1", "0001", "000"),
        "adc": ("type1", "0001", "010"),
        "adz": ("type1", "0001", "001"),
        "awc": ("type1", "0001", "011"),
        "aca": ("type1", "0001", "100"),
        "acc": ("type1", "0001", "110"),
        "acz": ("type1", "0001", "101"),
        "acw": ("type1", "0001", "111"),
        "ndu": ("type1", "0010", "000"),
        "ndc": ("type1", "0010", "010"),
        "ndz": ("type1", "0010", "001"),
        "ncu": ("type1", "0010", "100"),
        "ncc": ("type1", "0010", "110"),
        "ncz": ("type1", "0010", "101"),
        # Type2: two registers and imm6.
        "adi": ("type2", "0000"),
        "lw":  ("type2", "0100"),
        "sw":  ("type2", "0101"),
        "beq": ("type2", "1000"),
        "blt": ("type2", "1001"),
        "ble": ("type2", "1010"),
        # Type3: register and imm9.
        "lli": ("type3", "0011"),
        "jal": ("type3", "1100"),
        "jri": ("type3", "1111"),
        # Type4: register and imm8.
        "lm":  ("type4", "0110"),
        "sm":  ("type4", "0111"),
        # Type5: two registers, zero-padded.
        "jlr": ("type5", "1101"),
    }

    entry = instr_info.get(mnemonic)
    if entry is None:
        raise ValueError(f"Unknown mnemonic: {mnemonic}")
    inst_type, opcode = entry[0], entry[1]

    def encode_immediate(text, width):
        # Base 0 lets int() honour 0x/0o/0b prefixes as well as plain decimal.
        try:
            imm_val = int(text, 0)
        except ValueError:
            raise ValueError(f"Invalid immediate value: {text}")
        return imm_to_bin(imm_val, width)

    if inst_type == "type1":
        if len(operands) != 3:
            raise ValueError(f"{mnemonic} expects 3 operands (rc, ra, rb)")
        dest, src_a, src_b = operands
        # Sources are converted before the destination, which lands in the RC field.
        ra = reg_to_bin(src_a)
        rb = reg_to_bin(src_b)
        rc = reg_to_bin(dest)
        return opcode + ra + rb + rc + entry[2]

    if inst_type == "type2":
        if len(operands) != 3:
            raise ValueError(f"{mnemonic} expects 3 operands")
        first, second, imm_text = operands
        # adi swaps the register operands: RA comes second, RB first.
        ra_text, rb_text = (second, first) if mnemonic == "adi" else (first, second)
        ra = reg_to_bin(ra_text)
        rb = reg_to_bin(rb_text)
        return opcode + ra + rb + encode_immediate(imm_text, 6)

    if inst_type == "type3":
        if len(operands) != 2:
            raise ValueError(f"{mnemonic} expects 2 operands")
        ra = reg_to_bin(operands[0])
        return opcode + ra + encode_immediate(operands[1], 9)

    if inst_type == "type4":
        if len(operands) != 2:
            raise ValueError(f"{mnemonic} expects 2 operands")
        ra = reg_to_bin(operands[0])
        imm8_str = operands[1]
        if not (len(imm8_str) == 8 and all(ch in "01" for ch in imm8_str)):
            raise ValueError("imm8 must be an 8-bit binary string (e.g. '01101111')")
        # A single zero bit separates the register field from the literal.
        return opcode + ra + "0" + imm8_str

    if inst_type == "type5":
        if len(operands) != 2:
            raise ValueError(f"{mnemonic} expects 2 operands")
        ra = reg_to_bin(operands[0])
        rb = reg_to_bin(operands[1])
        return opcode + ra + rb + "000000"

    raise ValueError("Unsupported instruction type")



In [10]:
# Sample instructions, one per encoding format (not assembled below):
examples = [
    "ada r3, r1, r2",      # ada rc, ra, rb: here rc=3, ra=1, rb=2 → 0001|001|010|011|000
    "adi r4, r2, -5",     # adi rb, ra, imm6: rb=4, ra=2, imm6=-5
    "lli r7, 15",        # lli ra, imm9: ra=7, imm9=15
    "lm r2, 11001100",   # lm ra, imm8: ra=2, imm8=11001100
    "jlr r5, r0"          # jlr ra, rb: ra=5, rb=0
]

# Load/NAND/ADD sequence (not assembled below).
prog1 = [
    "LW R4, R7, 2",
    "LW R5, R7, 2",
    "NDU R3, R4, R5",
    "NCU R6, R4, R5",
    "NDC R1, R4, R5",
    "NCC R1, R4, R5",
    "ADA R4, R4, R5",
    "NDC R1, R4, R5",
    "NCC R2, R4, R5",
    "NDZ R4, R4, R5",
    "NCZ R4, R4, R5",
    "ADA R7, R7, R7",
    "NDZ R4, R4, R5",
    "ADA R7, R7, R7",
    "NCZ R4, R4, R5"
]

# Program that is assembled into Instr.hex: load a constant into R1..R7.
prog2 = [
    "LLI R1, 10",
    "LLI R2, 20",
    "LLI R3, 30",
    "LLI R4, 40",
    "LLI R5, 50",
    "LLI R6, 60",
    "LLI R7, 70",

    # "LW R7, R3, 24",

    # "LLI R1, 10",
    # "LLI R2, 20",
    # "LLI R3, 30",
    # "LLI R4, 40",
    # "LLI R5, 50",
    # "LLI R6, 60",
    # "LLI R7, 70"
]

# Assemble prog2 and write it to Instr.hex, one byte per line as two hex digits.
with open("Instr.hex", "w") as file:
    for ex in prog2:
        try:
            encoded = decode_instruction(ex)
        except ValueError as e:
            # Report the failure on the console only: writing the message into
            # the file would put non-hex text into the byte-per-line image.
            print(f"Error decoding '{ex}': {e}")
            continue
        hex_code = format(int(encoded, 2), "04X")
        print(f"{ex} -> {encoded} -> {hex_code}")
        # Write the higher byte and then the lower byte on separate lines
        file.write(hex_code[:2] + "\n")
        file.write(hex_code[2:] + "\n")

# for ex in prog1:
#     try:
#         encoded = decode_instruction(ex)
#         print(f"{ex} -> {encoded}")
#     except ValueError as e:
#         print(f"Error decoding '{ex}': {e}")

LLI R1, 10 -> 0011001000001010 -> 320A
LLI R2, 20 -> 0011010000010100 -> 3414
LLI R3, 30 -> 0011011000011110 -> 361E
LLI R4, 40 -> 0011100000101000 -> 3828
LLI R5, 50 -> 0011101000110010 -> 3A32
LLI R6, 60 -> 0011110000111100 -> 3C3C
LLI R7, 70 -> 0011111001000110 -> 3E46


In [11]:
def reg_to_bin(reg: str) -> str:
    """Convert a register like 'r3' to its 3-bit binary representation.

    Args:
        reg: Register name; case and surrounding whitespace are ignored.

    Returns:
        A three-character string of '0' and '1'.

    Raises:
        ValueError: If the name is not 'r' followed by ASCII digits, or the
            index is outside 0-7.
    """
    reg = reg.lower().strip()
    index_text = reg[1:]
    # str.isdigit() is true for characters such as superscript digits, which
    # int() then refuses; accept plain ASCII digits only so that every
    # malformed name is reported as an invalid register.
    ascii_digits = bool(index_text) and all(ch in "0123456789" for ch in index_text)
    if not reg.startswith("r") or not ascii_digits:
        raise ValueError(f"Invalid register: {reg}")
    num = int(index_text)
    if not 0 <= num <= 7:
        raise ValueError(f"Register out of range: {reg}")
    return format(num, "03b")

def imm_to_bin(value: int, bits: int) -> str:
    """
    Encode ``value`` in two's complement using exactly ``bits`` binary digits.

    For instance, imm_to_bin(-3, 6) is "111101" and imm_to_bin(3, 6) is "000011".

    Raises:
        ValueError: If ``value`` is outside the signed range of a ``bits``-bit field.
    """
    limit = 1 << (bits - 1)
    min_val, max_val = -limit, limit - 1
    if value > max_val or value < min_val:
        raise ValueError(f"Immediate {value} out of range for {bits}-bit two's complement (range {min_val} to {max_val}).")
    if value < 0:
        # Wrap negatives into the upper half of the unsigned range.
        value += 1 << bits
    return format(value, f"0{bits}b")

def decode_instruction(instr: str) -> str:
    """
    Turn a single assembly instruction into its 16-bit encoding, returned as a
    string of '0' and '1' characters.

    Only the mnemonic is lower-cased here; register operands are handed to
    reg_to_bin and signed immediates to imm_to_bin. Five layouts are handled:

      type1  "op rc, ra, rb"    -> opcode RA RB RC func
      type2  "op ra, rb, imm6"  -> opcode RA RB IMM6
             (for adi the first operand fills RB and the second fills RA)
      type3  "op ra, imm9"      -> opcode RA IMM9
      type4  "op ra, imm8"      -> opcode RA 0 IMM8 (imm8 given as eight 0/1 characters)
      type5  "op ra, rb"        -> opcode RA RB 000000

    Raises:
        ValueError: For an empty line, an unknown mnemonic, a wrong operand
            count, an unparsable immediate, or a malformed imm8; errors from
            reg_to_bin and imm_to_bin are not caught here.
    """
    instr = instr.strip()
    if not instr:
        raise ValueError("Empty instruction")

    # The mnemonic is the first word; anything after it is the operand list.
    pieces = instr.split(None, 1)
    mnemonic = pieces[0].lower()
    operands = [] if len(pieces) == 1 else [item.strip() for item in pieces[1].split(',')]

    # mnemonic -> (layout tag, opcode[, function code for type1])
    instr_info = {
        # Type1: add family
        "ada": ("type1", "0001", "000"),
        "adc": ("type1", "0001", "010"),
        "adz": ("type1", "0001", "001"),
        "awc": ("type1", "0001", "011"),
        "aca": ("type1", "0001", "100"),
        "acc": ("type1", "0001", "110"),
        "acz": ("type1", "0001", "101"),
        "acw": ("type1", "0001", "111"),
        # Type1: nand family
        "ndu": ("type1", "0010", "000"),
        "ndc": ("type1", "0010", "010"),
        "ndz": ("type1", "0010", "001"),
        "ncu": ("type1", "0010", "100"),
        "ncc": ("type1", "0010", "110"),
        "ncz": ("type1", "0010", "101"),
        # Type2: register, register, imm6
        "adi": ("type2", "0000"),
        "lw":  ("type2", "0100"),
        "sw":  ("type2", "0101"),
        "beq": ("type2", "1000"),
        "blt": ("type2", "1001"),
        "ble": ("type2", "1010"),
        # Type3: register, imm9
        "lli": ("type3", "0011"),
        "jal": ("type3", "1100"),
        "jri": ("type3", "1111"),
        # Type4: register, imm8
        "lm":  ("type4", "0110"),
        "sm":  ("type4", "0111"),
        # Type5: register, register
        "jlr": ("type5", "1101"),
    }

    info = instr_info.get(mnemonic)
    if info is None:
        raise ValueError(f"Unknown mnemonic: {mnemonic}")
    inst_type = info[0]
    opcode = info[1]

    if inst_type == "type1":
        if len(operands) != 3:
            raise ValueError(f"{mnemonic} expects 3 operands (destination, src1, src2)")
        destination, src1, src2 = operands
        # Evaluated left to right: both sources first, then the destination.
        fields = [reg_to_bin(src1), reg_to_bin(src2), reg_to_bin(destination)]
        return opcode + "".join(fields) + info[2]

    if inst_type == "type2":
        if len(operands) != 3:
            raise ValueError(f"{mnemonic} expects 3 operands")
        # adi lists RB before RA; every other type2 mnemonic lists RA first.
        ra_index, rb_index = (1, 0) if mnemonic == "adi" else (0, 1)
        ra = reg_to_bin(operands[ra_index])
        rb = reg_to_bin(operands[rb_index])
        try:
            imm_val = int(operands[2], 0)
        except ValueError:
            raise ValueError(f"Invalid immediate value: {operands[2]}")
        return opcode + ra + rb + imm_to_bin(imm_val, 6)

    if inst_type == "type3":
        if len(operands) != 2:
            raise ValueError(f"{mnemonic} expects 2 operands")
        ra = reg_to_bin(operands[0])
        try:
            imm_val = int(operands[1], 0)
        except ValueError:
            raise ValueError(f"Invalid immediate value: {operands[1]}")
        return opcode + ra + imm_to_bin(imm_val, 9)

    if inst_type == "type4":
        if len(operands) != 2:
            raise ValueError(f"{mnemonic} expects 2 operands")
        ra = reg_to_bin(operands[0])
        imm8_str = operands[1]
        if not (len(imm8_str) == 8 and all(ch in "01" for ch in imm8_str)):
            raise ValueError("imm8 must be an 8-bit binary string (e.g. '01101111')")
        return opcode + ra + "0" + imm8_str

    if inst_type == "type5":
        if len(operands) != 2:
            raise ValueError(f"{mnemonic} expects 2 operands")
        ra = reg_to_bin(operands[0])
        rb = reg_to_bin(operands[1])
        return opcode + ra + rb + "000000"

    raise ValueError("Unsupported instruction type")

class State:
    """CPU state plus a single-step executor for the 16-bit, eight-register ISA.

    R0 doubles as the program counter (PC).  An instruction that writes R0
    therefore acts as a jump; every other instruction advances R0 by 2, i.e.
    by one 16-bit instruction word.

    Attributes:
        registers: list of eight integers, R0-R7 (R0 is the PC).
        C: carry flag, 0 or 1.
        Z: zero flag, 0 or 1.
        data: data memory, a list of 256 byte values.
        instructions: the program, one 16-character bit string per instruction.
    """

    def __init__(self, asm_instructions):
        """Reset registers, flags and data memory, then assemble the program.

        Args:
            asm_instructions: iterable of assembly source lines; each line is
                encoded with ``decode_instruction``.

        Raises:
            ValueError: propagated from ``decode_instruction`` when it rejects
                a line.
        """
        # Registers R0-R7 (all 0; R0 is the program counter).
        self.registers = [0] * 8
        # Flags: Carry (C) and Zero (Z).
        self.C = 0
        self.Z = 0
        # Data memory: 256 byte-sized cells.
        self.data = [0] * 256
        # Instruction memory: each entry is a 16-bit binary string.
        self.instructions = [decode_instruction(instr) for instr in asm_instructions]

    def display_state(self):
        """Print registers, flags, the first 64 data bytes and the PC to stdout."""
        print("==== CPU STATE ====")
        print("Registers:")
        for i in range(8):
            print(f"  R{i}: 0x{self.registers[i]:04X} ({self.registers[i]})")
        print(f"Flags: C = {self.C}, Z = {self.Z}")
        print("Data Memory (first 64 bytes):")
        # Four rows of 16 bytes each.
        for i in range(0, 64, 16):
            chunk = self.data[i:i+16]
            print("  " + " ".join(f"{byte:02X}" for byte in chunk))
        print(f"PC (R0): {self.registers[0]}")
        print("===================\n")

    def execute(self):
        """Execute the one instruction addressed by the PC (R0) and update state.

        The instruction index is ``R0 // 2`` because each instruction occupies
        two bytes.  After the call R0 holds the address of the next
        instruction: either PC + 2, or the value written to R0 by a branch,
        a jump, or any instruction whose destination register is R0.

        Raises:
            IndexError: the PC points outside instruction memory.
            ValueError: unknown opcode or function code, or a data-memory
                access outside ``self.data``.
        """
        pc = self.registers[0]
        idx = pc // 2
        if idx < 0 or idx >= len(self.instructions):
            raise IndexError("Program counter out of instruction memory bounds.")
        instr = self.instructions[idx]
        opcode = instr[0:4]

        if opcode in ("0001", "0010"):
            self._exec_type1(instr, is_nand=(opcode == "0010"))
        elif opcode == "0000":
            self._exec_adi(instr)
        elif opcode == "0011":
            self._exec_lli(instr)
        elif opcode in ("0100", "0101"):
            self._exec_load_store(instr, is_store=(opcode == "0101"))
        elif opcode in ("0110", "0111"):
            self._exec_multiple(instr, is_store=(opcode == "0111"))
        elif opcode in ("1000", "1001", "1010"):
            self._exec_branch(instr, opcode, pc)
        elif opcode in ("1100", "1101", "1111"):
            self._exec_jump(instr, opcode, pc)
        else:
            raise ValueError("Unknown opcode encountered during execution.")

    # ------------------------------------------------------------------
    # Private helpers used by execute().
    # ------------------------------------------------------------------

    @staticmethod
    def _sign_extend(bits_str):
        """Interpret a bit string as a two's-complement signed integer."""
        val = int(bits_str, 2)
        if bits_str[0] == "1":
            val -= (1 << len(bits_str))
        return val

    @staticmethod
    def _to_signed(x):
        """Reinterpret a 16-bit unsigned value as a signed one."""
        return x if x < 0x8000 else x - 0x10000

    def _read_word(self, addr, op_name):
        """Return the 16-bit word stored high-byte-first at data[addr], data[addr+1]."""
        if addr < 0 or addr + 1 >= len(self.data):
            raise ValueError(f"Memory address out of range in {op_name}")
        return (self.data[addr] << 8) | self.data[addr + 1]

    def _write_word(self, addr, value, op_name):
        """Store the low 16 bits of value high-byte-first at data[addr], data[addr+1]."""
        if addr < 0 or addr + 1 >= len(self.data):
            raise ValueError(f"Memory address out of range in {op_name}")
        self.data[addr] = (value >> 8) & 0xFF
        self.data[addr + 1] = value & 0xFF

    def _exec_type1(self, instr, is_nand):
        """Run a three-register add (opcode 0001) or NAND (opcode 0010).

        The 3-bit function code splits into a complement bit (1 = use ~R[rb])
        and a 2-bit condition: 00 always, 10 only if C is set, 01 only if Z is
        set, 11 add-with-carry (add family only).
        """
        regs = self.registers
        ra = int(instr[4:7], 2)
        rb = int(instr[7:10], 2)
        rc = int(instr[10:13], 2)
        func = instr[13:16]
        complement, cond = func[:1], func[1:]

        valid_conds = ("00", "10", "01") if is_nand else ("00", "10", "01", "11")
        if complement not in ("0", "1") or cond not in valid_conds:
            if is_nand:
                raise ValueError("Unknown function code in NAND instruction")
            raise ValueError("Unknown function code in type1 instruction")

        if (cond == "10" and self.C != 1) or (cond == "01" and self.Z != 1):
            # Condition false: nothing is written, not even R0, so the PC must
            # still advance or the same instruction would repeat forever.
            regs[0] += 2
            return

        operand_b = regs[rb]
        if complement == "1":
            operand_b = ~operand_b & 0xFFFF

        if is_nand:
            # NAND leaves the carry flag untouched.
            result = ~(regs[ra] & operand_b) & 0xFFFF
        else:
            total = regs[ra] + operand_b + (self.C if cond == "11" else 0)
            self.C = 1 if total >= 0x10000 else 0
            result = total & 0xFFFF

        regs[rc] = result
        self.Z = 1 if result == 0 else 0
        # Writing R0 is itself the jump; otherwise step to the next word.
        if rc != 0:
            regs[0] += 2

    def _exec_adi(self, instr):
        """ADI: R[rb] = R[ra] + sign-extended imm6; updates C and Z."""
        regs = self.registers
        ra = int(instr[4:7], 2)
        rb = int(instr[7:10], 2)
        imm_val = self._sign_extend(instr[10:16])
        # Add the 16-bit two's-complement pattern of the immediate so that the
        # carry flag matches a 16-bit adder (e.g. 5 + (-1) carries out).
        total = regs[ra] + (imm_val & 0xFFFF)
        self.C = 1 if total >= 0x10000 else 0
        result = total & 0xFFFF
        regs[rb] = result
        self.Z = 1 if result == 0 else 0
        if rb != 0:
            regs[0] += 2

    def _exec_lli(self, instr):
        """LLI: load the 9-bit immediate into the low bits of R[ra], upper bits 0."""
        regs = self.registers
        ra = int(instr[4:7], 2)
        regs[ra] = int(instr[7:16], 2) & 0x1FF
        if ra != 0:
            regs[0] += 2

    def _exec_load_store(self, instr, is_store):
        """LW / SW: move one word between R[ra] and data[R[rb] + imm6]."""
        regs = self.registers
        ra = int(instr[4:7], 2)   # LW: destination, SW: source
        rb = int(instr[7:10], 2)  # base register for the address
        addr = regs[rb] + self._sign_extend(instr[10:16])
        if is_store:
            self._write_word(addr, regs[ra], "SW")
            # A store never writes a register, so the PC always advances.
            regs[0] += 2
            return
        value = self._read_word(addr, "LW")
        regs[ra] = value
        self.Z = 1 if value == 0 else 0
        if ra != 0:
            regs[0] += 2

    def _exec_multiple(self, instr, is_store):
        """LM / SM: transfer the registers selected by imm8 to/from memory.

        The leftmost mask bit selects R0 and the rightmost R7.  Registers are
        processed from R7 down to R0 at consecutive word addresses starting at
        R[ra].
        """
        regs = self.registers
        ra = int(instr[4:7], 2)  # base register holding the start address
        mask = instr[8:16]
        addr = regs[ra]
        for reg_num in range(7, -1, -1):
            if mask[reg_num] == "1":
                if is_store:
                    self._write_word(addr, regs[reg_num], "SM")
                else:
                    regs[reg_num] = self._read_word(addr, "LM")
                addr += 2
        # Only an LM that loads R0 has already set the next PC; SM never
        # writes a register, and the base register is merely read.
        if is_store or mask[0] != "1":
            regs[0] += 2

    def _exec_branch(self, instr, opcode, pc):
        """BEQ / BLT / BLE: set PC to pc + 2*imm6 when taken, else pc + 2."""
        regs = self.registers
        lhs = regs[int(instr[4:7], 2)]
        rhs = regs[int(instr[7:10], 2)]
        offset = self._sign_extend(instr[10:16])
        if opcode == "1000":      # BEQ
            taken = lhs == rhs
        else:
            # BLT / BLE compare the registers as signed 16-bit values.
            lhs, rhs = self._to_signed(lhs), self._to_signed(rhs)
            taken = lhs < rhs if opcode == "1001" else lhs <= rhs
        regs[0] = pc + (offset * 2 if taken else 2)

    def _exec_jump(self, instr, opcode, pc):
        """JAL / JLR / JRI: unconditional jumps; JAL and JLR link pc + 2 in R[ra]."""
        regs = self.registers
        ra = int(instr[4:7], 2)
        if opcode == "1100":      # JAL: PC-relative jump with link
            offset = self._sign_extend(instr[7:16])
            regs[ra] = pc + 2
            regs[0] = pc + offset * 2
        elif opcode == "1101":    # JLR: jump to R[rb] with link
            # Read the target before writing the link so that ra == rb still
            # jumps to the address the register held.
            target = regs[int(instr[7:10], 2)]
            regs[ra] = pc + 2
            regs[0] = target
        else:                     # JRI: jump to R[ra] + 2*imm9
            regs[0] = regs[ra] + self._sign_extend(instr[7:16]) * 2
