# Processor Instructions Conversion to Machine Code
    This program uses pandas to look up the requested instruction in the instruction set, which is stored as data frames, and converts it to the corresponding machine code according to the instruction type (mod), the registers, the displacement and the opcode.
## Importing Libraries

In [1]:
import pandas as pd
import re
import sys

## Creating data frames

In [2]:
# Lookup table for the 2-bit "mod" field: bit pattern -> addressing mode.
# NOTE: get_mod() declares `global mod` and rebinds this name to the selected
# bit string, so the table is only available until get_mod() has run.
mod = pd.DataFrame(
    [
        ["00", "No Displacement"],
        ["01", "8-bit signed extended displacement"],
        ["10", "16-bit signed displacement"],
        # The backslash is escaped explicitly: "\M" is not a valid escape
        # sequence and triggers a SyntaxWarning on recent Python versions.
        ["11", "R\\M is a register"],
    ],
    columns=['mod', 'Function'],
)

mod = mod.astype(pd.StringDtype())  # store every column with pandas' string dtype

print(mod)

  mod                            Function
0  00                     No Displacement
1  01  8-bit signed extended displacement
2  10          16-bit signed displacement
3  11                   R\M is a register


In [3]:
# Register-field encodings: each 3-bit code names an 8-bit register when
# W = 0 and a 16-bit register when W = 1.
reg = pd.DataFrame(
    {
        'Code': ["000", "001", "010", "011", "100", "101", "110", "111"],
        'W= 0 (byte)': ["AL", "CL", "DL", "BL", "AH", "CH", "DH", "BH"],
        'W= 1 (word)': ["AX", "CX", "DX", "BX", "SP", "BP", "SI", "DI"],
    }
).astype(pd.StringDtype())  # every column stored with pandas' string dtype
print(reg)

  Code W= 0 (byte) W= 1 (word)
0  000          AL          AX
1  001          CL          CX
2  010          DL          DX
3  011          BL          BX
4  100          AH          SP
5  101          CH          BP
6  110          DH          SI
7  111          BH          DI


In [4]:
# Opcode table: one 4-bit opcode per supported mnemonic, listed from 1111
# down to 0000.
opcode = pd.DataFrame(
    list(
        {
            "MOV1": "1111",
            "MOV2": "1110",
            "MOV3": "1101",
            "MOV4": "1100",
            "FACT": "1011",
            "AND0": "1010",
            "ADD0": "1001",
            "SUB0": "1000",
            "MUL0": "0111",
            "DIV0": "0110",
            "POW0": "0101",
            "REM0": "0100",
            "INC0": "0011",
            "DEC0": "0010",
            "CLR0": "0001",
            "PERC": "0000",
        }.items()
    ),
    columns=["Instruction", "Opcode"],
).astype(pd.StringDtype())  # every column stored with pandas' string dtype
print(opcode)

   Instruction Opcode
0         MOV1   1111
1         MOV2   1110
2         MOV3   1101
3         MOV4   1100
4         FACT   1011
5         AND0   1010
6         ADD0   1001
7         SUB0   1000
8         MUL0   0111
9         DIV0   0110
10        POW0   0101
11        REM0   0100
12        INC0   0011
13        DEC0   0010
14        CLR0   0001
15        PERC   0000


In [5]:
# R/M field encodings for the memory addressing modes (mod 00, 01 and 10).
# The three mode columns are identical except for code 110, which is
# "direct" under mod 00 and "BP" under the two displacement modes.
RM = pd.DataFrame(
    {
        'R/M': ["000", "001", "010", "011", "100", "101", "110", "111"],
        'mod= 00': ["BX+SI", "BX+DI", "BP+SI", "BP+DI", "SI", "DI", "direct", "BX"],
        'mod= 01 (...+D8)': ["BX+SI", "BX+DI", "BP+SI", "BP+DI", "SI", "DI", "BP", "BX"],
        'mod= 10(...+D16)': ["BX+SI", "BX+DI", "BP+SI", "BP+DI", "SI", "DI", "BP", "BX"],
    }
)
RM = RM.astype(pd.StringDtype())  # every column stored with pandas' string dtype
print(RM)

   R/M mod= 00 mod= 01 (...+D8) mod= 10(...+D16)
0  000   BX+SI            BX+SI            BX+SI
1  001   BX+DI            BX+DI            BX+DI
2  010   BP+SI            BP+SI            BP+SI
3  011   BP+DI            BP+DI            BP+DI
4  100      SI               SI               SI
5  101      DI               DI               DI
6  110  direct               BP               BP
7  111      BX               BX               BX


In [6]:
# R/M field encodings for mod 11 (register operand): the register named by
# each 3-bit code when W = 0 (byte) and when W = 1 (word).
RM_mod11 = pd.DataFrame(
    list(
        zip(
            ["000", "001", "010", "011", "100", "101", "110", "111"],
            ["AL", "CL", "DL", "BL", "AH", "CH", "DH", "BH"],
            ["AX", "CX", "DX", "BX", "SP", "BP", "SI", "DI"],
        )
    ),
    columns=['R/M', 'W= 0', 'W= 1'],
)
RM_mod11 = RM_mod11.astype(pd.StringDtype())  # every column stored with pandas' string dtype
print(RM_mod11)

   R/M W= 0 W= 1
0  000   AL   AX
1  001   CL   CX
2  010   DL   DX
3  011   BL   BX
4  100   AH   SP
5  101   CH   BP
6  110   DH   SI
7  111   BH   DI


## Code for the program

In [None]:
def print_InsSet():
    """Print the list of permissible instruction formats and the usage notes.

    Takes no arguments and returns None; the text is written to standard
    output with two ``print`` calls.
    """
    print('''The following instructions are permissible:\n 
            1.MOV1 Rn,direct\n
            2.MOV2 Rn,[Rm]\n
            3.MOV3 Rn,Rm\n
            4.MOV4 Rn,relative\n
            5.FACT Rn,Rm\n
            6.AND0 Rn,Rm\n
            7.ADD0 Rn,Rm\n
            8.SUB0 Rn,Rm\n
            9.MUL0 Rn,Rm\n
            10.DIV0 Rn,Rm\n
            11.POW0 Rn,Rm\n
            12.REM0 Rn,Rm\n
            13.INC0 Rn\n
            14.DEC0 Rn\n
            15.CLR0 Rn\n
            16.PERC Rn,Rm\n''')
    print('''Instructions:\n
            1. You can use the available registers for the instructions\n
            2.Please type in the instruction according to given format\n
            3.Displacement of only 8 and 16 bits is allowed\n
            4.Please refer to the manual of the instruction set to see the available registers\n''')


def get_opcode():
    """Start ``machine_code`` with the opcode of the parsed mnemonic.

    Reads the mnemonic from ``components[0]``, looks it up in the
    ``Instruction`` column of the ``opcode`` table and rebinds the global
    ``machine_code`` to the matching ``Opcode`` string.

    Exits through ``sys.exit`` when the mnemonic is not in the table.
    """
    global machine_code
    global components

    # Match against the Instruction column only. Testing membership in the
    # whole table would also accept opcode bit strings such as "1111" and
    # then fail on the empty lookup below.
    matches = opcode['Opcode'][opcode['Instruction'] == components[0]]
    if len(matches) == 0:
        sys.exit("This instruction does not exist!")

    machine_code = str(matches.iloc[0])  # extracting opcode
    
def get_D_bit():
    """Append the direction (D) bit to the global ``machine_code``.

    The bit is always "1" because the destination operand is always a
    register.
    """
    global direction
    global machine_code
    machine_code += "1"

def get_W_bit():
    """Append the width (W) bit and record it in the global ``writing_bit``.

    The width is taken from the second character of the destination
    register ``components[1]``: ``L`` or ``H`` selects "0" (byte) and ``X``,
    ``P`` or ``I`` selects "1" (word). The same bit is appended to the global
    ``machine_code``.

    Exits through ``sys.exit`` for any other second character.
    """
    global writing_bit, machine_code, components

    size_letter = components[1][1:2]

    if size_letter in ('L', 'H'):
        width_flag = "0"
    elif size_letter in ('X', 'P', 'I'):
        width_flag = "1"
    else:
        sys.exit("Invalid register used")

    machine_code = machine_code + width_flag
    writing_bit = width_flag

def get_mod():
    """Append the 2-bit mod field and store it in the global ``mod``.

    The mode is derived from the mnemonic ``components[0]`` and the shape of
    the source operand ``components[2]``:

    * INC0 / DEC0 / CLR0                      -> "00"
    * operand starting with a digit (data)    -> "00"
    * operand not starting with "["           -> "11" (register)
    * "[Rb+Ri]" or "[R]"                      -> "00" (no displacement)
    * "[R+dd?]" / "[Rb+Ri+dd?]"               -> "01" (2-digit displacement)
    * "[R+dddd?]" / "[Rb+Ri+dddd?]"           -> "10" (4-digit displacement)

    The displacement size is recognised by the position of the closing "]".
    Note that this rebinds the module-level name ``mod`` to a bit string.

    Exits through ``sys.exit`` when the source operand is missing or the
    closing bracket is at none of the supported positions.
    """
    global mod
    global machine_code
    global components

    if components[0] in ("INC0", "DEC0", "CLR0"):
        mod_bits = "00"  # no displacement
    else:
        if len(components) < 3 or not components[2]:
            sys.exit("Invalid instruction")
        operand = components[2]

        # One-character slices are used instead of indexing so that an
        # operand shorter than expected compares unequal rather than
        # raising IndexError.
        if re.search(r'\d', operand[:1]):  # raw data is being moved into the register
            mod_bits = "00"
        elif operand[:1] != "[":  # r/m is a register
            mod_bits = "11"
        elif operand[3:4] == "+":
            if operand[6:7] == "]":  # [Rb+Ri]: two registers, no displacement
                mod_bits = "00"
            elif operand[7:8] == "]":  # [R+dd?]: 8-bit displacement
                mod_bits = "01"
            elif operand[9:10] == "]":  # [R+dddd?]: 16-bit displacement
                mod_bits = "10"
            elif operand[10:11] == "]":  # [Rb+Ri+dd?]: 8-bit displacement
                mod_bits = "01"
            elif operand[12:13] == "]":  # [Rb+Ri+dddd?]: 16-bit displacement
                mod_bits = "10"
            else:
                sys.exit("Displacement is not the required bits")
        else:
            mod_bits = "00"  # no displacement

    machine_code = machine_code + mod_bits
    mod = mod_bits

def append_zeros():
    """Append two "0" bits to the global ``machine_code``.

    The padding keeps the encoded instruction aligned to whole bytes.
    """
    global machine_code
    global components
    machine_code += "00"
    
def get_reg():
    """Append the 3-bit code of the destination register to ``machine_code``.

    The register name is ``components[1]``. It is looked up in the ``reg``
    table, in the byte column when ``writing_bit`` is "0" and in the word
    column when it is "1".

    Exits through ``sys.exit`` when ``writing_bit`` is neither "0" nor "1",
    or when the register is not listed for that width.
    """
    # defining global variables being used inside the function
    global writing_bit
    global machine_code
    global components

    if writing_bit == "0":  # 8-bit register is used
        column = 'W= 0 (byte)'
    elif writing_bit == "1":  # 16-bit register is used
        column = 'W= 1 (word)'
    else:
        sys.exit("Invalid instruction")

    codes = reg['Code'][reg[column] == components[1]]
    if len(codes) == 0:
        # An unknown name (e.g. "ZX") would otherwise raise IndexError on
        # the empty lookup result.
        sys.exit("Invalid register used")

    # appending machine code of location of register to the machine code
    machine_code = machine_code + codes.iloc[0]

def _rm_bits(table, column, name, error):
    """Return the 'R/M' code of the row whose `column` equals `name`, or exit with `error`."""
    hits = table['R/M'][table[column] == name]
    if len(hits) == 0:
        sys.exit(error)
    return hits.iloc[0]


def get_RM():
    """Append the 3-bit R/M field for the source operand to ``machine_code``.

    The source operand is ``components[2]``; how it is read depends on the
    global ``mod``:

    * "11": the first two characters name a register, looked up in
      ``RM_mod11`` under the width given by ``writing_bit``.
    * "00": "direct" for an operand starting with a digit, otherwise the
      text inside "[R]" or "[Rb+Ri]", looked up in ``RM``.
    * "01" / "10": the register part of "[R+disp]" or "[Rb+Ri+disp]",
      looked up in the ``RM`` column for that mode.

    Any other ``mod`` value leaves ``machine_code`` unchanged.

    Exits through ``sys.exit`` when the operand does not have one of these
    shapes or names something that is not in the table.
    """
    global writing_bit
    global mod
    global machine_code
    global components

    operand = components[2]

    if mod == "11":
        if writing_bit == "0":
            column, other_column = 'W= 0', 'W= 1'
        elif writing_bit == "1":
            column, other_column = 'W= 1', 'W= 0'
        else:
            sys.exit("Invalid instruction")

        name = operand[0:2]
        # The width check is done against the table. Comparing the second
        # letters of the two names would reject valid same-width pairs
        # such as AX/SI or AL/AH.
        if name in RM_mod11[other_column].tolist():
            error = "Size of the registers do not match"
        else:
            error = "Invalid register used"
        machine_code = machine_code + _rm_bits(RM_mod11, column, name, error)

    elif mod == "00":  # no displacement
        # One-character slices avoid IndexError on operands that are
        # shorter than the position being tested.
        if re.search(r'\d', operand[:1]):  # data is being moved directly
            name = "direct"
        elif operand[3:4] == "]":  # [R] is the source
            name = operand[1:3]
        elif operand[6:7] == "]":  # [Rb+Ri] is the source
            name = operand[1:6]
        else:
            sys.exit("You entered invalid R/M")
        machine_code = machine_code + _rm_bits(RM, 'mod= 00', name, "You entered invalid R/M")

    elif mod == "01" or mod == "10":
        if mod == "01":  # 8-bit displacement
            column, short_end, long_end = 'mod= 01 (...+D8)', 7, 10
        else:  # 16-bit displacement: use its own column of the table
            column, short_end, long_end = 'mod= 10(...+D16)', 9, 12

        if operand[short_end:short_end + 1] == "]":  # [R+disp]
            name = operand[1:3]
        elif operand[long_end:long_end + 1] == "]":  # [Rb+Ri+disp]
            name = operand[1:6]
        else:
            sys.exit("Invalid memory used")
        machine_code = machine_code + _rm_bits(RM, column, name, "Invalid memory used")

#extracting displacement
def dec_to_bin(x):
    """Convert a decimal number to a zero-padded binary string.

    The result is padded to four bits per decimal digit, the same width
    rule ``hex_to_bin`` applies per hex digit. A 2-digit value therefore
    always fills 8 bits and a 4-digit value 16 bits, instead of a width
    that shrinks with the value.

    x: the decimal value, as a digit string (or anything ``int`` accepts).
    Returns the binary digits as a string.
    Raises ValueError when ``x`` is not a valid decimal number.
    """
    digit_count = len(str(x))
    return bin(int(x))[2:].zfill(digit_count * 4)

def hex_to_bin(h):
    """Convert a hexadecimal digit string to binary, four bits per hex digit.

    h: the hexadecimal digits as a string, without prefix or suffix.
    Returns the binary digits, left-padded with zeros to ``len(h) * 4``.
    """
    binary_literal = bin(int(h, 16))  # e.g. "0b10010"
    bit_width = len(h) * 4
    return binary_literal[2:].zfill(bit_width)

def _literal_bits(digits, suffix):
    """Encode `digits` ('h' = hexadecimal, 'd' = decimal) as binary, four bits per digit."""
    if suffix == "h" and re.fullmatch(r'[0-9A-Fa-f]+', digits):
        value = int(digits, 16)
    elif suffix == "d" and re.fullmatch(r'[0-9]+', digits):
        value = int(digits)
    else:
        sys.exit("Invalid instruction")
    # Fixed width: 2 digits -> 8 bits, 4 digits -> 16 bits.
    return format(value, 'b').zfill(len(digits) * 4)


def get_DispOrData():
    """Append the displacement or immediate data bits to ``machine_code``.

    The source operand ``components[2]`` is read according to the global
    ``mod``:

    * "01": two digits plus an ``h``/``d`` suffix just before the closing
      "]" of "[R+..]" or "[Rb+Ri+..]".
    * "10": four digits plus suffix in the same positions.
    * "00" with an operand that does not start with "[": immediate data of
      two or four digits followed by the suffix.

    The value is emitted most-significant bit first, padded to four bits per
    digit. Any other combination appends nothing.

    Exits through ``sys.exit`` when the suffix is not ``h`` or ``d``, the
    digits are not valid for that base, or the immediate has an unsupported
    length.
    """
    # defining global variables being used
    global mod
    global machine_code
    global components

    operand = components[2]

    # One-character slices are used for the position tests so that a short
    # operand compares unequal instead of raising IndexError.
    if mod == "01":  # 8-bit displacement
        if operand[7:8] == "]":  # [R+dd?]
            digits, suffix = operand[4:6], operand[6:7]
        elif operand[10:11] == "]":  # [Rb+Ri+dd?]
            digits, suffix = operand[7:9], operand[9:10]
        else:
            return

    elif mod == "10":  # 16-bit displacement
        if operand[9:10] == "]":  # [R+dddd?]
            digits, suffix = operand[4:8], operand[8:9]
        elif operand[12:13] == "]":  # [Rb+Ri+dddd?]
            digits, suffix = operand[7:11], operand[11:12]
        else:
            return

    # data is to be moved directly into the register
    elif mod == "00" and operand[:1] != "[":
        # The size is decided by the operand length, not by probing
        # position 2 for a suffix: the hex digit "d" there (e.g. "12d4h")
        # would otherwise be mistaken for a decimal suffix.
        if len(operand) not in (3, 5):
            sys.exit("Invalid instruction")
        digits, suffix = operand[:-1], operand[-1]

    else:
        return

    machine_code = machine_code + _literal_bits(digits, suffix)

def bin_to_hex():
    """Return the global ``machine_code`` bit string as lowercase hexadecimal.

    The result has one hex digit per started group of four bits, so leading
    zero bits (for example the "0000" opcode of PERC) are kept rather than
    dropped.

    Raises ValueError when ``machine_code`` is empty or not a binary string.
    """
    global machine_code
    hex_digits = (len(machine_code) + 3) // 4
    # convert binary to int and int to zero-padded hexadecimal
    return format(int(machine_code, 2), 'x').zfill(hex_digits)

def main():
    """Prompt for one instruction, encode it and print the machine code.

    The input line is split on whitespace, commas and periods into the
    global ``components`` list (mnemonic first, then operands). The encoding
    helpers then build the bit string in the global ``machine_code``, which
    is printed in binary and in hexadecimal.

    Exits through ``sys.exit`` when the line is empty or has fewer operands
    than the mnemonic needs; the helpers exit on their own validation
    errors.
    """
    global machine_code
    global components
    global ins

    print_InsSet()

    ins = input("Please type in the instruction: ")
    # Raw string keeps "\s" a regex escape. Empty pieces are dropped so that
    # "AX, BX" or doubled spaces do not produce empty operands.
    components = [part for part in re.split(r'\s|[,.]', ins) if part]

    if not components:
        sys.exit("This instruction does not exist!")

    get_opcode()

    # INC0 / DEC0 / CLR0 take one register; every other instruction takes a
    # destination register and a source operand.
    single_operand = components[0] in ("INC0", "DEC0", "CLR0")
    required = 2 if single_operand else 3
    if len(components) < required:
        sys.exit("Invalid instruction")

    if not single_operand:
        get_D_bit()
        get_W_bit()
        get_mod()
        append_zeros()
        get_reg()
        get_RM()
        get_DispOrData()
    else:
        get_W_bit()
        get_reg()

    print("Machine code in binary:")
    print(machine_code)
    print("Machine code in hexadecimal:")
    print(bin_to_hex())  # converting machine code from binary to hexadecimal

# Start the interactive converter only when this file is run as a script,
# not when it is imported.
if __name__ == "__main__":
    main()
    