# LEGv8 Helpers and References

In [1]:
import os
import pandas as pd

In [140]:
# Capture the notebook's current working directory through the shell
# (IPython `!` syntax; the command output is returned as a list of lines).
orig_dir = !pwd
# Location of the CSV opcode reference table loaded in the next cell.
# NOTE(review): absolute, machine-specific path -- the notebook cannot run on
# another machine without editing this value.
path_ref = "/Users/brtonnies/Desktop/legv8-instructions.csv"

In [141]:
# Load the opcode reference table; each row describes one instruction
# (mnemonic, opcode, opcode width in bits, opcode range, format letter).
# NOTE(review): the `opcode` column is displayed without leading zeros (B shows
# opcode 101 with opcode_size 6), so any comparison against it must zero-pad
# the value to `opcode_size` first.
df = pd.read_csv(path_ref, index_col=False)
df

Unnamed: 0,instruction,opcode,opcode_size,opcode_bit_range,format
0,B,101,6,160-191,B
1,STURB,111000000,11,448,D
2,LDURB,111000010,11,450,D
3,B.cond,1010100,8,672-679,CB
4,ORRI,1011001000,10,712-713,I
5,EORI,1101001000,10,840-841,I
6,STURH,1111000000,11,960,D
7,LDURH,1111000010,11,962,D
8,AND,10001010000,11,1104,R
9,ADD,10001011000,11,1112,R


## Using the Dataframe (`df`) and `formats` Objects

+ The `formats` object defined in the next cell is a dictionary that holds, for each of the main LEGv8 instruction formats, the `[start, end)` bit positions of every field (counted from the leftmost bit, so they can be used directly as Python slice bounds)

In [142]:
# Field layout of every LEGv8 instruction format.
# Each field maps to [start, end): zero-based bit positions counted from the
# leftmost bit of the 32-character instruction string, usable as slice bounds.
formats = {
    # opcode(11) | Rm(5) | shamt(6) | Rn(5) | Rd(5)
    'R': {'opcode': [0, 11], 'Rm': [11, 16], 'shamt': [16, 22], 'Rn': [22, 27], 'Rd': [27, 32]},
    # opcode(11) | dt_address(9) | op(2) | Rn(5) | Rt(5)
    'D': {'opcode': [0, 11], 'dt_address': [11, 20], 'op': [20, 22], 'Rn': [22, 27], 'Rt': [27, 32]},
    # opcode(10) | alu_immediate(12) | Rn(5) | Rd(5)
    'I': {'opcode': [0, 10], 'alu_immediate': [10, 22], 'Rn': [22, 27], 'Rd': [27, 32]},
    # opcode(6) | br_address(26)
    'B': {'opcode': [0, 6], 'br_address': [6, 32]},
    # opcode(8) | cond_br_address(19) | Rt(5)
    'CB': {'opcode': [0, 8], 'cond_br_address': [8, 27], 'Rt': [27, 32]},
    # opcode(11) | mov_immediate(16) | Rd(5)
    'IW': {'opcode': [0, 11], 'mov_immediate': [11, 27], 'Rd': [27, 32]},
}

formats.keys()

dict_keys(['R', 'D', 'I', 'B', 'CB', 'IW'])

In [143]:
def hex2bin(hexstr):
    """Convert a hexadecimal string to a binary string, 4 bits per hex digit.

    Leading zeros are preserved so that an 8-digit instruction word always
    yields 32 bits; without this, a word such as ``14000001`` would lose its
    leading zero bits and every field slice would be misaligned.

    Args:
        hexstr: Hexadecimal digits, optionally prefixed with ``0x``/``0X``.

    Returns:
        The binary representation, ``4 * number_of_hex_digits`` characters
        long (prefixed with ``-`` if the input was negative).

    Raises:
        ValueError: If ``hexstr`` is not valid hexadecimal.
    """
    value = int(hexstr, 16)

    # Count only real hex digits: drop whitespace, digit separators, sign and prefix.
    digits = hexstr.strip().replace('_', '').lstrip('+-')
    if digits[:2].lower() == '0x':
        digits = digits[2:]

    bits = bin(abs(value))[2:].zfill(4 * len(digits))
    return ('-' if value < 0 else '') + bits

def bin2hex(binstr):
    """Return the ``0x``-prefixed lowercase hex form of a binary string."""
    value = int(binstr, 2)
    return hex(value)

def dec2bin(n):
    """Convert an integer to its binary string (no ``0b`` prefix).

    Negative values are rendered as ``-`` followed by the magnitude, so the
    result always round-trips through ``int(result, 2)``. (Slicing ``bin(n)``
    directly would turn ``-5`` into the unparsable ``'b101'``.)

    Args:
        n: The integer to convert.

    Returns:
        The minimal-width binary representation of ``n``.
    """
    sign = '-' if n < 0 else ''
    return sign + bin(abs(n))[2:]

def bin2dec(binstr):
    """Parse a string of binary digits and return its integer value."""
    return int(binstr, base=2)

def sign_extend(binstr, arch=32):
    """Widen a binary string to ``arch`` bits by repeating its leftmost bit.

    Strings that are already ``arch`` characters or longer are returned
    unchanged.
    """
    sign_bit = binstr[0]
    # A non-positive repeat count yields an empty prefix.
    padding = sign_bit * (arch - len(binstr))
    return padding + binstr

def left_shift(binstr, shamt=2):
    """Shift a binary string left by ``shamt`` bits, filling with zeros.

    The result grows by ``shamt`` characters; no bits are discarded. The
    shift amount was previously ignored and two zeros were always appended.

    Args:
        binstr: The binary string to shift.
        shamt: Number of zero bits to append (default 2, i.e. multiply by 4).

    Returns:
        ``binstr`` followed by ``shamt`` zero characters.
    """
    return binstr + '0' * shamt

## Given the Hex Instruction: 0xf8014062 (32-bit)

+ `0xf8014062` --> `1111 1000 0000 0001 0100 0000 0110 0010` in binary

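Let's identify which instruction it is and what the values of its fields are! (Note: the code cell below currently decodes `CB0B039B`, a `SUB` instruction, rather than `0xf8014062`; set `hex_inst = 'f8014062'` to follow the example above.)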

In [183]:
# Placeholder only: `instruction` is rebound to the matched opcode row in a later cell.
instruction = pd.DataFrame()
# The 32-bit instruction word to decode, as hex digits without a 0x prefix.
hex_inst = 'CB0B039B'
# Binary string form of the word; every later field slice indexes into this.
bin_inst = hex2bin(hex_inst) # str(bin(int(hex_inst, 16)))[2:]
# inst = '11111000000000010100000001100010'

# Round-trip back to hex; the result is discarded because only the cell's
# last expression is displayed.
bin2hex(bin_inst)
# sign_extend('000010100')
bin_inst

'11001011000010110000001110011011'

In [178]:
# Determine the instruction type by matching the leading bits of `bin_inst`
# against every opcode in the reference table.
inst_type = None
for i in range(len(df)):
    f = dict(df.iloc[i])
    opcode_size = int(f['opcode_size'])
    # The opcode column is parsed as an integer, so leading zeros are lost
    # (e.g. B is stored as 101 although its opcode is 6 bits wide). Pad it back
    # to its declared width before comparing, otherwise such opcodes never match.
    if bin_inst[:opcode_size] == str(f['opcode']).zfill(opcode_size):
        inst_type = f

# Fail here with a clear message instead of letting a later cell crash on None.
if inst_type is None:
    raise ValueError("No opcode in the reference table matches instruction {}".format(bin_inst))

# `instruction` aliases the matched row dict; later cells add the decoded fields to it.
instruction = inst_type
instruction

{'instruction': 'SUB',
 'opcode': 11001011000,
 'opcode_size': 11,
 'opcode_bit_range': '1624',
 'format': 'R'}

In [179]:
# Slice each non-opcode field out of the instruction bits, store it on the
# `instruction` dict, then wrap the whole record in a pandas Series.
f = formats[instruction['format']]
fields = list(f)

for key, pos in f.items():
    if key == 'opcode':
        # The opcode is already part of the matched table row.
        continue
    instruction[key] = bin_inst[pos[0]:pos[1]]

fields += ['instruction']
inst_frame = pd.Series(data=instruction)

## The Instruction:

Well, we have the basic pieces now.  Let's see what we've got:

In [180]:
inst_frame

instruction                 SUB
opcode              11001011000
opcode_size                  11
opcode_bit_range           1624
format                        R
Rm                        01011
shamt                    000000
Rn                        11100
Rd                        11011
dtype: object

# Now What?

The 3 Instruction Classes:
+ R-Type
+ Load/Store
+ Conditional Branch

The ALU Control Signal:
+ The 26th bit of the LEGv8 instruction (5th bit with zero-based left to right indexing) is 0 for data transfer instructions and 1 for conditional branch instructions

In [181]:
# Location of the CSV opcode reference table read by Instruction.__init__
# (same value as the path_ref assigned near the top of the notebook).
# NOTE(review): absolute, machine-specific path.
path_ref = "/Users/brtonnies/Desktop/legv8-instructions.csv"

class Instruction:
    """Decode one 32-bit LEGv8 machine instruction into its named bit fields.

    The instruction may be supplied as a hexadecimal string (with or without a
    ``0x`` prefix), a string of binary digits, or an integer. After a
    successful decode the instance exposes:

    * ``instr`` -- the value exactly as supplied
    * ``instruction_base`` -- 16, 2 or 10, as reported by ``get_base``
    * ``hex_instruction`` / ``binary_instruction`` -- the word in both bases;
      ``binary_instruction`` is always zero-padded to 32 bits
    * ``instruction`` -- dict holding the matched opcode-table row plus one
      binary string per field of the instruction's format
    * ``instruction_frame`` -- the same record as a ``pandas.Series``

    The opcode table is read from the module-level ``path_ref`` every time an
    instance is constructed.
    """

    WORD_SIZE = 32  # width, in bits, of a LEGv8 instruction word

    def __init__(self, instr=None):
        """Load the opcode table and, if ``instr`` is given, decode it.

        Args:
            instr: Hex string, binary string or integer; ``None`` only loads
                the reference data.

        Raises:
            ValueError: If ``instr`` is malformed, wider than 32 bits, or
                matches no opcode in the table.
        """
        self.format_data = pd.read_csv(path_ref, index_col=False)
        self.formats = self._default_formats()

        # Defined up front so attribute access is safe when nothing is decoded.
        self.instr = None
        self.instruction_base = None
        self.hex_instruction = None
        self.binary_instruction = None
        self.instruction_type = None
        self.instruction = None
        self.instruction_frame = None

        if instr is not None:
            self._decode(instr)

    @staticmethod
    def _default_formats():
        """Return a fresh map of format letter -> field name -> [start, end) bit slice."""
        return {
            'R': {
                'opcode': [0, 11],
                'Rm': [11, 16],  # 5 bits
                'shamt': [16, 22],  # 6 bits
                'Rn': [22, 27],  # 5 bits
                'Rd': [27, 32],  # 5 bits
            },
            'D': {
                'opcode': [0, 11],
                'dt_address': [11, 20],  # 9 bits
                'op': [20, 22],  # 2 bits
                'Rn': [22, 27],  # 5 bits
                'Rt': [27, 32],  # 5 bits
            },
            'I': {
                'opcode': [0, 10],
                'alu_immediate': [10, 22],  # 12 bits
                'Rn': [22, 27],  # 5 bits
                'Rd': [27, 32],  # 5 bits
            },
            'B': {
                'opcode': [0, 6],
                'br_address': [6, 32],  # 26 bits
            },
            'CB': {
                'opcode': [0, 8],
                'cond_br_address': [8, 27],  # 19 bits
                'Rt': [27, 32],  # 5 bits
            },
            'IW': {
                'opcode': [0, 11],
                'mov_immediate': [11, 27],  # 16 bits
                'Rd': [27, 32],  # 5 bits
            },
        }

    def _decode(self, instr):
        """Normalise ``instr`` to 32 bits, match its opcode and slice out its fields."""
        self.instr = instr
        self.instruction_base = self.get_base(instr)

        if self.instruction_base == 16:
            bits = self.hex2bin(instr)
        elif self.instruction_base == 2:
            bits = instr.strip()
        else:
            bits = self.dec2bin(instr)

        # Reject anything that is not a plain bit pattern (e.g. a negative integer).
        if not bits or set(bits) - {'0', '1'}:
            raise ValueError("Invalid instruction {!r}: not a non-negative bit pattern".format(instr))
        if len(bits) > self.WORD_SIZE:
            raise ValueError("Invalid instruction {!r}: wider than {} bits".format(instr, self.WORD_SIZE))

        # Field positions are absolute, so the word must be exactly 32 bits wide.
        self.binary_instruction = bits.zfill(self.WORD_SIZE)
        if self.instruction_base == 16:
            self.hex_instruction = instr
        else:
            self.hex_instruction = self.bin2hex(self.binary_instruction)

        # Kept for backward compatibility; it has never been populated.
        self.instruction_type = None

        self.instruction = self._match_opcode(self.binary_instruction)
        if self.instruction is None:
            raise ValueError("No opcode in the reference table matches instruction {}".format(self.binary_instruction))

        layout = self.formats[self.instruction['format']]
        for name, (start, stop) in layout.items():
            if name != 'opcode':  # the opcode already comes from the table row
                self.instruction[name] = self.binary_instruction[start:stop]

        self.instruction_frame = pd.Series(data=self.instruction)

    def _match_opcode(self, bits):
        """Return the opcode-table row (as a dict) whose opcode prefixes ``bits``, or None."""
        match = None
        for i in range(len(self.format_data)):
            row = dict(self.format_data.iloc[i])
            size = int(row['opcode_size'])
            # Opcodes parsed as integers lose their leading zeros; pad them back
            # to the declared width before comparing.
            if bits[:size] == str(row['opcode']).zfill(size):
                match = row  # as before, the last matching row wins
        return match

    def print_frame(self):
        """Print the decoded instruction Series."""
        print(self.instruction_frame)

    def get_base(self, n):
        """Report the numeric base ``n`` is written in.

        Args:
            n: The instruction as supplied by the caller.

        Returns:
            10 for any non-string value, 2 for a string made only of ``0``/``1``
            (without a ``0x`` prefix), 16 for any other valid hex string.

        Raises:
            ValueError: If ``n`` is a string that is empty or contains
                characters that are not hexadecimal digits.
        """
        if not isinstance(n, str):
            return 10

        text = n.strip()
        has_hex_prefix = text[:2].lower() == '0x'
        digits = text[2:] if has_hex_prefix else text

        if digits and not has_hex_prefix and all(c in '01' for c in digits):
            return 2
        if digits and all(c in '0123456789abcdefABCDEF' for c in digits):
            return 16
        raise ValueError("Invalid Instruction Format: {} is not in a recognized base (hex/binary/decimal)".format(n))

    def hex2bin(self, hexstr):
        """Convert a hex string (optional ``0x`` prefix) to binary, 4 bits per digit.

        Leading zeros are preserved so an 8-digit word always yields 32 bits.
        """
        value = int(hexstr, 16)

        # Count only real hex digits: drop whitespace, separators, sign and prefix.
        digits = hexstr.strip().replace('_', '').lstrip('+-')
        if digits[:2].lower() == '0x':
            digits = digits[2:]

        bits = bin(abs(value))[2:].zfill(4 * len(digits))
        return ('-' if value < 0 else '') + bits

    def bin2hex(self, binstr):
        """Return the ``0x``-prefixed lowercase hex form of a binary string."""
        return hex(int(binstr, 2))

    def dec2bin(self, n):
        """Convert an integer to a binary string; negatives get a leading ``-``."""
        sign = '-' if n < 0 else ''
        return sign + bin(abs(n))[2:]

    def bin2dec(self, binstr):
        """Parse a binary string and return its integer value."""
        return int(binstr, 2)

    def sign_extend(self, binstr, arch=32):
        """Widen ``binstr`` to ``arch`` bits by repeating its leftmost bit.

        Strings already ``arch`` characters or longer are returned unchanged.
        """
        return binstr[0] * (arch - len(binstr)) + binstr

    def left_shift(self, binstr, shamt=2):
        """Append ``shamt`` zero bits to ``binstr`` (the string grows; nothing is dropped)."""
        return binstr + '0' * shamt


In [182]:
# Decode the word 0xCB0B039B through the Instruction class and print the
# resulting field Series.
ins = Instruction('0xCB0B039B')
ins.print_frame()

instruction                 SUB
opcode              11001011000
opcode_size                  11
opcode_bit_range           1624
format                        R
Rm                        01011
shamt                    000000
Rn                        11100
Rd                        11011
dtype: object


In [168]:
# Scratch check: add two 8-bit binary strings as unsigned integers (156 + 199).
a = '10011100'
b = '11000111'

bin2dec(a)+ bin2dec(b)

355

In [169]:
dec2bin(355)

'101100011'

In [170]:
bin2dec('0111100110011001100110011001100110011001100110011010')

2139209823000986

In [174]:
# Scratch check: read a 52-character bit string as a binary fraction by
# dividing its integer value by 2**(number of bits).
# NOTE(review): `f` rebinds the name earlier cells use for a format/row dict.
f = '0111100110011001100110011001100110011001100110011010'
# Result discarded -- only the cell's last expression is displayed.
len(f)
print(bin2dec(f))
print(2**len(f))
bin2dec(f) / 2** len(f)

2139209823000986
4503599627370496


0.4750000000000001

In [184]:
bin2hex('11000001001110100000000000000000')

'0xc13a0000'