In [1]:
import opcodes 


In [2]:
from opcodes.x86_64 import read_instruction_set
instruction_set = read_instruction_set()  # set of Instruction instances 

# create an dictionary containing initial default value for latency 
latency_dict = dict() 
for instr in instruction_set: 
    for form in instr.forms: 
        # instances of InstructionForm -- forms mainly differen by operands 
        name = form.name  # base mnemonic (i think) 
        gas_name = form.gas_name  # name with possible modifiers (I think) 
        operands = [op.type for op in form.operands]
        entry_key = (name,gas_name ) + tuple(operands)
        latency_dict[entry_key] = 1
        
    

In [3]:
import os
import pandas as pd

data_dir = os.path.abspath("../data1/")
ods_file = os.path.join(data_dir, "skylave_extracted.ods")
raw_data = pd.read_excel(ods_file, engine="odf")

latency_data = raw_data[['instruction', 'operands', 'latency']]
latency_data = latency_data.drop(latency_data[latency_data.instruction.isnull()].index)

# iterate over rows 
latency = None 

def convert_latency(lat):
    """
    Convert the given latency to a fixed type (float) -- ensure uniformity between rows 
    """
    if isinstance(lat, int):
        return lat
    elif isinstance(lat, float): 
        return int(lat)
    elif isinstance(lat, str):
        try: 
            # try casting string to float and returning it 
            return int(lat)
        except ValueError: 
            # if it doesn succeed, continue with rest of the function z
            pass 
        # several special cases, handle seperately 
        if "-" in lat: 
            # latency of type 'a-b' -- return float(b) 
            return int(lat.split("-")[-1])
        if 'b' in lat: 
            return -1 
        if "~" in lat: 
            return int(lat[1:])

### Step 1 : Ensure every row has a valid latency value ### 
for index, row in latency_data.iterrows():
    if row.isnull().latency == False:
        converted_latency = convert_latency(row.latency)  # convert latency to a float
        assert isinstance(converted_latency, int)
        row.latency = converted_latency # store this converted value
        latency = converted_latency # store locally as wel
    else:
        row.latency = latency

print("NaN values in latency: ", latency_data.latency.isnull().any())
latency_data.head(20)


NaN values in latency:  False


Unnamed: 0,instruction,operands,latency
2,MOV,"r,i",1
3,MOV,"r8/16,r8/16",1
4,MOV,"r32/64,r32/64",1
5,MOV,"r8l,m",1
6,MOV,"r8h,m",1
7,MOV,"r16,m",1
8,MOV,"r32/64,m",2
9,MOV,"m,r",2
10,MOV,"m,i",2
11,MOVNTI,"m,r",400


In [9]:
### Step 2 : Split up rows that contain multiple instructions seperated by spaces ### 
latency_data_cpy = latency_data.copy(deep=True)

multi_instructions = latency_data_cpy['instruction'].str.contains(" ")

target_rows_index = latency_data_cpy[multi_instructions].index

new_rows = []

for i in target_rows_index: 
    row = latency_data_cpy.loc[i]
    operands = row['operands']
    latency = row['latency']
    names = list(filter(lambda x : len(x) > 0, row['instruction'].split(" ")))
    
    # create new rows, one for each seperate name 
    new_rows += [[n, operands, latency] for n in names] 

# create a dataframe that contains the new rows 
new_data = pd.DataFrame(new_rows, columns = latency_data_cpy.columns)

# delete the rows that have just been split up from the original dataframe
latency_data_cpy.drop(target_rows_index, inplace=True)

# finally, append/concatenate the dataframe containing the newly created rows 
processed_latency = pd.concat([latency_data_cpy, new_data])
processed_latency.reset_index(drop=True)
processed_latency.loc[processed_latency['instruction'].str.contains("SUB")]
processed_latency.head(10)

Unnamed: 0,instruction,operands,latency
2,MOV,"r,i",1
3,MOV,"r8/16,r8/16",1
4,MOV,"r32/64,r32/64",1
5,MOV,"r8l,m",1
6,MOV,"r8h,m",1
7,MOV,"r16,m",1
8,MOV,"r32/64,m",2
9,MOV,"m,r",2
10,MOV,"m,i",2
11,MOVNTI,"m,r",400


In [5]:
"""
From the instructions documen : 
Operands can be different types of registers, memory, or immediate constants. Ab-
breviations used in the tables are: i = immediate constant, r = any general purpose
register, r32 = 32-bit register, etc., mm = 64 bit mmx register, x or xmm = 128 bit xmm
register, y = 256 bit ymm register, z = 512 bit zmm register, v = any vector register, sr
= segment register, m = any memory operand including indirect operands, m64
means 64-bit memory operand, etc
"""
register_names = ["r"] + [f"r{x}" for x in [8, 16, 32, 64, 128, 256]] + [f"{x}mm" for x in ["", "x", "y", "z"]]
register_names += ["x", "y", "z"]
register_names += ["v", "sr"]
register_names += ['r8l', 'r8h']
register_names += ['AX', 'ax']

memory_operands = [f"m{x}" for x in ['', 8, 16, 32, 64, 128, 256, 512]]

immediate_operands = ["i"]

all_types = register_names + memory_operands + immediate_operands

def is_type(a): 
    return a in all_types 

def is_reg(a):
    if isinstance(a, opcodes.x86_64.Operand):
        return a.is_register
    else: 
        return a in register_names

def is_mem(a):
    if isinstance(a, opcodes.x86_64.Operand):
        return a.is_memory
    else: 
        return a in memory_operands

def is_imm(a):
    if isinstance(a, opcodes.x86_64.Operand):
        return a.is_immediate
    else: 
        return a in immediate_operands

def type_match(a, b): 
    if is_reg(a) and is_reg(b):
        return True 
    elif is_mem(a) and is_mem(b): 
        return True 
    elif is_imm(a) and is_imm(b): 
        return True 
    else: 
        return False 


In [6]:
latency_dict_operands = processed_latency['operands']

def cast_to_types(op_types):
    if not False in map(lambda x : is_type(x), op_types): 
        return op_types
    # first, determine if this is some special case 
    if '[' in op_types[0] and ']' in op_types[0]: 
        # 'r+s*y' (represents some compound type maybe? IDK?)
        return []
    elif 'stack pointer' in op_types[0]: 
        # TODO: check if stack pointer needs its own type 
        return ['m'] 
    elif 'cl' in op_types[0]: 
        # no clue what this could be 
        return []
    elif 'short' in op_types[0] or 'near' in op_types[0]:
        # TODO check if distinction between short and near is important for latency 
        return ['m'] 
    elif 'a' in op_types[0] or 'b' in op_types[0]:        
        return ['i16'] # use only in enter (and somehwere else, idk) https://www.felixcloutier.com/x86/enter
    elif '0' in op_types[0] or '1' in op_types[0]: 
        return ['i16'] # TODO nakijken 
    elif len(op_types) == 2 and is_type(op_types[0]):
        # cast second op type to type of first -- 
        # at this point you can simply preprend first char to get crrect type 
        return [op_types[0], op_types[0][0] + op_types[1]] 
    

def extract_operand_types(op_string):
    """
    Return for each operand position (seperated by commas) a list of possible operand typess 
    eg input 'r32/64,r32/64' return [r32, r64], [r32, r64] 
    """

    if op_string == "nan" or op_string is None or isinstance(op_string, float):
        # nan is apparently of type float 
        return [] 
    
    operand_positions = op_string.split(",")
    operand_types = []
    for ops in operand_positions:
        # now we are looking at individual operands -- ensure they are one of the types defined above, if not cast 
        op_types = list(map(lambda x : x.strip(), ops.split("/")))
        operand_types.append(cast_to_types(op_types))
    return operand_types


In [7]:
# for each value in the dicationary, query the latency data


def length_diff(a, b): 
    return abs(len(a) - len(b))

def best_candidate(k, candidates): 
    # get the candidate that is the best/closest match to key k (in terms of name, operands, etc)
    if len(candidates) == 1: 
        return candidates.iloc[0]
    else: 
        # sort candidates based on length similarity 
        target_name, target_gas_name = key[:2]
        operands = key[2:] if len(key) > 2 else []         
        candidates = candidates.values.tolist() 
        candidates.sort(key=lambda x: length_diff(x[0], key[0]))
        
        candidate_match_counts = []
        for c in candidates: 
            candidate_ops = c[1]
            candidate_op_types = extract_operand_types(candidate_ops)
            match_count = 0 
            for op, candidate_types in zip(operands, candidate_op_types): 
                op = opcodes.x86_64.Operand(op)
                for c in candidate_types:
                    if type_match(op, c):
                        match_count += 1 
                        break 
            candidate_match_counts.append(match_count) 
        max_i = candidate_match_counts.index(max(candidate_match_counts))
        return candidates[max_i]    
        
for key, value in latency_dict.items():
    name = key[0]  # get the instruction name
    gas_name = key[1]
    #  find all rows that potentially contain this instruction 
    candidates = processed_latency[processed_latency['instruction'].str.contains(name, case=False)] 
    if len(candidates) == 0: 
        continue 
    # find the best match among these candidates 
    best = best_candidate(key, candidates) 
    best_latency = best[-1]
    latency_dict[key] = best_latency

In [8]:
latency_dict


{('ADC', 'adcb', 'al', 'imm8'): 1,
 ('ADC', 'adcb', 'r8', 'imm8'): 1,
 ('ADC', 'adcb', 'r8', 'r8'): 1,
 ('ADC', 'adcb', 'r8', 'm8'): 1,
 ('ADC', 'adcw', 'ax', 'imm16'): 1,
 ('ADC', 'adcw', 'r16', 'imm8'): 1,
 ('ADC', 'adcw', 'r16', 'imm16'): 1,
 ('ADC', 'adcw', 'r16', 'r16'): 1,
 ('ADC', 'adcw', 'r16', 'm16'): 1,
 ('ADC', 'adcl', 'eax', 'imm32'): 1,
 ('ADC', 'adcl', 'r32', 'imm8'): 1,
 ('ADC', 'adcl', 'r32', 'imm32'): 1,
 ('ADC', 'adcl', 'r32', 'r32'): 1,
 ('ADC', 'adcl', 'r32', 'm32'): 1,
 ('ADC', 'adcq', 'rax', 'imm32'): 1,
 ('ADC', 'adcq', 'r64', 'imm8'): 1,
 ('ADC', 'adcq', 'r64', 'imm32'): 1,
 ('ADC', 'adcq', 'r64', 'r64'): 1,
 ('ADC', 'adcq', 'r64', 'm64'): 1,
 ('ADC', 'adcb', 'm8', 'imm8'): 5,
 ('ADC', 'adcb', 'm8', 'r8'): 5,
 ('ADC', 'adcw', 'm16', 'imm8'): 5,
 ('ADC', 'adcw', 'm16', 'imm16'): 5,
 ('ADC', 'adcw', 'm16', 'r16'): 5,
 ('ADC', 'adcl', 'm32', 'imm8'): 5,
 ('ADC', 'adcl', 'm32', 'imm32'): 5,
 ('ADC', 'adcl', 'm32', 'r32'): 5,
 ('ADC', 'adcq', 'm64', 'imm8'): 5,
 ('AD