## NOP insructions
The aim of this notebook is to find and test no operation instructions of various latencies. These are operations that can be executed without affecting the state of the program (e.g. multiplying a register with 1) 

In [1]:
%load_ext autoreload
%autoreload 2
!pwd
import os
import sys 
if os.path.abspath("/home/gilles/git-repos/NemesisRetroWrite/retrowrite") not in sys.path:
    sys.path.append(os.path.abspath("/home/gilles/git-repos/NemesisRetroWrite/retrowrite")) 


/home/gilles/git-repos/NemesisRetroWrite/retrowrite/rwtools/nemesis/notebooks


In [2]:
from rwtools.nemesis.nop_instructions import get_nop_instruction

get_nop_instruction(1)

('addl $0, %eax', [])

In [3]:
from rwtools.nemesis.LatencyMapper import construct_latency_mapper

LAT_MAP_IF = "/home/gilles/git-repos/NemesisRetroWrite/retrowrite/rwtools/nemesis/utils/pickled_latency_map.p" 
latency_mapper = construct_latency_mapper(LAT_MAP_IF)

base_map = latency_mapper.base_map


## analyze the possible latencies of instructions 
This is a list of all possible latencies -- for each of these latencies there should be (hopefully) some 
instruction that can serve as a nop instruction
questions to answer  
1) what are the different possible latencies  
2) how many instructions are there of the various latencies  

In [4]:
from collections import defaultdict

base_map = latency_mapper.base_map
latency_set = set(base_map.values())

# get all values that have the given latency, return the count 
count_dict = defaultdict(lambda : 0)

for _, latency in base_map.items(): 
    count_dict[latency] += 1 
    
count_dict

defaultdict(<function __main__.<lambda>()>,
            {1: 5071,
             5: 115,
             6: 63,
             4: 85,
             8: 4,
             12: 24,
             2: 130,
             3: 281,
             -1: 10,
             7: 59,
             23: 8,
             14: 17,
             11: 14,
             9: 6,
             13: 19,
             24: 4,
             500: 8,
             450: 7,
             400: 9,
             418: 1,
             10: 4,
             16: 14})

## Instructions of latency 1 
Easy: Multiply a register with itself   
`imulq %eax, %eax` 

In [5]:
candidate_keys = [k for k, v in base_map.items() if v == 1]
candidate_keys
latency_mapper.get_latency("addl", "$1, %eax")

1

## Instructions of latency 2
according to current latency map, the following instruction has latency 2   
`movd %xmm, %xmm` 

In [6]:
candidate_keys = [k for k, v in base_map.items() if v == 2]
candidate_keys

[('BEXTR', 'bextr', r32, r32, imm32),
 ('BEXTR', 'bextr', r32, r32, r32),
 ('BEXTR', 'bextr', r32, m32, imm32),
 ('BEXTR', 'bextr', r32, m32, r32),
 ('BEXTR', 'bextr', r64, r64, imm32),
 ('BEXTR', 'bextr', r64, r64, r64),
 ('BEXTR', 'bextr', r64, m64, imm32),
 ('BEXTR', 'bextr', r64, m64, r64),
 ('LFENCE', 'lfence'),
 ('MFENCE', 'mfence'),
 ('MOV', 'movb', m8, imm8),
 ('MOV', 'movb', m8, r8),
 ('MOV', 'movw', m16, imm16),
 ('MOV', 'movw', m16, r16),
 ('MOV', 'movl', m32, imm32),
 ('MOV', 'movl', m32, r32),
 ('MOV', 'movq', m64, imm32),
 ('MOV', 'movq', m64, r64),
 ('MOV', 'movabs', moffs32, eax),
 ('MOV', 'movabs', moffs64, rax),
 ('MOVBE', 'movbew', r16, m16),
 ('MOVBE', 'movbel', r32, m32),
 ('MOVBE', 'movbeq', r64, m64),
 ('MOVBE', 'movbew', m16, r16),
 ('MOVBE', 'movbel', m32, r32),
 ('MOVBE', 'movbeq', m64, r64),
 ('MOVD', 'movd', r32, mm),
 ('MOVD', 'movd', r32, xmm),
 ('MOVD', 'movd', mm, r32),
 ('MOVD', 'movd', mm, m32),
 ('MOVD', 'movd', xmm, r32),
 ('MOVD', 'movd', xmm, m32),

In [7]:
mnemonic = "movd"
op_str = "%xmm, %xmm"
latency_mapper.get_latency(mnemonic, op_str)

2

## Instructions of latency 3 
following insruction has latency 3   
`mull %eax`  
note, this modifies register `%eax` so this register has to be pushed and popped at start of stack 
(in all nodes that add this instruction) 

In [8]:
candidate_keys = [(k, v) for k, v in base_map.items() if v == 3]
candidate_keys


[(('BSF', 'bsfw', r16, r16), 3),
 (('BSF', 'bsfw', r16, m16), 3),
 (('BSF', 'bsfl', r32, r32), 3),
 (('BSF', 'bsfl', r32, m32), 3),
 (('BSF', 'bsfq', r64, r64), 3),
 (('BSF', 'bsfq', r64, m64), 3),
 (('BSR', 'bsrw', r16, r16), 3),
 (('BSR', 'bsrw', r16, m16), 3),
 (('BSR', 'bsrl', r32, r32), 3),
 (('BSR', 'bsrl', r32, m32), 3),
 (('BSR', 'bsrq', r64, r64), 3),
 (('BSR', 'bsrq', r64, m64), 3),
 (('CALL', 'call', rel32), 3),
 (('CALL', 'callq', r64), 3),
 (('CALL', 'callq', m64), 3),
 (('CMPXCHG', 'cmpxchgb', r8, r8), 3),
 (('CMPXCHG', 'cmpxchgw', r16, r16), 3),
 (('CMPXCHG', 'cmpxchgl', r32, r32), 3),
 (('CMPXCHG', 'cmpxchgq', r64, r64), 3),
 (('CMPXCHG', 'cmpxchgb', m8, r8), 3),
 (('CMPXCHG', 'cmpxchgw', m16, r16), 3),
 (('CMPXCHG', 'cmpxchgl', m32, r32), 3),
 (('CMPXCHG', 'cmpxchgq', m64, r64), 3),
 (('CMPXCHG16B', 'cmpxchg16b', m128), 3),
 (('CMPXCHG8B', 'cmpxchg8b', m64), 3),
 (('CRC32', 'crc32b', r32, r8), 3),
 (('CRC32', 'crc32w', r32, r16), 3),
 (('CRC32', 'crc32l', r32, r32), 3)

In [9]:
mnemonic = "mulq"
op_str = "%rax"
latency_mapper.get_latency(mnemonic, op_str)

mnemonic = "pushq"
op_str = "%rax"
print(f"{mnemonic} {op_str} - {latency_mapper.get_latency(mnemonic, op_str)}")

mnemonic = "popq"
op_str = "%rax"
print(f"{mnemonic} {op_str} - {latency_mapper.get_latency(mnemonic, op_str)}")

# mnemonic = "popq"
# op_str = "%rax"
# latency_mapper.get_latency(mnemonic, op_str)

pushq %rax - 3
popq %rax - 2


## Latency of push and pop instructions 

In [10]:
mnemonic = "pushq"
op_str = "%eax"
print(latency_mapper.get_latency(mnemonic, op_str))

mnemonic = "popq"
op_str = "%eax"
print(latency_mapper.get_latency(mnemonic, op_str))


3
2


# Instruction of latency 5 


In [11]:
candidate_keys = [(k, v) for k, v in base_map.items() if v == 5]
for k in candidate_keys:
    print(k)



(('ADC', 'adcb', m8, imm8), 5)
(('ADC', 'adcb', m8, r8), 5)
(('ADC', 'adcw', m16, imm8), 5)
(('ADC', 'adcw', m16, imm16), 5)
(('ADC', 'adcw', m16, r16), 5)
(('ADC', 'adcl', m32, imm8), 5)
(('ADC', 'adcl', m32, imm32), 5)
(('ADC', 'adcl', m32, r32), 5)
(('ADC', 'adcq', m64, imm8), 5)
(('ADC', 'adcq', m64, imm32), 5)
(('ADC', 'adcq', m64, r64), 5)
(('ADD', 'addb', m8, imm8), 5)
(('ADD', 'addb', m8, r8), 5)
(('ADD', 'addw', m16, imm8), 5)
(('ADD', 'addw', m16, imm16), 5)
(('ADD', 'addw', m16, r16), 5)
(('ADD', 'addl', m32, imm8), 5)
(('ADD', 'addl', m32, imm32), 5)
(('ADD', 'addl', m32, r32), 5)
(('ADD', 'addq', m64, imm8), 5)
(('ADD', 'addq', m64, imm32), 5)
(('ADD', 'addq', m64, r64), 5)
(('AND', 'andb', m8, imm8), 5)
(('AND', 'andb', m8, r8), 5)
(('AND', 'andw', m16, imm8), 5)
(('AND', 'andw', m16, imm16), 5)
(('AND', 'andw', m16, r16), 5)
(('AND', 'andl', m32, imm8), 5)
(('AND', 'andl', m32, imm32), 5)
(('AND', 'andl', m32, r32), 5)
(('AND', 'andq', m64, imm8), 5)
(('AND', 'andq', m64

In [12]:
from rwtools.nemesis.LatencyMapper import one_op_regex, two_ops_regex
mnemonic = "addb"
op_str = "$0, (%ebx, %esi, 4)"

two_ops_regex.fullmatch("$0, $0")
one_op_regex.fullmatch(op_str)
one_op_regex.match("(%ebx, %esi, 4)")

two_ops_regex.fullmatch(op_str)

# ('SBB', 'sbbl', m32, r32)


<re.Match object; span=(0, 19), match='$0, (%ebx, %esi, 4)'>

In [13]:
mnemonic = "sbbq"
op_str = "$0, %rax"

latency_mapper.get_latency(mnemonic, op_str)

1

In [14]:
# print(mnemonic, op_str)
# print("getting latency: ")
latency_mapper.get_latency(mnemonic, op_str)

1

In [15]:
from rwtools.nemesis.string_matching import *
two_ops_regex.fullmatch("$0, (%ebx, %esi, 4)")
COMPOUND_OP.search("sdaf, (%ebx, %esi, 4)")

# print(COMPOUND_OP.pattern)

<re.Match object; span=(6, 21), match='(%ebx, %esi, 4)'>

In [16]:
latency_mapper.get_latency("imulw", "%eax, %eax")

3

# the `call` instruction

In [17]:
candidate_keys = [k for k, v in base_map.items() if "call" in k[1]]
for k in candidate_keys:
    print(k, base_map[k])


('CALL', 'call', rel32) 3
('CALL', 'callq', r64) 3
('CALL', 'callq', m64) 3
('SYSCALL', 'syscall') 1


In [18]:
candidate_keys = [k for k, v in base_map.items() if "movsbl" in k[1]]
for k in candidate_keys:
    print(k, base_map[k])

    

('MOVSX', 'movsbl', r32, r8) 1
('MOVSX', 'movsbl', r32, m8) 1


# PUSH and POP instructions 


In [28]:
def push_in_key(key): 
    print(list(key))
    for elem in list(key): 
        if "adc" in str(elem) or "adc" in str(elem): 
            return True 
    return False 

        
#     return True in ["push" in key_elemens for key_elemens in list(key)]

candidate_keys = [(k, v) for k, v in base_map.items() if "adc" in k[1]]
for k in candidate_keys:
    print(k)

candidate_keys = [(k, v) for k, v in base_map.items() if "pop" in k[1]]
for k in candidate_keys:
    print(k)

(('ADC', 'adcb', al, imm8), 1)
(('ADC', 'adcb', r8, imm8), 1)
(('ADC', 'adcb', r8, r8), 1)
(('ADC', 'adcb', r8, m8), 1)
(('ADC', 'adcw', ax, imm16), 1)
(('ADC', 'adcw', r16, imm8), 1)
(('ADC', 'adcw', r16, imm16), 1)
(('ADC', 'adcw', r16, r16), 1)
(('ADC', 'adcw', r16, m16), 1)
(('ADC', 'adcl', eax, imm32), 1)
(('ADC', 'adcl', r32, imm8), 1)
(('ADC', 'adcl', r32, imm32), 1)
(('ADC', 'adcl', r32, r32), 1)
(('ADC', 'adcl', r32, m32), 1)
(('ADC', 'adcq', rax, imm32), 1)
(('ADC', 'adcq', r64, imm8), 1)
(('ADC', 'adcq', r64, imm32), 1)
(('ADC', 'adcq', r64, r64), 1)
(('ADC', 'adcq', r64, m64), 1)
(('ADC', 'adcb', m8, imm8), 5)
(('ADC', 'adcb', m8, r8), 5)
(('ADC', 'adcw', m16, imm8), 5)
(('ADC', 'adcw', m16, imm16), 5)
(('ADC', 'adcw', m16, r16), 5)
(('ADC', 'adcl', m32, imm8), 5)
(('ADC', 'adcl', m32, imm32), 5)
(('ADC', 'adcl', m32, r32), 5)
(('ADC', 'adcq', m64, imm8), 5)
(('ADC', 'adcq', m64, imm32), 5)
(('ADC', 'adcq', m64, r64), 5)
(('ADCX', 'adcxl', r32, r32), 1)
(('ADCX', 'adcxl', r

In [24]:
candidate_keys = [(k, v) for k, v in base_map.items() if "jmp" in k[1]]
for k in candidate_keys:
    print(k)
    


(('JMP', 'jmp', rel8), 3)
(('JMP', 'jmp', rel32), 3)
(('JMP', 'jmpq', r64), 3)
(('JMP', 'jmpq', m64), 3)


In [23]:
latency_mapper.get_latency("addq", "$0x8, %rsp") 

1