## NOP instructions
The aim of this notebook is to find and test no-operation (NOP) instructions of various latencies. These are operations that can be executed without affecting the state of the program (e.g. multiplying a register by 1).

In [1]:
%load_ext autoreload
%autoreload 2
!pwd
import os
import sys 
if os.path.abspath("../../../") not in sys.path:
    sys.path.append(os.path.abspath("../../../")) 

/home/gilles/git-repos/NemesisRetroWrite/retrowrite/rwtools/nemesis/notebooks


In [2]:
# Call the project's NOP lookup helper with the argument 1
# (presumably the requested latency — TODO confirm against the helper's signature).
# NOTE(review): the module name really is spelled `nop_insructions` in this import;
# it presumably matches the file name on disk, so do not "fix" it here without renaming the module.
from rwtools.nemesis.nop_insructions import get_nop_instruction

get_nop_instruction(1)



('placeholder', [])

In [3]:
from rwtools.nemesis.LatencyMapper import construct_latency_mapper

# Location of the serialized latency map, resolved relative to the notebook's
# working directory (`.../rwtools/nemesis/notebooks`, see the `!pwd` output of
# the first cell) instead of a user-specific absolute path, so the notebook
# also runs from other checkouts.
# NOTE(review): the file name suggests a pickle — only load files you trust.
LAT_MAP_IF = os.path.abspath("../utils/pickled_latency_map.p")
latency_mapper = construct_latency_mapper(LAT_MAP_IF)

# Raw latency table of the mapper; later cells call .items()/.values() on it.
base_map = latency_mapper.base_map


## analyze the possible latencies of instructions 
The cell below collects all latencies that occur in the latency map and counts how many entries have each of them -- for each of these latencies there should be (hopefully) some
instruction that can serve as a NOP instruction.

Questions to answer:  
1) what are the different possible latencies  
2) how many instructions are there of the various latencies  

In [4]:
from collections import defaultdict

base_map = latency_mapper.base_map

# Distinct latency values that occur in the map.
latency_set = set(base_map.values())

# Histogram: latency value -> number of map entries with that latency.
count_dict = defaultdict(int)

# Only the latencies are needed, so iterate over the values directly.
# This also avoids binding `_` at notebook top level, which would shadow
# IPython's "last output" variable for the rest of the session.
for latency in base_map.values():
    count_dict[latency] += 1

count_dict

defaultdict(<function __main__.<lambda>()>,
            {1: 5071,
             5: 115,
             6: 63,
             4: 85,
             8: 4,
             12: 24,
             2: 130,
             3: 281,
             -1: 10,
             7: 59,
             23: 8,
             14: 17,
             11: 14,
             9: 6,
             13: 19,
             24: 4,
             500: 8,
             450: 7,
             400: 9,
             418: 1,
             10: 4,
             16: 14})

## Instructions of latency 1 
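Easy: multiply a register with itself  
`imulq %eax, %eax`  
**Note (review):** `imul` of a register with itself squares the register and updates the flags, so on its own it does not preserve program state; also the `q` suffix expects 64-bit operands (`%rax`), whereas `%eax` needs `imull`. Confirm the intended instruction.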

## Instructions of latency 2
According to the current latency map, the following instruction has latency 2:  
`movd %xmm, %xmm` 

In [5]:
# Every key of the latency map whose recorded latency is exactly 2.
candidate_keys = [
    instruction_key
    for instruction_key, cycles in base_map.items()
    if cycles == 2
]
candidate_keys

[('BEXTR', 'bextr', r32, r32, imm32),
 ('BEXTR', 'bextr', r32, r32, r32),
 ('BEXTR', 'bextr', r32, m32, imm32),
 ('BEXTR', 'bextr', r32, m32, r32),
 ('BEXTR', 'bextr', r64, r64, imm32),
 ('BEXTR', 'bextr', r64, r64, r64),
 ('BEXTR', 'bextr', r64, m64, imm32),
 ('BEXTR', 'bextr', r64, m64, r64),
 ('LFENCE', 'lfence'),
 ('MFENCE', 'mfence'),
 ('MOV', 'movb', m8, imm8),
 ('MOV', 'movb', m8, r8),
 ('MOV', 'movw', m16, imm16),
 ('MOV', 'movw', m16, r16),
 ('MOV', 'movl', m32, imm32),
 ('MOV', 'movl', m32, r32),
 ('MOV', 'movq', m64, imm32),
 ('MOV', 'movq', m64, r64),
 ('MOV', 'movabs', moffs32, eax),
 ('MOV', 'movabs', moffs64, rax),
 ('MOVBE', 'movbew', r16, m16),
 ('MOVBE', 'movbel', r32, m32),
 ('MOVBE', 'movbeq', r64, m64),
 ('MOVBE', 'movbew', m16, r16),
 ('MOVBE', 'movbel', m32, r32),
 ('MOVBE', 'movbeq', m64, r64),
 ('MOVD', 'movd', r32, mm),
 ('MOVD', 'movd', r32, xmm),
 ('MOVD', 'movd', mm, r32),
 ('MOVD', 'movd', mm, m32),
 ('MOVD', 'movd', xmm, r32),
 ('MOVD', 'movd', xmm, m32),

In [6]:
# Ask the mapper for the latency of `movd` with two xmm operands.
# `mnemonic` and `op_str` are notebook-level variables that later cells reassign.
# NOTE(review): the visible part of the latency-2 key list above contains no
# `movd xmm, xmm` form — confirm the mapper matches an exact key here rather than a fallback.
mnemonic = "movd"
op_str = "%xmm, %xmm"
latency_mapper.get_latency(mnemonic, op_str)

2

## Instructions of latency 3 
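The following instruction has latency 3:  
`mull %eax`  
(the lookup cell below queries the 64-bit form, `mulq %rax`).  
Note: this modifies register `%eax` (one-operand `mul` also writes `%edx` and the flags), so the affected registers have to be pushed onto the stack before the instruction and popped afterwards  
(in all nodes that add this instruction). 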

In [7]:
# Every key of the latency map whose recorded latency is exactly 3.
candidate_keys = [
    instruction_key
    for instruction_key, cycles in base_map.items()
    if cycles == 3
]
candidate_keys


[('BSF', 'bsfw', r16, r16),
 ('BSF', 'bsfw', r16, m16),
 ('BSF', 'bsfl', r32, r32),
 ('BSF', 'bsfl', r32, m32),
 ('BSF', 'bsfq', r64, r64),
 ('BSF', 'bsfq', r64, m64),
 ('BSR', 'bsrw', r16, r16),
 ('BSR', 'bsrw', r16, m16),
 ('BSR', 'bsrl', r32, r32),
 ('BSR', 'bsrl', r32, m32),
 ('BSR', 'bsrq', r64, r64),
 ('BSR', 'bsrq', r64, m64),
 ('CALL', 'call', rel32),
 ('CALL', 'callq', r64),
 ('CALL', 'callq', m64),
 ('CMPXCHG', 'cmpxchgb', r8, r8),
 ('CMPXCHG', 'cmpxchgw', r16, r16),
 ('CMPXCHG', 'cmpxchgl', r32, r32),
 ('CMPXCHG', 'cmpxchgq', r64, r64),
 ('CMPXCHG', 'cmpxchgb', m8, r8),
 ('CMPXCHG', 'cmpxchgw', m16, r16),
 ('CMPXCHG', 'cmpxchgl', m32, r32),
 ('CMPXCHG', 'cmpxchgq', m64, r64),
 ('CMPXCHG16B', 'cmpxchg16b', m128),
 ('CMPXCHG8B', 'cmpxchg8b', m64),
 ('CRC32', 'crc32b', r32, r8),
 ('CRC32', 'crc32w', r32, r16),
 ('CRC32', 'crc32l', r32, r32),
 ('CRC32', 'crc32b', r32, m8),
 ('CRC32', 'crc32w', r32, m16),
 ('CRC32', 'crc32l', r32, m32),
 ('CRC32', 'crc32b', r64, r8),
 ('CRC32', '

In [8]:
# Ask the mapper for the latency of the one-operand multiply on %rax.
# NOTE(review): the markdown above names `mull %eax`, while this cell queries
# the 64-bit form `mulq %rax` — confirm which one is intended.
# `mnemonic` and `op_str` are notebook-level variables and stay set for later cells.
mnemonic = "mulq"
op_str = "%rax"
latency_mapper.get_latency(mnemonic, op_str)


3

## Latency of push and pop instructions 

In [9]:
# Look up the latency the mapper reports for `push` and `pop` of %eax, the
# instructions needed to save/restore a register around a state-modifying "nop".
#
# The name passed to get_latency() is `mnemonic`, so that is the variable that
# must be assigned here; a misspelled assignment would silently reuse the
# `mnemonic` value left over from an earlier cell.
# NOTE(review): the recorded output of this cell may stem from such a stale
# value — re-run the cell to refresh it.
op_str = "%eax"
for mnemonic in ("push", "pop"):
    print(latency_mapper.get_latency(mnemonic, op_str))


3
3


# Just checking to see if some instructions exist
