In [3]:
# Assembler state shared by the instruction emitters.
out_code, out_list = [], []  # emitted byte chunks / matching listing lines
out_mode = '16' # '32'

In [180]:
# Operand forms:
# ('imm', minsize, data)
#     data - a byte string (bytes) or an integer
# ('reg', size, name)
#     names follow the patterns e-acdb-xlh, cdsefg-s, e-flags
# ('mem', seg, base, index, offset[, size])
#     string form, e.g. 'ds:[ax*4+bp+7]'
class Mem:
    """Memory operand printed as seg:[base+index+offset].

    base, index and offset may each be None, meaning that term is absent.
    """
    def __init__(self,seg,base,index,offset):
        self.type, self.seg = 'mem', seg
        self.base, self.index, self.offset = base, index, offset
    def __repr__(self):
        # base and index are joined as given; only the offset goes through str()
        terms = [part for part in (self.base, self.index) if part is not None]
        if self.offset is not None:
            terms.append(str(self.offset))
        return '{}:[{}]'.format(self.seg, '+'.join(terms))
def default_seg(base):
    """Return the segment register implied by base: 'ss' for 'bp', else 'ds'."""
    if base == 'bp':
        return 'ss'
    return 'ds'
def parse_mem(seg,arg):
    """Parse the inside of a memory operand (e.g. 'bx+si+7') into a Mem.

    seg -- explicit segment register name, or None to use default_seg(base)
    arg -- '+'-separated terms: at most one base ('bx' or 'bp'), at most one
           index ('si' or 'di') and at most one integer offset

    Raises Exception when a base, index or offset is given twice, and
    ValueError (from int()) for a term that is neither a known register
    nor an integer literal.
    """
    base = None
    index = None
    offset = None
    for term in arg.split('+'):
        if term in {'bx','bp'}:
            if base is not None: raise Exception('double base in indexing')
            base = term
        elif term in {'si','di'}:
            if index is not None: raise Exception('double index in indexing')
            index = term
        else:
            if offset is not None: raise Exception('double offset in indexing')
            offset = int(term)
    if seg is None:
        seg = default_seg(base)
    # Use the resolved segment: an explicit override must survive, the default
    # only applies when the caller gave none.
    return Mem(seg,base,index,offset)

class Reg:
    """Register operand: a size (parse() uses 1 or 2) and a register name."""

    # name -> register number; each group is numbered from 0 in the order listed
    _NUMBERS = {name: code
                for group in (('ax','cx','dx','bx','sp','bp','si','di'),
                              ('al','cl','dl','bl','ah','ch','dh','bh'),
                              ('es','cs','ss','ds','fs','gs'))
                for code, name in enumerate(group)}
    _SEGMENTS = frozenset(('cs','ds','ss','es','fs','gs'))

    def __init__(self,size,name):
        self.type, self.size, self.name = 'reg', size, name
    def __repr__(self):
        return self.name
    def number(self):
        """Return the register number, or None when the name has none (e.g. 'flags')."""
        return self._NUMBERS.get(self.name)
    def is_seg(self):
        """Tell whether the name is one of the segment registers."""
        return self.name in self._SEGMENTS
            
class Imm:
    """Immediate operand: the value plus the minimum size needed to hold it."""
    def __init__(self,minsize,data):
        self.type, self.minsize, self.data = 'imm', minsize, data
    def __repr__(self):
        # listing text is simply the printable form of the payload
        text = str(self.data)
        return text

def parse(arg):
    """Convert a user-supplied operand into an Imm, Reg or Mem object.

    arg may be:
      * an int -- Imm sized to the smallest of 1, 2, 4, 6, 8 or 10 bytes that
        holds it (non-negative values as unsigned, negative values as two's
        complement);
      * bytes -- Imm whose minsize is len(arg);
      * a register name -- Reg of size 2 (word registers, segment registers,
        'flags') or size 1 (byte registers);
      * any other string -- a memory operand, '[...]' or 'seg:[...]' (the
        brackets are optional after an explicit segment), passed to parse_mem.

    Raises Exception for an int that does not fit in 10 bytes, and
    AssertionError for a segment-less memory operand not starting with '['.
    """
    if type(arg) is int:
        for size in (1, 2, 4, 6, 8, 10):
            bits = size*8
            # The lower bound is inclusive: -2**(bits-1) is representable in
            # two's complement (e.g. -128 is a valid signed byte).
            if -2**(bits-1) <= arg < 2**bits:
                return Imm(size,arg)
        raise Exception('too big immediate arg (10 bytes)')
    elif type(arg) is bytes:
        return Imm(len(arg),arg)

    elif arg in {'ax','bx','cx','dx','bp','sp','si','di',
               'cs','ds','ss','es','fs','gs','flags'}:
        return Reg(2,arg)
    elif arg in {'al','ah','bl','bh','cl','ch','dl','dh'}:
        return Reg(1,arg)

    else:
        parts = arg.split(':')
        if len(parts)==2:
            seg, body = parts
            # brackets are optional once a segment is spelled out
            if body[0]=='[': body = body[1:-1]
        else:
            seg = None
            body = parts[0]
            assert body[0]=='['
            body = body[1:-1]
        return parse_mem(seg,body)

def is_ax_al(reg):
    """Tell whether the operand is the register 'ax' or 'al'."""
    if reg.type != 'reg':
        # non-register operands are rejected before .name is touched
        return False
    return reg.name in ('ax', 'al')
def offset_only(mem):
    """Tell whether mem is a Mem with neither a base nor an index register."""
    if not isinstance(mem,Mem):
        return False
    return mem.base is None and mem.index is None
def int2bytes(x,l):
    """Encode integer x as l little-endian bytes.

    Non-negative values are encoded as unsigned, negative values as two's
    complement. Raises OverflowError when x does not fit in l bytes.
    """
    # signed is enabled only for negatives, so 0 .. 2**(8*l)-1 is still
    # accepted, while a negative below -2**(8*l-1) is rejected instead of
    # silently wrapping to an unrelated positive value.
    return x.to_bytes(l, byteorder='little', signed=x<0)
    
        
def mov(dest1,src1,size=None):
    """Assemble one MOV and append it to out_code (bytes) and out_list (text).

    dest1, src1 -- operands in any form accepted by parse()
    size        -- kept for interface compatibility; currently unused

    Implemented forms (immediates and offsets are written as 1 or 2 bytes):
      reg, imm           B0+r ib / B8+r iw
      al/ax, [offset]    A0 / A1
      [offset], al/ax    A2 / A3

    Raises AssertionError when an immediate is wider than the destination
    register, when the destination is a segment register, or when a
    direct-offset operand carries a non-default segment. Raises
    NotImplementedError for every operand combination that has no encoder
    yet, so that an instruction is never dropped silently.
    """
    dest = parse(dest1)
    src = parse(src1)
    if src.type=='imm' and dest.type=='reg':
        assert src.minsize<=dest.size and not dest.is_seg()
        regno = dest.number()
        if regno is None:
            # e.g. 'flags': parse() accepts the name but it has no register number
            raise NotImplementedError(
                'mov {}, {}: register cannot be encoded'.format(dest,src))
        if dest.size==1:
            # B0+rb ib   MOV reg8, imm8
            opcode, width = 0xB0+regno, 1
        else:
            # B8+rw iw   MOV reg16, imm16
            opcode, width = 0xB8+regno, 2
        out_code.append(int2bytes(opcode,1)+int2bytes(src.data,width))
        out_list.append('mov {}, {}'.format(dest,src))

    elif is_ax_al(dest) and offset_only(src):
        # no segment-override prefix is emitted, so only the default segment is valid
        assert default_seg(src.base)==src.seg
        if dest.name=='al':
            # A0   MOV AL, moffs8
            out_code.append(b'\xA0'+int2bytes(src.offset,2))
            out_list.append('mov al, '+repr(src))
        else:
            # A1   MOV AX, moffs16
            out_code.append(b'\xA1'+int2bytes(src.offset,2))
            out_list.append('mov ax, '+repr(src))

    elif offset_only(dest) and is_ax_al(src):
        # same restriction as the load form above
        assert default_seg(dest.base)==dest.seg
        if src.name=='al':
            # A2   MOV moffs8, AL
            out_code.append(b'\xA2'+int2bytes(dest.offset,2))
            out_list.append('mov '+repr(dest)+',al')
        else:
            # A3   MOV moffs16, AX
            out_code.append(b'\xA3'+int2bytes(dest.offset,2))
            out_list.append('mov '+repr(dest)+',ax')

    else:
        # Encodings still to be implemented:
        #   C6     MOV r/m8, imm8        C7     MOV r/m16, imm16
        #   88 /r  MOV r/m8, r8          89 /r  MOV r/m16, r16
        #   8A /r  MOV r8, r/m8          8B /r  MOV r16, r/m16
        #   8C /r  MOV r/m16, Sreg       8E /r  MOV Sreg, r/m16
        raise NotImplementedError(
            'mov {}, {}: operand combination not supported yet'.format(dest,src))


In [187]:
# Emit a short instruction sequence through the assembler helpers.
# NOTE(review): _int, push and pop are not defined in the visible part of this
# notebook (the recorded run stops with NameError on _int), and `es` is a bare
# name rather than a string operand like the ones passed to mov — confirm.
mov('ax',0x0003)
_int(0x10)
mov('ax',0x1301)
mov('cx',0x1301)
mov('bl',0x07)
mov('dx',0x0000)
push(0)
pop(es)


NameError: name '_int' is not defined

In [186]:
# Show the byte chunks that mov() has appended so far.
out_code

[b'\xa0\x07',
 b'\xa0\x07\x00',
 b'\xa0\x07\x00',
 b'\xa0\x07\x00',
 b'\xa0\x07\x00',
 b'\xa0\x07\x00',
 b'\xb0\x07',
 b'\xb4\x07']

In [168]:
# Scratch check: Mem defines no __add__, so adding a str raises TypeError.
Mem('1','2','3',4)+''

TypeError: unsupported operand type(s) for +: 'Mem' and 'str'

In [30]:
# Scratch check: left-shifting a negative int keeps the sign.
-1<< 3

-8

In [24]:
# Scratch check: int() parses a decimal string.
int('123')

123

In [52]:
# Scratch check: little-endian encoding puts the low byte first (0x23 prints as '#').
(0x123).to_bytes(4, byteorder='little')

b'#\x01\x00\x00'

In [108]:
# Starting value and occurrence table for the recurrence experiment.
x, y = 1, {}

In [109]:
# Run the recurrence a million times, counting in y how often each value appears.
for i in range(1000000):
    # multiply, keep the low 16 bits, then add one
    x = ((x * 0x4e35) & 0xFFFF) + 1
    y[x] = y.get(x, 0) + 1


In [142]:
# How many times the value 1243 was recorded in the tally.
y[1243]

16

In [111]:
# Decimal value of the multiplier 0x4e35.
0x4e35

20021

In [131]:
# Scratch check: an integer modulo 1 is 0.
12%1

0