/
asm.py
executable file
·270 lines (232 loc) · 10.1 KB
/
asm.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
#!/usr/bin/env python2
import sys, operator
from pyparsing import Keyword,Word,Regex,Suppress,\
Optional,Group,alphas,alphanums,quotedString,delimitedList,\
CaselessKeyword
# assembler for notch's dcpu-16 architecture
# specifications here: http://0x10c.com/doc/dcpu-16.txt
# todo:
# - support more flexible constexprs
# - support something more than just a literal address for org
# - better error reporting
# - make everything case-insensitive [except string literals]
# - more pseudo-ops
# - macros!
# basic (two-operand) opcodes; a mnemonic's position in the list is its
# opcode number, slot 0 is unused.
ops = [
    None,
    'set',  # 0x1
    'add',  # 0x2
    'sub',  # 0x3
    'mul',  # 0x4
    'div',  # 0x5
    'mod',  # 0x6
    'shl',  # 0x7
    'shr',  # 0x8
    'and',  # 0x9
    'bor',  # 0xa
    'xor',  # 0xb
    'ife',  # 0xc
    'ifn',  # 0xd
    'ifg',  # 0xe
    'ifb',  # 0xf
]

# extended (one-operand) opcodes, encoded in 0-operand space
xops = [
    None,
    'jsr',  # 0x1
]

# pseudo-ops: not actually things.
pops = [
    'dat',
    'org',
    'def',
    'res',
]

# convenience mnemonics; each entry maps the operand list of the alias to
# the (mnemonic, operand list) pair that is assembled in its place.
rewrites = {
    'ret':     lambda operands: ('set', ['pc', 'pop']),
    'jmp':     lambda operands: ('set', ['pc'] + operands),
    'call':    lambda operands: ('jsr', operands),
    'dw':      lambda operands: ('dat', operands),
    'data':    lambda operands: ('dat', operands),
    'reserve': lambda operands: ('res', operands),
}
def Keywords(xs):
    """Return a parser that matches any one of the strings in `xs`.

    Every string is wrapped in a pyparsing Keyword and the resulting
    elements are folded together, left to right, with the `|` operator.
    """
    alternatives = [Keyword(word) for word in xs]
    return reduce(lambda combined, nxt: combined | nxt, alternatives)
def CaselessKeywords(xs):
    """Return a parser that matches any one of the strings in `xs`.

    Every string is wrapped in a pyparsing CaselessKeyword and the
    resulting elements are folded together, left to right, with `|`.
    """
    alternatives = [CaselessKeyword(word) for word in xs]
    return reduce(lambda combined, nxt: combined | nxt, alternatives)
class MemRef(object):
    """Marks an operand that was written inside square brackets.

    Built from a token sequence; only the first token is kept, as `expr`.
    """

    def __init__(self, expr):
        inner = expr[0]
        self.expr = inner

    def __repr__(self):
        return 'MemRef({0})'.format(self.expr)
class StrData(object):
    """Holds the text of a quoted string literal.

    Built from a token sequence whose first token is the literal including
    its surrounding quote characters; `expr` is that token with its first
    and last character removed.
    """

    def __init__(self, expr):
        quoted = expr[0]
        self.expr = quoted[1:-1]

    def __repr__(self):
        return 'StrData({0})'.format(self.expr)
class AddExpr(object):
    """A sum of two operands, stored as `a` and `b`.

    Built from a token sequence; the first two tokens become `a` and `b`.
    """

    def __init__(self, expr):
        self.a, self.b = expr[0], expr[1]

    def __repr__(self):
        return 'AddExpr({0},{1})'.format(self.a, self.b)
def maybeAdd(x):
    """Wrap `x` in an AddExpr when it holds more than one token.

    A sequence of zero or one tokens is handed back unchanged.
    """
    return AddExpr(x) if len(x) > 1 else x
def basenum(x):
    """Convert the first token of `x` to an int.

    A token starting with `0x` is read as hexadecimal; anything else is
    read as decimal.  The base is chosen explicitly instead of using
    int(text, 0): base 0 applies Python literal rules, under which a
    decimal token with a leading zero is either taken as octal (`010`)
    or rejected outright (`08`, `09`).

    Raises:
        ValueError: if the token is not a valid number in the chosen base.
    """
    text = str(x[0])
    if text[:2].lower() == '0x':
        return int(text, 16)
    return int(text, 10)
# --- grammar for one line of assembly source ---

# identifiers; the stack forms `sp+` and `-sp` are tried before plain words
ident = (CaselessKeyword('sp+') | CaselessKeyword('-sp') |
         Word(alphas + '_.', alphanums + '_.'))
# hex (0x...) or decimal literal, converted to an int by basenum
number = Regex('0x[0-9a-fA-F]+|[0-9]+').setParseAction(basenum)
# `;` starts a comment that runs to the end of the line
comment = Regex(';.*$')
# a label is written either `name:` or `:name`
label = (ident + Suppress(':')) | (Suppress(':') + ident)
# every accepted mnemonic: opcodes, extended opcodes, pseudo-ops and aliases
# (the None placeholders in ops/xops are filtered out)
op = CaselessKeywords(
    [o for o in (ops + xops + pops + list(rewrites.keys())) if o])
# setParseAction modifies the element it is called on, and quotedString is a
# module-level object shared by every user of pyparsing, so attach the
# action to a private copy instead.
val3 = ident | number | quotedString.copy().setParseAction(StrData)
# a single value, or `value + value` (wrapped in an AddExpr by maybeAdd)
val2 = (val3 + Optional(Suppress('+') + val3)).setParseAction(maybeAdd)
# `[ ... ]` operand, wrapped in a MemRef
memref = (Suppress('[') + val2 + Suppress(']')).setParseAction(MemRef)
val = val2 | memref
# mnemonic followed by an optional comma-separated operand list
inst = Group(op) + Group(Optional(delimitedList(val)))
# result layout: [ [label?] ] or [ [label?], [op], [operands...] ]
line = (Group(Optional(label)) +
        Optional(inst) +
        Suppress(Optional(comment)))
class State(object):
    """Mutable assembly state: the output image plus symbol bookkeeping.

    Words are written into a fixed-size image (`out`).  Anything emitted
    that is not an int is recorded as a fixup (address -> symbol name) and
    patched in later by flushlocals() / flushglobals().  A symbol whose
    name starts with '.' is local; every other symbol is global.
    """

    IMAGE_WORDS = 0x10000  # number of words in the output image

    def __init__(self):
        self.org = 0          # current assembly position
        self.maxorg = 0       # high water mark for what we must actually emit
        self.out = [0] * self.IMAGE_WORDS
        self.localsyms = {}   # sym -> addr
        self.globalsyms = {}  # sym -> addr
        self.fixups = {}      # addr -> sym

    def emit(self, x):
        """Place one word at the current position and advance the position.

        An int is stored in the image directly; any other value is recorded
        as a fixup for the current address.

        Raises:
            IndexError: if the current position lies outside the image.
        """
        if not 0 <= self.org < len(self.out):
            raise IndexError(
                'Assembly position 0x%x is outside the %d-word image'
                % (self.org, len(self.out)))
        if isinstance(x, int):
            self.out[self.org] = x
        else:
            self.fixups[self.org] = x
        self.org += 1

    def _resolve(self, want_local, symbols, kind):
        """Patch every pending fixup of one scope using `symbols`.

        want_local selects local ('.'-prefixed) or global fixups; `kind` is
        the word used in the error message.

        Raises:
            Exception: if a fixup of the selected scope has no definition,
                or if a fixup is not a symbol name at all.
        """
        # iterate over a snapshot: resolved entries are deleted as we go
        for addr, sym in list(self.fixups.items()):
            if not hasattr(sym, 'startswith'):
                # e.g. a compound expression handed to emit(); fail with a
                # readable message rather than an AttributeError
                raise Exception(
                    'Cannot resolve non-symbol operand `%s`' % (sym,))
            if sym.startswith('.') != want_local:
                continue
            if sym not in symbols:
                raise Exception('Unresolved %s symbol %s' % (kind, sym))
            self.out[addr] = symbols[sym]
            del self.fixups[addr]

    def flushlocals(self):
        """Resolve all pending local fixups, then forget the local symbols.

        Raises:
            Exception: if a local fixup refers to an undefined symbol.
        """
        self._resolve(True, self.localsyms, 'local')
        self.localsyms = {}

    def flushglobals(self):
        """Resolve all pending global fixups, then forget the global symbols.

        Raises:
            Exception: if a global fixup refers to an undefined symbol.
        """
        self._resolve(False, self.globalsyms, 'global')
        self.globalsyms = {}

    def define(self, sym):
        """Bind `sym` to the current assembly position.

        Defining a global symbol ends the current local scope, so pending
        local fixups are resolved (and local symbols dropped) first.
        """
        if sym.startswith('.'):
            # local: just add it to the local syms
            self.localsyms[sym] = self.org
            return
        # if it's global, we need to flush the locals.
        self.flushlocals()
        self.globalsyms[sym] = self.org
def main(args):
    """Assemble the input files named in `args` and write the image.

    `args` is an argv-style list; args[0] is skipped.  Recognised options:
    `--help`, `-b`/`--big`, `-l`/`--little` and `-o outfile`.  Every other
    argument is an input file; all inputs are assembled as one program, in
    the order given.

    Returns:
        0 on success, and also after printing the `--help` text.

    Raises:
        Exception: no input files, no output (or more than one) given, an
            operand that cannot be encoded, a wrong operand count, an
            unresolved symbol, or an image larger than the output buffer.
    """
    sources = []
    dest = None  # output path; opened only once assembly has succeeded
    next_is_out = False
    little_endian = True
    for a in args[1:]:
        if a != '-' and a.startswith('-'):
            if a == '--help':
                print('usage: asm.py [options] -o outfile infile ...')
                print('options: -b,--big: emit big-endian images')
                print(' -l,--little: emit little-endian images (default)')
                return 0
            if a in ('-b', '--big'):
                little_endian = False
            if a in ('-l', '--little'):
                little_endian = True
        else:
            if next_is_out:
                if dest is not None:
                    raise Exception('Output already specified.')
                # todo: support `-` => stdout
                dest = a
            else:
                # todo: support `-` => stdin
                with open(a) as infile:
                    sources.append(infile.read())
        next_is_out = a == '-o'
    if not sources:
        raise Exception('No input files.')
    if dest is None:
        raise Exception('No output specified.')

    # join with a newline so the last line of one file can never fuse with
    # the first line of the next when a file lacks a trailing newline
    src = '\n'.join(sources)

    state = State()

    # operand encodings, keyed by lower-case register name
    direct_regs = {'a': 0, 'b': 1, 'c': 2, 'x': 3, 'y': 4, 'z': 5,
                   'i': 6, 'j': 7, 'sp': 27, 'pc': 28, 'o': 29,
                   'peek': 25, 'pop': 24, 'push': 26}
    indirect_regs = {'a': 8, 'b': 9, 'c': 10, 'x': 11, 'y': 12, 'z': 13,
                     'i': 14, 'j': 15, 'sp': 25, 'sp+': 24, '-sp': 26}
    indirect_ofs_regs = {'a': 16, 'b': 17, 'c': 18, 'x': 19, 'y': 20,
                         'z': 21, 'i': 22, 'j': 23}

    def assemble_arg(x):
        """Encode one operand; return (bits, [extra words])."""
        if isinstance(x, int):
            if 0 <= x < 32:
                return (32 + x, [])  # try short immediate form in operand first
            return (31, [x])  # otherwise, put it in the next word.
        if isinstance(x, str):
            if x.lower() in direct_regs:
                return (direct_regs[x.lower()], [])
            return (31, [x])  # todo: support emitting short immediate here?
        if isinstance(x, MemRef):  # [arg]
            y = x.expr
            if isinstance(y, int):
                return (30, [y])
            if isinstance(y, AddExpr):
                # todo: do this properly
                # accept the register on either side of the `+`
                if isinstance(y.a, str) and y.a.lower() in indirect_ofs_regs:
                    if isinstance(y.b, (str, int)):
                        return (indirect_ofs_regs[y.a.lower()], [y.b])
                if isinstance(y.b, str) and y.b.lower() in indirect_ofs_regs:
                    if isinstance(y.a, (str, int)):
                        return (indirect_ofs_regs[y.b.lower()], [y.a])
            if isinstance(y, str):
                if y.lower() in indirect_regs:
                    return (indirect_regs[y.lower()], [])
                return (30, [y])  # indirect immediate
        raise Exception('Don\'t know how to assemble arg `%s`' % (x,))

    def check_num_operands(op, operands, expected):
        """Raise unless exactly `expected` operands were supplied to `op`."""
        if len(operands) != expected:
            raise Exception('Expected %d operands for `%s`, got %d'
                            % (expected, op, len(operands)))

    # todo: parse everything at once, so we get proper line numbers
    for text in src.split('\n'):
        rr = line.parseString(text, parseAll=True)
        if len(rr[0]):
            state.define(rr[0][0])
        if len(rr) <= 1:
            continue  # label-only, comment-only or blank line

        # actually assemble some opcodes
        op = rr[1][0]
        operands = rr[2][:]  # listify to drop extra parser state we dont care about here.
        # apply rewrite rules for pseudo-ops
        while op in rewrites:
            op, operands = rewrites[op](operands)

        if op == 'org':
            check_num_operands(op, operands, 1)
            state.maxorg = max(state.org, state.maxorg)
            if isinstance(operands[0], int):
                state.org = operands[0]
            else:
                raise Exception('Don\'t know how to evaluate `%s` in argument of `org`' % (operands[0],))
        elif op == 'res':
            check_num_operands(op, operands, 1)
            state.maxorg = max(state.org, state.maxorg)
            if isinstance(operands[0], int):
                state.org += operands[0]
            else:
                raise Exception('Don\'t know how to evaluate `%s` in argument of `res`' % (operands[0],))
        elif op == 'dat':  # various literal data
            for item in operands:
                if isinstance(item, StrData):
                    for ch in item.expr:
                        state.emit(ord(ch))
                else:
                    state.emit(item)  # literal or symbol
        elif op == 'def':  # random definition (useful for memory mapped peripherals)
            # (but if you want to do something serious, maybe the C preprocessor is better)
            check_num_operands(op, operands, 2)
            state.globalsyms[operands[0]] = operands[1]
        elif op in ops:
            opindex = ops.index(op)
            check_num_operands(op, operands, 2)
            op1, e1 = assemble_arg(operands[0])
            op2, e2 = assemble_arg(operands[1])
            # instruction format: bbbbbbaaaaaaoooo: o=opcode, a=first operand, b=second operand
            state.emit(opindex | (op1 << 4) | (op2 << 10))
            for e in e1 + e2:
                state.emit(e)
        elif op in xops:
            opindex = xops.index(op)
            check_num_operands(op, operands, 1)
            # extended instruction format aaaaaaoooooo0000: 0=zero, o=opcode, a=operand
            op1, e1 = assemble_arg(operands[0])
            state.emit(0 | (opindex << 4) | (op1 << 10))
            for e in e1:
                state.emit(e)

    state.flushlocals()
    state.flushglobals()
    state.maxorg = max(state.org, state.maxorg)
    if state.maxorg > len(state.out):
        # `org`/`res` can move the position past the buffer without emitting
        raise Exception('Assembled image (%d words) does not fit in %d words'
                        % (state.maxorg, len(state.out)))

    # now output the assembled code:
    # support both big and little endian output.
    # the spec says little endian, but much of the other tooling
    # assumes big-endian images.
    image = bytearray()
    for word in state.out[:state.maxorg]:
        lo = word & 0xff
        hi = (word >> 8) & 0xff
        image.extend((lo, hi) if little_endian else (hi, lo))
    with open(dest, 'wb') as outfile:
        outfile.write(bytes(image))
    return 0
if __name__ == '__main__':
    # run as a script: main()'s return value becomes the process exit status
    exit_status = main(sys.argv)
    sys.exit(exit_status)