forked from jefftranter/udis
-
Notifications
You must be signed in to change notification settings - Fork 0
/
udis.py
executable file
·399 lines (349 loc) · 15 KB
/
udis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
#! /usr/bin/env python3
#
# Universal Disassembler
# Copyright (c) 2013-2020 by Jeff Tranter <tranter@pobox.com>
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys
import argparse
import label
# True when running on native Windows (sys.platform begins with "win32").
WINDOWS = sys.platform.startswith('win32')

# Placeholders so linting tools see these names defined; the CPU-specific
# plugin that is exec'd later in this file assigns the real values.
leadInBytes = []
opcodeTable = []
addressModeTable = []
maxLength = 3

# Flags (bit masks tested against the optional 4th opcode table field)
pcr = 1 << 0      # last operand is PC-relative
und = 1 << 1      # undocumented opcode
z80bit = 1 << 2   # opcode table is re-read using the third instruction byte

# Label support: when labels is true, labels are read for the input file and
# operands are formatted through the label module.
labels = False
labellist = []
# Functions
def isprint(char):
    """Return True if char is a printable ASCII character.

    The accepted range is 0x20 (space) through 0x7E ('~'). 0x7F (DEL) is a
    control character and is rejected. char is expected to be a
    single-character string, e.g. the result of chr().
    """
    return ' ' <= char <= '~'
# Avoids an error when output piped, e.g. to "less" on Linux or Mac.
# Restoring the default SIGPIPE action lets the process exit quietly when the
# reader closes the pipe. This is skipped on Windows, where the script does
# not attempt to use SIGPIPE.
if not WINDOWS:
    # pylint: disable=undefined-variable
    import signal
    signal.signal(signal.SIGPIPE, signal.SIG_DFL)
# Parse command line options.
# All values are kept as parsed by argparse; the starting address stays a
# string here and is converted to an integer further down.
parser = argparse.ArgumentParser()
parser.add_argument("filename", help="Binary file to disassemble")
parser.add_argument("-c", "--cpu", help="Specify CPU type (defaults to 6502)", default="6502")
parser.add_argument("-n", "--nolist", help="Don't list instruction bytes (make output suitable for assembler)", action="store_true")
# The address parser below accepts a 0x prefix, so the help text says so.
parser.add_argument("-a", "--address", help="Specify starting address, decimal or 0x-prefixed hexadecimal (defaults to 0)", default="0")
parser.add_argument("-u", "--undocumented", help="Allow undocumented opcodes", action="store_true")
parser.add_argument("-i", "--invalid", help="Show invalid opcodes as ??? rather than constants", action="store_true")
parser.add_argument("-b", "--block", help="Specify file with byte/word/string block information", default="")
args = parser.parse_args()
# Load CPU plugin based on command line option.
# Looks for plugin in same directory as this program.
plugindir = os.path.dirname(os.path.realpath(__file__))
plugin = plugindir + os.sep + args.cpu + ".py"
try:
    # Only the file read is guarded, so a FileNotFoundError raised by the
    # plugin's own code is not misreported as a missing plugin. The context
    # manager closes the handle as soon as the source has been read.
    with open(plugin) as pluginfd:
        pluginsource = pluginfd.read()
except FileNotFoundError:
    print(("error: CPU plugin file '{}' not found.".format(plugin)), file=sys.stderr)
    print("The following CPUs are supported: 1802 6502 65816 65c02 6800 6801/6803 6809 6811 8051 8080 8085 z80", file=sys.stderr)
    sys.exit(1)
# SECURITY NOTE: the plugin name comes straight from the command line and the
# file is executed as Python code. That is acceptable for a local tool, but
# --cpu must never be fed from untrusted input.
# exec() runs at module scope on purpose: the plugin's assignments replace the
# placeholder tables (opcodeTable, addressModeTable, maxLength, ...).
exec(pluginsource)
# Get filename from command line arguments.
filename = args.filename

# Load blocks of bytes/words/strings from a text file (looked up in the same
# directory as the input file). Each non-blank line has the form
#     start,end[,type]
# where start and end are hexadecimal addresses and only the first character
# of type is used. The main loop recognizes the types 'a', 'b', 'W', 'w' and
# 's'; a missing or empty type defaults to 'b' (bytes).
blocks = []
if args.block != "":
    blockpath = os.path.dirname(os.path.abspath(filename)) + os.sep + args.block
    # Context manager so the block file is closed once it has been read.
    with open(blockpath, "r") as blockfd:
        lines = blockfd.read().split('\n')
    for singleline in lines:
        if singleline.strip() != '':
            fields = singleline.split(',')
            start = int(fields[0].strip(), base=16)
            end = int(fields[1].strip(), base=16)
            try:
                btype = fields[2].strip()[0]
            except IndexError:
                # No third field, or the third field is empty.
                btype = 'b'  # defaults to bytes
            blocks.append([start, end, btype])

# NOTE(review): the nesting of this statement could not be recovered from the
# source; it is assumed to be independent of the block file - confirm.
if labels:
    label.readLabels(filename, labellist)
# Current instruction address. Silently force it to be in valid range.
# Accepts a decimal number or a hexadecimal number with a 0x/0X prefix.
# A malformed value is reported like the other command line errors instead of
# ending in a traceback.
try:
    if args.address.lower().startswith("0x"):
        address = int(args.address[2:], base=16) & 0xffff
    else:
        address = int(args.address) & 0xffff
except ValueError:
    print(("error: invalid starting address '{}'.".format(args.address)), file=sys.stderr)
    sys.exit(1)

# Any flags for current instruction.
flags = 0

# Contains a line of output.
line = ""
# Open input file.
# Display error and exit if filename does not exist or cannot be opened.
try:
    f = open(filename, "rb")
except FileNotFoundError:
    print(("error: input file '{}' not found.".format(filename)), file=sys.stderr)
    sys.exit(1)
except OSError as err:
    # Covers the remaining open() failures, e.g. permission denied or the
    # path being a directory, which would otherwise end in a traceback.
    print(("error: cannot open input file '{}': {}.".format(filename, err.strerror)), file=sys.stderr)
    sys.exit(1)
# Variables:
# address - current instruction address
# opcode - binary instruction opcode (may be multiple bytes)
# length - length of current instruction
# mnemonic - assembler mnemonic for current instruction
# opcodeformat - operand format string
# line - line to output
# leadin - extended opcode (true/false)

# Padding that listing mode slices to line up its columns. It has to be at
# least as long as the widest slice taken from it in this file, which is
# maxLength*3+3 characters; a shorter string silently truncates the padding.
s = " " * (maxLength * 3 + 3)

# Print initial origin address
if args.nolist is False:
    print("{0:04X}{1:s} .org ${0:04X}\n".format(address, s[0:maxLength*3+3]))
else:
    print(" .org ${0:04X}\n".format(address))
# Main disassembly loop. Each pass first emits any data block from the block
# file that covers the current address, then reads, decodes and prints one
# instruction. The loop ends at end of file or on Control-C.
while True:
    try:
        # b and c hold the most recent reads. They start out truthy and only
        # become an empty bytes object when a read hits end of file, so they
        # must never be overwritten with anything else (an int 0 would look
        # like EOF).
        b = c = 1
        label.checkPCAddress(address, labellist)  # Note: if there are no labels this will do nothing

        for block in blocks:
            if address >= block[0] and address <= block[1]:
                # 'a': one character per line as .ascii
                if block[2] == 'a':
                    while address <= block[1]:
                        b = f.read(1)  # Get binary byte from file
                        if not b:  # handle EOF
                            break
                        bvalue = ord(b)
                        if args.nolist is False:
                            line += "{0:04X} {1:02X}{2:s}".format(address, bvalue, s[0:(maxLength-1)*3+1])
                        if isprint(chr(bvalue)):
                            sp = chr(bvalue)
                        else:
                            sp = "${0:02X}".format(bvalue)
                        line += " .ascii {0:s}".format(sp)
                        print(line)
                        address = (address + 1) & 0xffff
                        line = ""
                        label.checkPCAddress(address, labellist)

                # 'b': one byte per line as .byte
                if block[2] == 'b':
                    while address <= block[1]:
                        b = f.read(1)  # Get binary byte from file
                        if not b:  # handle EOF
                            break
                        bvalue = ord(b)
                        if args.nolist is False:
                            line += "{0:04X} {1:02X}{2:s}".format(address, bvalue, s[0:(maxLength-1)*3+1])
                        line += " .byte ${0:02X}".format(bvalue)
                        print(line)
                        address = (address + 1) & 0xffff
                        line = ""
                        label.checkPCAddress(address, labellist)

                # 'W': 16-bit words, first byte is the high byte, as .dw
                if block[2] == 'W':
                    while address <= block[1]:
                        b = f.read(1)  # Get binary byte from file
                        c = f.read(1)
                        if not b or not c:  # unexpected EOF
                            break
                        wvalue = ord(c) + 256*ord(b)
                        if args.nolist is False:
                            line += "{0:04X} {1:02X} {2:02X}{3:s}".format(address, ord(b), ord(c), s[0:(maxLength-2)*3+1])
                        line += " .dw ${0:04X}".format(wvalue)
                        print(line)
                        address = (address + 2) & 0xffff
                        line = ""
                        label.checkPCAddress(address, labellist)

                # 'w': 16-bit words, first byte is the low byte, as .word
                if block[2] == 'w':
                    while address <= block[1]:
                        b = f.read(1)  # Get binary byte from file
                        c = f.read(1)
                        if not b or not c:  # unexpected EOF
                            break
                        wvalue = ord(b) + 256*ord(c)
                        if args.nolist is False:
                            line += "{0:04X} {1:02X} {2:02X}{3:s}".format(address, ord(b), ord(c), s[0:(maxLength-2)*3+1])
                        line += " .word ${0:04X}".format(wvalue)
                        print(line)
                        address = (address + 2) & 0xffff
                        line = ""
                        label.checkPCAddress(address, labellist)

                # 's': the whole block as a single .string line
                if block[2] == 's':
                    strvalue = ''
                    length = 0
                    if args.nolist is False:
                        line += "{0:04X} ".format(address)
                    while (address + length) <= block[1]:
                        b = f.read(1)  # Get binary byte from file
                        if not b:  # handle EOF
                            break
                        length += 1
                        # Keep the numeric value in its own variable: b must
                        # stay a bytes object so that a 0x00 byte (e.g. a
                        # string terminator) is not mistaken for EOF by the
                        # checks after this block.
                        bvalue = ord(b)
                        if isprint(chr(bvalue)):
                            strvalue += chr(bvalue)
                        else:
                            strvalue += '\\x{0:02x}'.format(bvalue)
                        # Only the first maxLength bytes fit in the listing columns.
                        if not args.nolist and length <= maxLength:
                            line += "{0:02X} ".format(bvalue)
                    # Pad the byte columns when the string is shorter than maxLength.
                    if args.nolist is False and length < maxLength:
                        for i in range(maxLength):
                            if i >= length:
                                line += " "
                    line += " .string '{0:s}'".format(strvalue)
                    print(line)
                    address = (address + length) & 0xffff
                    line = ""
                    label.checkPCAddress(address, labellist)

                if not b:
                    break

        if not b or not c:
            break  # unexpected EOF

        b = f.read(1)  # Get binary byte from file

        if not b:  # handle EOF
            if args.nolist is False:
                print("\n{0:04X}{1:s} end".format(address, s[0:maxLength*3+3]))
            else:
                print("\n end")
            break

        # Get op code
        opcode = ord(b)

        # Handle if opcode is a leadin byte
        if opcode in leadInBytes:
            b = f.read(1)  # Get next byte of extended opcode
            if not b:  # Unexpected EOF
                break
            opcode = (opcode << 8) + ord(b)
            leadin = True
        else:
            leadin = False

        # Given opcode, get data from opcode table and address mode table for CPU.
        if opcode in opcodeTable:
            length = opcodeTable[opcode][0]
            mnemonic = opcodeTable[opcode][1]
            mode = opcodeTable[opcode][2]
            if len(opcodeTable[opcode]) > 3:
                flags = opcodeTable[opcode][3]  # Get optional flags
            else:
                flags = 0
            if mode in addressModeTable:
                opcodeformat = addressModeTable[mode]
            else:
                print(("error: mode '{}' not found in addressModeTable.".format(mode)), file=sys.stderr)
                sys.exit(1)
        else:
            length = 1  # Invalid opcode
            opcodeformat = ""
            mnemonic = "???"

        if flags & und and not args.undocumented:
            # currently only handles one-byte undocumented opcodes
            length = 1
            opcodeformat = ""
            mnemonic = "???"
            # The opcode is now shown as a single invalid byte, so its flags
            # no longer apply; a leftover pcr flag would otherwise index an
            # operand that was never read.
            flags = 0

        # Disassembly format:
        # XXXX XX XX XX XX XX nop ($1234,X)
        # With --nolist option:
        # nop ($1234,X)

        # Add current address to output line
        if args.nolist is False:
            if leadin is True:
                line += "{0:04X} {1:02X} {2:02X}".format(address, opcode // 256, opcode % 256)
                # NOTE(review): this adjustment only happens in listing mode,
                # so --nolist processes lead-in instructions with a different
                # length - confirm whether that is intended.
                length -= 1
            else:
                line += "{0:04X} {1:02X}".format(address, opcode)

        op = {}  # Holds operands, keyed by position starting at 1

        # Get any operands and store them
        for i in range(1, maxLength):
            if i < length:
                b = f.read(1)
                if not b:  # Unexpected EOF
                    break
                op[i] = ord(b)  # Get operand bytes
                if args.nolist is False:
                    line += " {0:02X}".format(op[i])
            else:
                # Pad the unused byte columns of the listing.
                if args.nolist is False and leadin is False and i != length-1:
                    line += " "
        if not b:  # Unexpected EOF
            break

        # Handle relative addresses. Indicated by the flag pcr being set.
        # Assumes the operand that needs to be PC relative is the last one.
        # Note: Code will need changes if more flags are added.
        if flags & pcr:
            if op[length-1] < 128:
                op[length-1] = address + op[length-1] + length
            else:
                op[length-1] = address - (256 - op[length-1]) + length
            if op[length-1] < 0:
                op[length-1] += 65536

        # Format the operand using format string and any operands.
        # NOTE(review): when labels is true, opcodeformat is indexed with [0],
        # so the plugin presumably supplies a sequence rather than a plain
        # string in that case - confirm against the plugins.
        if length == 1:
            if opcodeformat != '' and labels:
                operand = opcodeformat[0]
            else:
                operand = opcodeformat
        elif length == 2:
            if labels:
                if flags & pcr:
                    operand = label.getFullLabelrel(labellist, opcodeformat, op[1])
                else:
                    operand = opcodeformat[0].format(op[1])
            else:
                operand = opcodeformat.format(op[1])
        elif length == 3:
            if flags & z80bit:
                opcode = (opcode << 16) + op[2]
                # reread opcode table for real format string
                length, mnemonic, mode, flags = opcodeTable[opcode]
                opcodeformat = addressModeTable[mode]
                operand = opcodeformat.format(op[1])
            else:
                if labels:
                    operand = label.getFullLabelstring(labellist, opcodeformat, op[1], op[2])
                else:
                    operand = opcodeformat.format(op[1], op[2])
        elif length == 4:
            operand = opcodeformat.format(op[1], op[2], op[3])
        elif length == 5:
            operand = opcodeformat.format(op[1], op[2], op[3], op[4])
        elif length == 6:
            operand = opcodeformat.format(op[1], op[2], op[3], op[4], op[5])
        elif length == 7:
            operand = opcodeformat.format(op[1], op[2], op[3], op[4], op[5], op[6])

        # Special check for invalid op code. Display as ??? or .byte depending on command line option.
        if mnemonic == "???" and not args.invalid:
            # Handle case where invalid opcode has a leadin byte.
            if leadin is True:
                if args.nolist is False:
                    mnemonic = "{0:s}.byte ${1:02X},${2:02X}".format(s[0:(maxLength-length-2)*3], opcode // 256, opcode % 256)
                else:
                    mnemonic = ".byte ${0:02X},${1:02X}".format(opcode // 256, opcode % 256)
            else:
                if isprint(chr(opcode)):
                    mnemonic = ".byte '{0:c}'".format(opcode)
                else:
                    mnemonic = ".byte ${0:02X}".format(opcode)

        # Need one more space if not in no list mode.
        if args.nolist is False:
            line += " "

        # Add mnemonic and any operands to the output line.
        if operand == "":
            line += " {0:s}".format(mnemonic)
        else:
            line += " {0:5s} {1:s}".format(mnemonic, operand)

        # Print line of output
        print(line)

        # Update address, handling wraparound at 64K.
        address = (address + length) & 0xffff

        # Reset variables for next line of output.
        line = ""
        operand = ""
        flags = 0

    except KeyboardInterrupt:
        print("Interrupted by Control-C", file=sys.stderr)
        break