-
Notifications
You must be signed in to change notification settings - Fork 1.5k
/
disasm.py
177 lines (145 loc) · 7.5 KB
/
disasm.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
import capstone
import binwalk.core.common
import binwalk.core.compat
from binwalk.core.module import Module, Option, Kwarg
class ArchResult(object):
    """Attribute bag describing one candidate architecture match.

    Every keyword argument given to the constructor becomes an instance
    attribute of the same name. Disasm.scan_file builds instances with
    ``offset``, ``description``, ``insns`` and ``count``.
    """

    def __init__(self, **kwargs):
        # kwargs is always a plain dict, so it can be iterated directly on
        # both Python 2 and Python 3 without a compatibility helper.
        for (name, value) in kwargs.items():
            setattr(self, name, value)

    def __repr__(self):
        # Sorted by attribute name so the output does not depend on the
        # order in which the keyword arguments were supplied.
        fields = ", ".join("%s=%r" % item for item in sorted(vars(self).items()))
        return "%s(%s)" % (type(self).__name__, fields)
class Architecture(object):
    """Attribute bag describing one disassembler configuration.

    Every keyword argument given to the constructor becomes an instance
    attribute of the same name. The entries of Disasm.ARCHITECTURES supply
    ``type``, ``mode``, ``endianness`` and ``description``.
    """

    def __init__(self, **kwargs):
        # kwargs is always a plain dict, so it can be iterated directly on
        # both Python 2 and Python 3 without a compatibility helper.
        for (name, value) in kwargs.items():
            setattr(self, name, value)

    def __repr__(self):
        # Sorted by attribute name so the output does not depend on the
        # order in which the keyword arguments were supplied.
        fields = ", ".join("%s=%r" % item for item in sorted(vars(self).items()))
        return "%s(%s)" % (type(self).__name__, fields)
class Disasm(Module):
    """Guess the CPU architecture of a file by trial disassembly.

    Each data block read from a file is walked one byte at a time; at every
    offset a small code block is handed to one capstone disassembler per
    entry in ARCHITECTURES. An architecture is reported when a code block
    disassembles into at least ``min_insn_count`` instructions.
    """

    # scan_file stops walking the current data block once this many code
    # blocks have matched the same architecture.
    THRESHOLD = 10
    # Fallback used by init() when min_insn_count is falsy (e.g. 0).
    DEFAULT_MIN_INSN_COUNT = 500

    # TITLE, ORDER, CLI and KWARGS are read by the Module base class, which
    # is defined outside this file.
    TITLE = "Disassembly Scan"
    ORDER = 10

    CLI = [
        Option(short='Y',
               long='disasm',
               kwargs={'enabled': True},
               description='Identify the CPU architecture of a file using the capstone disassembler'),
        Option(short='T',
               long='minsn',
               type=int,
               kwargs={'min_insn_count': 0},
               description='Minimum number of consecutive instructions to be considered valid (default: %d)' % DEFAULT_MIN_INSN_COUNT),
        Option(long='continue',
               short='k',
               kwargs={'keep_going': True},
               description="Don't stop at the first match"),
    ]

    KWARGS = [
        Kwarg(name='enabled', default=False),
        Kwarg(name='keep_going', default=False),
        Kwarg(name='min_insn_count', default=DEFAULT_MIN_INSN_COUNT),
    ]

    # One entry per disassembler that init() creates. Disassemblers are
    # tried in this order for every code block.
    ARCHITECTURES = [
        Architecture(type=capstone.CS_ARCH_ARM,
                     mode=capstone.CS_MODE_ARM,
                     endianness=capstone.CS_MODE_BIG_ENDIAN,
                     description="ARM executable code, 32-bit, big endian"),
        Architecture(type=capstone.CS_ARCH_ARM,
                     mode=capstone.CS_MODE_ARM,
                     endianness=capstone.CS_MODE_LITTLE_ENDIAN,
                     description="ARM executable code, 32-bit, little endian"),
        Architecture(type=capstone.CS_ARCH_ARM64,
                     mode=capstone.CS_MODE_ARM,
                     endianness=capstone.CS_MODE_BIG_ENDIAN,
                     description="ARM executable code, 64-bit, big endian"),
        Architecture(type=capstone.CS_ARCH_ARM64,
                     mode=capstone.CS_MODE_ARM,
                     endianness=capstone.CS_MODE_LITTLE_ENDIAN,
                     description="ARM executable code, 64-bit, little endian"),
        Architecture(type=capstone.CS_ARCH_PPC,
                     mode=capstone.CS_MODE_BIG_ENDIAN,
                     endianness=capstone.CS_MODE_BIG_ENDIAN,
                     description="PPC executable code, 32/64-bit, big endian"),
        Architecture(type=capstone.CS_ARCH_MIPS,
                     mode=capstone.CS_MODE_64,
                     endianness=capstone.CS_MODE_BIG_ENDIAN,
                     description="MIPS executable code, 32/64-bit, big endian"),
        Architecture(type=capstone.CS_ARCH_MIPS,
                     mode=capstone.CS_MODE_64,
                     endianness=capstone.CS_MODE_LITTLE_ENDIAN,
                     description="MIPS executable code, 32/64-bit, little endian"),
        Architecture(type=capstone.CS_ARCH_ARM,
                     mode=capstone.CS_MODE_THUMB,
                     endianness=capstone.CS_MODE_LITTLE_ENDIAN,
                     description="ARM executable code, 16-bit (Thumb), little endian"),
        Architecture(type=capstone.CS_ARCH_ARM,
                     mode=capstone.CS_MODE_THUMB,
                     endianness=capstone.CS_MODE_BIG_ENDIAN,
                     description="ARM executable code, 16-bit (Thumb), big endian"),
    ]

    def init(self):
        """Resolve the instruction threshold and create the disassemblers.

        Sets ``self.min_insn_count`` (falling back to DEFAULT_MIN_INSN_COUNT
        when it is falsy), ``self.disasm_data_size`` (ten bytes per required
        instruction) and ``self.disassemblers``, a list of
        ``(capstone.Cs, description)`` tuples in ARCHITECTURES order.
        """
        self.disassemblers = []

        if not self.min_insn_count:
            self.min_insn_count = self.DEFAULT_MIN_INSN_COUNT

        self.disasm_data_size = self.min_insn_count * 10

        for arch in self.ARCHITECTURES:
            # Mode flags are combined with bitwise OR, not addition: the two
            # only agree when the operands share no bits, and the PPC entry
            # uses CS_MODE_BIG_ENDIAN for both mode and endianness, so adding
            # them would double the flag value instead of keeping it set.
            handle = capstone.Cs(arch.type, arch.mode | arch.endianness)
            self.disassemblers.append((handle, arch.description))

    def scan_file(self, fp):
        """Scan one file and report the architecture(s) its code matches.

        ``fp`` must provide ``read_block()`` returning ``(data, dlen)`` and an
        ``offset`` attribute. Blocks are read until ``dlen`` drops below 1.
        At most one primary result is reported per data block; with verbose
        output enabled, each disassembled instruction of that result is
        reported as well. Unless ``keep_going`` is set, scanning stops after
        the first result that is both valid and displayed.
        """
        total_read = 0

        while True:
            result = None

            (data, dlen) = fp.read_block()
            if dlen < 1:
                break

            # If this data block doesn't contain at least two different bytes, skip it
            # to prevent false positives (e.g., "\x00\x00\x00\x00" is a nop in
            # MIPS).
            if len(set(data)) >= 2:
                block_offset = 0

                # Loop through the entire block, or until we're pretty sure
                # we've found some valid code in this block
                while (block_offset < dlen) and (result is None or result.count < self.THRESHOLD):
                    # Don't pass the entire data block into disasm_lite, it's horribly inefficient
                    # to pass large strings around in Python. Break it up into
                    # smaller code blocks instead.
                    code_block = binwalk.core.compat.str2bytes(data[block_offset:block_offset + self.disasm_data_size])

                    # If this code block doesn't contain at least two different bytes, skip it
                    # to prevent false positives (e.g., "\x00\x00\x00\x00" is a
                    # nop in MIPS).
                    if len(set(code_block)) >= 2:
                        # Position of this code block relative to the first
                        # byte read from fp; also used as the disassembly
                        # base address.
                        address = total_read + block_offset

                        for (md, description) in self.disassemblers:
                            insns = list(md.disasm_lite(code_block, address))
                            binwalk.core.common.debug("0x%.8X %s, at least %d valid instructions" % (address,
                                                                                                      description,
                                                                                                      len(insns)))

                            # Did we disassemble at least self.min_insn_count
                            # instructions?
                            if len(insns) >= self.min_insn_count:
                                # If we've already found the same type of code
                                # in this block, simply update the result
                                # counter
                                if result is not None and result.description == description:
                                    result.count += 1
                                    if result.count >= self.THRESHOLD:
                                        break
                                else:
                                    # First match, or a match for a different
                                    # architecture: start a fresh result.
                                    result = ArchResult(offset=address + fp.offset,
                                                        description=description,
                                                        insns=insns,
                                                        count=1)

                    # Slide the window forward by a single byte.
                    block_offset += 1
                    self.status.completed += 1

                if result is not None:
                    r = self.result(offset=result.offset,
                                    file=fp,
                                    description=(result.description + ", at least %d valid instructions" % len(result.insns)))

                    if r.valid and r.display:
                        if self.config.verbose:
                            # Each item is unpacked as
                            # (position, size, mnemonic, operands).
                            for (position, size, mnem, opnds) in result.insns:
                                self.result(offset=position, file=fp, description="%s %s" % (mnem, opnds))
                        if not self.keep_going:
                            return

            total_read += dlen
            # Replace the per-byte progress ticks with the exact byte count.
            self.status.completed = total_read

    def run(self):
        """Scan every file returned by next_file() until it returns None."""
        for fp in iter(self.next_file, None):
            self.header()
            self.scan_file(fp)
            self.footer()