forked from llvm/llvm-project
-
-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathextract-section.py
executable file
·153 lines (138 loc) · 4.75 KB
/
extract-section.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
#!/usr/bin/env python
from __future__ import print_function
"""
Helper script to print out the raw content of an ELF section.
Example usages:
```
# print out as bits by default
extract-section.py .text --input-file=foo.o
```
```
# read from stdin and print out in hex
cat foo.o | extract-section.py -h .text
```
This is merely a thin wrapper around `llvm-readobj` that focuses on the raw
binary content of a section and provides more output formatting options.
"""
# Unfortunately reading binary from stdin is not so trivial in Python...
def read_raw_stdin():
    """Read everything available on standard input as raw binary data.

    Returns:
        The complete content of stdin: ``bytes`` on Python 3, ``str`` (a byte
        string) on Python 2.
    """
    import sys

    if sys.version_info >= (3, 0):
        # Python 3 wraps stdin in a text layer; the binary stream lives
        # underneath it.
        return sys.stdin.buffer.read()

    # Python 2: stdin already yields byte strings, but Windows needs the
    # descriptor switched to binary mode first.
    if sys.platform == "win32":
        import msvcrt
        import os

        # The msvcrt function is `setmode`; `setformat` does not exist and
        # raised AttributeError on this code path.
        msvcrt.setmode(sys.stdin.fileno(), os.O_BINARY)
    return sys.stdin.read()
def get_raw_section_dump(readobj_path, section_name, input_file):
    """Run the readobj tool and return its hex dump of one section as text.

    Args:
        readobj_path: Executable to invoke (expected to be llvm-readobj).
        section_name: Name of the section passed to ``--hex-dump=``.
        input_file: Path of the object file, or ``"-"`` to forward this
            process's own stdin to the tool.

    Returns:
        Whatever the tool wrote to stdout, as ``str``.
    """
    import subprocess

    cmd = [
        readobj_path,
        "--elf-output-style=GNU",
        "--hex-dump={}".format(section_name),
        input_file,
    ]
    proc = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    # Only feed the child when it was asked to read from stdin; with no
    # payload, communicate() simply closes the child's stdin pipe.
    payload = read_raw_stdin() if input_file == "-" else None
    out, _ = proc.communicate(input=payload)
    # Python 3 returns bytes from the pipe, Python 2 returns str already.
    return out if isinstance(out, str) else out.decode("utf-8")
# Element widths (in bytes) that evenly divide the 32-bit words of the dump.
_VALID_HEX_WIDTHS = (1, 2, 4)


def build_arg_parser():
    """Create the command-line parser for this script.

    Returns:
        An ``argparse.ArgumentParser`` whose parsed namespace carries
        ``tool_path``, ``input_file``, ``section``, ``format``,
        ``byte_indicator``, ``bits_endian`` and ``hex_width``.
    """
    import argparse

    # The default '-h' (--help) will conflict with our '-h' (hex) format
    arg_parser = argparse.ArgumentParser(add_help=False)
    arg_parser.add_argument(
        "--readobj-path",
        # Store into the attribute that owns the "llvm-readobj" default and
        # that is handed to the tool runner; without this the option was
        # parsed into `readobj_path` and never used.
        dest="tool_path",
        metavar="<executable path>",
        type=str,
        help="Path to llvm-readobj",
    )
    arg_parser.add_argument(
        "--input-file",
        metavar="<file>",
        type=str,
        help="Input object file, or '-' to read from stdin",
    )
    arg_parser.add_argument(
        "section", metavar="<name>", type=str, help="Name of the section to extract"
    )
    # Output format
    format_group = arg_parser.add_mutually_exclusive_group()
    format_group.add_argument(
        "-b",
        dest="format",
        action="store_const",
        const="bits",
        help="Print out in bits",
    )
    arg_parser.add_argument(
        "--byte-indicator",
        action="store_true",
        help="Whether to print a '.' every 8 bits in bits printing mode",
    )
    arg_parser.add_argument(
        "--bits-endian",
        metavar="<little/big>",
        type=str,
        choices=["little", "big"],
        help="Print out bits in specified endianness (little or big); defaults to big",
    )
    format_group.add_argument(
        "-h",
        dest="format",
        action="store_const",
        const="hex",
        help="Print out in hexadecimal",
    )
    arg_parser.add_argument(
        "--hex-width",
        metavar="<# of bytes>",
        type=int,
        help="The width (in byte) of every element in hex printing mode",
    )
    arg_parser.add_argument("--help", action="help")
    arg_parser.set_defaults(
        format="bits",
        tool_path="llvm-readobj",
        input_file="-",
        byte_indicator=False,
        hex_width=4,
        bits_endian="big",
    )
    return arg_parser


def format_section_dump(
    raw_section, fmt="bits", bits_endian="big", byte_indicator=False, hex_width=4
):
    """Convert the textual hex dump of a section into the requested format.

    Every line is split on single spaces; the first field (the address) is
    skipped and up to four following fields are parsed as hexadecimal 32-bit
    words. Parsing of a line stops at the first field that is not valid hex.

    Args:
        raw_section: Text produced by the readobj hex dump.
        fmt: ``"bits"`` or ``"hex"``; any other value produces no elements.
        bits_endian: ``"little"`` emits the bytes of each word from least to
            most significant; anything else emits them most significant
            first. Bits inside a byte are always most significant first.
        byte_indicator: In bits mode, append a ``"."`` element after each byte.
        hex_width: In hex mode, number of bytes per printed element.

    Returns:
        The elements joined by single spaces (no trailing newline).

    Raises:
        ValueError: If ``fmt`` is ``"hex"`` and ``hex_width`` is not 1, 2 or 4.
    """
    if fmt == "hex":
        # Checked up front: the previous in-loop assert was swallowed by a
        # bare `except`, silently yielding empty output; a width of 3 passed
        # the assert but dropped the low byte of every word.
        if hex_width not in _VALID_HEX_WIDTHS:
            raise ValueError(
                "hex width must be one of 1, 2 or 4 bytes (got {})".format(hex_width)
            )
        width_bits = hex_width * 8
        chunk_shifts = range(32 - width_bits, -1, -width_bits)
        chunk_mask = (1 << width_bits) - 1
        chunk_format = "{:0" + str(hex_width * 2) + "x}"

    byte_shifts = (0, 8, 16, 24) if bits_endian == "little" else (24, 16, 8, 0)

    results = []
    for line in raw_section.splitlines():
        if line.startswith("Hex dump"):
            continue
        # Field 0 is the address column; at most four data words follow it.
        for part in line.strip().split(" ")[1:5]:
            try:
                val = int(part, 16)
            except ValueError:
                # exclude any non-hex dump string: the rest of this line
                # carries no more data words
                break
            # NOTE(review): a field shorter than 8 hex digits is still
            # expanded as a full 32-bit value (zero-extended at the top).
            # Confirm this is intended for sections whose size is not a
            # multiple of 4 bytes.
            if fmt == "bits":
                # divided into bytes first
                for shift in byte_shifts:
                    byte = (val >> shift) & 0xFF
                    results.extend(str((byte >> bit) & 1) for bit in range(7, -1, -1))
                    if byte_indicator:
                        results.append(".")
            elif fmt == "hex":
                results.extend(
                    chunk_format.format((val >> shift) & chunk_mask)
                    for shift in chunk_shifts
                )
    return " ".join(results)


def main(argv=None):
    """Parse the command line, dump the section and print it.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.
    """
    arg_parser = build_arg_parser()
    args = arg_parser.parse_args(argv)
    # Reject an unusable width before spawning the external tool.
    if args.format == "hex" and args.hex_width not in _VALID_HEX_WIDTHS:
        arg_parser.error(
            "--hex-width must be one of 1, 2 or 4 (got {})".format(args.hex_width)
        )
    raw_section = get_raw_section_dump(args.tool_path, args.section, args.input_file)
    formatted = format_section_dump(
        raw_section,
        fmt=args.format,
        bits_endian=args.bits_endian,
        byte_indicator=args.byte_indicator,
        hex_width=args.hex_width,
    )
    print(formatted, end="")


if __name__ == "__main__":
    main()