From 439dc29becb2bc9ac7e9ca36479f6e903265ae62 Mon Sep 17 00:00:00 2001 From: Ivan Perez Date: Tue, 5 Dec 2023 08:41:26 +0000 Subject: [PATCH] [analyzer] Fix processing of llvm-objdump-14's output in ikos-scan (#203). The output format of llvm-objdump has changed between version 9 (the one supported by IKOS 3.0) and version 14 (the one supported by IKOS 3.1). Specifically, the output contains one fewer empty line before the actual content of the section being extracted from a binary file. This difference is breaking ikos-scan, which parses the output of llvm-objdump looking for the section. The way that such output is produced by llvm-objdump is hard-coded. There are no settings that would allow us to obtain only the hex contents of the section we are looking for without the preceding preamble, the addresses, or the trailing ASCII. This commit updates the code that processes the output to skip only three lines, conforming to the output produced by llvm-objdump-14. The code is also documented to help understand the nature of that magic number, as well as other aspects of that "parser". --- analyzer/python/ikos/scan.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/analyzer/python/ikos/scan.py b/analyzer/python/ikos/scan.py index e5bbfc84..80941b95 100644 --- a/analyzer/python/ikos/scan.py +++ b/analyzer/python/ikos/scan.py @@ -571,7 +571,14 @@ def extract_bitcode(exe_path, bc_path): output = check_output(cmd) section_content = b'' - for line in itertools.islice(output.splitlines(), 4, None): + # The output of llvm-objdump is prefixed by three lines: an empty one, one + # that specifies the format, and one that describes what comes next (the + # contents of the section requested). After that, lines have an address, + # the hex representation (36 characters plus spaces), and the ASCII + # representation, with some spaces in between sections or columns. The + # following obtains only the hex code, ignoring the ASCII and the + # addresses. 
+ for line in itertools.islice(output.splitlines(), 3, None): n = line.find(b' ', 1) line = line[n + 1:n + 36] for item in line.split(b' '):