diff --git a/README.md b/README.md index 5998956..895a92a 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,6 @@ PoC by Shiz, bugfixing and 64-bit version by PoroCYon. * GCC (not clang, as the latter doesn't support `nolto-rel` output), GNU ld, binutils, GNU make, ... * nasm 2.13 or newer -* `scanelf` from `pax-utils` * Python 3 ## Usage @@ -25,7 +24,7 @@ the smol startup/symbol resolving code will jump to an undefined location. ``` usage: smold.py [-h] [-m TARGET] [-l LIB] [-L DIR] [-s] [-n] [-d] [-fuse-interp] [-falign-stack] [-fuse-nx] [-fuse-dnload-loader] [-fskip-zero-value] [-fuse-dt-debug] [-fuse-dl-fini] [-fskip-entries] - [-fno-start-arg] [-funsafe-dynamic] [--nasm NASM] [--cc CC] [--scanelf SCANELF] [--readelf READELF] + [-fno-start-arg] [-funsafe-dynamic] [--nasm NASM] [--cc CC] [--readelf READELF] [--cflags CFLAGS] [--asflags ASFLAGS] [--ldflags LDFLAGS] [--smolrt SMOLRT] [--smolld SMOLLD] [--verbose] [--keeptmp] input [input ...] output @@ -71,7 +70,6 @@ optional arguments: entire binary as the Dyn table, so only enable this if you're sure this won't break things! --nasm NASM which nasm binary to use --cc CC which cc binary to use (MUST BE GCC!) - --scanelf SCANELF which scanelf binary to use --readelf READELF which readelf binary to use --cflags CFLAGS Flags to pass to the C compiler for the relinking step --asflags ASFLAGS Flags to pass to the assembler when creating the ELF header and runtime startup code diff --git a/smol/parse.py b/smol/parse.py index 09ba4fd..a078c1e 100644 --- a/smol/parse.py +++ b/smol/parse.py @@ -4,6 +4,7 @@ import subprocess import struct import sys +import re from .shared import * @@ -150,14 +151,38 @@ def find_lib(spaths, wanted): error("E: couldn't find library '" + wanted + "'.") -def find_libs(spaths, wanted): return map(lambda l: find_lib(spaths, l), wanted) +def find_libs(spaths, wanted): + return [find_lib(spaths, l) for l in wanted] -def find_symbol(scanelf_bin, libraries, libnames, symbol): - output = subprocess.check_output([scanelf_bin, '-B', '-F' '%s %S', '-s', \ - '+{}'.format(symbol)] + libraries, stderr=subprocess.DEVNULL) - for entry in output.decode('utf-8').splitlines(): - sym, soname, path = entry.split(' ', 2) - if symbol in sym.split(',') and \ - any(soname.startswith('lib'+l) for l in libnames): - return soname +def list_symbols(readelf_bin, lib): + out = subprocess.check_output([readelf_bin, '-sW', lib], stderr=subprocess.DEVNULL) + + lines = set(out.decode('utf-8').split('\n')) + symbols = [] + + for line in lines: + fields = re.split(r"\s+", line) + if len(fields) != 9: + continue + + vis, ndx, symbol = fields[6:9] + if vis != "DEFAULT" or ndx == "UND": + continue + # strip away GNU versions + symbol = re.sub(r"@@.*$", "", symbol) + symbols.append(symbol) + + return symbols + +def build_symbol_map(readelf_bin, libraries): + # create dictionary that maps symbols to libraries that provide them + symbol_map = {} + for lib in libraries: + symbols = list_symbols(readelf_bin, lib) + for symbol in symbols: + if symbol not in symbol_map: + symbol_map[symbol] = [] + soname = lib.split("/")[-1] + symbol_map[symbol].append(soname) + return symbol_map diff --git a/smold.py b/smold.py index 22d434b..ab39688 100755 --- a/smold.py +++ b/smold.py @@ -85,8 +85,6 @@ def main(): help="which nasm binary to use") parser.add_argument('--cc', default=os.getenv('CC') or shutil.which('cc'), \ help="which cc binary to use (MUST BE GCC!)") - parser.add_argument('--scanelf', default=os.getenv('SCANELF') or shutil.which('scanelf'), \ - help="which scanelf binary to use") parser.add_argument('--readelf', default=os.getenv('READELF') or shutil.which('readelf'), \ help="which readelf binary to use") @@ -134,7 +132,7 @@ def main(): if args.fifunc_support: args.asflags.insert(0, "-DIFUNC_SUPPORT") if args.fifunc_strict_cconv: args.asflags.insert(0, "-DIFUNC_CORRECT_CCONV") - for x in ['nasm','cc','scanelf','readelf']: + for x in ['nasm','cc','readelf']: val = args.__dict__[x] if val is None or not os.path.isfile(val): error("'%s' binary%s not found" % @@ -169,13 +167,17 @@ def main(): syms = get_needed_syms(args.readelf, objinput) spaths = args.libdir + cc_paths['libraries'] libraries = cc_paths['libraries'] - libs = list(find_libs(spaths, args.library)) + libs = find_libs(spaths, args.library) if args.verbose: eprintf("libs = %s" % str(libs)) + libs_symbol_map = build_symbol_map(args.readelf, libs) symbols = {} for symbol, reloc in syms: - library = find_symbol(args.scanelf, libs, args.library, symbol) - if not library: + if symbol not in libs_symbol_map: error("could not find symbol: {}".format(symbol)) + libs_for_symbol = libs_symbol_map[symbol] + if len(libs_for_symbol) > 1: + error("E: the symbol '" + symbol + "' is provided by more than one library: " + str(libs_for_symbol)) + library = libs_for_symbol[0] symbols.setdefault(library, []) symbols[library].append((symbol, reloc)) diff --git a/smoldd.py b/smoldd.py index c23c3b2..5e35810 100755 --- a/smoldd.py +++ b/smoldd.py @@ -36,19 +36,10 @@ def find_libs(deflibs, libname): for d in dirs: for f in glob.glob(glob.escape(d + '/' + libname) + '*'): yield f -def build_hashtab(scanelf_bin, lib): - out = subprocess.check_output([scanelf_bin, '-B', '-F', '%s', '-s', '%pd%*', lib], - stderr=subprocess.DEVNULL) +def build_hashtab(readelf_bin, lib): + symbols = list_symbols(readelf_bin, lib) - blah = set(out.decode('utf-8').split('\n')) - ret = dict({}) - - for x in blah: - y = x.split() - if len(y) != 7: continue - ret[hash_djb2(y[6])] = y[6] - - return ret + return { hash_djb2(symbol):symbol for symbol in symbols } def addr2off(elf, addr): for x in elf.phdrs: @@ -147,8 +138,8 @@ def main(): default=sys.stdin.buffer, help="input file") parser.add_argument('--cc', default=shutil.which('cc'), help="C compiler binary") - parser.add_argument('--scanelf', - default=shutil.which('scanelf'), help="scanelf binary") + parser.add_argument('--readelf', + default=shutil.which('readelf'), help="readelf binary") parser.add_argument('--map', type=argparse.FileType('r'), help=\ "Get the address of the symbol hash table from the "+\ "linker map output instead of attempting to parse the"+\ @@ -164,7 +155,7 @@ def main(): htbl = get_hashtbl(elf, blob, args) - libhashes = dict((l, build_hashtab(args.scanelf, neededpaths[l])) for l in needed) + libhashes = dict((l, build_hashtab(args.readelf, neededpaths[l])) for l in needed) hashresolves = dict({}) noresolves = []