Skip to content

Commit a7b2e73

Browse files
authored
Add support for reading the dynamic symbol table from PT_DYNAMIC (llvm#112596)
Allow LLDB to parse the dynamic symbol table from an ELF file or memory image in an ELF file that has no section headers. This patch uses the ability to parse the PT_DYNAMIC segment and find the DT_SYMTAB, DT_SYMENT, DT_HASH or DT_GNU_HASH to find and parse the dynamic symbol table if the section headers are not present. It also adds a helper function to read data from a .dynamic key/value pair entry correctly from the file or from memory.
1 parent 589ab28 commit a7b2e73

File tree

3 files changed

+244
-21
lines changed

3 files changed

+244
-21
lines changed

lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp

Lines changed: 161 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
#include "llvm/Support/MathExtras.h"
4545
#include "llvm/Support/MemoryBuffer.h"
4646
#include "llvm/Support/MipsABIFlags.h"
47+
#include "lldb/Target/Process.h"
4748

4849
#define CASE_AND_STREAM(s, def, width) \
4950
case def: \
@@ -3007,9 +3008,10 @@ void ObjectFileELF::ParseSymtab(Symtab &lldb_symtab) {
30073008
// section, no matter if .symtab was already parsed or not. This is because
30083009
// minidebuginfo normally removes the .symtab symbols which have their
30093010
// matching .dynsym counterparts.
3011+
Section *dynsym = nullptr;
30103012
if (!symtab ||
30113013
GetSectionList()->FindSectionByName(ConstString(".gnu_debugdata"))) {
3012-
Section *dynsym =
3014+
dynsym =
30133015
section_list->FindSectionByType(eSectionTypeELFDynamicSymbols, true)
30143016
.get();
30153017
if (dynsym) {
@@ -3019,6 +3021,20 @@ void ObjectFileELF::ParseSymtab(Symtab &lldb_symtab) {
30193021
m_address_class_map.merge(address_class_map);
30203022
}
30213023
}
3024+
if (!dynsym) {
3025+
// Try and read the dynamic symbol table from the .dynamic section.
3026+
uint32_t num_symbols = 0;
3027+
std::optional<DataExtractor> symtab_data =
3028+
GetDynsymDataFromDynamic(num_symbols);
3029+
std::optional<DataExtractor> strtab_data = GetDynstrData();
3030+
if (symtab_data && strtab_data) {
3031+
auto [num_symbols_parsed, address_class_map] =
3032+
ParseSymbols(&lldb_symtab, symbol_id, section_list, num_symbols,
3033+
symtab_data.value(), strtab_data.value());
3034+
symbol_id += num_symbols_parsed;
3035+
m_address_class_map.merge(address_class_map);
3036+
}
3037+
}
30223038

30233039
// DT_JMPREL
30243040
// If present, this entry's d_ptr member holds the address of
@@ -3828,6 +3844,33 @@ ObjectFileELF::MapFileDataWritable(const FileSpec &file, uint64_t Size,
38283844
Offset);
38293845
}
38303846

3847+
std::optional<DataExtractor>
3848+
ObjectFileELF::ReadDataFromDynamic(const ELFDynamic *dyn, uint64_t length,
3849+
uint64_t offset) {
3850+
// ELFDynamic values contain a "d_ptr" member that will be a load address if
3851+
// we have an ELF file read from memory, or it will be a file address if it
3852+
// was read from a ELF file. This function will correctly fetch data pointed
3853+
// to by the ELFDynamic::d_ptr, or return std::nullopt if the data isn't
3854+
// available.
3855+
const lldb::addr_t d_ptr_addr = dyn->d_ptr + offset;
3856+
if (ProcessSP process_sp = m_process_wp.lock()) {
3857+
if (DataBufferSP data_sp = ReadMemory(process_sp, d_ptr_addr, length))
3858+
return DataExtractor(data_sp, GetByteOrder(), GetAddressByteSize());
3859+
} else {
3860+
// We have an ELF file with no section headers or we didn't find the
3861+
// .dynamic section. Try and find the .dynstr section.
3862+
Address addr;
3863+
if (!addr.ResolveAddressUsingFileSections(d_ptr_addr, GetSectionList()))
3864+
return std::nullopt;
3865+
DataExtractor data;
3866+
addr.GetSection()->GetSectionData(data);
3867+
return DataExtractor(data,
3868+
d_ptr_addr - addr.GetSection()->GetFileAddress(),
3869+
length);
3870+
}
3871+
return std::nullopt;
3872+
}
3873+
38313874
std::optional<DataExtractor> ObjectFileELF::GetDynstrData() {
38323875
if (SectionList *section_list = GetSectionList()) {
38333876
// Find the SHT_DYNAMIC section.
@@ -3855,31 +3898,15 @@ std::optional<DataExtractor> ObjectFileELF::GetDynstrData() {
38553898
// and represent the dynamic symbol tables's string table. These are needed
38563899
// by the dynamic loader and we can read them from a process' address space.
38573900
//
3858-
// When loading and ELF file from memory, only the program headers end up
3859-
// being mapped into memory, and we can find these values in the PT_DYNAMIC
3860-
// segment.
3901+
// When loading an ELF file from memory, only the program headers are
3902+
// guaranteed to end up being mapped into memory, and we can find these values
3903+
// in the PT_DYNAMIC segment.
38613904
const ELFDynamic *strtab = FindDynamicSymbol(DT_STRTAB);
38623905
const ELFDynamic *strsz = FindDynamicSymbol(DT_STRSZ);
38633906
if (strtab == nullptr || strsz == nullptr)
38643907
return std::nullopt;
38653908

3866-
if (ProcessSP process_sp = m_process_wp.lock()) {
3867-
if (DataBufferSP data_sp =
3868-
ReadMemory(process_sp, strtab->d_ptr, strsz->d_val))
3869-
return DataExtractor(data_sp, GetByteOrder(), GetAddressByteSize());
3870-
} else {
3871-
// We have an ELF file with no section headers or we didn't find the
3872-
// .dynamic section. Try and find the .dynstr section.
3873-
Address addr;
3874-
if (addr.ResolveAddressUsingFileSections(strtab->d_ptr, GetSectionList())) {
3875-
DataExtractor data;
3876-
addr.GetSection()->GetSectionData(data);
3877-
return DataExtractor(data,
3878-
strtab->d_ptr - addr.GetSection()->GetFileAddress(),
3879-
strsz->d_val);
3880-
}
3881-
}
3882-
return std::nullopt;
3909+
return ReadDataFromDynamic(strtab, strsz->d_val, /*offset=*/0);
38833910
}
38843911

38853912
std::optional<lldb_private::DataExtractor> ObjectFileELF::GetDynamicData() {
@@ -3912,3 +3939,116 @@ std::optional<lldb_private::DataExtractor> ObjectFileELF::GetDynamicData() {
39123939
}
39133940
return std::nullopt;
39143941
}
3942+
3943+
std::optional<uint32_t> ObjectFileELF::GetNumSymbolsFromDynamicHash() {
3944+
const ELFDynamic *hash = FindDynamicSymbol(DT_HASH);
3945+
if (hash == nullptr)
3946+
return std::nullopt;
3947+
3948+
// The DT_HASH header looks like this:
3949+
struct DtHashHeader {
3950+
uint32_t nbucket;
3951+
uint32_t nchain;
3952+
};
3953+
if (auto data = ReadDataFromDynamic(hash, 8)) {
3954+
// We don't need the number of buckets value "nbucket", we just need the
3955+
// "nchain" value which contains the number of symbols.
3956+
offset_t offset = offsetof(DtHashHeader, nchain);
3957+
return data->GetU32(&offset);
3958+
}
3959+
3960+
return std::nullopt;
3961+
}
3962+
3963+
std::optional<uint32_t> ObjectFileELF::GetNumSymbolsFromDynamicGnuHash() {
3964+
const ELFDynamic *gnu_hash = FindDynamicSymbol(DT_GNU_HASH);
3965+
if (gnu_hash == nullptr)
3966+
return std::nullopt;
3967+
3968+
// Create a DT_GNU_HASH header
3969+
// https://flapenguin.me/elf-dt-gnu-hash
3970+
struct DtGnuHashHeader {
3971+
uint32_t nbuckets = 0;
3972+
uint32_t symoffset = 0;
3973+
uint32_t bloom_size = 0;
3974+
uint32_t bloom_shift = 0;
3975+
};
3976+
uint32_t num_symbols = 0;
3977+
// Read enogh data for the DT_GNU_HASH header so we can extract the values.
3978+
if (auto data = ReadDataFromDynamic(gnu_hash, sizeof(DtGnuHashHeader))) {
3979+
offset_t offset = 0;
3980+
DtGnuHashHeader header;
3981+
header.nbuckets = data->GetU32(&offset);
3982+
header.symoffset = data->GetU32(&offset);
3983+
header.bloom_size = data->GetU32(&offset);
3984+
header.bloom_shift = data->GetU32(&offset);
3985+
const size_t addr_size = GetAddressByteSize();
3986+
const addr_t buckets_offset =
3987+
sizeof(DtGnuHashHeader) + addr_size * header.bloom_size;
3988+
std::vector<uint32_t> buckets;
3989+
if (auto bucket_data = ReadDataFromDynamic(gnu_hash, header.nbuckets * 4, buckets_offset)) {
3990+
offset = 0;
3991+
for (uint32_t i = 0; i < header.nbuckets; ++i)
3992+
buckets.push_back(bucket_data->GetU32(&offset));
3993+
// Locate the chain that handles the largest index bucket.
3994+
uint32_t last_symbol = 0;
3995+
for (uint32_t bucket_value : buckets)
3996+
last_symbol = std::max(bucket_value, last_symbol);
3997+
if (last_symbol < header.symoffset) {
3998+
num_symbols = header.symoffset;
3999+
} else {
4000+
// Walk the bucket's chain to add the chain length to the total.
4001+
const addr_t chains_base_offset = buckets_offset + header.nbuckets * 4;
4002+
for (;;) {
4003+
if (auto chain_entry_data = ReadDataFromDynamic(gnu_hash, 4, chains_base_offset + (last_symbol - header.symoffset) * 4)) {
4004+
offset = 0;
4005+
uint32_t chain_entry = chain_entry_data->GetU32(&offset);
4006+
++last_symbol;
4007+
// If the low bit is set, this entry is the end of the chain.
4008+
if (chain_entry & 1)
4009+
break;
4010+
} else {
4011+
break;
4012+
}
4013+
}
4014+
num_symbols = last_symbol;
4015+
}
4016+
}
4017+
}
4018+
if (num_symbols > 0)
4019+
return num_symbols;
4020+
4021+
return std::nullopt;
4022+
}
4023+
4024+
std::optional<DataExtractor>
4025+
ObjectFileELF::GetDynsymDataFromDynamic(uint32_t &num_symbols) {
4026+
// Every ELF file which represents an executable or shared library has
4027+
// mandatory .dynamic entries. The DT_SYMTAB value contains a pointer to the
4028+
// symbol table, and DT_SYMENT contains the size of a symbol table entry.
4029+
// We then can use either the DT_HASH or DT_GNU_HASH to find the number of
4030+
// symbols in the symbol table as the symbol count is not stored in the
4031+
// .dynamic section as a key/value pair.
4032+
//
4033+
// When loading and ELF file from memory, only the program headers end up
4034+
// being mapped into memory, and we can find these values in the PT_DYNAMIC
4035+
// segment.
4036+
num_symbols = 0;
4037+
// Get the process in case this is an in memory ELF file.
4038+
ProcessSP process_sp(m_process_wp.lock());
4039+
const ELFDynamic *symtab = FindDynamicSymbol(DT_SYMTAB);
4040+
const ELFDynamic *syment = FindDynamicSymbol(DT_SYMENT);
4041+
// DT_SYMTAB and DT_SYMENT are mandatory.
4042+
if (symtab == nullptr || syment == nullptr)
4043+
return std::nullopt;
4044+
4045+
if (std::optional<uint32_t> syms = GetNumSymbolsFromDynamicHash())
4046+
num_symbols = *syms;
4047+
else if (std::optional<uint32_t> syms = GetNumSymbolsFromDynamicGnuHash())
4048+
num_symbols = *syms;
4049+
else
4050+
return std::nullopt;
4051+
if (num_symbols == 0)
4052+
return std::nullopt;
4053+
return ReadDataFromDynamic(symtab, syment->d_val * num_symbols);
4054+
}

lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.h

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -435,6 +435,47 @@ class ObjectFileELF : public lldb_private::ObjectFile {
435435
/// \return The bytes that represent the string table data or \c std::nullopt
436436
/// if an error occurred.
437437
std::optional<lldb_private::DataExtractor> GetDynstrData();
438+
439+
/// Read the bytes pointed to by the \a dyn dynamic entry.
440+
///
441+
/// ELFDynamic::d_ptr values contain file addresses if we load the ELF file
442+
/// from a file on disk, or they contain load addresses if they were read
443+
/// from memory. This function will correctly extract the data in both cases
444+
/// if it is available.
445+
///
446+
/// \param[in] dyn The dynamic entry to use to fetch the data from.
447+
///
448+
/// \param[in] length The number of bytes to read.
449+
///
450+
/// \param[in] offset The number of bytes to skip after the d_ptr value
451+
/// before reading data.
452+
///
453+
/// \return The bytes that represent the dynamic entry's data or
454+
/// \c std::nullopt if an error occurred or the data is not available.
455+
std::optional<lldb_private::DataExtractor>
456+
ReadDataFromDynamic(const elf::ELFDynamic *dyn, uint64_t length,
457+
uint64_t offset = 0);
458+
459+
/// Get the bytes that represent the dynamic symbol table from the .dynamic
460+
/// section from process memory.
461+
///
462+
/// This function uses the DT_SYMTAB value from the .dynamic section to read
463+
/// the symbol table data from process memory. The number of symbols in the
464+
/// symbol table is calculated by looking at the DT_HASH or DT_GNU_HASH
465+
/// values as the symbol count isn't stored in the .dynamic section.
466+
///
467+
/// \return The bytes that represent the symbol table data from the .dynamic
468+
/// section or section headers or \c std::nullopt if an error
469+
/// occurred or if there is no dynamic symbol data available.
470+
std::optional<lldb_private::DataExtractor>
471+
GetDynsymDataFromDynamic(uint32_t &num_symbols);
472+
473+
/// Get the number of symbols from the DT_HASH dynamic entry.
474+
std::optional<uint32_t> GetNumSymbolsFromDynamicHash();
475+
476+
/// Get the number of symbols from the DT_GNU_HASH dynamic entry.
477+
std::optional<uint32_t> GetNumSymbolsFromDynamicGnuHash();
478+
438479
};
439480

440481
#endif // LLDB_SOURCE_PLUGINS_OBJECTFILE_ELF_OBJECTFILEELF_H
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
// This test verifies that loading an ELF file that has no section headers can
2+
// load the dynamic symbol table using the DT_SYMTAB, DT_SYMENT, DT_HASH or
3+
// the DT_GNU_HASH .dynamic key/value pairs that are loaded via the PT_DYNAMIC
4+
// segment.
5+
6+
// RUN: llvm-mc -triple=x86_64-pc-linux -filetype=obj \
7+
// RUN: -o - - <<<".globl defined, undefined; defined:" | \
8+
// RUN: ld.lld /dev/stdin -o - --hash-style=gnu -export-dynamic -shared \
9+
// RUN: -z nosectionheader -o %t.gnu
10+
// RUN: %lldb %t.gnu -b \
11+
// RUN: -o "image dump objfile" \
12+
// RUN: | FileCheck %s --dump-input=always --check-prefix=GNU
13+
// GNU: (lldb) image dump objfile
14+
// GNU: Dumping headers for 1 module(s).
15+
// GNU: ObjectFileELF, file =
16+
// GNU: ELF Header
17+
// GNU: e_type = 0x0003 ET_DYN
18+
// Make sure there are no section headers
19+
// GNU: e_shnum = 0x00000000
20+
// Make sure we were able to load the symbols
21+
// GNU: Symtab, file = {{.*}}elf-dynsym.test.tmp.gnu, num_symbols = 2:
22+
// GNU-DAG: undefined
23+
// GNU-DAG: defined
24+
25+
// RUN: llvm-mc -triple=x86_64-pc-linux -filetype=obj \
26+
// RUN: -o - - <<<".globl defined, undefined; defined:" | \
27+
// RUN: ld.lld /dev/stdin -o - --hash-style=sysv -export-dynamic -shared \
28+
// RUN: -z nosectionheader -o %t.sysv
29+
// RUN: %lldb %t.sysv -b \
30+
// RUN: -o "image dump objfile" \
31+
// RUN: | FileCheck %s --dump-input=always --check-prefix=HASH
32+
// HASH: (lldb) image dump objfile
33+
// HASH: Dumping headers for 1 module(s).
34+
// HASH: ObjectFileELF, file =
35+
// HASH: ELF Header
36+
// HASH: e_type = 0x0003 ET_DYN
37+
// Make sure there are no section headers
38+
// HASH: e_shnum = 0x00000000
39+
// Make sure we were able to load the symbols
40+
// HASH: Symtab, file = {{.*}}elf-dynsym.test.tmp.sysv, num_symbols = 2:
41+
// HASH-DAG: undefined
42+
// HASH-DAG: defined

0 commit comments

Comments
 (0)