Skip to content

Commit 5ae4d50

Browse files
authored
Add support for reading the dynamic symbol table from PT_DYNAMIC (llvm#116689)
Resubmission of llvm#112596 with buildbot fixes. Allow LLDB to parse the dynamic symbol table from an ELF file, or from an in-memory ELF image, that has no section headers. This patch uses the ability to parse the PT_DYNAMIC segment and find the DT_SYMTAB and DT_SYMENT entries, plus either DT_HASH or DT_GNU_HASH, to locate and parse the dynamic symbol table if the section headers are not present. It also adds a helper function to read the data referenced by a .dynamic key/value pair entry correctly from the file or from memory.
1 parent 170e1fe commit 5ae4d50

File tree

3 files changed

+244
-20
lines changed

3 files changed

+244
-20
lines changed

lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp

Lines changed: 160 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
#include "lldb/Host/LZMA.h"
2323
#include "lldb/Symbol/DWARFCallFrameInfo.h"
2424
#include "lldb/Symbol/SymbolContext.h"
25+
#include "lldb/Target/Process.h"
2526
#include "lldb/Target/SectionLoadList.h"
2627
#include "lldb/Target/Target.h"
2728
#include "lldb/Utility/ArchSpec.h"
@@ -3017,6 +3018,19 @@ void ObjectFileELF::ParseSymtab(Symtab &lldb_symtab) {
30173018
ParseSymbolTable(&lldb_symtab, symbol_id, dynsym);
30183019
symbol_id += num_symbols;
30193020
m_address_class_map.merge(address_class_map);
3021+
} else {
3022+
// Try and read the dynamic symbol table from the .dynamic section.
3023+
uint32_t dynamic_num_symbols = 0;
3024+
std::optional<DataExtractor> symtab_data =
3025+
GetDynsymDataFromDynamic(dynamic_num_symbols);
3026+
std::optional<DataExtractor> strtab_data = GetDynstrData();
3027+
if (symtab_data && strtab_data) {
3028+
auto [num_symbols_parsed, address_class_map] = ParseSymbols(
3029+
&lldb_symtab, symbol_id, section_list, dynamic_num_symbols,
3030+
symtab_data.value(), strtab_data.value());
3031+
symbol_id += num_symbols_parsed;
3032+
m_address_class_map.merge(address_class_map);
3033+
}
30203034
}
30213035
}
30223036

@@ -3828,6 +3842,32 @@ ObjectFileELF::MapFileDataWritable(const FileSpec &file, uint64_t Size,
38283842
Offset);
38293843
}
38303844

3845+
std::optional<DataExtractor>
3846+
ObjectFileELF::ReadDataFromDynamic(const ELFDynamic *dyn, uint64_t length,
3847+
uint64_t offset) {
3848+
// ELFDynamic values contain a "d_ptr" member that will be a load address if
3849+
// we have an ELF file read from memory, or it will be a file address if it
3850+
// was read from an ELF file. This function will correctly fetch data pointed
3851+
// to by the ELFDynamic::d_ptr, or return std::nullopt if the data isn't
3852+
// available.
3853+
const lldb::addr_t d_ptr_addr = dyn->d_ptr + offset;
3854+
if (ProcessSP process_sp = m_process_wp.lock()) {
3855+
if (DataBufferSP data_sp = ReadMemory(process_sp, d_ptr_addr, length))
3856+
return DataExtractor(data_sp, GetByteOrder(), GetAddressByteSize());
3857+
} else {
3858+
// There is no live process, so d_ptr is a file address. Find the section
3859+
// that contains the address and read the data from that section.
3860+
Address addr;
3861+
if (!addr.ResolveAddressUsingFileSections(d_ptr_addr, GetSectionList()))
3862+
return std::nullopt;
3863+
DataExtractor data;
3864+
addr.GetSection()->GetSectionData(data);
3865+
return DataExtractor(data, d_ptr_addr - addr.GetSection()->GetFileAddress(),
3866+
length);
3867+
}
3868+
return std::nullopt;
3869+
}
3870+
38313871
std::optional<DataExtractor> ObjectFileELF::GetDynstrData() {
38323872
if (SectionList *section_list = GetSectionList()) {
38333873
// Find the SHT_DYNAMIC section.
@@ -3855,31 +3895,15 @@ std::optional<DataExtractor> ObjectFileELF::GetDynstrData() {
38553895
// and represent the dynamic symbol table's string table. These are needed
38563896
// by the dynamic loader and we can read them from a process' address space.
38573897
//
3858-
// When loading and ELF file from memory, only the program headers end up
3859-
// being mapped into memory, and we can find these values in the PT_DYNAMIC
3860-
// segment.
3898+
// When loading an ELF file from memory, only the program headers are
3899+
// guaranteed to end up being mapped into memory, and we can find these values in
3900+
// the PT_DYNAMIC segment.
38613901
const ELFDynamic *strtab = FindDynamicSymbol(DT_STRTAB);
38623902
const ELFDynamic *strsz = FindDynamicSymbol(DT_STRSZ);
38633903
if (strtab == nullptr || strsz == nullptr)
38643904
return std::nullopt;
38653905

3866-
if (ProcessSP process_sp = m_process_wp.lock()) {
3867-
if (DataBufferSP data_sp =
3868-
ReadMemory(process_sp, strtab->d_ptr, strsz->d_val))
3869-
return DataExtractor(data_sp, GetByteOrder(), GetAddressByteSize());
3870-
} else {
3871-
// We have an ELF file with no section headers or we didn't find the
3872-
// .dynamic section. Try and find the .dynstr section.
3873-
Address addr;
3874-
if (addr.ResolveAddressUsingFileSections(strtab->d_ptr, GetSectionList())) {
3875-
DataExtractor data;
3876-
addr.GetSection()->GetSectionData(data);
3877-
return DataExtractor(data,
3878-
strtab->d_ptr - addr.GetSection()->GetFileAddress(),
3879-
strsz->d_val);
3880-
}
3881-
}
3882-
return std::nullopt;
3906+
return ReadDataFromDynamic(strtab, strsz->d_val, /*offset=*/0);
38833907
}
38843908

38853909
std::optional<lldb_private::DataExtractor> ObjectFileELF::GetDynamicData() {
@@ -3912,3 +3936,119 @@ std::optional<lldb_private::DataExtractor> ObjectFileELF::GetDynamicData() {
39123936
}
39133937
return std::nullopt;
39143938
}
3939+
3940+
std::optional<uint32_t> ObjectFileELF::GetNumSymbolsFromDynamicHash() {
3941+
const ELFDynamic *hash = FindDynamicSymbol(DT_HASH);
3942+
if (hash == nullptr)
3943+
return std::nullopt;
3944+
3945+
// The DT_HASH header looks like this:
3946+
struct DtHashHeader {
3947+
uint32_t nbucket;
3948+
uint32_t nchain;
3949+
};
3950+
if (auto data = ReadDataFromDynamic(hash, 8)) {
3951+
// We don't need the number of buckets value "nbucket", we just need the
3952+
// "nchain" value which contains the number of symbols.
3953+
offset_t offset = offsetof(DtHashHeader, nchain);
3954+
return data->GetU32(&offset);
3955+
}
3956+
3957+
return std::nullopt;
3958+
}
3959+
3960+
std::optional<uint32_t> ObjectFileELF::GetNumSymbolsFromDynamicGnuHash() {
3961+
const ELFDynamic *gnu_hash = FindDynamicSymbol(DT_GNU_HASH);
3962+
if (gnu_hash == nullptr)
3963+
return std::nullopt;
3964+
3965+
// Create a DT_GNU_HASH header
3966+
// https://flapenguin.me/elf-dt-gnu-hash
3967+
struct DtGnuHashHeader {
3968+
uint32_t nbuckets = 0;
3969+
uint32_t symoffset = 0;
3970+
uint32_t bloom_size = 0;
3971+
uint32_t bloom_shift = 0;
3972+
};
3973+
uint32_t num_symbols = 0;
3974+
// Read enough data for the DT_GNU_HASH header so we can extract the values.
3975+
if (auto data = ReadDataFromDynamic(gnu_hash, sizeof(DtGnuHashHeader))) {
3976+
offset_t offset = 0;
3977+
DtGnuHashHeader header;
3978+
header.nbuckets = data->GetU32(&offset);
3979+
header.symoffset = data->GetU32(&offset);
3980+
header.bloom_size = data->GetU32(&offset);
3981+
header.bloom_shift = data->GetU32(&offset);
3982+
const size_t addr_size = GetAddressByteSize();
3983+
const addr_t buckets_offset =
3984+
sizeof(DtGnuHashHeader) + addr_size * header.bloom_size;
3985+
std::vector<uint32_t> buckets;
3986+
if (auto bucket_data = ReadDataFromDynamic(gnu_hash, header.nbuckets * 4,
3987+
buckets_offset)) {
3988+
offset = 0;
3989+
for (uint32_t i = 0; i < header.nbuckets; ++i)
3990+
buckets.push_back(bucket_data->GetU32(&offset));
3991+
// Locate the chain that handles the largest index bucket.
3992+
uint32_t last_symbol = 0;
3993+
for (uint32_t bucket_value : buckets)
3994+
last_symbol = std::max(bucket_value, last_symbol);
3995+
if (last_symbol < header.symoffset) {
3996+
num_symbols = header.symoffset;
3997+
} else {
3998+
// Walk the bucket's chain to add the chain length to the total.
3999+
const addr_t chains_base_offset = buckets_offset + header.nbuckets * 4;
4000+
for (;;) {
4001+
if (auto chain_entry_data = ReadDataFromDynamic(
4002+
gnu_hash, 4,
4003+
chains_base_offset + (last_symbol - header.symoffset) * 4)) {
4004+
offset = 0;
4005+
uint32_t chain_entry = chain_entry_data->GetU32(&offset);
4006+
++last_symbol;
4007+
// If the low bit is set, this entry is the end of the chain.
4008+
if (chain_entry & 1)
4009+
break;
4010+
} else {
4011+
break;
4012+
}
4013+
}
4014+
num_symbols = last_symbol;
4015+
}
4016+
}
4017+
}
4018+
if (num_symbols > 0)
4019+
return num_symbols;
4020+
4021+
return std::nullopt;
4022+
}
4023+
4024+
std::optional<DataExtractor>
4025+
ObjectFileELF::GetDynsymDataFromDynamic(uint32_t &num_symbols) {
4026+
// Every ELF file which represents an executable or shared library has
4027+
// mandatory .dynamic entries. The DT_SYMTAB value contains a pointer to the
4028+
// symbol table, and DT_SYMENT contains the size of a symbol table entry.
4029+
// We then can use either the DT_HASH or DT_GNU_HASH to find the number of
4030+
// symbols in the symbol table as the symbol count is not stored in the
4031+
// .dynamic section as a key/value pair.
4032+
//
4033+
// When loading an ELF file from memory, only the program headers end up
4034+
// being mapped into memory, and we can find these values in the PT_DYNAMIC
4035+
// segment.
4036+
num_symbols = 0;
4037+
// Get the process in case this is an in memory ELF file.
4038+
ProcessSP process_sp(m_process_wp.lock());
4039+
const ELFDynamic *symtab = FindDynamicSymbol(DT_SYMTAB);
4040+
const ELFDynamic *syment = FindDynamicSymbol(DT_SYMENT);
4041+
// DT_SYMTAB and DT_SYMENT are mandatory.
4042+
if (symtab == nullptr || syment == nullptr)
4043+
return std::nullopt;
4044+
4045+
if (std::optional<uint32_t> syms = GetNumSymbolsFromDynamicHash())
4046+
num_symbols = *syms;
4047+
else if (std::optional<uint32_t> syms = GetNumSymbolsFromDynamicGnuHash())
4048+
num_symbols = *syms;
4049+
else
4050+
return std::nullopt;
4051+
if (num_symbols == 0)
4052+
return std::nullopt;
4053+
return ReadDataFromDynamic(symtab, syment->d_val * num_symbols);
4054+
}

lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.h

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -435,6 +435,46 @@ class ObjectFileELF : public lldb_private::ObjectFile {
435435
/// \return The bytes that represent the string table data or \c std::nullopt
436436
/// if an error occurred.
437437
std::optional<lldb_private::DataExtractor> GetDynstrData();
438+
439+
/// Read the bytes pointed to by the \a dyn dynamic entry.
440+
///
441+
/// ELFDynamic::d_ptr values contain file addresses if we load the ELF file
442+
/// from a file on disk, or they contain load addresses if they were read
443+
/// from memory. This function will correctly extract the data in both cases
444+
/// if it is available.
445+
///
446+
/// \param[in] dyn The dynamic entry to use to fetch the data from.
447+
///
448+
/// \param[in] length The number of bytes to read.
449+
///
450+
/// \param[in] offset The number of bytes to skip after the d_ptr value
451+
/// before reading data.
452+
///
453+
/// \return The bytes that represent the dynamic entry's data or
454+
/// \c std::nullopt if an error occurred or the data is not available.
455+
std::optional<lldb_private::DataExtractor>
456+
ReadDataFromDynamic(const elf::ELFDynamic *dyn, uint64_t length,
457+
uint64_t offset = 0);
458+
459+
/// Get the bytes that represent the dynamic symbol table from the .dynamic
460+
/// section from process memory.
461+
///
462+
/// This function uses the DT_SYMTAB value from the .dynamic section to read
463+
/// the symbols table data from process memory. The number of symbols in the
464+
/// symbol table is calculated by looking at the DT_HASH or DT_GNU_HASH
465+
/// values as the symbol count isn't stored in the .dynamic section.
466+
///
467+
/// \return The bytes that represent the symbol table data from the .dynamic
468+
/// section or section headers or \c std::nullopt if an error
469+
/// occurred or if there is no dynamic symbol data available.
470+
std::optional<lldb_private::DataExtractor>
471+
GetDynsymDataFromDynamic(uint32_t &num_symbols);
472+
473+
/// Get the number of symbols from the DT_HASH dynamic entry.
474+
std::optional<uint32_t> GetNumSymbolsFromDynamicHash();
475+
476+
/// Get the number of symbols from the DT_GNU_HASH dynamic entry.
477+
std::optional<uint32_t> GetNumSymbolsFromDynamicGnuHash();
438478
};
439479

440480
#endif // LLDB_SOURCE_PLUGINS_OBJECTFILE_ELF_OBJECTFILEELF_H
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
// This test verifies that loading an ELF file that has no section headers can
2+
// load the dynamic symbol table using the DT_SYMTAB, DT_SYMENT, DT_HASH or
3+
// the DT_GNU_HASH .dynamic key/value pairs that are loaded via the PT_DYNAMIC
4+
// segment.
5+
6+
// RUN: llvm-mc -triple=x86_64-pc-linux -filetype=obj \
7+
// RUN: -o - - <<<".globl defined, undefined; defined:" | \
8+
// RUN: ld.lld /dev/stdin -o - --hash-style=gnu -export-dynamic -shared \
9+
// RUN: -o %t.gnu
10+
// RUN: llvm-strip --strip-sections %t.gnu
11+
// RUN: %lldb %t.gnu -b \
12+
// RUN: -o "image dump objfile" \
13+
// RUN: | FileCheck %s --dump-input=always --check-prefix=GNU
14+
// GNU: (lldb) image dump objfile
15+
// GNU: Dumping headers for 1 module(s).
16+
// GNU: ObjectFileELF, file =
17+
// GNU: ELF Header
18+
// GNU: e_type = 0x0003 ET_DYN
19+
// Make sure there are no section headers
20+
// GNU: e_shnum = 0x00000000
21+
// Make sure we were able to load the symbols
22+
// GNU: Symtab, file = {{.*}}elf-dynsym.test.tmp.gnu, num_symbols = 2:
23+
// GNU-DAG: undefined
24+
// GNU-DAG: defined
25+
26+
// RUN: llvm-mc -triple=x86_64-pc-linux -filetype=obj \
27+
// RUN: -o - - <<<".globl defined, undefined; defined:" | \
28+
// RUN: ld.lld /dev/stdin -o - --hash-style=sysv -export-dynamic -shared \
29+
// RUN: -o %t.sysv
30+
// RUN: llvm-strip --strip-sections %t.sysv
31+
// RUN: %lldb %t.sysv -b \
32+
// RUN: -o "image dump objfile" \
33+
// RUN: | FileCheck %s --dump-input=always --check-prefix=HASH
34+
// HASH: (lldb) image dump objfile
35+
// HASH: Dumping headers for 1 module(s).
36+
// HASH: ObjectFileELF, file =
37+
// HASH: ELF Header
38+
// HASH: e_type = 0x0003 ET_DYN
39+
// Make sure there are no section headers
40+
// HASH: e_shnum = 0x00000000
41+
// Make sure we were able to load the symbols
42+
// HASH: Symtab, file = {{.*}}elf-dynsym.test.tmp.sysv, num_symbols = 2:
43+
// HASH-DAG: undefined
44+
// HASH-DAG: defined

0 commit comments

Comments
 (0)