Skip to content

Commit 1be9195

Browse files
committed
[llvm-objcopy] [COFF] Fix handling of aux symbols for big objects
The aux symbols were stored in an opaque std::vector<uint8_t>, with contents interpreted according to the rest of the symbol. All aux symbol types but one fit in 18 bytes (sizeof(coff_symbol16)), and if written to a bigobj, two extra padding bytes are written (as sizeof(coff_symbol32) is 20). In the storage agnostic intermediate representation, store the aux symbols as a series of coff_symbol16 sized opaque blobs. (In practice, all such aux symbols only consist of one aux symbol, so this is more flexible than what reality needs.) The special case is the file aux symbols, which are written in potentially more than one aux symbol slot, without any padding, as one single long string. This can't be stored in the same opaque vector of fixed sized aux symbol entries. The file aux symbols will occupy a different number of aux symbol slots depending on the type of output object file. As nothing in the intermediate process needs to have accurate raw symbol indices, updating that is moved into the writer class. Differential Revision: https://reviews.llvm.org/D57009 llvm-svn: 351947
1 parent 4813340 commit 1be9195

File tree

13 files changed

+115
-30
lines changed

13 files changed

+115
-30
lines changed
Binary file not shown.
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
RUN: %python %p/../Inputs/ungzip.py %p/Inputs/bigobj.o.gz > %t.in.o
2+
3+
RUN: llvm-objdump -t %t.in.o | FileCheck %s --check-prefixes=SYMBOLS,SYMBOLS-BIG,SYMBOLS-ORIG
4+
5+
# Do a plain copy, to check that section numbers in symbols referring
6+
# to sections outside of the small object format are handled correctly.
7+
RUN: llvm-objcopy %t.in.o %t.copy.o
8+
RUN: llvm-objdump -t %t.copy.o | FileCheck %s --check-prefixes=SYMBOLS,SYMBOLS-BIG,SYMBOLS-ORIG
9+
10+
# Remove a section, making the section count fit into a small object.
11+
RUN: llvm-objcopy -R '.text$4' %t.in.o %t.small.o
12+
RUN: llvm-objdump -t %t.small.o | FileCheck %s --check-prefixes=SYMBOLS,SYMBOLS-SMALL,SYMBOLS-REMOVED-SMALL
13+
14+
# Add a .gnu_debuglink section, forcing the object back to big format.
15+
RUN: llvm-objcopy --add-gnu-debuglink=%t.in.o %t.small.o %t.big.o
16+
RUN: llvm-objdump -t %t.big.o | FileCheck %s --check-prefixes=SYMBOLS,SYMBOLS-BIG,SYMBOLS-REMOVED-BIG
17+
18+
# In big object format, the .file symbol occupies one symbol table entry for
19+
# the auxiliary data, but needs two entries in the small format, forcing the
20+
# raw symbol indices of later symbols to change.
21+
SYMBOLS: SYMBOL TABLE:
22+
SYMBOLS-NEXT: [ 0]{{.*}} (nx 1) {{.*}} .text
23+
SYMBOLS-NEXT: AUX scnlen
24+
SYMBOLS-SMALL-NEXT: [ 2]{{.*}} (nx 2) {{.*}} .file
25+
SYMBOLS-BIG-NEXT: [ 2]{{.*}} (nx 1) {{.*}} .file
26+
SYMBOLS-NEXT: AUX abcdefghijklmnopqrs
27+
SYMBOLS-SMALL-NEXT: [ 5]{{.*}} (nx 0) {{.*}} foo
28+
SYMBOLS-BIG-NEXT: [ 4]{{.*}} (nx 0) {{.*}} foo
29+
30+
# Check that the section numbers outside of signed 16 bit int range
31+
# are represented properly. After removing one section, the section
32+
# numbers decrease.
33+
SYMBOLS-ORIG: [ 5](sec 65280){{.*}} symbol65280
34+
SYMBOLS-REMOVED-SMALL: [ 6](sec 65279){{.*}} symbol65280
35+
SYMBOLS-REMOVED-BIG: [ 5](sec 65279){{.*}} symbol65280

llvm/test/tools/llvm-objcopy/ELF/auto-remove-shndx.test

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# RUN: %python %p/Inputs/ungzip.py %p/Inputs/many-sections.o.gz > %t
1+
# RUN: %python %p/../Inputs/ungzip.py %p/Inputs/many-sections.o.gz > %t
22
# RUN: llvm-objcopy -R .text -R s0 -R s1 -R s2 -R s3 -R s4 -R s5 -R s6 %t %t2
33
# RUN: llvm-readobj --sections %t2 | FileCheck --check-prefix=SECS %s
44

llvm/test/tools/llvm-objcopy/ELF/many-sections.test

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
RUN: %python %p/Inputs/ungzip.py %p/Inputs/many-sections.o.gz > %t
1+
RUN: %python %p/../Inputs/ungzip.py %p/Inputs/many-sections.o.gz > %t
22
RUN: llvm-objcopy %t %t2
33
RUN: llvm-readobj --file-headers %t2 | FileCheck --check-prefix=EHDR %s
44
RUN: llvm-readobj --sections %t2 | FileCheck --check-prefix=SECS %s

llvm/test/tools/llvm-objcopy/ELF/remove-shndx.test

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# This test checks to see that a .symtab_shndx section is added to any binary
22
# that needs it, even if the original was removed.
3-
RUN: %python %p/Inputs/ungzip.py %p/Inputs/many-sections.o.gz > %t
3+
RUN: %python %p/../Inputs/ungzip.py %p/Inputs/many-sections.o.gz > %t
44
RUN: llvm-objcopy -R .symtab_shndx %t %t2
55
RUN: llvm-readobj --sections %t2 | FileCheck %s
66

llvm/test/tools/llvm-objcopy/ELF/strict-no-add.test

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# This test makes sure that sections added at the end that don't have symbols
22
# defined in them don't trigger the creation of a large index table.
33

4-
RUN: %python %p/Inputs/ungzip.py %p/Inputs/many-sections.o.gz > %t.0
4+
RUN: %python %p/../Inputs/ungzip.py %p/Inputs/many-sections.o.gz > %t.0
55
RUN: cat %p/Inputs/alloc-symtab.o > %t
66
RUN: llvm-objcopy -R .text -R s0 -R s1 -R s2 -R s3 -R s4 -R s5 -R s6 %t.0 %t2
77
RUN: llvm-objcopy --add-section=.s0=%t --add-section=.s1=%t --add-section=.s2=%t %t2 %t2

llvm/tools/llvm-objcopy/COFF/COFFObjcopy.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ static uint64_t getNextRVA(const Object &Obj) {
3737
return 0;
3838
const Section &Last = Obj.getSections().back();
3939
return alignTo(Last.Header.VirtualAddress + Last.Header.VirtualSize,
40-
Obj.PeHeader.SectionAlignment);
40+
Obj.IsPE ? Obj.PeHeader.SectionAlignment : 1);
4141
}
4242

4343
static uint32_t getCRC32(StringRef Data) {
@@ -74,8 +74,8 @@ static void addGnuDebugLink(Object &Obj, StringRef DebugLinkFile) {
7474
Sec.Name = ".gnu_debuglink";
7575
Sec.Header.VirtualSize = Sec.getContents().size();
7676
Sec.Header.VirtualAddress = StartRVA;
77-
Sec.Header.SizeOfRawData =
78-
alignTo(Sec.Header.VirtualSize, Obj.PeHeader.FileAlignment);
77+
Sec.Header.SizeOfRawData = alignTo(Sec.Header.VirtualSize,
78+
Obj.IsPE ? Obj.PeHeader.FileAlignment : 1);
7979
// Sec.Header.PointerToRawData is filled in by the writer.
8080
Sec.Header.PointerToRelocations = 0;
8181
Sec.Header.PointerToLinenumbers = 0;

llvm/tools/llvm-objcopy/COFF/Object.cpp

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -26,12 +26,8 @@ void Object::addSymbols(ArrayRef<Symbol> NewSymbols) {
2626

2727
void Object::updateSymbols() {
2828
SymbolMap = DenseMap<size_t, Symbol *>(Symbols.size());
29-
size_t RawSymIndex = 0;
30-
for (Symbol &Sym : Symbols) {
29+
for (Symbol &Sym : Symbols)
3130
SymbolMap[Sym.UniqueId] = &Sym;
32-
Sym.RawIndex = RawSymIndex;
33-
RawSymIndex += 1 + Sym.Sym.NumberOfAuxSymbols;
34-
}
3531
}
3632

3733
const Symbol *Object::findSymbol(size_t UniqueId) const {

llvm/tools/llvm-objcopy/COFF/Object.h

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -66,10 +66,24 @@ struct Section {
6666
std::vector<uint8_t> OwnedContents;
6767
};
6868

69+
struct AuxSymbol {
70+
AuxSymbol(ArrayRef<uint8_t> In) {
71+
assert(In.size() == sizeof(Opaque));
72+
std::copy(In.begin(), In.end(), Opaque);
73+
}
74+
75+
ArrayRef<uint8_t> getRef() const {
76+
return ArrayRef<uint8_t>(Opaque, sizeof(Opaque));
77+
}
78+
79+
uint8_t Opaque[sizeof(object::coff_symbol16)];
80+
};
81+
6982
struct Symbol {
7083
object::coff_symbol32 Sym;
7184
StringRef Name;
72-
std::vector<uint8_t> AuxData;
85+
std::vector<AuxSymbol> AuxData;
86+
StringRef AuxFile;
7387
ssize_t TargetSectionId;
7488
ssize_t AssociativeComdatTargetSectionId = 0;
7589
Optional<size_t> WeakTargetSymbolId;
@@ -132,7 +146,7 @@ struct Object {
132146

133147
ssize_t NextSectionUniqueId = 1; // Allow a UniqueId 0 to mean undefined.
134148

135-
// Update SymbolMap and RawIndex in each Symbol.
149+
// Update SymbolMap.
136150
void updateSymbols();
137151

138152
// Update SectionMap and Index in each Section.

llvm/tools/llvm-objcopy/COFF/Reader.cpp

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -107,9 +107,24 @@ Error COFFReader::readSymbols(Object &Obj, bool IsBigObj) const {
107107
*reinterpret_cast<const coff_symbol16 *>(SymRef.getRawPtr()));
108108
if (auto EC = COFFObj.getSymbolName(SymRef, Sym.Name))
109109
return errorCodeToError(EC);
110-
Sym.AuxData = COFFObj.getSymbolAuxData(SymRef);
111-
assert((Sym.AuxData.size() %
112-
(IsBigObj ? sizeof(coff_symbol32) : sizeof(coff_symbol16))) == 0);
110+
111+
ArrayRef<uint8_t> AuxData = COFFObj.getSymbolAuxData(SymRef);
112+
size_t SymSize = IsBigObj ? sizeof(coff_symbol32) : sizeof(coff_symbol16);
113+
assert(AuxData.size() == SymSize * SymRef.getNumberOfAuxSymbols());
114+
// The auxiliary symbols are structs of sizeof(coff_symbol16) each.
115+
// In the big object format (where symbols are coff_symbol32), each
116+
// auxiliary symbol is padded with 2 bytes at the end. Copy each
117+
// auxiliary symbol to the Sym.AuxData vector. For file symbols,
118+
// the whole range of aux symbols is interpreted as one null padded
119+
// string instead.
120+
if (SymRef.isFileRecord())
121+
Sym.AuxFile = StringRef(reinterpret_cast<const char *>(AuxData.data()),
122+
AuxData.size())
123+
.rtrim('\0');
124+
else
125+
for (size_t I = 0; I < SymRef.getNumberOfAuxSymbols(); I++)
126+
Sym.AuxData.push_back(AuxData.slice(I * SymSize, sizeof(AuxSymbol)));
127+
113128
// Find the unique id of the section
114129
if (SymRef.getSectionNumber() <=
115130
0) // Special symbol (undefined/absolute/debug)

llvm/tools/llvm-objcopy/COFF/Writer.cpp

Lines changed: 37 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,8 @@ Error COFFWriter::finalizeSymbolContents() {
5555
if (Sym.Sym.NumberOfAuxSymbols == 1 &&
5656
Sym.Sym.StorageClass == IMAGE_SYM_CLASS_STATIC) {
5757
coff_aux_section_definition *SD =
58-
reinterpret_cast<coff_aux_section_definition *>(Sym.AuxData.data());
58+
reinterpret_cast<coff_aux_section_definition *>(
59+
Sym.AuxData[0].Opaque);
5960
uint32_t SDSectionNumber;
6061
if (Sym.AssociativeComdatTargetSectionId == 0) {
6162
// Not a comdat associative section; just set the Number field to
@@ -79,7 +80,7 @@ Error COFFWriter::finalizeSymbolContents() {
7980
// we want to set. Only >= 1 would be required, but only == 1 makes sense.
8081
if (Sym.WeakTargetSymbolId && Sym.Sym.NumberOfAuxSymbols == 1) {
8182
coff_aux_weak_external *WE =
82-
reinterpret_cast<coff_aux_weak_external *>(Sym.AuxData.data());
83+
reinterpret_cast<coff_aux_weak_external *>(Sym.AuxData[0].Opaque);
8384
const Symbol *Target = Obj.findSymbol(*Sym.WeakTargetSymbolId);
8485
if (Target == nullptr)
8586
return createStringError(object_error::invalid_symbol_index,
@@ -141,13 +142,26 @@ size_t COFFWriter::finalizeStringTable() {
141142

142143
template <class SymbolTy>
143144
std::pair<size_t, size_t> COFFWriter::finalizeSymbolTable() {
144-
size_t SymTabSize = Obj.getSymbols().size() * sizeof(SymbolTy);
145-
for (const auto &S : Obj.getSymbols())
146-
SymTabSize += S.AuxData.size();
147-
return std::make_pair(SymTabSize, sizeof(SymbolTy));
145+
size_t RawSymIndex = 0;
146+
for (auto &S : Obj.getMutableSymbols()) {
147+
// Symbols normally have NumberOfAuxSymbols set correctly all the time.
148+
// For file symbols, we need to know the output file's symbol size to be
149+
// able to calculate the number of slots it occupies.
150+
if (!S.AuxFile.empty())
151+
S.Sym.NumberOfAuxSymbols =
152+
alignTo(S.AuxFile.size(), sizeof(SymbolTy)) / sizeof(SymbolTy);
153+
S.RawIndex = RawSymIndex;
154+
RawSymIndex += 1 + S.Sym.NumberOfAuxSymbols;
155+
}
156+
return std::make_pair(RawSymIndex * sizeof(SymbolTy), sizeof(SymbolTy));
148157
}
149158

150159
Error COFFWriter::finalize(bool IsBigObj) {
160+
size_t SymTabSize, SymbolSize;
161+
std::tie(SymTabSize, SymbolSize) = IsBigObj
162+
? finalizeSymbolTable<coff_symbol32>()
163+
: finalizeSymbolTable<coff_symbol16>();
164+
151165
if (Error E = finalizeRelocTargets())
152166
return E;
153167
if (Error E = finalizeSymbolContents())
@@ -199,10 +213,6 @@ Error COFFWriter::finalize(bool IsBigObj) {
199213
}
200214

201215
size_t StrTabSize = finalizeStringTable();
202-
size_t SymTabSize, SymbolSize;
203-
std::tie(SymTabSize, SymbolSize) = IsBigObj
204-
? finalizeSymbolTable<coff_symbol32>()
205-
: finalizeSymbolTable<coff_symbol16>();
206216

207217
size_t PointerToSymbolTable = FileSize;
208218
// StrTabSize <= 4 is the size of an empty string table, only consisting
@@ -312,8 +322,23 @@ template <class SymbolTy> void COFFWriter::writeSymbolStringTables() {
312322
copySymbol<SymbolTy, coff_symbol32>(*reinterpret_cast<SymbolTy *>(Ptr),
313323
S.Sym);
314324
Ptr += sizeof(SymbolTy);
315-
std::copy(S.AuxData.begin(), S.AuxData.end(), Ptr);
316-
Ptr += S.AuxData.size();
325+
if (!S.AuxFile.empty()) {
326+
// For file symbols, just write the string into the aux symbol slots,
327+
// assuming that the unwritten parts are initialized to zero in the memory
328+
// mapped file.
329+
std::copy(S.AuxFile.begin(), S.AuxFile.end(), Ptr);
330+
Ptr += S.Sym.NumberOfAuxSymbols * sizeof(SymbolTy);
331+
} else {
332+
// For other auxiliary symbols, write their opaque payload into one symbol
333+
// table slot each. For big object files, the symbols are larger than the
334+
// opaque auxiliary symbol struct and we leave padding at the end of each
335+
// entry.
336+
for (const AuxSymbol &AuxSym : S.AuxData) {
337+
ArrayRef<uint8_t> Ref = AuxSym.getRef();
338+
std::copy(Ref.begin(), Ref.end(), Ptr);
339+
Ptr += sizeof(SymbolTy);
340+
}
341+
}
317342
}
318343
if (StrTabBuilder.getSize() > 4 || !Obj.IsPE) {
319344
// Always write a string table in object files, even an empty one.

llvm/tools/llvm-objcopy/COFF/Writer.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,11 +30,11 @@ class COFFWriter {
3030
size_t SizeOfInitializedData;
3131
StringTableBuilder StrTabBuilder;
3232

33+
template <class SymbolTy> std::pair<size_t, size_t> finalizeSymbolTable();
3334
Error finalizeRelocTargets();
3435
Error finalizeSymbolContents();
3536
void layoutSections();
3637
size_t finalizeStringTable();
37-
template <class SymbolTy> std::pair<size_t, size_t> finalizeSymbolTable();
3838

3939
Error finalize(bool IsBigObj);
4040

0 commit comments

Comments
 (0)