Skip to content

Commit 32087b6

Browse files
committed
[llvm-objdump] Split disassembleObject and simplify --{start,stop}-address handling
The main disassembly loop is hard to read because of its special handling of ARM ELF data and ELF data. Split that logic off into two functions, dumpARMELFData and dumpELFData, and hoist some checks out of the loop. The --start-address and --stop-address handling has redundant checks and minor off-by-one issues; fix them. llvm-svn: 357869
1 parent 32a8e74 commit 32087b6

File tree

1 file changed

+116
-109
lines changed

1 file changed

+116
-109
lines changed

llvm/tools/llvm-objdump/llvm-objdump.cpp

Lines changed: 116 additions & 109 deletions
Original file line numberDiff line numberDiff line change
@@ -910,6 +910,76 @@ static bool shouldAdjustVA(const SectionRef &Section) {
910910
return false;
911911
}
912912

913+
static uint64_t
914+
dumpARMELFData(uint64_t SectionAddr, uint64_t Index, uint64_t End,
915+
const ObjectFile *Obj, ArrayRef<uint8_t> Bytes,
916+
const std::vector<uint64_t> &TextMappingSymsAddr) {
917+
support::endianness Endian =
918+
Obj->isLittleEndian() ? support::little : support::big;
919+
while (Index < End) {
920+
outs() << format("%8" PRIx64 ":", SectionAddr + Index);
921+
outs() << "\t";
922+
if (Index + 4 <= End) {
923+
dumpBytes(Bytes.slice(Index, 4), outs());
924+
outs() << "\t.word\t"
925+
<< format_hex(
926+
support::endian::read32(Bytes.data() + Index, Endian), 10);
927+
Index += 4;
928+
} else if (Index + 2 <= End) {
929+
dumpBytes(Bytes.slice(Index, 2), outs());
930+
outs() << "\t\t.short\t"
931+
<< format_hex(
932+
support::endian::read16(Bytes.data() + Index, Endian), 6);
933+
Index += 2;
934+
} else {
935+
dumpBytes(Bytes.slice(Index, 1), outs());
936+
outs() << "\t\t.byte\t" << format_hex(Bytes[0], 4);
937+
++Index;
938+
}
939+
outs() << "\n";
940+
if (std::binary_search(TextMappingSymsAddr.begin(),
941+
TextMappingSymsAddr.end(), Index))
942+
break;
943+
}
944+
return Index;
945+
}
946+
947+
static void dumpELFData(uint64_t SectionAddr, uint64_t Index, uint64_t End,
948+
ArrayRef<uint8_t> Bytes) {
949+
// print out data up to 8 bytes at a time in hex and ascii
950+
uint8_t AsciiData[9] = {'\0'};
951+
uint8_t Byte;
952+
int NumBytes = 0;
953+
954+
for (; Index < End; ++Index) {
955+
if (NumBytes == 0) {
956+
outs() << format("%8" PRIx64 ":", SectionAddr + Index);
957+
outs() << "\t";
958+
}
959+
Byte = Bytes.slice(Index)[0];
960+
outs() << format(" %02x", Byte);
961+
AsciiData[NumBytes] = isPrint(Byte) ? Byte : '.';
962+
963+
uint8_t IndentOffset = 0;
964+
NumBytes++;
965+
if (Index == End - 1 || NumBytes > 8) {
966+
// Indent the space for less than 8 bytes data.
967+
// 2 spaces for byte and one for space between bytes
968+
IndentOffset = 3 * (8 - NumBytes);
969+
for (int Excess = NumBytes; Excess < 8; Excess++)
970+
AsciiData[Excess] = '\0';
971+
NumBytes = 8;
972+
}
973+
if (NumBytes == 8) {
974+
AsciiData[8] = '\0';
975+
outs() << std::string(IndentOffset, ' ') << " ";
976+
outs() << reinterpret_cast<char *>(AsciiData);
977+
outs() << '\n';
978+
NumBytes = 0;
979+
}
980+
}
981+
}
982+
913983
static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj,
914984
MCContext &Ctx, MCDisassembler *DisAsm,
915985
const MCInstrAnalysis *MIA, MCInstPrinter *IP,
@@ -1081,10 +1151,13 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj,
10811151

10821152
// Check if we need to skip symbol
10831153
// Skip if the symbol's data is not between StartAddress and StopAddress
1084-
if (End + SectionAddr < StartAddress ||
1085-
Start + SectionAddr > StopAddress) {
1154+
if (End + SectionAddr <= StartAddress ||
1155+
Start + SectionAddr >= StopAddress)
10861156
continue;
1087-
}
1157+
1158+
// Stop disassembly at the stop address specified
1159+
if (End + SectionAddr > StopAddress)
1160+
End = StopAddress - SectionAddr;
10881161

10891162
/// Skip if user requested specific symbols and this is not in the list
10901163
if (!DisasmFuncsSet.empty() &&
@@ -1099,10 +1172,6 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj,
10991172
outs() << SectionName << ':';
11001173
}
11011174

1102-
// Stop disassembly at the stop address specified
1103-
if (End + SectionAddr > StopAddress)
1104-
End = StopAddress - SectionAddr;
1105-
11061175
if (Obj->isELF() && Obj->getArch() == Triple::amdgcn) {
11071176
if (std::get<2>(Symbols[SI]) == ELF::STT_AMDGPU_HSA_KERNEL) {
11081177
// skip amd_kernel_code_t at the begining of kernel symbol (256 bytes)
@@ -1150,102 +1219,38 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj,
11501219
SectionAddr + Start, DebugOut, CommentStream);
11511220
Start += Size;
11521221

1153-
for (Index = Start; Index < End; Index += Size) {
1154-
MCInst Inst;
1222+
Index = Start;
1223+
if (SectionAddr < StartAddress)
1224+
Index = std::max<uint64_t>(Index, StartAddress - SectionAddr);
1225+
1226+
// If there is a data symbol inside an ELF text section and we are
1227+
// only disassembling text (applicable all architectures), we are in a
1228+
// situation where we must print the data and not disassemble it.
1229+
if (Obj->isELF() && std::get<2>(Symbols[SI]) == ELF::STT_OBJECT &&
1230+
!DisassembleAll && Section.isText()) {
1231+
dumpELFData(SectionAddr, Index, End, Bytes);
1232+
Index = End;
1233+
}
11551234

1156-
if (Index + SectionAddr < StartAddress ||
1157-
Index + SectionAddr > StopAddress) {
1158-
// skip byte by byte till StartAddress is reached
1159-
Size = 1;
1160-
continue;
1161-
}
1162-
// AArch64 ELF binaries can interleave data and text in the
1163-
// same section. We rely on the markers introduced to
1164-
// understand what we need to dump. If the data marker is within a
1165-
// function, it is denoted as a word/short etc
1166-
if (isArmElf(Obj) && std::get<2>(Symbols[SI]) != ELF::STT_OBJECT &&
1167-
!DisassembleAll &&
1235+
bool CheckARMELFData = isArmElf(Obj) &&
1236+
std::get<2>(Symbols[SI]) != ELF::STT_OBJECT &&
1237+
!DisassembleAll;
1238+
MCInst Inst;
1239+
while (Index < End) {
1240+
// AArch64 ELF binaries can interleave data and text in the same
1241+
// section. We rely on the markers introduced to understand what we
1242+
// need to dump. If the data marker is within a function, it is
1243+
// denoted as a word/short etc.
1244+
if (CheckARMELFData &&
11681245
std::binary_search(DataMappingSymsAddr.begin(),
11691246
DataMappingSymsAddr.end(), Index)) {
1170-
// Switch to data.
1171-
support::endianness Endian =
1172-
Obj->isLittleEndian() ? support::little : support::big;
1173-
while (Index < End) {
1174-
outs() << format("%8" PRIx64 ":", SectionAddr + Index);
1175-
outs() << "\t";
1176-
if (Index + 4 <= End) {
1177-
dumpBytes(Bytes.slice(Index, 4), outs());
1178-
outs() << "\t.word\t"
1179-
<< format_hex(support::endian::read32(Bytes.data() + Index,
1180-
Endian),
1181-
10);
1182-
Index += 4;
1183-
} else if (Index + 2 <= End) {
1184-
dumpBytes(Bytes.slice(Index, 2), outs());
1185-
outs() << "\t\t.short\t"
1186-
<< format_hex(support::endian::read16(Bytes.data() + Index,
1187-
Endian),
1188-
6);
1189-
Index += 2;
1190-
} else {
1191-
dumpBytes(Bytes.slice(Index, 1), outs());
1192-
outs() << "\t\t.byte\t" << format_hex(Bytes[0], 4);
1193-
++Index;
1194-
}
1195-
outs() << "\n";
1196-
if (std::binary_search(TextMappingSymsAddr.begin(),
1197-
TextMappingSymsAddr.end(), Index))
1198-
break;
1199-
}
1200-
}
1201-
1202-
// If there is a data symbol inside an ELF text section and we are only
1203-
// disassembling text (applicable all architectures),
1204-
// we are in a situation where we must print the data and not
1205-
// disassemble it.
1206-
if (Obj->isELF() && std::get<2>(Symbols[SI]) == ELF::STT_OBJECT &&
1207-
!DisassembleAll && Section.isText()) {
1208-
// print out data up to 8 bytes at a time in hex and ascii
1209-
uint8_t AsciiData[9] = {'\0'};
1210-
uint8_t Byte;
1211-
int NumBytes = 0;
1212-
1213-
for (Index = Start; Index < End; Index += 1) {
1214-
if (((SectionAddr + Index) < StartAddress) ||
1215-
((SectionAddr + Index) > StopAddress))
1216-
continue;
1217-
if (NumBytes == 0) {
1218-
outs() << format("%8" PRIx64 ":", SectionAddr + Index);
1219-
outs() << "\t";
1220-
}
1221-
Byte = Bytes.slice(Index)[0];
1222-
outs() << format(" %02x", Byte);
1223-
AsciiData[NumBytes] = isPrint(Byte) ? Byte : '.';
1224-
1225-
uint8_t IndentOffset = 0;
1226-
NumBytes++;
1227-
if (Index == End - 1 || NumBytes > 8) {
1228-
// Indent the space for less than 8 bytes data.
1229-
// 2 spaces for byte and one for space between bytes
1230-
IndentOffset = 3 * (8 - NumBytes);
1231-
for (int Excess = NumBytes; Excess < 8; Excess++)
1232-
AsciiData[Excess] = '\0';
1233-
NumBytes = 8;
1234-
}
1235-
if (NumBytes == 8) {
1236-
AsciiData[8] = '\0';
1237-
outs() << std::string(IndentOffset, ' ') << " ";
1238-
outs() << reinterpret_cast<char *>(AsciiData);
1239-
outs() << '\n';
1240-
NumBytes = 0;
1241-
}
1242-
}
1247+
Index = dumpARMELFData(SectionAddr, Index, End, Obj, Bytes,
1248+
TextMappingSymsAddr);
1249+
continue;
12431250
}
1244-
if (Index >= End)
1245-
break;
12461251

1247-
// When -z or --disassemble-zeroes are given we always dissasemble them.
1248-
// Otherwise we might want to skip zero bytes we see.
1252+
// When -z or --disassemble-zeroes are given we always dissasemble
1253+
// them. Otherwise we might want to skip zero bytes we see.
12491254
if (!DisassembleZeroes) {
12501255
uint64_t MaxOffset = End - Index;
12511256
// For -reloc: print zero blocks patched by relocations, so that
@@ -1257,23 +1262,23 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj,
12571262
countSkippableZeroBytes(Bytes.slice(Index, MaxOffset))) {
12581263
outs() << "\t\t..." << '\n';
12591264
Index += N;
1260-
if (Index >= End)
1261-
break;
1265+
continue;
12621266
}
12631267
}
12641268

12651269
// Disassemble a real instruction or a data when disassemble all is
12661270
// provided
1267-
bool Disassembled = DisAsm->getInstruction(Inst, Size, Bytes.slice(Index),
1268-
SectionAddr + Index, DebugOut,
1269-
CommentStream);
1271+
Inst.clear();
1272+
bool Disassembled = DisAsm->getInstruction(
1273+
Inst, Size, Bytes.slice(Index), SectionAddr + Index, DebugOut,
1274+
CommentStream);
12701275
if (Size == 0)
12711276
Size = 1;
12721277

1273-
PIP.printInst(*IP, Disassembled ? &Inst : nullptr,
1274-
Bytes.slice(Index, Size),
1275-
{SectionAddr + Index + VMAAdjustment, Section.getIndex()},
1276-
outs(), "", *STI, &SP, &Rels);
1278+
PIP.printInst(
1279+
*IP, Disassembled ? &Inst : nullptr, Bytes.slice(Index, Size),
1280+
{SectionAddr + Index + VMAAdjustment, Section.getIndex()}, outs(),
1281+
"", *STI, &SP, &Rels);
12771282
outs() << CommentStream.str();
12781283
Comments.clear();
12791284

@@ -1343,7 +1348,7 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj,
13431348
while (RelCur != RelEnd) {
13441349
uint64_t Offset = RelCur->getOffset();
13451350
// If this relocation is hidden, skip it.
1346-
if (getHidden(*RelCur) || ((SectionAddr + Offset) < StartAddress)) {
1351+
if (getHidden(*RelCur) || SectionAddr + Offset < StartAddress) {
13471352
++RelCur;
13481353
continue;
13491354
}
@@ -1357,7 +1362,7 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj,
13571362
Expected<section_iterator> SymSI =
13581363
RelCur->getSymbol()->getSection();
13591364
if (SymSI && *SymSI != Obj->section_end() &&
1360-
(shouldAdjustVA(**SymSI)))
1365+
shouldAdjustVA(**SymSI))
13611366
Offset += AdjustVMA;
13621367
}
13631368

@@ -1366,6 +1371,8 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj,
13661371
++RelCur;
13671372
}
13681373
}
1374+
1375+
Index += Size;
13691376
}
13701377
}
13711378
}

0 commit comments

Comments
 (0)