Skip to content

Commit 18e5131

Browse files
authored
[BOLT] Support pre-aggregated basic sample profile (#140196)
Define a pre-aggregated basic sample format:
```
E <event name>
S <location> <count>
```
The `-nl` flag is required to use parsed basic samples.

Test Plan: update pre-aggregated-perf.test
1 parent c4806db commit 18e5131

File tree

5 files changed

+141
-69
lines changed

5 files changed

+141
-69
lines changed

bolt/include/bolt/Profile/DataAggregator.h

Lines changed: 29 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -370,33 +370,46 @@ class DataAggregator : public DataReader {
370370
/// memory.
371371
///
372372
/// File format syntax:
373-
/// {B|F|f|T} [<start_id>:]<start_offset> [<end_id>:]<end_offset> [<ft_end>]
374-
/// <count> [<mispred_count>]
373+
/// E <event>
374+
/// S <start> <count>
375+
/// T <start> <end> <ft_end> <count>
376+
/// B <start> <end> <count> <mispred_count>
377+
/// [Ff] <start> <end> <count>
375378
///
376-
/// B - indicates an aggregated branch
377-
/// F - an aggregated fall-through
379+
/// where <start>, <end>, <ft_end> have the format [<id>:]<offset>
380+
///
381+
/// E - name of the sampling event used for subsequent entries
382+
/// S - indicates an aggregated basic sample at <start>
383+
/// B - indicates an aggregated branch from <start> to <end>
384+
/// F - an aggregated fall-through from <start> to <end>
378385
/// f - an aggregated fall-through with external origin - used to disambiguate
379386
/// between a return hitting a basic block head and a regular internal
380387
/// jump to the block
381-
/// T - an aggregated trace: branch with a fall-through (from, to, ft_end)
382-
///
383-
/// <start_id> - build id of the object containing the start address. We can
384-
/// skip it for the main binary and use "X" for an unknown object. This will
385-
/// save some space and facilitate human parsing.
386-
///
387-
/// <start_offset> - hex offset from the object base load address (0 for the
388-
/// main executable unless it's PIE) to the start address.
388+
/// T - an aggregated trace: branch from <start> to <end> with a fall-through
389+
/// to <ft_end>
389390
///
390-
/// <end_id>, <end_offset> - same for the end address.
391+
/// <id> - build id of the object containing the address. We can skip it for
392+
/// the main binary and use "X" for an unknown object. This will save some
393+
/// space and facilitate human parsing.
391394
///
392-
/// <ft_end> - same for the fallthrough_end address.
395+
/// <offset> - hex offset from the object base load address (0 for the
396+
/// main executable unless it's PIE) to the address.
393397
///
394-
/// <count> - total aggregated count of the branch or a fall-through.
398+
/// <count> - total aggregated count.
395399
///
396400
/// <mispred_count> - the number of times the branch was mispredicted.
397-
/// Omitted for fall-throughs.
398401
///
399402
/// Example:
403+
/// Basic samples profile:
404+
/// E cycles
405+
/// S 41be50 3
406+
/// E br_inst_retired.near_taken
407+
/// S 41be60 6
408+
///
409+
/// Trace profile combining branches and fall-throughs:
410+
/// T 4b196f 4b19e0 4b19ef 2
411+
///
412+
/// Legacy branch profile with separate branches and fall-throughs:
400413
/// F 41be50 41be50 3
401414
/// F 41be90 41be90 4
402415
/// B 4b1942 39b57f0 3 0

bolt/lib/Profile/DataAggregator.cpp

Lines changed: 80 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -1204,77 +1204,106 @@ ErrorOr<Location> DataAggregator::parseLocationOrOffset() {
12041204
}
12051205

12061206
std::error_code DataAggregator::parseAggregatedLBREntry() {
1207-
while (checkAndConsumeFS()) {
1208-
}
1207+
enum AggregatedLBREntry : char {
1208+
INVALID = 0,
1209+
EVENT_NAME, // E
1210+
TRACE, // T
1211+
SAMPLE, // S
1212+
BRANCH, // B
1213+
FT, // F
1214+
FT_EXTERNAL_ORIGIN // f
1215+
} Type = INVALID;
1216+
1217+
// The number of fields to parse, set based on Type.
1218+
int AddrNum = 0;
1219+
int CounterNum = 0;
1220+
// Storage for parsed fields.
1221+
StringRef EventName;
1222+
std::optional<Location> Addr[3];
1223+
int64_t Counters[2];
1224+
1225+
while (Type == INVALID || Type == EVENT_NAME) {
1226+
while (checkAndConsumeFS()) {
1227+
}
1228+
ErrorOr<StringRef> StrOrErr =
1229+
parseString(FieldSeparator, Type == EVENT_NAME);
1230+
if (std::error_code EC = StrOrErr.getError())
1231+
return EC;
1232+
StringRef Str = StrOrErr.get();
12091233

1210-
ErrorOr<StringRef> TypeOrErr = parseString(FieldSeparator);
1211-
if (std::error_code EC = TypeOrErr.getError())
1212-
return EC;
1213-
enum AggregatedLBREntry { TRACE, BRANCH, FT, FT_EXTERNAL_ORIGIN, INVALID };
1214-
auto Type = StringSwitch<AggregatedLBREntry>(TypeOrErr.get())
1215-
.Case("T", TRACE)
1216-
.Case("B", BRANCH)
1217-
.Case("F", FT)
1218-
.Case("f", FT_EXTERNAL_ORIGIN)
1219-
.Default(INVALID);
1220-
if (Type == INVALID) {
1221-
reportError("expected T, B, F or f");
1222-
return make_error_code(llvm::errc::io_error);
1223-
}
1234+
if (Type == EVENT_NAME) {
1235+
EventName = Str;
1236+
break;
1237+
}
12241238

1225-
while (checkAndConsumeFS()) {
1226-
}
1227-
ErrorOr<Location> From = parseLocationOrOffset();
1228-
if (std::error_code EC = From.getError())
1229-
return EC;
1239+
Type = StringSwitch<AggregatedLBREntry>(Str)
1240+
.Case("T", TRACE)
1241+
.Case("S", SAMPLE)
1242+
.Case("E", EVENT_NAME)
1243+
.Case("B", BRANCH)
1244+
.Case("F", FT)
1245+
.Case("f", FT_EXTERNAL_ORIGIN)
1246+
.Default(INVALID);
1247+
1248+
if (Type == INVALID) {
1249+
reportError("expected T, S, E, B, F or f");
1250+
return make_error_code(llvm::errc::io_error);
1251+
}
12301252

1231-
while (checkAndConsumeFS()) {
1253+
using SSI = StringSwitch<int>;
1254+
AddrNum = SSI(Str).Case("T", 3).Case("S", 1).Case("E", 0).Default(2);
1255+
CounterNum = SSI(Str).Case("B", 2).Case("E", 0).Default(1);
12321256
}
1233-
ErrorOr<Location> To = parseLocationOrOffset();
1234-
if (std::error_code EC = To.getError())
1235-
return EC;
12361257

1237-
ErrorOr<Location> TraceFtEnd = std::error_code();
1238-
if (Type == AggregatedLBREntry::TRACE) {
1258+
for (int I = 0; I < AddrNum; ++I) {
12391259
while (checkAndConsumeFS()) {
12401260
}
1241-
TraceFtEnd = parseLocationOrOffset();
1242-
if (std::error_code EC = TraceFtEnd.getError())
1261+
ErrorOr<Location> AddrOrErr = parseLocationOrOffset();
1262+
if (std::error_code EC = AddrOrErr.getError())
12431263
return EC;
1264+
Addr[I] = AddrOrErr.get();
12441265
}
12451266

1246-
while (checkAndConsumeFS()) {
1247-
}
1248-
ErrorOr<int64_t> Frequency =
1249-
parseNumberField(FieldSeparator, Type != AggregatedLBREntry::BRANCH);
1250-
if (std::error_code EC = Frequency.getError())
1251-
return EC;
1252-
1253-
uint64_t Mispreds = 0;
1254-
if (Type == AggregatedLBREntry::BRANCH) {
1267+
for (int I = 0; I < CounterNum; ++I) {
12551268
while (checkAndConsumeFS()) {
12561269
}
1257-
ErrorOr<int64_t> MispredsOrErr = parseNumberField(FieldSeparator, true);
1258-
if (std::error_code EC = MispredsOrErr.getError())
1270+
ErrorOr<int64_t> CountOrErr =
1271+
parseNumberField(FieldSeparator, I + 1 == CounterNum);
1272+
if (std::error_code EC = CountOrErr.getError())
12591273
return EC;
1260-
Mispreds = static_cast<uint64_t>(MispredsOrErr.get());
1274+
Counters[I] = CountOrErr.get();
12611275
}
12621276

12631277
if (!checkAndConsumeNewLine()) {
12641278
reportError("expected end of line");
12651279
return make_error_code(llvm::errc::io_error);
12661280
}
12671281

1268-
BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(From->Offset);
1269-
BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(To->Offset);
1282+
if (Type == EVENT_NAME) {
1283+
EventNames.insert(EventName);
1284+
return std::error_code();
1285+
}
12701286

1271-
for (BinaryFunction *BF : {FromFunc, ToFunc})
1272-
if (BF)
1273-
BF->setHasProfileAvailable();
1287+
const uint64_t FromOffset = Addr[0]->Offset;
1288+
BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(FromOffset);
1289+
if (FromFunc)
1290+
FromFunc->setHasProfileAvailable();
1291+
1292+
int64_t Count = Counters[0];
1293+
int64_t Mispreds = Counters[1];
1294+
1295+
if (Type == SAMPLE) {
1296+
BasicSamples[FromOffset] += Count;
1297+
NumTotalSamples += Count;
1298+
return std::error_code();
1299+
}
12741300

1275-
uint64_t Count = static_cast<uint64_t>(Frequency.get());
1301+
const uint64_t ToOffset = Addr[1]->Offset;
1302+
BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(ToOffset);
1303+
if (ToFunc)
1304+
ToFunc->setHasProfileAvailable();
12761305

1277-
Trace Trace(From->Offset, To->Offset);
1306+
Trace Trace(FromOffset, ToOffset);
12781307
// Taken trace
12791308
if (Type == TRACE || Type == BRANCH) {
12801309
TakenBranchInfo &Info = BranchLBRs[Trace];
@@ -1285,8 +1314,9 @@ std::error_code DataAggregator::parseAggregatedLBREntry() {
12851314
}
12861315
// Construct fallthrough part of the trace
12871316
if (Type == TRACE) {
1288-
Trace.From = To->Offset;
1289-
Trace.To = TraceFtEnd->Offset;
1317+
const uint64_t TraceFtEndOffset = Addr[2]->Offset;
1318+
Trace.From = ToOffset;
1319+
Trace.To = TraceFtEndOffset;
12901320
Type = FromFunc == ToFunc ? FT : FT_EXTERNAL_ORIGIN;
12911321
}
12921322
// Add fallthrough trace
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
E cycles
2+
S 4005f0 1
3+
S 4005f0 1
4+
S 400610 1
5+
S 400ad1 2
6+
S 400b10 1
7+
S 400bb7 1
8+
S 400bbc 2
9+
S 400d90 1
10+
S 400dae 1
11+
S 400e00 2
12+
S 401170 22
13+
S 401180 58
14+
S 4011a0 33
15+
S 4011a9 33
16+
S 4011ad 58
17+
S 4011b2 22
18+
S X:7f36d18d60c0 2
19+
S X:7f36d18f2ce0 1

bolt/test/X86/pre-aggregated-perf.test

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,16 @@ RUN: llvm-bolt %t.exe -o %t.bolt.yaml --pa -p %p/Inputs/pre-aggregated.txt \
5757
RUN: --aggregate-only --profile-format=yaml --profile-use-dfs
5858
RUN: cat %t.bolt.yaml | FileCheck %s -check-prefix=NEWFORMAT
5959

60+
## Test pre-aggregated basic profile
61+
RUN: perf2bolt %t.exe -o %t --pa -p %p/Inputs/pre-aggregated-basic.txt -o %t.ba \
62+
RUN: 2>&1 | FileCheck %s --check-prefix=BASIC-ERROR
63+
RUN: perf2bolt %t.exe -o %t --pa -p %p/Inputs/pre-aggregated-basic.txt -o %t.ba.nl \
64+
RUN: -nl 2>&1 | FileCheck %s --check-prefix=BASIC-SUCCESS
65+
RUN: FileCheck %s --input-file %t.ba.nl --check-prefix CHECK-BASIC-NL
66+
BASIC-ERROR: BOLT-INFO: 0 out of 7 functions in the binary (0.0%) have non-empty execution profile
67+
BASIC-SUCCESS: BOLT-INFO: 4 out of 7 functions in the binary (57.1%) have non-empty execution profile
68+
CHECK-BASIC-NL: no_lbr cycles
69+
6070
PERF2BOLT: 0 [unknown] 7f36d18d60c0 1 main 53c 0 2
6171
PERF2BOLT: 1 main 451 1 SolveCubic 0 0 2
6272
PERF2BOLT: 1 main 490 0 [unknown] 4005f0 0 1

bolt/test/link_fdata.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,9 +36,9 @@
3636
fdata_pat = re.compile(r"([01].*) (?P<mispred>\d+) (?P<exec>\d+)")
3737

3838
# Pre-aggregated profile:
39-
# {T|B|F|f} [<start_id>:]<start_offset> [<end_id>:]<end_offset> [<ft_end>]
40-
# <count> [<mispred_count>]
41-
preagg_pat = re.compile(r"(?P<type>[TBFf]) (?P<offsets_count>.*)")
39+
# {T|S|B|F|f} <start> [<end>] [<ft_end>] <count> [<mispred_count>]
# (E <event> lines carry no offsets or counts and are not matched by this pattern)
40+
# <start>, <end>, <ft_end>: [<id>:]<offset>
41+
preagg_pat = re.compile(r"(?P<type>[TSBFf]) (?P<offsets_count>.*)")
4242

4343
# No-LBR profile:
4444
# <is symbol?> <closest elf symbol or DSO name> <relative address> <count>

0 commit comments

Comments
 (0)