Skip to content

Commit

Permalink
[SampleFDO] Add profile symbol list section to discriminate function …
Browse files Browse the repository at this point in the history
…being

cold versus function being newly added.

This is the second half of https://reviews.llvm.org/D66374.

The profile symbol list is the collection of function symbols that show up in
the binary used to generate the current profile. It is used to distinguish a
function that is cold from a function that was newly added. The profile symbol
list is only added to profiles in the ExtBinary format.

During profile use compilation, when profile-sample-accurate is enabled,
a function without profile will be regarded as cold only when it is
contained in that list.

Differential Revision: https://reviews.llvm.org/D66766

llvm-svn: 370563
  • Loading branch information
wmi-11 committed Aug 31, 2019
1 parent 0377ca6 commit 798e59b
Show file tree
Hide file tree
Showing 17 changed files with 484 additions and 26 deletions.
49 changes: 48 additions & 1 deletion llvm/include/llvm/ProfileData/SampleProf.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,14 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cstdint>
#include <map>
Expand All @@ -50,7 +52,10 @@ enum class sampleprof_error {
truncated_name_table,
not_implemented,
counter_overflow,
ostream_seek_unsupported
ostream_seek_unsupported,
compress_failed,
uncompress_failed,
zlib_unavailable
};

inline std::error_code make_error_code(sampleprof_error E) {
Expand Down Expand Up @@ -114,6 +119,7 @@ enum SecType {
SecInValid = 0,
SecProfSummary = 1,
SecNameTable = 2,
SecProfileSymbolList = 3,
// marker for the first type of profile.
SecFuncProfileFirst = 32,
SecLBRProfile = SecFuncProfileFirst
Expand Down Expand Up @@ -595,6 +601,47 @@ template <class LocationT, class SampleT> class SampleSorter {
SamplesWithLocList V;
};

/// ProfileSymbolList records the function symbols that show up in the
/// binary used to generate the profile. It is used to discriminate between
/// a function that is so cold that it does not show up in the profile and
/// a function that was newly added.
class ProfileSymbolList {
public:
  /// Add \p Name to the list. Adding a name that is already present is a
  /// no-op.
  ///
  /// \p copy indicates whether the underlying memory of \p Name has to be
  /// copied into storage owned by this list. When it is false only the
  /// reference is recorded, so the caller must keep that memory alive for
  /// as long as the list is in use.
  void add(StringRef Name, bool copy = false) {
    // Look the name up first so that a duplicate never costs an allocation.
    if (Syms.count(Name))
      return;
    Syms.insert(copy ? Name.copy(Allocator) : Name);
  }

  /// Return true if \p Name is in the list.
  bool contains(StringRef Name) const { return Syms.count(Name); }

  /// Add every symbol of \p List to this list. The names are copied, so
  /// this list does not depend on the lifetime of \p List afterwards.
  void merge(const ProfileSymbolList &List) {
    for (auto Sym : List.Syms)
      add(Sym, true);
  }

  /// Return the number of distinct symbols in the list.
  unsigned size() const { return Syms.size(); }

  /// Choose whether write() compresses the symbol list.
  void setToCompress(bool TC) { ToCompress = TC; }

  std::error_code read(uint64_t CompressSize, uint64_t UncompressSize,
                       const uint8_t *Data);
  std::error_code write(raw_ostream &OS);
  void dump(raw_ostream &OS = dbgs()) const;

private:
  // Determine whether or not to compress the symbol list when
  // writing it into profile. The variable is unused when the symbol
  // list is read from an existing profile.
  bool ToCompress = true;
  // The symbol names. Entries either reference caller-provided memory or
  // memory obtained from Allocator (see add()).
  DenseSet<StringRef> Syms;
  // Backing storage for names that were copied into the list.
  BumpPtrAllocator Allocator;
};

} // end namespace sampleprof
} // end namespace llvm

Expand Down
10 changes: 10 additions & 0 deletions llvm/include/llvm/ProfileData/SampleProfReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,10 @@ class SampleProfileReader {
/// \brief Return the profile format.
SampleProfileFormat getFormat() { return Format; }

/// Return the profile symbol list held by this reader. The base
/// implementation has no such list and returns nullptr; readers that parse
/// a symbol list override this.
virtual std::unique_ptr<ProfileSymbolList> getProfileSymbolList() {
  return nullptr;
}

protected:
/// Map every function to its associated profile.
///
Expand Down Expand Up @@ -477,6 +481,7 @@ class SampleProfileReaderRawBinary : public SampleProfileReaderBinary {
class SampleProfileReaderExtBinaryBase : public SampleProfileReaderBinary {
protected:
std::vector<SecHdrTableEntry> SecHdrTable;
std::unique_ptr<ProfileSymbolList> ProfSymList;
std::error_code readSecHdrTableEntry();
std::error_code readSecHdrTable();
virtual std::error_code readHeader() override;
Expand All @@ -498,6 +503,7 @@ class SampleProfileReaderExtBinary : public SampleProfileReaderExtBinaryBase {
virtual std::error_code verifySPMagic(uint64_t Magic) override;
virtual std::error_code readOneSection(const uint8_t *Start, uint64_t Size,
SecType Type) override;
std::error_code readProfileSymbolList();

public:
SampleProfileReaderExtBinary(std::unique_ptr<MemoryBuffer> B, LLVMContext &C,
Expand All @@ -506,6 +512,10 @@ class SampleProfileReaderExtBinary : public SampleProfileReaderExtBinaryBase {

/// \brief Return true if \p Buffer is in the format supported by this class.
static bool hasFormat(const MemoryBuffer &Buffer);

/// Hand the profile symbol list over to the caller. Ownership is moved out
/// of the reader, so the reader's own pointer is null afterwards and a
/// second call returns nullptr. Also returns nullptr if no list was
/// created by the reader.
virtual std::unique_ptr<ProfileSymbolList> getProfileSymbolList() override {
  return std::move(ProfSymList);
}
};

class SampleProfileReaderCompactBinary : public SampleProfileReaderBinary {
Expand Down
11 changes: 10 additions & 1 deletion llvm/include/llvm/ProfileData/SampleProfWriter.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ class SampleProfileWriter {
static ErrorOr<std::unique_ptr<SampleProfileWriter>>
create(std::unique_ptr<raw_ostream> &OS, SampleProfileFormat Format);

/// Provide a profile symbol list to the writer. The base implementation
/// ignores \p PSL; writers that can emit a symbol list override this.
virtual void setProfileSymbolList(ProfileSymbolList *PSL) {}

protected:
SampleProfileWriter(std::unique_ptr<raw_ostream> &OS)
: OutputStream(std::move(OS)) {}
Expand Down Expand Up @@ -175,12 +177,19 @@ class SampleProfileWriterExtBinaryBase : public SampleProfileWriterBinary {
class SampleProfileWriterExtBinary : public SampleProfileWriterExtBinaryBase {
using SampleProfileWriterExtBinaryBase::SampleProfileWriterExtBinaryBase;

public:
/// Remember \p PSL as the symbol list of this writer. Only the pointer is
/// stored (no copy is made and no ownership is taken), so the caller must
/// keep the list alive while the writer may still use it.
virtual void setProfileSymbolList(ProfileSymbolList *PSL) override {
  ProfSymList = PSL;
}

private:
/// Set the section layout of this format: profile summary, name table,
/// function profiles and the profile symbol list.
virtual void initSectionLayout() override {
  SectionLayout = {SecProfSummary, SecNameTable, SecLBRProfile,
                   SecProfileSymbolList};
}
virtual std::error_code
writeSections(const StringMap<FunctionSamples> &ProfileMap) override;
ProfileSymbolList *ProfSymList = nullptr;
};

// CompactBinary is a compact format of binary profile which both reduces
Expand Down
81 changes: 81 additions & 0 deletions llvm/lib/ProfileData/SampleProf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,11 @@
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Compression.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/raw_ostream.h"
#include <string>
Expand Down Expand Up @@ -66,6 +69,12 @@ class SampleProfErrorCategoryType : public std::error_category {
return "Counter overflow";
case sampleprof_error::ostream_seek_unsupported:
return "Ostream does not support seek";
case sampleprof_error::compress_failed:
return "Compress failure";
case sampleprof_error::uncompress_failed:
return "Uncompress failure";
case sampleprof_error::zlib_unavailable:
return "Zlib is unavailable";
}
llvm_unreachable("A value of sampleprof_error has no message.");
}
Expand Down Expand Up @@ -188,3 +197,75 @@ FunctionSamples::findFunctionSamples(const DILocation *DIL) const {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void FunctionSamples::dump() const { print(dbgs(), 0); }
#endif

std::error_code ProfileSymbolList::read(uint64_t CompressSize,
uint64_t UncompressSize,
const uint8_t *Data) {
const char *ListStart = reinterpret_cast<const char *>(Data);
// CompressSize being non-zero means the profile is compressed and
// needs to be uncompressed first.
if (CompressSize) {
if (!llvm::zlib::isAvailable())
return sampleprof_error::zlib_unavailable;

StringRef CompressedStrings(reinterpret_cast<const char *>(Data),
CompressSize);
char *Buffer = Allocator.Allocate<char>(UncompressSize);
llvm::Error E = zlib::uncompress(CompressedStrings, Buffer, UncompressSize);
if (E)
return sampleprof_error::uncompress_failed;
ListStart = Buffer;
}

uint64_t Size = 0;
while (Size < UncompressSize) {
StringRef Str(ListStart + Size);
add(Str);
Size += Str.size() + 1;
}
return sampleprof_error::success;
}

/// Serialize the symbol list to \p OS.
///
/// The output is the ULEB128-encoded uncompressed size, the ULEB128-encoded
/// compressed size (0 when the list is written uncompressed), and then the
/// payload: the sorted symbol names, each followed by a NUL byte, optionally
/// zlib-compressed depending on ToCompress.
///
/// \returns success, zlib_unavailable if compression is requested but zlib
/// is not available, or compress_failed. Nothing is written to \p OS when
/// an error is returned.
std::error_code ProfileSymbolList::write(raw_ostream &OS) {
  // Sort the symbols before doing compression. It will make the
  // compression much more effective.
  std::vector<StringRef> SortedList(Syms.begin(), Syms.end());
  llvm::sort(SortedList);

  std::string UncompressedStrings;
  for (const StringRef &Sym : SortedList) {
    // Append straight from the StringRef; no temporary std::string needed.
    UncompressedStrings.append(Sym.data(), Sym.size());
    UncompressedStrings.append(1, '\0');
  }

  if (ToCompress) {
    if (!llvm::zlib::isAvailable())
      return sampleprof_error::zlib_unavailable;
    SmallString<128> CompressedStrings;
    llvm::Error E = zlib::compress(UncompressedStrings, CompressedStrings,
                                   zlib::BestSizeCompression);
    if (E) {
      // An Error in the failure state must be consumed before it is
      // destroyed; it is only translated into an error code here.
      consumeError(std::move(E));
      return sampleprof_error::compress_failed;
    }
    encodeULEB128(UncompressedStrings.size(), OS);
    encodeULEB128(CompressedStrings.size(), OS);
    OS << CompressedStrings.str();
  } else {
    encodeULEB128(UncompressedStrings.size(), OS);
    // If profile symbol list is not compressed, we will still save
    // a compressed size value, but the value of the size is 0.
    encodeULEB128(0, OS);
    OS << UncompressedStrings;
  }
  return sampleprof_error::success;
}

/// Print a banner followed by every symbol in the list, one per line, to
/// \p OS. Symbols are printed in sorted order.
void ProfileSymbolList::dump(raw_ostream &OS) const {
  // Take a sorted snapshot of the set before printing.
  std::vector<StringRef> Ordered(Syms.begin(), Syms.end());
  llvm::sort(Ordered);

  OS << "======== Dump profile symbol list ========\n";
  for (const StringRef &Name : Ordered)
    OS << Name << "\n";
}
28 changes: 28 additions & 0 deletions llvm/lib/ProfileData/SampleProfReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -486,12 +486,40 @@ SampleProfileReaderExtBinary::readOneSection(const uint8_t *Start,
return EC;
}
break;
case SecProfileSymbolList:
if (std::error_code EC = readProfileSymbolList())
return EC;
break;
default:
break;
}
return sampleprof_error::success;
}

std::error_code SampleProfileReaderExtBinary::readProfileSymbolList() {
auto UncompressSize = readNumber<uint64_t>();
if (std::error_code EC = UncompressSize.getError())
return EC;

auto CompressSize = readNumber<uint64_t>();
if (std::error_code EC = CompressSize.getError())
return EC;

if (!ProfSymList)
ProfSymList = std::make_unique<ProfileSymbolList>();

if (std::error_code EC =
ProfSymList->read(*CompressSize, *UncompressSize, Data))
return EC;

// CompressSize is zero only when ProfileSymbolList is not compressed.
if (*CompressSize == 0)
Data = Data + *UncompressSize;
else
Data = Data + *CompressSize;
return sampleprof_error::success;
}

std::error_code SampleProfileReaderExtBinaryBase::read() {
const uint8_t *BufStart =
reinterpret_cast<const uint8_t *>(Buffer->getBufferStart());
Expand Down
7 changes: 6 additions & 1 deletion llvm/lib/ProfileData/SampleProfWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,12 @@ std::error_code SampleProfileWriterExtBinary::writeSections(

if (std::error_code EC = writeFuncProfiles(ProfileMap))
return EC;
addNewSection(SecLBRProfile, SectionStart);
SectionStart = addNewSection(SecLBRProfile, SectionStart);

if (ProfSymList && ProfSymList->size() > 0)
if (std::error_code EC = ProfSymList->write(*OutputStream))
return EC;
addNewSection(SecProfileSymbolList, SectionStart);

return sampleprof_error::success;
}
Expand Down
15 changes: 12 additions & 3 deletions llvm/lib/Transforms/IPO/SampleProfile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -379,6 +379,10 @@ class SampleProfileLoader {
/// Profile Summary Info computed from sample profile.
ProfileSummaryInfo *PSI = nullptr;

/// Profile symbol list. It tells whether a function name appears in the
/// binary used to generate the current profile.
std::unique_ptr<ProfileSymbolList> PSL;

/// Total number of samples collected in this profile.
///
/// This is the sum of all the samples collected in all the functions executed
Expand Down Expand Up @@ -1634,6 +1638,7 @@ bool SampleProfileLoader::doInitialization(Module &M) {
Reader = std::move(ReaderOrErr.get());
Reader->collectFuncsToUse(M);
ProfileIsValid = (Reader->read() == sampleprof_error::success);
PSL = Reader->getProfileSymbolList();

if (!RemappingFilename.empty()) {
// Apply profile remappings to the loaded profile data if requested.
Expand Down Expand Up @@ -1725,11 +1730,15 @@ bool SampleProfileLoader::runOnFunction(Function &F, ModuleAnalysisManager *AM)
// conservatively by getEntryCount as the same as unknown (None). This is
// to avoid newly added code to be treated as cold. If we have samples
// this will be overwritten in emitAnnotations.
//
// PSL -- the profile symbol list includes all the symbols in the sampled binary.
// If ProfileSampleAccurate is true or F has profile-sample-accurate
// attribute, initialize the entry count to 0 so callsites or functions
// unsampled will be treated as cold.
// attribute, and if there is no profile symbol list read in, initialize
// all the function entry counts to 0; if there is profile symbol list, only
// initialize the entry count to 0 when current function is in the list.
uint64_t initialEntryCount =
(ProfileSampleAccurate || F.hasFnAttribute("profile-sample-accurate"))
((ProfileSampleAccurate || F.hasFnAttribute("profile-sample-accurate")) &&
(!PSL || PSL->contains(F.getName())))
? 0
: -1;
F.setEntryCount(ProfileCount(initialEntryCount, Function::PCT_Real));
Expand Down
Loading

0 comments on commit 798e59b

Please sign in to comment.