Skip to content

[CGData] Make an option to skip reading Names into StableFunctionMap #142095

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jun 10, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions llvm/include/llvm/CGData/CGDataPatchItem.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
//===- CGDataPatchItem.h ----------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains support for patching codegen data.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_CGDATA_CGDATAPATCHITEM_H
#define LLVM_CGDATA_CGDATAPATCHITEM_H

#include "llvm/ADT/ArrayRef.h"

namespace llvm {

/// Describes a single back-patch of a data stream: the 64-bit values held in
/// \c D are written into the stream beginning at position \c Pos.
struct CGDataPatchItem {
  /// Stream position at which patching starts.
  uint64_t Pos;
  /// The values to write; OwningArrayRef keeps its own copy of them.
  OwningArrayRef<uint64_t> D;

  /// Build a patch item for position \p Pos from the \p N values starting at
  /// \p D.
  CGDataPatchItem(uint64_t Pos, const uint64_t *D, int N)
      : Pos(Pos), D(ArrayRef<uint64_t>(D, static_cast<size_t>(N))) {}
};

} // namespace llvm

#endif // LLVM_CGDATA_CGDATAPATCHITEM_H
3 changes: 3 additions & 0 deletions llvm/include/llvm/CGData/CodeGenData.h
Original file line number Diff line number Diff line change
@@ -282,6 +282,9 @@ enum CGDataVersion {
Version1 = 1,
// Version 2 supports the stable function merging map.
Version2 = 2,
// Version 3 adds the total size of the Names in the stable function map so
// we can skip reading them into memory for non-assertion builds.
Version3 = 3,
CurrentVersion = CG_DATA_INDEX_VERSION
};
const uint64_t Version = CGDataVersion::CurrentVersion;
2 changes: 1 addition & 1 deletion llvm/include/llvm/CGData/CodeGenData.inc
Original file line number Diff line number Diff line change
@@ -49,4 +49,4 @@ CG_DATA_SECT_ENTRY(CG_merge, CG_DATA_QUOTE(CG_DATA_MERGE_COMMON),
#endif

/* Indexed codegen data format version (start from 1). */
#define CG_DATA_INDEX_VERSION 2
#define CG_DATA_INDEX_VERSION 3
25 changes: 13 additions & 12 deletions llvm/include/llvm/CGData/CodeGenDataWriter.h
Original file line number Diff line number Diff line change
@@ -13,6 +13,7 @@
#ifndef LLVM_CGDATA_CODEGENDATAWRITER_H
#define LLVM_CGDATA_CODEGENDATAWRITER_H

#include "llvm/CGData/CGDataPatchItem.h"
#include "llvm/CGData/CodeGenData.h"
#include "llvm/CGData/OutlinedHashTreeRecord.h"
#include "llvm/CGData/StableFunctionMapRecord.h"
@@ -22,21 +23,23 @@

namespace llvm {

/// A struct to define how the data stream should be patched.
struct CGDataPatchItem {
uint64_t Pos; // Where to patch.
uint64_t *D; // Pointer to an array of source data.
int N; // Number of elements in \c D array.
};

/// A wrapper class to abstract writer stream with support of bytes
/// back patching.
class CGDataOStream {
enum class OStreamKind {
fd,
string,
svector,
};

public:
CGDataOStream(raw_fd_ostream &FD)
: IsFDOStream(true), OS(FD), LE(FD, llvm::endianness::little) {}
: Kind(OStreamKind::fd), OS(FD), LE(FD, llvm::endianness::little) {}
CGDataOStream(raw_string_ostream &STR)
: IsFDOStream(false), OS(STR), LE(STR, llvm::endianness::little) {}
: Kind(OStreamKind::string), OS(STR), LE(STR, llvm::endianness::little) {}
CGDataOStream(raw_svector_ostream &SVEC)
: Kind(OStreamKind::svector), OS(SVEC),
LE(SVEC, llvm::endianness::little) {}

uint64_t tell() { return OS.tell(); }
void write(uint64_t V) { LE.write<uint64_t>(V); }
@@ -48,9 +51,7 @@ class CGDataOStream {
// directly and it won't be reflected in the stream's internal buffer.
LLVM_ABI void patch(ArrayRef<CGDataPatchItem> P);

// If \c OS is an instance of \c raw_fd_ostream, this field will be
// true. Otherwise, \c OS will be an raw_string_ostream.
bool IsFDOStream;
OStreamKind Kind;
raw_ostream &OS;
support::endian::Writer LE;
};
10 changes: 7 additions & 3 deletions llvm/include/llvm/CGData/StableFunctionMapRecord.h
Original file line number Diff line number Diff line change
@@ -16,6 +16,7 @@
#ifndef LLVM_CGDATA_STABLEFUNCTIONMAPRECORD_H
#define LLVM_CGDATA_STABLEFUNCTIONMAPRECORD_H

#include "llvm/CGData/CGDataPatchItem.h"
#include "llvm/CGData/StableFunctionMap.h"
#include "llvm/ObjectYAML/YAML.h"
#include "llvm/Support/Compiler.h"
@@ -36,13 +37,16 @@ struct StableFunctionMapRecord {
/// A static helper function to serialize the stable function map without
/// owning the stable function map.
LLVM_ABI static void serialize(raw_ostream &OS,
const StableFunctionMap *FunctionMap);
const StableFunctionMap *FunctionMap,
std::vector<CGDataPatchItem> &PatchItems);

/// Serialize the stable function map to a raw_ostream.
LLVM_ABI void serialize(raw_ostream &OS) const;
LLVM_ABI void serialize(raw_ostream &OS,
std::vector<CGDataPatchItem> &PatchItems) const;

/// Deserialize the stable function map from the byte buffer at \c Ptr, advancing \c Ptr as data is consumed.
LLVM_ABI void deserialize(const unsigned char *&Ptr);
LLVM_ABI void deserialize(const unsigned char *&Ptr,
bool ReadStableFunctionMapNames = true);

/// Serialize the stable function map to a YAML stream.
LLVM_ABI void serializeYAML(yaml::Output &YOS) const;
2 changes: 1 addition & 1 deletion llvm/lib/CGData/CodeGenData.cpp
Original file line number Diff line number Diff line change
@@ -188,7 +188,7 @@ Expected<Header> Header::readFromBuffer(const unsigned char *Curr) {
return make_error<CGDataError>(cgdata_error::unsupported_version);
H.DataKind = endian::readNext<uint32_t, endianness::little, unaligned>(Curr);

static_assert(IndexedCGData::CGDataVersion::CurrentVersion == Version2,
static_assert(IndexedCGData::CGDataVersion::CurrentVersion == Version3,
"Please update the offset computation below if a new field has "
"been added to the header.");
H.OutlinedHashTreeOffset =
9 changes: 8 additions & 1 deletion llvm/lib/CGData/CodeGenDataReader.cpp
Original file line number Diff line number Diff line change
@@ -13,12 +13,19 @@
#include "llvm/CGData/CodeGenDataReader.h"
#include "llvm/CGData/OutlinedHashTreeRecord.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/MemoryBuffer.h"

#define DEBUG_TYPE "cg-data-reader"

using namespace llvm;

// Hidden flag, true by default, that is passed to FunctionMapRecord.deserialize
// in IndexedCodeGenDataReader::read() to decide whether the function map names
// are read.
static cl::opt<bool> IndexedCodeGenDataReadFunctionMapNames(
    "indexed-codegen-data-read-function-map-names",
    cl::desc("Read function map names in indexed CodeGenData. Can be disabled "
             "to save memory and time for final consumption of the indexed "
             "CodeGenData in production."),
    cl::Hidden, cl::init(true));

namespace llvm {

static Expected<std::unique_ptr<MemoryBuffer>>
@@ -106,7 +113,7 @@ Error IndexedCodeGenDataReader::read() {
const unsigned char *Ptr = Start + Header.StableFunctionMapOffset;
if (Ptr >= End)
return error(cgdata_error::eof);
FunctionMapRecord.deserialize(Ptr);
FunctionMapRecord.deserialize(Ptr, IndexedCodeGenDataReadFunctionMapNames);
}

return success();
36 changes: 28 additions & 8 deletions llvm/lib/CGData/CodeGenDataWriter.cpp
Original file line number Diff line number Diff line change
@@ -19,29 +19,46 @@ using namespace llvm;
void CGDataOStream::patch(ArrayRef<CGDataPatchItem> P) {
using namespace support;

if (IsFDOStream) {
switch (Kind) {
case OStreamKind::fd: {
raw_fd_ostream &FDOStream = static_cast<raw_fd_ostream &>(OS);
const uint64_t LastPos = FDOStream.tell();
for (const auto &K : P) {
FDOStream.seek(K.Pos);
for (int I = 0; I < K.N; I++)
for (size_t I = 0; I < K.D.size(); ++I)
write(K.D[I]);
}
// Reset the stream to the last position after patching so that users
// don't accidentally overwrite data. This makes it consistent with
// the string stream below which replaces the data directly.
FDOStream.seek(LastPos);
} else {
break;
}
case OStreamKind::string: {
raw_string_ostream &SOStream = static_cast<raw_string_ostream &>(OS);
std::string &Data = SOStream.str(); // with flush
for (const auto &K : P) {
for (int I = 0; I < K.N; I++) {
for (size_t I = 0; I < K.D.size(); ++I) {
uint64_t Bytes =
endian::byte_swap<uint64_t, llvm::endianness::little>(K.D[I]);
Data.replace(K.Pos + I * sizeof(uint64_t), sizeof(uint64_t),
reinterpret_cast<const char *>(&Bytes), sizeof(uint64_t));
}
}
break;
}
case OStreamKind::svector: {
raw_svector_ostream &VOStream = static_cast<raw_svector_ostream &>(OS);
for (const auto &K : P) {
for (size_t I = 0; I < K.D.size(); ++I) {
uint64_t Bytes =
endian::byte_swap<uint64_t, llvm::endianness::little>(K.D[I]);
VOStream.pwrite(reinterpret_cast<const char *>(&Bytes),
sizeof(uint64_t), K.Pos + I * sizeof(uint64_t));
}
}
break;
}
}
}

@@ -106,17 +123,20 @@ Error CodeGenDataWriter::writeImpl(CGDataOStream &COS) {
if (Error E = writeHeader(COS))
return E;

std::vector<CGDataPatchItem> PatchItems;

uint64_t OutlinedHashTreeFieldStart = COS.tell();
if (hasOutlinedHashTree())
HashTreeRecord.serialize(COS.OS);
uint64_t StableFunctionMapFieldStart = COS.tell();
if (hasStableFunctionMap())
FunctionMapRecord.serialize(COS.OS);
FunctionMapRecord.serialize(COS.OS, PatchItems);

// Back patch the offsets.
CGDataPatchItem PatchItems[] = {
{OutlinedHashTreeOffset, &OutlinedHashTreeFieldStart, 1},
{StableFunctionMapOffset, &StableFunctionMapFieldStart, 1}};
PatchItems.emplace_back(OutlinedHashTreeOffset, &OutlinedHashTreeFieldStart,
1);
PatchItems.emplace_back(StableFunctionMapOffset, &StableFunctionMapFieldStart,
1);
COS.patch(PatchItems);

return Error::success();
51 changes: 34 additions & 17 deletions llvm/lib/CGData/StableFunctionMapRecord.cpp
Original file line number Diff line number Diff line change
@@ -77,26 +77,32 @@ static IndexOperandHashVecType getStableIndexOperandHashes(
return IndexOperandHashes;
}

void StableFunctionMapRecord::serialize(raw_ostream &OS) const {
serialize(OS, FunctionMap.get());
void StableFunctionMapRecord::serialize(
raw_ostream &OS, std::vector<CGDataPatchItem> &PatchItems) const {
serialize(OS, FunctionMap.get(), PatchItems);
}

void StableFunctionMapRecord::serialize(raw_ostream &OS,
const StableFunctionMap *FunctionMap) {
void StableFunctionMapRecord::serialize(
raw_ostream &OS, const StableFunctionMap *FunctionMap,
std::vector<CGDataPatchItem> &PatchItems) {
support::endian::Writer Writer(OS, endianness::little);

// Write Names.
ArrayRef<std::string> Names = FunctionMap->getNames();
uint32_t ByteSize = 4;
Writer.write<uint32_t>(Names.size());
for (auto &Name : Names) {
// Remember the position, write back the total size of Names, so we can skip
// reading them if needed.
const uint64_t NamesByteSizeOffset = Writer.OS.tell();
Writer.write<uint64_t>(0);
for (auto &Name : Names)
Writer.OS << Name << '\0';
ByteSize += Name.size() + 1;
}
// Align ByteSize to 4 bytes.
uint32_t Padding = offsetToAlignment(ByteSize, Align(4));
// Align current position to 4 bytes.
uint32_t Padding = offsetToAlignment(Writer.OS.tell(), Align(4));
for (uint32_t I = 0; I < Padding; ++I)
Writer.OS << '\0';
const auto NamesByteSize =
Writer.OS.tell() - NamesByteSizeOffset - sizeof(NamesByteSizeOffset);
PatchItems.emplace_back(NamesByteSizeOffset, &NamesByteSize, 1);

// Write StableFunctionEntries whose pointers are sorted.
auto FuncEntries = getStableFunctionEntries(*FunctionMap);
@@ -120,7 +126,8 @@ void StableFunctionMapRecord::serialize(raw_ostream &OS,
}
}

void StableFunctionMapRecord::deserialize(const unsigned char *&Ptr) {
void StableFunctionMapRecord::deserialize(const unsigned char *&Ptr,
bool ReadStableFunctionMapNames) {
// Assert that Ptr is 4-byte aligned
assert(((uintptr_t)Ptr % 4) == 0);
// Read Names.
@@ -129,13 +136,23 @@ void StableFunctionMapRecord::deserialize(const unsigned char *&Ptr) {
// Early exit if there is no name.
if (NumNames == 0)
return;
for (unsigned I = 0; I < NumNames; ++I) {
StringRef Name(reinterpret_cast<const char *>(Ptr));
Ptr += Name.size() + 1;
FunctionMap->getIdOrCreateForName(Name);
const auto NamesByteSize =
endian::readNext<uint64_t, endianness::little, unaligned>(Ptr);
const auto NamesOffset = reinterpret_cast<uintptr_t>(Ptr);
if (ReadStableFunctionMapNames) {
for (unsigned I = 0; I < NumNames; ++I) {
StringRef Name(reinterpret_cast<const char *>(Ptr));
Ptr += Name.size() + 1;
FunctionMap->getIdOrCreateForName(Name);
}
// Align Ptr to 4 bytes.
Ptr = reinterpret_cast<const uint8_t *>(alignAddr(Ptr, Align(4)));
assert(reinterpret_cast<uintptr_t>(Ptr) - NamesOffset == NamesByteSize &&
"NamesByteSize does not match the actual size of names");
} else {
// Skip reading Names by advancing the pointer.
Ptr = reinterpret_cast<const uint8_t *>(NamesOffset + NamesByteSize);
}
// Align Ptr to 4 bytes.
Ptr = reinterpret_cast<const uint8_t *>(alignAddr(Ptr, Align(4)));

// Read StableFunctionEntries.
auto NumFuncs =
6 changes: 5 additions & 1 deletion llvm/lib/CodeGen/GlobalMergeFunctions.cpp
Original file line number Diff line number Diff line change
@@ -14,6 +14,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ModuleSummaryAnalysis.h"
#include "llvm/CGData/CodeGenData.h"
#include "llvm/CGData/CodeGenDataWriter.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/StructuralHash.h"
#include "llvm/InitializePasses.h"
@@ -526,7 +527,10 @@ void GlobalMergeFunc::emitFunctionMap(Module &M) {
SmallVector<char> Buf;
raw_svector_ostream OS(Buf);

StableFunctionMapRecord::serialize(OS, LocalFunctionMap.get());
std::vector<CGDataPatchItem> PatchItems;
StableFunctionMapRecord::serialize(OS, LocalFunctionMap.get(), PatchItems);
CGDataOStream COS(OS);
COS.patch(PatchItems);

std::unique_ptr<MemoryBuffer> Buffer = MemoryBuffer::getMemBuffer(
OS.str(), "in-memory stable function map", false);
4 changes: 2 additions & 2 deletions llvm/test/tools/llvm-cgdata/empty.test
Original file line number Diff line number Diff line change
@@ -16,7 +16,7 @@ RUN: llvm-cgdata --show %t_emptyheader.cgdata | count 0

# The version number appears when asked, as it's in the header
RUN: llvm-cgdata --show --cgdata-version %t_emptyheader.cgdata | FileCheck %s --check-prefix=VERSION
VERSION: Version: 2
VERSION: Version: 3

# When converting a binary file (w/ the header only) to a text file, it's an empty file as the text format does not have an explicit header.
RUN: llvm-cgdata --convert %t_emptyheader.cgdata --format text | count 0
@@ -30,7 +30,7 @@ RUN: llvm-cgdata --convert %t_emptyheader.cgdata --format text | count 0
# uint64_t StableFunctionMapOffset;
# }
RUN: printf '\xffcgdata\x81' > %t_header.cgdata
RUN: printf '\x02\x00\x00\x00' >> %t_header.cgdata
RUN: printf '\x03\x00\x00\x00' >> %t_header.cgdata
RUN: printf '\x00\x00\x00\x00' >> %t_header.cgdata
RUN: printf '\x20\x00\x00\x00\x00\x00\x00\x00' >> %t_header.cgdata
RUN: printf '\x20\x00\x00\x00\x00\x00\x00\x00' >> %t_header.cgdata
4 changes: 2 additions & 2 deletions llvm/test/tools/llvm-cgdata/error.test
Original file line number Diff line number Diff line change
@@ -22,9 +22,9 @@ RUN: printf '\xffcgdata\x81' > %t_corrupt.cgdata
RUN: not llvm-cgdata --show %t_corrupt.cgdata 2>&1 | FileCheck %s --check-prefix=CORRUPT
CORRUPT: {{.}}cgdata: invalid codegen data (file header is corrupt)

# The current version 2 while the header says 3.
# The current version is 3 while the header says 4.
RUN: printf '\xffcgdata\x81' > %t_version.cgdata
RUN: printf '\x03\x00\x00\x00' >> %t_version.cgdata
RUN: printf '\x04\x00\x00\x00' >> %t_version.cgdata
RUN: printf '\x00\x00\x00\x00' >> %t_version.cgdata
RUN: printf '\x20\x00\x00\x00\x00\x00\x00\x00' >> %t_version.cgdata
RUN: printf '\x20\x00\x00\x00\x00\x00\x00\x00' >> %t_version.cgdata
Original file line number Diff line number Diff line change
@@ -63,4 +63,4 @@ CHECK-NEXT: Mergeable function Count: 0

;--- merge-both-template.ll
@.data1 = private unnamed_addr constant [72 x i8] c"<RAW_1_BYTES>", section "__DATA,__llvm_outline"
@.data2 = private unnamed_addr constant [60 x i8] c"<RAW_2_BYTES>", section "__DATA,__llvm_merge"
@.data2 = private unnamed_addr constant [68 x i8] c"<RAW_2_BYTES>", section "__DATA,__llvm_merge"
4 changes: 2 additions & 2 deletions llvm/test/tools/llvm-cgdata/merge-funcmap-archive.test
Original file line number Diff line number Diff line change
@@ -65,7 +65,7 @@ MAP-NEXT: ...
...

;--- merge-1-template.ll
@.data = private unnamed_addr constant [60 x i8] c"<RAW_1_BYTES>", section "__DATA,__llvm_merge"
@.data = private unnamed_addr constant [68 x i8] c"<RAW_1_BYTES>", section "__DATA,__llvm_merge"

;--- raw-2.cgtext
:stable_function_map
@@ -80,4 +80,4 @@ MAP-NEXT: ...
...

;--- merge-2-template.ll
@.data = private unnamed_addr constant [60 x i8] c"<RAW_2_BYTES>", section "__DATA,__llvm_merge"
@.data = private unnamed_addr constant [68 x i8] c"<RAW_2_BYTES>", section "__DATA,__llvm_merge"
Loading
Oops, something went wrong.
Loading
Oops, something went wrong.