Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
157 changes: 70 additions & 87 deletions lld/ELF/SyntheticSections.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -540,43 +540,6 @@ void EhFrameSection::finalizeContents() {
this->size = off;
}

// Builds the rows of the .eh_frame_hdr binary search table. Each FDE
// contributes one row holding its PC and its own address, both expressed
// relative to the address of .eh_frame_hdr. The rows are returned ordered by
// PC, with at most one row per PC value.
SmallVector<EhFrameSection::FdeData, 0> EhFrameSection::getFdeData() const {
  uint8_t *buf = ctx.bufferStart + getParent()->offset + outSecOff;
  const uint64_t hdrVA = getPartition(ctx).ehFrameHdr->getVA();

  SmallVector<FdeData, 0> table;
  for (CieRecord *cieRec : cieRecords) {
    const uint8_t encoding = getFdeEncoding(cieRec->cie);
    for (EhSectionPiece *piece : cieRec->fdes) {
      const uint64_t pcOff = getFdePc(buf, piece->outputOff, encoding) - hdrVA;
      const uint64_t fdeOff = getParent()->addr + piece->outputOff - hdrVA;
      // Table fields are 32 bits wide; an FDE whose PC offset does not fit is
      // reported and left out of the table.
      if (isInt<32>(pcOff)) {
        table.push_back({uint32_t(pcOff), uint32_t(fdeOff)});
        continue;
      }
      Err(ctx) << piece->sec << ": PC offset is too large: 0x"
               << Twine::utohexstr(pcOff);
    }
  }

  // Order by PC, then drop repeated PCs. A PC normally maps to a single FDE
  // (i.e. function), but when ICF folds two functions into one, several FDEs
  // can point at the same address. The stable sort keeps the first-seen FDE.
  llvm::stable_sort(table, [](const FdeData &lhs, const FdeData &rhs) {
    return lhs.pcRel < rhs.pcRel;
  });
  auto dupBegin =
      llvm::unique(table, [](const FdeData &lhs, const FdeData &rhs) {
        return lhs.pcRel == rhs.pcRel;
      });
  table.erase(dupBegin, table.end());

  return table;
}

static uint64_t readFdeAddr(Ctx &ctx, uint8_t *buf, int size) {
switch (size) {
case DW_EH_PE_udata2:
Expand Down Expand Up @@ -630,14 +593,79 @@ void EhFrameSection::writeTo(uint8_t *buf) {
}
}

// Apply relocations. .eh_frame section contents are not contiguous
// in the output buffer, but relocateAlloc() still works because
// getOffset() takes care of discontiguous section pieces.
// Apply relocations to .eh_frame entries. This includes CIE personality
// pointers, FDE initial_location fields, and LSDA pointers.
for (EhInputSection *s : sections)
ctx.target->relocateEh(*s, buf);

if (getPartition(ctx).ehFrameHdr && getPartition(ctx).ehFrameHdr->getParent())
getPartition(ctx).ehFrameHdr->write();
EhFrameHeader *hdr = getPartition(ctx).ehFrameHdr.get();
if (!hdr || !hdr->getParent())
return;

// Write the .eh_frame_hdr section, which contains a binary search table of
// pointers to FDEs. This must be written after .eh_frame relocation since
// the content depends on relocated initial_location fields in FDEs.
using FdeData = EhFrameSection::FdeData;
SmallVector<FdeData, 0> fdes;
uint64_t va = hdr->getVA();
for (CieRecord *rec : cieRecords) {
uint8_t enc = getFdeEncoding(rec->cie);
for (EhSectionPiece *fde : rec->fdes) {
uint64_t pc = getFdePc(buf, fde->outputOff, enc);
uint64_t fdeVA = getParent()->addr + fde->outputOff;
if (!isInt<32>(pc - va)) {
Err(ctx) << fde->sec << ": PC offset is too large: 0x"
<< Twine::utohexstr(pc - va);
continue;
}
fdes.push_back({uint32_t(pc - va), uint32_t(fdeVA - va)});
}
}

// Sort the FDE list by their PC and uniqueify. Usually there is only
// one FDE for a PC (i.e. function), but if ICF merges two functions
// into one, there can be more than one FDEs pointing to the address.
llvm::stable_sort(fdes, [](const FdeData &a, const FdeData &b) {
return a.pcRel < b.pcRel;
});
fdes.erase(
llvm::unique(fdes, [](auto &a, auto &b) { return a.pcRel == b.pcRel; }),
fdes.end());

// Write header.
uint8_t *hdrBuf = ctx.bufferStart + hdr->getParent()->offset + hdr->outSecOff;
hdrBuf[0] = 1; // version
hdrBuf[1] = DW_EH_PE_pcrel | DW_EH_PE_sdata4; // eh_frame_ptr_enc
hdrBuf[2] = DW_EH_PE_udata4; // fde_count_enc
hdrBuf[3] = DW_EH_PE_datarel | DW_EH_PE_sdata4; // table_enc
write32(ctx, hdrBuf + 4,
getParent()->addr - hdr->getVA() - 4); // eh_frame_ptr
write32(ctx, hdrBuf + 8, fdes.size()); // fde_count
hdrBuf += 12;

// Write binary search table. Each entry describes the starting PC and the FDE
// address.
for (FdeData &fde : fdes) {
write32(ctx, hdrBuf, fde.pcRel);
write32(ctx, hdrBuf + 4, fde.fdeVARel);
hdrBuf += 8;
}
}

// Creates the ".eh_frame_hdr" synthetic section as SHT_PROGBITS with the
// SHF_ALLOC flag. The trailing 4 is forwarded to the SyntheticSection
// constructor (presumably the section alignment -- confirm against
// SyntheticSection).
EhFrameHeader::EhFrameHeader(Ctx &ctx)
: SyntheticSection(ctx, ".eh_frame_hdr", SHT_PROGBITS, SHF_ALLOC, 4) {}

// Intentionally a no-op; `buf` is unused.
void EhFrameHeader::writeTo(uint8_t *buf) {
// The section content is written during EhFrameSection::writeTo, because it
// depends on the relocated initial_location fields of the FDEs in .eh_frame.
}

// Returns the size in bytes of .eh_frame_hdr: a fixed header followed by one
// table entry for each FDE counted in the partition's .eh_frame section.
size_t EhFrameHeader::getSize() const {
  // Four one-byte fields followed by two 32-bit words.
  constexpr int headerBytes = 12;
  // Each table entry is a pair of 32-bit words.
  constexpr int entryBytes = 8;
  return headerBytes + getPartition(ctx).ehFrame->numFdes * entryBytes;
}

// .eh_frame_hdr is emitted only when this section is live and the partition's
// .eh_frame section is itself needed.
bool EhFrameHeader::isNeeded() const {
  if (!isLive())
    return false;
  return getPartition(ctx).ehFrame->isNeeded();
}

GotSection::GotSection(Ctx &ctx)
Expand Down Expand Up @@ -3658,51 +3686,6 @@ void GdbIndexSection::writeTo(uint8_t *buf) {

// The section is needed only when `chunks` is non-empty.
bool GdbIndexSection::isNeeded() const { return !chunks.empty(); }

// Creates the ".eh_frame_hdr" synthetic section as SHT_PROGBITS with the
// SHF_ALLOC flag. The trailing 4 is forwarded to the SyntheticSection
// constructor (presumably the section alignment -- confirm against
// SyntheticSection).
EhFrameHeader::EhFrameHeader(Ctx &ctx)
: SyntheticSection(ctx, ".eh_frame_hdr", SHT_PROGBITS, SHF_ALLOC, 4) {}

// Intentionally a no-op; `buf` is unused.
void EhFrameHeader::writeTo(uint8_t *buf) {
// Unlike most sections, the EhFrameHeader section is written while writing
// another section: EhFrameSection::writeTo() calls the write() function
// below. This is necessary because the contents of EhFrameHeader depend on
// the relocated contents of EhFrameSection, and the order in which the
// sections are written is not known.
}

// .eh_frame_hdr contains a binary search table of pointers to FDEs.
// Each entry of the search table consists of two values,
// the starting PC from where FDEs covers, and the FDE's address.
// It is sorted by PC.
void EhFrameHeader::write() {
uint8_t *buf = ctx.bufferStart + getParent()->offset + outSecOff;
using FdeData = EhFrameSection::FdeData;
SmallVector<FdeData, 0> fdes = getPartition(ctx).ehFrame->getFdeData();

buf[0] = 1;
buf[1] = DW_EH_PE_pcrel | DW_EH_PE_sdata4;
buf[2] = DW_EH_PE_udata4;
buf[3] = DW_EH_PE_datarel | DW_EH_PE_sdata4;
write32(ctx, buf + 4,
getPartition(ctx).ehFrame->getParent()->addr - this->getVA() - 4);
write32(ctx, buf + 8, fdes.size());
buf += 12;

for (FdeData &fde : fdes) {
write32(ctx, buf, fde.pcRel);
write32(ctx, buf + 4, fde.fdeVARel);
buf += 8;
}
}

// Returns the size in bytes of .eh_frame_hdr.
size_t EhFrameHeader::getSize() const {
  // One 8-byte table entry for each FDE counted in the partition's .eh_frame.
  const auto tableBytes = getPartition(ctx).ehFrame->numFdes * 8;
  // The table is preceded by a 12-byte header.
  return 12 + tableBytes;
}

// Reports whether .eh_frame_hdr should be emitted: the section must be live
// and the partition's .eh_frame section must be needed as well.
bool EhFrameHeader::isNeeded() const {
  if (isLive())
    return getPartition(ctx).ehFrame->isNeeded();
  return false;
}

// Creates the ".gnu.version_d" synthetic section with type SHT_GNU_verdef and
// the SHF_ALLOC flag. sizeof(uint32_t) is forwarded as the last
// SyntheticSection constructor argument (presumably the section alignment --
// confirm against SyntheticSection).
VersionDefinitionSection::VersionDefinitionSection(Ctx &ctx)
: SyntheticSection(ctx, ".gnu.version_d", SHT_GNU_verdef, SHF_ALLOC,
sizeof(uint32_t)) {}
Expand Down
30 changes: 11 additions & 19 deletions lld/ELF/SyntheticSections.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,6 @@ class EhFrameSection final : public SyntheticSection {
uint32_t fdeVARel;
};

SmallVector<FdeData, 0> getFdeData() const;
ArrayRef<CieRecord *> getCieRecords() const { return cieRecords; }
template <class ELFT>
void iterateFDEWithLSDA(llvm::function_ref<void(InputSection &)> fn);
Expand All @@ -95,6 +94,17 @@ class EhFrameSection final : public SyntheticSection {
llvm::DenseMap<std::pair<ArrayRef<uint8_t>, Symbol *>, CieRecord *> cieMap;
};

// .eh_frame_hdr contains a binary search table for .eh_frame FDEs. The section
// is covered by a PT_GNU_EH_FRAME segment, which allows the runtime unwinder to
// locate it via functions like `dl_iterate_phdr`.
class EhFrameHeader final : public SyntheticSection {
public:
EhFrameHeader(Ctx &);
// Intentionally a no-op: the section content is written during
// EhFrameSection::writeTo.
void writeTo(uint8_t *buf) override;
// Returns 12 bytes of header plus 8 bytes per FDE.
size_t getSize() const override;
// True when this section is live and the partition's .eh_frame is needed.
bool isNeeded() const override;
};

class GotSection final : public SyntheticSection {
public:
GotSection(Ctx &);
Expand Down Expand Up @@ -967,24 +977,6 @@ class GdbIndexSection final : public SyntheticSection {
size_t size;
};

// The --eh-frame-hdr option tells the linker to construct a header for all
// the .eh_frame sections. This header is placed in a section named
// .eh_frame_hdr and also in a PT_GNU_EH_FRAME segment.
// At runtime the unwinder can then find all the PT_GNU_EH_FRAME segments by
// calling dl_iterate_phdr.
// This section contains a lookup table for quick binary search of FDEs.
// Detailed info about internals can be found in Ian Lance Taylor's blog:
// http://www.airs.com/blog/archives/460 (".eh_frame")
// http://www.airs.com/blog/archives/462 (".eh_frame_hdr")
class EhFrameHeader final : public SyntheticSection {
public:
EhFrameHeader(Ctx &);
// Writes the header and the search table into the output buffer. Called from
// EhFrameSection::writeTo() rather than from writeTo() below.
void write();
// Intentionally a no-op; see write().
void writeTo(uint8_t *buf) override;
// Returns 12 bytes of header plus 8 bytes per FDE.
size_t getSize() const override;
// True when this section is live and the partition's .eh_frame is needed.
bool isNeeded() const override;
};

// For more information about .gnu.version and .gnu.version_r see:
// https://www.akkadia.org/drepper/symbol-versioning

Expand Down
13 changes: 8 additions & 5 deletions llvm/docs/SPIRVUsage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@ Static Compiler Commands
Description: This command compiles an LLVM IL file (`input.ll`) to a SPIR-V binary (`output.spvt`) for a 32-bit architecture.

2. **Compilation with Extensions and Optimization**
Command: `llc -O1 -mtriple=spirv64-unknown-unknown --spirv-ext=+SPV_INTEL_arbitrary_precision_integers input.ll -o output.spvt`
Description: Compiles an LLVM IL file to SPIR-V with (`-O1`) optimizations, targeting a 64-bit architecture. It enables the SPV_INTEL_arbitrary_precision_integers extension.
Command: `llc -O1 -mtriple=spirv64-unknown-unknown --spirv-ext=+SPV_ALTERA_arbitrary_precision_integers input.ll -o output.spvt`
Description: Compiles an LLVM IL file to SPIR-V with (`-O1`) optimizations, targeting a 64-bit architecture. It enables the SPV_ALTERA_arbitrary_precision_integers extension.

3. **Compilation with experimental NonSemantic.Shader.DebugInfo.100 support**
Command: `llc --spv-emit-nonsemantic-debug-info --spirv-ext=+SPV_KHR_non_semantic_info input.ll -o output.spvt`
Expand Down Expand Up @@ -136,7 +136,7 @@ extensions to enable or disable, each prefixed with ``+`` or ``-``, respectively

To enable multiple extensions, list them separated by comma. For example, to enable support for atomic operations on floating-point numbers and arbitrary precision integers, use:

``-spirv-ext=+SPV_EXT_shader_atomic_float_add,+SPV_INTEL_arbitrary_precision_integers``
``-spirv-ext=+SPV_EXT_shader_atomic_float_add,+SPV_ALTERA_arbitrary_precision_integers``

To enable all extensions, use the following option:
``-spirv-ext=all``
Expand All @@ -145,7 +145,7 @@ To enable all KHR extensions, use the following option:
``-spirv-ext=khr``

To enable all extensions except specified, specify ``all`` followed by a list of disallowed extensions. For example:
``-spirv-ext=all,-SPV_INTEL_arbitrary_precision_integers``
``-spirv-ext=all,-SPV_ALTERA_arbitrary_precision_integers``

Below is a list of supported SPIR-V extensions, sorted alphabetically by their extension names:

Expand All @@ -171,7 +171,7 @@ Below is a list of supported SPIR-V extensions, sorted alphabetically by their e
- Extends the SPV_EXT_shader_atomic_float_add and SPV_EXT_shader_atomic_float_min_max to support addition, minimum and maximum on 16-bit `bfloat16` floating-point numbers in memory.
* - ``SPV_INTEL_2d_block_io``
- Adds additional subgroup block prefetch, load, load transposed, load transformed and store instructions to read two-dimensional blocks of data from a two-dimensional region of memory, or to write two-dimensional blocks of data to a two dimensional region of memory.
* - ``SPV_INTEL_arbitrary_precision_integers``
* - ``SPV_ALTERA_arbitrary_precision_integers``
- Allows generating arbitrary width integer types.
* - ``SPV_INTEL_bindless_images``
- Adds instructions to convert unsigned integer handles to images, samplers and sampled images.
Expand Down Expand Up @@ -245,6 +245,9 @@ Below is a list of supported SPIR-V extensions, sorted alphabetically by their e
- Adds execution mode and capability to enable maximal reconvergence.
* - ``SPV_ALTERA_blocking_pipes``
- Adds new pipe read and write functions that have blocking semantics instead of the non-blocking semantics of the existing pipe read/write functions.
* - ``SPV_ALTERA_arbitrary_precision_fixed_point``
- Adds instructions for fixed-point arithmetic. The extension works without SPV_ALTERA_arbitrary_precision_integers, but together they allow greater flexibility in representing arbitrary precision data types.


SPIR-V representation in LLVM IR
================================
Expand Down
46 changes: 45 additions & 1 deletion llvm/lib/Target/RISCV/RISCVISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9584,6 +9584,50 @@ SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
if (SDValue V = lowerSelectToBinOp(Op.getNode(), DAG, Subtarget))
return V;

// When moving values between GPRs and FPRs costs nothing (Zfinx), a
// floating-point select with an integer condition can be lowered via Zicond.
bool FPinGPR = Subtarget.hasStdExtZfinx();

// We can handle FGPR without splitting into hi/lo parts
bool FitsInGPR = TypeSize::isKnownLE(VT.getSizeInBits(),
Subtarget.getXLenVT().getSizeInBits());

bool UseZicondForFPSel = Subtarget.hasStdExtZicond() && FPinGPR &&
VT.isFloatingPoint() && FitsInGPR;

if (UseZicondForFPSel) {

auto CastToInt = [&](SDValue V) -> SDValue {
// Treat +0.0 as int 0 to enable single 'czero' instruction generation.
if (isNullFPConstant(V))
return DAG.getConstant(0, DL, XLenVT);

if (VT == MVT::f16)
return DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, V);

if (VT == MVT::f32 && Subtarget.is64Bit())
return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, XLenVT, V);

return DAG.getBitcast(XLenVT, V);
};

SDValue TrueVInt = CastToInt(TrueV);
SDValue FalseVInt = CastToInt(FalseV);

// Emit integer SELECT (lowers to Zicond)
SDValue ResultInt =
DAG.getNode(ISD::SELECT, DL, XLenVT, CondV, TrueVInt, FalseVInt);

// Convert back to floating VT
if (VT == MVT::f32 && Subtarget.is64Bit())
return DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, VT, ResultInt);

if (VT == MVT::f16)
return DAG.getNode(RISCVISD::FMV_H_X, DL, VT, ResultInt);

return DAG.getBitcast(VT, ResultInt);
}

// When Zicond or XVentanaCondOps is present, emit CZERO_EQZ and CZERO_NEZ
// nodes to implement the SELECT. Performing the lowering here allows for
// greater control over when CZERO_{EQZ/NEZ} are used vs another branchless
Expand Down Expand Up @@ -10699,7 +10743,7 @@ SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
VecVT != MVT::v4i8 && VecVT != MVT::v2i32)
return SDValue();
SDValue Extracted = DAG.getBitcast(XLenVT, Vec);
unsigned ElemWidth = EltVT.getSizeInBits();
unsigned ElemWidth = VecVT.getVectorElementType().getSizeInBits();
SDValue Shamt = DAG.getNode(ISD::MUL, DL, XLenVT, Idx,
DAG.getConstant(ElemWidth, DL, XLenVT));
return DAG.getNode(ISD::SRL, DL, XLenVT, Extracted, Shamt);
Expand Down
Loading