Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
12bf183
[AutoUpgrade] Gracefully handle invalid alignment on masked intrinsics
nikic Oct 22, 2025
cde4457
[ShrinkWrap][NFC] Test with load from constant pool preventing shrink…
sushgokh Oct 22, 2025
10d3c6b
[ShrinkWrap] Consider constant pool access as non-stack access (#164393)
sushgokh Oct 22, 2025
15d11eb
[NFC] "unsafe-fp-math" post cleanup (code comments part) (#164582)
paperchalice Oct 22, 2025
ec546ce
[lld][test] Remove unsafe-fp-math uses (NFC) (#164598)
paperchalice Oct 22, 2025
8b2aba2
[WPD]: Enable speculative devirtualizatoin. (#159048)
hassnaaHamdi Oct 22, 2025
a4dbd11
[LLVM][CodeGen][AArch64] Fix global-isel for LD1R. (#164418)
paulwalker-arm Oct 22, 2025
128eacf
[LLVM][CodeGen][SVE] Fix typo in PPR_p8to15's DecoderMethod. (#164429)
paulwalker-arm Oct 22, 2025
b8062f8
[lldb-dap] Use protocol types for exceptioninfo (#164318)
da-viper Oct 22, 2025
37fcaf5
[X86] Fix some values for Znver4 model (#161405)
NexusXe Oct 22, 2025
becf847
[AArch64][test] Remove unsafe-fp-math uses (NFC) (#164606)
paperchalice Oct 22, 2025
57412c3
[GlobalISel] Update the documentation of abd. (#164594)
davemgreen Oct 22, 2025
c636a39
[Matrix] Add tests identifying GVN and DSE opportunities for matrix s…
cofibrant Oct 22, 2025
f7fb52a
[Clang] Move AllocToken frontend options to LangOptions (#163635)
melver Oct 22, 2025
50acc09
[clang-fuzzer] Remove Dockerfile (#162555)
boomanaiden154 Oct 22, 2025
6e0553f
Reapply "[Polly] Update ScopInliner for NPM (#125427)" (#164601)
Meinersbur Oct 22, 2025
6ceefbe
[OpenACC][CIR] Implement || and && reduction combiner lowering (#164298)
erichkeane Oct 22, 2025
aca53f4
[VPlan] Skip masked interleave groups in narrowInterleaveGroups.
fhahn Oct 22, 2025
b307347
[OpenACC][CIR] Lowering for atomic-read (#164299)
erichkeane Oct 22, 2025
d08cbc1
[mlir][linalg] Fix Linalg runtime verification pass to handle tensors…
Oct 22, 2025
64a8d73
[NFC] Use macros only when __AVX512IFMA__ and __AVXIFMA__ undefined (…
phoebewang Oct 22, 2025
411be14
[AgressiveInstCombine] Merge debug info on merged stores (#164449)
OCHyams Oct 22, 2025
9abbec6
[AMDGPU] Reland "Remove redundant s_cmp_lg_* sX, 0" (#164201)
LU-JOHN Oct 22, 2025
e937736
merge main into amd-staging
ronlieb Oct 22, 2025
ff37c15
Regen llvm/test/CodeGen/AMDGPU/preload-implicit-kernargs-debug-info-m…
ronlieb Oct 22, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions clang/docs/AllocToken.rst
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@ The default mode to calculate tokens is:
pointers.

Other token ID assignment modes are supported, but they may be subject to
change or removal. These may (experimentally) be selected with ``-mllvm
-alloc-token-mode=<mode>``:
change or removal. These may (experimentally) be selected with ``-Xclang
-falloc-token-mode=<mode>``:

* ``typehash``: This mode assigns a token ID based on the hash of the allocated
type's name.
Expand Down
11 changes: 11 additions & 0 deletions clang/include/clang/AST/StmtOpenACC.h
Original file line number Diff line number Diff line change
Expand Up @@ -815,6 +815,17 @@ class OpenACCAtomicConstruct final
Stmt *getAssociatedStmt() {
return OpenACCAssociatedStmtConstruct::getAssociatedStmt();
}

// A struct to represent a broken-down version of the associated statement,
// providing the information specified in OpenACC3.3 Section 2.12.
struct StmtInfo {
const Expr *V;
const Expr *X;
// TODO: OpenACC: We should expand this as we're implementing the other
// atomic construct kinds.
};

const StmtInfo getAssociatedStmtInfo() const;
};

} // namespace clang
Expand Down
4 changes: 0 additions & 4 deletions clang/include/clang/Basic/CodeGenOptions.h
Original file line number Diff line number Diff line change
Expand Up @@ -455,10 +455,6 @@ class CodeGenOptions : public CodeGenOptionsBase {

std::optional<double> AllowRuntimeCheckSkipHotCutoff;

/// Maximum number of allocation tokens (0 = no max), nullopt if none set (use
/// pass default).
std::optional<uint64_t> AllocTokenMax;

/// List of backend command-line options for -fembed-bitcode.
std::vector<uint8_t> CmdArgs;

Expand Down
8 changes: 8 additions & 0 deletions clang/include/clang/Basic/LangOptions.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include "llvm/ADT/FloatingPointMode.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/DXContainer.h"
#include "llvm/Support/AllocToken.h"
#include "llvm/TargetParser/Triple.h"
#include <optional>
#include <string>
Expand Down Expand Up @@ -565,6 +566,13 @@ class LangOptions : public LangOptionsBase {
bool AtomicFineGrainedMemory = false;
bool AtomicIgnoreDenormalMode = false;

/// Maximum number of allocation tokens (0 = no max), nullopt if none set (use
/// target default).
std::optional<uint64_t> AllocTokenMax;

/// The allocation token mode.
std::optional<llvm::AllocTokenMode> AllocTokenMode;

LangOptions();

/// Set language defaults for the given input language and
Expand Down
4 changes: 4 additions & 0 deletions clang/include/clang/Driver/Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -2760,6 +2760,10 @@ def falloc_token_max_EQ : Joined<["-"], "falloc-token-max=">,
MetaVarName<"<N>">,
HelpText<"Limit to maximum N allocation tokens (0 = no max)">;

def falloc_token_mode_EQ : Joined<["-"], "falloc-token-mode=">,
Group<f_Group>, Visibility<[CC1Option]>,
HelpText<"Set the allocation token mode (experimental)">;

def fallow_runtime_check_skip_hot_cutoff_EQ
: Joined<["-"], "fallow-runtime-check-skip-hot-cutoff=">,
Group<f_clang_Group>,
Expand Down
34 changes: 34 additions & 0 deletions clang/lib/AST/StmtOpenACC.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@

#include "clang/AST/StmtOpenACC.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/ExprCXX.h"
#include "clang/AST/StmtCXX.h"

using namespace clang;

OpenACCComputeConstruct *
Expand Down Expand Up @@ -322,6 +324,38 @@ OpenACCAtomicConstruct *OpenACCAtomicConstruct::Create(
return Inst;
}

const OpenACCAtomicConstruct::StmtInfo
OpenACCAtomicConstruct::getAssociatedStmtInfo() const {
// This ends up being a vastly simplified version of SemaOpenACCAtomic, since
// it doesn't have to worry about erroring out, but we should do a lot of
// asserts to ensure we don't get off into the weeds.
assert(getAssociatedStmt() && "invalid associated stmt?");

switch (AtomicKind) {
case OpenACCAtomicKind::None:
case OpenACCAtomicKind::Write:
case OpenACCAtomicKind::Update:
case OpenACCAtomicKind::Capture:
assert(false && "Only 'read' has been implemented here");
return {};
case OpenACCAtomicKind::Read: {
// Read only supports the format 'v = x'; where both sides are a scalar
// expression. This can come in 2 forms; BinaryOperator or
// CXXOperatorCallExpr (rarely).
const Expr *AssignExpr = cast<const Expr>(getAssociatedStmt());
if (const auto *BO = dyn_cast<BinaryOperator>(AssignExpr)) {
assert(BO->getOpcode() == BO_Assign);
return {BO->getLHS()->IgnoreImpCasts(), BO->getRHS()->IgnoreImpCasts()};
}

const auto *OO = cast<CXXOperatorCallExpr>(AssignExpr);
assert(OO->getOperator() == OO_Equal);

return {OO->getArg(0)->IgnoreImpCasts(), OO->getArg(1)->IgnoreImpCasts()};
}
}
}

OpenACCCacheConstruct *OpenACCCacheConstruct::CreateEmpty(const ASTContext &C,
unsigned NumVars) {
void *Mem =
Expand Down
19 changes: 13 additions & 6 deletions clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -553,12 +553,15 @@ class OpenACCClauseCIREmitter final
}

void VisitIfClause(const OpenACCIfClause &clause) {
if constexpr (isOneOfTypes<OpTy, mlir::acc::ParallelOp, mlir::acc::SerialOp,
mlir::acc::KernelsOp, mlir::acc::InitOp,
mlir::acc::ShutdownOp, mlir::acc::SetOp,
mlir::acc::DataOp, mlir::acc::WaitOp,
mlir::acc::HostDataOp, mlir::acc::EnterDataOp,
mlir::acc::ExitDataOp, mlir::acc::UpdateOp>) {
if constexpr (isOneOfTypes<
OpTy, mlir::acc::ParallelOp, mlir::acc::SerialOp,
mlir::acc::KernelsOp, mlir::acc::InitOp,
mlir::acc::ShutdownOp, mlir::acc::SetOp,
mlir::acc::DataOp, mlir::acc::WaitOp,
mlir::acc::HostDataOp, mlir::acc::EnterDataOp,
mlir::acc::ExitDataOp, mlir::acc::UpdateOp,
mlir::acc::AtomicReadOp, mlir::acc::AtomicWriteOp,
mlir::acc::AtomicUpdateOp, mlir::acc::AtomicCaptureOp>) {
operation.getIfCondMutable().append(
createCondition(clause.getConditionExpr()));
} else if constexpr (isCombinedType<OpTy>) {
Expand Down Expand Up @@ -1144,6 +1147,10 @@ EXPL_SPEC(mlir::acc::HostDataOp)
EXPL_SPEC(mlir::acc::EnterDataOp)
EXPL_SPEC(mlir::acc::ExitDataOp)
EXPL_SPEC(mlir::acc::UpdateOp)
EXPL_SPEC(mlir::acc::AtomicReadOp)
EXPL_SPEC(mlir::acc::AtomicWriteOp)
EXPL_SPEC(mlir::acc::AtomicCaptureOp)
EXPL_SPEC(mlir::acc::AtomicUpdateOp)
#undef EXPL_SPEC

template <typename ComputeOp, typename LoopOp>
Expand Down
27 changes: 25 additions & 2 deletions clang/lib/CIR/CodeGen/CIRGenStmtOpenACC.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -306,6 +306,29 @@ CIRGenFunction::emitOpenACCCacheConstruct(const OpenACCCacheConstruct &s) {

mlir::LogicalResult
CIRGenFunction::emitOpenACCAtomicConstruct(const OpenACCAtomicConstruct &s) {
cgm.errorNYI(s.getSourceRange(), "OpenACC Atomic Construct");
return mlir::failure();
// For now, we are only support 'read', so diagnose. We can switch on the kind
// later once we start implementing the other 3 forms.
if (s.getAtomicKind() != OpenACCAtomicKind::Read) {
cgm.errorNYI(s.getSourceRange(), "OpenACC Atomic Construct");
return mlir::failure();
}

// While Atomic is an 'associated statement' construct, it 'steals' the
// expression it is associated with rather than emitting it inside of it. So
// it has custom emit logic.
mlir::Location start = getLoc(s.getSourceRange().getBegin());
OpenACCAtomicConstruct::StmtInfo inf = s.getAssociatedStmtInfo();
// Atomic 'read' only permits 'v = x', where v and x are both scalar L values.
// The getAssociatedStmtInfo strips off implicit casts, which includes
// implicit conversions and L-to-R-Value conversions, so we can just emit it
// as an L value. The Flang implementation has no problem with different
// types, so it appears that the dialect can handle the conversions.
mlir::Value v = emitLValue(inf.V).getPointer();
mlir::Value x = emitLValue(inf.X).getPointer();
mlir::Type resTy = convertType(inf.V->getType());
auto op = mlir::acc::AtomicReadOp::create(builder, start, x, v, resTy,
/*ifCond=*/{});
emitOpenACCClauses(op, s.getDirectiveKind(), s.getDirectiveLoc(),
s.clauses());
return mlir::success();
}
9 changes: 6 additions & 3 deletions clang/lib/CodeGen/BackendUtil.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -234,9 +234,12 @@ class EmitAssemblyHelper {
};
} // namespace

static AllocTokenOptions getAllocTokenOptions(const CodeGenOptions &CGOpts) {
static AllocTokenOptions getAllocTokenOptions(const LangOptions &LangOpts,
const CodeGenOptions &CGOpts) {
AllocTokenOptions Opts;
Opts.MaxTokens = CGOpts.AllocTokenMax;
if (LangOpts.AllocTokenMode)
Opts.Mode = *LangOpts.AllocTokenMode;
Opts.MaxTokens = LangOpts.AllocTokenMax;
Opts.Extended = CGOpts.SanitizeAllocTokenExtended;
Opts.FastABI = CGOpts.SanitizeAllocTokenFastABI;
return Opts;
Expand Down Expand Up @@ -802,7 +805,7 @@ static void addSanitizers(const Triple &TargetTriple,
// memory allocation function detection.
MPM.addPass(InferFunctionAttrsPass());
}
MPM.addPass(AllocTokenPass(getAllocTokenOptions(CodeGenOpts)));
MPM.addPass(AllocTokenPass(getAllocTokenOptions(LangOpts, CodeGenOpts)));
}
};
if (ClSanitizeOnOptimizerEarlyEP) {
Expand Down
53 changes: 40 additions & 13 deletions clang/lib/Frontend/CompilerInvocation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1841,10 +1841,6 @@ void CompilerInvocationBase::GenerateCodeGenArgs(const CodeGenOptions &Opts,
serializeSanitizerKinds(Opts.SanitizeAnnotateDebugInfo))
GenerateArg(Consumer, OPT_fsanitize_annotate_debug_info_EQ, Sanitizer);

if (Opts.AllocTokenMax)
GenerateArg(Consumer, OPT_falloc_token_max_EQ,
std::to_string(*Opts.AllocTokenMax));

if (!Opts.EmitVersionIdentMetadata)
GenerateArg(Consumer, OPT_Qn);

Expand Down Expand Up @@ -2358,15 +2354,6 @@ bool CompilerInvocation::ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args,
}
}

if (const auto *Arg = Args.getLastArg(options::OPT_falloc_token_max_EQ)) {
StringRef S = Arg->getValue();
uint64_t Value = 0;
if (S.getAsInteger(0, Value))
Diags.Report(diag::err_drv_invalid_value) << Arg->getAsString(Args) << S;
else
Opts.AllocTokenMax = Value;
}

Opts.EmitVersionIdentMetadata = Args.hasFlag(OPT_Qy, OPT_Qn, true);

if (!LangOpts->CUDAIsDevice)
Expand Down Expand Up @@ -4037,6 +4024,29 @@ void CompilerInvocationBase::GenerateLangArgs(const LangOptions &Opts,

if (!Opts.RandstructSeed.empty())
GenerateArg(Consumer, OPT_frandomize_layout_seed_EQ, Opts.RandstructSeed);

if (Opts.AllocTokenMax)
GenerateArg(Consumer, OPT_falloc_token_max_EQ,
std::to_string(*Opts.AllocTokenMax));

if (Opts.AllocTokenMode) {
StringRef S;
switch (*Opts.AllocTokenMode) {
case llvm::AllocTokenMode::Increment:
S = "increment";
break;
case llvm::AllocTokenMode::Random:
S = "random";
break;
case llvm::AllocTokenMode::TypeHash:
S = "typehash";
break;
case llvm::AllocTokenMode::TypeHashPointerSplit:
S = "typehashpointersplit";
break;
}
GenerateArg(Consumer, OPT_falloc_token_mode_EQ, S);
}
}

bool CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args,
Expand Down Expand Up @@ -4673,6 +4683,23 @@ bool CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args,
if (const Arg *A = Args.getLastArg(OPT_frandomize_layout_seed_EQ))
Opts.RandstructSeed = A->getValue(0);

if (const auto *Arg = Args.getLastArg(options::OPT_falloc_token_max_EQ)) {
StringRef S = Arg->getValue();
uint64_t Value = 0;
if (S.getAsInteger(0, Value))
Diags.Report(diag::err_drv_invalid_value) << Arg->getAsString(Args) << S;
else
Opts.AllocTokenMax = Value;
}

if (const auto *Arg = Args.getLastArg(options::OPT_falloc_token_mode_EQ)) {
StringRef S = Arg->getValue();
if (auto Mode = getAllocTokenModeFromString(S))
Opts.AllocTokenMode = Mode;
else
Diags.Report(diag::err_drv_invalid_value) << Arg->getAsString(Args) << S;
}

// Validate options for HLSL
if (Opts.HLSL) {
// TODO: Revisit restricting SPIR-V to logical once we've figured out how to
Expand Down
44 changes: 36 additions & 8 deletions clang/lib/Headers/avx512ifmavlintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@

#endif

#if !(defined(__AVXIFMA__) || defined(__AVX512IFMA__))
#define _mm_madd52hi_epu64(X, Y, Z) \
((__m128i)__builtin_ia32_vpmadd52huq128((__v2di)(X), (__v2di)(Y), \
(__v2di)(Z)))
Expand All @@ -52,56 +53,83 @@
#define _mm256_madd52lo_epu64(X, Y, Z) \
((__m256i)__builtin_ia32_vpmadd52luq256((__v4di)(X), (__v4di)(Y), \
(__v4di)(Z)))
#endif

#if defined(__AVX512IFMA__)
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_madd52hi_epu64(__m128i __X, __m128i __Y, __m128i __Z) {
return (__m128i)__builtin_ia32_vpmadd52huq128((__v2di)__X, (__v2di)__Y,
(__v2di)__Z);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_madd52hi_epu64(__m256i __X, __m256i __Y, __m256i __Z) {
return (__m256i)__builtin_ia32_vpmadd52huq256((__v4di)__X, (__v4di)__Y,
(__v4di)__Z);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_madd52lo_epu64(__m128i __X, __m128i __Y, __m128i __Z) {
return (__m128i)__builtin_ia32_vpmadd52luq128((__v2di)__X, (__v2di)__Y,
(__v2di)__Z);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_madd52lo_epu64(__m256i __X, __m256i __Y, __m256i __Z) {
return (__m256i)__builtin_ia32_vpmadd52luq256((__v4di)__X, (__v4di)__Y,
(__v4di)__Z);
}
#endif

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_madd52hi_epu64(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) {
return (__m128i)__builtin_ia32_selectq_128(
__M, (__v2di)_mm_madd52hi_epu64(__W, __X, __Y), (__v2di)__W);
__M, (__v2di)__builtin_ia32_vpmadd52huq128(__W, __X, __Y), (__v2di)__W);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_madd52hi_epu64(__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z) {
return (__m128i)__builtin_ia32_selectq_128(
__M, (__v2di)_mm_madd52hi_epu64(__X, __Y, __Z),
__M, (__v2di)__builtin_ia32_vpmadd52huq128(__X, __Y, __Z),
(__v2di)_mm_setzero_si128());
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_madd52hi_epu64(
__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) {
return (__m256i)__builtin_ia32_selectq_256(
__M, (__v4di)_mm256_madd52hi_epu64(__W, __X, __Y), (__v4di)__W);
__M, (__v4di)__builtin_ia32_vpmadd52huq256(__W, __X, __Y), (__v4di)__W);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_madd52hi_epu64(
__mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z) {
return (__m256i)__builtin_ia32_selectq_256(
__M, (__v4di)_mm256_madd52hi_epu64(__X, __Y, __Z),
__M, (__v4di)__builtin_ia32_vpmadd52huq256(__X, __Y, __Z),
(__v4di)_mm256_setzero_si256());
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_madd52lo_epu64(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) {
return (__m128i)__builtin_ia32_selectq_128(
__M, (__v2di)_mm_madd52lo_epu64(__W, __X, __Y), (__v2di)__W);
__M, (__v2di)__builtin_ia32_vpmadd52luq128(__W, __X, __Y), (__v2di)__W);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_madd52lo_epu64(__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z) {
return (__m128i)__builtin_ia32_selectq_128(
__M, (__v2di)_mm_madd52lo_epu64(__X, __Y, __Z),
__M, (__v2di)__builtin_ia32_vpmadd52luq128(__X, __Y, __Z),
(__v2di)_mm_setzero_si128());
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_madd52lo_epu64(
__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) {
return (__m256i)__builtin_ia32_selectq_256(
__M, (__v4di)_mm256_madd52lo_epu64(__W, __X, __Y), (__v4di)__W);
__M, (__v4di)__builtin_ia32_vpmadd52luq256(__W, __X, __Y), (__v4di)__W);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_madd52lo_epu64(
__mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z) {
return (__m256i)__builtin_ia32_selectq_256(
__M, (__v4di)_mm256_madd52lo_epu64(__X, __Y, __Z),
__M, (__v4di)__builtin_ia32_vpmadd52luq256(__X, __Y, __Z),
(__v4di)_mm256_setzero_si256());
}

Expand Down
Loading