Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
ceba82f
[LoadStoreVectorizer] Fix one-element vector handling (#169671)
cmc-rep Nov 27, 2025
b028dac
[libc++][queue] Applied `[[nodiscard]]` (#169469)
H-G-Hristov Nov 27, 2025
bd643bc
[flang] Use default constructor for FIRToSCF pass (#169741)
clementval Nov 27, 2025
504b507
[mlir][Transforms] Dialect conversion: Add support for `replaceUsesWi…
matthias-springer Nov 27, 2025
bacca23
[libc++][mdspan] Applied `[[nodiscard]]` (#169326)
H-G-Hristov Nov 27, 2025
fb18f75
[lldb-dap] Add breakpoints after debugger initialization in DExTer (#…
qxy11 Nov 27, 2025
b3428bb
Add missing freeConstants() call for ConstantPtrAuths.
pcc Nov 27, 2025
e7dec23
[ReplaceConstant] Don't create instructions for the same constant mul…
shiltian Nov 27, 2025
601f796
[MLIR][NVVM] Add missing rounding modes in fp16x2 conversions (#169005)
Wolfram70 Nov 27, 2025
1748e23
[MLIR][Intrinsics] Add new MLIR API to automatically resolve overload…
rajatbajpai Nov 27, 2025
4099121
[clang][Tooling] Fix `getFileRange` returning a range spanning across…
tJener Nov 27, 2025
fede947
[mlir][LLVMIR] Handle missing functions in CGProfile module flags (#1…
Men-cotton Nov 27, 2025
6696e0c
[clang][bytecode] Remove double diagnostic emission (#169658)
tbaederr Nov 27, 2025
f6712b6
[libc++] Reformat `optional` constructor tests (#169231)
smallp-o-p Nov 27, 2025
a9cc7fe
[NFC][SampleFDO] Use const& to avoid copies (#164584)
abhishek-kaushik22 Nov 27, 2025
326a1a4
[MLIR][XeGPU] Add anchor_layout and update propagation to honor user-…
Jianhui-Li Nov 27, 2025
f1ddb2f
[LoongArch][NFC] Pre-commit tests for vector rotl/rotr (#161115)
ylzsx Nov 27, 2025
bb9449d
[InstCombine] Fold @llvm.experimental.get.vector.length when cnt <= m…
lukel97 Nov 27, 2025
6abbbca
[AggressiveInstCombine] Match long high-half multiply (#168396)
davemgreen Nov 27, 2025
1c7ec06
[VPlan] Optimize LastActiveLane to EVL - 1 (#169766)
lukel97 Nov 27, 2025
9cb9b16
[mlir][llvm] Fix import of branch weights with "expected" field (#169…
VadimCurca Nov 27, 2025
dc8311f
[ARM] Remove IR from mve vpt mir tests. NFC
davemgreen Nov 27, 2025
c28c99f
[NFC][HIP] Add __builtin_*_load_lds type check test cases (#165388)
jmmartinez Nov 27, 2025
650eeb8
[ShrinkWrap] Modify shrink wrapping to accommodate functions terminat…
cofibrant Nov 27, 2025
c3c3d16
[Clang] VectorExprEvaluator::VisitCallExpr / InterpretBuiltin - Allow…
chaitanyav Nov 27, 2025
0b16512
[llvm][Tablegen] Link to tutorial before programmer's reference
DavidSpickett Nov 27, 2025
8401a8d
[NFC][LLVM] Add bitcode tests for llvm.aarch64.sve.rev
paulwalker-arm Nov 26, 2025
0dbedd1
[Clang] Replace some x86 sqrt builtins with the generic __builtin_ele…
philnik777 Nov 27, 2025
bec726f
[X86] optimize ssse3 horizontal saturating add/sub (#169591)
folkertdev Nov 27, 2025
d6be9fc
[libc++][deque] Applied `[[nodiscard]]` (#169745)
H-G-Hristov Nov 27, 2025
bd95a74
[clang][bytecode] Check for invalid record decls in IntPointer::atOff…
tbaederr Nov 27, 2025
682f292
[LV] Test more combinations of scalar stores using last lane of IV.
fhahn Nov 27, 2025
df80612
[libc++][flat_set] Applied `[[nodiscard]]` (#169739)
H-G-Hristov Nov 27, 2025
7b813c3
[clang][bytecode][test] Specify triple for Invalid.cpp
tbaederr Nov 27, 2025
eee09ca
[X86][Clang] Allow constexpr evaluation of F16C CVTPS2PH intrinsics (…
ericxu233 Nov 27, 2025
fca41f4
[X86] Replace BF16 to F32 conversions with generic conversions (#169781)
RKSimon Nov 27, 2025
ea1e62d
[CodeGenTypes] Remove explicit VT numbers from ValueTypes.td (#169670)
jayfoad Nov 27, 2025
66ca3f1
[SystemZ] Serialize ada entry flags (#169395)
redstar Nov 27, 2025
1d7d83d
RuntimeLibcalls: Add macos unlocked IO functions to systems (#167084)
arsenm Nov 27, 2025
120deff
merge main into amd-staging
z1-cciauto Nov 27, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 7 additions & 35 deletions clang/include/clang/Basic/BuiltinsX86.td
Original file line number Diff line number Diff line change
Expand Up @@ -156,8 +156,6 @@ let Features = "sse", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in
def rcpss : X86Builtin<"_Vector<4, float>(_Vector<4, float>)">;
def rsqrtps : X86Builtin<"_Vector<4, float>(_Vector<4, float>)">;
def rsqrtss : X86Builtin<"_Vector<4, float>(_Vector<4, float>)">;
def sqrtps : X86Builtin<"_Vector<4, float>(_Vector<4, float>)">;
def sqrtss : X86Builtin<"_Vector<4, float>(_Vector<4, float>)">;
}

let Features = "sse2", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
Expand All @@ -170,8 +168,6 @@ let Features = "sse2", Attributes = [NoThrow] in {

let Features = "sse2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
def psadbw128 : X86Builtin<"_Vector<2, long long int>(_Vector<16, char>, _Vector<16, char>)">;
def sqrtpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>)">;
def sqrtsd : X86Builtin<"_Vector<2, double>(_Vector<2, double>)">;
def cvtpd2dq : X86Builtin<"_Vector<2, long long int>(_Vector<2, double>)">;
def cvtpd2ps : X86Builtin<"_Vector<4, float>(_Vector<2, double>)">;
def cvttpd2dq : X86Builtin<"_Vector<4, int>(_Vector<2, double>)">;
Expand Down Expand Up @@ -513,8 +509,6 @@ let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWid
}

let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
def sqrtpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>)">;
def sqrtps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>)">;
def rsqrtps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>)">;
def rcpps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>)">;
def roundpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Constant int)">;
Expand Down Expand Up @@ -716,11 +710,13 @@ let Features = "avx2", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
def gatherq_d : X86Builtin<"_Vector<4, int>(_Vector<4, int>, int const *, _Vector<2, long long int>, _Vector<4, int>, _Constant char)">;
}

let Features = "f16c", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
let Features = "f16c",
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def vcvtps2ph : X86Builtin<"_Vector<8, short>(_Vector<4, float>, _Constant int)">;
}

let Features = "f16c", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
let Features = "f16c",
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def vcvtps2ph256 : X86Builtin<"_Vector<8, short>(_Vector<8, float>, _Constant int)">;
}

Expand Down Expand Up @@ -3310,15 +3306,15 @@ let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<128>
def cvtusi2ss32 : X86Builtin<"_Vector<4, float>(_Vector<4, float>, unsigned int, _Constant int)">;
}

let Features = "avx512vbmi", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
let Features = "avx512vbmi", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def vpmultishiftqb512 : X86Builtin<"_Vector<64, char>(_Vector<64, char>, _Vector<64, char>)">;
}

let Features = "avx512vbmi,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
let Features = "avx512vbmi,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def vpmultishiftqb128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>)">;
}

let Features = "avx512vbmi,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
let Features = "avx512vbmi,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def vpmultishiftqb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>)">;
}

Expand Down Expand Up @@ -3358,10 +3354,6 @@ let Features = "avx512bf16", Attributes = [NoThrow, Const, RequiredVectorWidth<5
def dpbf16ps_512 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<32, __bf16>, _Vector<32, __bf16>)">;
}

let Features = "avx512bf16", Attributes = [NoThrow, Const] in {
def cvtsbf162ss_32 : X86Builtin<"float(__bf16)">;
}

let Features = "avx512vp2intersect", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
def vp2intersect_q_512 : X86Builtin<"void(_Vector<8, long long int>, _Vector<8, long long int>, unsigned char *, unsigned char *)">;
}
Expand Down Expand Up @@ -3539,14 +3531,6 @@ let Features = "avx512fp16", Attributes = [NoThrow, Const, RequiredVectorWidth<1
def reducesh_mask : X86Builtin<"_Vector<8, _Float16>(_Vector<8, _Float16>, _Vector<8, _Float16>, _Vector<8, _Float16>, unsigned char, _Constant int, _Constant int)">;
}

let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
def sqrtph : X86Builtin<"_Vector<8, _Float16>(_Vector<8, _Float16>)">;
}

let Features = "avx512fp16,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
def sqrtph256 : X86Builtin<"_Vector<16, _Float16>(_Vector<16, _Float16>)">;
}

let Features = "avx512fp16", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
def sqrtph512 : X86Builtin<"_Vector<32, _Float16>(_Vector<32, _Float16>, _Constant int)">;
}
Expand Down Expand Up @@ -5065,15 +5049,3 @@ let Features = "avx10.2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>
let Features = "avx10.2", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
def vgetmantbf16512_mask : X86Builtin<"_Vector<32, __bf16>(_Vector<32, __bf16>, _Constant int, _Vector<32, __bf16>, unsigned int)">;
}

let Features = "avx10.2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
def vsqrtbf16 : X86Builtin<"_Vector<8, __bf16>(_Vector<8, __bf16>)">;
}

let Features = "avx10.2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
def vsqrtbf16256 : X86Builtin<"_Vector<16, __bf16>(_Vector<16, __bf16>)">;
}

let Features = "avx10.2", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
def vsqrtbf16512 : X86Builtin<"_Vector<32, __bf16>(_Vector<32, __bf16>)">;
}
16 changes: 10 additions & 6 deletions clang/lib/AST/ByteCode/Interp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1435,8 +1435,12 @@ static bool getField(InterpState &S, CodePtr OpPC, const Pointer &Ptr,
return false;

if (Ptr.isIntegralPointer()) {
S.Stk.push<Pointer>(Ptr.asIntPointer().atOffset(S.getASTContext(), Off));
return true;
if (std::optional<IntPointer> IntPtr =
Ptr.asIntPointer().atOffset(S.getASTContext(), Off)) {
S.Stk.push<Pointer>(std::move(*IntPtr));
return true;
}
return false;
}

if (!Ptr.isBlockPointer()) {
Expand Down Expand Up @@ -2081,15 +2085,15 @@ bool InvalidShuffleVectorIndex(InterpState &S, CodePtr OpPC, uint32_t Index) {

bool CheckPointerToIntegralCast(InterpState &S, CodePtr OpPC,
const Pointer &Ptr, unsigned BitWidth) {
const SourceInfo &E = S.Current->getSource(OpPC);
S.CCEDiag(E, diag::note_constexpr_invalid_cast)
<< 2 << S.getLangOpts().CPlusPlus << S.Current->getRange(OpPC);

if (Ptr.isDummy())
return false;
if (Ptr.isFunctionPointer())
return true;

const SourceInfo &E = S.Current->getSource(OpPC);
S.CCEDiag(E, diag::note_constexpr_invalid_cast)
<< 2 << S.getLangOpts().CPlusPlus << S.Current->getRange(OpPC);

if (Ptr.isBlockPointer() && !Ptr.isZero()) {
// Only allow based lvalue casts if they are lossless.
if (S.getASTContext().getTargetInfo().getPointerWidth(LangAS::Default) !=
Expand Down
4 changes: 0 additions & 4 deletions clang/lib/AST/ByteCode/Interp.h
Original file line number Diff line number Diff line change
Expand Up @@ -2646,10 +2646,6 @@ template <PrimType Name, class T = typename PrimConv<Name>::T>
bool CastPointerIntegral(InterpState &S, CodePtr OpPC) {
const Pointer &Ptr = S.Stk.pop<Pointer>();

S.CCEDiag(S.Current->getSource(OpPC), diag::note_constexpr_invalid_cast)
<< diag::ConstexprInvalidCastKind::ThisConversionOrReinterpret
<< S.getLangOpts().CPlusPlus << S.Current->getRange(OpPC);

if (!CheckPointerToIntegralCast(S, OpPC, Ptr, T::bitWidth()))
return Invalid(S, OpPC);

Expand Down
149 changes: 149 additions & 0 deletions clang/lib/AST/ByteCode/InterpBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3527,6 +3527,147 @@ static bool interp__builtin_ia32_shufbitqmb_mask(InterpState &S, CodePtr OpPC,
}

pushInteger(S, RetMask, Call->getType());
return true;
}

/// Constant-evaluates the F16C float -> half conversion builtins
/// (__builtin_ia32_vcvtps2ph and __builtin_ia32_vcvtps2ph256).
///
/// Each float lane of the source vector is converted to half precision and the
/// raw bit pattern of the result is stored in the matching integer lane of the
/// destination vector. Destination lanes that have no source lane are zeroed.
///
/// \returns false, after emitting note_constexpr_dynamic_rounding, if the
/// immediate selects the MXCSR rounding mode, the call is FP-constrained and
/// the conversion of some lane does not report APFloat::opOK; true otherwise.
static bool interp__builtin_ia32_vcvtps2ph(InterpState &S, CodePtr OpPC,
                                           const CallExpr *Call) {
  // Operands: (vector of floats, rounding-control immediate).
  assert(Call->getNumArgs() == 2);

  // The immediate is popped first, then the source vector. The destination
  // vector is only peeked, so it remains on the stack and is filled in place.
  APSInt RoundingImm = popToAPSInt(S, Call->getArg(1));
  const Pointer &Src = S.Stk.pop<Pointer>();
  const Pointer &Dst = S.Stk.peek<Pointer>();

  assert(Src.getFieldDesc()->isPrimitiveArray());
  assert(Dst.getFieldDesc()->isPrimitiveArray());

  const unsigned SrcNumElems =
      Call->getArg(0)->getType()->castAs<VectorType>()->getNumElements();
  const unsigned DstNumElems =
      Call->getType()->castAs<VectorType>()->getNumElements();

  const ASTContext &ASTCtx = S.getASTContext();
  const llvm::fltSemantics &HalfSem =
      ASTCtx.getFloatTypeSemantics(ASTCtx.HalfTy);

  // imm[2] == 1 selects the MXCSR rounding mode; otherwise imm[1:0] encodes
  // the rounding mode directly.
  int ImmVal = RoundingImm.getZExtValue();
  const bool UseMXCSR = (ImmVal & 4) != 0;

  // A dynamic (MXCSR) rounding mode is only rejected in FP-constrained code,
  // and only if a conversion does not come back as opOK.
  const bool RejectNonOKConversion =
      UseMXCSR &&
      Call->getFPFeaturesInEffect(ASTCtx.getLangOpts()).isFPConstrained();

  // Nearest-ties-to-even serves both as the encoding for imm[1:0] == 0 and as
  // the trial rounding mode for MXCSR: an exact conversion yields the same
  // result under any rounding mode.
  llvm::RoundingMode RM = llvm::RoundingMode::NearestTiesToEven;
  if (!UseMXCSR) {
    switch (ImmVal & 3) {
    case 1:
      RM = llvm::RoundingMode::TowardNegative;
      break;
    case 2:
      RM = llvm::RoundingMode::TowardPositive;
      break;
    case 3:
      RM = llvm::RoundingMode::TowardZero;
      break;
    default:
      // 0: keep NearestTiesToEven.
      break;
    }
  }

  PrimType DstElemT =
      *S.getContext().classify(Dst.getFieldDesc()->getElemQualType());

  for (unsigned Lane = 0; Lane != SrcNumElems; ++Lane) {
    APFloat HalfVal = Src.elem<Floating>(Lane).getAPFloat();

    bool LostInfo;
    const APFloat::opStatus Status = HalfVal.convert(HalfSem, RM, &LostInfo);

    if (RejectNonOKConversion && Status != APFloat::opOK) {
      S.FFDiag(S.Current->getSource(OpPC),
               diag::note_constexpr_dynamic_rounding);
      return false;
    }

    // Store the half value's bit pattern through the destination element
    // type's 'from' factory.
    const uint64_t HalfBits = HalfVal.bitcastToAPInt().getZExtValue();
    INT_TYPE_SWITCH_NO_BOOL(DstElemT,
                            { Dst.elem<T>(Lane) = T::from(HalfBits); });
  }

  // Zero any destination lanes beyond the converted ones (e.g. vcvtps2ph
  // converts 4 floats into a vector of 8 shorts).
  for (unsigned Lane = SrcNumElems; Lane < DstNumElems; ++Lane) {
    INT_TYPE_SWITCH_NO_BOOL(DstElemT, { Dst.elem<T>(Lane) = T::from(0); });
  }

  Dst.initializeAllElements();
  return true;
}

/// Constant-evaluates the vpmultishiftqb builtins (128/256/512-bit forms).
///
/// Both operands are byte vectors of equal length and are processed in groups
/// of eight bytes. For every group, the eight bytes of the second operand are
/// assembled into one 64-bit value (byte 0 in the lowest bits). Each byte of
/// the first operand supplies, in its low six bits, a starting bit position;
/// the matching result byte is made of the eight bits of the 64-bit value
/// beginning at that position, with bit indices wrapping modulo 64.
///
/// \returns false without touching the stack if either argument is not of
/// vector type; true once the destination vector has been filled.
static bool interp__builtin_ia32_multishiftqb(InterpState &S, CodePtr OpPC,
                                              const CallExpr *Call) {
  assert(Call->getNumArgs() == 2);

  QualType CtrlTy = Call->getArg(0)->getType();
  QualType DataTy = Call->getArg(1)->getType();
  if (!(CtrlTy->isVectorType() && DataTy->isVectorType()))
    return false;

  // The second argument is popped first, then the first argument. The
  // destination is only peeked and therefore stays on the stack.
  const Pointer &DataVec = S.Stk.pop<Pointer>();
  const Pointer &CtrlVec = S.Stk.pop<Pointer>();
  const auto *VecTy = CtrlTy->castAs<VectorType>();
  assert(VecTy->getNumElements() ==
         DataTy->castAs<VectorType>()->getNumElements());

  PrimType ElemT = *S.getContext().classify(VecTy->getElementType());

  constexpr unsigned BytesPerQWord = 8;
  constexpr unsigned BitsPerByte = 8;
  const unsigned NumQWords = VecTy->getNumElements() / BytesPerQWord;
  const Pointer &Dst = S.Stk.peek<Pointer>();

  for (unsigned QWord = 0; QWord < NumQWords; ++QWord) {
    const unsigned FirstByte = QWord * BytesPerQWord;

    // Pack this group's eight data bytes into a single 64-bit value.
    APInt DataBits(64, 0);
    for (unsigned Offset = 0; Offset < BytesPerQWord; ++Offset) {
      const unsigned Idx = FirstByte + Offset;
      INT_TYPE_SWITCH(ElemT, {
        uint64_t Raw = static_cast<uint64_t>(DataVec.elem<T>(Idx));
        DataBits.insertBits(APInt(8, Raw & 0xFF), Offset * BitsPerByte);
      });
    }

    // Produce one result byte per control byte of the group.
    for (unsigned Offset = 0; Offset < BytesPerQWord; ++Offset) {
      const unsigned Idx = FirstByte + Offset;

      // Only the low six bits of the control byte are used.
      uint64_t StartBit = 0;
      INT_TYPE_SWITCH(ElemT, {
        StartBit = static_cast<uint64_t>(CtrlVec.elem<T>(Idx)) & 0x3F;
      });

      // Gather eight consecutive bits, wrapping around at bit 63.
      APInt Selected(8, 0);
      for (unsigned Bit = 0; Bit < BitsPerByte; ++Bit)
        Selected.setBitVal(Bit, DataBits[(StartBit + Bit) & 0x3F]);

      INT_TYPE_SWITCH(ElemT,
                      { Dst.elem<T>(Idx) = T::from(Selected.getZExtValue()); });
    }
  }

  Dst.initializeAllElements();

  return true;
}
Expand Down Expand Up @@ -4756,6 +4897,10 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
return std::make_pair(0, static_cast<int>(LaneOffset + Index));
});

case X86::BI__builtin_ia32_vpmultishiftqb128:
case X86::BI__builtin_ia32_vpmultishiftqb256:
case X86::BI__builtin_ia32_vpmultishiftqb512:
return interp__builtin_ia32_multishiftqb(S, OpPC, Call);
case X86::BI__builtin_ia32_kandqi:
case X86::BI__builtin_ia32_kandhi:
case X86::BI__builtin_ia32_kandsi:
Expand Down Expand Up @@ -4898,6 +5043,10 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
case X86::BI__builtin_ia32_insert128i256:
return interp__builtin_x86_insert_subvector(S, OpPC, Call, BuiltinID);

case clang::X86::BI__builtin_ia32_vcvtps2ph:
case clang::X86::BI__builtin_ia32_vcvtps2ph256:
return interp__builtin_ia32_vcvtps2ph(S, OpPC, Call);

case X86::BI__builtin_ia32_vec_ext_v4hi:
case X86::BI__builtin_ia32_vec_ext_v16qi:
case X86::BI__builtin_ia32_vec_ext_v8hi:
Expand Down
7 changes: 5 additions & 2 deletions clang/lib/AST/ByteCode/Pointer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -895,8 +895,8 @@ std::optional<APValue> Pointer::toRValue(const Context &Ctx,
return Result;
}

IntPointer IntPointer::atOffset(const ASTContext &ASTCtx,
unsigned Offset) const {
std::optional<IntPointer> IntPointer::atOffset(const ASTContext &ASTCtx,
unsigned Offset) const {
if (!this->Desc)
return *this;
const Record *R = this->Desc->ElemRecord;
Expand All @@ -914,6 +914,9 @@ IntPointer IntPointer::atOffset(const ASTContext &ASTCtx,
return *this;

const FieldDecl *FD = F->Decl;
if (FD->getParent()->isInvalidDecl())
return std::nullopt;

const ASTRecordLayout &Layout = ASTCtx.getASTRecordLayout(FD->getParent());
unsigned FieldIndex = FD->getFieldIndex();
uint64_t FieldOffset =
Expand Down
3 changes: 2 additions & 1 deletion clang/lib/AST/ByteCode/Pointer.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,8 @@ struct IntPointer {
const Descriptor *Desc;
uint64_t Value;

IntPointer atOffset(const ASTContext &ASTCtx, unsigned Offset) const;
std::optional<IntPointer> atOffset(const ASTContext &ASTCtx,
unsigned Offset) const;
IntPointer baseCast(const ASTContext &ASTCtx, unsigned BaseOffset) const;
};

Expand Down
Loading