Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
45a2320
Remove unused <utility> inclusion
serge-sans-paille Nov 10, 2025
a6cd400
Remove unused <type_traits> inclusion
serge-sans-paille Nov 10, 2025
af14646
Remove unused <iterator> inclusion
serge-sans-paille Nov 10, 2025
f2a4287
[Headers][X86] VectorExprEvaluator::VisitCallExpr / InterpretBuiltin …
TianYe717 Nov 11, 2025
17ce48f
[libc++] Remove __is_replaceable emulation (#167355)
ldionne Nov 11, 2025
b4c4013
[X86] narrowBitOpRMW - peek through bitcasts while searching for RMW …
RKSimon Nov 11, 2025
a51c1f8
[mlir] Remove deprecated GEN_PASS_CLASSES (#167496)
jpienaar Nov 11, 2025
7894a57
[BAZEL] Fix missing GpuDialect dependency added in #166865 (#167503)
WillFroom Nov 11, 2025
91e6dee
[Flang][OpenMP] Improve Semantics for Derived Type Array Elements (#1…
Stylie777 Nov 11, 2025
68415e2
[MLIR][Linalg] fix linalg.pack description (#167045)
RoboTux Nov 11, 2025
5d062bf
[flang][OpenMP] Replace modifiers in DYN_GROUPPRIVATE clause (#166199)
kparzysz Nov 11, 2025
17e67b0
[flang][OpenMP] Semantic checks for DYN_GROUPPRIVATE (#166214)
kparzysz Nov 11, 2025
f1b5504
[OpenMP] Sort clause-specific application functions, NFC (#167501)
kparzysz Nov 11, 2025
c8c3284
Revert "[VPlan] Handle WidenGEP in narrowToSingleScalars" (#167509)
artagnon Nov 11, 2025
863730f
[MLIR] Apply clang-tidy fixes for misc-use-internal-linkage in tblgen…
joker-eph Aug 21, 2025
125b6b5
[AMDGPU] Generate s_lshl?_add_u32 (#167032)
LU-JOHN Nov 11, 2025
ddaa2c3
[Clang] Generalize interp__builtin_ia32_shuffle_generic to handle sin…
TelGome Nov 11, 2025
938f521
[MLIR] Apply clang-tidy fixes for performance-unnecessary-value-param…
joker-eph Aug 21, 2025
83ef17d
[X86][AVX512] rematerialize smaller predicate masks (#166178)
ahmednoursphinx Nov 11, 2025
8346a77
[MLIR][Python] fix PyRegionList `__iter__` (#167466)
makslevental Nov 11, 2025
eb614cd
[Flang][OpenMP][MLIR] Lowering of reduction,inreduction, nogroup and …
kaviya2510 Nov 11, 2025
80e7fe8
[lldb][RISCV] Fix float load and stores in RISC-V emulator (#167490)
sga-sc Nov 11, 2025
f8cb6cd
[lldb][Android] Fix platform process list regression (#164333)
cs01 Nov 11, 2025
a314b3b
[MachineCopyPropagation] Remove logic to recognise and delete no-op m…
asb Nov 11, 2025
bdcd591
[MLIR] Apply clang-tidy fixes for performance-unnecessary-value-param…
joker-eph Aug 21, 2025
d47fdfe
[NFC][WebAssembly] Precommit test. (#167520)
sparker-arm Nov 11, 2025
45bb926
[mlir][tosa] Add e2e tests for matmul_t_block_scaled (#166567)
udaya-ranga Nov 11, 2025
7f81869
[libc++] Split features.py into multiple files (#167353)
ldionne Nov 11, 2025
75751f3
Reapply "Reapply "[mlir] Add FP software implementation lowering pass…
makslevental Nov 11, 2025
7911b35
[CSKY] Fix build (#167510)
s-barannikov Nov 11, 2025
8eb28ca
[AMDGPU] Remove implicit conversions of MCRegister to unsigned. NFC (…
topperc Nov 11, 2025
9ab38fc
AMDGPU: Replace some uses of getOpRegClass with getRegClass (#167447)
arsenm Nov 11, 2025
4e37eaf
AMDGPU: Add med3 tests from minimum/maximum (#167448)
arsenm Nov 11, 2025
f45bb98
[CIR] Upstream CXXRewrittenBinaryOperator for Scalar expr (#167210)
AmrDeveloper Nov 11, 2025
8218055
JITLink: Add initial SystemZ Support. (#144528)
anoopkg6 Nov 11, 2025
ab9cb6a
merge main into amd-staging
ronlieb Nov 11, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions clang/include/clang/Basic/BuiltinsX86.td
Original file line number Diff line number Diff line change
Expand Up @@ -311,7 +311,7 @@ let Features = "sse3", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
def lddqu : X86Builtin<"_Vector<16, char>(char const *)">;
}

let Features = "ssse3", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
let Features = "ssse3", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def palignr128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>, _Constant int)">;
}

Expand Down Expand Up @@ -605,8 +605,7 @@ let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWid

let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
def mpsadbw256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, _Constant char)">;
def palignr256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, "
"_Vector<32, char>, _Constant int)">;

def psadbw256
: X86Builtin<
"_Vector<4, long long int>(_Vector<32, char>, _Vector<32, char>)">;
Expand All @@ -630,6 +629,7 @@ let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWi
def pmovmskb256 : X86Builtin<"int(_Vector<32, char>)">;
def pavgb256 : X86Builtin<"_Vector<32, unsigned char>(_Vector<32, unsigned char>, _Vector<32, unsigned char>)">;
def pavgw256 : X86Builtin<"_Vector<16, unsigned short>(_Vector<16, unsigned short>, _Vector<16, unsigned short>)">;
def palignr256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, _Constant int)">;

def pblendd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Constant int)">;
def pblendd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Constant int)">;
Expand Down Expand Up @@ -3263,7 +3263,7 @@ let Features = "avx512bw", Attributes = [NoThrow, Const] in {
def kmovq : X86Builtin<"unsigned long long int(unsigned long long int)">;
}

let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def palignr512 : X86Builtin<"_Vector<64, char>(_Vector<64, char>, _Vector<64, char>, _Constant int)">;
}

Expand Down
245 changes: 113 additions & 132 deletions clang/lib/AST/ByteCode/InterpBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2841,76 +2841,6 @@ static bool interp__builtin_blend(InterpState &S, CodePtr OpPC,
return true;
}

/// Evaluates the x86 pshufb (packed shuffle bytes) builtins in the bytecode
/// interpreter. Each destination byte is picked from within the same 16-byte
/// lane of the source vector using the low nibble of the control byte, or
/// zeroed when bit 7 of the control byte is set.
/// Stack layout (popped in reverse): Src, Control; result written to the
/// peeked Dst vector. Always returns true (evaluation succeeded).
static bool interp__builtin_ia32_pshufb(InterpState &S, CodePtr OpPC,
                                        const CallExpr *Call) {
  assert(Call->getNumArgs() == 2 && "masked forms handled via select*");
  const Pointer &Control = S.Stk.pop<Pointer>();
  const Pointer &Src = S.Stk.pop<Pointer>();
  const Pointer &Dst = S.Stk.peek<Pointer>();

  unsigned NumElems = Dst.getNumElems();
  assert(NumElems == Control.getNumElems());
  // Fix: original second assert compared Dst against itself; Src is the
  // vector actually indexed below, so its size is what must match.
  assert(NumElems == Src.getNumElems());

  for (unsigned Idx = 0; Idx != NumElems; ++Idx) {
    uint8_t Ctlb = static_cast<uint8_t>(Control.elem<int8_t>(Idx));

    if (Ctlb & 0x80) {
      // Control bit 7 set: zero-fill this destination byte.
      Dst.elem<int8_t>(Idx) = 0;
    } else {
      // Low nibble selects a byte within the current 128-bit (16-byte) lane;
      // shuffles never cross lane boundaries.
      unsigned LaneBase = (Idx / 16) * 16;
      unsigned SrcOffset = Ctlb & 0x0F;
      unsigned SrcIdx = LaneBase + SrcOffset;

      Dst.elem<int8_t>(Idx) = Src.elem<int8_t>(SrcIdx);
    }
  }
  Dst.initializeAllElements();
  return true;
}

/// Evaluates the x86 pshufd / pshuflw / pshufhw builtins. The 8-bit immediate
/// holds four 2-bit selectors. For 32-bit elements all four lane elements are
/// shuffled; for 16-bit elements only the low (pshuflw, IsShufHW=false) or
/// high (pshufhw, IsShufHW=true) half of each 128-bit lane is shuffled and
/// the other half is copied through unchanged.
/// Returns false (not constexpr-evaluable) for unsupported element widths.
static bool interp__builtin_ia32_pshuf(InterpState &S, CodePtr OpPC,
const CallExpr *Call, bool IsShufHW) {
assert(Call->getNumArgs() == 2 && "masked forms handled via select*");
// Immediate control byte is arg 1; the source vector is on the stack.
APSInt ControlImm = popToAPSInt(S, Call->getArg(1));
const Pointer &Src = S.Stk.pop<Pointer>();
const Pointer &Dst = S.Stk.peek<Pointer>();

unsigned NumElems = Dst.getNumElems();
PrimType ElemT = Dst.getFieldDesc()->getPrimType();

// Only 16-bit (pshuflw/hw) and 32-bit (pshufd) element widths are defined.
unsigned ElemBits = static_cast<unsigned>(primSize(ElemT) * 8);
if (ElemBits != 16 && ElemBits != 32)
return false;

// Elements per 128-bit lane: 8 for 16-bit elements, 4 for 32-bit elements.
unsigned LaneElts = 128u / ElemBits;
assert(LaneElts && (NumElems % LaneElts == 0));

uint8_t Ctl = static_cast<uint8_t>(ControlImm.getZExtValue());

for (unsigned Idx = 0; Idx != NumElems; Idx++) {
unsigned LaneBase = (Idx / LaneElts) * LaneElts;
unsigned LaneIdx = Idx % LaneElts;
// Default: pass the element through unchanged (used by the unshuffled
// half in the 16-bit forms).
unsigned SrcIdx = Idx;
// Each destination position consumes a 2-bit selector; LaneIdx & 0x3
// reuses the same four selectors for both halves of an 8-element lane.
unsigned Sel = (Ctl >> (2 * (LaneIdx & 0x3))) & 0x3;
if (ElemBits == 32) {
SrcIdx = LaneBase + Sel;
} else {
// 16-bit case: shuffle only the half selected by IsShufHW.
constexpr unsigned HalfSize = 4;
bool InHigh = LaneIdx >= HalfSize;
if (!IsShufHW && !InHigh) {
SrcIdx = LaneBase + Sel;
} else if (IsShufHW && InHigh) {
SrcIdx = LaneBase + HalfSize + Sel;
}
}

INT_TYPE_SWITCH_NO_BOOL(ElemT, { Dst.elem<T>(Idx) = Src.elem<T>(SrcIdx); });
}
Dst.initializeAllElements();
return true;
}

static bool interp__builtin_ia32_test_op(
InterpState &S, CodePtr OpPC, const CallExpr *Call,
llvm::function_ref<bool(const APInt &A, const APInt &B)> Fn) {
Expand Down Expand Up @@ -3377,61 +3307,46 @@ static bool interp__builtin_ia32_vpconflict(InterpState &S, CodePtr OpPC,
return true;
}

/// Generic driver for the x86 byte-shift builtins (pslldq/psrldq). Walks the
/// source vector in 16-byte (128-bit lane) groups and asks the callback Fn to
/// produce each destination byte from (Src, Lane, I, Shift), so the same loop
/// serves both left and right shifts.
/// NOTE(review): the ID parameter is not read in this body — presumably kept
/// for signature uniformity with sibling helpers; confirm before removing.
/// Returns false if the operand is not a primitive vector.
static bool interp__builtin_x86_byteshift(
InterpState &S, CodePtr OpPC, const CallExpr *Call, unsigned ID,
llvm::function_ref<APInt(const Pointer &, unsigned Lane, unsigned I,
unsigned Shift)>
Fn) {
assert(Call->getNumArgs() == 2);

// Hardware only honors the low 8 bits of the shift immediate.
APSInt ImmAPS = popToAPSInt(S, Call->getArg(1));
uint64_t Shift = ImmAPS.getZExtValue() & 0xff;

const Pointer &Src = S.Stk.pop<Pointer>();
if (!Src.getFieldDesc()->isPrimitiveArray())
return false;

unsigned NumElems = Src.getNumElems();
const Pointer &Dst = S.Stk.peek<Pointer>();
PrimType ElemT = Src.getFieldDesc()->getPrimType();

// Process one 128-bit lane (16 bytes) at a time; shifts never cross lanes.
for (unsigned Lane = 0; Lane != NumElems; Lane += 16) {
for (unsigned I = 0; I != 16; ++I) {
unsigned Base = Lane + I;
// Callback computes the byte value; an 8-bit APInt per iteration.
APSInt Result = APSInt(Fn(Src, Lane, I, Shift));
INT_TYPE_SWITCH_NO_BOOL(ElemT,
{ Dst.elem<T>(Base) = static_cast<T>(Result); });
}
}

Dst.initializeAllElements();

return true;
}

static bool interp__builtin_ia32_shuffle_generic(
InterpState &S, CodePtr OpPC, const CallExpr *Call,
llvm::function_ref<std::pair<unsigned, int>(unsigned, unsigned)>
GetSourceIndex) {

assert(Call->getNumArgs() == 3);
assert(Call->getNumArgs() == 2 || Call->getNumArgs() == 3);

unsigned ShuffleMask = 0;
Pointer A, MaskVector, B;

QualType Arg2Type = Call->getArg(2)->getType();
bool IsVectorMask = false;
if (Arg2Type->isVectorType()) {
IsVectorMask = true;
B = S.Stk.pop<Pointer>();
MaskVector = S.Stk.pop<Pointer>();
A = S.Stk.pop<Pointer>();
} else if (Arg2Type->isIntegerType()) {
ShuffleMask = popToAPSInt(S, Call->getArg(2)).getZExtValue();
B = S.Stk.pop<Pointer>();
A = S.Stk.pop<Pointer>();
bool IsSingleOperand = (Call->getNumArgs() == 2);

if (IsSingleOperand) {
QualType MaskType = Call->getArg(1)->getType();
if (MaskType->isVectorType()) {
IsVectorMask = true;
MaskVector = S.Stk.pop<Pointer>();
A = S.Stk.pop<Pointer>();
B = A;
} else if (MaskType->isIntegerType()) {
ShuffleMask = popToAPSInt(S, Call->getArg(1)).getZExtValue();
A = S.Stk.pop<Pointer>();
B = A;
} else {
return false;
}
} else {
return false;
QualType Arg2Type = Call->getArg(2)->getType();
if (Arg2Type->isVectorType()) {
IsVectorMask = true;
B = S.Stk.pop<Pointer>();
MaskVector = S.Stk.pop<Pointer>();
A = S.Stk.pop<Pointer>();
} else if (Arg2Type->isIntegerType()) {
ShuffleMask = popToAPSInt(S, Call->getArg(2)).getZExtValue();
B = S.Stk.pop<Pointer>();
A = S.Stk.pop<Pointer>();
} else {
return false;
}
}

QualType Arg0Type = Call->getArg(0)->getType();
Expand All @@ -3455,6 +3370,7 @@ static bool interp__builtin_ia32_shuffle_generic(
ShuffleMask = static_cast<unsigned>(MaskVector.elem<T>(DstIdx));
});
}

auto [SrcVecIdx, SrcIdx] = GetSourceIndex(DstIdx, ShuffleMask);

if (SrcIdx < 0) {
Expand Down Expand Up @@ -4555,22 +4471,58 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
case X86::BI__builtin_ia32_pshufb128:
case X86::BI__builtin_ia32_pshufb256:
case X86::BI__builtin_ia32_pshufb512:
return interp__builtin_ia32_pshufb(S, OpPC, Call);
return interp__builtin_ia32_shuffle_generic(
S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
uint8_t Ctlb = static_cast<uint8_t>(ShuffleMask);
if (Ctlb & 0x80)
return std::make_pair(0, -1);

unsigned LaneBase = (DstIdx / 16) * 16;
unsigned SrcOffset = Ctlb & 0x0F;
unsigned SrcIdx = LaneBase + SrcOffset;
return std::make_pair(0, static_cast<int>(SrcIdx));
});

case X86::BI__builtin_ia32_pshuflw:
case X86::BI__builtin_ia32_pshuflw256:
case X86::BI__builtin_ia32_pshuflw512:
return interp__builtin_ia32_pshuf(S, OpPC, Call, false);
return interp__builtin_ia32_shuffle_generic(
S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
unsigned LaneBase = (DstIdx / 8) * 8;
unsigned LaneIdx = DstIdx % 8;
if (LaneIdx < 4) {
unsigned Sel = (ShuffleMask >> (2 * LaneIdx)) & 0x3;
return std::make_pair(0, static_cast<int>(LaneBase + Sel));
}

return std::make_pair(0, static_cast<int>(DstIdx));
});

case X86::BI__builtin_ia32_pshufhw:
case X86::BI__builtin_ia32_pshufhw256:
case X86::BI__builtin_ia32_pshufhw512:
return interp__builtin_ia32_pshuf(S, OpPC, Call, true);
return interp__builtin_ia32_shuffle_generic(
S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
unsigned LaneBase = (DstIdx / 8) * 8;
unsigned LaneIdx = DstIdx % 8;
if (LaneIdx >= 4) {
unsigned Sel = (ShuffleMask >> (2 * (LaneIdx - 4))) & 0x3;
return std::make_pair(0, static_cast<int>(LaneBase + 4 + Sel));
}

return std::make_pair(0, static_cast<int>(DstIdx));
});

case X86::BI__builtin_ia32_pshufd:
case X86::BI__builtin_ia32_pshufd256:
case X86::BI__builtin_ia32_pshufd512:
return interp__builtin_ia32_pshuf(S, OpPC, Call, false);
return interp__builtin_ia32_shuffle_generic(
S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
unsigned LaneBase = (DstIdx / 4) * 4;
unsigned LaneIdx = DstIdx % 4;
unsigned Sel = (ShuffleMask >> (2 * LaneIdx)) & 0x3;
return std::make_pair(0, static_cast<int>(LaneBase + Sel));
});

case X86::BI__builtin_ia32_kandqi:
case X86::BI__builtin_ia32_kandhi:
Expand Down Expand Up @@ -4728,13 +4680,16 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
// The lane width is hardcoded to 16 to match the SIMD register size,
// but the algorithm processes one byte per iteration,
// so APInt(8, ...) is correct and intentional.
return interp__builtin_x86_byteshift(
S, OpPC, Call, BuiltinID,
[](const Pointer &Src, unsigned Lane, unsigned I, unsigned Shift) {
if (I < Shift) {
return APInt(8, 0);
}
return APInt(8, Src.elem<uint8_t>(Lane + I - Shift));
return interp__builtin_ia32_shuffle_generic(
S, OpPC, Call,
[](unsigned DstIdx, unsigned Shift) -> std::pair<unsigned, int> {
unsigned LaneBase = (DstIdx / 16) * 16;
unsigned LaneIdx = DstIdx % 16;
if (LaneIdx < Shift)
return std::make_pair(0, -1);

return std::make_pair(0,
static_cast<int>(LaneBase + LaneIdx - Shift));
});

case X86::BI__builtin_ia32_psrldqi128_byteshift:
Expand All @@ -4744,14 +4699,40 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
// The lane width is hardcoded to 16 to match the SIMD register size,
// but the algorithm processes one byte per iteration,
// so APInt(8, ...) is correct and intentional.
return interp__builtin_x86_byteshift(
S, OpPC, Call, BuiltinID,
[](const Pointer &Src, unsigned Lane, unsigned I, unsigned Shift) {
if (I + Shift < 16) {
return APInt(8, Src.elem<uint8_t>(Lane + I + Shift));
return interp__builtin_ia32_shuffle_generic(
S, OpPC, Call,
[](unsigned DstIdx, unsigned Shift) -> std::pair<unsigned, int> {
unsigned LaneBase = (DstIdx / 16) * 16;
unsigned LaneIdx = DstIdx % 16;
if (LaneIdx + Shift < 16)
return std::make_pair(0,
static_cast<int>(LaneBase + LaneIdx + Shift));

return std::make_pair(0, -1);
});

case X86::BI__builtin_ia32_palignr128:
case X86::BI__builtin_ia32_palignr256:
case X86::BI__builtin_ia32_palignr512:
return interp__builtin_ia32_shuffle_generic(
S, OpPC, Call, [](unsigned DstIdx, unsigned Shift) {
// Default to -1 → zero-fill this destination element
unsigned VecIdx = 1;
int ElemIdx = -1;

int Lane = DstIdx / 16;
int Offset = DstIdx % 16;

// Elements come from VecB first, then VecA after the shift boundary
unsigned ShiftedIdx = Offset + (Shift & 0xFF);
if (ShiftedIdx < 16) { // from VecB
ElemIdx = ShiftedIdx + (Lane * 16);
} else if (ShiftedIdx < 32) { // from VecA
VecIdx = 0;
ElemIdx = (ShiftedIdx - 16) + (Lane * 16);
}

return APInt(8, 0);
return std::pair<unsigned, int>{VecIdx, ElemIdx};
});

default:
Expand Down
Loading