Skip to content

Commit

Permalink
[X86] combineVectorCompare - add constant folding support for PCMPEQ/PCMPGT instructions
Browse files Browse the repository at this point in the history
  • Loading branch information
RKSimon committed Jun 28, 2024
1 parent aca71ef commit b0f20f2
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 8 deletions.
39 changes: 34 additions & 5 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55692,14 +55692,43 @@ static SDValue combineSub(SDNode *N, SelectionDAG &DAG,

// Combine for X86ISD::PCMPEQ / X86ISD::PCMPGT vector compare nodes.
// Folds a compare of an operand with itself, and a compare of two operands
// whose constant bits can be extracted, into a constant vector. Returns an
// empty SDValue when no fold applies.
// NOTE(review): this span is a rendered diff hunk without +/- markers, so
// removed and added lines appear together and the function's closing brace is
// not shown here.
static SDValue combineVectorCompare(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
unsigned Opcode = N->getOpcode();
assert((Opcode == X86ISD::PCMPEQ || Opcode == X86ISD::PCMPGT) &&
"Unknown PCMP opcode");

SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
MVT VT = N->getSimpleValueType(0);
unsigned EltBits = VT.getScalarSizeInBits();
unsigned NumElts = VT.getVectorNumElements();
SDLoc DL(N);

// NOTE(review): the next five lines look like the pre-change form of the
// identical-operand fold, superseded by the `LHS == RHS` check that follows
// them - confirm against the raw diff.
if (N->getOperand(0) == N->getOperand(1)) {
if (N->getOpcode() == X86ISD::PCMPEQ)
return DAG.getConstant(-1, DL, VT);
if (N->getOpcode() == X86ISD::PCMPGT)
return DAG.getConstant(0, DL, VT);
// Identical operands: PCMPEQ(X,X) folds to all-ones, PCMPGT(X,X) folds to zero.
if (LHS == RHS)
return (Opcode == X86ISD::PCMPEQ) ? DAG.getAllOnesConstant(DL, VT)
: DAG.getConstant(0, DL, VT);

// Constant Folding.
// PCMPEQ(X,UNDEF) -> UNDEF
// PCMPGT(X,UNDEF) -> 0
// PCMPGT(UNDEF,X) -> 0
APInt LHSUndefs, RHSUndefs;
SmallVector<APInt> LHSBits, RHSBits;
// Fold only when both calls succeed; presumably each call fills the
// per-element bits and the undef-element mask for its operand (helper is
// defined elsewhere - verify).
if (getTargetConstantBitsFromNode(LHS, EltBits, LHSUndefs, LHSBits) &&
getTargetConstantBitsFromNode(RHS, EltBits, RHSUndefs, RHSBits)) {
APInt Ones = APInt::getAllOnes(EltBits);
APInt Zero = APInt::getZero(EltBits);
SmallVector<APInt> Results(NumElts);
for (unsigned I = 0; I != NumElts; ++I) {
if (Opcode == X86ISD::PCMPEQ) {
// Equal element bits give an all-ones lane, otherwise zero. Undef lanes
// are not special-cased here; they are covered by the undef mask passed
// to getConstVector below.
Results[I] = (LHSBits[I] == RHSBits[I]) ? Ones : Zero;
} else {
// Signed greater-than; a lane with an undef input on either side is
// forced to zero.
bool AnyUndef = LHSUndefs[I] || RHSUndefs[I];
Results[I] = (!AnyUndef && LHSBits[I].sgt(RHSBits[I])) ? Ones : Zero;
}
}
// PCMPEQ passes the union of both operands' undef lanes to the result;
// PCMPGT passes no undef mask because undef lanes were already set to zero.
if (Opcode == X86ISD::PCMPEQ)
return getConstVector(Results, LHSUndefs | RHSUndefs, VT, DAG, DL);
return getConstVector(Results, VT, DAG, DL);
}

// No fold applied.
return SDValue();
Expand Down
4 changes: 1 addition & 3 deletions llvm/test/CodeGen/X86/pr81136.ll
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,17 @@ define i64 @PR81136(i32 %a0, i32 %a1, ptr %a2) {
; CHECK-NEXT: vmovd %esi, %xmm1
; CHECK-NEXT: vmovdqa (%rdx), %ymm2
; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
; CHECK-NEXT: vpmovzxbq {{.*#+}} xmm4 = [128,1]
; CHECK-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm4
; CHECK-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
; CHECK-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vpxor %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpmovsxwq %xmm0, %xmm0
; CHECK-NEXT: vpalignr {{.*#+}} xmm0 = mem[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
; CHECK-NEXT: vpcmpeqq %xmm3, %xmm0, %xmm0
; CHECK-NEXT: vpxor %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm1
; CHECK-NEXT: vextractf128 $1, %ymm2, %xmm2
; CHECK-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm4, %ymm0
; CHECK-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; CHECK-NEXT: vandnpd %ymm0, %ymm1, %ymm0
; CHECK-NEXT: vmovmskpd %ymm0, %eax
Expand Down

0 comments on commit b0f20f2

Please sign in to comment.