Skip to content

Commit 5bdce50

Browse files
committed
[LoongArch] Lower vector select mask generation to [X]VMSK{LT,GE,NE}Z if possible
This patch adds a DAG combine rule for BITCAST nodes that convert vector `i1` masks generated by `setcc` into integer vector types. It recognizes common select-mask patterns and lowers them into efficient LoongArch LSX/LASX mask instructions such as [X]VMSKLTZ.{B,H,W,D}, [X]VMSKGEZ.B and [X]VMSKNZ.B. When the vector comparison matches one of the supported patterns (e.g., x < 0, x >= 0, x != 0), the transformation is performed before operation legalization. This avoids scalarization and unnecessary operations, improving both performance and code size.
1 parent 45f4bc6 commit 5bdce50

File tree

6 files changed

+405
-1840
lines changed

6 files changed

+405
-1840
lines changed

llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp

Lines changed: 230 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -388,8 +388,10 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
388388

389389
// Set DAG combine for 'LSX' feature.
390390

391-
if (Subtarget.hasExtLSX())
391+
if (Subtarget.hasExtLSX()) {
392392
setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
393+
setTargetDAGCombine(ISD::BITCAST);
394+
}
393395

394396
// Compute derived properties from the register classes.
395397
computeRegisterProperties(Subtarget.getRegisterInfo());
@@ -4286,6 +4288,94 @@ static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG,
42864288
return SDValue();
42874289
}
42884290

4291+
/// Combine (bitcast (setcc X, C, CC)), where the setcc yields a vector of i1,
/// into a single [X]VMSK* mask node when the comparison is a recognised test
/// of X against an all-zeros or all-ones build vector.
///
/// Recognised forms (X must be exactly 128 bits for LSX or 256 bits for LASX):
///   x == 0           -> [X]VMSKEQZ   (i8 elements only)
///   x != 0           -> [X]VMSKNEZ   (i8 elements only)
///   x > -1, x >= 0   -> [X]VMSKGEZ   (i8 elements only)
///   x < 0,  x <= -1  -> [X]VMSKLTZ   (i8/i16/i32/i64 elements)
///
/// Comparisons for (in)equality with all-ones are deliberately NOT combined:
/// "x == -1" is not the same predicate as "x != 0" (e.g. x = 1), so mapping
/// them onto the non-zero/zero mask nodes would miscompile.
///
/// \param N         The ISD::BITCAST node being combined.
/// \param DAG       The selection DAG used to create replacement nodes.
/// \param DCI       Combiner state; the combine only runs before operation
///                  legalization.
/// \param Subtarget Used to query LSX/LASX availability and GPR width.
/// \returns The replacement value, or an empty SDValue if nothing matched.
static SDValue performBITCASTCombine(SDNode *N, SelectionDAG &DAG,
                                     TargetLowering::DAGCombinerInfo &DCI,
                                     const LoongArchSubtarget &Subtarget) {
  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  SDValue Src = N->getOperand(0);
  EVT SrcVT = Src.getValueType();

  if (!DCI.isBeforeLegalizeOps())
    return SDValue();

  // Only vXi1 sources are interesting; the isVector() check keeps the
  // getVectorElementType()/getVectorNumElements() calls below well-defined.
  if (!SrcVT.isSimple() || !SrcVT.isVector() ||
      SrcVT.getScalarType() != MVT::i1)
    return SDValue();

  if (Src.getOpcode() != ISD::SETCC || !Src.hasOneUse())
    return SDValue();

  EVT CmpVT = Src.getOperand(0).getValueType();
  if (!CmpVT.isVector())
    return SDValue();
  EVT EltVT = CmpVT.getVectorElementType();

  // The selection patterns only exist for full-width LSX (128-bit) and LASX
  // (256-bit) vectors, so any other width must be left alone.
  bool UseLASX;
  if (Subtarget.hasExtLSX() && CmpVT.getSizeInBits() == 128)
    UseLASX = false;
  else if (Subtarget.has32S() && Subtarget.hasExtLASX() &&
           CmpVT.getSizeInBits() == 256)
    UseLASX = true;
  else
    return SDValue();

  SDValue SrcN1 = Src.getOperand(1);
  const bool RHSIsZero = ISD::isBuildVectorAllZeros(SrcN1.getNode());
  const bool RHSIsAllOnes = ISD::isBuildVectorAllOnes(SrcN1.getNode());
  const bool IsByteElt = EltVT == MVT::i8;
  // VMSKLTZ exists for every integer element width.
  const bool IsLTZElt = EltVT == MVT::i8 || EltVT == MVT::i16 ||
                        EltVT == MVT::i32 || EltVT == MVT::i64;

  unsigned Opc = ISD::DELETED_NODE;
  switch (cast<CondCodeSDNode>(Src.getOperand(2))->get()) {
  default:
    return SDValue();
  case ISD::SETEQ:
    // x == 0 => not (vmsknz.b x)
    if (RHSIsZero && IsByteElt)
      Opc = UseLASX ? LoongArchISD::XVMSKEQZ : LoongArchISD::VMSKEQZ;
    break;
  case ISD::SETGT:
    // x > -1 => vmskgez.b x
    if (RHSIsAllOnes && IsByteElt)
      Opc = UseLASX ? LoongArchISD::XVMSKGEZ : LoongArchISD::VMSKGEZ;
    break;
  case ISD::SETGE:
    // x >= 0 => vmskgez.b x
    if (RHSIsZero && IsByteElt)
      Opc = UseLASX ? LoongArchISD::XVMSKGEZ : LoongArchISD::VMSKGEZ;
    break;
  case ISD::SETLT:
    // x < 0 => vmskltz.{b,h,w,d} x
    if (RHSIsZero && IsLTZElt)
      Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
    break;
  case ISD::SETLE:
    // x <= -1 => vmskltz.{b,h,w,d} x
    if (RHSIsAllOnes && IsLTZElt)
      Opc = UseLASX ? LoongArchISD::XVMSKLTZ : LoongArchISD::VMSKLTZ;
    break;
  case ISD::SETNE:
    // x != 0 => vmsknz.b x
    if (RHSIsZero && IsByteElt)
      Opc = UseLASX ? LoongArchISD::XVMSKNEZ : LoongArchISD::VMSKNEZ;
    break;
  }

  if (Opc == ISD::DELETED_NODE)
    return SDValue();

  // The mask pseudos write a GPR, so the node result must use the native GPR
  // width rather than a hard-coded i64.
  MVT GRLenVT = Subtarget.is64Bit() ? MVT::i64 : MVT::i32;
  SDValue V = DAG.getNode(Opc, DL, GRLenVT, Src.getOperand(0));
  // Narrow (or widen) to one bit per vector element before the final bitcast.
  EVT T = EVT::getIntegerVT(*DAG.getContext(), SrcVT.getVectorNumElements());
  V = DAG.getZExtOrTrunc(V, DL, T);
  return DAG.getBitcast(VT, V);
}
4378+
42894379
static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
42904380
TargetLowering::DAGCombinerInfo &DCI,
42914381
const LoongArchSubtarget &Subtarget) {
@@ -5303,6 +5393,8 @@ SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
53035393
return performSETCCCombine(N, DAG, DCI, Subtarget);
53045394
case ISD::SRL:
53055395
return performSRLCombine(N, DAG, DCI, Subtarget);
5396+
case ISD::BITCAST:
5397+
return performBITCASTCombine(N, DAG, DCI, Subtarget);
53065398
case LoongArchISD::BITREV_W:
53075399
return performBITREV_WCombine(N, DAG, DCI, Subtarget);
53085400
case ISD::INTRINSIC_WO_CHAIN:
@@ -5589,6 +5681,120 @@ static MachineBasicBlock *emitPseudoCTPOP(MachineInstr &MI,
55895681
return BB;
55905682
}
55915683

5684+
/// Expand a Pseudo[X]VMSK{LTZ,GEZ,EQZ,NEZ}_* instruction into real
/// instructions that compute the per-element mask in a vector register and
/// then move it into the destination GPR.
///
/// Sequence emitted before \p MI:
///   1. The matching [X]VMSK* instruction on the source vector. The EQZ
///      pseudos have no direct instruction; they emit [X]VMSKNZ_B followed by
///      [X]VNOR_V of the result with itself (bitwise NOT).
///   2. LSX (128-bit):  VPICKVE2GR_HU element 0 of the mask -> Dst.
///      LASX (256-bit): XVPICKVE2GR_WU elements 0 and 4 (bit offsets 0 and
///      128) -> Lo/Hi, then BSTRINS inserts Hi into Lo at bits
///      [256/EleBits - 1 : 128/EleBits] to form Dst.
///
/// \param MI        The pseudo instruction; erased on return.
/// \param BB        The block containing \p MI.
/// \param Subtarget Provides instruction/register info and GPR width.
/// \returns \p BB (no new blocks are created).
static MachineBasicBlock *
emitPseudoVMSKCOND(MachineInstr &MI, MachineBasicBlock *BB,
                   const LoongArchSubtarget &Subtarget) {
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  const TargetRegisterClass *RC = &LoongArch::LSX128RegClass;
  const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  DebugLoc DL = MI.getDebugLoc();
  unsigned EleBits = 8; // Element width tested by the mask instruction.
  unsigned NotOpc = 0;  // Non-zero when the mask must be inverted afterwards.
  unsigned MskOpc;

  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("Unexpected opcode");
  case LoongArch::PseudoVMSKLTZ_B:
    MskOpc = LoongArch::VMSKLTZ_B;
    break;
  case LoongArch::PseudoVMSKLTZ_H:
    MskOpc = LoongArch::VMSKLTZ_H;
    EleBits = 16;
    break;
  case LoongArch::PseudoVMSKLTZ_W:
    MskOpc = LoongArch::VMSKLTZ_W;
    EleBits = 32;
    break;
  case LoongArch::PseudoVMSKLTZ_D:
    MskOpc = LoongArch::VMSKLTZ_D;
    EleBits = 64;
    break;
  case LoongArch::PseudoVMSKGEZ_B:
    MskOpc = LoongArch::VMSKGEZ_B;
    break;
  case LoongArch::PseudoVMSKEQZ_B:
    MskOpc = LoongArch::VMSKNZ_B;
    NotOpc = LoongArch::VNOR_V;
    break;
  case LoongArch::PseudoVMSKNEZ_B:
    MskOpc = LoongArch::VMSKNZ_B;
    break;
  case LoongArch::PseudoXVMSKLTZ_B:
    MskOpc = LoongArch::XVMSKLTZ_B;
    RC = &LoongArch::LASX256RegClass;
    break;
  case LoongArch::PseudoXVMSKLTZ_H:
    MskOpc = LoongArch::XVMSKLTZ_H;
    RC = &LoongArch::LASX256RegClass;
    EleBits = 16;
    break;
  case LoongArch::PseudoXVMSKLTZ_W:
    MskOpc = LoongArch::XVMSKLTZ_W;
    RC = &LoongArch::LASX256RegClass;
    EleBits = 32;
    break;
  case LoongArch::PseudoXVMSKLTZ_D:
    MskOpc = LoongArch::XVMSKLTZ_D;
    RC = &LoongArch::LASX256RegClass;
    EleBits = 64;
    break;
  case LoongArch::PseudoXVMSKGEZ_B:
    MskOpc = LoongArch::XVMSKGEZ_B;
    RC = &LoongArch::LASX256RegClass;
    break;
  case LoongArch::PseudoXVMSKEQZ_B:
    MskOpc = LoongArch::XVMSKNZ_B;
    NotOpc = LoongArch::XVNOR_V;
    RC = &LoongArch::LASX256RegClass;
    break;
  case LoongArch::PseudoXVMSKNEZ_B:
    MskOpc = LoongArch::XVMSKNZ_B;
    RC = &LoongArch::LASX256RegClass;
    break;
  }

  Register Msk = MRI.createVirtualRegister(RC);
  if (NotOpc) {
    // NOR of the non-zero mask with itself yields the "equal to zero" mask.
    // Both kill flags sit on the same instruction, which is the last user.
    Register Tmp = MRI.createVirtualRegister(RC);
    BuildMI(*BB, MI, DL, TII->get(MskOpc), Tmp).addReg(Src);
    BuildMI(*BB, MI, DL, TII->get(NotOpc), Msk)
        .addReg(Tmp, RegState::Kill)
        .addReg(Tmp, RegState::Kill);
  } else {
    BuildMI(*BB, MI, DL, TII->get(MskOpc), Msk).addReg(Src);
  }

  if (TRI->getRegSizeInBits(*RC) > 128) {
    Register Lo = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
    Register Hi = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
    // Msk is read again by the next instruction, so it must not be marked
    // killed here; only the final read below carries the kill flag.
    BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPICKVE2GR_WU), Lo)
        .addReg(Msk)
        .addImm(0);
    BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPICKVE2GR_WU), Hi)
        .addReg(Msk, RegState::Kill)
        .addImm(4);
    // Merge the two partial masks: Hi goes directly above Lo's valid bits.
    BuildMI(*BB, MI, DL,
            TII->get(Subtarget.is64Bit() ? LoongArch::BSTRINS_D
                                         : LoongArch::BSTRINS_W),
            Dst)
        .addReg(Lo, RegState::Kill)
        .addReg(Hi, RegState::Kill)
        .addImm(256 / EleBits - 1)
        .addImm(128 / EleBits);
  } else {
    BuildMI(*BB, MI, DL, TII->get(LoongArch::VPICKVE2GR_HU), Dst)
        .addReg(Msk, RegState::Kill)
        .addImm(0);
  }

  MI.eraseFromParent();
  return BB;
}
5797+
55925798
static bool isSelectPseudo(MachineInstr &MI) {
55935799
switch (MI.getOpcode()) {
55945800
default:
@@ -5795,6 +6001,21 @@ MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter(
57956001
return emitPseudoXVINSGR2VR(MI, BB, Subtarget);
57966002
case LoongArch::PseudoCTPOP:
57976003
return emitPseudoCTPOP(MI, BB, Subtarget);
6004+
case LoongArch::PseudoVMSKLTZ_B:
6005+
case LoongArch::PseudoVMSKLTZ_H:
6006+
case LoongArch::PseudoVMSKLTZ_W:
6007+
case LoongArch::PseudoVMSKLTZ_D:
6008+
case LoongArch::PseudoVMSKGEZ_B:
6009+
case LoongArch::PseudoVMSKEQZ_B:
6010+
case LoongArch::PseudoVMSKNEZ_B:
6011+
case LoongArch::PseudoXVMSKLTZ_B:
6012+
case LoongArch::PseudoXVMSKLTZ_H:
6013+
case LoongArch::PseudoXVMSKLTZ_W:
6014+
case LoongArch::PseudoXVMSKLTZ_D:
6015+
case LoongArch::PseudoXVMSKGEZ_B:
6016+
case LoongArch::PseudoXVMSKEQZ_B:
6017+
case LoongArch::PseudoXVMSKNEZ_B:
6018+
return emitPseudoVMSKCOND(MI, BB, Subtarget);
57986019
case TargetOpcode::STATEPOINT:
57996020
// STATEPOINT is a pseudo instruction which has no implicit defs/uses
58006021
// while bl call instruction (where statepoint will be lowered at the
@@ -5916,6 +6137,14 @@ const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
59166137
NODE_NAME_CASE(VBSLL)
59176138
NODE_NAME_CASE(VBSRL)
59186139
NODE_NAME_CASE(VLDREPL)
6140+
NODE_NAME_CASE(VMSKLTZ)
6141+
NODE_NAME_CASE(VMSKGEZ)
6142+
NODE_NAME_CASE(VMSKEQZ)
6143+
NODE_NAME_CASE(VMSKNEZ)
6144+
NODE_NAME_CASE(XVMSKLTZ)
6145+
NODE_NAME_CASE(XVMSKGEZ)
6146+
NODE_NAME_CASE(XVMSKEQZ)
6147+
NODE_NAME_CASE(XVMSKNEZ)
59196148
}
59206149
#undef NODE_NAME_CASE
59216150
return nullptr;

llvm/lib/Target/LoongArch/LoongArchISelLowering.h

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -161,7 +161,17 @@ enum NodeType : unsigned {
161161
VBSRL,
162162

163163
// Scalar load broadcast to vector
164-
VLDREPL
164+
VLDREPL,
165+
166+
// Vector mask set by condition
167+
VMSKLTZ,
168+
VMSKGEZ,
169+
VMSKEQZ,
170+
VMSKNEZ,
171+
XVMSKLTZ,
172+
XVMSKGEZ,
173+
XVMSKEQZ,
174+
XVMSKNEZ,
165175

166176
// Intrinsic operations end =============================================
167177
};

llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,10 @@
1212

1313
// Target nodes.
1414
def loongarch_xvpermi: SDNode<"LoongArchISD::XVPERMI", SDT_LoongArchV1RUimm>;
15+
// LASX "vector mask set by condition" selection DAG nodes. All four share the
// SDT_LoongArchVMSKCOND type profile.
def loongarch_xvmskltz: SDNode<"LoongArchISD::XVMSKLTZ", SDT_LoongArchVMSKCOND>;
16+
def loongarch_xvmskgez: SDNode<"LoongArchISD::XVMSKGEZ", SDT_LoongArchVMSKCOND>;
17+
def loongarch_xvmskeqz: SDNode<"LoongArchISD::XVMSKEQZ", SDT_LoongArchVMSKCOND>;
18+
def loongarch_xvmsknez: SDNode<"LoongArchISD::XVMSKNEZ", SDT_LoongArchVMSKCOND>;
1519

1620
def lasxsplati8
1721
: PatFrag<(ops node:$e0),
@@ -1086,6 +1090,16 @@ def PseudoXVINSGR2VR_H
10861090
: Pseudo<(outs LASX256:$dst), (ins LASX256:$xd, GPR:$rj, uimm4:$imm)>;
10871091
} // usesCustomInserter = 1, Constraints = "$xd = $dst"
10881092

1093+
// Pseudos that take a LASX256 register and produce the condition mask in a
// GPR. They carry no selection pattern of their own and are expanded by the
// custom inserter (emitPseudoVMSKCOND in LoongArchISelLowering.cpp).
let usesCustomInserter = 1, hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
1094+
def PseudoXVMSKLTZ_B : Pseudo<(outs GPR:$rd), (ins LASX256:$vj)>;
1095+
def PseudoXVMSKLTZ_H : Pseudo<(outs GPR:$rd), (ins LASX256:$vj)>;
1096+
def PseudoXVMSKLTZ_W : Pseudo<(outs GPR:$rd), (ins LASX256:$vj)>;
1097+
def PseudoXVMSKLTZ_D : Pseudo<(outs GPR:$rd), (ins LASX256:$vj)>;
1098+
def PseudoXVMSKGEZ_B : Pseudo<(outs GPR:$rd), (ins LASX256:$vj)>;
1099+
def PseudoXVMSKEQZ_B : Pseudo<(outs GPR:$rd), (ins LASX256:$vj)>;
1100+
def PseudoXVMSKNEZ_B : Pseudo<(outs GPR:$rd), (ins LASX256:$vj)>;
1101+
} // usesCustomInserter = 1, hasSideEffects = 0, mayLoad = 0, mayStore = 0
1102+
10891103
} // Predicates = [HasExtLASX]
10901104

10911105
multiclass PatXr<SDPatternOperator OpNode, string Inst> {
@@ -1856,6 +1870,15 @@ def : Pat<(vt (concat_vectors LSX128:$vd, LSX128:$vj)),
18561870
defm : PatXrXr<abds, "XVABSD">;
18571871
defm : PatXrXrU<abdu, "XVABSD">;
18581872

1873+
// Vector mask set by condition.
// Each mask node is selected to the pseudo matching the operand's element
// type. Only XVMSKLTZ has halfword/word/doubleword forms; GEZ, EQZ and NEZ
// are matched for v32i8 only.
1874+
def : Pat<(loongarch_xvmskltz (v32i8 LASX256:$vj)), (PseudoXVMSKLTZ_B LASX256:$vj)>;
1875+
def : Pat<(loongarch_xvmskltz (v16i16 LASX256:$vj)), (PseudoXVMSKLTZ_H LASX256:$vj)>;
1876+
def : Pat<(loongarch_xvmskltz (v8i32 LASX256:$vj)), (PseudoXVMSKLTZ_W LASX256:$vj)>;
1877+
def : Pat<(loongarch_xvmskltz (v4i64 LASX256:$vj)), (PseudoXVMSKLTZ_D LASX256:$vj)>;
1878+
def : Pat<(loongarch_xvmskgez (v32i8 LASX256:$vj)), (PseudoXVMSKGEZ_B LASX256:$vj)>;
1879+
def : Pat<(loongarch_xvmskeqz (v32i8 LASX256:$vj)), (PseudoXVMSKEQZ_B LASX256:$vj)>;
1880+
def : Pat<(loongarch_xvmsknez (v32i8 LASX256:$vj)), (PseudoXVMSKNEZ_B LASX256:$vj)>;
1881+
18591882
} // Predicates = [HasExtLASX]
18601883

18611884
/// Intrinsic pattern

llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ def SDT_LoongArchVreplgr2vr : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisVec<0>, S
3131
def SDT_LoongArchVFRECIPE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
3232
def SDT_LoongArchVFRSQRTE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
3333
def SDT_LoongArchVLDREPL : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisPtrTy<1>]>;
34+
// Type profile of the "vector mask set by condition" nodes: one integer
// result and one vector operand.
def SDT_LoongArchVMSKCOND : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisVec<1>]>;
3435

3536
// Target nodes.
3637
def loongarch_vreplve : SDNode<"LoongArchISD::VREPLVE", SDT_LoongArchVreplve>;
@@ -74,6 +75,11 @@ def loongarch_vldrepl
7475
: SDNode<"LoongArchISD::VLDREPL",
7576
SDT_LoongArchVLDREPL, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
7677

78+
// LSX "vector mask set by condition" selection DAG nodes. All four share the
// SDT_LoongArchVMSKCOND type profile.
def loongarch_vmskltz: SDNode<"LoongArchISD::VMSKLTZ", SDT_LoongArchVMSKCOND>;
79+
def loongarch_vmskgez: SDNode<"LoongArchISD::VMSKGEZ", SDT_LoongArchVMSKCOND>;
80+
def loongarch_vmskeqz: SDNode<"LoongArchISD::VMSKEQZ", SDT_LoongArchVMSKCOND>;
81+
def loongarch_vmsknez: SDNode<"LoongArchISD::VMSKNEZ", SDT_LoongArchVMSKCOND>;
82+
7783
def immZExt1 : ImmLeaf<i64, [{return isUInt<1>(Imm);}]>;
7884
def immZExt2 : ImmLeaf<i64, [{return isUInt<2>(Imm);}]>;
7985
def immZExt3 : ImmLeaf<i64, [{return isUInt<3>(Imm);}]>;
@@ -1266,6 +1272,16 @@ let usesCustomInserter = 1 in
12661272
def PseudoCTPOP : Pseudo<(outs GPR:$rd), (ins GPR:$rj),
12671273
[(set GPR:$rd, (ctpop GPR:$rj))]>;
12681274

1275+
// Pseudos that take an LSX128 register and produce the condition mask in a
// GPR. They carry no selection pattern of their own and are expanded by the
// custom inserter (emitPseudoVMSKCOND in LoongArchISelLowering.cpp).
let usesCustomInserter = 1, hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
1276+
def PseudoVMSKLTZ_B : Pseudo<(outs GPR:$rd), (ins LSX128:$vj)>;
1277+
def PseudoVMSKLTZ_H : Pseudo<(outs GPR:$rd), (ins LSX128:$vj)>;
1278+
def PseudoVMSKLTZ_W : Pseudo<(outs GPR:$rd), (ins LSX128:$vj)>;
1279+
def PseudoVMSKLTZ_D : Pseudo<(outs GPR:$rd), (ins LSX128:$vj)>;
1280+
def PseudoVMSKGEZ_B : Pseudo<(outs GPR:$rd), (ins LSX128:$vj)>;
1281+
def PseudoVMSKEQZ_B : Pseudo<(outs GPR:$rd), (ins LSX128:$vj)>;
1282+
def PseudoVMSKNEZ_B : Pseudo<(outs GPR:$rd), (ins LSX128:$vj)>;
1283+
} // usesCustomInserter = 1, hasSideEffects = 0, mayLoad = 0, mayStore = 0
1284+
12691285
} // Predicates = [HasExtLSX]
12701286

12711287
multiclass PatVr<SDPatternOperator OpNode, string Inst> {
@@ -2050,6 +2066,15 @@ def : Pat<(f64 f64imm_vldi:$in),
20502066
defm : PatVrVr<abds, "VABSD">;
20512067
defm : PatVrVrU<abdu, "VABSD">;
20522068

2069+
// Vector mask set by condition.
// Each mask node is selected to the pseudo matching the operand's element
// type. Only VMSKLTZ has halfword/word/doubleword forms; GEZ, EQZ and NEZ
// are matched for v16i8 only.
2070+
def : Pat<(loongarch_vmskltz (v16i8 LSX128:$vj)), (PseudoVMSKLTZ_B LSX128:$vj)>;
2071+
def : Pat<(loongarch_vmskltz (v8i16 LSX128:$vj)), (PseudoVMSKLTZ_H LSX128:$vj)>;
2072+
def : Pat<(loongarch_vmskltz (v4i32 LSX128:$vj)), (PseudoVMSKLTZ_W LSX128:$vj)>;
2073+
def : Pat<(loongarch_vmskltz (v2i64 LSX128:$vj)), (PseudoVMSKLTZ_D LSX128:$vj)>;
2074+
def : Pat<(loongarch_vmskgez (v16i8 LSX128:$vj)), (PseudoVMSKGEZ_B LSX128:$vj)>;
2075+
def : Pat<(loongarch_vmskeqz (v16i8 LSX128:$vj)), (PseudoVMSKEQZ_B LSX128:$vj)>;
2076+
def : Pat<(loongarch_vmsknez (v16i8 LSX128:$vj)), (PseudoVMSKNEZ_B LSX128:$vj)>;
2077+
20532078
} // Predicates = [HasExtLSX]
20542079

20552080
/// Intrinsic pattern

0 commit comments

Comments
 (0)