
Commit 8ed029d

heiher authored and DhruvSrivastavaX committed
[LoongArch] Add codegen support for atomic-ops on LA32 (llvm#141557)
This patch adds codegen support for the atomic operations `cmpxchg`, `max`, `min`, `umax`, and `umin` on the LA32 target.
1 parent 1be3b9a · commit 8ed029d
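
To see what this enables, here is a minimal sketch (not part of the patch; the function name and operands are illustrative) that uses LLVM's C++ IRBuilder API to construct the kind of IR the LA32 backend can now select — an `atomicrmw max` followed by an i32 `cmpxchg`:

// Sketch: build IR that exercises the newly supported LA32 lowerings.
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("atomics-demo", Ctx);
  Type *I32 = Type::getInt32Ty(Ctx);
  PointerType *PtrTy = PointerType::get(Ctx, /*AddressSpace=*/0);
  Function *F = Function::Create(
      FunctionType::get(I32, {PtrTy, I32, I32}, /*isVarArg=*/false),
      Function::ExternalLinkage, "amax_cas", M);
  IRBuilder<> B(BasicBlock::Create(Ctx, "entry", F));

  // atomicrmw max ptr %p, i32 %v seq_cst -- previously unselectable on LA32.
  Value *Old = B.CreateAtomicRMW(AtomicRMWInst::Max, F->getArg(0),
                                 F->getArg(1), MaybeAlign(4),
                                 AtomicOrdering::SequentiallyConsistent);
  // cmpxchg ptr %p, i32 %old, i32 %new seq_cst monotonic -- likewise.
  B.CreateAtomicCmpXchg(F->getArg(0), Old, F->getArg(2), MaybeAlign(4),
                        AtomicOrdering::SequentiallyConsistent,
                        AtomicOrdering::Monotonic);
  B.CreateRet(Old);
  M.print(outs(), nullptr); // pipe into llc -mtriple=loongarch32 to inspect
  return 0;
}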

File tree

5 files changed: +2740 -62 lines changed

llvm/include/llvm/IR/IntrinsicsLoongArch.td (4 additions, 3 deletions)

@@ -25,12 +25,13 @@ class MaskedAtomicRMW<LLVMType itype>
 multiclass MaskedAtomicRMWIntrinsics {
   // i32 @llvm.<name>.i32.<p>(any*, i32, i32, i32 imm);
   def _i32 : MaskedAtomicRMW<llvm_i32_ty>;
-  // i64 @llvm.<name>.i32.<p>(any*, i64, i64, i64 imm);
+  // i64 @llvm.<name>.i64.<p>(any*, i64, i64, i64 imm);
   def _i64 : MaskedAtomicRMW<llvm_i64_ty>;
 }
 
 multiclass MaskedAtomicRMWFiveOpIntrinsics {
-  // TODO: Support cmpxchg on LA32.
+  // i32 @llvm.<name>.i32.<p>(any*, i32, i32, i32, i32 imm);
+  def _i32 : MaskedAtomicRMWFiveArg<llvm_i32_ty>;
   // i64 @llvm.<name>.i64.<p>(any*, i64, i64, i64, i64 imm);
   def _i64 : MaskedAtomicRMWFiveArg<llvm_i64_ty>;
 }
@@ -44,7 +45,7 @@ defm int_loongarch_masked_atomicrmw_umin : MaskedAtomicRMWIntrinsics;
 defm int_loongarch_masked_atomicrmw_max : MaskedAtomicRMWFiveOpIntrinsics;
 defm int_loongarch_masked_atomicrmw_min : MaskedAtomicRMWFiveOpIntrinsics;
 
-// @llvm.loongarch.masked.cmpxchg.i64.<p>(
+// @llvm.loongarch.masked.cmpxchg.<i32,i64>.<p>(
 //   ptr addr, grlen cmpval, grlen newval, grlen mask, grlenimm ordering)
 defm int_loongarch_masked_cmpxchg : MaskedAtomicRMWFiveOpIntrinsics;
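
The `_i32` records generated above give the LA32 lowering path intrinsic IDs to emit from C++. A hedged sketch of such a call site (the helper name and operand values are assumptions, not code from this patch); the four arguments match the signature comment above:

// Sketch: emitting the masked umax intrinsic for a part-word atomic on LA32.
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicsLoongArch.h"

using namespace llvm;

// AlignedAddr/Incr/Mask/Ordering stand in for the values the generic
// atomic-expansion machinery normally supplies.
Value *emitMaskedUMaxI32(IRBuilderBase &Builder, Value *AlignedAddr,
                         Value *Incr, Value *Mask, Value *Ordering) {
  // i32 @llvm.loongarch.masked.atomicrmw.umax.i32.<p>(any*, i32, i32, i32 imm)
  return Builder.CreateIntrinsic(
      Intrinsic::loongarch_masked_atomicrmw_umax_i32,
      {AlignedAddr->getType()}, {AlignedAddr, Incr, Mask, Ordering});
}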

llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp (56 additions, 33 deletions)

@@ -122,6 +122,18 @@ bool LoongArchExpandAtomicPseudo::expandMI(
   case LoongArch::PseudoAtomicLoadXor32:
     return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xor, false, 32,
                              NextMBBI);
+  case LoongArch::PseudoAtomicLoadUMax32:
+    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMax, false, 32,
+                                NextMBBI);
+  case LoongArch::PseudoAtomicLoadUMin32:
+    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMin, false, 32,
+                                NextMBBI);
+  case LoongArch::PseudoAtomicLoadMax32:
+    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Max, false, 32,
+                                NextMBBI);
+  case LoongArch::PseudoAtomicLoadMin32:
+    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Min, false, 32,
+                                NextMBBI);
   case LoongArch::PseudoMaskedAtomicLoadUMax32:
     return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMax, true, 32,
                                 NextMBBI);
@@ -356,8 +368,6 @@ bool LoongArchExpandAtomicPseudo::expandAtomicMinMaxOp(
     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
     AtomicRMWInst::BinOp BinOp, bool IsMasked, int Width,
     MachineBasicBlock::iterator &NextMBBI) {
-  assert(IsMasked == true &&
-         "Should only need to expand masked atomic max/min");
   assert(Width == 32 && "Should never need to expand masked 64-bit operations");
 
   MachineInstr &MI = *MBBI;
@@ -385,79 +395,92 @@
   MBB.addSuccessor(LoopHeadMBB);
 
   Register DestReg = MI.getOperand(0).getReg();
-  Register Scratch1Reg = MI.getOperand(1).getReg();
-  Register Scratch2Reg = MI.getOperand(2).getReg();
-  Register AddrReg = MI.getOperand(3).getReg();
-  Register IncrReg = MI.getOperand(4).getReg();
-  Register MaskReg = MI.getOperand(5).getReg();
+  Register ScratchReg = MI.getOperand(1).getReg();
+  Register AddrReg = MI.getOperand(IsMasked ? 3 : 2).getReg();
+  Register IncrReg = MI.getOperand(IsMasked ? 4 : 3).getReg();
+  Register CmprReg = DestReg;
 
   //
   // .loophead:
   //   ll.w destreg, (alignedaddr)
-  //   and scratch2, destreg, mask
-  //   move scratch1, destreg
   BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::LL_W), DestReg)
       .addReg(AddrReg)
       .addImm(0);
-  BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::AND), Scratch2Reg)
-      .addReg(DestReg)
-      .addReg(MaskReg);
-  BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::OR), Scratch1Reg)
+  //   and cmpr, destreg, mask
+  if (IsMasked) {
+    Register MaskReg = MI.getOperand(5).getReg();
+    CmprReg = MI.getOperand(2).getReg();
+    BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::AND), CmprReg)
+        .addReg(DestReg)
+        .addReg(MaskReg);
+  }
+  //   move scratch, destreg
+  BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::OR), ScratchReg)
       .addReg(DestReg)
       .addReg(LoongArch::R0);
 
   switch (BinOp) {
   default:
     llvm_unreachable("Unexpected AtomicRMW BinOp");
-  // bgeu scratch2, incr, .looptail
+  // bgeu cmpr, incr, .looptail
   case AtomicRMWInst::UMax:
     BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::BGEU))
-        .addReg(Scratch2Reg)
+        .addReg(CmprReg)
        .addReg(IncrReg)
        .addMBB(LoopTailMBB);
    break;
-  // bgeu incr, scratch2, .looptail
+  // bgeu incr, cmpr, .looptail
   case AtomicRMWInst::UMin:
     BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::BGEU))
         .addReg(IncrReg)
-        .addReg(Scratch2Reg)
+        .addReg(CmprReg)
        .addMBB(LoopTailMBB);
    break;
   case AtomicRMWInst::Max:
-    insertSext(TII, DL, LoopHeadMBB, Scratch2Reg, MI.getOperand(6).getReg());
-    // bge scratch2, incr, .looptail
+    if (IsMasked)
+      insertSext(TII, DL, LoopHeadMBB, CmprReg, MI.getOperand(6).getReg());
+    // bge cmpr, incr, .looptail
     BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::BGE))
-        .addReg(Scratch2Reg)
+        .addReg(CmprReg)
        .addReg(IncrReg)
        .addMBB(LoopTailMBB);
    break;
   case AtomicRMWInst::Min:
-    insertSext(TII, DL, LoopHeadMBB, Scratch2Reg, MI.getOperand(6).getReg());
-    // bge incr, scratch2, .looptail
+    if (IsMasked)
+      insertSext(TII, DL, LoopHeadMBB, CmprReg, MI.getOperand(6).getReg());
+    // bge incr, cmpr, .looptail
     BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::BGE))
         .addReg(IncrReg)
-        .addReg(Scratch2Reg)
+        .addReg(CmprReg)
        .addMBB(LoopTailMBB);
    break;
     // TODO: support other AtomicRMWInst.
   }
 
   // .loopifbody:
-  //   xor scratch1, destreg, incr
-  //   and scratch1, scratch1, mask
-  //   xor scratch1, destreg, scratch1
-  insertMaskedMerge(TII, DL, LoopIfBodyMBB, Scratch1Reg, DestReg, IncrReg,
-                    MaskReg, Scratch1Reg);
+  if (IsMasked) {
+    Register MaskReg = MI.getOperand(5).getReg();
+    //   xor scratch, destreg, incr
+    //   and scratch, scratch, mask
+    //   xor scratch, destreg, scratch
+    insertMaskedMerge(TII, DL, LoopIfBodyMBB, ScratchReg, DestReg, IncrReg,
+                      MaskReg, ScratchReg);
+  } else {
+    //   move scratch, incr
+    BuildMI(LoopIfBodyMBB, DL, TII->get(LoongArch::OR), ScratchReg)
+        .addReg(IncrReg)
+        .addReg(LoongArch::R0);
+  }
 
   // .looptail:
-  //   sc.w scratch1, scratch1, (addr)
-  //   beqz scratch1, loop
-  BuildMI(LoopTailMBB, DL, TII->get(LoongArch::SC_W), Scratch1Reg)
-      .addReg(Scratch1Reg)
+  //   sc.w scratch, scratch, (addr)
+  //   beqz scratch, loop
+  BuildMI(LoopTailMBB, DL, TII->get(LoongArch::SC_W), ScratchReg)
+      .addReg(ScratchReg)
      .addReg(AddrReg)
      .addImm(0);
   BuildMI(LoopTailMBB, DL, TII->get(LoongArch::BEQ))
-      .addReg(Scratch1Reg)
+      .addReg(ScratchReg)
      .addReg(LoongArch::R0)
      .addMBB(LoopHeadMBB);
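
For readers less familiar with the LL/SC idiom, here is a scalar C++ model of the unmasked max expansion above (illustrative only: compare_exchange_weak stands in for the ll.w/sc.w pair, and the function name is made up):

#include <atomic>
#include <cstdint>

// Models the .loophead/.loopifbody/.looptail structure emitted for
// PseudoAtomicLoadMax32: observe the old value, keep it if it already
// satisfies the predicate (the "bge cmpr, incr, .looptail" branch),
// otherwise take the increment, then try to commit and retry on failure.
int32_t atomicMax32Model(std::atomic<int32_t> &Addr, int32_t Incr) {
  int32_t Dest = Addr.load();                     // ll.w  dest, (addr)
  while (true) {
    int32_t Scratch = Dest;                       // move  scratch, dest
    if (!(Dest >= Incr))                          // bge   dest, incr, .looptail
      Scratch = Incr;                             // move  scratch, incr
    // sc.w scratch, (addr); beqz scratch, .loophead
    if (Addr.compare_exchange_weak(Dest, Scratch))
      break;                                      // store committed
  }
  return Dest;                                    // result is the old value
}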

llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp (18 additions, 7 deletions)

@@ -7364,6 +7364,14 @@ getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen,
     return Intrinsic::loongarch_masked_atomicrmw_sub_i32;
   case AtomicRMWInst::Nand:
     return Intrinsic::loongarch_masked_atomicrmw_nand_i32;
+  case AtomicRMWInst::UMax:
+    return Intrinsic::loongarch_masked_atomicrmw_umax_i32;
+  case AtomicRMWInst::UMin:
+    return Intrinsic::loongarch_masked_atomicrmw_umin_i32;
+  case AtomicRMWInst::Max:
+    return Intrinsic::loongarch_masked_atomicrmw_max_i32;
+  case AtomicRMWInst::Min:
+    return Intrinsic::loongarch_masked_atomicrmw_min_i32;
   // TODO: support other AtomicRMWInst.
   }
 }
@@ -7387,19 +7395,22 @@ LoongArchTargetLowering::shouldExpandAtomicCmpXchgInIR(
 Value *LoongArchTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
     IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
     Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
+  unsigned GRLen = Subtarget.getGRLen();
   AtomicOrdering FailOrd = CI->getFailureOrdering();
   Value *FailureOrdering =
       Builder.getIntN(Subtarget.getGRLen(), static_cast<uint64_t>(FailOrd));
-
-  // TODO: Support cmpxchg on LA32.
-  Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64;
-  CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
-  NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
-  Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
+  Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i32;
+  if (GRLen == 64) {
+    CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64;
+    CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
+    NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
+    Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
+  }
   Type *Tys[] = {AlignedAddr->getType()};
   Value *Result = Builder.CreateIntrinsic(
       CmpXchgIntrID, Tys, {AlignedAddr, CmpVal, NewVal, Mask, FailureOrdering});
-  Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
+  if (GRLen == 64)
+    Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
   return Result;
 }
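
The masked cmpxchg intrinsic emitted here operates on an aligned full word, comparing and replacing only the lane selected by the mask. A scalar C++ model of that contract (an illustration, assuming cmpval and newval are already shifted into lane position with all other bits zero, as the generic AtomicExpand pass arranges):

#include <cstdint>

// Models one pass through the masked-cmpxchg LL/SC loop on GRLen == 32:
// compare only the bits under Mask; on success, splice NewVal's lane into
// the word while preserving the neighboring bytes.
uint32_t maskedCmpXchgModel(uint32_t &Word, uint32_t CmpVal, uint32_t NewVal,
                            uint32_t Mask) {
  uint32_t Old = Word;                 // ll.w
  if ((Old & Mask) == CmpVal)          // lane-only comparison
    Word = (Old & ~Mask) | NewVal;     // merge, then sc.w
  return Old;                          // the loaded word is the result
}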

llvm/lib/Target/LoongArch/LoongArchInstrInfo.td (40 additions, 17 deletions)

@@ -2055,6 +2055,10 @@ def PseudoAtomicLoadSub32 : PseudoAM;
 def PseudoAtomicLoadAnd32 : PseudoAM;
 def PseudoAtomicLoadOr32 : PseudoAM;
 def PseudoAtomicLoadXor32 : PseudoAM;
+def PseudoAtomicLoadUMax32 : PseudoAM;
+def PseudoAtomicLoadUMin32 : PseudoAM;
+def PseudoAtomicLoadMax32 : PseudoAM;
+def PseudoAtomicLoadMin32 : PseudoAM;
 
 multiclass PseudoBinPat<string Op, Pseudo BinInst> {
   def : Pat<(!cast<PatFrag>(Op#"_monotonic") GPR:$addr, GPR:$incr),
@@ -2253,6 +2257,22 @@ def : Pat<(atomic_cmp_swap_i64 GPR:$addr, GPR:$cmp, GPR:$new),
           (AMCAS__DB_D GPR:$cmp, GPR:$new, GPR:$addr)>;
 }
 
+// Ordering constants must be kept in sync with the AtomicOrdering enum in
+// AtomicOrdering.h.
+multiclass PseudoCmpXchgPat<string Op, Pseudo CmpXchgInst,
+                            ValueType vt = GRLenVT> {
+  def : Pat<(vt (!cast<PatFrag>(Op#"_failure_monotonic") GPR:$addr, GPR:$cmp, GPR:$new)),
+            (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 2)>;
+  def : Pat<(vt (!cast<PatFrag>(Op#"_failure_acquire") GPR:$addr, GPR:$cmp, GPR:$new)),
+            (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 4)>;
+  def : Pat<(vt (!cast<PatFrag>(Op#"_failure_release") GPR:$addr, GPR:$cmp, GPR:$new)),
+            (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 5)>;
+  def : Pat<(vt (!cast<PatFrag>(Op#"_failure_acq_rel") GPR:$addr, GPR:$cmp, GPR:$new)),
+            (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 6)>;
+  def : Pat<(vt (!cast<PatFrag>(Op#"_failure_seq_cst") GPR:$addr, GPR:$cmp, GPR:$new)),
+            (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 7)>;
+}
+
 let Predicates = [IsLA64] in {
 defm : binary_atomic_op_wd<"AMSWAP", "atomic_swap">;
 defm : binary_atomic_op_wd<"AMADD", "atomic_load_add">;

@@ -2288,23 +2308,6 @@ def : AtomicPat<int_loongarch_masked_atomicrmw_umax_i64,
                 PseudoMaskedAtomicLoadUMax32>;
 def : AtomicPat<int_loongarch_masked_atomicrmw_umin_i64,
                 PseudoMaskedAtomicLoadUMin32>;
 
-// Ordering constants must be kept in sync with the AtomicOrdering enum in
-// AtomicOrdering.h.
-multiclass PseudoCmpXchgPat<string Op, Pseudo CmpXchgInst,
-                            ValueType vt = GRLenVT> {
-  def : Pat<(vt (!cast<PatFrag>(Op#"_failure_monotonic") GPR:$addr, GPR:$cmp, GPR:$new)),
-            (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 2)>;
-  def : Pat<(vt (!cast<PatFrag>(Op#"_failure_acquire") GPR:$addr, GPR:$cmp, GPR:$new)),
-            (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 4)>;
-  def : Pat<(vt (!cast<PatFrag>(Op#"_failure_release") GPR:$addr, GPR:$cmp, GPR:$new)),
-            (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 5)>;
-  def : Pat<(vt (!cast<PatFrag>(Op#"_failure_acq_rel") GPR:$addr, GPR:$cmp, GPR:$new)),
-            (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 6)>;
-  def : Pat<(vt (!cast<PatFrag>(Op#"_failure_seq_cst") GPR:$addr, GPR:$cmp, GPR:$new)),
-            (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 7)>;
-}
-
-defm : PseudoCmpXchgPat<"atomic_cmp_swap_i32", PseudoCmpXchg32>;
 defm : PseudoCmpXchgPat<"atomic_cmp_swap_i64", PseudoCmpXchg64, i64>;
 def : Pat<(int_loongarch_masked_cmpxchg_i64
             GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$fail_order),
@@ -2317,6 +2320,7 @@ def : PseudoMaskedAMMinMaxPat<int_loongarch_masked_atomicrmw_min_i64,
                               PseudoMaskedAtomicLoadMin32>;
 } // Predicates = [IsLA64]
 
+defm : PseudoCmpXchgPat<"atomic_cmp_swap_i32", PseudoCmpXchg32>;
 defm : PseudoBinPat<"atomic_load_nand_i32", PseudoAtomicLoadNand32>;
 
 let Predicates = [IsLA32] in {
@@ -2329,11 +2333,30 @@ def : AtomicPat<int_loongarch_masked_atomicrmw_sub_i32,
                 PseudoMaskedAtomicLoadSub32>;
 def : AtomicPat<int_loongarch_masked_atomicrmw_nand_i32,
                 PseudoMaskedAtomicLoadNand32>;
+def : AtomicPat<int_loongarch_masked_atomicrmw_umax_i32,
+                PseudoMaskedAtomicLoadUMax32>;
+def : AtomicPat<int_loongarch_masked_atomicrmw_umin_i32,
+                PseudoMaskedAtomicLoadUMin32>;
+
+def : PseudoMaskedAMMinMaxPat<int_loongarch_masked_atomicrmw_max_i32,
+                              PseudoMaskedAtomicLoadMax32>;
+def : PseudoMaskedAMMinMaxPat<int_loongarch_masked_atomicrmw_min_i32,
+                              PseudoMaskedAtomicLoadMin32>;
+
+def : Pat<(int_loongarch_masked_cmpxchg_i32
+            GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$fail_order),
+          (PseudoMaskedCmpXchg32
+            GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$fail_order)>;
+
 defm : PseudoBinPat<"atomic_load_add_i32", PseudoAtomicLoadAdd32>;
 defm : PseudoBinPat<"atomic_load_sub_i32", PseudoAtomicLoadSub32>;
 defm : PseudoBinPat<"atomic_load_and_i32", PseudoAtomicLoadAnd32>;
 defm : PseudoBinPat<"atomic_load_or_i32", PseudoAtomicLoadOr32>;
 defm : PseudoBinPat<"atomic_load_xor_i32", PseudoAtomicLoadXor32>;
+defm : PseudoBinPat<"atomic_load_umax_i32", PseudoAtomicLoadUMax32>;
+defm : PseudoBinPat<"atomic_load_umin_i32", PseudoAtomicLoadUMin32>;
+defm : PseudoBinPat<"atomic_load_max_i32", PseudoAtomicLoadMax32>;
+defm : PseudoBinPat<"atomic_load_min_i32", PseudoAtomicLoadMin32>;
 } // Predicates = [IsLA32]
 
 /// Intrinsics

Comments (0)