[LoongArch] Add codegen support for atomic-ops on LA32 #141557
Conversation
This patch adds codegen support for atomic operations `cmpxchg`, `max`, `min`, `umax` and `umin` on the LA32 target.
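For context, here is a minimal IR sketch of the operations this enables on LA32 (the function and value names are illustrative, not taken from the patch):

; Illustrative i32 atomics that now lower to native LL.W/SC.W loops on LA32,
; e.g. via: llc --mtriple=loongarch32 -mattr=+d example.ll
define i32 @rmw_max(ptr %p, i32 %v) nounwind {
  %old = atomicrmw max ptr %p, i32 %v acquire
  ret i32 %old
}

define i32 @rmw_umin(ptr %p, i32 %v) nounwind {
  %old = atomicrmw umin ptr %p, i32 %v acquire
  ret i32 %old
}

define { i32, i1 } @cas(ptr %p, i32 %cmp, i32 %new) nounwind {
  %res = cmpxchg ptr %p, i32 %cmp, i32 %new acq_rel acquire
  ret { i32, i1 } %res
}

As the updated tests below show, i8/i16/i32 widths now expand inline (masked LL.W/SC.W loops for sub-word types), while i64 on LA32 still falls back to libcalls such as `__atomic_compare_exchange_8`.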
@llvm/pr-subscribers-llvm-ir

Author: hev (heiher)

Changes: This patch adds codegen support for atomic operations `cmpxchg`, `max`, `min`, `umax` and `umin` on the LA32 target.

Patch is 132.89 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/141557.diff

5 Files Affected:
diff --git a/llvm/include/llvm/IR/IntrinsicsLoongArch.td b/llvm/include/llvm/IR/IntrinsicsLoongArch.td
index 4621f1689b46e..f5e3d412666a2 100644
--- a/llvm/include/llvm/IR/IntrinsicsLoongArch.td
+++ b/llvm/include/llvm/IR/IntrinsicsLoongArch.td
@@ -25,12 +25,13 @@ class MaskedAtomicRMW<LLVMType itype>
multiclass MaskedAtomicRMWIntrinsics {
// i32 @llvm.<name>.i32.<p>(any*, i32, i32, i32 imm);
def _i32 : MaskedAtomicRMW<llvm_i32_ty>;
- // i64 @llvm.<name>.i32.<p>(any*, i64, i64, i64 imm);
+ // i64 @llvm.<name>.i64.<p>(any*, i64, i64, i64 imm);
def _i64 : MaskedAtomicRMW<llvm_i64_ty>;
}
multiclass MaskedAtomicRMWFiveOpIntrinsics {
- // TODO: Support cmpxchg on LA32.
+ // i32 @llvm.<name>.i32.<p>(any*, i32, i32, i32, i32 imm);
+ def _i32 : MaskedAtomicRMWFiveArg<llvm_i32_ty>;
// i64 @llvm.<name>.i64.<p>(any*, i64, i64, i64, i64 imm);
def _i64 : MaskedAtomicRMWFiveArg<llvm_i64_ty>;
}
@@ -44,7 +45,7 @@ defm int_loongarch_masked_atomicrmw_umin : MaskedAtomicRMWIntrinsics;
defm int_loongarch_masked_atomicrmw_max : MaskedAtomicRMWFiveOpIntrinsics;
defm int_loongarch_masked_atomicrmw_min : MaskedAtomicRMWFiveOpIntrinsics;
-// @llvm.loongarch.masked.cmpxchg.i64.<p>(
+// @llvm.loongarch.masked.cmpxchg.<i32,i64>.<p>(
// ptr addr, grlen cmpval, grlen newval, grlen mask, grlenimm ordering)
defm int_loongarch_masked_cmpxchg : MaskedAtomicRMWFiveOpIntrinsics;
diff --git a/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp b/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp
index 3be012feb2385..73874fccc0308 100644
--- a/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp
@@ -122,6 +122,18 @@ bool LoongArchExpandAtomicPseudo::expandMI(
case LoongArch::PseudoAtomicLoadXor32:
return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xor, false, 32,
NextMBBI);
+ case LoongArch::PseudoAtomicLoadUMax32:
+ return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMax, false, 32,
+ NextMBBI);
+ case LoongArch::PseudoAtomicLoadUMin32:
+ return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMin, false, 32,
+ NextMBBI);
+ case LoongArch::PseudoAtomicLoadMax32:
+ return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Max, false, 32,
+ NextMBBI);
+ case LoongArch::PseudoAtomicLoadMin32:
+ return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Min, false, 32,
+ NextMBBI);
case LoongArch::PseudoMaskedAtomicLoadUMax32:
return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMax, true, 32,
NextMBBI);
@@ -356,8 +368,6 @@ bool LoongArchExpandAtomicPseudo::expandAtomicMinMaxOp(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
AtomicRMWInst::BinOp BinOp, bool IsMasked, int Width,
MachineBasicBlock::iterator &NextMBBI) {
- assert(IsMasked == true &&
- "Should only need to expand masked atomic max/min");
assert(Width == 32 && "Should never need to expand masked 64-bit operations");
MachineInstr &MI = *MBBI;
@@ -385,79 +395,92 @@ bool LoongArchExpandAtomicPseudo::expandAtomicMinMaxOp(
MBB.addSuccessor(LoopHeadMBB);
Register DestReg = MI.getOperand(0).getReg();
- Register Scratch1Reg = MI.getOperand(1).getReg();
- Register Scratch2Reg = MI.getOperand(2).getReg();
- Register AddrReg = MI.getOperand(3).getReg();
- Register IncrReg = MI.getOperand(4).getReg();
- Register MaskReg = MI.getOperand(5).getReg();
+ Register ScratchReg = MI.getOperand(1).getReg();
+ Register AddrReg = MI.getOperand(IsMasked ? 3 : 2).getReg();
+ Register IncrReg = MI.getOperand(IsMasked ? 4 : 3).getReg();
+ Register CmprReg = DestReg;
//
// .loophead:
// ll.w destreg, (alignedaddr)
- // and scratch2, destreg, mask
- // move scratch1, destreg
BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::LL_W), DestReg)
.addReg(AddrReg)
.addImm(0);
- BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::AND), Scratch2Reg)
- .addReg(DestReg)
- .addReg(MaskReg);
- BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::OR), Scratch1Reg)
+ // and cmpr, destreg, mask
+ if (IsMasked) {
+ Register MaskReg = MI.getOperand(5).getReg();
+ CmprReg = MI.getOperand(2).getReg();
+ BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::AND), CmprReg)
+ .addReg(DestReg)
+ .addReg(MaskReg);
+ }
+ // move scratch, destreg
+ BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::OR), ScratchReg)
.addReg(DestReg)
.addReg(LoongArch::R0);
switch (BinOp) {
default:
llvm_unreachable("Unexpected AtomicRMW BinOp");
- // bgeu scratch2, incr, .looptail
+ // bgeu cmpr, incr, .looptail
case AtomicRMWInst::UMax:
BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::BGEU))
- .addReg(Scratch2Reg)
+ .addReg(CmprReg)
.addReg(IncrReg)
.addMBB(LoopTailMBB);
break;
- // bgeu incr, scratch2, .looptail
+ // bgeu incr, cmpr, .looptail
case AtomicRMWInst::UMin:
BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::BGEU))
.addReg(IncrReg)
- .addReg(Scratch2Reg)
+ .addReg(CmprReg)
.addMBB(LoopTailMBB);
break;
case AtomicRMWInst::Max:
- insertSext(TII, DL, LoopHeadMBB, Scratch2Reg, MI.getOperand(6).getReg());
- // bge scratch2, incr, .looptail
+ if (IsMasked)
+ insertSext(TII, DL, LoopHeadMBB, CmprReg, MI.getOperand(6).getReg());
+ // bge cmpr, incr, .looptail
BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::BGE))
- .addReg(Scratch2Reg)
+ .addReg(CmprReg)
.addReg(IncrReg)
.addMBB(LoopTailMBB);
break;
case AtomicRMWInst::Min:
- insertSext(TII, DL, LoopHeadMBB, Scratch2Reg, MI.getOperand(6).getReg());
- // bge incr, scratch2, .looptail
+ if (IsMasked)
+ insertSext(TII, DL, LoopHeadMBB, CmprReg, MI.getOperand(6).getReg());
+ // bge incr, cmpr, .looptail
BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::BGE))
.addReg(IncrReg)
- .addReg(Scratch2Reg)
+ .addReg(CmprReg)
.addMBB(LoopTailMBB);
break;
// TODO: support other AtomicRMWInst.
}
// .loopifbody:
- // xor scratch1, destreg, incr
- // and scratch1, scratch1, mask
- // xor scratch1, destreg, scratch1
- insertMaskedMerge(TII, DL, LoopIfBodyMBB, Scratch1Reg, DestReg, IncrReg,
- MaskReg, Scratch1Reg);
+ if (IsMasked) {
+ Register MaskReg = MI.getOperand(5).getReg();
+ // xor scratch, destreg, incr
+ // and scratch, scratch, mask
+ // xor scratch, destreg, scratch
+ insertMaskedMerge(TII, DL, LoopIfBodyMBB, ScratchReg, DestReg, IncrReg,
+ MaskReg, ScratchReg);
+ } else {
+ // move scratch, incr
+ BuildMI(LoopIfBodyMBB, DL, TII->get(LoongArch::OR), ScratchReg)
+ .addReg(IncrReg)
+ .addReg(LoongArch::R0);
+ }
// .looptail:
- // sc.w scratch1, scratch1, (addr)
- // beqz scratch1, loop
- BuildMI(LoopTailMBB, DL, TII->get(LoongArch::SC_W), Scratch1Reg)
- .addReg(Scratch1Reg)
+ // sc.w scratch, scratch, (addr)
+ // beqz scratch, loop
+ BuildMI(LoopTailMBB, DL, TII->get(LoongArch::SC_W), ScratchReg)
+ .addReg(ScratchReg)
.addReg(AddrReg)
.addImm(0);
BuildMI(LoopTailMBB, DL, TII->get(LoongArch::BEQ))
- .addReg(Scratch1Reg)
+ .addReg(ScratchReg)
.addReg(LoongArch::R0)
.addMBB(LoopHeadMBB);
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 9f5c94ddea44f..c96be139340f3 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -7070,6 +7070,14 @@ getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen,
return Intrinsic::loongarch_masked_atomicrmw_sub_i32;
case AtomicRMWInst::Nand:
return Intrinsic::loongarch_masked_atomicrmw_nand_i32;
+ case AtomicRMWInst::UMax:
+ return Intrinsic::loongarch_masked_atomicrmw_umax_i32;
+ case AtomicRMWInst::UMin:
+ return Intrinsic::loongarch_masked_atomicrmw_umin_i32;
+ case AtomicRMWInst::Max:
+ return Intrinsic::loongarch_masked_atomicrmw_max_i32;
+ case AtomicRMWInst::Min:
+ return Intrinsic::loongarch_masked_atomicrmw_min_i32;
// TODO: support other AtomicRMWInst.
}
}
@@ -7093,19 +7101,22 @@ LoongArchTargetLowering::shouldExpandAtomicCmpXchgInIR(
Value *LoongArchTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
+ unsigned GRLen = Subtarget.getGRLen();
AtomicOrdering FailOrd = CI->getFailureOrdering();
Value *FailureOrdering =
Builder.getIntN(Subtarget.getGRLen(), static_cast<uint64_t>(FailOrd));
-
- // TODO: Support cmpxchg on LA32.
- Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64;
- CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
- NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
- Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
+ Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i32;
+ if (GRLen == 64) {
+ CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64;
+ CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
+ NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
+ Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
+ }
Type *Tys[] = {AlignedAddr->getType()};
Value *Result = Builder.CreateIntrinsic(
CmpXchgIntrID, Tys, {AlignedAddr, CmpVal, NewVal, Mask, FailureOrdering});
- Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
+ if (GRLen == 64)
+ Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
return Result;
}
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
index fcdd9a130d8b6..344f563bd61e8 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
@@ -2055,6 +2055,10 @@ def PseudoAtomicLoadSub32 : PseudoAM;
def PseudoAtomicLoadAnd32 : PseudoAM;
def PseudoAtomicLoadOr32 : PseudoAM;
def PseudoAtomicLoadXor32 : PseudoAM;
+def PseudoAtomicLoadUMax32 : PseudoAM;
+def PseudoAtomicLoadUMin32 : PseudoAM;
+def PseudoAtomicLoadMax32 : PseudoAM;
+def PseudoAtomicLoadMin32 : PseudoAM;
multiclass PseudoBinPat<string Op, Pseudo BinInst> {
def : Pat<(!cast<PatFrag>(Op#"_monotonic") GPR:$addr, GPR:$incr),
@@ -2253,6 +2257,22 @@ def : Pat<(atomic_cmp_swap_i64 GPR:$addr, GPR:$cmp, GPR:$new),
(AMCAS__DB_D GPR:$cmp, GPR:$new, GPR:$addr)>;
}
+// Ordering constants must be kept in sync with the AtomicOrdering enum in
+// AtomicOrdering.h.
+multiclass PseudoCmpXchgPat<string Op, Pseudo CmpXchgInst,
+ ValueType vt = GRLenVT> {
+ def : Pat<(vt (!cast<PatFrag>(Op#"_failure_monotonic") GPR:$addr, GPR:$cmp, GPR:$new)),
+ (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 2)>;
+ def : Pat<(vt (!cast<PatFrag>(Op#"_failure_acquire") GPR:$addr, GPR:$cmp, GPR:$new)),
+ (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 4)>;
+ def : Pat<(vt (!cast<PatFrag>(Op#"_failure_release") GPR:$addr, GPR:$cmp, GPR:$new)),
+ (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 5)>;
+ def : Pat<(vt (!cast<PatFrag>(Op#"_failure_acq_rel") GPR:$addr, GPR:$cmp, GPR:$new)),
+ (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 6)>;
+ def : Pat<(vt (!cast<PatFrag>(Op#"_failure_seq_cst") GPR:$addr, GPR:$cmp, GPR:$new)),
+ (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 7)>;
+}
+
let Predicates = [IsLA64] in {
defm : binary_atomic_op_wd<"AMSWAP", "atomic_swap">;
defm : binary_atomic_op_wd<"AMADD", "atomic_load_add">;
@@ -2288,23 +2308,6 @@ def : AtomicPat<int_loongarch_masked_atomicrmw_umax_i64,
def : AtomicPat<int_loongarch_masked_atomicrmw_umin_i64,
PseudoMaskedAtomicLoadUMin32>;
-// Ordering constants must be kept in sync with the AtomicOrdering enum in
-// AtomicOrdering.h.
-multiclass PseudoCmpXchgPat<string Op, Pseudo CmpXchgInst,
- ValueType vt = GRLenVT> {
- def : Pat<(vt (!cast<PatFrag>(Op#"_failure_monotonic") GPR:$addr, GPR:$cmp, GPR:$new)),
- (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 2)>;
- def : Pat<(vt (!cast<PatFrag>(Op#"_failure_acquire") GPR:$addr, GPR:$cmp, GPR:$new)),
- (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 4)>;
- def : Pat<(vt (!cast<PatFrag>(Op#"_failure_release") GPR:$addr, GPR:$cmp, GPR:$new)),
- (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 5)>;
- def : Pat<(vt (!cast<PatFrag>(Op#"_failure_acq_rel") GPR:$addr, GPR:$cmp, GPR:$new)),
- (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 6)>;
- def : Pat<(vt (!cast<PatFrag>(Op#"_failure_seq_cst") GPR:$addr, GPR:$cmp, GPR:$new)),
- (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 7)>;
-}
-
-defm : PseudoCmpXchgPat<"atomic_cmp_swap_i32", PseudoCmpXchg32>;
defm : PseudoCmpXchgPat<"atomic_cmp_swap_i64", PseudoCmpXchg64, i64>;
def : Pat<(int_loongarch_masked_cmpxchg_i64
GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$fail_order),
@@ -2317,6 +2320,7 @@ def : PseudoMaskedAMMinMaxPat<int_loongarch_masked_atomicrmw_min_i64,
PseudoMaskedAtomicLoadMin32>;
} // Predicates = [IsLA64]
+defm : PseudoCmpXchgPat<"atomic_cmp_swap_i32", PseudoCmpXchg32>;
defm : PseudoBinPat<"atomic_load_nand_i32", PseudoAtomicLoadNand32>;
let Predicates = [IsLA32] in {
@@ -2329,11 +2333,30 @@ def : AtomicPat<int_loongarch_masked_atomicrmw_sub_i32,
PseudoMaskedAtomicLoadSub32>;
def : AtomicPat<int_loongarch_masked_atomicrmw_nand_i32,
PseudoMaskedAtomicLoadNand32>;
+def : AtomicPat<int_loongarch_masked_atomicrmw_umax_i32,
+ PseudoMaskedAtomicLoadUMax32>;
+def : AtomicPat<int_loongarch_masked_atomicrmw_umin_i32,
+ PseudoMaskedAtomicLoadUMin32>;
+
+def : PseudoMaskedAMMinMaxPat<int_loongarch_masked_atomicrmw_max_i32,
+ PseudoMaskedAtomicLoadMax32>;
+def : PseudoMaskedAMMinMaxPat<int_loongarch_masked_atomicrmw_min_i32,
+ PseudoMaskedAtomicLoadMin32>;
+
+def : Pat<(int_loongarch_masked_cmpxchg_i32
+ GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$fail_order),
+ (PseudoMaskedCmpXchg32
+ GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$fail_order)>;
+
defm : PseudoBinPat<"atomic_load_add_i32", PseudoAtomicLoadAdd32>;
defm : PseudoBinPat<"atomic_load_sub_i32", PseudoAtomicLoadSub32>;
defm : PseudoBinPat<"atomic_load_and_i32", PseudoAtomicLoadAnd32>;
defm : PseudoBinPat<"atomic_load_or_i32", PseudoAtomicLoadOr32>;
defm : PseudoBinPat<"atomic_load_xor_i32", PseudoAtomicLoadXor32>;
+defm : PseudoBinPat<"atomic_load_umax_i32", PseudoAtomicLoadUMax32>;
+defm : PseudoBinPat<"atomic_load_umin_i32", PseudoAtomicLoadUMin32>;
+defm : PseudoBinPat<"atomic_load_max_i32", PseudoAtomicLoadMax32>;
+defm : PseudoBinPat<"atomic_load_min_i32", PseudoAtomicLoadMin32>;
} // Predicates = [IsLA32]
/// Intrinsics
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll
index 096c2242661c0..a6a0f15f9f4a5 100644
--- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll
+++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-minmax.ll
@@ -1,10 +1,37 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch32 -mattr=+d --verify-machineinstrs < %s | \
+; RUN: FileCheck %s --check-prefix=LA32
; RUN: llc --mtriple=loongarch64 -mattr=+d --verify-machineinstrs < %s | \
; RUN: FileCheck %s --check-prefix=LA64
;; TODO: Testing for LA32 architecture will be added later
define i8 @atomicrmw_umax_i8_acquire(ptr %a, i8 %b) nounwind {
+; LA32-LABEL: atomicrmw_umax_i8_acquire:
+; LA32: # %bb.0:
+; LA32-NEXT: addi.w $a2, $zero, -4
+; LA32-NEXT: and $a2, $a0, $a2
+; LA32-NEXT: slli.w $a0, $a0, 3
+; LA32-NEXT: ori $a3, $zero, 255
+; LA32-NEXT: sll.w $a3, $a3, $a0
+; LA32-NEXT: andi $a1, $a1, 255
+; LA32-NEXT: sll.w $a1, $a1, $a0
+; LA32-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1
+; LA32-NEXT: ll.w $a4, $a2, 0
+; LA32-NEXT: and $a6, $a4, $a3
+; LA32-NEXT: move $a5, $a4
+; LA32-NEXT: bgeu $a6, $a1, .LBB0_3
+; LA32-NEXT: # %bb.2: # in Loop: Header=BB0_1 Depth=1
+; LA32-NEXT: xor $a5, $a4, $a1
+; LA32-NEXT: and $a5, $a5, $a3
+; LA32-NEXT: xor $a5, $a4, $a5
+; LA32-NEXT: .LBB0_3: # in Loop: Header=BB0_1 Depth=1
+; LA32-NEXT: sc.w $a5, $a2, 0
+; LA32-NEXT: beq $a5, $zero, .LBB0_1
+; LA32-NEXT: # %bb.4:
+; LA32-NEXT: srl.w $a0, $a4, $a0
+; LA32-NEXT: ret
+;
; LA64-LABEL: atomicrmw_umax_i8_acquire:
; LA64: # %bb.0:
; LA64-NEXT: slli.d $a2, $a0, 3
@@ -33,6 +60,32 @@ define i8 @atomicrmw_umax_i8_acquire(ptr %a, i8 %b) nounwind {
}
define i16 @atomicrmw_umax_i16_acquire(ptr %a, i16 %b) nounwind {
+; LA32-LABEL: atomicrmw_umax_i16_acquire:
+; LA32: # %bb.0:
+; LA32-NEXT: addi.w $a2, $zero, -4
+; LA32-NEXT: and $a2, $a0, $a2
+; LA32-NEXT: slli.w $a0, $a0, 3
+; LA32-NEXT: lu12i.w $a3, 15
+; LA32-NEXT: ori $a3, $a3, 4095
+; LA32-NEXT: sll.w $a4, $a3, $a0
+; LA32-NEXT: and $a1, $a1, $a3
+; LA32-NEXT: sll.w $a1, $a1, $a0
+; LA32-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1
+; LA32-NEXT: ll.w $a3, $a2, 0
+; LA32-NEXT: and $a6, $a3, $a4
+; LA32-NEXT: move $a5, $a3
+; LA32-NEXT: bgeu $a6, $a1, .LBB1_3
+; LA32-NEXT: # %bb.2: # in Loop: Header=BB1_1 Depth=1
+; LA32-NEXT: xor $a5, $a3, $a1
+; LA32-NEXT: and $a5, $a5, $a4
+; LA32-NEXT: xor $a5, $a3, $a5
+; LA32-NEXT: .LBB1_3: # in Loop: Header=BB1_1 Depth=1
+; LA32-NEXT: sc.w $a5, $a2, 0
+; LA32-NEXT: beq $a5, $zero, .LBB1_1
+; LA32-NEXT: # %bb.4:
+; LA32-NEXT: srl.w $a0, $a3, $a0
+; LA32-NEXT: ret
+;
; LA64-LABEL: atomicrmw_umax_i16_acquire:
; LA64: # %bb.0:
; LA64-NEXT: slli.d $a2, $a0, 3
@@ -62,6 +115,21 @@ define i16 @atomicrmw_umax_i16_acquire(ptr %a, i16 %b) nounwind {
}
define i32 @atomicrmw_umax_i32_acquire(ptr %a, i32 %b) nounwind {
+; LA32-LABEL: atomicrmw_umax_i32_acquire:
+; LA32: # %bb.0:
+; LA32-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1
+; LA32-NEXT: ll.w $a2, $a0, 0
+; LA32-NEXT: move $a3, $a2
+; LA32-NEXT: bgeu $a2, $a1, .LBB2_3
+; LA32-NEXT: # %bb.2: # in Loop: Header=BB2_1 Depth=1
+; LA32-NEXT: move $a3, $a1
+; LA32-NEXT: .LBB2_3: # in Loop: Header=BB2_1 Depth=1
+; LA32-NEXT: sc.w $a3, $a0, 0
+; LA32-NEXT: beq $a3, $zero, .LBB2_1
+; LA32-NEXT: # %bb.4:
+; LA32-NEXT: move $a0, $a2
+; LA32-NEXT: ret
+;
; LA64-LABEL: atomicrmw_umax_i32_acquire:
; LA64: # %bb.0:
; LA64-NEXT: ammax_db.wu $a2, $a1, $a0
@@ -72,6 +140,65 @@ define i32 @atomicrmw_umax_i32_acquire(ptr %a, i32 %b) nounwind {
}
define i64 @atomicrmw_umax_i64_acquire(ptr %a, i64 %b) nounwind {
+; LA32-LABEL: atomicrmw_umax_i64_acquire:
+; LA32: # %bb.0:
+; LA32-NEXT: addi.w $sp, $sp, -32
+; LA32-NEXT: st.w $ra, $sp, 28 # 4-byte Folded Spill
+; LA32-NEXT: st.w $fp, $sp, 24 # 4-byte Folded Spill
+; LA32-NEXT: st.w $s0, $sp, 20 # 4-byte Folded Spill
+; LA32-NEXT: st.w $s1, $sp, 16 # 4-byte Folded Spill
+; LA32-NEXT: st.w $s2, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT: move $fp, $a0
+; LA32-NEXT: ld.w $a5, $a0, 4
+; LA32-NEXT: ld.w $a4, $a0, 0
+; LA32-NEXT: move $s0, $a2
+; LA32-NEXT: move $s1, $a1
+; LA32-NEXT: addi.w $s2, $sp, 0
+; LA32-NEXT: b .LBB3_2
+; LA32-NEXT: .p2align 4, , 16
+; LA32-NEXT: .LBB3_1: # %atomicrmw.start
+; LA32-NEXT: # in Loop: Header=BB3_2 Depth=1
+; LA32-NEXT: st.w $a4, $sp, 0
+; LA32-NEXT: st.w $a5, $sp, 4
+; LA32-NEXT: ori $a4, $zero, 2
+; LA32-NEXT: ori $a5, $zero, 2
+; LA32-NEXT: move $a0, $fp
+; LA32-NEXT: move $a1, $s2
+; LA32-NEXT: bl __atomic_compare_exchange_8
+; LA32-NEXT: ld.w $a5, $sp, 4
+; LA32-NEXT: ld.w $a4, $sp, 0
+; LA32-NEXT: bne $a0, $zero, .LBB3_7
+; LA32-NEXT: .LBB3_2: # %atomicrmw.start
+; LA32-NEXT: # =>This Inner Loop Header: Depth=1
+; ...
[truncated]
@llvm/pr-subscribers-backend-loongarch

Author: hev (heiher)

Changes: This patch adds codegen support for atomic operations `cmpxchg`, `max`, `min`, `umax` and `umin` on the LA32 target. (Same patch summary and diff as above.)
@@ -1,10 +1,37 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc --mtriple=loongarch32 -mattr=+d --verify-machineinstrs < %s | \
; RUN:   FileCheck %s --check-prefix=LA32
; RUN: llc --mtriple=loongarch64 -mattr=+d --verify-machineinstrs < %s | \
; RUN:   FileCheck %s --check-prefix=LA64

;; TODO: Testing for LA32 architecture will be added later
This TODO can be removed now?
Removed.
LGTM