
[LoongArch] Add codegen support for atomic-ops on LA32 #141557

Merged · 2 commits · Jun 6, 2025
7 changes: 4 additions & 3 deletions llvm/include/llvm/IR/IntrinsicsLoongArch.td
@@ -25,12 +25,13 @@ class MaskedAtomicRMW<LLVMType itype>
 multiclass MaskedAtomicRMWIntrinsics {
   // i32 @llvm.<name>.i32.<p>(any*, i32, i32, i32 imm);
   def _i32 : MaskedAtomicRMW<llvm_i32_ty>;
-  // i64 @llvm.<name>.i32.<p>(any*, i64, i64, i64 imm);
+  // i64 @llvm.<name>.i64.<p>(any*, i64, i64, i64 imm);
   def _i64 : MaskedAtomicRMW<llvm_i64_ty>;
 }

 multiclass MaskedAtomicRMWFiveOpIntrinsics {
-  // TODO: Support cmpxchg on LA32.
+  // i32 @llvm.<name>.i32.<p>(any*, i32, i32, i32, i32 imm);
+  def _i32 : MaskedAtomicRMWFiveArg<llvm_i32_ty>;
   // i64 @llvm.<name>.i64.<p>(any*, i64, i64, i64, i64 imm);
   def _i64 : MaskedAtomicRMWFiveArg<llvm_i64_ty>;
 }
@@ -44,7 +45,7 @@ defm int_loongarch_masked_atomicrmw_umin : MaskedAtomicRMWIntrinsics;
 defm int_loongarch_masked_atomicrmw_max : MaskedAtomicRMWFiveOpIntrinsics;
 defm int_loongarch_masked_atomicrmw_min : MaskedAtomicRMWFiveOpIntrinsics;

-// @llvm.loongarch.masked.cmpxchg.i64.<p>(
+// @llvm.loongarch.masked.cmpxchg.<i32,i64>.<p>(
 //   ptr addr, grlen cmpval, grlen newval, grlen mask, grlenimm ordering)
 defm int_loongarch_masked_cmpxchg : MaskedAtomicRMWFiveOpIntrinsics;

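For context on the shapes above: the masked intrinsics exist because sub-word (i8/i16) atomics are widened to their naturally aligned 32-bit container word, with a mask selecting the affected bits. Below is a minimal C++ sketch of that derivation; it is illustrative only (the names are made up, and little-endian layout is assumed), not code from this patch:

#include <cstdint>

// Illustrative: rephrase a byte-sized atomic at Addr as a masked operation
// on the aligned 32-bit word that contains it.
struct MaskedLoc {
  uintptr_t AlignedAddr; // Addr rounded down to a 4-byte boundary
  unsigned ShiftAmt;     // bit offset of the byte within that word
  uint32_t Mask;         // selects the byte's bits within the word
};

MaskedLoc locateByte(const void *Addr) {
  uintptr_t A = reinterpret_cast<uintptr_t>(Addr);
  MaskedLoc Loc;
  Loc.AlignedAddr = A & ~uintptr_t{3};
  Loc.ShiftAmt = static_cast<unsigned>(A & 3) * 8;
  Loc.Mask = uint32_t{0xff} << Loc.ShiftAmt;
  return Loc;
}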
89 changes: 56 additions & 33 deletions llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp
@@ -122,6 +122,18 @@ bool LoongArchExpandAtomicPseudo::expandMI(
   case LoongArch::PseudoAtomicLoadXor32:
     return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xor, false, 32,
                              NextMBBI);
+  case LoongArch::PseudoAtomicLoadUMax32:
+    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMax, false, 32,
+                                NextMBBI);
+  case LoongArch::PseudoAtomicLoadUMin32:
+    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMin, false, 32,
+                                NextMBBI);
+  case LoongArch::PseudoAtomicLoadMax32:
+    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Max, false, 32,
+                                NextMBBI);
+  case LoongArch::PseudoAtomicLoadMin32:
+    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Min, false, 32,
+                                NextMBBI);
   case LoongArch::PseudoMaskedAtomicLoadUMax32:
     return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMax, true, 32,
                                 NextMBBI);
@@ -356,8 +368,6 @@ bool LoongArchExpandAtomicPseudo::expandAtomicMinMaxOp(
     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
     AtomicRMWInst::BinOp BinOp, bool IsMasked, int Width,
     MachineBasicBlock::iterator &NextMBBI) {
-  assert(IsMasked == true &&
-         "Should only need to expand masked atomic max/min");
   assert(Width == 32 && "Should never need to expand masked 64-bit operations");

   MachineInstr &MI = *MBBI;
@@ -385,79 +395,92 @@ bool LoongArchExpandAtomicPseudo::expandAtomicMinMaxOp(
   MBB.addSuccessor(LoopHeadMBB);

   Register DestReg = MI.getOperand(0).getReg();
-  Register Scratch1Reg = MI.getOperand(1).getReg();
-  Register Scratch2Reg = MI.getOperand(2).getReg();
-  Register AddrReg = MI.getOperand(3).getReg();
-  Register IncrReg = MI.getOperand(4).getReg();
-  Register MaskReg = MI.getOperand(5).getReg();
+  Register ScratchReg = MI.getOperand(1).getReg();
+  Register AddrReg = MI.getOperand(IsMasked ? 3 : 2).getReg();
+  Register IncrReg = MI.getOperand(IsMasked ? 4 : 3).getReg();
+  Register CmprReg = DestReg;

   //
   // .loophead:
   //   ll.w destreg, (alignedaddr)
-  //   and scratch2, destreg, mask
-  //   move scratch1, destreg
   BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::LL_W), DestReg)
       .addReg(AddrReg)
       .addImm(0);
-  BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::AND), Scratch2Reg)
-      .addReg(DestReg)
-      .addReg(MaskReg);
-  BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::OR), Scratch1Reg)
+  //   and cmpr, destreg, mask
+  if (IsMasked) {
+    Register MaskReg = MI.getOperand(5).getReg();
+    CmprReg = MI.getOperand(2).getReg();
+    BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::AND), CmprReg)
+        .addReg(DestReg)
+        .addReg(MaskReg);
+  }
+  //   move scratch, destreg
+  BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::OR), ScratchReg)
       .addReg(DestReg)
       .addReg(LoongArch::R0);

   switch (BinOp) {
   default:
     llvm_unreachable("Unexpected AtomicRMW BinOp");
-  //   bgeu scratch2, incr, .looptail
+  //   bgeu cmpr, incr, .looptail
   case AtomicRMWInst::UMax:
     BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::BGEU))
-        .addReg(Scratch2Reg)
+        .addReg(CmprReg)
         .addReg(IncrReg)
         .addMBB(LoopTailMBB);
     break;
-  //   bgeu incr, scratch2, .looptail
+  //   bgeu incr, cmpr, .looptail
   case AtomicRMWInst::UMin:
     BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::BGEU))
         .addReg(IncrReg)
-        .addReg(Scratch2Reg)
+        .addReg(CmprReg)
         .addMBB(LoopTailMBB);
     break;
   case AtomicRMWInst::Max:
-    insertSext(TII, DL, LoopHeadMBB, Scratch2Reg, MI.getOperand(6).getReg());
-    //   bge scratch2, incr, .looptail
+    if (IsMasked)
+      insertSext(TII, DL, LoopHeadMBB, CmprReg, MI.getOperand(6).getReg());
+    //   bge cmpr, incr, .looptail
     BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::BGE))
-        .addReg(Scratch2Reg)
+        .addReg(CmprReg)
         .addReg(IncrReg)
         .addMBB(LoopTailMBB);
     break;
   case AtomicRMWInst::Min:
-    insertSext(TII, DL, LoopHeadMBB, Scratch2Reg, MI.getOperand(6).getReg());
-    //   bge incr, scratch2, .looptail
+    if (IsMasked)
+      insertSext(TII, DL, LoopHeadMBB, CmprReg, MI.getOperand(6).getReg());
+    //   bge incr, cmpr, .looptail
     BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::BGE))
         .addReg(IncrReg)
-        .addReg(Scratch2Reg)
+        .addReg(CmprReg)
         .addMBB(LoopTailMBB);
     break;
     // TODO: support other AtomicRMWInst.
   }

   // .loopifbody:
-  //   xor scratch1, destreg, incr
-  //   and scratch1, scratch1, mask
-  //   xor scratch1, destreg, scratch1
-  insertMaskedMerge(TII, DL, LoopIfBodyMBB, Scratch1Reg, DestReg, IncrReg,
-                    MaskReg, Scratch1Reg);
+  if (IsMasked) {
+    Register MaskReg = MI.getOperand(5).getReg();
+    //   xor scratch, destreg, incr
+    //   and scratch, scratch, mask
+    //   xor scratch, destreg, scratch
+    insertMaskedMerge(TII, DL, LoopIfBodyMBB, ScratchReg, DestReg, IncrReg,
+                      MaskReg, ScratchReg);
+  } else {
+    //   move scratch, incr
+    BuildMI(LoopIfBodyMBB, DL, TII->get(LoongArch::OR), ScratchReg)
+        .addReg(IncrReg)
+        .addReg(LoongArch::R0);
+  }

   // .looptail:
-  //   sc.w scratch1, scratch1, (addr)
-  //   beqz scratch1, loop
-  BuildMI(LoopTailMBB, DL, TII->get(LoongArch::SC_W), Scratch1Reg)
-      .addReg(Scratch1Reg)
+  //   sc.w scratch, scratch, (addr)
+  //   beqz scratch, loop
+  BuildMI(LoopTailMBB, DL, TII->get(LoongArch::SC_W), ScratchReg)
+      .addReg(ScratchReg)
       .addReg(AddrReg)
       .addImm(0);
   BuildMI(LoopTailMBB, DL, TII->get(LoongArch::BEQ))
-      .addReg(Scratch1Reg)
+      .addReg(ScratchReg)
       .addReg(LoongArch::R0)
       .addMBB(LoopHeadMBB);

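The new unmasked path above emits the same ll.w/sc.w retry loop as the masked one, minus the mask bookkeeping: load-linked the word, branch straight to the store-conditional when the loaded value already satisfies the min/max condition, otherwise store the increment. As a semantic model only (the backend emits branches and ll.w/sc.w, not compare_exchange), the UMax expansion behaves like:

#include <atomic>
#include <cstdint>

// Semantic model of the PseudoAtomicLoadUMax32 expansion; illustrative,
// not the generated code.
uint32_t atomicUMax32(std::atomic<uint32_t> &Word, uint32_t Incr) {
  uint32_t Old = Word.load(std::memory_order_relaxed);
  for (;;) {
    // "bgeu cmpr, incr, .looptail": keep the old value if already >= Incr.
    uint32_t New = Old >= Incr ? Old : Incr;
    // "sc.w" + "beqz": retry until the conditional store succeeds.
    if (Word.compare_exchange_weak(Old, New, std::memory_order_seq_cst,
                                   std::memory_order_relaxed))
      return Old; // atomicrmw yields the previous value
  }
}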
25 changes: 18 additions & 7 deletions llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -7070,6 +7070,14 @@ getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen,
     return Intrinsic::loongarch_masked_atomicrmw_sub_i32;
   case AtomicRMWInst::Nand:
     return Intrinsic::loongarch_masked_atomicrmw_nand_i32;
+  case AtomicRMWInst::UMax:
+    return Intrinsic::loongarch_masked_atomicrmw_umax_i32;
+  case AtomicRMWInst::UMin:
+    return Intrinsic::loongarch_masked_atomicrmw_umin_i32;
+  case AtomicRMWInst::Max:
+    return Intrinsic::loongarch_masked_atomicrmw_max_i32;
+  case AtomicRMWInst::Min:
+    return Intrinsic::loongarch_masked_atomicrmw_min_i32;
   // TODO: support other AtomicRMWInst.
   }
 }
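With these cases added, a sub-word atomicrmw umax/umin/max/min on LA32 maps to the corresponding masked i32 intrinsic instead of falling into the unreachable default. At the source level, the operation being lowered is an RMW like the following; it is shown with a CAS loop since std::atomic has no fetch_max before C++26, and whether a frontend turns this exact loop into atomicrmw is compiler-dependent, so the snippet only illustrates the semantics:

#include <algorithm>
#include <atomic>
#include <cstdint>

// Semantics of an i8 atomicrmw umax, the operation that now lowers via
// @llvm.loongarch.masked.atomicrmw.umax.i32 on LA32.
uint8_t fetchUMax(std::atomic<uint8_t> &V, uint8_t Arg) {
  uint8_t Old = V.load(std::memory_order_relaxed);
  while (!V.compare_exchange_weak(Old, std::max(Old, Arg)))
    ; // Old is refreshed with the current value on failure
  return Old;
}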
@@ -7093,19 +7101,22 @@ LoongArchTargetLowering::shouldExpandAtomicCmpXchgInIR(
 Value *LoongArchTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
     IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
     Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
+  unsigned GRLen = Subtarget.getGRLen();
   AtomicOrdering FailOrd = CI->getFailureOrdering();
   Value *FailureOrdering =
       Builder.getIntN(Subtarget.getGRLen(), static_cast<uint64_t>(FailOrd));
-
-  // TODO: Support cmpxchg on LA32.
-  Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64;
-  CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
-  NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
-  Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
+  Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i32;
+  if (GRLen == 64) {
+    CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64;
+    CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
+    NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
+    Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
+  }
   Type *Tys[] = {AlignedAddr->getType()};
   Value *Result = Builder.CreateIntrinsic(
       CmpXchgIntrID, Tys, {AlignedAddr, CmpVal, NewVal, Mask, FailureOrdering});
-  Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
+  if (GRLen == 64)
+    Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
   return Result;
 }

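Because the i32 variant of the masked-cmpxchg intrinsic now exists, LA32 skips the sign-extend/truncate detour that LA64 still needs. The affected source-level operations are sub-word compare-exchanges, for example:

#include <atomic>
#include <cstdint>

// A 16-bit compare-exchange: the kind of operation that
// emitMaskedAtomicCmpXchgIntrinsic lowers, now on LA32 as well as LA64.
bool cas16(std::atomic<uint16_t> &P, uint16_t Expected, uint16_t Desired) {
  return P.compare_exchange_strong(Expected, Desired,
                                   std::memory_order_acq_rel,
                                   std::memory_order_acquire);
}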
57 changes: 40 additions & 17 deletions llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
@@ -2055,6 +2055,10 @@ def PseudoAtomicLoadSub32 : PseudoAM;
 def PseudoAtomicLoadAnd32 : PseudoAM;
 def PseudoAtomicLoadOr32 : PseudoAM;
 def PseudoAtomicLoadXor32 : PseudoAM;
+def PseudoAtomicLoadUMax32 : PseudoAM;
+def PseudoAtomicLoadUMin32 : PseudoAM;
+def PseudoAtomicLoadMax32 : PseudoAM;
+def PseudoAtomicLoadMin32 : PseudoAM;

 multiclass PseudoBinPat<string Op, Pseudo BinInst> {
   def : Pat<(!cast<PatFrag>(Op#"_monotonic") GPR:$addr, GPR:$incr),
@@ -2253,6 +2257,22 @@ def : Pat<(atomic_cmp_swap_i64 GPR:$addr, GPR:$cmp, GPR:$new),
           (AMCAS__DB_D GPR:$cmp, GPR:$new, GPR:$addr)>;
 }

+// Ordering constants must be kept in sync with the AtomicOrdering enum in
+// AtomicOrdering.h.
+multiclass PseudoCmpXchgPat<string Op, Pseudo CmpXchgInst,
+                            ValueType vt = GRLenVT> {
+  def : Pat<(vt (!cast<PatFrag>(Op#"_failure_monotonic") GPR:$addr, GPR:$cmp, GPR:$new)),
+            (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 2)>;
+  def : Pat<(vt (!cast<PatFrag>(Op#"_failure_acquire") GPR:$addr, GPR:$cmp, GPR:$new)),
+            (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 4)>;
+  def : Pat<(vt (!cast<PatFrag>(Op#"_failure_release") GPR:$addr, GPR:$cmp, GPR:$new)),
+            (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 5)>;
+  def : Pat<(vt (!cast<PatFrag>(Op#"_failure_acq_rel") GPR:$addr, GPR:$cmp, GPR:$new)),
+            (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 6)>;
+  def : Pat<(vt (!cast<PatFrag>(Op#"_failure_seq_cst") GPR:$addr, GPR:$cmp, GPR:$new)),
+            (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 7)>;
+}

let Predicates = [IsLA64] in {
defm : binary_atomic_op_wd<"AMSWAP", "atomic_swap">;
defm : binary_atomic_op_wd<"AMADD", "atomic_load_add">;
@@ -2288,23 +2308,6 @@ def : AtomicPat<int_loongarch_masked_atomicrmw_umax_i64,
 def : AtomicPat<int_loongarch_masked_atomicrmw_umin_i64,
                 PseudoMaskedAtomicLoadUMin32>;

-// Ordering constants must be kept in sync with the AtomicOrdering enum in
-// AtomicOrdering.h.
-multiclass PseudoCmpXchgPat<string Op, Pseudo CmpXchgInst,
-                            ValueType vt = GRLenVT> {
-  def : Pat<(vt (!cast<PatFrag>(Op#"_failure_monotonic") GPR:$addr, GPR:$cmp, GPR:$new)),
-            (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 2)>;
-  def : Pat<(vt (!cast<PatFrag>(Op#"_failure_acquire") GPR:$addr, GPR:$cmp, GPR:$new)),
-            (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 4)>;
-  def : Pat<(vt (!cast<PatFrag>(Op#"_failure_release") GPR:$addr, GPR:$cmp, GPR:$new)),
-            (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 5)>;
-  def : Pat<(vt (!cast<PatFrag>(Op#"_failure_acq_rel") GPR:$addr, GPR:$cmp, GPR:$new)),
-            (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 6)>;
-  def : Pat<(vt (!cast<PatFrag>(Op#"_failure_seq_cst") GPR:$addr, GPR:$cmp, GPR:$new)),
-            (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 7)>;
-}
-
-defm : PseudoCmpXchgPat<"atomic_cmp_swap_i32", PseudoCmpXchg32>;
 defm : PseudoCmpXchgPat<"atomic_cmp_swap_i64", PseudoCmpXchg64, i64>;
 def : Pat<(int_loongarch_masked_cmpxchg_i64
            GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$fail_order),
@@ -2317,6 +2320,7 @@ def : PseudoMaskedAMMinMaxPat<int_loongarch_masked_atomicrmw_min_i64,
                               PseudoMaskedAtomicLoadMin32>;
 } // Predicates = [IsLA64]

+defm : PseudoCmpXchgPat<"atomic_cmp_swap_i32", PseudoCmpXchg32>;
 defm : PseudoBinPat<"atomic_load_nand_i32", PseudoAtomicLoadNand32>;

 let Predicates = [IsLA32] in {
@@ -2329,11 +2333,30 @@ def : AtomicPat<int_loongarch_masked_atomicrmw_sub_i32,
                 PseudoMaskedAtomicLoadSub32>;
 def : AtomicPat<int_loongarch_masked_atomicrmw_nand_i32,
                 PseudoMaskedAtomicLoadNand32>;
+def : AtomicPat<int_loongarch_masked_atomicrmw_umax_i32,
+                PseudoMaskedAtomicLoadUMax32>;
+def : AtomicPat<int_loongarch_masked_atomicrmw_umin_i32,
+                PseudoMaskedAtomicLoadUMin32>;
+
+def : PseudoMaskedAMMinMaxPat<int_loongarch_masked_atomicrmw_max_i32,
+                              PseudoMaskedAtomicLoadMax32>;
+def : PseudoMaskedAMMinMaxPat<int_loongarch_masked_atomicrmw_min_i32,
+                              PseudoMaskedAtomicLoadMin32>;
+
+def : Pat<(int_loongarch_masked_cmpxchg_i32
+           GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$fail_order),
+          (PseudoMaskedCmpXchg32
+           GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$fail_order)>;

 defm : PseudoBinPat<"atomic_load_add_i32", PseudoAtomicLoadAdd32>;
 defm : PseudoBinPat<"atomic_load_sub_i32", PseudoAtomicLoadSub32>;
 defm : PseudoBinPat<"atomic_load_and_i32", PseudoAtomicLoadAnd32>;
 defm : PseudoBinPat<"atomic_load_or_i32", PseudoAtomicLoadOr32>;
 defm : PseudoBinPat<"atomic_load_xor_i32", PseudoAtomicLoadXor32>;
+defm : PseudoBinPat<"atomic_load_umax_i32", PseudoAtomicLoadUMax32>;
+defm : PseudoBinPat<"atomic_load_umin_i32", PseudoAtomicLoadUMin32>;
+defm : PseudoBinPat<"atomic_load_max_i32", PseudoAtomicLoadMax32>;
+defm : PseudoBinPat<"atomic_load_min_i32", PseudoAtomicLoadMin32>;
 } // Predicates = [IsLA32]
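Beyond the masked sub-word cases, the PseudoAtomicLoad{UMax,UMin,Max,Min}32 pseudos plus the PseudoBinPat patterns above let plain word-sized i32 min/max RMWs select on LA32 and expand through the unmasked LL/SC loop. A source-level example of what this enables (again a CAS-loop rendering of the semantics, not the emitted code):

#include <algorithm>
#include <atomic>
#include <cstdint>

// Word-sized signed-min RMW: the shape that now selects
// PseudoAtomicLoadMin32 on LA32.
int32_t fetchMin32(std::atomic<int32_t> &V, int32_t Arg) {
  int32_t Old = V.load(std::memory_order_relaxed);
  while (!V.compare_exchange_weak(Old, std::min(Old, Arg)))
    ;
  return Old;
}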

/// Intrinsics