Skip to content

Commit bb1a52a

Browse files
committed
Recommit "[RISCV] Teach RISCVMergeBaseOffset about cases where we use SHXADD to add some immediates."
With fix for sanitizer build bot failure.
1 parent bd577af commit bb1a52a

File tree

2 files changed

+97
-2
lines changed

2 files changed

+97
-2
lines changed

llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,9 @@ struct RISCVMergeBaseOffsetOpt : public MachineFunctionPass {
5050
void foldOffset(MachineInstr &HiLUI, MachineInstr &LoADDI, MachineInstr &Tail,
5151
int64_t Offset);
5252
bool matchLargeOffset(MachineInstr &TailAdd, Register GSReg, int64_t &Offset);
53+
bool matchShiftedOffset(MachineInstr &TailShXAdd, Register GSReg,
54+
int64_t &Offset);
55+
5356
RISCVMergeBaseOffsetOpt() : MachineFunctionPass(ID) {}
5457

5558
MachineFunctionProperties getRequiredProperties() const override {
@@ -193,6 +196,59 @@ bool RISCVMergeBaseOffsetOpt::matchLargeOffset(MachineInstr &TailAdd,
193196
return false;
194197
}
195198

199+
// Detect patterns for offsets that are passed into a SHXADD instruction.
200+
// The offset has 1, 2, or 3 trailing zeros and fits in simm13, simm14, or
// simm15, respectively.
201+
// The constant is created with addi voff, x0, C, and shXadd is used to
202+
// insert the trailing zeros and do the addition.
203+
//
204+
// HiLUI: lui vreg1, %hi(s)
205+
// LoADDI: addi vreg2, vreg1, %lo(s)
206+
// OffsetTail: addi voff, x0, C
207+
// TailShXAdd: shXadd vreg4, voff, vreg2
208+
// Try to match TailShXAdd as `shXadd rd, (addi x0, C), GAReg`.
//
// \param TailShXAdd the SH1ADD/SH2ADD/SH3ADD instruction to inspect.
// \param GAReg      the register that must be the unshifted (second) source.
// \param Offset     on success, receives C shifted left by the shXadd amount;
//                   left untouched on failure.
// \returns true if the pattern matched. In that case the ADDI that
//          materializes C is also added to DeadInstrs.
bool RISCVMergeBaseOffsetOpt::matchShiftedOffset(MachineInstr &TailShXAdd,
                                                 Register GAReg,
                                                 int64_t &Offset) {
  assert((TailShXAdd.getOpcode() == RISCV::SH1ADD ||
          TailShXAdd.getOpcode() == RISCV::SH2ADD ||
          TailShXAdd.getOpcode() == RISCV::SH3ADD) &&
         "Expected SHXADD instruction!");

  // The first source is the shifted operand.
  Register Rs1 = TailShXAdd.getOperand(1).getReg();

  // The second (unshifted) source must be the register we are folding into.
  if (GAReg != TailShXAdd.getOperand(2).getReg())
    return false;

  // Only a virtual register has a unique definition we can look up; bail out
  // on physical registers before querying the def. Also can't fold if the
  // register has more than one use.
  if (!Rs1.isVirtual() || !MRI->hasOneUse(Rs1))
    return false;

  // This can point to an ADDI X0, C.
  MachineInstr &OffsetTail = *MRI->getVRegDef(Rs1);
  if (OffsetTail.getOpcode() != RISCV::ADDI)
    return false;
  if (!OffsetTail.getOperand(1).isReg() ||
      OffsetTail.getOperand(1).getReg() != RISCV::X0 ||
      !OffsetTail.getOperand(2).isImm())
    return false;

  Offset = OffsetTail.getOperand(2).getImm();
  assert(isInt<12>(Offset) && "Unexpected offset");

  unsigned ShAmt;
  switch (TailShXAdd.getOpcode()) {
  default: llvm_unreachable("Unexpected opcode");
  case RISCV::SH1ADD: ShAmt = 1; break;
  case RISCV::SH2ADD: ShAmt = 2; break;
  case RISCV::SH3ADD: ShAmt = 3; break;
  }

  // Shift in unsigned arithmetic: left-shifting a negative signed value is
  // undefined behaviour before C++20.
  Offset = static_cast<uint64_t>(Offset) << ShAmt;

  LLVM_DEBUG(dbgs() << "  Offset Instr: " << OffsetTail);
  DeadInstrs.insert(&OffsetTail);
  return true;
}
251+
196252
bool RISCVMergeBaseOffsetOpt::detectAndFoldOffset(MachineInstr &HiLUI,
197253
MachineInstr &LoADDI) {
198254
Register DestReg = LoADDI.getOperand(0).getReg();
@@ -240,6 +296,18 @@ bool RISCVMergeBaseOffsetOpt::detectAndFoldOffset(MachineInstr &HiLUI,
240296
foldOffset(HiLUI, LoADDI, Tail, Offset);
241297
return true;
242298
}
299+
case RISCV::SH1ADD:
300+
case RISCV::SH2ADD:
301+
case RISCV::SH3ADD: {
302+
// The offset is too large to fit in the immediate field of ADDI.
303+
// It may be encoded as (SH1ADD (ADDI X0, C), DestReg),
304+
// (SH2ADD (ADDI X0, C), DestReg), or (SH3ADD (ADDI X0, C), DestReg).
305+
int64_t Offset;
306+
if (!matchShiftedOffset(Tail, DestReg, Offset))
307+
return false;
308+
foldOffset(HiLUI, LoADDI, Tail, Offset);
309+
return true;
310+
}
243311
case RISCV::LB:
244312
case RISCV::LH:
245313
case RISCV::LW:

llvm/test/CodeGen/RISCV/hoist-global-addr-base.ll

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc -mtriple=riscv32 < %s | FileCheck %s --check-prefixes=CHECK,RV32
3-
; RUN: llc -mtriple=riscv64 < %s | FileCheck %s --check-prefixes=CHECK,RV64
2+
; RUN: llc -mtriple=riscv32 < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32I
3+
; RUN: llc -mtriple=riscv64 < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64I
4+
; RUN: llc -mtriple=riscv32 -mattr=+zba < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32ZBA
5+
; RUN: llc -mtriple=riscv64 -mattr=+zba < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64ZBA
46

57
%struct.S = type { [40 x i32], i32, i32, i32, [4100 x i32], i32, i32, i32 }
68
@s = common dso_local global %struct.S zeroinitializer, align 4
@@ -239,3 +241,28 @@ define i8* @offset_addi_addi_neg() {
239241
; CHECK-NEXT: ret
240242
ret i8* getelementptr inbounds ([0 x i8], [0 x i8]* @bar, i32 0, i64 -4000)
241243
}
244+
245+
; With Zba the constant 6424 is created with LI+SH2ADD.
246+
define i8* @offset_sh2add() {
247+
; CHECK-LABEL: offset_sh2add:
248+
; CHECK: # %bb.0:
249+
; CHECK-NEXT: lui a0, %hi(bar+6424)
250+
; CHECK-NEXT: addi a0, a0, %lo(bar+6424)
251+
; CHECK-NEXT: ret
252+
ret i8* getelementptr inbounds ([0 x i8], [0 x i8]* @bar, i32 0, i64 6424)
253+
}
254+
255+
; With Zba the constant 12848 is created with LI+SH3ADD.
256+
define i8* @offset_sh3add() {
257+
; CHECK-LABEL: offset_sh3add:
258+
; CHECK: # %bb.0:
259+
; CHECK-NEXT: lui a0, %hi(bar+12848)
260+
; CHECK-NEXT: addi a0, a0, %lo(bar+12848)
261+
; CHECK-NEXT: ret
262+
ret i8* getelementptr inbounds ([0 x i8], [0 x i8]* @bar, i32 0, i64 12848)
263+
}
264+
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
265+
; RV32I: {{.*}}
266+
; RV32ZBA: {{.*}}
267+
; RV64I: {{.*}}
268+
; RV64ZBA: {{.*}}

0 commit comments

Comments
 (0)