Skip to content

Commit 4b8d9e3

Browse files
Authored and committed by Vincent Lejeune
R600: Workaround for cayman loop bug
llvm-svn: 196121
1 parent 604dfec commit 4b8d9e3

File tree

3 files changed: 46 additions (+46), 0 deletions (-0)

llvm/lib/Target/R600/R600ControlFlowFinalizer.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -332,6 +332,7 @@ class R600ControlFlowFinalizer : public MachineFunctionPass {
332332

333333
unsigned MaxStack = 0;
334334
unsigned CurrentStack = 0;
335+
unsigned CurrentLoopDepth = 0;
335336
bool HasPush = false;
336337
for (MachineFunction::iterator MB = MF.begin(), ME = MF.end(); MB != ME;
337338
++MB) {
@@ -370,6 +371,13 @@ class R600ControlFlowFinalizer : public MachineFunctionPass {
370371
CurrentStack++;
371372
MaxStack = std::max(MaxStack, CurrentStack);
372373
HasPush = true;
374+
if (ST.hasCaymanISA() && CurrentLoopDepth > 1) {
375+
BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::CF_PUSH_CM))
376+
.addImm(CfCount + 1)
377+
.addImm(1);
378+
MI->setDesc(TII->get(AMDGPU::CF_ALU));
379+
CfCount++;
380+
}
373381
case AMDGPU::CF_ALU:
374382
I = MI;
375383
AluClauses.push_back(MakeALUClause(MBB, I));
@@ -378,6 +386,7 @@ class R600ControlFlowFinalizer : public MachineFunctionPass {
378386
break;
379387
case AMDGPU::WHILELOOP: {
380388
CurrentStack+=4;
389+
CurrentLoopDepth++;
381390
MaxStack = std::max(MaxStack, CurrentStack);
382391
MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
383392
getHWInstrDesc(CF_WHILE_LOOP))
@@ -392,6 +401,7 @@ class R600ControlFlowFinalizer : public MachineFunctionPass {
392401
}
393402
case AMDGPU::ENDLOOP: {
394403
CurrentStack-=4;
404+
CurrentLoopDepth--;
395405
std::pair<unsigned, std::set<MachineInstr *> > Pair =
396406
LoopStack.back();
397407
LoopStack.pop_back();

llvm/lib/Target/R600/R600Instructions.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1867,6 +1867,10 @@ def : Pat <
18671867
let COUNT = 0;
18681868
}
18691869

1870+
def CF_PUSH_CM : CF_CLAUSE_EG<11, (ins i32imm:$ADDR, i32imm:$POP_COUNT), "PUSH @$ADDR POP:$POP_COUNT"> {
1871+
let COUNT = 0;
1872+
}
1873+
18701874
def : Pat<(fsqrt f32:$src), (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_cm $src))>;
18711875

18721876
class RAT_STORE_DWORD <RegisterClass rc, ValueType vt, bits<4> mask> :
(new file — an llc/FileCheck regression test; its path was not captured in this extract)

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s
2+
3+
; CHECK-LABEL: @main
4+
; CHECK: LOOP_START_DX10
5+
; CHECK: ALU_PUSH_BEFORE
6+
; CHECK: LOOP_START_DX10
7+
; CHECK: PUSH
8+
; CHECK-NOT: ALU_PUSH_BEFORE
9+
; CHECK: END_LOOP
10+
; CHECK: END_LOOP
11+
define void @main (<4 x float> inreg %reg0) #0 {
12+
entry:
13+
br label %outer_loop
14+
outer_loop:
15+
%cnt = phi i32 [0, %entry], [%cnt_incr, %inner_loop]
16+
%cond = icmp eq i32 %cnt, 16
17+
br i1 %cond, label %outer_loop_body, label %exit
18+
outer_loop_body:
19+
%cnt_incr = add i32 %cnt, 1
20+
br label %inner_loop
21+
inner_loop:
22+
%cnt2 = phi i32 [0, %outer_loop_body], [%cnt2_incr, %inner_loop_body]
23+
%cond2 = icmp eq i32 %cnt2, 16
24+
br i1 %cond, label %inner_loop_body, label %outer_loop
25+
inner_loop_body:
26+
%cnt2_incr = add i32 %cnt2, 1
27+
br label %inner_loop
28+
exit:
29+
ret void
30+
}
31+
32+
attributes #0 = { "ShaderType"="0" }

0 commit comments

Comments
 (0)