Skip to content

Commit 5c7a61d

Browse files
author
Marek Olsak
committed
AMDGPU: Buffer descriptor changes for GFX9
Reviewers: arsenm
Subscribers: qcolombet, kzhuravl, wdng, nhaehnle, yaxunl, tony-tye, dstuttard, tpr
Differential Revision: https://reviews.llvm.org/D31158
llvm-svn: 298397
1 parent e22fdb9 commit 5c7a61d

File tree

5 files changed

+23
-8
lines changed

5 files changed

+23
-8
lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3693,10 +3693,13 @@ MachineOperand *SIInstrInfo::getNamedOperand(MachineInstr &MI,
36933693
uint64_t SIInstrInfo::getDefaultRsrcDataFormat() const {
36943694
uint64_t RsrcDataFormat = AMDGPU::RSRC_DATA_FORMAT;
36953695
if (ST.isAmdHsaOS()) {
3696-
RsrcDataFormat |= (1ULL << 56);
3696+
// Set ATC = 1. GFX9 doesn't have this bit.
3697+
if (ST.getGeneration() <= SISubtarget::VOLCANIC_ISLANDS)
3698+
RsrcDataFormat |= (1ULL << 56);
36973699

3698-
if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS)
3699-
// Set MTYPE = 2
3700+
// Set MTYPE = 2 (MTYPE_UC = uncached). GFX9 doesn't have this.
3701+
// BTW, it disables TC L2 and therefore decreases performance.
3702+
if (ST.getGeneration() == SISubtarget::VOLCANIC_ISLANDS)
37003703
RsrcDataFormat |= (2ULL << 59);
37013704
}
37023705

@@ -3708,11 +3711,14 @@ uint64_t SIInstrInfo::getScratchRsrcWords23() const {
37083711
AMDGPU::RSRC_TID_ENABLE |
37093712
0xffffffff; // Size;
37103713

3711-
uint64_t EltSizeValue = Log2_32(ST.getMaxPrivateElementSize()) - 1;
3714+
// GFX9 doesn't have ELEMENT_SIZE.
3715+
if (ST.getGeneration() <= SISubtarget::VOLCANIC_ISLANDS) {
3716+
uint64_t EltSizeValue = Log2_32(ST.getMaxPrivateElementSize()) - 1;
3717+
Rsrc23 |= EltSizeValue << AMDGPU::RSRC_ELEMENT_SIZE_SHIFT;
3718+
}
37123719

3713-
Rsrc23 |= (EltSizeValue << AMDGPU::RSRC_ELEMENT_SIZE_SHIFT) |
3714-
// IndexStride = 64
3715-
(UINT64_C(3) << AMDGPU::RSRC_INDEX_STRIDE_SHIFT);
3720+
// IndexStride = 64.
3721+
Rsrc23 |= UINT64_C(3) << AMDGPU::RSRC_INDEX_STRIDE_SHIFT;
37163722

37173723
// If TID_ENABLE is set, DATA_FORMAT specifies stride bits [14:17].
37183724
// Clear them unless we want a huge stride.

llvm/test/CodeGen/AMDGPU/large-alloca-compute.ll

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
; RUN: llc -march=amdgcn -mcpu=bonaire -show-mc-encoding < %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=ALL %s
22
; RUN: llc -march=amdgcn -mcpu=carrizo --show-mc-encoding < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=ALL %s
3+
; RUN: llc -march=amdgcn -mcpu=gfx900 --show-mc-encoding < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=ALL %s
34
; RUN: llc -march=amdgcn -mcpu=bonaire -mtriple=amdgcn-unknown-amdhsa < %s -mattr=-flat-for-global | FileCheck -check-prefix=GCNHSA -check-prefix=CIHSA -check-prefix=ALL %s
45
; RUN: llc -march=amdgcn -mcpu=carrizo -mtriple=amdgcn-unknown-amdhsa -mattr=-flat-for-global < %s | FileCheck -check-prefix=GCNHSA -check-prefix=VIHSA -check-prefix=ALL %s
56

@@ -14,6 +15,7 @@
1415
; GCN-DAG: s_mov_b32 s{{[0-9]+}}, -1
1516
; CI-DAG: s_mov_b32 s{{[0-9]+}}, 0xe8f000
1617
; VI-DAG: s_mov_b32 s{{[0-9]+}}, 0xe80000
18+
; GFX9-DAG: s_mov_b32 s{{[0-9]+}}, 0xe00000
1719

1820

1921
; GCNHSA: .amd_kernel_code_t

llvm/test/CodeGen/AMDGPU/large-alloca-graphics.ll

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,14 @@
11
; RUN: llc -march=amdgcn -mcpu=bonaire < %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=ALL %s
22
; RUN: llc -march=amdgcn -mcpu=carrizo -mattr=-flat-for-global < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=ALL %s
3+
; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=ALL %s
34

45
; ALL-LABEL: {{^}}large_alloca_pixel_shader:
56
; GCN-DAG: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
67
; GCN-DAG: s_mov_b32 s9, SCRATCH_RSRC_DWORD1
78
; GCN-DAG: s_mov_b32 s10, -1
89
; CI-DAG: s_mov_b32 s11, 0xe8f000
910
; VI-DAG: s_mov_b32 s11, 0xe80000
11+
; GFX9-DAG: s_mov_b32 s11, 0xe00000
1012

1113
; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, s[8:11], s0 offen
1214
; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, s[8:11], s0 offen
@@ -28,6 +30,7 @@ define amdgpu_ps void @large_alloca_pixel_shader(i32 %x, i32 %y) #0 {
2830
; GCN-DAG: s_mov_b32 s10, -1
2931
; CI-DAG: s_mov_b32 s11, 0xe8f000
3032
; VI-DAG: s_mov_b32 s11, 0xe80000
33+
; GFX9-DAG: s_mov_b32 s11, 0xe00000
3134

3235
; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, s[8:11], s2 offen
3336
; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, s[8:11], s2 offen

llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCNMESA -check-prefix=SIMESA %s
22
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+vgpr-spilling,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCNMESA -check-prefix=VIMESA %s
3+
; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=+vgpr-spilling,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCNMESA -check-prefix=GFX9MESA %s
34
; RUN: llc -march=amdgcn -mcpu=hawaii -mtriple=amdgcn-unknown-amdhsa -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CIHSA -check-prefix=HSA %s
45
; RUN: llc -march=amdgcn -mcpu=fiji -mtriple=amdgcn-unknown-amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VIHSA -check-prefix=HSA %s
56

@@ -21,10 +22,11 @@
2122

2223
; GCNMESA-DAG: s_mov_b32 s16, s3
2324
; GCNMESA-DAG: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
24-
; GCNMESA--DAG: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
25+
; GCNMESA-DAG: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
2526
; GCNMESA-DAG: s_mov_b32 s14, -1
2627
; SIMESA-DAG: s_mov_b32 s15, 0xe8f000
2728
; VIMESA-DAG: s_mov_b32 s15, 0xe80000
29+
; GFX9MESA-DAG: s_mov_b32 s15, 0xe00000
2830

2931

3032
; GCN: buffer_store_dword {{v[0-9]+}}, off, s[12:15], s16 offset:{{[0-9]+}} ; 4-byte Folded Spill

llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
22
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
3+
; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s
34

45
; This ends up using all 255 registers and requires register
56
; scavenging which will fail to find an unused register.
@@ -18,6 +19,7 @@
1819
; GCN-DAG: s_mov_b32 s{{[0-9]+}}, -1
1920
; SI-DAG: s_mov_b32 s[[DESC3:[0-9]+]], 0xe8f000
2021
; VI-DAG: s_mov_b32 s[[DESC3:[0-9]+]], 0xe80000
22+
; GFX9-DAG: s_mov_b32 s[[DESC3:[0-9]+]], 0xe00000
2123

2224
; OFFREG is offset system SGPR
2325
; GCN: buffer_store_dword {{v[0-9]+}}, off, s{{\[}}[[DESC0]]:[[DESC3]]], s[[OFFREG]] offset:{{[0-9]+}} ; 4-byte Folded Spill

0 commit comments

Comments
 (0)