Skip to content

Commit

Permalink
Revert "[AMDGPU] Try to fix the block prologs broken by RA inserted instructions (llvm#69924)"
Browse files Browse the repository at this point in the history

This reverts commit a0eb6b8.

Caused CTS failures:

ubuntu_20-04_navi21_vk-llvm-test / CTS.dEQP-VK.ssbo.phys.layout.unsized_nested_struct_array.single_buffer.scalar_instance_array_comp_access
ubuntu_20-04_navi21_vk-llvm-test / CTS.dEQP-VK.ssbo.phys.layout.unsized_nested_struct_array.single_buffer.scalar_instance_array_comp_access_store_cols
ubuntu_22-04_navi31_vk-llvm-test / CTS.dEQP-VK.ssbo.phys.layout.unsized_nested_struct_array.single_buffer.scalar_instance_array_comp_access
ubuntu_22-04_navi31_vk-llvm-test / CTS.dEQP-VK.ssbo.phys.layout.unsized_nested_struct_array.single_buffer.scalar_instance_array_comp_access_store_cols

Change-Id: Id804440b442cae543c2dd0f5c2ba1bcb6b805d1e
  • Loading branch information
piotrAMD committed Oct 30, 2023
1 parent c294719 commit d648e11
Show file tree
Hide file tree
Showing 16 changed files with 523 additions and 629 deletions.
12 changes: 2 additions & 10 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8397,16 +8397,8 @@ unsigned SIInstrInfo::getLiveRangeSplitOpcode(Register SrcReg,
}

// Decides whether MI is to be treated as part of a basic block's prologue.
//
// NOTE(review): this span is a diff hunk rendered without +/- markers. The
// hunk header reports "2 additions & 10 deletions", so the lines from the
// first comment below through the first `return` appear to be the body being
// reverted, and the final two-line `return` the body being restored. Confirm
// against the repository before treating this text as compilable source.
bool SIInstrInfo::isBasicBlockPrologue(const MachineInstr &MI) const {
// We need to handle instructions which may be inserted during register
// allocation to handle the prolog. The initial prolog instruction may have
// been separated from the start of the block by spills and copies inserted
// needed by the prolog.
uint16_t Opc = MI.getOpcode();

// FIXME: Copies inserted in the block prolog for live-range split should also
// be included.
// This form additionally accepts any opcode for which isSpillOpcode() is
// true, regardless of the terminator/COPY/EXEC checks.
return (isSpillOpcode(Opc) || (!MI.isTerminator() && Opc != AMDGPU::COPY &&
MI.modifiesRegister(AMDGPU::EXEC, &RI)));
// This form accepts only instructions that are not terminators, are not
// AMDGPU::COPY, and modify the EXEC register.
return !MI.isTerminator() && MI.getOpcode() != AMDGPU::COPY &&
MI.modifiesRegister(AMDGPU::EXEC, &RI);
}

MachineInstrBuilder
Expand Down
5 changes: 0 additions & 5 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -675,11 +675,6 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
return get(Opcode).TSFlags & SIInstrFlags::SGPRSpill;
}

// Returns true when the TSFlags of the instruction description for Opcode
// have the SGPRSpill bit, the VGPRSpill bit, or both set (the masked value is
// converted to bool).
//
// NOTE(review): the hunk header for this file reports 5 deletions and no
// additions, so this helper appears to be removed by the commit; confirm
// before relying on it.
bool isSpillOpcode(uint16_t Opcode) const {
return get(Opcode).TSFlags &
(SIInstrFlags::SGPRSpill | SIInstrFlags::VGPRSpill);
}

static bool isWWMRegSpillOpcode(uint16_t Opcode) {
return Opcode == AMDGPU::SI_SPILL_WWM_V32_SAVE ||
Opcode == AMDGPU::SI_SPILL_WWM_AV32_SAVE ||
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,8 @@ define <4 x float> @waterfall_loop(<8 x i32> %vgpr_srd) {
; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
; CHECK-NEXT: s_mov_b32 exec_lo, s21
; CHECK-NEXT: ; %bb.2: ; in Loop: Header=BB0_1 Depth=1
; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
; CHECK-NEXT: s_or_saveexec_b32 s21, -1
; CHECK-NEXT: buffer_load_dword v2, off, s[0:3], s32 ; 4-byte Folded Reload
; CHECK-NEXT: s_mov_b32 exec_lo, s21
Expand All @@ -161,9 +163,6 @@ define <4 x float> @waterfall_loop(<8 x i32> %vgpr_srd) {
; CHECK-NEXT: v_readlane_b32 s17, v2, 1
; CHECK-NEXT: v_readlane_b32 s18, v2, 2
; CHECK-NEXT: v_readlane_b32 s19, v2, 3
; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: image_sample v0, v[0:1], s[8:15], s[16:19] dmask:0x1 dim:SQ_RSRC_IMG_2D
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
Expand Down
7 changes: 4 additions & 3 deletions llvm/test/CodeGen/AMDGPU/bb-prolog-spill-during-regalloc.ll
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -O0 -verify-machineinstrs --stop-after=regallocfast,1 -o - %s | FileCheck -check-prefix=REGALLOC %s

; Test to check if the bb prolog spills are inserted correctly during regalloc.
; FIXME: There are two spill codes inserted wrongly in this test.
; They are inserted during regalloc for the BBLiveIns - the spill restores for vgpr1 in the Flow block (bb.1) and for vgpr0 in the return block (bb.4).
define i32 @prolog_spill(i32 %arg0, i32 %arg1, i32 %arg2) {
; REGALLOC-LABEL: name: prolog_spill
; REGALLOC: bb.0.bb.0:
Expand Down Expand Up @@ -32,10 +33,10 @@ define i32 @prolog_spill(i32 %arg0, i32 %arg1, i32 %arg2) {
; REGALLOC-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000)
; REGALLOC-NEXT: {{ $}}
; REGALLOC-NEXT: $vgpr0 = SI_SPILL_WWM_V32_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
; REGALLOC-NEXT: $vgpr1 = SI_SPILL_V32_RESTORE %stack.3, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
; REGALLOC-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0, implicit-def $sgpr4_sgpr5
; REGALLOC-NEXT: $sgpr5 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 1
; REGALLOC-NEXT: renamable $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 killed renamable $sgpr4_sgpr5, implicit-def $exec, implicit-def dead $scc, implicit $exec
; REGALLOC-NEXT: $vgpr1 = SI_SPILL_V32_RESTORE %stack.3, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
; REGALLOC-NEXT: SI_SPILL_V32_SAVE killed $vgpr1, %stack.6, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5)
; REGALLOC-NEXT: renamable $sgpr4_sgpr5 = S_AND_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def dead $scc
; REGALLOC-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr4, 2, $vgpr0, implicit-def $sgpr4_sgpr5, implicit $sgpr4_sgpr5
Expand Down Expand Up @@ -65,10 +66,10 @@ define i32 @prolog_spill(i32 %arg0, i32 %arg1, i32 %arg2) {
; REGALLOC-NEXT: {{ $}}
; REGALLOC-NEXT: bb.4.bb.3:
; REGALLOC-NEXT: $vgpr1 = SI_SPILL_WWM_V32_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
; REGALLOC-NEXT: $vgpr0 = SI_SPILL_V32_RESTORE %stack.6, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.6, addrspace 5)
; REGALLOC-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 2, implicit-def $sgpr4_sgpr5
; REGALLOC-NEXT: $sgpr5 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 3
; REGALLOC-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def dead $scc
; REGALLOC-NEXT: $vgpr0 = SI_SPILL_V32_RESTORE %stack.6, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.6, addrspace 5)
; REGALLOC-NEXT: renamable $sgpr4 = S_MOV_B32 5
; REGALLOC-NEXT: renamable $vgpr0 = V_MUL_LO_U32_e64 killed $vgpr0, killed $sgpr4, implicit $exec
; REGALLOC-NEXT: KILL killed renamable $vgpr1
Expand Down

0 comments on commit d648e11

Please sign in to comment.