Skip to content

Commit f426ddb

Browse files
committed
AMDGPU: Assume ECC is enabled by default if supported
The test should really be checking for the property directly in the code object headers, but there are problems with this. I don't see this directly represented in the text form, and for the binary emission this is depending on a function level subtarget feature to emit a global flag. llvm-svn: 357558
1 parent f7887d4 commit f426ddb

File tree

7 files changed

+127
-7
lines changed

7 files changed

+127
-7
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -281,6 +281,12 @@ def FeatureDot2Insts : SubtargetFeature<"dot2-insts",
281281
"Has v_dot2_f32_f16, v_dot2_i32_i16, v_dot2_u32_u16, v_dot4_u32_u8, v_dot8_u32_u4 instructions"
282282
>;
283283

284+
def FeatureDoesNotSupportSRAMECC : SubtargetFeature<"no-sram-ecc-support",
285+
"DoesNotSupportSRAMECC",
286+
"true",
287+
"Hardware does not support SRAM ECC"
288+
>;
289+
284290
def FeatureSRAMECC : SubtargetFeature<"sram-ecc",
285291
"EnableSRAMECC",
286292
"true",
@@ -439,14 +445,16 @@ def FeatureSouthernIslands : GCNSubtargetFeatureGeneration<"SOUTHERN_ISLANDS",
439445
"southern-islands",
440446
[FeatureFP64, FeatureLocalMemorySize32768, FeatureMIMG_R128,
441447
FeatureWavefrontSize64,
442-
FeatureLDSBankCount32, FeatureMovrel, FeatureTrigReducedRange]
448+
FeatureLDSBankCount32, FeatureMovrel, FeatureTrigReducedRange,
449+
FeatureDoesNotSupportSRAMECC]
443450
>;
444451

445452
def FeatureSeaIslands : GCNSubtargetFeatureGeneration<"SEA_ISLANDS",
446453
"sea-islands",
447454
[FeatureFP64, FeatureLocalMemorySize65536, FeatureMIMG_R128,
448455
FeatureWavefrontSize64, FeatureFlatAddressSpace,
449-
FeatureCIInsts, FeatureMovrel, FeatureTrigReducedRange]
456+
FeatureCIInsts, FeatureMovrel, FeatureTrigReducedRange,
457+
FeatureDoesNotSupportSRAMECC]
450458
>;
451459

452460
def FeatureVolcanicIslands : GCNSubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
@@ -457,7 +465,7 @@ def FeatureVolcanicIslands : GCNSubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
457465
FeatureSMemRealTime, FeatureVGPRIndexMode, FeatureMovrel,
458466
FeatureScalarStores, FeatureInv2PiInlineImm,
459467
FeatureSDWA, FeatureSDWAOutModsVOPC, FeatureSDWAMac, FeatureDPP,
460-
FeatureIntClamp, FeatureTrigReducedRange
468+
FeatureIntClamp, FeatureTrigReducedRange, FeatureDoesNotSupportSRAMECC
461469
]
462470
>;
463471

@@ -550,19 +558,22 @@ def FeatureISAVersion9_0_0 : FeatureSet<
550558
[FeatureGFX9,
551559
FeatureMadMixInsts,
552560
FeatureLDSBankCount32,
553-
FeatureCodeObjectV3]>;
561+
FeatureCodeObjectV3,
562+
FeatureDoesNotSupportSRAMECC]>;
554563

555564
def FeatureISAVersion9_0_2 : FeatureSet<
556565
[FeatureGFX9,
557566
FeatureMadMixInsts,
558567
FeatureLDSBankCount32,
559568
FeatureXNACK,
569+
FeatureDoesNotSupportSRAMECC,
560570
FeatureCodeObjectV3]>;
561571

562572
def FeatureISAVersion9_0_4 : FeatureSet<
563573
[FeatureGFX9,
564574
FeatureLDSBankCount32,
565575
FeatureFmaMixInsts,
576+
FeatureDoesNotSupportSRAMECC,
566577
FeatureCodeObjectV3]>;
567578

568579
def FeatureISAVersion9_0_6 : FeatureSet<

llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ R600Subtarget::initializeSubtargetDependencies(const Triple &TT,
6464

6565
GCNSubtarget &
6666
GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
67-
StringRef GPU, StringRef FS) {
67+
StringRef GPU, StringRef FS) {
6868
// Determine default and user-specified characteristics
6969
// On SI+, we want FP64 denormals to be on by default. FP32 denormals can be
7070
// enabled, but some instructions do not respect them and they run at the
@@ -77,7 +77,8 @@ GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
7777
// Similarly we want enable-prt-strict-null to be on by default and not to
7878
// unset everything else if it is disabled
7979

80-
SmallString<256> FullFS("+promote-alloca,+load-store-opt,");
80+
// Assuming ECC is enabled is the conservative default.
81+
SmallString<256> FullFS("+promote-alloca,+load-store-opt,+sram-ecc,");
8182

8283
if (isAmdHsaOS()) // Turn on FlatForGlobal for HSA.
8384
FullFS += "+flat-for-global,+unaligned-buffer-access,+trap-handler,";
@@ -129,6 +130,14 @@ GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
129130

130131
HasFminFmaxLegacy = getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS;
131132

133+
// ECC is on by default, but turn it off if the hardware doesn't support it
134+
// anyway. This matters for the gfx9 targets with d16 loads, but don't support
135+
// ECC.
136+
if (DoesNotSupportSRAMECC && EnableSRAMECC) {
137+
ToggleFeature(AMDGPU::FeatureSRAMECC);
138+
EnableSRAMECC = false;
139+
}
140+
132141
return *this;
133142
}
134143

@@ -206,6 +215,7 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
206215
HasDot1Insts(false),
207216
HasDot2Insts(false),
208217
EnableSRAMECC(false),
218+
DoesNotSupportSRAMECC(false),
209219
FlatAddressSpace(false),
210220
FlatInstOffsets(false),
211221
FlatGlobalInsts(false),

llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -332,6 +332,7 @@ class GCNSubtarget : public AMDGPUGenSubtargetInfo,
332332
bool HasDot1Insts;
333333
bool HasDot2Insts;
334334
bool EnableSRAMECC;
335+
bool DoesNotSupportSRAMECC;
335336
bool FlatAddressSpace;
336337
bool FlatInstOffsets;
337338
bool FlatGlobalInsts;

llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,10 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
8484
AMDGPU::FeatureTrapHandler,
8585
AMDGPU::FeatureCodeObjectV3,
8686

87+
// The default assumption needs to be ecc is enabled, but no directly
88+
// exposed operations depend on it, so it can be safely inlined.
89+
AMDGPU::FeatureSRAMECC,
90+
8791
// Perf-tuning features
8892
AMDGPU::FeatureFastFMAF32,
8993
AMDGPU::HalfRate64Ops

llvm/test/CodeGen/AMDGPU/load-hi16.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; RUN: llc -march=amdgcn -mcpu=gfx900 -amdgpu-sroa=0 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX900 %s
2-
; RUN: llc -march=amdgcn -mcpu=gfx906 -amdgpu-sroa=0 -mattr=-promote-alloca,+sram-ecc -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX906,NO-D16-HI %s
2+
; RUN: llc -march=amdgcn -mcpu=gfx906 -amdgpu-sroa=0 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX906,NO-D16-HI %s
33
; RUN: llc -march=amdgcn -mcpu=fiji -amdgpu-sroa=0 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX803,NO-D16-HI %s
44

55
; GCN-LABEL: {{^}}load_local_lo_hi_v2i16_multi_use_lo:
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
; RUN: llc -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,NO-ECC %s
2+
; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=+sram-ecc < %s | FileCheck -check-prefixes=GCN,NO-ECC %s
3+
; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-sram-ecc < %s | FileCheck -check-prefixes=GCN,NO-ECC %s
4+
; RUN: llc -march=amdgcn -mcpu=gfx902 -mattr=+sram-ecc < %s | FileCheck -check-prefixes=GCN,NO-ECC %s
5+
; RUN: llc -march=amdgcn -mcpu=gfx904 -mattr=+sram-ecc < %s | FileCheck -check-prefixes=GCN,NO-ECC %s
6+
; RUN: llc -march=amdgcn -mcpu=gfx906 -mattr=+sram-ecc < %s | FileCheck -check-prefixes=GCN,ECC %s
7+
; RUN: llc -march=amdgcn -mcpu=gfx906 -mattr=-sram-ecc < %s | FileCheck -check-prefixes=GCN,NO-ECC %s
8+
9+
; Make sure the correct set of targets are marked with
10+
; FeatureDoesNotSupportSRAMECC, and +sram-ecc is ignored if it's never
11+
; supported.
12+
13+
; GCN-LABEL: {{^}}load_global_hi_v2i16_reglo_vreg:
14+
; NO-ECC: global_load_short_d16_hi
15+
; ECC: global_load_ushort
16+
define void @load_global_hi_v2i16_reglo_vreg(i16 addrspace(1)* %in, i16 %reg) {
17+
entry:
18+
%gep = getelementptr inbounds i16, i16 addrspace(1)* %in, i64 -2047
19+
%load = load i16, i16 addrspace(1)* %gep
20+
%build0 = insertelement <2 x i16> undef, i16 %reg, i32 0
21+
%build1 = insertelement <2 x i16> %build0, i16 %load, i32 1
22+
store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
23+
ret void
24+
}
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -inline < %s | FileCheck %s
2+
; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -passes='cgscc(inline)' < %s | FileCheck %s
3+
4+
; sram-ecc can be safely ignored when inlining, since no intrinisics
5+
; or other directly exposed operations depend on it.
6+
7+
define i32 @func_default() #0 {
8+
ret i32 0
9+
}
10+
11+
define i32 @func_ecc_enabled() #1 {
12+
ret i32 0
13+
}
14+
15+
define i32 @func_ecc_disabled() #2 {
16+
ret i32 0
17+
}
18+
19+
; CHECK-LABEL: @default_call_default(
20+
; CHECK-NEXT: ret i32 0
21+
define i32 @default_call_default() #0 {
22+
%call = call i32 @func_default()
23+
ret i32 %call
24+
}
25+
26+
; CHECK-LABEL: @ecc_enabled_call_default(
27+
; CHECK-NEXT: ret i32 0
28+
define i32 @ecc_enabled_call_default() #1 {
29+
%call = call i32 @func_default()
30+
ret i32 %call
31+
}
32+
33+
; CHECK-LABEL: @ecc_enabled_call_ecc_enabled(
34+
; CHECK-NEXT: ret i32 0
35+
define i32 @ecc_enabled_call_ecc_enabled() #1 {
36+
%call = call i32 @func_ecc_enabled()
37+
ret i32 %call
38+
}
39+
40+
; CHECK-LABEL: @ecc_enabled_call_ecc_disabled(
41+
; CHECK-NEXT: ret i32 0
42+
define i32 @ecc_enabled_call_ecc_disabled() #1 {
43+
%call = call i32 @func_ecc_disabled()
44+
ret i32 %call
45+
}
46+
47+
; CHECK-LABEL: @ecc_disabled_call_default(
48+
; CHECK-NEXT: ret i32 0
49+
define i32 @ecc_disabled_call_default() #2 {
50+
%call = call i32 @func_default()
51+
ret i32 %call
52+
}
53+
54+
; CHECK-LABEL: @ecc_disabled_call_ecc_enabled(
55+
; CHECK-NEXT: ret i32 0
56+
define i32 @ecc_disabled_call_ecc_enabled() #2 {
57+
%call = call i32 @func_ecc_enabled()
58+
ret i32 %call
59+
}
60+
61+
; CHECK-LABEL: @ecc_disabled_call_ecc_disabled(
62+
; CHECK-NEXT: ret i32 0
63+
define i32 @ecc_disabled_call_ecc_disabled() #2 {
64+
%call = call i32 @func_ecc_disabled()
65+
ret i32 %call
66+
}
67+
68+
attributes #0 = { nounwind }
69+
attributes #1 = { nounwind "target-features"="+sram-ecc" }
70+
attributes #2 = { nounwind "target-features"="-sram-ecc" }

0 commit comments

Comments
 (0)