Skip to content

Commit 1fc7021

Browse files
committed
AMDGPU: Add basic gfx941 target
Differential Revision: https://reviews.llvm.org/D149982
1 parent 1e46394 commit 1fc7021

File tree

23 files changed: 99 additions and 3 deletions.

clang/include/clang/Basic/Cuda.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@ enum class CudaArch {
9292
GFX90a,
9393
GFX90c,
9494
GFX940,
95+
GFX941,
9596
GFX1010,
9697
GFX1011,
9798
GFX1012,

clang/lib/Basic/Cuda.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,7 @@ static const CudaArchToStringMap arch_names[] = {
114114
GFX(90a), // gfx90a
115115
GFX(90c), // gfx90c
116116
GFX(940), // gfx940
117+
GFX(941), // gfx941
117118
GFX(1010), // gfx1010
118119
GFX(1011), // gfx1011
119120
GFX(1012), // gfx1012

clang/lib/Basic/Targets/NVPTX.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,7 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
195195
case CudaArch::GFX90a:
196196
case CudaArch::GFX90c:
197197
case CudaArch::GFX940:
198+
case CudaArch::GFX941:
198199
case CudaArch::GFX1010:
199200
case CudaArch::GFX1011:
200201
case CudaArch::GFX1012:

clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3580,6 +3580,7 @@ void CGOpenMPRuntimeGPU::processRequiresDirective(
35803580
case CudaArch::GFX90a:
35813581
case CudaArch::GFX90c:
35823582
case CudaArch::GFX940:
3583+
case CudaArch::GFX941:
35833584
case CudaArch::GFX1010:
35843585
case CudaArch::GFX1011:
35853586
case CudaArch::GFX1012:

clang/test/CodeGenOpenCL/amdgpu-features.cl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx90a -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX90A %s
3131
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx90c -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX90C %s
3232
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx940 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX940 %s
33+
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx941 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX941 %s
3334
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1010 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX1010 %s
3435
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1011 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX1011 %s
3536
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1012 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX1012 %s
@@ -75,6 +76,7 @@
7576
// GFX90A: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
7677
// GFX90C: "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
7778
// GFX940: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
79+
// GFX941: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
7880
// GFX1010: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
7981
// GFX1011: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
8082
// GFX1012: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"

clang/test/Driver/amdgpu-macros.cl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,7 @@
109109
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx90a %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx90a -DFAMILY=GFX9
110110
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx90c %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx90c -DFAMILY=GFX9
111111
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx940 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx940 -DFAMILY=GFX9
112+
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx941 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx941 -DFAMILY=GFX9
112113
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx1010 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1010 -DFAMILY=GFX10
113114
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx1011 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1011 -DFAMILY=GFX10
114115
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx1012 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1012 -DFAMILY=GFX10

clang/test/Driver/amdgpu-mcpu.cl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@
9393
// RUN: %clang -### -target amdgcn -mcpu=gfx90a %s 2>&1 | FileCheck --check-prefix=GFX90A %s
9494
// RUN: %clang -### -target amdgcn -mcpu=gfx90c %s 2>&1 | FileCheck --check-prefix=GFX90C %s
9595
// RUN: %clang -### -target amdgcn -mcpu=gfx940 %s 2>&1 | FileCheck --check-prefix=GFX940 %s
96+
// RUN: %clang -### -target amdgcn -mcpu=gfx941 %s 2>&1 | FileCheck --check-prefix=GFX941 %s
9697
// RUN: %clang -### -target amdgcn -mcpu=gfx1010 %s 2>&1 | FileCheck --check-prefix=GFX1010 %s
9798
// RUN: %clang -### -target amdgcn -mcpu=gfx1011 %s 2>&1 | FileCheck --check-prefix=GFX1011 %s
9899
// RUN: %clang -### -target amdgcn -mcpu=gfx1012 %s 2>&1 | FileCheck --check-prefix=GFX1012 %s
@@ -133,6 +134,7 @@
133134
// GFX90A: "-target-cpu" "gfx90a"
134135
// GFX90C: "-target-cpu" "gfx90c"
135136
// GFX940: "-target-cpu" "gfx940"
137+
// GFX941: "-target-cpu" "gfx941"
136138
// GFX1010: "-target-cpu" "gfx1010"
137139
// GFX1011: "-target-cpu" "gfx1011"
138140
// GFX1012: "-target-cpu" "gfx1012"

clang/test/Misc/target-invalid-cpu-note.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,15 +29,15 @@
2929

3030
// RUN: not %clang_cc1 -triple nvptx--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix NVPTX
3131
// NVPTX: error: unknown target CPU 'not-a-cpu'
32-
// NVPTX-NEXT: note: valid target CPU values are: sm_20, sm_21, sm_30, sm_32, sm_35, sm_37, sm_50, sm_52, sm_53, sm_60, sm_61, sm_62, sm_70, sm_72, sm_75, sm_80, sm_86, sm_87, sm_89, sm_90, gfx600, gfx601, gfx602, gfx700, gfx701, gfx702, gfx703, gfx704, gfx705, gfx801, gfx802, gfx803, gfx805, gfx810, gfx900, gfx902, gfx904, gfx906, gfx908, gfx909, gfx90a, gfx90c, gfx940, gfx1010, gfx1011, gfx1012, gfx1013, gfx1030, gfx1031, gfx1032, gfx1033, gfx1034, gfx1035, gfx1036, gfx1100, gfx1101, gfx1102, gfx1103{{$}}
32+
// NVPTX-NEXT: note: valid target CPU values are: sm_20, sm_21, sm_30, sm_32, sm_35, sm_37, sm_50, sm_52, sm_53, sm_60, sm_61, sm_62, sm_70, sm_72, sm_75, sm_80, sm_86, sm_87, sm_89, sm_90, gfx600, gfx601, gfx602, gfx700, gfx701, gfx702, gfx703, gfx704, gfx705, gfx801, gfx802, gfx803, gfx805, gfx810, gfx900, gfx902, gfx904, gfx906, gfx908, gfx909, gfx90a, gfx90c, gfx940, gfx941, gfx1010, gfx1011, gfx1012, gfx1013, gfx1030, gfx1031, gfx1032, gfx1033, gfx1034, gfx1035, gfx1036, gfx1100, gfx1101, gfx1102, gfx1103{{$}}
3333

3434
// RUN: not %clang_cc1 -triple r600--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix R600
3535
// R600: error: unknown target CPU 'not-a-cpu'
3636
// R600-NEXT: note: valid target CPU values are: r600, rv630, rv635, r630, rs780, rs880, rv610, rv620, rv670, rv710, rv730, rv740, rv770, cedar, palm, cypress, hemlock, juniper, redwood, sumo, sumo2, barts, caicos, aruba, cayman, turks{{$}}
3737

3838
// RUN: not %clang_cc1 -triple amdgcn--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix AMDGCN
3939
// AMDGCN: error: unknown target CPU 'not-a-cpu'
40-
// AMDGCN-NEXT: note: valid target CPU values are: gfx600, tahiti, gfx601, pitcairn, verde, gfx602, hainan, oland, gfx700, kaveri, gfx701, hawaii, gfx702, gfx703, kabini, mullins, gfx704, bonaire, gfx705, gfx801, carrizo, gfx802, iceland, tonga, gfx803, fiji, polaris10, polaris11, gfx805, tongapro, gfx810, stoney, gfx900, gfx902, gfx904, gfx906, gfx908, gfx909, gfx90a, gfx90c, gfx940, gfx1010, gfx1011, gfx1012, gfx1013, gfx1030, gfx1031, gfx1032, gfx1033, gfx1034, gfx1035, gfx1036, gfx1100, gfx1101, gfx1102, gfx1103{{$}}
40+
// AMDGCN-NEXT: note: valid target CPU values are: gfx600, tahiti, gfx601, pitcairn, verde, gfx602, hainan, oland, gfx700, kaveri, gfx701, hawaii, gfx702, gfx703, kabini, mullins, gfx704, bonaire, gfx705, gfx801, carrizo, gfx802, iceland, tonga, gfx803, fiji, polaris10, polaris11, gfx805, tongapro, gfx810, stoney, gfx900, gfx902, gfx904, gfx906, gfx908, gfx909, gfx90a, gfx90c, gfx940, gfx941, gfx1010, gfx1011, gfx1012, gfx1013, gfx1030, gfx1031, gfx1032, gfx1033, gfx1034, gfx1035, gfx1036, gfx1100, gfx1101, gfx1102, gfx1103{{$}}
4141

4242
// RUN: not %clang_cc1 -triple wasm64--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix WEBASM
4343
// WEBASM: error: unknown target CPU 'not-a-cpu'

llvm/docs/AMDGPUUsage.rst

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -385,6 +385,13 @@ Every processor supports every OS ABI (see :ref:`amdgpu-os`) with the following
385385
work-item Add product
386386
IDs names.
387387

388+
``gfx941`` ``amdgcn`` dGPU - sramecc - Architected *TBA*
389+
- tgsplit flat
390+
- xnack scratch .. TODO::
391+
- Packed
392+
work-item Add product
393+
IDs names.
394+
388395
**GCN GFX10.1 (RDNA 1)** [AMD-GCN-GFX10-RDNA1]_
389396
-----------------------------------------------------------------------------------------------------------------------
390397
``gfx1010`` ``amdgcn`` dGPU - cumode - Absolute - *rocm-amdhsa* - Radeon RX 5700
@@ -1324,6 +1331,7 @@ The AMDGPU backend uses the following ELF header:
13241331
*reserved* 0x048 Reserved.
13251332
*reserved* 0x049 Reserved.
13261333
*reserved* 0x04a Reserved.
1334+
``EF_AMDGPU_MACH_AMDGCN_GFX941`` 0x04b ``gfx941``
13271335
==================================== ========== =============================
13281336

13291337
Sections

llvm/include/llvm/BinaryFormat/ELF.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -781,10 +781,11 @@ enum : unsigned {
781781
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X48 = 0x048,
782782
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X49 = 0x049,
783783
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X4A = 0x04a,
784+
EF_AMDGPU_MACH_AMDGCN_GFX941 = 0x04b,
784785

785786
// First/last AMDGCN-based processors.
786787
EF_AMDGPU_MACH_AMDGCN_FIRST = EF_AMDGPU_MACH_AMDGCN_GFX600,
787-
EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_RESERVED_0X4A,
788+
EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_GFX941,
788789

789790
// Indicates if the "xnack" target feature is enabled for all code contained
790791
// in the object.

llvm/include/llvm/TargetParser/TargetParser.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@ enum GPUKind : uint32_t {
8484
GK_GFX90A = 66,
8585
GK_GFX90C = 67,
8686
GK_GFX940 = 68,
87+
GK_GFX941 = 69,
8788

8889
GK_GFX1010 = 71,
8990
GK_GFX1011 = 72,

llvm/lib/Object/ELFObjectFile.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -463,6 +463,8 @@ StringRef ELFObjectFileBase::getAMDGPUCPUName() const {
463463
return "gfx90c";
464464
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX940:
465465
return "gfx940";
466+
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX941:
467+
return "gfx941";
466468

467469
// AMDGCN GFX10.
468470
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010:

llvm/lib/ObjectYAML/ELFYAML.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -590,6 +590,7 @@ void ScalarBitSetTraits<ELFYAML::ELF_EF>::bitset(IO &IO,
590590
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX90A, EF_AMDGPU_MACH);
591591
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX90C, EF_AMDGPU_MACH);
592592
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX940, EF_AMDGPU_MACH);
593+
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX941, EF_AMDGPU_MACH);
593594
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1010, EF_AMDGPU_MACH);
594595
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1011, EF_AMDGPU_MACH);
595596
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1012, EF_AMDGPU_MACH);

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1227,6 +1227,40 @@ def FeatureISAVersion9_4_0 : FeatureSet<
12271227
FullRate64Ops,
12281228
FeatureBackOffBarrier]>;
12291229

1230+
// Feature set for ISA version 9.4.1. Its `.Features` list is what the
// "gfx941" ProcessorModel entry in GCNProcessors.td passes as its feature list.
// NOTE(review): this list looks like a copy of FeatureISAVersion9_4_0 (only the
// tail of that def is visible in this hunk) -- confirm the two stay in sync.
def FeatureISAVersion9_4_1 : FeatureSet<
1231+
[FeatureGFX9,
1232+
FeatureGFX90AInsts,
1233+
FeatureGFX940Insts,
1234+
FeatureFmaMixInsts,
1235+
FeatureLDSBankCount32,
1236+
FeatureDLInsts,
1237+
FeatureFmacF64Inst,
1238+
FeatureDot1Insts,
1239+
FeatureDot2Insts,
1240+
FeatureDot3Insts,
1241+
FeatureDot4Insts,
1242+
FeatureDot5Insts,
1243+
FeatureDot6Insts,
1244+
FeatureDot7Insts,
1245+
FeatureDot10Insts,
1246+
FeatureAtomicDsPkAdd16Insts,
1247+
FeatureAtomicFlatPkAdd16Insts,
1248+
Feature64BitDPP,
1249+
FeaturePackedFP32Ops,
1250+
FeatureMAIInsts,
1251+
FeatureFP8Insts,
1252+
FeaturePkFmacF16Inst,
1253+
FeatureAtomicFaddRtnInsts,
1254+
FeatureAtomicFaddNoRtnInsts,
1255+
FeatureAtomicBufferGlobalPkAddF16Insts,
1256+
FeatureAtomicGlobalPkAddBF16Inst,
1257+
FeatureFlatAtomicFaddF32Inst,
1258+
FeatureSupportsSRAMECC,
1259+
FeaturePackedTID,
1260+
FeatureArchitectedFlatScratch,
1261+
FullRate64Ops,
1262+
FeatureBackOffBarrier]>;
1263+
12301264
// TODO: Organize more features into groups.
12311265
def FeatureGroup {
12321266
// Bugs present on gfx10.1.

llvm/lib/Target/AMDGPU/GCNProcessors.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,10 @@ def : ProcessorModel<"gfx940", SIDPGFX940FullSpeedModel,
196196
FeatureISAVersion9_4_0.Features
197197
>;
198198

199+
// Processor entry for "gfx941": passes SIDPGFX940FullSpeedModel (the same model
// argument the "gfx940" entry above uses) together with the ISA 9.4.1 feature
// list.
def : ProcessorModel<"gfx941", SIDPGFX940FullSpeedModel,
200+
FeatureISAVersion9_4_1.Features
201+
>;
202+
199203
//===----------------------------------------------------------------------===//
200204
// GCN GFX10.
201205
//===----------------------------------------------------------------------===//

llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,7 @@ StringRef AMDGPUTargetStreamer::getArchNameFromElfMach(unsigned ElfMach) {
107107
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX90A: AK = GK_GFX90A; break;
108108
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX90C: AK = GK_GFX90C; break;
109109
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX940: AK = GK_GFX940; break;
110+
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX941: AK = GK_GFX941; break;
110111
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010: AK = GK_GFX1010; break;
111112
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011: AK = GK_GFX1011; break;
112113
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012: AK = GK_GFX1012; break;
@@ -176,6 +177,7 @@ unsigned AMDGPUTargetStreamer::getElfMach(StringRef GPU) {
176177
case GK_GFX90A: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX90A;
177178
case GK_GFX90C: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX90C;
178179
case GK_GFX940: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX940;
180+
case GK_GFX941: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX941;
179181
case GK_GFX1010: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010;
180182
case GK_GFX1011: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011;
181183
case GK_GFX1012: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012;

llvm/lib/TargetParser/TargetParser.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,7 @@ constexpr GPUInfo AMDGCNGPUs[] = {
105105
{{"gfx90a"}, {"gfx90a"}, GK_GFX90A, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
106106
{{"gfx90c"}, {"gfx90c"}, GK_GFX90C, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
107107
{{"gfx940"}, {"gfx940"}, GK_GFX940, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
108+
{{"gfx941"}, {"gfx941"}, GK_GFX941, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
108109
{{"gfx1010"}, {"gfx1010"}, GK_GFX1010, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK},
109110
{{"gfx1011"}, {"gfx1011"}, GK_GFX1011, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK},
110111
{{"gfx1012"}, {"gfx1012"}, GK_GFX1012, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK},
@@ -224,6 +225,7 @@ AMDGPU::IsaVersion AMDGPU::getIsaVersion(StringRef GPU) {
224225
case GK_GFX90A: return {9, 0, 10};
225226
case GK_GFX90C: return {9, 0, 12};
226227
case GK_GFX940: return {9, 4, 0};
228+
case GK_GFX941: return {9, 4, 1};
227229
case GK_GFX1010: return {10, 1, 0};
228230
case GK_GFX1011: return {10, 1, 1};
229231
case GK_GFX1012: return {10, 1, 2};
@@ -322,6 +324,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
322324
Features["s-memrealtime"] = true;
323325
Features["s-memtime-inst"] = true;
324326
break;
327+
case GK_GFX941:
325328
case GK_GFX940:
326329
Features["gfx940-insts"] = true;
327330
Features["fp8-insts"] = true;

llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,9 @@
7474
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck --check-prefixes=GFX940 %s
7575
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -mattr=-xnack < %s | FileCheck --check-prefixes=GFX940-NOXNACK %s
7676
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -mattr=+xnack < %s | FileCheck --check-prefixes=GFX940-XNACK %s
77+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx941 < %s | FileCheck --check-prefixes=GFX941 %s
78+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx941 -mattr=-xnack < %s | FileCheck --check-prefixes=GFX941-NOXNACK %s
79+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx941 -mattr=+xnack < %s | FileCheck --check-prefixes=GFX941-XNACK %s
7780
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck --check-prefixes=GFX1010 %s
7881
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=-xnack < %s | FileCheck --check-prefixes=GFX1010-NOXNACK %s
7982
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+xnack < %s | FileCheck --check-prefixes=GFX1010-XNACK %s
@@ -152,6 +155,9 @@
152155
; GFX940: .amdgcn_target "amdgcn-amd-amdhsa--gfx940"
153156
; GFX940-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx940:xnack-"
154157
; GFX940-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx940:xnack+"
158+
; GFX941: .amdgcn_target "amdgcn-amd-amdhsa--gfx941"
159+
; GFX941-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx941:xnack-"
160+
; GFX941-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx941:xnack+"
155161
; GFX1010: .amdgcn_target "amdgcn-amd-amdhsa--gfx1010"
156162
; GFX1010-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1010:xnack-"
157163
; GFX1010-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1010:xnack+"

llvm/test/CodeGen/AMDGPU/elf-header-flags-mach.ll

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@
5555
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx90a < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX90A %s
5656
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx90c < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX90C %s
5757
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx940 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX940 %s
58+
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx941 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX941 %s
5859
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx1010 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1010 %s
5960
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx1011 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1011 %s
6061
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx1012 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1012 %s
@@ -122,6 +123,7 @@
122123
; GFX90A: EF_AMDGPU_MACH_AMDGCN_GFX90A (0x3F)
123124
; GFX90C: EF_AMDGPU_MACH_AMDGCN_GFX90C (0x32)
124125
; GFX940: EF_AMDGPU_MACH_AMDGCN_GFX940 (0x40)
126+
; GFX941: EF_AMDGPU_MACH_AMDGCN_GFX941 (0x4B)
125127
; GFX1010: EF_AMDGPU_MACH_AMDGCN_GFX1010 (0x33)
126128
; GFX1011: EF_AMDGPU_MACH_AMDGCN_GFX1011 (0x34)
127129
; GFX1012: EF_AMDGPU_MACH_AMDGCN_GFX1012 (0x35)

0 commit comments

Comments
 (0)