Skip to content

Commit 13a0b48

Browse files
committed
[OpenMP][libomptarget][AMDGPU] Update print launch info
Clean up the AMD-specific kernel launch info in the NextGen plugins:
- Fixes a mistake introduced with the initial commit that added printing of an AMD-only property.
- Removes another AMD-only property (not clear on upstream status).
- Adds more comments describing what info is printed.

Reviewed By: jdoerfert
Differential Revision: https://reviews.llvm.org/D145924
1 parent 7056260 commit 13a0b48

File tree

3 files changed

+48
-16
lines changed

3 files changed

+48
-16
lines changed

openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp

Lines changed: 21 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -2614,33 +2614,39 @@ Error AMDGPUKernelTy::printLaunchInfoDetails(GenericDeviceTy &GenericDevice,
26142614
return Plugin::success();
26152615

26162616
// General Info
2617-
auto ConstWGSize = getDefaultNumThreads(GenericDevice);
26182617
auto NumGroups = NumBlocks;
2619-
auto ThreadsPerGroup = getDefaultNumThreads(GenericDevice);
2620-
auto NumTeams = KernelArgs.NumTeams[0]; // Only first dimension
2621-
auto ThreadLimit = KernelArgs.ThreadLimit[0]; // Only first dimension
2618+
auto ThreadsPerGroup = NumThreads;
26222619

26232620
// Kernel Arguments Info
26242621
auto ArgNum = KernelArgs.NumArgs;
26252622
auto LoopTripCount = KernelArgs.Tripcount;
26262623

2627-
// Details for AMDGPU kernels
2624+
// Details for AMDGPU kernels (read from image)
2625+
// https://www.llvm.org/docs/AMDGPUUsage.html#code-object-v4-metadata
26282626
auto GroupSegmentSize = (*KernelInfo).GroupSegmentList;
26292627
auto SGPRCount = (*KernelInfo).SGPRCount;
26302628
auto VGPRCount = (*KernelInfo).VGPRCount;
26312629
auto SGPRSpillCount = (*KernelInfo).SGPRSpillCount;
26322630
auto VGPRSpillCount = (*KernelInfo).VGPRSpillCount;
2633-
2634-
// TODO set correctly once host services available
2635-
auto HostCallRequired = false;
2631+
auto MaxFlatWorkgroupSize = (*KernelInfo).MaxFlatWorkgroupSize;
2632+
2633+
// Prints additional launch info that contains the following.
2634+
// Num Args: The number of kernel arguments
2635+
// Teams x Thrds: The number of teams and the number of threads actually
2636+
// running.
2637+
// MaxFlatWorkgroupSize: Maximum flat work-group size supported by the
2638+
// kernel in work-items
2639+
// LDS Usage: Amount of bytes used in LDS storage
2640+
// S/VGPR Count: the number of S/V GPRs occupied by the kernel
2641+
// S/VGPR Spill Count: how many S/VGPRs are spilled by the kernel
2642+
// Tripcount: loop tripcount for the kernel
26362643
INFO(OMP_INFOTYPE_PLUGIN_KERNEL, GenericDevice.getDeviceId(),
2637-
"SGN:%s ConstWGSize:%d args:%d teamsXthrds:(%4luX%4d) "
2638-
"reqd:(%4dX%4d) lds_usage:%uB sgpr_count:%u vgpr_count:%u "
2639-
"sgpr_spill_count:%u vgpr_spill_count:%u tripcount:%lu rpc:%d n:%s\n",
2640-
getExecutionModeName(), ConstWGSize, ArgNum, NumGroups, ThreadsPerGroup,
2641-
NumTeams, ThreadLimit, GroupSegmentSize, SGPRCount, VGPRCount,
2642-
SGPRSpillCount, VGPRSpillCount, LoopTripCount, HostCallRequired,
2643-
getName());
2644+
"#Args: %d Teams x Thrds: %4lux%4u (MaxFlatWorkGroupSize: %u) LDS "
2645+
"Usage: %uB #SGPRs/VGPRs: %u/%u #SGPR/VGPR Spills: %u/%u Tripcount: "
2646+
"%lu\n",
2647+
ArgNum, NumGroups, ThreadsPerGroup, MaxFlatWorkgroupSize,
2648+
GroupSegmentSize, SGPRCount, VGPRCount, SGPRSpillCount, VGPRSpillCount,
2649+
LoopTripCount);
26442650

26452651
return Plugin::success();
26462652
}

openmp/libomptarget/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,10 @@ struct KernelMetaDataTy {
146146
uint32_t KernelSegmentSize;
147147
uint32_t ExplicitArgumentCount;
148148
uint32_t ImplicitArgumentCount;
149+
uint32_t RequestedWorkgroupSize[3];
150+
uint32_t WorkgroupSizeHint[3];
151+
uint32_t WavefronSize;
152+
uint32_t MaxFlatWorkgroupSize;
149153
};
150154
namespace {
151155

@@ -194,6 +198,19 @@ class KernelInfoReader {
194198
return DK.getString() == SK;
195199
};
196200

201+
const auto getSequenceOfThreeInts = [](msgpack::DocNode &DN,
202+
uint32_t *Vals) {
203+
assert(DN.isArray() && "MsgPack DocNode is an array node");
204+
auto DNA = DN.getArray();
205+
assert(DNA.size() == 3 && "ArrayNode has at most three elements");
206+
207+
int i = 0;
208+
for (auto DNABegin = DNA.begin(), DNAEnd = DNA.end(); DNABegin != DNAEnd;
209+
++DNABegin) {
210+
Vals[i++] = DNABegin->getUInt();
211+
}
212+
};
213+
197214
if (isKey(V.first, ".name")) {
198215
KernelName = V.second.toString();
199216
} else if (isKey(V.first, ".sgpr_count")) {
@@ -208,6 +225,14 @@ class KernelInfoReader {
208225
KernelData.PrivateSegmentSize = V.second.getUInt();
209226
} else if (isKey(V.first, ".group_segement_fixed_size")) {
210227
KernelData.GroupSegmentList = V.second.getUInt();
228+
} else if (isKey(V.first, ".reqd_workgroup_size")) {
229+
getSequenceOfThreeInts(V.second, KernelData.RequestedWorkgroupSize);
230+
} else if (isKey(V.first, ".workgroup_size_hint")) {
231+
getSequenceOfThreeInts(V.second, KernelData.WorkgroupSizeHint);
232+
} else if (isKey(V.first, ".wavefront_size")) {
233+
KernelData.WavefronSize = V.second.getUInt();
234+
} else if (isKey(V.first, ".max_flat_workgroup_size")) {
235+
KernelData.MaxFlatWorkgroupSize = V.second.getUInt();
211236
}
212237

213238
return Error::success();
@@ -295,6 +320,7 @@ Error readAMDGPUMetaDataFromImage(MemoryBufferRef MemBuffer,
295320

296321
return Error::success();
297322
}
323+
298324
} // namespace utils
299325
} // namespace plugin
300326
} // namespace target

openmp/libomptarget/test/offloading/info.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ int main() {
3939
// INFO: info: Entering OpenMP kernel at info.c:{{[0-9]+}}:{{[0-9]+}} with 1 arguments:
4040
// INFO: info: firstprivate(val)[4]
4141
// INFO: info: Launching kernel __omp_offloading_{{.*}}main{{.*}} with {{[0-9]+}} blocks and {{[0-9]+}} threads in Generic mode
42-
// AMDGPU: AMDGPU device {{[0-9]}} info: SGN:Generic ConstWGSize:{{[0-9]+}} args:{{[0-9]}} teamsXthrds:({{ [0-9]+}}X {{[0-9]+}}) reqd:( {{[0-9]+}}X {{[0-9]+}}) lds_usage:{{[0-9]+}}B sgpr_count:{{[0-9]+}} vgpr_count:{{[0-9]+}} sgpr_spill_count:{{[0-9]+}} vgpr_spill_count:{{[0-9]+}} tripcount:{{[0-9]+}} rpc:0 n:__omp_offloading_{{.*}}main{{.*}}
42+
// AMDGPU: AMDGPU device {{[0-9]}} info: #Args: {{[0-9]}} Teams x Thrds: {{[0-9]+}}x {{[0-9]+}} (MaxFlatWorkGroupSize: {{[0-9]+}}) LDS Usage: {{[0-9]+}}B #SGPRs/VGPRs: {{[0-9]+}}/{{[0-9]+}} #SGPR/VGPR Spills: {{[0-9]+}}/{{[0-9]+}} Tripcount: {{[0-9]+}}
4343
// INFO: info: OpenMP Host-Device pointer mappings after block at info.c:{{[0-9]+}}:{{[0-9]+}}:
4444
// INFO: info: Host Ptr Target Ptr Size (B) DynRefCount HoldRefCount Declaration
4545
// INFO: info: {{.*}} {{.*}} 256 1 0 C[0:64] at info.c:{{[0-9]+}}:{{[0-9]+}}

0 commit comments

Comments
 (0)