Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
134 changes: 134 additions & 0 deletions roctx/README
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
This directory has MPI wrappers that are instrumented with roctx
markers. The idea is to preload the wrapper library for use with
ROCm's profiling tools, thus making MPI calls visible in ROCm's
tools for visual timeline analysis. The MPI wrappers also provide
controls for roctracer_start()/roctracer_stop(), including support
for time-window profiling. This makes it possible to selectively
profile a time-window without instrumenting the code.

Requirements : ROCm, mpicc with gcc as the underlying compiler

===============================================================================
Build/Install :

(1) ./configure --with-rocm=/path/to/rocm
(2) make libmpitrace.so

===============================================================================
Typical use :

(1) rocprof --roctx-trace mpirun -np 64 ./helper.sh your.exe ...

or

(2) mpirun -np 64 rocprof --roctx-trace ./helper.sh your.exe ...

where the helper.sh script preloads the library with
LD_PRELOAD=/path/to/libmpitrace.so prior to invoking the application.
Setting LD_PRELOAD prior to invking rocprof may not work as the preload
information is not forwarded to the application environment.
Another option is:

(3) mpirun -np 64 ./helper.sh your.exe ...

where rocprof is selectively invoked for some specific rank alongside
setting LD_PRELOAD by means of another helper script.

ROCm's rocprof is a very powerful tool for analysis of GPU performance,
but it can be invasive in the context of tightly-coupled MPI parallel
applications. Instead of launching one instance of rocprof for every
MPI rank, there will be less disruption if rocprof is used selectively
for one specified rank, and the helper script can also be used for that
as in (3).
An example of a helper script is shown below for openmpi :

#!/bin/bash

cat > helper2.sh << EOF
export TRACE_ALL_EVENTS=yes
LD_PRELOAD=/path/to/libmpitrace.so "$@"
EOF
chmod +x helper2.sh

if [ $OMPI_COMM_WORLD_RANK = 31 ]; then

for i in \
results.copy_stats.csv \
results.db \
results.hip_stats.csv \
results.json \
results.stats.csv \
results.sysinfo.txt \
; do

if [ -f $i ]; then
mv $i $i.bak
fi
done

rocprof --roctx-trace ./helper2.sh
else
./helper2.sh
fi

In this example, rocprof is turned on for MPI rank 31, and profile data
is saved in a file "results.json" a backup made for any pre-existing
"results.json" file. One could be more specific about naming the rocprof
output, using for example a timestamp or a jobid.

With rocporf it can be advantageous to limit profiling to a specific code
block using roctracer_start()/roctracer_stop() or to a specified time
window. The following illustrates the time-window profiling method,
where the intent is to capture rocprof data for MPI rank 31 with a start
time of 40 seconds after MPI_Init() and a stop time of 45 seconds after
MPI_Init() :

Job script :
export PROFILE_BEGIN_TIME=40
export PROFILE_END_TIME=45
export PROFILE_RANK=31
export SAVE_LIST=$PROFILE_RANK
export LD_PRELOAD=/path/to/roctx/libmpitrace.so
mpirun -np 128 ./helper.sh your.exe [args]

Helper script :
#!/bin/bash

cat > helper2.sh << EOF
export TRACE_ALL_EVENTS=yes
LD_PRELOAD=/path/to/libmpitrace.so "$@"
EOF
chmod +x helper2.sh

if [ $OMPI_COMM_WORLD_RANK = $PROFILE_RANK ]; then
for i in \
results.copy_stats.csv \
results.db \
results.hip_stats.csv \
results.json \
results.stats.csv \
results.sysinfo.txt \
; do

if [ -f $i ]; then
mv $i $i.bak
fi
done

rocprof --roctx-trace -o test.csv --trace-start off ./helper2.sh
else
./helper2.sh
fi

With the time-window profiling method, it is not necessary to instrument
the code with calls to roctracer_start()/roctracer_stop(), and in
in fact source code access is not required at all. The libmpitrace.so
library can track the time that has elapsed after MPI_Init() when every
MPI function is called, and start and stop ROCm profiling as directed
via environment variables.

To add roctx annotations, do the following steps :
grep Log ref_mpitrace.c | awk -F"(" '{print $2}' | awk -F"," '{print $1}' >cids.txt
grep Log ref_fortran_wrappers.c | awk -F"(" '{print $2}' | awk -F"," '{print $1}' >fids.txt
gcc -g add_roctx.c -o add_roctx
./add_roctx
131 changes: 131 additions & 0 deletions roctx/add_roctx.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
/* Copyright IBM Corporation, 2019
* author : Bob Walkup
*/

// utility to parse my standard MPI wrappers
// and add roctx labeled ranges for all

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(int argc, char * argv[])
{
char * str, line[256];
FILE * srcfile;
FILE * idfile;
FILE * ofile;
const int npts = 146;
int i, j;
char idname[npts][80];
char addline[256];
char * ptr = NULL, * tvp = NULL, * logp = NULL;

// instrument C wrappers
idfile = fopen("cids.txt", "r");
if (idfile == NULL) {
printf("can't open cids.txt ... exiting\n");
exit(0);
}

for (i=0; i<npts; i++) {
fscanf(idfile, "%s", idname[i]);
}

srcfile = fopen("ref_mpitrace.c", "r");
if (srcfile == NULL) {
printf("can't open ref_mpitrace.c ... exiting\n");
exit(0);
}

ofile = fopen("mpitrace.c", "w");
if (ofile == NULL) {
printf("can't open mpitrace.c ... exiting\n");
exit(0);
}

// echo 103 lines
for (i=0; i<103; i++) {
ptr = fgets(line, sizeof(line), srcfile);
if (ptr != NULL) fprintf(ofile, "%s", line);
}

j = 0;
while (NULL != fgets(line, sizeof(line), srcfile) ) {
// check to see if the line contains "struct timeval" or LogEvent
tvp = strstr(line, "struct timeval");
logp = strstr(line, "Log");
if (tvp != NULL) {
sprintf(addline, " roctx_range_id_t range = roctxRangeStartA(label[%s]);\n", idname[j]);
str = strcat(line, addline);
fprintf(ofile, "%s", str);
j++;
}
else if (logp != NULL) {
sprintf(addline, " roctxRangeStop(range);\n");
str = strcat(addline, line);
fprintf(ofile, "%s", addline);
}
else fprintf(ofile, "%s", line);
}

fclose(srcfile);
fclose(idfile);
fclose(ofile);

// now instrument Fortran wrappers
idfile = fopen("fids.txt", "r");
if (idfile == NULL) {
printf("can't open fids.txt ... exiting\n");
exit(0);
}

for (i=0; i<npts; i++) {
fscanf(idfile, "%s", idname[i]);
}

srcfile = fopen("ref_fortran_wrappers.c", "r");
if (srcfile == NULL) {
printf("can't open ref_mpitrace.c ... exiting\n");
exit(0);
}

ofile = fopen("fortran_wrappers.c", "w");
if (ofile == NULL) {
printf("can't open mpitrace.c ... exiting\n");
exit(0);
}

// echo 260 lines for fortran_wrappers.c
for (i=0; i<260; i++) {
ptr = fgets(line, sizeof(line), srcfile);
if (ptr != NULL) fprintf(ofile, "%s", line);
}

j = 0;
while (NULL != fgets(line, sizeof(line), srcfile) ) {
// check to see if the line contains "struct timeval" or LogEvent
tvp = strstr(line, "struct timeval");
logp = strstr(line, "Log");
if (tvp != NULL) {
sprintf(addline, " roctx_range_id_t range = roctxRangeStartA(label[%s]);\n", idname[j]);
str = strcat(line, addline);
fprintf(ofile, "%s", str);
j++;
}
else if (logp != NULL) {
sprintf(addline, " roctxRangeStop(range);\n");
str = strcat(addline, line);
fprintf(ofile, "%s", addline);
}
else fprintf(ofile, "%s", line);
}

fclose(srcfile);
fclose(idfile);
fclose(ofile);

printf("done\n");

return 0;
}
Loading