Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 31 additions & 3 deletions configure
Original file line number Diff line number Diff line change
Expand Up @@ -664,6 +664,9 @@ DO_DEMANGLE
USE_GETARG
PC_LOOKUP_FILE
USE_LIBDWARF
MPIP_CALLSITE_STACK_DEPTH_MAX
MPIP_INTERNAL_STACK_DEPTH
MPIP_CALLSITE_REPORT_STACK_DEPTH_MAX
ENABLE_FORTRAN_WEAK_SYMS
ENABLE_FORTRAN_XLATE
ENABLE_MPI_NONBLOCKINGCOLLECTIVES
Expand Down Expand Up @@ -758,6 +761,7 @@ enable_setjmp
enable_fortranxlate
enable_fortranweak
enable_stackdepth
enable_internal_stackdepth
enable_maxargs
enable_dwarf
enable_getarg
Expand Down Expand Up @@ -1404,7 +1408,11 @@ Optional Features:
--disable-fortranxlate Disable translation of Fortran opaque objects.
--enable-fortranweak Generate weak symbols for additional Fortran symbol
name styles.
--enable-stackdepth Specify maximum stacktrace depth (default is 8).
--enable-stackdepth Specify maximum report stacktrace depth (default is
8).
--enable-internal-stackdepth
Specify number of internal stack frames (default is
3).
--enable-maxargs Maximum number of command line arguments copied
(default is 32).
--enable-dwarf Use DWARF library for source lookup.
Expand Down Expand Up @@ -3104,10 +3112,30 @@ fi


cat >>confdefs.h <<_ACEOF
#define MPIP_CALLSITE_STACK_DEPTH_MAX $STACKDEPTH
#define MPIP_CALLSITE_REPORT_STACK_DEPTH_MAX $STACKDEPTH
_ACEOF

#AC_SUBST(MPIP_CALLSITE_STACK_DEPTH_MAX)


# Check whether --enable-internal-stackdepth was given.
if test "${enable_internal_stackdepth+set}" = set; then :
enableval=$enable_internal_stackdepth; MPIP_INTERNAL_STACK_DEPTH=$enableval;
echo "Internal stacktrace depth is $MPIP_INTERNAL_STACK_DEPTH"
else
MPIP_INTERNAL_STACK_DEPTH=3
fi


cat >>confdefs.h <<_ACEOF
#define MPIP_INTERNAL_STACK_DEPTH $MPIP_INTERNAL_STACK_DEPTH
_ACEOF




$as_echo "#define MPIP_CALLSITE_STACK_DEPTH_MAX (MPIP_CALLSITE_REPORT_STACK_DEPTH_MAX + MPIP_INTERNAL_STACK_DEPTH)" >>confdefs.h



# Check whether --enable-maxargs was given.
if test "${enable_maxargs+set}" = set; then :
Expand Down
18 changes: 15 additions & 3 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -289,13 +289,25 @@ AC_SUBST(ENABLE_FORTRAN_WEAK_SYMS)


AC_ARG_ENABLE(stackdepth,
AS_HELP_STRING([--enable-stackdepth], [Specify maximum stacktrace depth (default is 8).]),
AS_HELP_STRING([--enable-stackdepth], [Specify maximum report stacktrace depth (default is 8).]),
STACKDEPTH=$enableval;
echo "Maximum stacktrace depth is $STACKDEPTH",
STACKDEPTH=8,
)
AC_DEFINE_UNQUOTED([MPIP_CALLSITE_STACK_DEPTH_MAX], [$STACKDEPTH], [Depth of the callsite])
#AC_SUBST(MPIP_CALLSITE_STACK_DEPTH_MAX)
AC_DEFINE_UNQUOTED([MPIP_CALLSITE_REPORT_STACK_DEPTH_MAX], [$STACKDEPTH], [Stack depth of callsites in report])
AC_SUBST(MPIP_CALLSITE_REPORT_STACK_DEPTH_MAX)

AC_ARG_ENABLE(internal-stackdepth,
AS_HELP_STRING([--enable-internal-stackdepth], [Specify number of internal stack frames (default is 3).]),
MPIP_INTERNAL_STACK_DEPTH=$enableval;
echo "Internal stacktrace depth is $MPIP_INTERNAL_STACK_DEPTH",
MPIP_INTERNAL_STACK_DEPTH=3,
)
AC_DEFINE_UNQUOTED([MPIP_INTERNAL_STACK_DEPTH], [$MPIP_INTERNAL_STACK_DEPTH], [Number of internal stack frames])
AC_SUBST(MPIP_INTERNAL_STACK_DEPTH)

AC_DEFINE([MPIP_CALLSITE_STACK_DEPTH_MAX], [(MPIP_CALLSITE_REPORT_STACK_DEPTH_MAX + MPIP_INTERNAL_STACK_DEPTH)], [Recorded stack depth of callsites])
AC_SUBST(MPIP_CALLSITE_STACK_DEPTH_MAX)

AC_ARG_ENABLE(maxargs,
AS_HELP_STRING(--enable-maxargs,Maximum number of command line arguments copied (default is 32).),
Expand Down
4 changes: 2 additions & 2 deletions make-wrappers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1064,7 +1064,7 @@ def CreateWrapper(funct, olist):
olist.append("mpiPi_GETTIME (&start);\n" )

# capture call stack
olist.append("if ( mpiPi.stackDepth > 0 ) mpiPi_RecordTraceBack((*base_jbuf), call_stack, MPIP_CALLSITE_STACK_DEPTH);\n" )
olist.append("if ( mpiPi.reportStackDepth > 0 ) mpiPi_RecordTraceBack((*base_jbuf), call_stack, mpiPi.fullStackDepth);\n" )

# end of enabled check
olist.append("}\n\n")
Expand Down Expand Up @@ -1516,7 +1516,7 @@ def GenerateWrappers():
print("-----*----- Generating profiling wrappers")
cwd = os.getcwd()
os.chdir(cwd)
sname = cwd + "/wrappers.c"
sname = cwd + "/mpiP-wrappers.c"
g = open(sname, "w")
olist = StandardFileHeader(sname)

Expand Down
15 changes: 8 additions & 7 deletions mpiP-callsites.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include "mpiP-callsites.h"
#include "mpiPi.h"


void mpiPi_cs_reset_stat(callsite_stats_t *csp)
{
csp->maxDur = 0;
Expand All @@ -40,7 +41,7 @@ void mpiPi_cs_init(callsite_stats_t *csp, void *pc[],
int i;
csp->op = op;
csp->rank = rank;
for (i = 0; i < MPIP_CALLSITE_STACK_DEPTH; i++)
for (i = 0; i < mpiPi.fullStackDepth; i++)
{
csp->pc[i] = pc[i];
}
Expand Down Expand Up @@ -150,14 +151,14 @@ callsite_src_id_cache_comparator (const void *p1, const void *p2)
callsite_src_id_cache_entry_t *csp_2 = (callsite_src_id_cache_entry_t *) p2;

#define express(f) {if ((csp_1->f) > (csp_2->f)) {return 1;} if ((csp_1->f) < (csp_2->f)) {return -1;}}
if (mpiPi.stackDepth == 0)
if (mpiPi.reportStackDepth == 0)
{
express (id); /* In cases where the call stack depth is 0, the only unique info may be the id */
return 0;
}
else
{
for (i = 0; i < MPIP_CALLSITE_STACK_DEPTH; i++)
for (i = 0; i < mpiPi.fullStackDepth; i++)
{
if (csp_1->filename[i] != NULL && csp_2->filename[i] != NULL)
{
Expand Down Expand Up @@ -193,7 +194,7 @@ callsite_src_id_cache_hashkey (const void *p1)
int i, j;
int res = 0;
callsite_src_id_cache_entry_t *cs1 = (callsite_src_id_cache_entry_t *) p1;
for (i = 0; i < MPIP_CALLSITE_STACK_DEPTH; i++)
for (i = 0; i < mpiPi.fullStackDepth; i++)
{
if (cs1->filename[i] != NULL)
{
Expand Down Expand Up @@ -304,7 +305,7 @@ mpiPi_query_src (callsite_stats_t * p)
different ids */
bzero (&key, sizeof (callsite_src_id_cache_entry_t));

for (i = 0; (i < MPIP_CALLSITE_STACK_DEPTH) && (p->pc[i] != NULL); i++)
for (i = 0; (i < mpiPi.fullStackDepth) && (p->pc[i] != NULL); i++)
{
if (mpiPi.do_lookup == 1)
mpiPi_query_pc (p->pc[i], &(p->filename[i]), &(p->functname[i]),
Expand Down Expand Up @@ -334,15 +335,15 @@ mpiPi_query_src (callsite_stats_t * p)
malloc (sizeof (callsite_src_id_cache_entry_t));
bzero (csp, sizeof (callsite_src_id_cache_entry_t));

for (i = 0; (i < MPIP_CALLSITE_STACK_DEPTH) && (p->pc[i] != NULL); i++)
for (i = 0; (i < mpiPi.fullStackDepth) && (p->pc[i] != NULL); i++)
{
csp->filename[i] = strdup (key.filename[i]);
csp->functname[i] = strdup (key.functname[i]);
csp->line[i] = key.line[i];
csp->pc[i] = p->pc[i];
}
csp->op = p->op;
if (mpiPi.stackDepth == 0)
if (mpiPi.reportStackDepth == 0)
csp->id = csp->op - mpiPi_BASE;
else
csp->id = callsite_src_id_counter++;
Expand Down
12 changes: 3 additions & 9 deletions mpiP-stats.c
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ _thrd_pc_hashkey (const void *p)
int i;
callsite_stats_t *csp = (callsite_stats_t *) p;
MPIP_CALLSITE_STATS_COOKIE_ASSERT (csp);
for (i = 0; i < MPIP_CALLSITE_STACK_DEPTH; i++)
for (i = 0; i < mpiPi.fullStackDepth; i++)
{
res ^= (unsigned) (long) csp->pc[i];
}
Expand All @@ -100,7 +100,7 @@ trd_pc_comparator (const void *p1, const void *p2)
express (op);
express (rank);

for (i = 0; i < MPIP_CALLSITE_STACK_DEPTH; i++)
for (i = 0; i < mpiPi.fullStackDepth; i++)
{
express (pc[i]);
}
Expand Down Expand Up @@ -208,7 +208,7 @@ mpiPi_stats_thr_cs_upd (mpiPi_thread_stat_t *stat,
key.op = op;
key.rank = rank;
key.cookie = MPIP_CALLSITE_STATS_COOKIE;
for (i = 0; i < MPIP_CALLSITE_STACK_DEPTH; i++)
for (i = 0; i < mpiPi.fullStackDepth; i++)
{
key.pc[i] = pc[i];
}
Expand Down Expand Up @@ -290,12 +290,6 @@ void mpiPi_stats_thr_cs_merge(mpiPi_thread_stat_t *dst,
{
callsite_stats_t *csp_src = av[i], *csp_dst;

/* update file/line in p if need */
if( NULL == csp_src->filename || NULL == csp_src->functname )
{
mpiPi_query_src (csp_src);
}

/* Search for the callsite and create a new record if needed */
if (NULL == h_search (dst->cs_stats, csp_src, (void **) &csp_dst))
{
Expand Down
8 changes: 7 additions & 1 deletion mpiPconfig.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,13 @@
/* Define to 1 if you have the <unistd.h> header file. */
#undef HAVE_UNISTD_H

/* Depth of the callsite */
/* Stack depth of callsites in report */
#undef MPIP_CALLSITE_REPORT_STACK_DEPTH_MAX

/* Number of internal stack frames */
#undef MPIP_INTERNAL_STACK_DEPTH

/* Recorded stack depth of callsites */
#undef MPIP_CALLSITE_STACK_DEPTH_MAX

/* MPI check time */
Expand Down
11 changes: 8 additions & 3 deletions mpiPi.c
Original file line number Diff line number Diff line change
Expand Up @@ -126,16 +126,21 @@ mpiPi_init (char *appName, mpiPi_thr_mode_t thr_mode)
if (DEFAULT_REPORT_FORMAT == mpiPi_style_concise)
{
mpiPi.report_style = mpiPi_style_concise;
mpiPi.stackDepth = 0;
mpiPi.reportStackDepth = 0;
mpiPi.print_callsite_detail = 0;
}
else // verbose default
{
mpiPi.report_style = mpiPi_style_verbose;
mpiPi.stackDepth = 1;
mpiPi.reportStackDepth = 1;
mpiPi.print_callsite_detail = 1;
}

mpiPi.internalStackDepth = MPIP_INTERNAL_STACK_DEPTH;
mpiPi.fullStackDepth = mpiPi.reportStackDepth + mpiPi.internalStackDepth;
if ( mpiPi.fullStackDepth > MPIP_CALLSITE_STACK_DEPTH_MAX )
mpiPi.fullStackDepth = MPIP_CALLSITE_STACK_DEPTH_MAX;

#ifdef COLLECTIVE_REPORT_DEFAULT
mpiPi.collective_report = 1;
#else
Expand Down Expand Up @@ -747,7 +752,7 @@ mpiPi_generateReport (int report_style)

mpiPi_GETTIME (&timer_start);
mergeResult = mpiPi_mergeResults ();
if (mergeResult == 1 && mpiPi.stackDepth == 0)
if (mergeResult == 1 && mpiPi.reportStackDepth == 0)
mergeResult = mpiPi_insert_MPI_records ();
if (mergeResult == 1)
mergeResult = mpiPi_mergeCollectiveStats ();
Expand Down
5 changes: 3 additions & 2 deletions mpiPi.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@

#define MPIP_HELP_LIST PACKAGE_BUGREPORT

#define MPIP_CALLSITE_STACK_DEPTH (mpiPi.stackDepth)
#define MPIP_CALLSITE_STATS_COOKIE 518641
#define MPIP_CALLSITE_STATS_COOKIE_ASSERT(f) {assert(MPIP_CALLSITE_STATS_COOKIE==((f)->cookie));}

Expand Down Expand Up @@ -174,7 +173,9 @@ typedef struct _mpiPi_t

mpiPi_lookup_t *lookup;

int stackDepth;
int reportStackDepth;
int internalStackDepth;
int fullStackDepth;
double reportPrintThreshold;
int baseNames;
MPIP_REPORT_FORMAT_TYPE reportFormat;
Expand Down
48 changes: 15 additions & 33 deletions record_stack.c
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@ static char *svnid = "$Id$";
#endif



#ifdef HAVE_LIBUNWIND

int
Expand All @@ -50,7 +49,7 @@ mpiPi_RecordTraceBack (jmp_buf jb, void *pc_array[], int max_back)

// Inlining / noinlining may affect frames in report
// - Tools frames may appear in report
parent_frame_start = 1;
parent_frame_start = 0;

if (unw_getcontext (&uc) != 0)
{
Expand Down Expand Up @@ -158,52 +157,35 @@ mpiPi_RecordTraceBack (void *pc, void *pc_array[], int max_back)
#include <execinfo.h>

int
mpiPi_RecordTraceBack (jmp_buf jb, void *pc_array[], int report_back)
mpiPi_RecordTraceBack (jmp_buf jb, void *pc_array[], int max_back)
{
#define MPIP_INTERNAL_FRAMES 3
#define MPIP_MAX_TEMP_FRAMES (MPIP_CALLSITE_STACK_DEPTH_MAX + MPIP_INTERNAL_FRAMES)
int max_temp_back = MPIP_MAX_TEMP_FRAMES;
static void *temp_stack_trace[MPIP_MAX_TEMP_FRAMES];
static void *temp_stack_trace[MPIP_CALLSITE_STACK_DEPTH_MAX];
int all_frame_count, user_frame_count;
void **cp;

/* backtrace() will provide us with the 3 internal mpiP stack frames,
as well as the user stack frames. We need to make sure that the
report_back maximum number of stack frames reflects only the user
frames and not the internal mpiP frames.
/* backtrace() will provide internal mpiP stack frames,
including for this function.

So, let's:
o max_temp_back = MPIP_CALLSITE_STACK_DEPTH_MAX + 3
o use a temporary array of frame pointers size of max_temp_back
o call backtrace() for max_temp_back frames.
o memcpy backtrace results - 3 frames
*/
libunwind functionality provides a stack trace beginning with the
parent function, so we remove the current function from the
stack trace for consistent behavior.

if ((report_back + MPIP_INTERNAL_FRAMES) < max_temp_back)
max_temp_back = report_back + MPIP_INTERNAL_FRAMES;
*/

all_frame_count = backtrace (temp_stack_trace, max_temp_back);
all_frame_count = backtrace (temp_stack_trace, max_back);

if (all_frame_count <= MPIP_INTERNAL_FRAMES)
if (all_frame_count <= MPIP_INTERNAL_STACK_DEPTH)
return 0;

user_frame_count = all_frame_count - MPIP_INTERNAL_FRAMES;
memcpy(pc_array, temp_stack_trace+1, sizeof(void*)*(all_frame_count-1));
pc_array[all_frame_count] = NULL;

if (user_frame_count > report_back)
user_frame_count = report_back;

memmove (pc_array, &(temp_stack_trace[MPIP_INTERNAL_FRAMES]),
(user_frame_count * sizeof (void *)));
pc_array[user_frame_count] = NULL;

#if defined(DO_PC_SUBTRACTION)
/* Subtract 1 pointer size from results to point to the address
of the calling instruction */
of the calling instruction rather than the return address */
for (cp = pc_array; cp != NULL && *cp != NULL; cp++)
*cp = *cp - sizeof (cp);
#endif

return user_frame_count;
return all_frame_count;
}

#elif defined(USE_SETJMP)
Expand Down
Loading