In [1]:
# Have to patch sys.path until we have tighter taucmdr/anaconda integration.
# Need to move towards taucmdr as a conda package.
import os
import sys
try:
    import taucmdr
except ImportError:
    # taucmdr is not installed as a package: fall back to the source tree
    # that the __TAUCMDR_HOME__ environment variable points at.
    taucmdr_home = os.environ.get('__TAUCMDR_HOME__')
    if taucmdr_home is None:
        # Fail with an actionable message instead of letting a KeyError be
        # replaced by an unexplained ImportError from the import below.
        raise ImportError("Cannot import 'taucmdr': it is not installed and the "
                          "__TAUCMDR_HOME__ environment variable is not set.")
    sys.path.insert(0, os.path.join(taucmdr_home, 'packages'))

# Runs whether taucmdr was importable directly or only after patching sys.path.
from taucmdr.model.project import Project

In [2]:
%pylab
import matplotlib
import matplotlib.pyplot as plt
matplotlib.style.use('ggplot')
import pandas as pd
import math
import numpy as np
import operator
import time

Using matplotlib backend: Qt5Agg
Populating the interactive namespace from numpy and matplotlib


In [3]:
# Number of trials recorded for the currently selected project's experiment.
num_trials = Project.selected().experiment().num_trials
# Fetch the trial records numbered 0 .. num_trials-1.
trials = Project.selected().experiment().trials(xrange(num_trials))
# Map each trial position to whatever get_data() returns for that trial.
trial_data = dict((trial_idx, trials[trial_idx].get_data())
                  for trial_idx in xrange(num_trials))

In [4]:
#trial0_intervals = trial_data[0][0][0][0].interval_data()
#trial0_intervals.plot(kind='bar')
#plt.show()

In [5]:
start = time.time()

# Collect the interval data of every profile stored in the four-deep
# trial_data containers. The three inner levels are presumably
# node / context / thread (matching the index levels used by the analysis
# functions) -- TODO confirm. Indexing by position is kept on purpose: the
# containers are only known to support integer subscripts and len().
#
# Alternative kept from the original for reference (per-profile percentage):
#   x = trial_data[trial][i][j][k].interval_data()
#   x['percentage'] = x['Exclusive']/x.loc['.TAU application', 'Inclusive']
#   trial_intervals.append(x)
trial_intervals = [
    trial_data[trial_idx][outer][middle][inner].interval_data()
    for trial_idx in xrange(num_trials)
    for outer in xrange(len(trial_data[trial_idx]))
    for middle in xrange(len(trial_data[trial_idx][outer]))
    for inner in xrange(len(trial_data[trial_idx][outer][middle]))
]

# One concat over the whole list (rather than growing a frame piecewise).
expr_intervals = pd.concat(trial_intervals)
end = time.time()

# shape is (rows, columns); append the elapsed time to fill the three slots.
print('Time spent constructing dataframe of size %sx%s: %s' % (expr_intervals.shape + (end - start,)))

Time spent constructing dataframe of size 82878x6: 7.1334168911


In [6]:
# levels: 0=trial, 1=node, 2=context, 3=thread, 4=region name

def filter_regions(dfs, percentage=0.1, level=4, root_region='.TAU application'):
    """Drop code regions that never account for more than a given share of runtime.

    For every row, the share is that row's 'Exclusive' value divided by the
    'Inclusive' value of `root_region` at the same outer index position.
    A region is kept when its largest share anywhere in the frame is strictly
    greater than `percentage`; `root_region` itself is always kept.

    Side effect: a 'percentage' column holding the computed share is added to
    the caller's `dfs` in place (and is therefore also present in the result).

    Args:
        dfs: MultiIndexed DataFrame with 'Exclusive' and 'Inclusive' columns.
        percentage: Threshold as a fraction (0.1 means 10%).
        level: Position of the index level that holds the region name.
            Defaults to 4, the layout of the full experiment frame. It is
            assumed to be the innermost level so that the re-stacked shares
            line up with the rows of `dfs` -- pass 3 for a single-trial frame.
        root_region: Name of the region whose inclusive time represents the
            total runtime.

    Returns:
        The rows of `dfs` belonging to the regions that were kept.
    """
    unstacked_dfs = dfs.unstack(level)
    total_runtime = unstacked_dfs.loc[:, ('Inclusive', root_region)]
    # Divide every region column by the total runtime row-wise, then fold the
    # region columns back into the index so the result aligns with dfs.
    dfs['percentage'] = unstacked_dfs.loc[:, 'Exclusive'].div(total_runtime, axis=0).stack()

    def keep_region(group):
        # groupby.filter exposes the group key (the region name) as .name.
        return group.name == root_region or group['percentage'].max() > percentage

    dfs_filtered = dfs.groupby(level=level).filter(keep_region)

    regions_before = len(dfs.index.get_level_values(level).unique())
    regions_after = len(dfs_filtered.index.get_level_values(level).unique())
    # Single-argument print call: same output under Python 2 and Python 3.
    print('Filtering all regions with less than %s%% of total runtime reduced number of regions from %s to %s.' % (100*percentage, regions_before, regions_after))
    return dfs_filtered

def largest_stddev(dfs, n):
    """Return the n index-level-3 groups whose 'Exclusive' values vary the most.

    The population standard deviation (ddof=0) of 'Exclusive' is computed per
    value of index level 3, NaN results are dropped, and the n largest are
    returned in descending order.

    Note: level 3 is the region name only for a frame whose leading trial
    level has been removed, which is how the visible callers invoke this
    (they pass `frame.loc[0]`). On the full five-level frame, level 3 is the
    thread according to the level legend above these functions.
    """
    exclusive = dfs['Exclusive']
    spread = exclusive.groupby(level=3).std(ddof=0)
    ranked = spread.dropna().sort_values(ascending=False, axis=0)
    return ranked.iloc[:n]

def largest_correlation(dfs, n):
    """Return the n regions whose exclusive time correlates best with total runtime.

    Index level 4 (the region name) is pivoted into columns; each region's
    'Exclusive' column is then correlated against the 'Inclusive' column of
    '.TAU application'. The n highest correlations are returned in
    descending order.
    """
    by_region = dfs.unstack(4)
    exclusive_by_region = by_region.loc[:, 'Exclusive']
    total_runtime = by_region.loc[:, ('Inclusive', '.TAU application')]
    correlations = exclusive_by_region.corrwith(total_runtime)
    return correlations.sort_values(ascending=False, axis=0).iloc[:n]

def largest_exclusive(dfs, n):
    """Return the n regions with the highest peak 'Exclusive' value.

    The maximum 'Exclusive' value is taken per value of index level 4
    (the region name), and the n largest maxima are returned in descending
    order.
    """
    peak_per_region = dfs['Exclusive'].groupby(level=4).max()
    return peak_per_region.nlargest(n)

def hotspots(dfs, n, flag):
    """Print a ranked summary of the top-n hotspot regions in dfs.

    Args:
        dfs: Interval DataFrame handed unchanged to the selected ranking
            function.
        n: Maximum number of regions to list.
        flag: Ranking criterion -- 0: largest exclusive time
            (largest_exclusive), 1: largest standard deviation
            (largest_stddev), 2: highest correlation to total runtime
            (largest_correlation).

    Any other flag prints 'Invalid flag' and returns without a summary
    (previously execution continued and crashed on the undefined ranking).
    When fewer than n regions are available, only those are listed instead
    of raising IndexError.
    """
    y = ['exclusive time', 'standard deviation', 'correlation to total runtime']
    if flag not in (0, 1, 2):
        print('Invalid flag')
        return

    if flag == 0:
        largest = largest_exclusive(dfs, n)
    elif flag == 1:
        largest = largest_stddev(dfs, n)
    else:
        largest = largest_correlation(dfs, n)

    # Single-argument print calls give identical output on Python 2 and 3.
    print('Hotspot Analysis Summary')
    print('='*80)
    print('The code regions with largest %s are: ' % y[flag])

    # Positional slice: never walks past the end of a short ranking, and
    # avoids integer [] lookups on a Series indexed by region name.
    top = largest.iloc[:n]
    for rank, (region, value) in enumerate(zip(top.index, top.values), 1):
        print('%s: %s (%s)' % (rank, region, value))
    
n=10

# One entry per hotspot analysis:
#   (flag passed to hotspots(), wording for the timing message,
#    trial to restrict the frame to, or None to use every trial).
# The standard-deviation analysis is run on trial 0 only.
_HOTSPOT_ANALYSES = (
    (0, 'most expensive regions', None),
    (1, 'regions with largest standard deviation', 0),
    (2, 'regions with highest correlation to total runtime', None),
)


def _run_hotspot_analyses(dfs, n):
    """Run every analysis in _HOTSPOT_ANALYSES on dfs and print each one's wall time."""
    for flag, description, trial in _HOTSPOT_ANALYSES:
        start = time.time()
        # Selecting the trial is part of the timed work.
        hotspots(dfs if trial is None else dfs.loc[trial], n, flag)
        end = time.time()
        print('\nTime spent finding %s %s: %s\n\n' % (n, description, end-start))


# Hotspot analysis without filtering
nofiltering_start = time.time()
_run_hotspot_analyses(expr_intervals, n)
nofiltering_end = time.time()

# Hotspot analysis with filtering
print('='*80)

filtering_start = time.time()
start = time.time()
# Note: filter_regions also adds a 'percentage' column to expr_intervals.
filtered_dfs = filter_regions(expr_intervals, 0.05)
end = time.time()
print('\nTime spent filtering the dataframe: %s\n\n' % (end-start))

_run_hotspot_analyses(filtered_dfs, n)
filtering_end = time.time()

print('Hotspot analysis took %s seconds without filtering and %s seconds with filtering.' % (nofiltering_end-nofiltering_start, filtering_end-filtering_start))

Hotspot Analysis Summary
The code regions with largest exclusive time are: 
1: .TAU application => NODET [{main.f90} {5,1}-{36,17}] => init_phase (144444414.0)
2: init_phase (144444414.0)
3: MPI_Bcast() (91653272.0)
4: .TAU application => NODET [{main.f90} {5,1}-{36,17}] => init_phase => MPI_Bcast() (91457231.0)
5: .TAU application => NODET [{main.f90} {5,1}-{36,17}] => init_phase => MPI_Recv() (56798703.0)
6: MPI_Recv() (56798703.0)
7: MPI_Reduce() (45090589.0)
8: .TAU application => NODET [{main.f90} {5,1}-{36,17}] => step_phase => FLOW::ITERATE [{flow.F90} {1121,3}-{1181,24}] => FLOW::STEP_SOLVER [{flow.F90} {1409,3}-{1571,28}] => RELAX_FLOW::NCFV_TIMESTEP [{relax_flow.f90} {16,3}-{119,30}] => GCR_SOLVE::GCR_SOLVER_QSET [{gcr_solve.f90} {42,3}-{353,32}] => GCR_SOLVE_UTIL::GCR_PRECONDITIONER_QSET [{gcr_solve_util.f90} {34,3}-{80,40}] => LINEARSOLVE_NODIVCHECK::NODIVCHECK_RELAX_Q [{linearsolve_nodivcheck.F90} {35,3}-{271,35}] => POINT_SOLVER::POINT_SOLVE [{point_solver.F90} {26,3}-{22