In [1]:
import pandas as pd

# Raise the column display width limit to 999 characters; the rendered tables
# below contain long `function` path tuples (presumably the reason — confirm).
pd.set_option("display.max_colwidth",999)

# Load Caliper JSON Data
Drop records that have no function annotation, then split each nested function path into a tuple, e.g. `"main/foo/bar"` -> `("main", "foo", "bar")`.

In [2]:
# Large files (>1gb) are streamed 1000 lines at a time and the chunks are
# concatenated; records without a 'function' annotation are then discarded.
df = pd.concat(
    pd.read_json(
        'quicksilver_topdown_counters.json',
        orient='records',
        lines=True,
        chunksize=1000,
    )
).dropna(subset=['function'])
# Convert each "main/foo/bar" path string into the tuple ("main", "foo", "bar").
df['function'] = df['function'].transform(lambda path: tuple(path.split('/')))
df.head()

Unnamed: 0,cali.caliper.version,cali.event.begin,cali.event.end,cali.event.set,event.begin#function,event.end#function,event.set#pthread.id,function,libpfm.counter.BR_MISP_RETIRED:ALL_BRANCHES,libpfm.counter.CPU_CLK_UNHALTED:THREAD_P,...,libpfm.counter.RS_EVENTS:EMPTY_CYCLES,libpfm.counter.UOPS_EXECUTED:CORE_CYCLES_GE_1,libpfm.counter.UOPS_EXECUTED:CORE_CYCLES_GE_2,libpfm.counter.UOPS_EXECUTED:THREAD,libpfm.counter.UOPS_ISSUED:ANY,libpfm.counter.UOPS_RETIRED:RETIRE_SLOTS,mpi.rank,mpi.size,time.inclusive.duration,time.offset
114,1.7.0-dev,334.0,,,isInside,,,"(main, initMC, initMesh, MC_Domain, findMaterial)",0,0,...,0,0,0,0,0,0,10.0,16.0,,2813734
115,1.7.0-dev,,334.0,,,SumTasks,,"(main, cycleTracking, SumTasks)",0,0,...,4915,0,0,112918,0,0,1.0,16.0,17.0,32336221
117,1.7.0-dev,334.0,,,collapse,,,"(main, cycleTracking)",0,0,...,0,0,0,0,0,0,0.0,16.0,,29924061
119,1.7.0-dev,334.0,,,collapse,,,"(main, cycleTracking)",1005,13128535,...,788101,9250136,7512747,24753559,29415199,29398111,1.0,16.0,,32380275
120,1.7.0-dev,,334.0,,,isInside,,"(main, initMC, initMesh, MC_Domain, findMaterial, isInside)",0,0,...,0,0,0,0,0,0,10.0,16.0,21.0,2813755


# Perform inclusive aggregation (derive the "call tree")

Specify the inclusive aggregators: sum every libpfm counter, take the maximum of the inclusive time, and count a constant string column (`cali.caliper.version`) to get the number of records.

In [3]:
import hatchet

In [4]:
# Every column whose name contains 'libpfm' is a hardware counter and is summed.
aggregators = {col: 'sum' for col in df.columns if 'libpfm' in col}
# The inclusive duration is combined with 'max', and the version column is
# counted (one non-null value per record).
aggregators.update({
    'time.inclusive.duration': 'max',
    'cali.caliper.version': 'count',
})
aggregators

{'cali.caliper.version': 'count',
 'libpfm.counter.BR_MISP_RETIRED:ALL_BRANCHES': 'sum',
 'libpfm.counter.CPU_CLK_UNHALTED:THREAD_P': 'sum',
 'libpfm.counter.CYCLE_ACTIVITY:CYCLES_NO_EXECUTE': 'sum',
 'libpfm.counter.CYCLE_ACTIVITY:STALLS_L1D_PENDING': 'sum',
 'libpfm.counter.CYCLE_ACTIVITY:STALLS_L2_PENDING': 'sum',
 'libpfm.counter.CYCLE_ACTIVITY:STALLS_LDM_PENDING': 'sum',
 'libpfm.counter.IDQ:MS_UOPS': 'sum',
 'libpfm.counter.IDQ_UOPS_NOT_DELIVERED:CORE': 'sum',
 'libpfm.counter.INT_MISC:RECOVERY_CYCLES': 'sum',
 'libpfm.counter.MACHINE_CLEARS:COUNT': 'sum',
 'libpfm.counter.MEM_LOAD_UOPS_RETIRED:L3_HIT': 'sum',
 'libpfm.counter.MEM_LOAD_UOPS_RETIRED:L3_MISS': 'sum',
 'libpfm.counter.RESOURCE_STALLS:SB': 'sum',
 'libpfm.counter.RS_EVENTS:EMPTY_CYCLES': 'sum',
 'libpfm.counter.UOPS_EXECUTED:CORE_CYCLES_GE_1': 'sum',
 'libpfm.counter.UOPS_EXECUTED:CORE_CYCLES_GE_2': 'sum',
 'libpfm.counter.UOPS_EXECUTED:THREAD': 'sum',
 'libpfm.counter.UOPS_ISSUED:ANY': 'sum',
 'libpfm.counter.UOPS_R

In [5]:
# Build the multi-root tree from the sample records, using the 'function'
# tuple column as the path and the `aggregators` mapping to combine columns.
# NOTE(review): presumably each node aggregates the samples of its whole
# subtree (inclusive aggregation, per the section heading) — confirm in hatchet.
mrt = hatchet.MultiRootTree.from_samples(df, 'function', aggregators)
# Show the per-node table; the rendered output includes hash#function,
# parent_hash#function and depth#function alongside the aggregated columns.
mrt.df_nodes

Unnamed: 0,hash#function,libpfm.counter.BR_MISP_RETIRED:ALL_BRANCHES,libpfm.counter.CPU_CLK_UNHALTED:THREAD_P,libpfm.counter.CYCLE_ACTIVITY:CYCLES_NO_EXECUTE,libpfm.counter.CYCLE_ACTIVITY:STALLS_L1D_PENDING,libpfm.counter.CYCLE_ACTIVITY:STALLS_L2_PENDING,libpfm.counter.CYCLE_ACTIVITY:STALLS_LDM_PENDING,libpfm.counter.IDQ:MS_UOPS,libpfm.counter.IDQ_UOPS_NOT_DELIVERED:CORE,libpfm.counter.INT_MISC:RECOVERY_CYCLES,...,libpfm.counter.UOPS_EXECUTED:CORE_CYCLES_GE_1,libpfm.counter.UOPS_EXECUTED:CORE_CYCLES_GE_2,libpfm.counter.UOPS_EXECUTED:THREAD,libpfm.counter.UOPS_ISSUED:ANY,libpfm.counter.UOPS_RETIRED:RETIRE_SLOTS,time.inclusive.duration,cali.caliper.version,function,parent_hash#function,depth#function
71,2574226508381785094,331092,939095615,202732865,5996515,2133947,104700838,188604576,567870734,2303370,...,734647360,596993361,2215438952,2034027702,2018931157,79.0,93888,"(main, initMC, initMesh, buildMeshPartition, assignCellsToDomain, nearestCenter, addNbrsToQueue, addTupleToQueue, tupleToIndex)",5331452723580995626,9
15,-7252534472619379718,330788,868687790,188080918,5941646,2261905,97646291,173545665,525449352,2245504,...,677360760,549201649,2043227374,1872522334,1854839143,99.0,1,"(main, initMC, initMesh, buildMeshPartition, assignCellsToDomain, nearestCenter, minDist2, whichCellTuple)",1290044161575942532,8
30,-3677243502532151653,231806,651473871,140542409,4029885,1356632,72612542,130593947,392226689,1571402,...,511755969,414886105,1538295417,1411927731,1398119385,147.0,1,"(main, initMC, initMesh, buildMeshPartition, assignCellsToDomain, nearestCenter, addNbrsToQueue, indexToTuple)",-4933507252616422133,8
47,-927934058046326774,274511,865696693,187186353,5787708,2178189,97014114,173483056,518288933,1878281,...,675830327,549086393,2035192437,1869599378,1856585100,48.0,1,"(main, initMC, initMesh, buildMeshPartition, assignCellsToDomain, nearestCenter, minDist2, indexToTuple)",1290044161575942532,8
85,5331452723580995626,1023251,2821205312,611075227,18581459,6646584,316099320,565760476,1714789518,7027065,...,2205521989,1791282089,6646474530,6098479033,6055186996,171.0,2,"(main, initMC, initMesh, buildMeshPartition, assignCellsToDomain, nearestCenter, addNbrsToQueue, addTupleToQueue)",-4933507252616422133,8
90,5955584031965748850,108486,290137977,62718156,1995205,677147,32390107,57970771,175257073,727301,...,225776091,183388859,682603152,622727758,617021950,148.0,1,"(main, initMC, initMesh, buildMeshPartition, assignCellsToDomain, nearestCenter, addTupleToQueue, tupleToIndex)",-2901336579335736432,8
23,-4933507252616422133,2052622,5717995979,1239343961,37761611,13663767,641344694,1145743325,3471473892,14177950,...,4473417558,3630281365,13469413434,12354588065,12264770097,329.0,3,"(main, initMC, initMesh, buildMeshPartition, assignCellsToDomain, nearestCenter, addNbrsToQueue)",6957787860313903023,7
25,-4134057073213068368,2210,4051803,865255,31075,4719,98598,123880,85830,0,...,4218340,3172698,9312773,10065696,10149653,47.0,1,"(main, initMC, initMesh, buildMeshPartition, assignCellsToDomain, whichCell, whichCellTuple)",-572252942354143546,7
36,-2901336579335736432,330593,871066201,189094567,6167557,2336499,98017297,173970200,531231548,2246462,...,677220099,550049104,2045776785,1867349343,1852518878,202.0,2,"(main, initMC, initMesh, buildMeshPartition, assignCellsToDomain, nearestCenter, addTupleToQueue)",6957787860313903023,7
54,-376670115672843596,1608,731270,133243,30963,31469,0,0,0,0,...,446762,264902,1171682,1208268,887835,45.0,1,"(main, initMC, initMesh, buildMeshPartition, buildCellIndexMap, exchange, cellInfoMpiType)",4964793335720556777,7


# Derive Topdown Analysis metrics
Calculates the topdown derived metrics as described in http://ieeexplore.ieee.org/document/6844459/

In [6]:
import topdown

In [7]:
# Derive the top-down metrics for every tree node from the aggregated counters.
# The rendered result carries columns such as retiring, bad_speculation,
# frontend_bound and backend_bound in place of the raw libpfm counters.
# NOTE(review): the `_ivb` suffix presumably means Intel Ivy Bridge — confirm.
df_td = topdown.derive_topdown_ivb(mrt.df_nodes)
df_td

Unnamed: 0,hash#function,time.inclusive.duration,cali.caliper.version,function,parent_hash#function,depth#function,retiring,bad_speculation,frontend_bound,backend_bound,...,machine_clear,frontend_latency,frontend_bandwidth,memory_bound,core_bound,mem_bound,l1_bound,l2_bound,l3_bound,uncore_bound
71,2574226508381785094,79.0,93888,"(main, initMC, initMesh, buildMeshPartition, assignCellsToDomain, nearestCenter, addNbrsToQueue, addTupleToQueue, tupleToIndex)",5331452723580995626,9,0.537467,0.006472,0.151175,0.304887,...,0.145168,0.604700,0.395300,0.111491,0.250971,0.000098,0.105106,0.004113,0.002175,0.002272
15,-7252534472619379718,99.0,1,"(main, initMC, initMesh, buildMeshPartition, assignCellsToDomain, nearestCenter, minDist2, whichCellTuple)",1290044161575942532,8,0.533805,0.007674,0.151219,0.307302,...,0.134794,0.604877,0.395123,0.112407,0.251637,0.000056,0.105567,0.004236,0.002548,0.002604
30,-3677243502532151653,147.0,1,"(main, initMC, initMesh, buildMeshPartition, assignCellsToDomain, nearestCenter, addNbrsToQueue, indexToTuple)",-4933507252616422133,8,0.536522,0.007711,0.150515,0.305252,...,0.144138,0.602061,0.397939,0.111459,0.252964,0.000072,0.105273,0.004103,0.002010,0.002082
47,-927934058046326774,48.0,1,"(main, initMC, initMesh, buildMeshPartition, assignCellsToDomain, nearestCenter, minDist2, indexToTuple)",1290044161575942532,8,0.536153,0.005928,0.149674,0.308245,...,0.158463,0.598696,0.401304,0.112065,0.250568,0.000077,0.105379,0.004169,0.002439,0.002516
85,5331452723580995626,171.0,2,"(main, initMC, initMesh, buildMeshPartition, assignCellsToDomain, nearestCenter, addNbrsToQueue, addTupleToQueue)",-4933507252616422133,8,0.536578,0.006327,0.151955,0.305139,...,0.141289,0.607822,0.392178,0.112044,0.251388,0.000095,0.105458,0.004230,0.002261,0.002356
90,5955584031965748850,148.0,1,"(main, initMC, initMesh, buildMeshPartition, assignCellsToDomain, nearestCenter, addTupleToQueue, tupleToIndex)",-2901336579335736432,8,0.531663,0.007423,0.151012,0.309902,...,0.135983,0.604047,0.395953,0.111637,0.250623,0.000076,0.104760,0.004543,0.002258,0.002334
23,-4933507252616422133,329.0,3,"(main, initMC, initMesh, buildMeshPartition, assignCellsToDomain, nearestCenter, addNbrsToQueue)",6957787860313903023,7,0.536236,0.006407,0.151778,0.305580,...,0.142507,0.607114,0.392886,0.112162,0.252035,0.000093,0.105559,0.004214,0.002297,0.002390
25,-4134057073213068368,47.0,1,"(main, initMC, initMesh, buildMeshPartition, assignCellsToDomain, whichCell, whichCellTuple)",-572252942354143546,7,0.626243,-0.005180,0.005296,0.373641,...,0.003607,0.021183,0.978817,0.024334,0.447282,0.000441,0.016665,0.006505,0.000724,0.001165
36,-2901336579335736432,202.0,2,"(main, initMC, initMesh, buildMeshPartition, assignCellsToDomain, nearestCenter, addTupleToQueue)",6957787860313903023,7,0.531681,0.006835,0.152466,0.309017,...,0.133991,0.609864,0.390136,0.112526,0.250553,0.000085,0.105445,0.004398,0.002598,0.002682
54,-376670115672843596,45.0,1,"(main, initMC, initMesh, buildMeshPartition, buildCellIndexMap, exchange, cellInfoMpiType)",4964793335720556777,7,0.303525,0.109547,0.000000,0.586928,...,0.000000,0.000005,0.999995,0.000000,0.430898,0.030078,-0.042341,-0.000692,0.012955,0.043033


# Determine Topdown boundedness
Using derived metrics, determines which function paths are bound by which resources

In [8]:
# Raise the row display limit to 999 for the summary table below.
pd.set_option("display.max_rows", 999)

# Classify each function path, then order the result shallowest-first and,
# within a depth, by descending inclusive time.
dfb = topdown.analyze_topdown_metrics(df_td)
sorted_dfb = dfb.sort_values(
    by=['depth#function', 'time.inclusive.duration'],
    ascending=[True, False],
)
# Display only the memory-related columns, skipping rows with missing values.
sorted_dfb.loc[
    :,
    ['function', 'time.inclusive.duration', 'boundedness',
     'memory_bound', 'l1_bound', 'l2_bound', 'l3_bound']
].dropna()

Unnamed: 0,function,time.inclusive.duration,boundedness,memory_bound,l1_bound,l2_bound,l3_bound
0,"(main,)",37213720.0,[retiring 48.85%],0.163454,0.137907,0.003898,0.02121
13,"(main, cycleTracking)",3572462.0,[retiring 46.84%],0.185443,0.15152,0.003671,0.030098
6,"(main, initMC)",2829809.0,[retiring 53.37%],0.11897,0.110911,0.004571,0.002979
14,"(main, cycleInit)",128275.0,[retiring 53.45%],0.168545,0.161756,0.001018,0.005548
12,"(main, parseInputFile)",17550.0,[retiring 61.06%],0.149088,0.141534,-4.5e-05,0.006747
11,"(main, cycleFinalize)",2597.0,"[backend_bound 51.33%, memory_bound 7.32%, l1_bound 5.44%]",0.073193,0.054443,0.003145,0.015419
9,"(main, freeArgs)",1045.0,"[backend_bound 71.21%, memory_bound 16.85%, l1_bound 15.60%]",0.168471,0.156003,0.006754,0.005714
7,"(main, gameOver)",623.0,"[backend_bound 49.21%, memory_bound 62.30%, uncore_bound 44.30%]",0.622959,0.314746,-0.134761,0.174897
1,"(main, ~MonteCarlo)",618.0,"[backend_bound 61.74%, memory_bound 128.95%, l1_bound 68.60%]",1.28946,0.686002,0.071766,0.127417
5,"(main, coralBenchmarkCorrectness)",598.0,"[backend_bound 65.56%, memory_bound 35.20%, l1_bound 33.02%]",0.351969,0.330192,0.001093,0.015224


# Analyze the "call tree"
Visualize the aggregated tree with topdown metrics 

In [9]:
# Wrap the first root of the multi-root tree in a hatchet.Tree; in the
# rendered output this root is the `main` node.
tree = hatchet.Tree(mrt.roots[0])
# The bare expression renders the tree (shown as coloured JSON below).
tree

{
    [34;01m"hash#function"[39;49;00m: [34m1128851083484916900[39;49;00m,
    [34;01m"time.inclusive.duration"[39;49;00m: [34m37213720.0[39;49;00m,
    [34;01m"cali.caliper.version"[39;49;00m: [34m15[39;49;00m,
    [34;01m"function"[39;49;00m: [
        [33m"main"[39;49;00m
    ],
    [34;01m"parent_hash#function"[39;49;00m: [34m3527539[39;49;00m,
    [34;01m"depth#function"[39;49;00m: [34m1[39;49;00m,
    [34;01m"retiring"[39;49;00m: [34m0.4884572575[39;49;00m,
    [34;01m"bad_speculation"[39;49;00m: [34m0.0177889881[39;49;00m,
    [34;01m"frontend_bound"[39;49;00m: [34m0.0987573261[39;49;00m,
    [34;01m"backend_bound"[39;49;00m: [34m0.3949964282[39;49;00m,
    [34;01m"branch_mispredict"[39;49;00m: [34m0.9716684103[39;49;00m,
    [34;01m"machine_clear"[39;49;00m: [34m0.0283315897[39;49;00m,
    [34;01m"frontend_latency"[39;49;00m: [34m0.3950293045[39;49;00m,
    [34;01m"frontend_bandwidth"[39;49;00m: [34m0.6049706955[39;49;00m,

In [10]:
# Display the hot path for the inclusive-time metric as a table, one row per
# node from the root downwards (main -> cycleTracking -> collapse in the output).
# NOTE(review): presumably follows the child with the largest metric value at
# each level — confirm against hatchet's hot_path implementation.
tree.hot_path('time.inclusive.duration')

Unnamed: 0,hash#function,time.inclusive.duration,cali.caliper.version,function,parent_hash#function,depth#function,retiring,bad_speculation,frontend_bound,backend_bound,...,frontend_latency,frontend_bandwidth,memory_bound,core_bound,mem_bound,l1_bound,l2_bound,l3_bound,uncore_bound,boundedness
0,1128851083484916900,37213720.0,15,"(main,)",3527539,1,0.488457,0.017789,0.098757,0.394996,...,0.395029,0.604971,0.163454,0.250706,0.00044,0.137907,0.003898,0.02121,0.021649,[retiring 48.85%]
13,8375893045446190810,3572462.0,3,"(main, cycleTracking)",1128851083484916900,2,0.468424,0.023266,0.069226,0.439084,...,0.276904,0.723096,0.185443,0.247159,0.000154,0.15152,0.003671,0.030098,0.030252,[retiring 46.84%]
38,5561175753082870147,379.0,1,"(main, cycleTracking, collapse)",8375893045446190810,3,0.346102,0.004715,0.094345,0.554837,...,0.377381,0.622619,0.23332,0.298074,0.000939,0.130255,0.021011,0.081114,0.082053,"[backend_bound 55.48%, core_bound 29.81%]"


In [11]:
import util
import json

# Serialise the hot path to a JSON records string, parse it back into Python
# objects, and pretty-print those with the project's JSON formatter.
hot_path_json = tree.hot_path('time.inclusive.duration').to_json(orient='records')
hot_path_records = json.loads(hot_path_json)
print(util.pretty_json_dumps(hot_path_records))

[
    {
        [34;01m"hash#function"[39;49;00m: [34m1128851083484916900[39;49;00m,
        [34;01m"time.inclusive.duration"[39;49;00m: [34m37213720.0[39;49;00m,
        [34;01m"cali.caliper.version"[39;49;00m: [34m15[39;49;00m,
        [34;01m"function"[39;49;00m: [
            [33m"main"[39;49;00m
        ],
        [34;01m"parent_hash#function"[39;49;00m: [34m3527539[39;49;00m,
        [34;01m"depth#function"[39;49;00m: [34m1[39;49;00m,
        [34;01m"retiring"[39;49;00m: [34m0.4884572575[39;49;00m,
        [34;01m"bad_speculation"[39;49;00m: [34m0.0177889881[39;49;00m,
        [34;01m"frontend_bound"[39;49;00m: [34m0.0987573261[39;49;00m,
        [34;01m"backend_bound"[39;49;00m: [34m0.3949964282[39;49;00m,
        [34;01m"branch_mispredict"[39;49;00m: [34m0.9716684103[39;49;00m,
        [34;01m"machine_clear"[39;49;00m: [34m0.0283315897[39;49;00m,
        [34;01m"frontend_latency"[39;49;00m: [34m0.3950293045[39;49;00m,
        