# Import Pandas and Plotly (offline mode)

In [1]:
import pandas as pd

import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot

pd.set_option("display.max_columns", 0) # don't truncate columns
pd.set_option("display.max_colwidth", 0) # don't truncate column values

init_notebook_mode(connected=True)

# Read in Caliper Data 

Caliper key-value data formatted in JSON can be read directly with Pandas.

This particular dataset was generated using the configuration file [configs/data_centric_PEBS.config](https://github.com/LLNL/caliper-examples/blob/master/configs/data_centric_PEBS.conf)

Now, create a Pandas dataframe from the JSON data using `read_json` as follows:

In [3]:
df = pd.read_json('../datasets/lulesh_latency_analysis.json')
df[['function','libpfm.operation','libpfm.memory_level','libpfm.hit_type','libpfm.weight']].head()

Unnamed: 0,function,libpfm.operation,libpfm.memory_level,libpfm.hit_type,libpfm.weight
0,main/LagrangeLeapFrog/LagrangeElements/CalcQForElems/CalcMonotonicQRegionForElems,Load,L1,Hit,41.0
1,main/LagrangeLeapFrog/LagrangeNodal,Load,L1,Hit,37.0
2,main/LagrangeLeapFrog/LagrangeElements/ApplyMaterialPropertiesForElems/EvalEOSForElems/CalcEnergyForElems,Load,Remote Cache 1 Hops,Hit,273.0
3,main/LagrangeLeapFrog/LagrangeNodal,Load,L1,Hit,27.0
4,main/LagrangeLeapFrog/LagrangeElements/ApplyMaterialPropertiesForElems/EvalEOSForElems/CalcEnergyForElems,Load,L1,Hit,37.0


In [5]:
df.columns

Index(['alloc.index#libpfm.addr', 'alloc.label#libpfm.addr',
       'alloc.total_size', 'alloc.uid', 'alloc.uid#libpfm.addr',
       'cali.caliper.version', 'callpath.address', 'function',
       'instruction.op#callpath.address',
       'instruction.read_size#callpath.address',
       'instruction.write_size#callpath.address', 'iteration#lulesh.cycle',
       'libpfm.addr',
       'libpfm.counter.MEM_TRANS_RETIRED:LATENCY_ABOVE_THRESHOLD',
       'libpfm.cpu', 'libpfm.data_src', 'libpfm.event_sample_name',
       'libpfm.hit_type', 'libpfm.memory_level', 'libpfm.operation',
       'libpfm.snoop', 'libpfm.tid', 'libpfm.time', 'libpfm.tlb',
       'libpfm.weight', 'loop', 'lulesh.region', 'mem.alloc', 'mem.free',
       'pthread.id', 'source.file#callpath.address',
       'source.function#callpath.address', 'source.line#callpath.address'],
      dtype='object')

## Deriving the Base Function Name

In [6]:
import os
df = df.sort_values('libpfm.time')
df['base_function'] = df['function'].transform(lambda s: os.path.basename(str(s)))

# Data Aggregations

## Data-Centric Profiles

First, we plot the sum of latencies per allocation:

In [7]:
df_data_centric = df[['alloc.label#libpfm.addr', 'libpfm.weight']].groupby('alloc.label#libpfm.addr').sum()
data = [
    go.Bar(
        name='Allocation Quantity per Function',
        x=df_data_centric.index, 
        y=df_data_centric['libpfm.weight']
    )
]
iplot(data)

Looks like `nodelist` stands out, but it may have just been accessed more frequently, rather than inefficiently.

Lets do the same plot, but average the latencies instead of sum them:

In [8]:
df_data_centric = df[['alloc.label#libpfm.addr', 'libpfm.weight']].groupby('alloc.label#libpfm.addr').mean()
data = [
    go.Bar(
        name='Allocation Quantity per Function',
        x=df_data_centric.index, 
        y=df_data_centric['libpfm.weight']
    )
]
iplot(data)

Now it looks like `arealg` stands out. Lets take a look at that data:

In [14]:
df_arealg = df[df['alloc.label#libpfm.addr'] == 'arealg']

df_arealg[['function',
           'libpfm.operation',
           'libpfm.memory_level', 
           'libpfm.hit_type', 
           'libpfm.weight',
           'libpfm.snoop']].sort_values('libpfm.weight')

Unnamed: 0,function,libpfm.operation,libpfm.memory_level,libpfm.hit_type,libpfm.weight,libpfm.snoop
18794,main/LagrangeLeapFrog/CalcTimeConstraintsForElems/CalcCourantConstraintForElems,Load,L3,Hit,112.0,Hit
3747,main/LagrangeLeapFrog/CalcTimeConstraintsForElems/CalcCourantConstraintForElems,Load,Remote Cache 1 Hops,Hit,247.0,Hit Modified
4267,main/LagrangeLeapFrog/CalcTimeConstraintsForElems/CalcCourantConstraintForElems,Load,Remote Cache 1 Hops,Hit,252.0,Hit Modified
17460,main/LagrangeLeapFrog/CalcTimeConstraintsForElems/CalcCourantConstraintForElems,Load,Remote Cache 1 Hops,Hit,256.0,Hit Modified
18552,main/LagrangeLeapFrog/CalcTimeConstraintsForElems/CalcCourantConstraintForElems,Load,LFB,Hit,419.0,


## Call Tree Aggregations of Data-Centric Profiles

We are first and foremost interested in functions that contribute most to execution time, which was not the case for the accesses to `arealg`.

We can combine our allocation label information with call tree aggregation in Hatchet to narrow down accesses inside of long-executing functions.

In [17]:
from cali_analysis import hatchet

pd.options.mode.chained_assignment = None  # default='warn'

# Derive a tuple from the function path by splitting on '/'
df_callpaths = df.dropna(subset=['function'])
df_callpaths['function'] = df_callpaths['function'].transform(lambda l: tuple(l.split('/')))
df_callpaths['alloc.label'] = df_callpaths['alloc.label#libpfm.addr'].transform(lambda l: [l])

# Construct a call tree, aggregating allocation sizes at each node
mrt = hatchet.MultiRootTree.from_samples(df_callpaths, 'function', {
    'time.inclusive.duration' : 'max',
    'libpfm.weight' : 'sum',
    'alloc.label' : 'sum'
})
mrt.df_nodes[['depth#function', 'function', 'alloc.total_size']].sort_values(['depth#function', 'alloc.total_size'], ascending=[True, False])


using a dict with renaming is deprecated and will be removed in a future version



KeyError: 'time.inclusive.duration'