In [1]:
# Global imports
import hatchet as ht
import pandas as pd
import numpy as np
from scipy import stats
import utils
import random
import os
import json

import seaborn as sns
import matplotlib.pyplot as plt
from os import listdir
from os.path import isfile, join

from ast import literal_eval as make_list
import networkx as nx
from networkx.readwrite import json_graph

In [2]:
# Local imports
from state import State
from preprocess import PreProcess
from actions.similarity import Similarity
from actions.union_graph import UnionGraph
from pipeline import Pipeline
from hatchet_to_networkx import HatchetToNetworkX

In [3]:
# Notebook-wide pandas display settings: show at most 10 rows of any frame and
# render floats with thousands separators and two decimals.
pd.options.display.max_rows = 10
pd.options.display.float_format = '{:,.2f}'.format

# Set Path

In [4]:
# Settings for the 'scatter vs scatterv' experiment.
# NOTE: all three names are reassigned by the osu_bcast settings that follow,
# so these values are not the ones in effect when the notebook runs top to bottom.
experiment = 'scatter vs scatterv'
dataset = 'osu_scatter.128.8.2019-10-09_04-04-22'
path = '/mnt/2TB/p/lustre1/rahim/data/hpcdbs/osu/catalyst/mvapich2/2.3/'

In [21]:
# Active settings: experiment name, the dataset passed to the Create action,
# and the base directory under which config_files/ and data/ are looked up.
# NOTE(review): `path` is an absolute, machine-specific location.
experiment = 'osu_bcast'
dataset = 'osu_bcast.1.18.2019-09-03_11-33-15'
path = '/home/vidi/Work/llnl/CallFlow/'

In [22]:
# Derive every working directory from `path`. os.path.join is used instead of
# string concatenation so the result is the same whether or not `path` ends
# with a separator.
configPath = os.path.join(path, 'config_files')
dataPath = os.path.join(path, 'data', 'osu_bcast')
dotPath = os.path.join(path, 'data', 'processed')
# Per-experiment sub-directories of the raw and processed data.
experimentPath = os.path.join(dataPath, experiment)
experimentDotPath = os.path.join(dotPath, experiment)

# Read Config file

In [12]:
# Import only the reader that is used, instead of a wildcard import that hides
# where names come from.
from configFileReader import configFileReader
# The config file lives in the config directory derived from `path`; the
# alternative experiment uses 'scatter vs scatterv.json' in the same directory.
configFileName = os.path.join(configPath, 'osu_bcast-rank=10.json')
config = configFileReader(configFileName)

In [8]:
# Run the Create action for the selected dataset; its log line reports that it
# creates the graphframes, and the result exposes the `df` used in the next cell.
# NOTE(review): this import sits apart from the other local imports at the top.
from actions.create import Create
hpctoolkit_state = Create(config, dataset)

[1m #  [31m [callfow.py] Action: Creating graphframes:  "osu_bcast.1.18.2019-09-03_11-33-15"[0m


In [9]:
# Distinct values of the dataframe's 'name' column.
hpctoolkit_state.df.name.unique()

array(['<partial call paths>', '<unknown procedure>', '<unknown file>:0',
       '<program root>', 'main', '117:free_buffer',
       '1087:__GI___vsyslog_chk', 'syslog.c:244', '119:MPI_Finalize',
       'PMPI_Finalize', '296:MPID_Finalize', '174:psm_dofinalize',
       '35:<unknown procedure> 0x1e450 [libpsm_infinipath.so.1.16]',
       '<unknown procedure> 0x1e82f [libpsm_infinipath.so.1.16]',
       '_int_free', 'malloc.c:3986',
       '41:<unknown procedure> 0x188b1 [libpsm_infinipath.so.1.16]',
       '<unknown procedure> 0xff82 [libpsm_infinipath.so.1.16]',
       '<unknown procedure> 0xf774 [libpsm_infinipath.so.1.16]',
       '_getopt_internal_r', 'getopt.c:716',
       '41:<unknown procedure> 0x188fe [libpsm_infinipath.so.1.16]',
       '<unknown procedure> 0x20e9e [libpsm_infinipath.so.1.16]',
       '<unknown procedure> 0x26030 [libpsm_infinipath.so.1.16]',
       '<unknown file> [libpsm_infinipath.so.1.16]:0',
       '<unknown procedure> 0x2604e [libpsm_infinipath.so.1.16]',

In [18]:
def read_gf(dotdir, name):
    """Load the stored "entire" graph and dataframe of one dataset.

    Reads ``<dotdir>/<name>/entire_graph.json`` and
    ``<dotdir>/<name>/entire_df.csv``.

    Args:
        dotdir: Directory that holds one sub-directory per dataset.
        name: Dataset name; used both as the sub-directory name and as the
            argument to ``State``.

    Returns:
        A ``State`` whose ``gf`` is the GraphFrame built from the JSON literal,
        whose ``df`` is the CSV read as a DataFrame, and whose ``graph`` is
        ``gf.graph``.
    """
    state = State(name)
    dataset_dir = dotdir + '/' + name
    # Only the "entire" artefacts are read; the filter/group files that sit
    # next to them are not needed here.
    entire_df_filepath = dataset_dir + '/entire_df.csv'
    entire_graph_filepath = dataset_dir + '/entire_graph.json'

    with open(entire_graph_filepath, 'r') as entire_graphFile:
        entire_data = json.load(entire_graphFile)

    state.gf = ht.GraphFrame.from_literal(entire_data)
    state.df = pd.read_csv(entire_df_filepath)
    state.graph = state.gf.graph

    return state

# Create the props file

In [23]:
def dump_props_file(directory, target_nop=10, path_prefix="./data/osu_bcast_10/"):
    """Print a JSON ``{"datasets": [...]}`` listing for the datasets in a directory.

    Every entry of ``directory`` except the two ensemble output files is loaded
    with ``read_gf``; a dataset is listed only when the number of distinct
    values in its 'rank' column equals ``target_nop``.

    Args:
        directory: Directory holding one sub-directory per processed dataset.
        target_nop: Number of distinct ranks a dataset must have to be listed.
            Defaults to 10, the value that used to be hard-coded.
        path_prefix: Prefix prepended to the dataset name to form its "path"
            entry. Defaults to the previously hard-coded location.

    Returns:
        None. The JSON document is written to stdout.
    """
    ensemble_outputs = ('ensemble_df.csv', 'ensemble_graph.json')
    ret = []
    for file in listdir(directory):
        if file in ensemble_outputs:
            continue
        state = read_gf(directory, file)
        nop = len(state.df['rank'].unique())
        if nop == target_nop:
            ret.append({
                "name": file,
                "path": path_prefix + file,
                "format": "hpctoolkit",
                "props": {
                },
                "nop": nop
            })
    print(json.dumps({
        "datasets": ret }))
# Show which directory is scanned, then print the JSON dataset listing for it.
print(experimentDotPath)
dump_props_file(experimentDotPath)

/home/vidi/Work/llnl/CallFlow/data/processed/osu_bcast
{"datasets": [{"name": "osu_bcast.1.10.2019-09-04_07-05-12", "path": "./data/osu_bcast_10/osu_bcast.1.10.2019-09-04_07-05-12", "format": "hpctoolkit", "props": {}, "nop": 10}, {"name": "osu_bcast.1.10.2019-09-03_23-33-03", "path": "./data/osu_bcast_10/osu_bcast.1.10.2019-09-03_23-33-03", "format": "hpctoolkit", "props": {}, "nop": 10}, {"name": "osu_bcast.1.10.2019-09-04_08-07-45", "path": "./data/osu_bcast_10/osu_bcast.1.10.2019-09-04_08-07-45", "format": "hpctoolkit", "props": {}, "nop": 10}, {"name": "osu_bcast.1.10.2019-09-03_13-07-55", "path": "./data/osu_bcast_10/osu_bcast.1.10.2019-09-03_13-07-55", "format": "hpctoolkit", "props": {}, "nop": 10}, {"name": "osu_bcast.1.10.2019-09-04_03-18-07", "path": "./data/osu_bcast_10/osu_bcast.1.10.2019-09-04_03-18-07", "format": "hpctoolkit", "props": {}, "nop": 10}, {"name": "osu_bcast.1.10.2019-09-03_21-42-03", "path": "./data/osu_bcast_10/osu_bcast.1.10.2019-09-03_21-42-03", "format"

# Read all states from a directory

In [12]:
def read_gfs(directory):
    """Load a state for every dataset entry found in ``directory``.

    The two ensemble output files ('ensemble_df.csv' and
    'ensemble_graph.json') are skipped; every other directory entry is passed
    to ``read_gf``.

    Returns:
        dict mapping each entry name to the state returned by ``read_gf``.
    """
    ensemble_outputs = ('ensemble_df.csv', 'ensemble_graph.json')
    return {
        entry: read_gf(directory, entry)
        for entry in listdir(directory)
        if entry not in ensemble_outputs
    }

In [13]:
# Load one state per dataset found under the experiment's processed-data directory.
states = read_gfs(experimentDotPath)

# Convert hatchet graph to networkX graph

In [14]:
# Dataset names referenced by the comparison cells and by the pipeline section.
dataset1 = 'osu_bcast.1.18.2019-09-03_11-33-15'
dataset2 = 'osu_bcast.1.18.2019-09-03_12-29-49'
dataset3 = 'osu_bcast.1.18.2019-09-03_10-33-44'

In [15]:
# Convert every loaded state into a HatchetToNetworkX wrapper keyed by dataset
# name, and print the node list of each resulting graph.
networkX_graphs = {}
# Iterate name/state pairs directly; the enumerate index was never used.
for dataset, state in states.items():
    networkX_graphs[dataset] = HatchetToNetworkX(state, 'path', construct_graph=True, add_data=False)
    print(networkX_graphs[dataset].g.nodes())

Creating a Graph for osu_bcast.1.18.2019-09-03_11-33-15.
Creating a Graph without node or edge attributes.
['<partial call paths>', '<unknown procedure>', '<program root>', 'main', '117:free_buffer', '1087:__GI___vsyslog_chk', '119:MPI_Finalize', 'PMPI_Finalize', '296:MPID_Finalize', '174:psm_dofinalize', '35:<unknown procedure> 0x1e450 [libpsm_infinipath.so.1.16]', '<unknown procedure> 0x1e82f [libpsm_infinipath.so.1.16]', '_int_free', '41:<unknown procedure> 0x188b1 [libpsm_infinipath.so.1.16]', '<unknown procedure> 0xff82 [libpsm_infinipath.so.1.16]', '<unknown procedure> 0xf774 [libpsm_infinipath.so.1.16]', '41:<unknown procedure> 0x188fe [libpsm_infinipath.so.1.16]', '<unknown procedure> 0x20e9e [libpsm_infinipath.so.1.16]', '<unknown procedure> 0x26155 [libpsm_infinipath.so.1.16]', '_getopt_internal_r', '<unknown procedure> 0x26030 [libpsm_infinipath.so.1.16]', '<unknown procedure> 0x2604e [libpsm_infinipath.so.1.16]', '<unknown procedure> 0x26058 [libpsm_infinipath.so.1.16]', '<

Creating a Graph without node or edge attributes.
['<partial call paths>', '<unknown procedure>', '<program root>', 'main', '106:PMPI_Reduce', '1258:MPIR_Reduce_impl', '1076:MPIR_Reduce_MV2', '2036:MPIR_Reduce_index_tuned_intra_MV2', '119:MPI_Finalize', 'PMPI_Finalize', '273:MPIR_Barrier_impl', '98:PMPI_Barrier', '421:MPIR_Barrier_impl', '331:MPIR_Barrier_MV2', '210:MPIR_shmem_barrier_MV2', '126:MPIDI_CH3I_SHMEM_COLL_Barrier_gather', '1743:psm_progress_wait', '135:psm_progress_wait', '247:<unknown procedure> 0x326b8 [libpsm_infinipath.so.1.16]', '135:MPIDI_CH3I_SHMEM_COLL_Barrier_bcast', '1807:<unknown procedure> 0x144b10 [libmpi.so.12.1.1]', '1807:psm_progress_wait', '247:<unknown procedure> 0x326c9 [libpsm_infinipath.so.1.16]', '<unknown procedure> 0x33e76 [libpsm_infinipath.so.1.16]', '<unknown procedure> 0x33fc2 [libpsm_infinipath.so.1.16]', '296:MPID_Finalize', '174:psm_dofinalize', '41:<unknown procedure> 0x188fe [libpsm_infinipath.so.1.16]', '<unknown procedure> 0x20e9e [libpsm_

Creating a Graph without node or edge attributes.
['<partial call paths>', '<unknown procedure>', '<program root>', 'main', '119:MPI_Finalize', 'PMPI_Finalize', '296:MPID_Finalize', '174:psm_dofinalize', '41:<unknown procedure> 0x188fe [libpsm_infinipath.so.1.16]', '<unknown procedure> 0x20e9e [libpsm_infinipath.so.1.16]', '<unknown procedure> 0x26030 [libpsm_infinipath.so.1.16]', '<unknown procedure> 0x2604e [libpsm_infinipath.so.1.16]', '<unknown procedure> 0x26058 [libpsm_infinipath.so.1.16]', '<unknown procedure> 0x26068 [libpsm_infinipath.so.1.16]', '<unknown procedure> 0x1c060 [libpsm_infinipath.so.1.16]', '<unknown procedure> 0x1c075 [libpsm_infinipath.so.1.16]', '<unknown procedure> 0x1c083 [libpsm_infinipath.so.1.16]', '<unknown procedure> 0x1c088 [libpsm_infinipath.so.1.16]', '<unknown procedure> 0x1c08c [libpsm_infinipath.so.1.16]', '<unknown procedure> 0x1c096 [libpsm_infinipath.so.1.16]', '<unknown procedure> 0x1c09c [libpsm_infinipath.so.1.16]', '<unknown procedure> 0x1c0

# Union of all graphs.

In [16]:
# Merge every per-dataset graph into one union graph and stack the per-dataset
# dataframes into a single ensemble dataframe.
u_graph = UnionGraph()
union_frames = []
for dataset in networkX_graphs:
    u_graph.unionize(networkX_graphs[dataset].g, dataset)
    union_frames.append(networkX_graphs[dataset].df)
# Concatenate once rather than re-copying the growing frame on every iteration;
# fall back to an empty frame when there is nothing to concatenate.
u_df = pd.concat(union_frames) if union_frames else pd.DataFrame()

Nodes in R and H are same?  False
Difference is  ['main', '<program root>', '<unknown procedure> 0x13eab [libpsm_infinipath.so.1.16]', '369:MPID_Send', '422:MPID_Recv', '<unknown procedure> 0x23501 [libpsm_infinipath.so.1.16]', '3581:psm_progress_wait', 'pthread_create', '<unknown procedure>', '_getopt_internal_r', '<unknown procedure> 0x340fd [libpsm_infinipath.so.1.16]', '<unknown procedure> 0x13b87 [libpsm_infinipath.so.1.16]', '116:<unknown procedure> 0x3227a [libpsm_infinipath.so.1.16]', '488:MPID_Init', '<unknown procedure> 0xabb7 [libpsm_infinipath.so.1.16]', '<unknown procedure> 0x20e9e [libpsm_infinipath.so.1.16]', '1197:mv2_shm_progress', '<unknown procedure> 0x3401f [libpsm_infinipath.so.1.16]', '3820:__intel_ssse3_rep_memcpy', '<unknown procedure> 0x2605e [libpsm_infinipath.so.1.16]', '243:psm_no_lock', '126:MPIDI_CH3I_SHMEM_COLL_Barrier_gather', '<unknown procedure> 0x33e35 [libpsm_infinipath.so.1.16]', '<unknown procedure> 0xe7bd [libpsm_infinipath.so.1.16]', '<unknown pr

# Compare the union graph with the individual dataset graphs.

In [17]:
def printEdges(nxGraph):
    """Print the edge count of ``nxGraph`` followed by one 'source => target' line per edge.

    Args:
        nxGraph: Object whose ``edges()`` returns a sized iterable of pairs.
    """
    edge_view = nxGraph.edges()
    print("Total number of edges: ", len(edge_view))
    for pair in edge_view:
        source, target = pair[0], pair[1]
        print(source, '=> ', target)

In [18]:
# Edges of the graph built for dataset1.
printEdges(networkX_graphs[dataset1].g)

Total number of edges:  184
<partial call paths> =>  <unknown procedure>
<program root> =>  main
main =>  117:free_buffer
main =>  119:MPI_Finalize
main =>  92:PMPI_Bcast
main =>  98:PMPI_Barrier
main =>  36:MPI_Init
117:free_buffer =>  1087:__GI___vsyslog_chk
119:MPI_Finalize =>  PMPI_Finalize
PMPI_Finalize =>  296:MPID_Finalize
296:MPID_Finalize =>  174:psm_dofinalize
174:psm_dofinalize =>  35:<unknown procedure> 0x1e450 [libpsm_infinipath.so.1.16]
174:psm_dofinalize =>  41:<unknown procedure> 0x188b1 [libpsm_infinipath.so.1.16]
174:psm_dofinalize =>  41:<unknown procedure> 0x188fe [libpsm_infinipath.so.1.16]
35:<unknown procedure> 0x1e450 [libpsm_infinipath.so.1.16] =>  <unknown procedure> 0x1e82f [libpsm_infinipath.so.1.16]
<unknown procedure> 0x1e82f [libpsm_infinipath.so.1.16] =>  _int_free
41:<unknown procedure> 0x188b1 [libpsm_infinipath.so.1.16] =>  <unknown procedure> 0xff82 [libpsm_infinipath.so.1.16]
<unknown procedure> 0xff82 [libpsm_infinipath.so.1.16] =>  <unknown proced

In [19]:
# Edges of the graph built for dataset2.
printEdges(networkX_graphs[dataset2].g)

Total number of edges:  191
<partial call paths> =>  <unknown procedure>
<program root> =>  main
main =>  106:PMPI_Reduce
main =>  119:MPI_Finalize
main =>  98:PMPI_Barrier
main =>  92:PMPI_Bcast
main =>  36:MPI_Init
106:PMPI_Reduce =>  1258:MPIR_Reduce_impl
1258:MPIR_Reduce_impl =>  1076:MPIR_Reduce_MV2
1076:MPIR_Reduce_MV2 =>  2036:MPIR_Reduce_index_tuned_intra_MV2
119:MPI_Finalize =>  PMPI_Finalize
PMPI_Finalize =>  273:MPIR_Barrier_impl
PMPI_Finalize =>  296:MPID_Finalize
273:MPIR_Barrier_impl =>  331:MPIR_Barrier_MV2
98:PMPI_Barrier =>  421:MPIR_Barrier_impl
421:MPIR_Barrier_impl =>  331:MPIR_Barrier_MV2
331:MPIR_Barrier_MV2 =>  210:MPIR_shmem_barrier_MV2
210:MPIR_shmem_barrier_MV2 =>  126:MPIDI_CH3I_SHMEM_COLL_Barrier_gather
210:MPIR_shmem_barrier_MV2 =>  135:psm_progress_wait
210:MPIR_shmem_barrier_MV2 =>  135:MPIDI_CH3I_SHMEM_COLL_Barrier_bcast
126:MPIDI_CH3I_SHMEM_COLL_Barrier_gather =>  1743:psm_progress_wait
1743:psm_progress_wait =>  247:<unknown procedure> 0x326c9 [libpsm_

In [20]:
# Edges of the union graph (held in u_graph.R).
printEdges(u_graph.R)

Total number of edges:  242
<partial call paths> =>  <unknown procedure>
<program root> =>  main
main =>  117:free_buffer
main =>  119:MPI_Finalize
main =>  92:PMPI_Bcast
main =>  98:PMPI_Barrier
main =>  36:MPI_Init
main =>  106:PMPI_Reduce
main =>  93:PMPI_Wtime
117:free_buffer =>  1087:__GI___vsyslog_chk
119:MPI_Finalize =>  PMPI_Finalize
PMPI_Finalize =>  296:MPID_Finalize
PMPI_Finalize =>  273:MPIR_Barrier_impl
296:MPID_Finalize =>  174:psm_dofinalize
296:MPID_Finalize =>  186:MPIDI_PG_Finalize
296:MPID_Finalize =>  204:MPIDI_RMA_finalize
174:psm_dofinalize =>  35:<unknown procedure> 0x1e450 [libpsm_infinipath.so.1.16]
174:psm_dofinalize =>  41:<unknown procedure> 0x188b1 [libpsm_infinipath.so.1.16]
174:psm_dofinalize =>  41:<unknown procedure> 0x188fe [libpsm_infinipath.so.1.16]
174:psm_dofinalize =>  56:psm_deallocate_vbuf
35:<unknown procedure> 0x1e450 [libpsm_infinipath.so.1.16] =>  <unknown procedure> 0x1e82f [libpsm_infinipath.so.1.16]
<unknown procedure> 0x1e82f [libpsm_inf

<unknown procedure> 0xe8da [libpsm_infinipath.so.1.16] =>  __pthread_cond_broadcast_2_0
126:MPIDI_CH3I_SHMEM_COLL_Barrier_gather =>  1743:psm_progress_wait
126:MPIDI_CH3I_SHMEM_COLL_Barrier_gather =>  1743:<unknown procedure> 0x144b10 [libmpi.so.12.1.1]
1743:psm_progress_wait =>  247:<unknown procedure> 0x326c9 [libpsm_infinipath.so.1.16]
1743:psm_progress_wait =>  247:<unknown procedure> 0x326ce [libpsm_infinipath.so.1.16]
106:PMPI_Reduce =>  1258:MPIR_Reduce_impl
1258:MPIR_Reduce_impl =>  1076:MPIR_Reduce_MV2
1076:MPIR_Reduce_MV2 =>  2036:MPIR_Reduce_index_tuned_intra_MV2
273:MPIR_Barrier_impl =>  331:MPIR_Barrier_MV2
<unknown procedure> 0x2350f [libpsm_infinipath.so.1.16] =>  __GI___vsyslog_chk
56:psm_deallocate_vbuf =>  105:__GI___vsyslog_chk
186:MPIDI_PG_Finalize =>  109:snprintf
109:snprintf =>  64:PMIi_ReadCommand
64:PMIi_ReadCommand =>  1431:__write_nocancel
204:MPIDI_RMA_finalize =>  135:__GI___vsyslog_chk
204:MPIDI_RMA_finalize =>  136:__GI___vsyslog_chk
1627:MPIR_Knomial_Bca

In [21]:
# DataFrame.info() prints its report itself and returns None, so wrapping the
# calls in print() only appended a spurious "None None" line.
u_df.info()
# NOTE(review): `dataset` is whatever an earlier loop left behind (presumably
# the last key of networkX_graphs) - consider naming the dataset explicitly.
networkX_graphs[dataset].df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 23256 entries, 0 to 7217
Data columns (total 18 columns):
node             23256 non-null object
rank             23256 non-null int64
time (inc)       23256 non-null float64
time             23256 non-null float64
nid              23256 non-null int64
file             23256 non-null object
line             23256 non-null int64
module           23256 non-null object
name             23256 non-null object
type             23256 non-null object
n_index          23256 non-null int64
mod_index        23256 non-null int64
callees          23256 non-null object
callers          23256 non-null object
path             23256 non-null object
show_node        23256 non-null bool
vis_node_name    23256 non-null object
dataset          23256 non-null object
dtypes: bool(1), float64(2), int64(5), object(10)
memory usage: 3.2+ MB
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7218 entries, 0 to 7217
Data columns (total 18 columns):
node            

In [22]:
# Non-null value counts of every column, grouped by (dataset, name).
print(u_df.groupby(['dataset', 'name']).count())

                                                           node  rank  \
dataset                            name                                 
osu_bcast.1.18.2019-09-03_10-33-44 105:__GI___vsyslog_chk    18    18   
                                   109:snprintf              18    18   
                                   1197:mv2_shm_bcast        54    54   
                                   1197:mv2_shm_progress     18    18   
                                   119:MPI_Finalize          18    18   
...                                                         ...   ...   
osu_bcast.1.18.2019-09-03_12-29-49 psm_queue.c:289           18    18   
                                   psm_queue.c:417           18    18   
                                   pthread_create            18    18   
                                   reduce_osu.c:1961         18    18   
                                   syslog.c:244              54    54   

                                                  

In [23]:
def print_df_count(df):
    """Print the non-null count of every column of ``df``, grouped by 'name'."""
    per_name_counts = df.groupby(['name']).count()
    print(per_name_counts)

In [24]:
# Per-name counts for each individual dataset; `state` here is the dataset-name
# key of networkX_graphs, not a State object.
for state in networkX_graphs:
    print_df_count(networkX_graphs[state].df)

                                                    node  rank  time (inc)  \
name                                                                         
1087:__GI___vsyslog_chk                               18    18          18   
116:<unknown procedure> 0x3227a [libpsm_infinip...    18    18          18   
117:free_buffer                                       18    18          18   
1192:MPIR_Type_get_true_extent_impl                   18    18          18   
1197:mv2_shm_bcast                                    54    54          54   
...                                                  ...   ...         ...   
pthread_create                                        18    18          18   
syslog.c:244                                          54    54          54   
type_get_true_extent_x.c:3141                         18    18          18   
type_get_true_extent_x.c:39                           18    18          18   
type_get_true_extent_x.c:41                           18    18  

# Filter the union graph in a distribution-aware way.

In [25]:
class FilterUnion():
    """Filter an ensemble dataframe and its graph by inclusive time.

    The dataframe is expected to provide the columns 'dataset', 'name',
    'time (inc)' and 'time', which are the ones this class reads.
    """

    def __init__(self, state):
        """Store ``state.df``, group it by dataset and compute the time ranges.

        Args:
            state: Object exposing the ensemble dataframe as ``state.df``.
        """
        self.df = state.df
        # Group by the scalar key rather than a one-element list so that
        # iterating the groups yields plain dataset names on every pandas
        # version (newer versions yield 1-tuples for list keys).
        self.dataset_df = self.df.groupby('dataset')
        self.dataset_idx = {}
        self.set_max_min_times()

    def set_max_min_times(self):
        """Record per-dataset and overall min/max of inclusive and exclusive time.

        Fills ``dataset_idx`` (dataset name -> position in the per-dataset
        arrays), the four ``*_list`` arrays (one entry per dataset, in group
        order) and the four overall scalars, and prints the per-dataset values.

        Raises:
            ValueError: from numpy when the dataframe contains no dataset at
                all, because the overall min/max of an empty array is undefined.
        """
        max_inc, min_inc, max_exc, min_exc = [], [], [], []
        for position, (dataset, df) in enumerate(self.dataset_df):
            self.dataset_idx[dataset] = position
            max_inc.append(df['time (inc)'].max())
            min_inc.append(df['time (inc)'].min())
            max_exc.append(df['time'].max())
            min_exc.append(df['time'].min())
        # Build each array once; dtype=float matches the float arrays that
        # repeated hstack-ing onto an empty array used to produce.
        self.max_time_inc_list = np.array(max_inc, dtype=float)
        self.min_time_inc_list = np.array(min_inc, dtype=float)
        self.max_time_exc_list = np.array(max_exc, dtype=float)
        self.min_time_exc_list = np.array(min_exc, dtype=float)
        print("Dataset idx: ", self.dataset_idx)
        print("Min. time (inc): ", self.min_time_inc_list)
        print("Max. time (inc): ", self.max_time_inc_list)
        print("Min. time (exc): ", self.min_time_exc_list)
        print("Max. time (exc): ", self.max_time_exc_list)
        self.max_time_inc = np.max(self.max_time_inc_list)
        self.min_time_inc = np.min(self.min_time_inc_list)
        self.max_time_exc = np.max(self.max_time_exc_list)
        self.min_time_exc = np.min(self.min_time_exc_list)

    def filter_time_inc_overall(self, perc):
        """Return the rows whose inclusive time exceeds ``perc`` percent of the overall maximum.

        Args:
            perc: Threshold as a percentage of ``self.max_time_inc``.

        Returns:
            A new DataFrame with the rows of ``self.df`` above the threshold.
            The number of distinct names left is printed.
        """
        df = self.df.loc[self.df['time (inc)'] > perc*0.01*self.max_time_inc]
        filter_call_sites = df['name'].unique()
        print("Number of nodes left in dataframe: ", len(filter_call_sites))
        # The former extra mask `df['name'].isin(filter_call_sites)` selected
        # every row of `df` (the names were taken from `df` itself), so the
        # thresholded frame is returned directly.
        # NOTE(review): if the intent was to keep *all* rows of the surviving
        # call sites, the mask would have to be applied to self.df - confirm.
        return df

    def filter_graph_nodes(self, df, g):
        """Build a DiGraph with the edges of ``g`` whose endpoints both occur in ``df['name']``.

        Args:
            df: DataFrame whose 'name' column lists the call sites to keep.
            g: Graph whose ``edges()`` yields (source, target) pairs.

        Returns:
            A new ``nx.DiGraph`` containing only the retained edges.
        """
        # A set gives constant-time membership tests instead of scanning the
        # array of unique names for every edge endpoint.
        call_sites = set(df['name'].unique())

        ret = nx.DiGraph()

        for edge in g.edges():
            if edge[0] in call_sites and edge[1] in call_sites:
                ret.add_edge(edge[0], edge[1])

        return ret

    def generic_map(self, df, g):
        """Attach every column of ``df`` to the nodes of ``g`` as node attributes.

        For each column, each node receives the list of that column's values
        over the rows whose 'name' equals the node (an empty list when there
        are none). ``g`` is modified in place; nothing is returned.
        """
        nodes = list(g.nodes())
        # Select each node's rows once instead of once per column.
        rows_by_node = {node: df.loc[df['name'] == node] for node in nodes}
        for column in df.columns:
            values = {node: rows_by_node[node][column].tolist() for node in nodes}
            nx.set_node_attributes(g, name=column, values=values)

    def add_node_attributes(self, df, g):
        """Attach the columns of ``df`` to ``g`` via ``generic_map`` and print the nodes with their data."""
        # generic_map works in place and returns None, so nothing is bound.
        self.generic_map(df, g)
        print(g.nodes(data=True))

In [26]:
# Start a fresh `states` dict (replacing the one filled by read_gfs) holding a
# single 'ensemble' state backed by the concatenated dataframe and the union graph.
states = {}
states['ensemble'] = State('ensemble')
states['ensemble'].df = u_df
states['ensemble'].g = u_graph.R

# Constructing FilterUnion immediately computes and prints the per-dataset time ranges.
union_filter = FilterUnion(states['ensemble'])

Dataset idx:  {'osu_bcast.1.18.2019-09-03_10-33-44': 0, 'osu_bcast.1.18.2019-09-03_11-33-15': 1, 'osu_bcast.1.18.2019-09-03_12-29-49': 2}
Min. time (inc):  [0. 0. 0.]
Max. time (inc):  [1147289. 1139918. 1140997.]
Min. time (exc):  [0. 0. 0.]
Max. time (exc):  [281594. 287599. 305470.]


In [27]:
# Keep only rows whose inclusive time exceeds 5% of the largest inclusive time;
# union_filter itself keeps referring to the unfiltered frame.
states['ensemble'].df = union_filter.filter_time_inc_overall(5)

Number of nodes left in dataframe:  37


In [28]:
# Restrict the union graph to edges whose two endpoints both remain in the filtered dataframe.
states['ensemble'].g = union_filter.filter_graph_nodes(states['ensemble'].df, states['ensemble'].g)

In [29]:
# Node and edge counts of the filtered graph.
print(len(states['ensemble'].g.nodes()), len(states['ensemble'].g.edges()))

34 34


In [30]:
# Attach every dataframe column to the graph nodes as per-node lists, then
# print all nodes with their data (the output is very long).
union_filter.add_node_attributes(states['ensemble'].df, states['ensemble'].g)

[('<program root>', {'node': ["{'name': '<program root>', 'type': 'function'}", "{'name': '<program root>', 'type': 'function'}", "{'name': '<program root>', 'type': 'function'}", "{'name': '<program root>', 'type': 'function'}", "{'name': '<program root>', 'type': 'function'}", "{'name': '<program root>', 'type': 'function'}", "{'name': '<program root>', 'type': 'function'}", "{'name': '<program root>', 'type': 'function'}", "{'name': '<program root>', 'type': 'function'}", "{'name': '<program root>', 'type': 'function'}", "{'name': '<program root>', 'type': 'function'}", "{'name': '<program root>', 'type': 'function'}", "{'name': '<program root>', 'type': 'function'}", "{'name': '<program root>', 'type': 'function'}", "{'name': '<program root>', 'type': 'function'}", "{'name': '<program root>', 'type': 'function'}", "{'name': '<program root>', 'type': 'function'}", "{'name': '<program root>', 'type': 'function'}", "{'name': '<program root>', 'type': 'function'}", "{'name': '<program 

# Check if the grouped dataframe is right.

In [31]:
# Load a previously written ensemble dataframe for inspection.
# NOTE(review): absolute, machine-specific path; it points at 'osu_bcast_18'
# rather than the experimentDotPath directory used earlier in the notebook.
check_group_df = pd.read_csv('/home/vidi/Work/llnl/CallFlow/data/processed/osu_bcast_18/ensemble_df.csv')

In [32]:
# Inspect the 'group_path' column of the loaded ensemble dataframe.
print(check_group_df['group_path'])

0                 ('libmonitor.so.0.0.0=<program root>',)
1                 ('libmonitor.so.0.0.0=<program root>',)
2                 ('libmonitor.so.0.0.0=<program root>',)
3                 ('libmonitor.so.0.0.0=<program root>',)
4                 ('libmonitor.so.0.0.0=<program root>',)
                              ...                        
1491                                                  NaN
1492    ('libmonitor.so.0.0.0=<program root>', 'osu_bc...
1493    ('libmonitor.so.0.0.0=<program root>', 'osu_bc...
1494    ('libmonitor.so.0.0.0=<program root>', 'osu_bc...
1495                                                  NaN
Name: group_path, Length: 1496, dtype: object


# Pipeline

In [33]:
# Pipeline is already imported with the local imports at the top of the
# notebook; this re-import is redundant.
from pipeline import Pipeline
# The three runs pushed through the pipeline below.
datasets = [dataset1, dataset2, dataset3]
pipeline = Pipeline(config)

In [34]:
# Run the per-dataset pipeline stages and store each result in `states` under
# the dataset's name. The enumerate index was never used, so the names are
# iterated directly.
for dataset_name in datasets:
    states[dataset_name] = pipeline.create(dataset_name)
    states[dataset_name] = pipeline.process(states[dataset_name], 'entire')
    states[dataset_name] = pipeline.convertToNetworkX(states[dataset_name], 'path')
    # NOTE(review): the 'entire' write goes through write_gf while the 'filter'
    # write below goes through write_dataset_gf - confirm this is intentional.
    pipeline.write_gf(states[dataset_name], dataset_name, 'entire')
    # filterNetworkX receives the whole `states` dict plus the key to filter.
    states[dataset_name] = pipeline.filterNetworkX(states, dataset_name, config.filter_perc)
    pipeline.write_dataset_gf(states[dataset_name], dataset_name, 'filter')

[1m #  [31m [callfow.py] Action: Creating graphframes:  "osu_bcast.1.18.2019-09-03_11-33-15"[0m
[1m #  [32mPreprocessing : add_n_index[0m
[1m #  [32mPreprocessing : add_mod_index[0m
[1m #  [32mPreprocessing : add_callers_and_callees[0m
[1m #  [32mPreprocessing : add_show_node[0m
[1m #  [32mPreprocessing : add_vis_node_name[0m
[1m #  [32mPreprocessing : add_dataset_name[0m
[1m #  [32mPreprocessing : update_module_name[0m


Creating a Graph for osu_bcast.1.18.2019-09-03_11-33-15.


[1m #  [31m [callfow.py] Action: writing file for entire format [0m


Creating a Graph without node or edge attributes.
DFS on the graph
Number of nodes in graph 445
Dataframe Information
Size: (8010, 16)
Number of nodes in dataframe:  445


[1m #  [31m [callfow.py] Action: File path: /home/vidi/Work/llnl/CallFlow/data/processed/osu_bcast_18/osu_bcast.1.18.2019-09-03_11-33-15/entire_graph.json [0m
[1m #  [31m [callfow.py] Action: writing file for filter format [0m
[1m #  [31m [callfow.py] Action: Creating graphframes:  "osu_bcast.1.18.2019-09-03_12-29-49"[0m


Dataset idx:  {'osu_bcast.1.18.2019-09-03_11-33-15': 0}
Min. time (inc):  [0.]
Max. time (inc):  [1139918.]
Min. time (exc):  [0.]
Max. time (exc):  [287599.]
Number of nodes left in dataframe:  33


[1m #  [32mPreprocessing : add_n_index[0m
[1m #  [32mPreprocessing : add_mod_index[0m
[1m #  [32mPreprocessing : add_callers_and_callees[0m
[1m #  [32mPreprocessing : add_show_node[0m
[1m #  [32mPreprocessing : add_vis_node_name[0m
[1m #  [32mPreprocessing : add_dataset_name[0m
[1m #  [32mPreprocessing : update_module_name[0m


Creating a Graph for osu_bcast.1.18.2019-09-03_12-29-49.


[1m #  [31m [callfow.py] Action: writing file for entire format [0m


Creating a Graph without node or edge attributes.
DFS on the graph
Number of nodes in graph 446
Dataframe Information
Size: (8028, 16)
Number of nodes in dataframe:  446


[1m #  [31m [callfow.py] Action: File path: /home/vidi/Work/llnl/CallFlow/data/processed/osu_bcast_18/osu_bcast.1.18.2019-09-03_12-29-49/entire_graph.json [0m
[1m #  [31m [callfow.py] Action: writing file for filter format [0m
[1m #  [31m [callfow.py] Action: Creating graphframes:  "osu_bcast.1.18.2019-09-03_10-33-44"[0m


Dataset idx:  {'osu_bcast.1.18.2019-09-03_12-29-49': 0}
Min. time (inc):  [0.]
Max. time (inc):  [1140997.]
Min. time (exc):  [0.]
Max. time (exc):  [305470.]
Number of nodes left in dataframe:  32


[1m #  [32mPreprocessing : add_n_index[0m
[1m #  [32mPreprocessing : add_mod_index[0m
[1m #  [32mPreprocessing : add_callers_and_callees[0m
[1m #  [32mPreprocessing : add_show_node[0m
[1m #  [32mPreprocessing : add_vis_node_name[0m
[1m #  [32mPreprocessing : add_dataset_name[0m
[1m #  [32mPreprocessing : update_module_name[0m


Creating a Graph for osu_bcast.1.18.2019-09-03_10-33-44.


[1m #  [31m [callfow.py] Action: writing file for entire format [0m


Creating a Graph without node or edge attributes.
DFS on the graph
Number of nodes in graph 401
Dataframe Information
Size: (7218, 16)
Number of nodes in dataframe:  401


[1m #  [31m [callfow.py] Action: File path: /home/vidi/Work/llnl/CallFlow/data/processed/osu_bcast_18/osu_bcast.1.18.2019-09-03_10-33-44/entire_graph.json [0m
[1m #  [31m [callfow.py] Action: writing file for filter format [0m


Dataset idx:  {'osu_bcast.1.18.2019-09-03_10-33-44': 0}
Min. time (inc):  [0.]
Max. time (inc):  [1147289.]
Min. time (exc):  [0.]
Max. time (exc):  [281594.]
Number of nodes left in dataframe:  32


In [35]:
# Build the ensemble state from the per-dataset states, then filter it with the
# configured percentage.
# NOTE(review): `states` still contains the 'ensemble' entry created by hand in
# the earlier FilterUnion section when union() is called - confirm that
# pipeline.union ignores it or clear the entry first.
states['ensemble'] = pipeline.union(states)
states['ensemble'] = pipeline.filterNetworkX(states, 'ensemble', config.filter_perc)

Nodes in R and H are same?  False
Difference is  ['main', '<program root>', '1197:mv2_shm_bcast', '296:MPID_Finalize', '<unknown procedure> 0x26155 [libpsm_infinipath.so.1.16]', '210:MPIR_shmem_barrier_MV2', '92:PMPI_Bcast', 'PMPI_Finalize', '98:PMPI_Barrier', '_getopt_internal_r', '174:psm_dofinalize', '<unknown procedure> 0x20e9e [libpsm_infinipath.so.1.16]', '1492:MPIR_Knomial_Bcast_inter_node_MV2', '2732:MPIR_Bcast_tune_inter_node_helper_MV2', '<unknown procedure> 0x1c083 [libpsm_infinipath.so.1.16]', '3820:__intel_ssse3_rep_memcpy', '1340:MPIR_Shmem_Bcast_MV2', '2750:MPIR_Shmem_Bcast_MV2', '1451:MPIR_Bcast_MV2', '421:MPIR_Barrier_impl', '<unknown procedure> 0x33faf [libpsm_infinipath.so.1.16]', '1592:MPIR_Bcast_impl', '<unknown procedure> 0x33e76 [libpsm_infinipath.so.1.16]', '41:<unknown procedure> 0x188fe [libpsm_infinipath.so.1.16]', '<unknown procedure> 0x26058 [libpsm_infinipath.so.1.16]', '331:MPIR_Barrier_MV2', '119:MPI_Finalize', '3801:__intel_ssse3_rep_memcpy', '3115:MPIR

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  u_df = pd.concat([u_df, states[dataset].df])


# Group the filtered union graph based on module. 

In [36]:
# Pipeline for the ensemble graph
# NOTE(review): `dataset_name` is the variable left over from the per-dataset
# pipeline loop (the last entry of `datasets`), yet the result is stored under
# 'ensemble'. Presumably 'ensemble' was meant to be passed here - confirm
# against pipeline.group before changing.
states['ensemble'] = pipeline.group(states, dataset_name, 'module')
# Write the ensemble state out.
pipeline.write_ensemble_gf(states, 'ensemble')

Node:  <program root>
entry function: True
Change name: False
node path:  ['<program root>']
group path:  ('libmonitor.so.0.0.0=<program root>',)
component path:  ('libmonitor.so.0.0.0=<program root>', '<program root>')
component level:  2
Show node:  True
name:  libmonitor.so.0.0.0=<program root>
Module:  libmonitor.so.0.0.0=<program root>
Node:  main
entry function: True
Change name: False
node path:  ['<program root>', 'main']
group path:  ('libmonitor.so.0.0.0=<program root>', 'osu_bcast=main')
component path:  ('osu_bcast=main', 'main')
component level:  2
Show node:  True
name:  osu_bcast=main
Module:  osu_bcast=main
Node:  main
entry function: True
Change name: False
node path:  ['<program root>', 'main']
group path:  ('libmonitor.so.0.0.0=<program root>', 'osu_bcast=main')
component path:  ('osu_bcast=main', 'main')
component level:  2
Show node:  True
name:  osu_bcast=main
Module:  osu_bcast=main
Node:  main
entry function: True
Change name: False
node path:  ['<program root>'

Node:  <unknown procedure> 0x20e9e [libpsm_infinipath.so.1.16]
entry function: False
Change name: False
node path:  ['<program root>', 'main', '119:MPI_Finalize', 'PMPI_Finalize', '296:MPID_Finalize', '174:psm_dofinalize', '41:<unknown procedure> 0x188fe [libpsm_infinipath.so.1.16]', '<unknown procedure> 0x20e9e [libpsm_infinipath.so.1.16]']
group path:  ('libmonitor.so.0.0.0=<program root>', 'osu_bcast=main', 'libmonitor.so.0.0.0=119:MPI_Finalize', 'libmpi.so.12.1.1=PMPI_Finalize', 'libpsm_infinipath.so.1.16=41:<unknown procedure> 0x188fe [libpsm_infinipath.so.1.16]')
component path:  ('libpsm_infinipath.so.1.16=41:<unknown procedure> 0x188fe [libpsm_infinipath.so.1.16]', '41:<unknown procedure> 0x188fe [libpsm_infinipath.so.1.16]', '<unknown procedure> 0x20e9e [libpsm_infinipath.so.1.16]')
component level:  3
Show node:  False
name:  <unknown procedure> 0x20e9e [libpsm_infinipath.so.1.16]
Module:  libpsm_infinipath.so.1.16=41:<unknown procedure> 0x188fe [libpsm_infinipath.so.1.16]
No

[1m #  [31m [callfow.py] Action: writing file for ensemble format [0m


Node:  1451:MPIR_Bcast_MV2
entry function: False
Change name: False
node path:  ['<program root>', 'main', '92:PMPI_Bcast', '1592:MPIR_Bcast_impl', '1451:MPIR_Bcast_MV2']
group path:  ('libmonitor.so.0.0.0=<program root>', 'osu_bcast=main', 'libmpi.so.12.1.1=92:PMPI_Bcast')
component path:  ('libmpi.so.12.1.1=92:PMPI_Bcast', '92:PMPI_Bcast', '1592:MPIR_Bcast_impl', '1451:MPIR_Bcast_MV2')
component level:  4
Show node:  False
name:  1451:MPIR_Bcast_MV2
Module:  libmpi.so.12.1.1=92:PMPI_Bcast
Node:  3115:MPIR_Bcast_index_tuned_intra_MV2
entry function: False
Change name: False
node path:  ['<program root>', 'main', '92:PMPI_Bcast', '1592:MPIR_Bcast_impl', '1451:MPIR_Bcast_MV2', '3115:MPIR_Bcast_index_tuned_intra_MV2']
group path:  ('libmonitor.so.0.0.0=<program root>', 'osu_bcast=main', 'libmpi.so.12.1.1=92:PMPI_Bcast')
component path:  ('libmpi.so.12.1.1=92:PMPI_Bcast', '92:PMPI_Bcast', '1592:MPIR_Bcast_impl', '1451:MPIR_Bcast_MV2', '3115:MPIR_Bcast_index_tuned_intra_MV2')
component lev