In [1]:
import networkx as nx
import math
import json
from ast import literal_eval as make_tuple
from networkx.readwrite import json_graph

In [2]:
from logger import log
import utils
from pipeline import Pipeline

# Read Config file

In [4]:
import os

# Import the reader by name instead of `import *`, so it is clear where
# `configFileReader` comes from and nothing else leaks into the namespace.
from configFileReader import configFileReader

# Path of the CallFlow config file to load. Export CALLFLOW_CONFIG to point at
# a different file (e.g. '.../config_files/scatter vs scatterv.json') without
# editing this cell; the default is the LULESH annotation profile.
configFileName = os.environ.get(
    'CALLFLOW_CONFIG',
    '/home/vidi/Work/llnl/CallFlow/config_files/lulesh-annotation-profile.json',
)
config = configFileReader(configFileName)

In [5]:
# Sanity check that the config loaded: show the `callflow_path` attribute it exposes.
print(config.callflow_path)

/home/vidi/Work/llnl/CallFlow/


# Converting to final NetworkX graph format

In [6]:
# Build `states`: the 'ensemble' entry comes from read_ensemble_gf(), and every
# dataset listed in the config gets its own entry keyed by the dataset's name.
datasets = config.datasets
pipeline = Pipeline(config)
states = {'ensemble': pipeline.read_ensemble_gf()}
for dataset in datasets:
    dataset_name = dataset['name']
    states.update({dataset_name: pipeline.read_dataset_gf(dataset_name)})

[1m #  [32m[Process] Reading the union dataframe and graph[0m
[1m #  [32m[Process] Reading the dataframe and graph of state: 1-core[0m
[1m #  [32m[Process] Reading the dataframe and graph of state: 27-cores[0m
[1m #  [32m[Process] Reading the dataframe and graph of state: 512-cores[0m


In [7]:
# Show which entries were loaded: 'ensemble' plus one key per dataset name.
print(states)

{'ensemble': <state.State object at 0x7f3215f41358>, '1-core': <state.State object at 0x7f3215f41400>, '27-cores': <state.State object at 0x7f3215f413c8>, '512-cores': <state.State object at 0x7f3215f417f0>}


In [14]:
# Mean of the 'time (inc)' column for each distinct `name` in the '512-cores' state.
print(
    states['512-cores'].df
    .groupby('name')[['time (inc)']]
    .mean()
)

                                   time (inc)
name                                         
ApplyMaterialPropertiesForElems  5.215690e+06
CalcCourantConstraintForElems    1.570734e+05
CalcEnergyForElems               2.788083e+06
CalcFBHourglassForceForElems     4.832240e+06
CalcForceForNodes                2.367263e+07
CalcHourglassControlForElems     1.814385e+07
CalcHydroConstraintForElems      3.324241e+04
CalcKinematicsForElems           4.029242e+06
CalcLagrangeElements             4.429387e+06
CalcMonotonicQGradientsForElems  1.793327e+06
CalcMonotonicQRegionForElems     1.482072e+06
CalcPressureForElems             1.226399e+06
CalcQForElems                    6.675364e+06
CalcSoundSpeedForElems           1.127088e+05
CalcTimeConstraintsForElems      1.950236e+05
CalcVolumeForceForElems          2.262529e+07
EvalEOSForElems                  5.101354e+06
IntegrateStressForElems          3.973302e+06
LagrangeElements                 1.636953e+07
LagrangeLeapFrog                 4

In [7]:
class DistGraph(nx.Graph):
    """Call graph for one CallFlow state, decorated with per-node and per-edge data.

    The working graph is ``self.g``: either a fresh ``networkx.DiGraph`` built
    from a path column of the state's DataFrame, or the graph already stored on
    the state (``state.g``). In the latter case attributes are written onto
    ``state.g`` itself, not onto a copy. The ``nx.Graph`` base class is only
    initialised; nodes and edges are never added to ``self`` directly.

    Parameters
    ----------
    state :
        Object exposing ``df`` (a DataFrame), ``name`` and ``g``. The DataFrame
        must contain ``name`` and ``dataset`` columns plus every column listed
        in ``self.columns``; ``show_node`` is also required when
        ``construct_graph`` is true.
    path : str
        Name of the DataFrame column holding call paths ('path', 'group_path'
        or 'component_path'). Only read when ``construct_graph`` is true, so
        '' is acceptable otherwise.
    group_by_attr : str
        Stored as ``self.group_by``; not otherwise used by this class.
    construct_graph : bool
        True builds ``self.g`` from the paths; False reuses ``state.g``.
    add_data : bool
        True attaches node attributes (ensemble-wide and per dataset) and edge
        weights to ``self.g``.
    """

    # Column families; each family is summarised differently per node.
    _NUMERIC_COLUMNS = ('time (inc)', 'time', 'component_level')
    _CALL_COLUMNS = ('callers', 'callees')
    _LABEL_COLUMNS = ('name', 'vis_name')
    _PATH_COLUMNS = ('component_path', 'group_path')

    # Sentinel returned for columns that belong to none of the families above.
    _UNHANDLED = object()

    def __init__(self, state, path, group_by_attr='module', construct_graph=True, add_data=True):
        super(DistGraph, self).__init__()
        self.state = state
        self.path = path
        self.df = self.state.df
        self.group_by = group_by_attr
        self.columns = ['time (inc)', 'group_path', 'name', 'time', 'callers', 'callees', 'vis_name']

        if construct_graph:
            print('Creating a Graph for {0}.'.format(self.state.name))
            self.g = nx.DiGraph()
            self.add_paths(path)
        else:
            print('Using the existing graph from state {0}'.format(state.name))
            self.g = state.g

        self.callbacks = []
        # Not filled in by this class; tailheadDir() looks edges up in it.
        self.edge_direction = {}

        if add_data:
            self.add_node_attributes()
            self.add_edge_attributes()
        else:
            print('Creating a Graph without node or edge attributes.')

        # Computed once, after the edge weights (if any) have been attached, so
        # the matrix reflects them.
        self.adj_matrix = nx.adjacency_matrix(self.g)
        self.dense_adj_matrix = self.adj_matrix.todense()

    @staticmethod
    def _parse_sequence(value):
        """Return ``value`` parsed with ``literal_eval`` if it is a string, else unchanged."""
        if isinstance(value, str):
            return make_tuple(value)
        return value

    def _summarize_column(self, column, column_data):
        """Collapse one node's values for ``column`` into a single attribute value.

        Missing (NaN/None) entries are ignored. Numeric columns yield their
        maximum (``-1`` when empty), caller/callee columns the first value
        parsed into a sequence (``[]`` when empty), label columns the first
        value (``"None"`` when empty) and path columns the first value parsed
        into a list (``[]`` when empty). Any other column yields
        ``self._UNHANDLED``.
        """
        present = column_data.dropna()
        has_data = len(present) > 0
        # tolist() on a one-element slice converts numpy scalars to plain
        # Python objects without materialising the whole column.
        first = present.iloc[:1].tolist()[0] if has_data else None

        if column in self._NUMERIC_COLUMNS:
            return present.max() if has_data else -1
        if column in self._CALL_COLUMNS:
            return self._parse_sequence(first) if has_data else []
        if column in self._LABEL_COLUMNS:
            return first if has_data else "None"
        if column in self._PATH_COLUMNS:
            return list(self._parse_sequence(first)) if has_data else []
        return self._UNHANDLED

    def no_cycle_path(self, path):
        """Return ``path`` as a tuple in which repeated elements are made unique.

        The first occurrence of an element is kept as is; the k-th repeat is
        renamed ``'<elem>_<k>'`` so that a recursive call path does not
        introduce a cycle when added to the graph.
        """
        ret = []
        seen = {}
        for elem in path:
            if elem not in seen:
                seen[elem] = 1
                ret.append(elem)
            else:
                ret.append(elem + '_' + str(seen[elem]))
                seen[elem] += 1

        return tuple(ret)

    def add_paths(self, path):
        """Add the call path of every visible DataFrame row to ``self.g``.

        ``path`` names the column to read. A value may be a list/tuple or the
        string repr of one. Rows whose ``show_node`` is falsy are skipped, as
        are rows whose value is not (and does not parse to) a list or tuple,
        e.g. a missing value.
        """
        for _, row in self.df.iterrows():
            if not row.show_node:
                continue

            raw_path = self._parse_sequence(row[path])
            if not isinstance(raw_path, (list, tuple)):
                continue

            # Graph.add_path() was removed in NetworkX 2.4; the module-level
            # function is the supported spelling.
            nx.add_path(self.g, self.no_cycle_path(raw_path))

    def add_edge_attributes(self):
        """Attach ``weight`` (and, when run graphs exist, ``number_of_runs``) to edges."""
        run_counts = self.number_of_runs()
        if run_counts:
            nx.set_edge_attributes(self.g, name="number_of_runs", values=run_counts)
        capacity_mapping = self.calculate_flows(self.g)
        nx.set_edge_attributes(self.g, name='weight', values=capacity_mapping)

    def number_of_runs(self):
        """Count, for every edge, how many graphs in ``self.runs`` contain it.

        ``self.runs`` is not assigned anywhere in this class; it is expected to
        be a mapping of run name to graph set by the caller. When it is absent
        or empty an empty dict is returned.
        """
        runs = getattr(self, 'runs', None) or {}
        ret = {}
        for run_graph in runs.values():
            for edge in run_graph.edges():
                ret[edge] = ret.get(edge, 0) + 1
        return ret

    def add_union_node_attributes(self):
        """Store an ``'ensemble'`` summary dict on every node of ``self.R``.

        ``self.R`` is not assigned anywhere in this class and must be provided
        by the caller. Every dict-valued node attribute is treated as one
        dataset's data; scalar attributes are ignored. The summary holds the
        last ``name`` seen, the maximum ``time (inc)`` and ``time`` (floored at
        0) and the last ``entry_functions`` seen (``[]`` if none).
        """
        for node_name, node_data in self.R.nodes(data=True):
            # Snapshot the per-dataset dicts before the summary is written, so
            # a previous summary is never folded into the new one.
            per_dataset = [
                attrs for key, attrs in node_data.items()
                if key != 'ensemble' and isinstance(attrs, dict)
            ]

            ensemble = {}
            max_inc_time = 0
            max_exc_time = 0
            entry_functions = []
            for attrs in per_dataset:
                if 'name' in attrs:
                    ensemble['name'] = attrs['name']
                if 'time (inc)' in attrs:
                    max_inc_time = max(max_inc_time, attrs['time (inc)'])
                if 'time' in attrs:
                    max_exc_time = max(max_exc_time, attrs['time'])
                if 'entry_functions' in attrs:
                    entry_functions = attrs['entry_functions']

            ensemble['time (inc)'] = max_inc_time
            ensemble['time'] = max_exc_time
            ensemble['entry_functions'] = entry_functions
            self.R.nodes[node_name]['ensemble'] = ensemble

    def add_node_attributes(self):
        """Attach ensemble-wide and per-dataset attributes to the nodes of ``self.g``.

        Each column in ``self.columns`` becomes a node attribute computed over
        all rows; in addition every distinct value of the ``dataset`` column
        becomes a node attribute holding a dict of that dataset's values.
        """
        for column, values in self.ensemble_map(self.g.nodes()).items():
            nx.set_node_attributes(self.g, name=column, values=values)

        for dataset in self.df['dataset'].unique():
            nx.set_node_attributes(
                self.g, name=dataset, values=self.dataset_map(self.g.nodes(), dataset)
            )

    def tailhead(self, edge):
        """Return the ``(source, target)`` pair of ``edge``."""
        return (
         edge[0], edge[1])

    def tailheadDir(self, edge):
        """Return ``(str(source), str(target), direction)`` for ``edge``.

        The direction is looked up in ``self.edge_direction``, which this class
        only initialises to an empty dict; a ``KeyError`` is raised for edges
        the caller has not registered there.
        """
        return (
         str(edge[0]), str(edge[1]), self.edge_direction[edge])

    def leaves_below(self, graph, node):
        """Return the set of out-degree-0 nodes reachable from ``node`` in ``graph``."""
        leaves = set()
        for successors in nx.dfs_successors(graph, node).values():
            leaves.update(succ for succ in successors if graph.out_degree(succ) == 0)
        return leaves

    def calculate_flows(self, graph):
        """Map every edge of ``graph`` to the flow entering its target node.

        The flow is the maximum ``time (inc)`` over all DataFrame rows whose
        ``name`` equals the edge's target; it is NaN when no row matches.
        """
        # One grouped pass instead of a DataFrame scan per edge. A plain dict
        # avoids any index-lookup fallback for non-string node keys.
        max_inc_by_name = (
            self.df.groupby('name', sort=False)['time (inc)'].max().to_dict()
        )
        missing = float('nan')
        return {
            edge: max_inc_by_name.get(edge[1], missing)
            for edge in graph.edges()
        }

    def ensemble_map(self, nodes):
        """Summarise every column in ``self.columns`` for each node, over all rows.

        Returns ``{column: {node: value}}``. ``nodes`` is the iterable of node
        names to summarise; ``None`` means all nodes of ``self.g``.
        """
        if nodes is None:
            nodes = self.g.nodes()

        ret = {}
        for node in nodes:
            node_df = self.df.loc[self.df["name"] == node]

            for column in self.columns:
                per_node = ret.setdefault(column, {})
                value = self._summarize_column(column, node_df[column])
                if value is not self._UNHANDLED:
                    per_node[node] = value
        return ret

    def dataset_map(self, nodes, dataset):
        """Summarise every column in ``self.columns`` for each node, for one dataset.

        Returns ``{node: {column: value}}`` computed from the rows whose
        ``dataset`` column equals ``dataset``. ``nodes`` is the iterable of
        node names to summarise; ``None`` means all nodes of ``self.g``.
        """
        if nodes is None:
            nodes = self.g.nodes()

        # The dataset filter does not depend on the node, so apply it once.
        dataset_df = self.df.loc[self.df["dataset"] == dataset]

        ret = {}
        for node in nodes:
            attrs = ret.setdefault(node, {})
            node_df = dataset_df.loc[dataset_df["name"] == node]

            for column in self.columns:
                value = self._summarize_column(column, node_df[column])
                if value is not self._UNHANDLED:
                    attrs[column] = value
        return ret

In [8]:
# Reuse the graph already stored on the ensemble state (no path column is read,
# hence the empty `path`) and attach node and edge attributes to it.
final_graph = DistGraph(
    state=states['ensemble'],
    path='',
    construct_graph=False,
    add_data=True,
)

Using the existing graph from state ensemble


In [9]:
# Convert the attributed graph to networkx's node-link dict layout, then to a
# JSON string.
result = json_graph.node_link_data(final_graph.g)
final_result = json.dumps(result)

# The serialised graph is far too large to read as cell output, so report its
# size and show a bounded preview; `final_result` still holds the full string.
PREVIEW_CHARS = 1000
print('{0} nodes, {1} edges, {2} characters of JSON'.format(
    final_graph.g.number_of_nodes(),
    final_graph.g.number_of_edges(),
    len(final_result),
))
print(final_result[:PREVIEW_CHARS])

{"directed": true, "multigraph": false, "graph": {}, "nodes": [{"show_node": true, "module": "<unknown load module>", "group_path": ["<unknown load module>=<partial call paths>"], "component_path": ["<unknown load module>=<partial call paths>", "<partial call paths>"], "time (inc)": 12918.0, "name": "<partial call paths>", "time": 0.0, "callers": [], "callees": ["<unknown procedure>"], "vis_name": "<unknown load module>=<partial call paths>", "osu_bcast.1.18.2019-09-03_11-33-15": {"time (inc)": 6987.0, "group_path": ["<unknown load module>=<partial call paths>"], "name": "<partial call paths>", "time": 0.0, "callers": [], "callees": ["<unknown procedure>"], "vis_name": "<unknown load module>=<partial call paths>"}, "osu_bcast.1.18.2019-09-03_12-29-49": {"time (inc)": 5948.0, "group_path": ["<unknown load module>=<partial call paths>"], "name": "<partial call paths>", "time": 0.0, "callers": [], "callees": ["<unknown procedure>"], "vis_name": "<unknown load module>=<partial call paths>"

In [10]:
# `df` is a reference to the ensemble state's DataFrame, not a copy: columns
# added to `df` also appear on states['ensemble'].df.
df  = states['ensemble'].df
# name_grouped = df.groupby(['name'])

def addID(name):
    """Derive a compact identifier from a callsite name.

    Keeps only the text after the last ':' (if any), then only the text before
    the first '[' (if the original name contained one), and finally replaces
    spaces with underscores.

    Examples: 'a.c:244' -> '244', 'foo [lib.so]' -> 'foo_',
    '<partial call paths>' -> '<partial_call_paths>'.
    """
    has_colon = ":" in name
    has_bracket = '[' in name

    if has_colon:
        # Keep the segment after the last colon.
        name = name.split(':')[-1]
    if has_bracket:
        # Drop the bracketed suffix (a no-op if the colon split removed it).
        name = name.split('[')[0]

    return name.replace(' ', '_')


# Add an 'id' column derived from each row's name. `df` is the ensemble
# state's own DataFrame, so the column is added there as well.
df['id'] = df['name'].map(addID)
print(df['id'].unique())

['<partial_call_paths>' '<unknown_procedure>' '0' '<program_root>' 'main'
 'free_buffer' '__GI___vsyslog_chk' '244' 'MPI_Finalize' 'PMPI_Finalize'
 'MPID_Finalize' 'psm_dofinalize' '<unknown_procedure>_0x1e450_'
 '<unknown_procedure>_0x1e82f_' '_int_free' '3986'
 '<unknown_procedure>_0x188b1_' '<unknown_procedure>_0xff82_'
 '<unknown_procedure>_0xf774_' '_getopt_internal_r' '716'
 '<unknown_procedure>_0x188fe_' '<unknown_procedure>_0x20e9e_'
 '<unknown_procedure>_0x26030_' '<unknown_procedure>_0x2604e_'
 '<unknown_procedure>_0x26058_' '<unknown_procedure>_0x1c060_'
 '<unknown_procedure>_0x1c075_' '<unknown_procedure>_0x1c083_'
 '<unknown_procedure>_0x1c088_' '<unknown_procedure>_0x1c08c_'
 '<unknown_procedure>_0x1c096_' '<unknown_procedure>_0x1c09c_'
 '<unknown_procedure>_0x1c0a3_' '<unknown_procedure>_0x9c90_'
 '<unknown_procedure>_0x2605e_' '<unknown_procedure>_0x26068_'
 '<unknown_procedure>_0x1c080_' '<unknown_procedure>_0x2606e_'
 '<unknown_procedure>_0x26072_' '<unknown_procedure

In [18]:
# Show only the rows with the largest `time` values: printing the whole sorted
# frame floods the output. Ending the cell on the expression gives the rich
# table rendering instead of a wrapped plain-text dump.
TOP_N = 20
df.sort_values('time', ascending=False)[['name', 'time', 'module']].head(TOP_N)

                                                   name      time  \
3002       <unknown file> [libpsm_infinipath.so.1.16]:0  305470.0   
593        <unknown file> [libpsm_infinipath.so.1.16]:0  287599.0   
588        <unknown file> [libpsm_infinipath.so.1.16]:0  287585.0   
5986       <unknown file> [libpsm_infinipath.so.1.16]:0  281594.0   
5989       <unknown file> [libpsm_infinipath.so.1.16]:0  275647.0   
5979       <unknown file> [libpsm_infinipath.so.1.16]:0  275570.0   
3237       <unknown file> [libpsm_infinipath.so.1.16]:0  270107.0   
597        <unknown file> [libpsm_infinipath.so.1.16]:0  263622.0   
5972       <unknown file> [libpsm_infinipath.so.1.16]:0  263603.0   
5988       <unknown file> [libpsm_infinipath.so.1.16]:0  257642.0   
5978       <unknown file> [libpsm_infinipath.so.1.16]:0  257627.0   
2997       <unknown file> [libpsm_infinipath.so.1.16]:0  257439.0   
329        <unknown file> [libpsm_infinipath.so.1.16]:0  251661.0   
327        <unknown file> [libpsm_