In [85]:
from hatchet import *
import pandas as pd
import numpy as np
from state import State
import matplotlib.pyplot as plt
import json
import utils

In [84]:
%matplotlib inline

In [86]:
pd.options.display.max_rows = 10
pd.options.display.float_format = '{:,.2f}'.format
plt.rcParams['figure.figsize'] = (16, 12)

In [7]:
name = 'osu_bw'
dir_name = "/Users/jarus/ucd/Research/Visualisation/projects/CallFlow/src/server/.callflow/"

In [8]:
def lookup(df, node):
    return df.loc[df['node'] == node]

In [9]:
def lookup_with_name(df, name):
    return df.loc[df['name'] == name]

In [25]:
a = df.loc[df['name'] == '309:MPIR_Waitall_impl']
print(a)

                     node  rank  time (inc)  time  nid  rank.1  \
28  309:MPIR_Waitall_impl     0   1293311.0   0.0   30       0   
53  309:MPIR_Waitall_impl     1   1292380.0   0.0   85       1   

                                                 file  line            module  \
28  /tmp/dpkg-mkdeb.gouoc49UG7/src/mvapich/src/bui...   145  libmpi.so.12.0.5   
53  /tmp/dpkg-mkdeb.gouoc49UG7/src/mvapich/src/bui...   145  libmpi.so.12.0.5   

                     name                        ...                          \
28  309:MPIR_Waitall_impl                        ...                           
53  309:MPIR_Waitall_impl                        ...                           

   n_index mod_index                                            callees  \
28      15         6  ['145:MPIDI_CH3_Progress_start', '162:MPIDI_CH...   
53      46         6  ['145:MPIDI_CH3_Progress_start', '162:MPIDI_CH...   

                                     callers  show_node  \
28  ['122:PMPI_Waitall', '140:P

In [18]:
def replace_str_with_Node(df, graph):
        mapper = {}
        def dfs_recurse(root):
            for node in root.children: 
                mapper[node.callpath[-1]] = Node(node.nid, node.callpath, None)
                dfs_recurse(node)
        for root in graph.roots:
            mapper[root.callpath[-1]] = Node(root.nid, root.callpath, None)
            dfs_recurse(root)
        df['node'] = df['node'].apply(lambda node: mapper[node] if node in mapper else '')
        return df

In [22]:
def read_gf(name):
        state = State()
        path = dir_name + name
        df_filepath = path + '/filter_df.csv'
        entire_df_filepath = path + '/entire_df.csv'
        graph_filepath = path + '/filter_graph.json'
        entire_graph_filepath = path + '/entire_graph.json'   

        with open(graph_filepath, 'r') as graphFile:
            data = json.load(graphFile)

        state.gf = GraphFrame()
        state.gf.from_literal(data)

        with open(entire_graph_filepath, 'r') as entire_graphFile:
            entire_data = json.load(entire_graphFile)
            
        state.entire_gf = GraphFrame()
        state.entire_gf.from_literal(entire_data)

        state.df = pd.read_csv(df_filepath)
        state.entire_df = pd.read_csv(entire_df_filepath)

        state.graph = state.gf.graph
        state.entire_graph = state.entire_gf.graph

        state.map = state.node_hash_mapper()

        # Print the module group by information. 
        # print(state.df.groupby(['module']).agg(['mean','count']))

        # replace df['node'] from str to the Node object.
        state.df = replace_str_with_Node(state.df, state.graph)
        state.entire_df = replace_str_with_Node(state.entire_df, state.entire_graph)

        # add path to the dataframes. 
        # state.df['path'] = state.df['node'].apply(lambda node: node.callpath)
        # state.entire_df['path'] = state.entire_df['node'].apply(lambda node: node.callpath if node else [])

        return state

In [23]:
states = {}
states[name] = read_gf(name)
df = states['osu_bw'].df
graph = states['osu_bw'].graph

In [None]:
print(df[['name', 'module', 'time (inc)']])

In [None]:
def bfs(graph):
    ret = {}
    node_count = 0
    level = 0
    for root in graph.roots:
        print(root)
        node_gen = root.traverse_bf()
        for node in node_gen:
            node_count += 1
            for child in node.children:
                ret[child.callpath[-1] + str(child.df_index)] = {
                    'df_index': child.df_index,
                    'level': len(child.callpath) - 1,
                    'path': child.callpath,
#                     'time (inc)': lookup(df, child.callpath[-1])
                }
                print(ret[child.callpath[-1] + str(child.df_index)])
                level += 1
    return ret

In [None]:
bfs(graph)

In [81]:
class groupBy:
    def __init__(self, state, group_by):
        self.state = state
        self.df = self.state.df
        self.group_by = group_by
#         self.eliminate_funcs = ['libmonitor.so.0.0.0']
        self.eliminate_funcs = []
        self.entry_funcs = {}
        self.drop_eliminate_funcs()
        self.run()
        
    # Drop all entries user does not want to see. 
    def drop_eliminate_funcs(self):
        for idx, func in enumerate(self.eliminate_funcs):
            self.state.df = self.state.df[self.state.df['module'] != func]

    # Create a group path for the df.column = group_path.
    def create_group_path(self, path):
        group_path = []
        temp = None
        for i, elem in enumerate(path):
            grouping = self.state.lookup_with_nodeName(elem)[self.group_by].unique()
            if len(grouping) != 0:
                if grouping[0] not in self.eliminate_funcs:
                    if temp == None or grouping[0] != temp:
                        group_path.append(grouping[0])
                        temp = grouping[0]
        group_path = tuple(group_path)
#         print('group_path: {0}'.format(group_path))
        return group_path

    # Find a name for nodes with no name. 
    def find_a_good_node_name(self, node):
        node_name = lookup_with_name(self.state.df, node.callpath[-1])[self.group_by].tolist()[0]
        if(node_name == ''):
            node_name = 'Unknown name(N/A)'
        return node_name

    def create_component_path(self, path, group_path):
        component_path = []
        path = list(path)
        component_module = lookup_with_name(self.state.df, path[-1])[self.group_by].tolist()[0]
        component_path.append(component_module)
        filter_path = [node for node in path if component_module == \
                       lookup_with_name(self.state.df, node)[self.group_by].tolist()[0]]
        for i, elem in enumerate(filter_path):            
             component_path.append(elem)                    
        return tuple(component_path)

    def create_component_level(self, component_path):
        return len(component_path) - 1
            
    def run(self):
        group_path = {}
        component_path = {}
        component_level = {}
        entry_func = {}
        show_node = {}
        node_name = {}       
    
        roots = self.state.graph.roots
        if len(roots) > 1:
            print('It is a multi-rooted tree with {0} roots'.format(len(roots)))
        
        for root in roots:
            node_gen = root.traverse()       
            rootdf = lookup_with_name(self.state.df, root.callpath[-1])
            
            if rootdf.empty:
                utils.debug('Not accounting the function: {0}'.format(root))
            # Check if the dataframe exists for the root node. 
            # It might be a function that is eliminated. 
            else: 
                utils.debug('Function: {0}'.format(root))
                group_path[rootdf.node[0]] = self.create_group_path(root.callpath)        
                node_name[rootdf.node[0]] = self.find_a_good_node_name(root)
                entry_func[rootdf.node[0]] = True
                show_node[rootdf.node[0]] = True
                count = 0
            root = next(node_gen)

            try:
                while root.callpath != None:
                    root = next(node_gen)
                    t = lookup_with_name(self.df, root.callpath[-1])
                    parents = root.parents 
                    
                    for idx, parent in enumerate(parents):
                        s = lookup_with_name(self.df, parent.callpath[-1])
                    
                        if s.empty:
                            print("Not considering the Source function {0} [{1}]".format(parent, s['module']))
                        elif t.empty:
                            print("Not considering the Target function {0} [{1}]".format(root, t['path']))
                        elif not s.empty and not t.empty:
                            snode = s.node.tolist()[0]
                            tnode = t.node.tolist()[0]

                            spath = root.callpath
                            tpath = parent.callpath

                            tmodule = t[self.group_by].tolist()[0]
                                                        
                            node_name[tnode] = self.find_a_good_node_name(parent)
                            group_path[tnode] = self.create_group_path(tpath)
                            component_path[tnode] = self.create_component_path(tpath, group_path[tnode])
                            component_level[tnode] = len(component_path[tnode]) - 1
                            
                            if component_level[tnode] == 2:
                                entry_func[tnode] = True
                            else:
                                entry_func[tnode] = False
                                
                            if component_level[tnode] == 1:
                                show_node[tnode] = True
                            else:
                                show_node[tnode] = False
                            
                    print("is entry function:", entry_func[tnode])
                    print("node path: ", tpath)                
                    print("group path: ", group_path[tnode])
                    print("component path: ", component_path[tnode])
                    print("component level: ", component_level[tnode])
                    print("Show node: ", show_node[tnode])
                
            except StopIteration:
                pass
            finally:
                del root

        self.state.update_df('group_path', group_path)
        self.state.update_df('component_path', component_path)
        self.state.update_df('show_node', entry_func)
        self.state.update_df('vis_node_name', node_name)
        self.state.update_df('component_level', component_level)

In [82]:
group = groupBy(states[name], 'module')
g_df = group.state.df
g_graph = group.state.graph

[1m CallFlow:  [32m [callfow.py] Function: <program root> [0m


is entry function: False
node path:  ('<program root>',)
group path:  ('libmonitor.so.0.0.0',)
component path:  ('libmonitor.so.0.0.0', '<program root>')
component level:  1
Show node:  True
is entry function: False
node path:  ('<program root>', 'main')
group path:  ('libmonitor.so.0.0.0', 'osu_bw')
component path:  ('osu_bw', 'main')
component level:  1
Show node:  True
is entry function: False
node path:  ('<program root>', 'main', 'Loop@osu_bw.c:134')
group path:  ('libmonitor.so.0.0.0', 'osu_bw', 'Unkno')
component path:  ('Unkno', 'Loop@osu_bw.c:134')
component level:  1
Show node:  True
is entry function: False
node path:  ('<program root>', 'main', 'Loop@osu_bw.c:134', '140:PMPI_Waitall')
group path:  ('libmonitor.so.0.0.0', 'osu_bw', 'Unkno', 'libmpi.so.12.0.5')
component path:  ('libmpi.so.12.0.5', '140:PMPI_Waitall')
component level:  1
Show node:  True
is entry function: True
node path:  ('<program root>', 'main', 'Loop@osu_bw.c:134', '140:PMPI_Waitall', '309:MPIR_Waitall_i

is entry function: False
node path:  ('<program root>', 'main', '157:MPI_Finalize', 'PMPI_Finalize', '294:MPID_Finalize', '162:MPIDI_CH3_Finalize', '230:psm_dofinalize', '36:<unknown procedure>', '<unknown procedure>', '<unknown procedure>', '<unknown procedure>')
group path:  ('libmonitor.so.0.0.0', 'osu_bw', 'libmonitor.so.0.0.0', 'libmpi.so.12.0.5', 'libpsm_infinipath.so.1.14')
component path:  ('libpsm_infinipath.so.1.14', '36:<unknown procedure>', '<unknown procedure>', '<unknown procedure>', '<unknown procedure>')
component level:  4
Show node:  False
is entry function: False
node path:  ('<program root>', 'main')
group path:  ('libmonitor.so.0.0.0', 'osu_bw')
component path:  ('osu_bw', 'main')
component level:  1
Show node:  True
is entry function: False
node path:  ('<program root>', 'main', 'Loop@osu_bw.c:112')
group path:  ('libmonitor.so.0.0.0', 'osu_bw', 'Unkno')
component path:  ('Unkno', 'Loop@osu_bw.c:112')
component level:  1
Show node:  True
is entry function: False
n

is entry function: False
node path:  ('<program root>', 'main', 'Loop@osu_bw.c:112', '122:PMPI_Waitall', '309:MPIR_Waitall_impl', '162:MPIDI_CH3_Progress_wait', '220:psm_progress_wait', '232:<unknown procedure>', '<unknown procedure>', '<unknown procedure>', '<unknown procedure>', '<unknown procedure>')
group path:  ('libmonitor.so.0.0.0', 'osu_bw', 'Unkno', 'libmpi.so.12.0.5', 'libpsm_infinipath.so.1.14')
component path:  ('libpsm_infinipath.so.1.14', '232:<unknown procedure>', '<unknown procedure>', '<unknown procedure>', '<unknown procedure>', '<unknown procedure>')
component level:  5
Show node:  False
is entry function: False
node path:  ('<program root>', 'main', 'Loop@osu_bw.c:112', '122:PMPI_Waitall', '309:MPIR_Waitall_impl', '162:MPIDI_CH3_Progress_wait', '220:psm_progress_wait', '232:<unknown procedure>', '<unknown procedure>', '<unknown procedure>', '<unknown procedure>', '<unknown procedure>', '<unknown procedure>')
group path:  ('libmonitor.so.0.0.0', 'osu_bw', 'Unkno', 'l

In [70]:
for idx, row in g_df.iterrows():
    print(row['name'], row['group_path'], row['component_path'])

<program root> ('libmonitor.so.0.0.0',) 
<program root> ('libmonitor.so.0.0.0',) 
main ('libmonitor.so.0.0.0',) ('libmonitor.so.0.0.0', '<program root>')
main ('libmonitor.so.0.0.0',) ('libmonitor.so.0.0.0', '<program root>')
157:MPI_Finalize ('libmonitor.so.0.0.0', 'osu_bw') ('osu_bw', 'main')
157:MPI_Finalize ('libmonitor.so.0.0.0', 'osu_bw') ('osu_bw', 'main')
PMPI_Finalize ('libmonitor.so.0.0.0', 'osu_bw', 'libmonitor.so.0.0.0') ('libmonitor.so.0.0.0', '<program root>', '157:MPI_Finalize')
PMPI_Finalize ('libmonitor.so.0.0.0', 'osu_bw', 'libmonitor.so.0.0.0') ('libmonitor.so.0.0.0', '<program root>', '157:MPI_Finalize')
294:MPID_Finalize ('libmonitor.so.0.0.0', 'osu_bw', 'libmonitor.so.0.0.0', 'libmpi.so.12.0.5') ('libmpi.so.12.0.5', 'PMPI_Finalize')
294:MPID_Finalize ('libmonitor.so.0.0.0', 'osu_bw', 'libmonitor.so.0.0.0', 'libmpi.so.12.0.5') ('libmpi.so.12.0.5', 'PMPI_Finalize')
162:MPIDI_CH3_Finalize ('libmonitor.so.0.0.0', 'osu_bw', 'libmonitor.so.0.0.0', 'libmpi.so.12.0.5') ('

# A more generic graph.

In [None]:
class Action():
    __metaclass__ = ABCMeta
     
    def __init__(self):
        pass
    
    def dfs(self, graph, limit):
        self.level = 0
        
        def dfs_recurse(root):
            for node in root.children:
                if(self.level < limit):
                    print("Node : ", node)
                    self.level += 1
                    dfs_recurse(node)
        
        for root in graph.roots:
            dfs_recurse(root)

    def create_group_path(self, node):
        group_path = []
        temp = None
        path = node.callpath
        for i, elem in enumerate(path):
            elem_df = self.state.lookup_with_nodeName(elem)
            if(elem_df.empty):
                grouping = 'Unkno'
            else:
                grouping = elem_df[self.group_by].tolist()[0]
            if temp == None or grouping != temp:
                group_path.append(grouping)
                temp = grouping
        return tuple(group_path)

    def find_a_good_node_name(self, node):
        node_name = self.state.lookup_with_node(node)[self.group_by].tolist()[0]
        return node_name

    def create_component_path(self, path, group_path):
        component_path = []
        path = list(path)
        component_module = self.state.lookup_with_nodeName(path[-1])[self.group_by].tolist()[0]
        component_path.append(component_module)

        filter_path = [node for node in path if component_module == self.state.lookup_with_nodeName(node)[self.group_by].tolist()[0]]
       
        for i, elem in enumerate(filter_path):            
             component_path.append(elem)                    
        return tuple(component_path)

    def create_component_level(self, component_path):
        return len(component_path) - 1

    # Assign a "Module" name to a given hatchet node. 
    def give_module_name(self, node):
        # start = time.time()
        df = self.state.lookup_with_nodeName(node.callpath[-1])
        unique_modules = df[self.group_by].unique()
        if(len(unique_modules) == 1):
            return unique_modules[0]
        elif(len(unique_modules) == 0): 
            # print('Entry not found in dataframe')
            return 'Unkno'
        else:
            print('Error! Multiple modules for a node.')
            return None

    @abstractmethod
    def run():
        pass

In [None]:
class groupGraph(Graph):
    """ A group node in the call graph.
    """
    def __init__(self):
        self.nodes = []
        self.edges = []
        # Map to check if such element exist in the graph. 
        self.nodeMap = {}
        self.roots = []
    
    def is_module(self, node_name):
        for idx, module in enumerate(self.nodes):
            if(node_name == module.callpath[-1]):
                return True
        return False

    def add_module(self, module):
        assert isinstance(module, Node)
        self.nodes.append(module)
        self.nodeMap[module.callpath[-1]] = module

    def add_inner_node(self, module_name, inner_node):
        assert isinstance(inner_node, Node)
        group_node = self.nodeMap[module_name]
        self.nodeMap[module_name] = inner_node
        group_node.hierarchy.append(inner_node)

    def print(self):
        print("Nodes: ", self.nodes)
        for idx, node in enumerate(self.nodes):
            print("Node name: {0}, \n hierarchy: {1}".format(node.name, node.hierarchy))
    def split_by_entry_function(self):
        return 

    def split_by_caller_function(self):
        return

    def get_hierarchy():
        return

    def split_level(self):
        return 

In [None]:
class groupNode(Node):
    def __init__(self, nid, name, callpath_tuple, source):
        self.nid = nid
        self.name = name
        self.callpath = callpath_tuple
        self.parents = []
        
        if source is not None:
            self.add_parent(source)
        self.children = []
        self.entry_funcs = []
        self.caller_funcs = []
        self.hierarchy = []

    def add_parent(self, node):
        self.parents.append(node)

    def add_child(self, node):
        assert isinstance(node, groupNode)
        self.children.append(node)

    def add_entry_funcs(self, node):
        assert isinstance(node, Node)
        self.entry_funcs.append(node)

    def __str__(self):
        """ Returns a string representation of the node.
        """
        return '[Node] name: {0}, number_of_parents: {1}, number_of_children: {2}, entry_funcs: {3}'.format(self.callpath, len(self.parents), len(self.children), len(self.entry_funcs))


In [None]:
class groupBy(Action):
    def __init__(self, state, group_by):
        self.state = state
        self.graph = state.graph
        self.df = state.df
        self.entire_df = state.entire_df
        self.group_by = group_by
        self.node_count = 0
        self.empty_node = Node(-1, (), None)
        self.nodeModuleMap = self.find_mapping_from_df()
        print(self.nodeModuleMap.keys())
        self.run()

    def find_mapping_from_df(self):
        fns = self.df['name'].unique()
        nodeModuleMap = {}
        for idx, fn in enumerate(fns):
            fn_hatchet = self.df[self.df['name'] == fn]['node'].unique()[0]
            nodeModuleMap[fn] = self.df[self.df['name'] == fn]['module'].unique()
        return nodeModuleMap
        
    def add_node(self, source, target, is_root=False):
        count = self.node_count
        
        target_name = target.callpath[-1]
        target_module_name = self.give_module_name(target)
        target_callpath = self.create_group_path(target)
        if (source == self.empty_node):
            pass
        else:
            source_name = source.callpath[-1]
            source_module_name = self.give_module_name(source)
            source_callpath = self.create_group_path(source)

            print(source_callpath, target_callpath)
        # hatchet_hash = self.state.lookup_with_node(node.callpath[-1])['node'].unique()

 
        if(is_root):
            source_module_name = None
            source_callpath = None
            group_node = groupNode(count, target_module_name, source_callpath, source_module_name)
            self.group_graph.roots.append(group_node)
            
        else:
            if(self.group_graph.is_module(target_module_name)):
                self.group_graph.add_inner_node(target_module_name, target)
            else:
                if(len(self.group_graph.nodes) == 0):
                    source_module = None
                else:
                    source_module = self.nodeModuleMap[source_name]
                    group_node = groupNode(count, target_module_name, target_callpath, source_module_name)
            self.group_graph.add_module(target)
        
    def run(self):         
        roots = self.graph.roots
        # New roots of the grouped graph.
        new_roots = []
        self.group_graph = groupGraph()
        
        for root in roots:
            level = 0
            node_gen = root.traverse()  
            self.add_node(self.empty_node, root, True)
#             print(self.group_graph.nodeMap)

            try:
                while root.callpath != None:
                    source = root
                    target = next(node_gen)
                    self.add_node(source, target, False)
                    # self.add_edge(target, source)
                    root = target

            except StopIteration:
                pass
            finally:
                del root

        self.group_graph.print()

        group_roots = self.group_graph.roots
        print(self.group_graph.nodeMap)
#         for root in group_roots:
#             group_node_gen = root.traverse()
#             print(root.callpath)
#             try:
#                 while root.callpath != None:
#                     source = root
#                     target = next(group_node_gen)
#                     print(source, target)
#             except StopIteration:
#                 pass
#             finally: 
#                 del root
           

In [None]:
groupBy(states[name], 'module')

In [None]:
state = states[name]
print(state.df[state.df['name'] == "Loop@<unknown file> [kripke]:0"])

In [None]:
state = states[name]
fns = state.entire_df['name'].unique()
nodeModuleMap = {}
for idx, fn in enumerate(fns):
    fn_hatchet = state.entire_df[state.entire_df['name'] == fn]['node'].unique()[0]
    nodeModuleMap[fn_hatchet] = state.df[state.df['name'] == fn]['module'].unique()
print(nodeModuleMap)