In [1]:
from hatchet import *
import pandas as pd
import numpy as np
from state import State
import matplotlib.pyplot as plt
import json
import utils

In [2]:
%matplotlib inline

In [3]:
pd.options.display.max_rows = 75
pd.options.display.float_format = '{:,.2f}'.format
plt.rcParams['figure.figsize'] = (16, 12)

# Enter the path of .callflow directory. 

In [4]:
name = 'calc-pi'
# dir_name = "/home/vidi/Work/llnl/CallFlow/.callflow/"
dir_name = "/Users/jarus/ucd/Research/Visualisation/projects/CallFlow/.callflow/"

In [5]:
def lookup(df, node):
    return df.loc[df['node'] == node]

In [6]:
def lookup_with_node(df, node):
    return df.loc[df['name'] == node.callpath[-1]]

In [7]:
def lookup_with_name(df, name):
    return df.loc[df['name'] == name]

In [8]:
def replace_str_with_Node(df, graph):
        mapper = {}
        def dfs_recurse(root):
            for node in root.children: 
                mapper[node.callpath[-1]] = Node(node.nid, node.callpath, None)
                dfs_recurse(node)
        for root in graph.roots:
            mapper[root.callpath[-1]] = Node(root.nid, root.callpath, None)
            dfs_recurse(root)
        df['node'] = df['node'].apply(lambda node: mapper[node] if node in mapper else '')
        return df

In [12]:
def read_gf(name):
        state = State()
        path = dir_name + name
        df_filepath = path + '/filter_df.csv'
        entire_df_filepath = path + '/entire_df.csv'
        group_df_filepath = path + '/group_df.csv'
        graph_filepath = path + '/filter_graph.json'
        entire_graph_filepath = path + '/entire_graph.json'   
        group_graph_filepath = path + '/filter_graph.json'

        with open(graph_filepath, 'r') as graphFile:
            data = json.load(graphFile)

        state.gf = GraphFrame()
        state.gf.from_literal(data)

        with open(entire_graph_filepath, 'r') as entire_graphFile:
            entire_data = json.load(entire_graphFile)
            
        state.entire_gf = GraphFrame()
        state.entire_gf.from_literal(entire_data)

        state.df = pd.read_csv(df_filepath)
        state.entire_df = pd.read_csv(entire_df_filepath)
        state.group_df = pd.read_csv(group_df_filepath)
        
        state.graph = state.gf.graph
        state.entire_graph = state.entire_gf.graph
        state.group_graph = state.gf.graph

        # Print the module group by information. 
        # print(state.df.groupby(['module']).agg(['mean','count']))

        # replace df['node'] from str to the Node object.
        state.df = replace_str_with_Node(state.df, state.graph)
        state.entire_df = replace_str_with_Node(state.entire_df, state.entire_graph)
        state.group_df = replace_str_with_Node(state.group_df, state.group_graph)

        return state

In [13]:
states = {}
states[name] = read_gf(name)
entire_df = states[name].entire_df
df = states[name].df
graph = states[name].graph

In [22]:
print(df.info())
for idx, row in df.iterrows():
    print(row['path'])

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 68 entries, 0 to 67
Data columns (total 19 columns):
node             68 non-null object
rank             68 non-null int64
time (inc)       68 non-null float64
time             68 non-null float64
nid              68 non-null int64
rank.1           68 non-null int64
name             68 non-null object
type             68 non-null object
file             68 non-null object
line             68 non-null int64
module           68 non-null object
node.1           68 non-null object
n_index          68 non-null int64
mod_index        68 non-null int64
callees          68 non-null object
callers          68 non-null object
show_node        68 non-null bool
vis_node_name    68 non-null object
path             68 non-null object
dtypes: bool(1), float64(2), int64(6), object(10)
memory usage: 9.8+ KB
None
(('<program root>',), 2)
(('<program root>',), 2)
(('<program root>',), 2)
(('<program root>',), 2)
(('<program root>', 'main'), 4)
(('<progra

In [14]:
modules = df['module'].unique()

In [15]:
for idx, module in enumerate(modules):
    print(module, df.loc[df['module'] == module]['name'].unique())

libmonitor.so.0.0.0 ['<program root>' '62:MPI_Finalize']
cpi ['main']
libmpi.so.12.0.5 ['PMPI_Finalize' '294:MPID_Finalize' '162:MPIDI_CH3_Finalize'
 '230:psm_dofinalize']
libpsm_infinipath.so.1.14 ['36:<unknown procedure>' '<unknown procedure>']
Unknown(NA) ['<unknown file>:0']


In [50]:
def bfs(graph):
    ret = {}
    node_count = 0
    level = 0
    for root in graph.roots:
        print(root)
        node_gen = root.traverse_bf()
        for node in node_gen:
            node_count += 1
            for child in node.children:
                ret[child.callpath[-1] + str(child.df_index)] = {
                    'df_index': child.df_index,
                    'level': len(child.callpath) - 1,
                    'path': child.callpath,
#                     'time (inc)': lookup(df, child.callpath[-1])
                }
                print(ret[child.callpath[-1] + str(child.df_index)])
                level += 1
    return ret

In [25]:
def update_df(df, column, idx_column, idx_of, new_val):
    print(df.loc[df[idx_column] == idx_of][column])
    df.loc[df[idx_column] == idx_of][column] = new_val
    print(df[column])

In [24]:
def update_vals(row, data):
    row[column] = data
    return row

df.apply(update_vals, axis=1)

TypeError: ("update_vals() missing 1 required positional argument: 'data'", 'occurred at index 0')

In [28]:
update_df(states[name].df, 'module', 'name', 'main', 'main_program')

4    cpi
5    cpi
6    cpi
7    cpi
Name: module, dtype: object
0           libmonitor.so.0.0.0
1           libmonitor.so.0.0.0
2           libmonitor.so.0.0.0
3           libmonitor.so.0.0.0
4                           cpi
5                           cpi
6                           cpi
7                           cpi
8           libmonitor.so.0.0.0
9           libmonitor.so.0.0.0
10          libmonitor.so.0.0.0
11          libmonitor.so.0.0.0
12             libmpi.so.12.0.5
13             libmpi.so.12.0.5
14             libmpi.so.12.0.5
15             libmpi.so.12.0.5
16             libmpi.so.12.0.5
17             libmpi.so.12.0.5
18             libmpi.so.12.0.5
19             libmpi.so.12.0.5
20             libmpi.so.12.0.5
21             libmpi.so.12.0.5
22             libmpi.so.12.0.5
23             libmpi.so.12.0.5
24             libmpi.so.12.0.5
25             libmpi.so.12.0.5
26             libmpi.so.12.0.5
27             libmpi.so.12.0.5
28    libpsm_infinipath.so.1.14
29    li

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [51]:
class groupBy:
    def __init__(self, state, group_by):
        self.state = state
        self.df = self.state.df
        self.group_by = group_by
#         self.eliminate_funcs = ['libmonitor.so.0.0.0']
        self.eliminate_funcs = []
        self.module_func_map = {}
        self.callbacks = {}
        self.drop_eliminate_funcs()
        self.run()
        
    # Drop all entries user does not want to see. 
    def drop_eliminate_funcs(self):
        for idx, func in enumerate(self.eliminate_funcs):
            self.state.df = self.state.df[self.state.df['module'] != func]
            
    # Create a group path for the df.column = group_path.
    def create_group_path(self, path):
        group_path = []
        temp = None
        function = path[-1]        
        module_idx = 0
        change_name = False
        
        for i, elem in enumerate(path):
            grouping = self.state.lookup_with_nodeName(elem)[self.group_by].unique()
            if len(grouping) == 0:
                break
            
            module = grouping[0]
            
            # Add the function to the module map.
            if module not in self.module_func_map:
                self.module_func_map[module] = []
            self.module_func_map[module].append(function)
            
            # Append the module into the group path. 
            if module not in self.eliminate_funcs:
                if temp == None or module != temp:
                    # Append "_" + module_idx if the module exists in the group_path. 
                    if module in group_path:
                        from_module = group_path[len(group_path) - 1]
                        to_module = module
                        if(from_module not in self.callbacks):
                            self.callbacks[from_module] = []
                        if(to_module not in self.callbacks[from_module]):
                            self.callbacks[from_module].append(to_module)
                        
                        change_name = True
                    else:
                        group_path.append(module)
                        temp = module
        
        group_path = tuple(group_path)
        return (group_path, change_name)

    def create_component_path(self, path, group_path):
        component_path = []
        path = list(path)
#         component_module = lookup_with_name(self.state.df, path[-1])[self.group_by].tolist()[0]
        component_module = group_path[-1]
        component_path = [node for node in path if component_module == \
                       lookup_with_name(self.state.df, node)[self.group_by].tolist()[0]]
        if len(component_path) == 0:
            component_path.append(path[-1])
        component_path.insert(0, component_module)
        return tuple(component_path)
            
    def run(self):
        group_path = {}
        component_path = {}
        component_level = {}
        entry_func = {}
        show_node = {}
        node_name = {}    
        module = {}
        change_name = {}
    
        roots = self.state.graph.roots
        if len(roots) > 1:
            print('It is a multi-rooted tree with {0} roots'.format(len(roots)))
        
        for root in roots:
            node_gen = root.traverse()       
            rootdf = lookup_with_name(self.state.df, root.callpath[-1])
            
            if rootdf.empty:
                utils.debug('Not accounting the function: {0}'.format(root))
            # Check if the dataframe exists for the root node. 
            # It might be a function that is eliminated. 
            else: 
                utils.debug('Function: {0}'.format(root))
                temp_group_path_results = self.create_group_path(root.callpath)
                group_path[rootdf.node[0]] = temp_group_path_results[0]
                change_name[rootdf.node[0]] = temp_group_path_results[1]
                
                component_path[rootdf.node[0]] = self.create_component_path(root.callpath, group_path[rootdf.node[0]])
                component_level[rootdf.node[0]] = len(component_path[rootdf.node[0]])
                node_name[rootdf.node[0]] = lookup_with_node(self.df, root)['module'][0]
                module[rootdf.node[0]] = group_path[rootdf.node[0]][-1]
                entry_func[rootdf.node[0]] = True
                show_node[rootdf.node[0]] = True
                count = 0
                
                print("entry function:", entry_func[rootdf.node[0]])
                print('Change name:', change_name[rootdf.node[0]])
#                 print("node path: ", tpath)                
                print("group path: ", group_path[rootdf.node[0]])
                print("component path: ", component_path[rootdf.node[0]])
                print("component level: ", component_level[rootdf.node[0]])
                print("Show node: ", show_node[rootdf.node[0]])
                print("name: ", node_name[rootdf.node[0]])
                print('Module: ', module[rootdf.node[0]])
                print("=================================")
            root = next(node_gen)

            try:
                while root.callpath != None:
                    root = next(node_gen)
                    s = lookup_with_name(self.df, root.callpath[-1])
                    parents = root.parents 
                    
                    for idx, parent in enumerate(parents):
                        t = lookup_with_name(self.df, parent.callpath[-1])
                    
                        if s.empty:
                            print("Not considering the Source function {0} [{1}]".format(parent, s['module']))
                        elif t.empty:
                            print("Not considering the Target function {0} [{1}]".format(root, t['path']))
                        elif not s.empty and not t.empty:
                            snode = s.node.tolist()[0]
                            tnode = t.node.tolist()[0]

                            spath = root.callpath
                            tpath = parent.callpath

                            tmodule = t[self.group_by].tolist()[0]
                                                
                            temp_group_path_results = self.create_group_path(spath)
                            group_path[snode] = temp_group_path_results[0]
                            change_name[snode] = temp_group_path_results[1]
                            # update the df
                            
                            component_path[snode] = self.create_component_path(spath, group_path[snode])
                            component_level[snode] = len(component_path[snode])
                            module[snode] = component_path[snode][0]
                            
                            if snode.callpath[-1] == '232:<unknown procedure>':
                                print('aaaaaaaaaaaaaaaaaaaaaaa')
                            if component_level[snode] == 2:
                                entry_func[snode] = True
                                node_name[snode] = component_path[tnode][0]
                                show_node[snode] = True
                            else:
                                entry_func[snode] = False
                                node_name[snode] = "none"
                                show_node[snode] = False
                                
                    print('Node', snode)        
                    print("entry function:", entry_func[snode])
                    print('Change name:', change_name[snode])
                    print("node path: ", spath)                
                    print("group path: ", group_path[snode])
                    print("component path: ", component_path[snode])
                    print("component level: ", component_level[snode])
                    print("Show node: ", show_node[snode])
                    print("name: ", node_name[snode])
                    print('Module: ', module[snode])
                    print("=================================")
                
            except StopIteration:
                pass
            finally:
                del root

        self.state.update_df('group_path', group_path)
        self.state.update_df('component_path', component_path)
        self.state.update_df('show_node', entry_func)
        self.state.update_df('vis_node_name', node_name)
        self.state.update_df('component_level', component_level)
        self.state.update_df('_module', module)
        self.state.update_df('change_name', change_name)
        print(self.callbacks)

In [52]:
group = groupBy(states[name], 'module')
g_df = group.state.df
g_graph = group.state.graph

[1m CallFlow:  [32m [callfow.py] Function: <program root> [0m


entry function: True
Change name: False
group path:  ('libmonitor.so.0.0.0',)
component path:  ('libmonitor.so.0.0.0', '<program root>')
component level:  2
Show node:  True
name:  libmonitor.so.0.0.0
Module:  libmonitor.so.0.0.0
Node main
entry function: True
Change name: False
node path:  ('<program root>', 'main')
group path:  ('libmonitor.so.0.0.0', 'osu_bw')
component path:  ('osu_bw', 'main')
component level:  2
Show node:  True
name:  libmonitor.so.0.0.0
Module:  osu_bw
Node Loop@osu_bw.c:134
entry function: True
Change name: False
node path:  ('<program root>', 'main', 'Loop@osu_bw.c:134')
group path:  ('libmonitor.so.0.0.0', 'osu_bw', 'Unknown(NA)')
component path:  ('Unknown(NA)', 'Loop@osu_bw.c:134')
component level:  2
Show node:  True
name:  osu_bw
Module:  Unknown(NA)
Node 140:PMPI_Waitall
entry function: True
Change name: False
node path:  ('<program root>', 'main', 'Loop@osu_bw.c:134', '140:PMPI_Waitall')
group path:  ('libmonitor.so.0.0.0', 'osu_bw', 'Unknown(NA)', 'li

Node <unknown procedure>
entry function: False
Change name: True
node path:  ('<program root>', 'main', '157:MPI_Finalize', 'PMPI_Finalize', '294:MPID_Finalize', '162:MPIDI_CH3_Finalize', '230:psm_dofinalize', '36:<unknown procedure>', '<unknown procedure>', '<unknown procedure>', '<unknown procedure>')
group path:  ('libmonitor.so.0.0.0', 'osu_bw', 'libmpi.so.12.0.5', 'libpsm_infinipath.so.1.14')
component path:  ('libpsm_infinipath.so.1.14', '36:<unknown procedure>', '<unknown procedure>', '<unknown procedure>', '<unknown procedure>')
component level:  5
Show node:  False
name:  none
Module:  libpsm_infinipath.so.1.14
Node <unknown file>:0
entry function: True
Change name: True
node path:  ('<program root>', 'main', '157:MPI_Finalize', 'PMPI_Finalize', '294:MPID_Finalize', '162:MPIDI_CH3_Finalize', '230:psm_dofinalize', '36:<unknown procedure>', '<unknown procedure>', '<unknown procedure>', '<unknown procedure>', '<unknown file>:0')
group path:  ('libmonitor.so.0.0.0', 'osu_bw', 'lib

Change name: False
node path:  ('<program root>', 'main', 'Loop@osu_bw.c:112', '122:PMPI_Waitall', '309:MPIR_Waitall_impl', '162:MPIDI_CH3_Progress_wait', '220:psm_progress_wait', '232:<unknown procedure>', '<unknown procedure>', '<unknown procedure>', '<unknown procedure>', '<unknown procedure>', '<unknown procedure>', '<unknown procedure>')
group path:  ('libmonitor.so.0.0.0', 'osu_bw', 'Unknown(NA)', 'libmpi.so.12.0.5', 'libpsm_infinipath.so.1.14')
component path:  ('libpsm_infinipath.so.1.14', '232:<unknown procedure>', '<unknown procedure>', '<unknown procedure>', '<unknown procedure>', '<unknown procedure>', '<unknown procedure>', '<unknown procedure>')
component level:  8
Show node:  False
name:  none
Module:  libpsm_infinipath.so.1.14
Node <unknown procedure>
entry function: False
Change name: False
node path:  ('<program root>', 'main', 'Loop@osu_bw.c:112', '122:PMPI_Waitall', '309:MPIR_Waitall_impl', '162:MPIDI_CH3_Progress_wait', '220:psm_progress_wait', '232:<unknown proced

In [92]:
for idx, row in g_df.iterrows():
    print(row['module'], row['_module'], row['component_path'])

libmonitor.so.0.0.0 libmonitor.so.0.0.0 ('libmonitor.so.0.0.0', '<program root>')
libmonitor.so.0.0.0 libmonitor.so.0.0.0 ('libmonitor.so.0.0.0', '<program root>')
osu_bw osu_bw ('osu_bw', 'main')
osu_bw osu_bw ('osu_bw', 'main')
Unkno Unkno ('Unkno', 'Loop@osu_bw.c:112')
libmpi.so.12.0.5 libmpi.so.12.0.5 ('libmpi.so.12.0.5', '122:PMPI_Waitall')
libmpi.so.12.0.5 libmpi.so.12.0.5 ('libmpi.so.12.0.5', '122:PMPI_Waitall', '309:MPIR_Waitall_impl')
libmpi.so.12.0.5 libmpi.so.12.0.5 ('libmpi.so.12.0.5', '122:PMPI_Waitall', '309:MPIR_Waitall_impl', '145:MPIDI_CH3_Progress_start')
libpsm_infinipath.so.1.14 libpsm_infinipath.so.1.14 ('libpsm_infinipath.so.1.14', '189:<unknown procedure>')
libpsm_infinipath.so.1.14 libpsm_infinipath.so.1.14 ('libpsm_infinipath.so.1.14', '232:<unknown procedure>', '<unknown procedure>', '<unknown procedure>', '<unknown procedure>', '<unknown procedure>', '<unknown procedure>', '<unknown procedure>', '<unknown procedure>', '<unknown procedure>')
libpsm_infinipath.

In [93]:
print(g_df[['module', '_module', 'vis_node_name']])

                       module                    _module  \
0         libmonitor.so.0.0.0        libmonitor.so.0.0.0   
1         libmonitor.so.0.0.0        libmonitor.so.0.0.0   
2                      osu_bw                     osu_bw   
3                      osu_bw                     osu_bw   
4                       Unkno                      Unkno   
5            libmpi.so.12.0.5           libmpi.so.12.0.5   
6            libmpi.so.12.0.5           libmpi.so.12.0.5   
7            libmpi.so.12.0.5           libmpi.so.12.0.5   
8   libpsm_infinipath.so.1.14  libpsm_infinipath.so.1.14   
9   libpsm_infinipath.so.1.14  libpsm_infinipath.so.1.14   
10  libpsm_infinipath.so.1.14  libpsm_infinipath.so.1.14   
11  libpsm_infinipath.so.1.14  libpsm_infinipath.so.1.14   
12  libpsm_infinipath.so.1.14  libpsm_infinipath.so.1.14   
13  libpsm_infinipath.so.1.14  libpsm_infinipath.so.1.14   
14  libpsm_infinipath.so.1.14  libpsm_infinipath.so.1.14   
15       libinfinipath.so.4.0  libpsm_in

In [54]:
print(g_df.loc[g_df['name'] == '232:<unknown procedure>'])

                 node_index  rank_index   time (inc)  time  nid  rank  \
19  232:<unknown procedure>           0   820,905.00  0.00   55     0   
34  232:<unknown procedure>           1 1,220,447.00  0.00   91     1   

              file  line                     module                     name  \
19  <unknown file>     0  libpsm_infinipath.so.1.14  232:<unknown procedure>   
34  <unknown file>     0  libpsm_infinipath.so.1.14  232:<unknown procedure>   

       ...                   vis_node_name  max_incTime  avg_incTime  \
19     ...       libpsm_infinipath.so.1.14   820,905.00   410,452.50   
34     ...       libpsm_infinipath.so.1.14 1,220,447.00   610,223.50   

    imbalance_perc                                               path  \
19            0.50  ('<program root>', 'main', 'Loop@osu_bw.c:112'...   
34            0.50  ('<program root>', 'main', 'Loop@osu_bw.c:112'...   

                                           group_path  \
19  (libmonitor.so.0.0.0, osu_bw, Unkno, libm

# A more generic graph.

In [18]:
class Action():
    __metaclass__ = ABCMeta
     
    def __init__(self):
        pass
    
    def dfs(self, graph, limit):
        self.level = 0
        
        def dfs_recurse(root):
            for node in root.children:
                if(self.level < limit):
                    print("Node : ", node)
                    self.level += 1
                    dfs_recurse(node)
        
        for root in graph.roots:
            dfs_recurse(root)

    def create_group_path(self, node):
        group_path = []
        temp = None
        path = node.callpath
        for i, elem in enumerate(path):
            elem_df = self.state.lookup_with_nodeName(elem)
            if(elem_df.empty):
                grouping = 'Unkno'
            else:
                grouping = elem_df[self.group_by].tolist()[0]
            if temp == None or grouping != temp:
                group_path.append(grouping)
                temp = grouping
        return tuple(group_path)

    def find_a_good_node_name(self, node):
        node_name = self.state.lookup_with_node(node)[self.group_by].tolist()[0]
        return node_name

    def create_component_path(self, path, group_path):
        component_path = []
        path = list(path)
        component_module = self.state.lookup_with_nodeName(path[-1])[self.group_by].tolist()[0]
        component_path.append(component_module)

        filter_path = [node for node in path if component_module == self.state.lookup_with_nodeName(node)[self.group_by].tolist()[0]]
       
        for i, elem in enumerate(filter_path):            
             component_path.append(elem)                    
        return tuple(component_path)

    def create_component_level(self, component_path):
        return len(component_path) - 1

    # Assign a "Module" name to a given hatchet node. 
    def give_module_name(self, node):
        # start = time.time()
        df = self.state.lookup_with_nodeName(node.callpath[-1])
        unique_modules = df[self.group_by].unique()
        if(len(unique_modules) == 1):
            return unique_modules[0]
        elif(len(unique_modules) == 0): 
            # print('Entry not found in dataframe')
            return 'Unkno'
        else:
            print('Error! Multiple modules for a node.')
            return None

    @abstractmethod
    def run():
        pass

NameError: name 'ABCMeta' is not defined

In [None]:
class groupGraph(Graph):
    """ A group node in the call graph.
    """
    def __init__(self):
        self.nodes = []
        self.edges = []
        # Map to check if such element exist in the graph. 
        self.nodeMap = {}
        self.roots = []
    
    def is_module(self, node_name):
        for idx, module in enumerate(self.nodes):
            if(node_name == module.callpath[-1]):
                return True
        return False

    def add_module(self, module):
        assert isinstance(module, Node)
        self.nodes.append(module)
        self.nodeMap[module.callpath[-1]] = module

    def add_inner_node(self, module_name, inner_node):
        assert isinstance(inner_node, Node)
        group_node = self.nodeMap[module_name]
        self.nodeMap[module_name] = inner_node
        group_node.hierarchy.append(inner_node)

    def print(self):
        print("Nodes: ", self.nodes)
        for idx, node in enumerate(self.nodes):
            print("Node name: {0}, \n hierarchy: {1}".format(node.name, node.hierarchy))
    def split_by_entry_function(self):
        return 

    def split_by_caller_function(self):
        return

    def get_hierarchy():
        return

    def split_level(self):
        return 

In [None]:
class groupNode(Node):
    def __init__(self, nid, name, callpath_tuple, source):
        self.nid = nid
        self.name = name
        self.callpath = callpath_tuple
        self.parents = []
        
        if source is not None:
            self.add_parent(source)
        self.children = []
        self.entry_funcs = []
        self.caller_funcs = []
        self.hierarchy = []

    def add_parent(self, node):
        self.parents.append(node)

    def add_child(self, node):
        assert isinstance(node, groupNode)
        self.children.append(node)

    def add_entry_funcs(self, node):
        assert isinstance(node, Node)
        self.entry_funcs.append(node)

    def __str__(self):
        """ Returns a string representation of the node.
        """
        return '[Node] name: {0}, number_of_parents: {1}, number_of_children: {2}, entry_funcs: {3}'.format(self.callpath, len(self.parents), len(self.children), len(self.entry_funcs))


In [None]:
class groupBy(Action):
    def __init__(self, state, group_by):
        self.state = state
        self.graph = state.graph
        self.df = state.df
        self.entire_df = state.entire_df
        self.group_by = group_by
        self.node_count = 0
        self.empty_node = Node(-1, (), None)
        self.nodeModuleMap = self.find_mapping_from_df()
        print(self.nodeModuleMap.keys())
        self.run()

    def find_mapping_from_df(self):
        fns = self.df['name'].unique()
        nodeModuleMap = {}
        for idx, fn in enumerate(fns):
            fn_hatchet = self.df[self.df['name'] == fn]['node'].unique()[0]
            nodeModuleMap[fn] = self.df[self.df['name'] == fn]['module'].unique()
        return nodeModuleMap
        
    def add_node(self, source, target, is_root=False):
        count = self.node_count
        
        target_name = target.callpath[-1]
        target_module_name = self.give_module_name(target)
        target_callpath = self.create_group_path(target)
        if (source == self.empty_node):
            pass
        else:
            source_name = source.callpath[-1]
            source_module_name = self.give_module_name(source)
            source_callpath = self.create_group_path(source)

            print(source_callpath, target_callpath)
        # hatchet_hash = self.state.lookup_with_node(node.callpath[-1])['node'].unique()

 
        if(is_root):
            source_module_name = None
            source_callpath = None
            group_node = groupNode(count, target_module_name, source_callpath, source_module_name)
            self.group_graph.roots.append(group_node)
            
        else:
            if(self.group_graph.is_module(target_module_name)):
                self.group_graph.add_inner_node(target_module_name, target)
            else:
                if(len(self.group_graph.nodes) == 0):
                    source_module = None
                else:
                    source_module = self.nodeModuleMap[source_name]
                    group_node = groupNode(count, target_module_name, target_callpath, source_module_name)
            self.group_graph.add_module(target)
        
    def run(self):         
        roots = self.graph.roots
        # New roots of the grouped graph.
        new_roots = []
        self.group_graph = groupGraph()
        
        for root in roots:
            level = 0
            node_gen = root.traverse()  
            self.add_node(self.empty_node, root, True)
#             print(self.group_graph.nodeMap)

            try:
                while root.callpath != None:
                    source = root
                    target = next(node_gen)
                    self.add_node(source, target, False)
                    # self.add_edge(target, source)
                    root = target

            except StopIteration:
                pass
            finally:
                del root

        self.group_graph.print()

        group_roots = self.group_graph.roots
        print(self.group_graph.nodeMap)
#         for root in group_roots:
#             group_node_gen = root.traverse()
#             print(root.callpath)
#             try:
#                 while root.callpath != None:
#                     source = root
#                     target = next(group_node_gen)
#                     print(source, target)
#             except StopIteration:
#                 pass
#             finally: 
#                 del root
           

In [None]:
groupBy(states[name], 'module')

In [None]:
state = states[name]
print(state.df[state.df['name'] == "Loop@<unknown file> [kripke]:0"])

In [None]:
state = states[name]
fns = state.entire_df['name'].unique()
nodeModuleMap = {}
for idx, fn in enumerate(fns):
    fn_hatchet = state.entire_df[state.entire_df['name'] == fn]['node'].unique()[0]
    nodeModuleMap[fn_hatchet] = state.df[state.df['name'] == fn]['module'].unique()
print(nodeModuleMap)