In [38]:
import networkx as nx
import pandas
from hatchet import *
import os
import matplotlib.pyplot as plt
import platform
import json
from ast import literal_eval as make_tuple

In [7]:
%matplotlib inline

In [8]:
pd.options.display.max_rows = 10
pd.options.display.float_format = '{:,.2f}'.format
plt.rcParams['figure.figsize'] = (16, 12)

## Load the datasets and create Dataframes

In [9]:
name = 'osu_bw'
dir_name = "/Users/jarus/ucd/Research/Visualisation/projects/CallFlow/src/server/.callflow/"

In [14]:
def replace_str_with_Node(df, graph):
    mapper = {}
    def dfs_recurse(root):
        for node in root.children: 
            mapper[node.callpath[-1]] = Node(node.nid, node.callpath, None)
            dfs_recurse(node)
    for root in graph.roots:
        mapper[root.callpath[-1]] = Node(root.nid, root.callpath, None)
        dfs_recurse(root)
    df['node'] = df['node'].apply(lambda node: mapper[node] if node in mapper else '')
    return df

In [15]:
def read_gf(name):
    state = State()
    path = dir_name + name
    df_filepath = path + '/group_df.csv'
    entire_df_filepath = path + '/entire_df.csv'
    graph_filepath = path + '/filter_graph.json'
    entire_graph_filepath = path + '/entire_graph.json'   

    with open(graph_filepath, 'r') as graphFile:
        data = json.load(graphFile)

    state.gf = GraphFrame()
    state.gf.from_literal(data)

    with open(entire_graph_filepath, 'r') as entire_graphFile:
        entire_data = json.load(entire_graphFile)
            
    state.entire_gf = GraphFrame()
    state.entire_gf.from_literal(entire_data)

    state.df = pd.read_csv(df_filepath)
    state.entire_df = pd.read_csv(entire_df_filepath)

    state.graph = state.gf.graph
    state.entire_graph = state.entire_gf.graph

    state.map = state.node_hash_mapper()

    # replace df['node'] from str to the Node object.
    state.df = replace_str_with_Node(state.df, state.graph)
    state.entire_df = replace_str_with_Node(state.entire_df, state.entire_graph)

    # add path to the dataframes. 
    # state.df['path'] = state.df['node'].apply(lambda node: node.callpath)
    # state.entire_df['path'] = state.entire_df['node'].apply(lambda node: node.callpath if node else [])

    return state

In [16]:
states = {}
states[name] = read_gf(name)
df = states['osu_bw'].df
graph = states['osu_bw'].graph

## Gets the paths of functions inside the module and converts to a dataframe.


In [12]:
def getHierarchyfromdf(state, module):
    df = state.df
    paths = []
    func_in_module = df.loc[df['module'] == module]['name'].unique().tolist()
    print("Number of functions inside the {0} module: {1}".format(module, len(func_in_module)))
    for idx, func in enumerate(func_in_module):
        mean_inc_time = df.loc[df['name'] == func]['time (inc)'].mean()
        mean_exc_time = df.loc[df['name'] == func]['time'].mean()
        paths.append({
            "module": module,
            "opath": df.loc[df['name'] == func]['path'].unique().tolist()[0],
            "path": df.loc[df['name'] == func]['component_path'].unique().tolist()[0],
            "inc_time" : df.loc[df['name'] == func]['time (inc)'].mean(),
            "exclusive" : df.loc[df['name'] == func]['time'].mean(),
            "imbalance_perc" : df.loc[df['name'] == func]['imbalance_perc'].mean(),
            "component_level": df.loc[df['name'] == func]['component_level'].unique().tolist()[0],
        })
    return pd.DataFrame(paths)

In [13]:
for idx, state in enumerate(states):
    modules = state.df['module'].unique().tolist()
    print("In dataset {0}, there are {1} modules".format(dataset_path[idx], len(modules)))
    for idx, module in enumerate(modules):
        paths = getHierarchyfromdf(state, module)
        state.paths_df = paths 
        #paths.to_csv(str(module + ".csv"))

In dataset data/lulesh-1/db-ampi4-100-1, there are 4 modules
Number of functions inside the <unknown load module> module: 1
Number of functions inside the libc-2.17.so module: 2
Number of functions inside the lulesh2.0 module: 32
Number of functions inside the Unkno module: 50
In dataset data/lulesh-1/db-ampi4-100-8, there are 4 modules
Number of functions inside the <unknown load module> module: 1
Number of functions inside the libc-2.17.so module: 6
Number of functions inside the lulesh2.0 module: 36
Number of functions inside the Unkno module: 33


## Methods to add data into NxGraph

In [15]:
def add_levels(state):
    levelMap = {}
    track_level = 0
    nodes = state.g.nbunch_iter(state.root)
    
    for start_node in nodes:
        print("Start node", start_node)
        active_nodes = [start_node]
        levelMap[state.root] = 0
        
        for edge in nx.edge_dfs(state.g, start_node, 'original'):
            #rint("Edge {0}".format(edge))
            head_level = None
            tail_level = None
            head, tail = edge[0], edge[1]
            
            if head != start_node:
                active_nodes.append(head)
                
            if head in active_nodes and head != start_node and tail in active_nodes:
                #rint("Cycle", edge)
                edge_data = state.g.get_edge_data(*edge)                                                                             
                state.g.add_node(tail+'_')                                                                                           
                state.g.add_edge(head, tail+'_', data=edge_data)                                                                     
                state.g.node[tail+'_']['name'] = [tail + '_']                                                                        
                #state.g.node[tail+'_']['weight'] = state.g.node[tail]['weight']   
                state.g.remove_edge(edge[0], edge[1])
    return levelMap

In [16]:
 def flow_map(state):                                                                                                                 
        flowMap = {}                                                                                                                    
        nodes = state.g.nbunch_iter(state.root)                                                                                           
        for start_node in nodes:                                                                                                        
            for edge in nx.edge_dfs(state.g, start_node, 'original'):                                                                    
                head_level = None                                                                                                       
                tail_level = None                                                                                                       
                head, tail = self.tailhead(edge)                                                                                        
                                                                                                                                        
                # Check if there is an existing level mapping for the head node and assign.                                             
                if head in self.level_mapping.keys():                                                                                   
                    head_level =  self.level_mapping[head]                                                                              
                                                                                                                                        
                # Check if there is an existing level mapping for the tail node and assign.                                             
                if tail in self.level_mapping.keys():                                                                                   
                    tail_level = self.level_mapping[tail]                                                                               
                                                                                                                                        
                flowMap[(edge[0], edge[1])] = (int(head_level), int(tail_level))                                                        
        return flowMap 

In [17]:
def calculate_flows(state):
    graph = state.g
    ret = {}                                                                                                                                                                                                                                                                          
    edges = graph.edges()                                                                                                                                                                                                                                                             
    additional_flow = {}                                                                                                                                                                                                                                                              
                                                                                                                                                                                                                                                                                          
    # Calculates the costs in cycles and aggregates to one node.                                                                                                                                                                                                                      
    for edge in edges:                                                                                                                                                                                                                                                                
        source = edge[0]                                                                                                                                                                                                                                                              
        target = edge[1]                                                                                                                                                                                                                                                              
                                                                                                                                                                                                                                                                                          
        if source.endswith('_'):                                                                                                                                                                                                                                                      
            cycle_node = source                                                                                                                                                                                                                                                       
            cycle_node_df = self.state.lookup_with_nodeName(cycle_node[:-1])                                                                                                                                                                                                      
            additional_flow[cycle_node] = cycle_node_df['CPUTIME (usec) (I)'].max()                                                                                                                                                                                                   
        elif target.endswith('_'):                                                                                                                                                                                                                                                    
            cycle_node = target                                                                                                                                                                                                                                                       
            cycle_node_df = state.lookup_with_nodeName(cycle_node[:-1])                                                                                                                                                                                                      
            additional_flow[cycle_node] = cycle_node_df['CPUTIME (usec) (I)'].max()                                                                                                                                                                                                   
                                                                                                                                                                                                                                                                                          
    for edge in edges:                                                                                                                                                                                                                                                                
        added_flow = 0                                                                                                                                                                                                                                                                
        if edge[0].endswith('_'):                                                                                                                                                                                                                                                     
            ret[edge] = additional_flow[edge[0]]                                                                                                                                                                                                                                      
            continue                                                                                                                                                                                                                                                                  
        elif edge[1].endswith('_'):                                                                                                                                                                                                                                                   
            ret[edge] = additional_flow[edge[1]]                                                                                                                                                                                                                                      
            continue                                                                                                                                                                                                                                                                  
        source = state.lookup_with_nodeName(edge[0])                                                                                                                                                                                                                         
        target = state.lookup_with_nodeName(edge[1])                                                                                                                                                                                                                         
                                                                                                                                                                                                                                                                                          
        source_inc = source['time (inc)'].max()                                                                                                                                                                                                                               
        target_inc = target['time (inc)'].max()                                                                         
                                                                                                                                                                                                                                                                                          
        if source_inc == target_inc:                                                                                                                                                                                                                                                  
            ret[edge] = source_inc                                                                                                                                                                                                                                                    
        else:                                                                                                                                                                                                                                                                         
            ret[edge] = target_inc    
    return ret   

In [18]:
def add_edge_attributes(state):
    capacity_mapping = calculate_flows(state)    
    nx.set_edge_attributes(state.g, name='weight', values=capacity_mapping)

In [19]:
def generic_map(df, nodes, attr):
    ret = {}
    for node in nodes:            
        if attr == 'time (inc)':
            ret[node] = df[df['name'] == node][attr].mean()
        else:
            ret[node] = df[df['name'] == node][attr].unique().tolist()     
    return ret

In [20]:
def add_node_attributes(state):
    module_mapping = generic_map(state.df, state.g.nodes(), 'module')
    nx.set_node_attributes(state.g, name='module', values=module_mapping)
    
    time_mapping = generic_map(state.df, state.g.nodes(), 'time (inc)')
    nx.set_node_attributes(state.g, name='time', values=time_mapping)

In [51]:
def no_cycle_path(path):
    ret = []
    mapper = {}
    path = make_tuple(path)
    for idx, elem in enumerate(path):
        if elem not in mapper:
            mapper[elem] = 1
            ret.append(elem)
        else:
            ret.append(elem + "_" + str(mapper[elem]))
            mapper[elem] += 1
    return tuple(ret)

In [52]:
def add_paths(state, path_name):
    for idx, row in state.df.iterrows():
        path = row[path_name]
        corrected_path = no_cycle_path(path)
        state.g.add_path(corrected_path)

## Create NxGraph

In [53]:
def create_nx_graph():
    state = states[name]
    state.g = nx.DiGraph()
    state.root = state.lookup_with_node(state.graph.roots[0])['name'][0]
    state.rootInc = state.lookup_with_node(state.graph.roots[0])['time (inc)'].max()
        
    add_paths(state, 'group_path')
#         state.levelMap = add_levels(state)
#         add_node_attributes(state)
#         add_edge_attributes(state)  
    
create_nx_graph()