In [8]:
import json
import logging
import os
import platform
import time
from ast import literal_eval as make_tuple

# Star import kept ahead of the explicit imports so it cannot shadow them.
from hatchet import *
import matplotlib.cm as cm
import matplotlib.pyplot as plt
import mpld3
import networkx as nx
import pandas
import seaborn.apionly as sns

import utils

In [9]:
from state import State
from preprocess import PreProcess

In [10]:
# Notebook-wide display defaults. The notebook imports `pandas` without the
# `pd` alias, so the options are set through the full module name.
pandas.options.display.max_rows = 10
pandas.options.display.float_format = '{:,.2f}'.format
plt.rcParams['figure.figsize'] = (16, 12)

In [41]:
# Root directory of the CallFlow checkout on this machine.
# Set the CALLFLOW_DIR environment variable to point somewhere else instead of
# editing this cell; locations previously toggled here by hand were
#   /Users/padmanabanke1/CallFlow/.callflow
#   /home/vidi/Work/llnl/CallFlow/.callflow
dirname = os.environ.get(
    "CALLFLOW_DIR",
    "/Users/jarus/ucd/Research/Visualisation/projects/CallFlow/",
)

In [44]:
class Create:
    '''
    Creates a graph frame.
    Input : config variable (config['paths'][name] is the dataset path, joined
            onto the CallFlow root), dataset name, and optionally callflow_dir,
            the CallFlow root directory. When callflow_dir is omitted the
            notebook-level `dirname` is used.
    Output : the instance itself; after construction it exposes
            gf (graphframe), df (gf.dataframe), graph (gf.graph) and
            node_hash_map (utils.node_hash_mapper(df)).
    '''
    def __init__(self, config, name, callflow_dir=None):
        utils.debug("Creating graphframes: ", name)
        self.config = config
        self.name = name
        # Use the root configured once at notebook level instead of a second
        # hard-coded copy, so changing `dirname` actually takes effect here.
        self.callflow_dir = dirname if callflow_dir is None else callflow_dir
        self.run()

    def run(self):
        '''Read the dataset from disk and populate gf, df, node_hash_map and graph.'''
        callflow_path = os.path.abspath(self.callflow_dir)
        data_path = os.path.abspath(os.path.join(callflow_path, self.config['paths'][self.name]))

        # Only the HPCToolkit reader is wired up; no per-dataset format
        # dispatch is performed.
        gf = GraphFrame()
        gf.from_hpctoolkit(data_path)

        self.gf = gf
        self.df = gf.dataframe
        self.node_hash_map = utils.node_hash_mapper(self.df)
        self.graph = gf.graph

In [31]:
def create(name):
    '''
    Build a State for dataset `name`.
    Constructs a Create object from the notebook-level `config` and copies its
    gf, df and graph onto the State as entire_gf, entire_df and entire_graph.
    '''
    state = State()
    built = Create(config, name)

    # Same three copies, in the same order: gf, df, graph.
    for part in ("gf", "df", "graph"):
        setattr(state, "entire_" + part, getattr(built, part))

    return state

In [32]:
class Filter:
    '''
    Filter the graphframe, then squash the filtered result.
    Input: State object, parameter to filterBy ("Inclusive" or "Exclusive";
            any other value skips filtering),
            filterPerc: user provided filter percentage (1-100). It is passed
            through int() before being divided by 100.
    Output: after construction, self.fgf is the squashed graphframe and
            self.df / self.graph are its dataframe and graph.
    Side effect: state.entire_df is re-indexed in place by ['node', 'rank'].
    Progress is reported through the standard-library logger "callflow.filter";
    INFO lines are shown only once logging is configured, e.g.
    logging.basicConfig(level=logging.INFO).
    '''
    _log = logging.getLogger("callflow.filter")

    def __init__(self, state, filterBy, filterPerc):
        utils.debug('filter By:', filterBy)
        utils.debug('filter Perc:', filterPerc)
        self.state = state
        self.graph = state.entire_graph
        self.df = state.entire_df
        self.gf = state.entire_gf

        # Keep 'node' and 'rank' as columns too (drop=False) while indexing by them.
        self.df.set_index(['node', 'rank'], drop=False, inplace=True)
        self.gf.dataframe = self.df

        self.filterBy = filterBy
        self.filterPercInDecimals = int(filterPerc)/100

        # graft() reads self.fgf, so run() has to be stored first.
        self.fgf = self.run()
        self.fgf = self.graft()

        # update df and graph after filtering.
        self.df = self.fgf.dataframe
        self.graph = self.fgf.graph

    def run(self):
        '''
        Return the graphframe filtered by self.filterBy.
        "Inclusive" keeps rows whose 'time (inc)' is strictly above the
        threshold; "Exclusive" keeps rows whose 'time' is at or above it.
        The threshold is filterPercInDecimals times the maximum reported by
        the matching utils helper. Any other value returns self.gf unfiltered.
        '''
        self._log.info('Filtering the graph.')
        t = time.time()
        if self.filterBy == "Inclusive":
            max_inclusive_time = utils.getMaxIncTime_from_gf(self.graph, self.df)
            # Computed once rather than once per row.
            threshold = self.filterPercInDecimals*max_inclusive_time
            filter_gf = self.gf.filter(lambda x: bool(x['time (inc)'] > threshold))
        elif self.filterBy == "Exclusive":
            max_exclusive_time = utils.getMaxExcTime_from_gf(self.graph, self.df)
            self._log.info('[Filter] By Exclusive time = {0})'.format(max_exclusive_time))
            threshold = self.filterPercInDecimals*max_exclusive_time
            filter_gf = self.gf.filter(lambda x: bool(x['time'] >= threshold))
        else:
            self._log.warning("Not filtering.... Can take forever. Thou were warned")
            filter_gf = self.gf

        self._log.info('[Filter] Removed {0} rows. (time={1})'.format(self.gf.dataframe.shape[0] - filter_gf.dataframe.shape[0], time.time() - t))

        return filter_gf

    def graft(self):
        '''Squash the filtered graphframe (self.fgf) and return the squashed graphframe.'''
        self._log.info("Squashing the graph.")
        t = time.time()
        # Squash once and keep only element 0, the graphframe. Element 1 is
        # presumably an old-to-new node id mapping (unused here) -- TODO confirm.
        squashed = self.fgf.squash()
        fgf = squashed[0]
        self._log.info("[Squash] {1} rows in dataframe (time={0})".format(time.time() - t, fgf.dataframe.shape[0]))
        return fgf

In [49]:
 def process(state, gf_type):
     '''
     Run the PreProcess builder over `state` and store its gf, df and graph
     on the state as state.gf, state.df and state.graph. Returns the state.

     NOTE(review): the saved output of the driver cell ends with
     "TypeError: __init__() takes 2 positional arguments but 3 were given";
     the two-argument Builder call below is one candidate source -- confirm
     against the Builder signature in preprocess.py.
     '''
     # Builder steps, applied in this exact order; each call's return value
     # is the object the next step is invoked on.
     steps = (
         "add_n_index",
         "add_mod_index",
         "add_callers_and_callees",
         "add_show_node",
         "add_vis_node_name",
         "update_module_name",
         "add_path",
     )
     pipeline = PreProcess.Builder(state, gf_type)
     for step in steps:
         pipeline = getattr(pipeline, step)()
     preprocess = pipeline.build()

     state.gf = preprocess.gf
     state.df = preprocess.df
     state.graph = preprocess.graph

     return state

In [50]:
# Driver cell: build the State for one dataset, pre-process it, then write it out.
dataset_name = 'calc-pi'
# Maps each dataset name to its data path; Create.run joins this path onto the
# CallFlow root directory.
config = {
    'paths': {
        'calc-pi': './data/hpctoolkit-cpi-database',
    }
}
state = create(dataset_name)
state = process(state, 'entire')
# NOTE(review): `write_gf` is not defined in any cell visible in this notebook --
# confirm where it is supposed to come from.
write_gf(state, dataset_name, 'entire')
# NOTE(review): `self`, `states`, `filterBy` and `filterPerc` are not defined at
# notebook level in the visible cells; the two lines below look pasted from a
# class method -- TODO adapt them (e.g. to the Filter class above) or remove them.
states[dataset_name] = self.filter(states[dataset_name], filterBy, filterPerc) 
self.write_gf(states[dataset_name], dataset_name, 'filter')

[1m CallFlow:  [32m [callfow.py] Creating graphframes:  Data: "calc-pi"[0m


Times:
    fill tables:         0.00s
    read metric db:      0.10s
    graph construction:  0.05s
    data frame:          0.01s



TypeError: __init__() takes 2 positional arguments but 3 were given