In [1]:
from lxml import etree
from write_graph_to_dot import write_G_to_dot_with_pr

def inspect(node):
    """Print one line per direct child of ``node``: its tag, then its attributes."""
    for element in node:
        tag, attributes = element.tag, element.attrib
        print(tag, attributes)

In [14]:
import networkx as nx

# Namespace prefix -> URI map passed to every find()/findall()/xpath() query
# made on the srcML trees in this notebook.
ns = {'srcml': 'http://www.srcML.org/srcML/src', 'pos': 'http://www.srcML.org/srcML/position'}

def handle_variable_decl(node, prev_type, v_to_type):
    """Record the type of the variable declared by a srcML ``decl`` element.

    Args:
        node: the ``decl`` element to inspect.
        prev_type: type name carried over from the previous declaration. It is
            used when this declaration's ``type`` element carries a ``ref``
            attribute (presumably emitted for ``int a, b;`` style
            declarations -- confirm against srcML output).
        v_to_type: dict mapping variable name -> type name; updated in place.

    Returns:
        ``(prev_type, v_to_type)``. ``prev_type`` is replaced by this
        declaration's own type name whenever the ``type`` element has no
        ``ref`` attribute (it becomes ``None`` if that name is unavailable).

    Raises:
        AssertionError: if a ``ref`` attribute is present with a value other
            than ``"prev"``.
    """
    type_node = node.find('srcml:type', ns)
    if type_node is None:
        # Enum definitions fall into this case.
        return prev_type, v_to_type

    if 'ref' in type_node.attrib:
        assert(type_node.attrib['ref'] == "prev")
        type_name = prev_type
    else:
        # A type without a direct <name> child used to raise AttributeError;
        # treat it like any other unsupported type instead.
        type_name_node = type_node.find('srcml:name', ns)
        type_name = type_name_node.text if type_name_node is not None else None
        prev_type = type_name

    # Types whose name element has no plain text (e.g. vectors) are not
    # supported yet.
    if not type_name:
        return prev_type, v_to_type

    # The first direct <name> child is taken as the variable name.
    name_nodes = node.findall('srcml:name', ns)
    if not name_nodes or not name_nodes[0].text:
        return prev_type, v_to_type

    v_to_type[name_nodes[0].text] = type_name
    return prev_type, v_to_type


def build_call_graph(h_roots, c_roots):
    """Build a directed call graph from srcML trees of headers and sources.

    Pass 1 walks the header trees and collects, for every top-level
    declaration whose type name is ``class``, the set of member function names
    (``cl_to_func``). It also feeds every nested ``decl`` element to
    ``handle_variable_decl`` to fill ``v_to_type``.

    Pass 2 walks every top-level ``constructor``/``function`` of the source
    trees. Each ``call`` element inside it adds an edge caller -> callee, and
    each ``decl`` element extends ``v_to_type``.

    Node ids are plain function names, or ``Class::func`` when the class is
    known. Calls through a variable whose type was never recorded become
    ``Unknown::func``. A name without text is assumed to have the child
    layout ``[qualifier, operator, name]``; other layouts are not handled.

    Args:
        h_roots: iterable of lxml root elements of header files; each must
            carry a ``filename`` attribute.
        c_roots: iterable of lxml root elements of source files; each must
            carry a ``filename`` attribute.

    Returns:
        ``(G, v_to_type)``: the ``nx.DiGraph`` of calls and the variable-name
        -> type-name mapping accumulated over all inputs.
    """
    src_tag = '{http://www.srcML.org/srcML/src}'
    G = nx.DiGraph()
    v_to_type = {}
    cl_to_func = {}

    # ---- Pass 1: headers -> class members and variable types ----
    for h_root in h_roots:
        print(h_root.attrib['filename'])

        for decl_node in h_root.xpath('./srcml:decl_stmt/srcml:decl', namespaces=ns):
            # The original `except Error:` referenced an undefined name, so any
            # failure here surfaced as NameError. Missing elements and
            # non-class declarations are now both reported and skipped, and the
            # class check no longer depends on `assert` (stripped under -O).
            try:
                type_name = decl_node.find('srcml:type', ns).find('srcml:name', ns).text
                class_name = decl_node.find('srcml:name', ns).text
            except AttributeError:
                type_name = class_name = None
            if type_name != 'class':
                print("Error when get type_name and class_name!")
                continue

            member_funcs = cl_to_func[class_name] = set()

            # An argument_list that is not part of a call belongs to a member
            # function declaration/definition (parent is e.g. `decl` or
            # `macro`); its parent's name is the function name.
            for node in decl_node.xpath('.//srcml:argument_list', namespaces=ns):
                parent = node.getparent()
                if parent.tag == src_tag + 'call':
                    continue
                name_node = parent.find('srcml:name', ns)
                if name_node is not None:
                    member_funcs.add(name_node.text)

            for node in decl_node.xpath('.//srcml:function_decl', namespaces=ns):
                name_node = node.find('srcml:name', ns)
                if name_node is not None:
                    member_funcs.add(name_node.text)

            prev_type = None
            for node in decl_node.iter():
                if node.tag == src_tag + 'decl':
                    prev_type, v_to_type = handle_variable_decl(node, prev_type, v_to_type)

    # ---- Pass 2: sources -> caller/callee edges ----
    for c_root in c_roots:
        print(c_root.attrib['filename'])

        for func_node in c_root.xpath('./srcml:constructor | ./srcml:function', namespaces=ns):

            # get caller_id and add it to G
            caller_name_node = func_node.find('srcml:name', ns)
            caller_class_name = None
            if caller_name_node.text:
                caller_id = caller_name_node.text
            else:
                # Qualified name: children are [class, '::', function].
                caller_class_name = caller_name_node[0].text
                assert(caller_name_node[1].text == "::")
                func_name = caller_name_node[2].text
                caller_id = "{}::{}".format(caller_class_name, func_name)
            assert(caller_id)
            G.add_node(caller_id)

            # iterate over function body and handle all function calls and declarations
            prev_type = None
            for node in func_node.iter():

                # function call
                if node.tag == src_tag + 'call':
                    if node.getparent().tag == src_tag + 'member_init_list':
                        # ignore calls within member init list
                        continue
                    callee_name_node = node.find('srcml:name', ns)
                    if callee_name_node is None:
                        # A call without a name element cannot be resolved.
                        continue

                    if callee_name_node.text:
                        callee_name = callee_name_node.text
                        if (caller_class_name
                                and caller_class_name in cl_to_func
                                and callee_name in cl_to_func[caller_class_name]):
                            # Unqualified call to a member of the caller's class.
                            callee_id = "{}::{}".format(caller_class_name, callee_name)
                        else:
                            callee_id = callee_name
                    else:
                        variable_name = callee_name_node[0].text
                        callee_name = callee_name_node[2].text
                        if variable_name not in v_to_type:
                            # temporary hack
                            print("variable name not in v_to_type: {}".format(variable_name))
                            callee_id = "Unknown::{}".format(callee_name)
                        else:
                            callee_id = "{}::{}".format(v_to_type[variable_name], callee_name)

                    # add_edge creates the callee node when it is missing.
                    G.add_edge(caller_id, callee_id)

                # declaration
                elif node.tag == src_tag + 'decl':
                    prev_type, v_to_type = handle_variable_decl(node, prev_type, v_to_type)

    return G, v_to_type

In [36]:
import glob
import subprocess

from cpp_tools import get_func_ranges_cpp


def _srcml_roots(source_paths):
    """Run srcml on each path and return the parsed XML root elements, in order.

    The XML for ``path`` is written next to it as ``path + '.xml'``.
    """
    roots = []
    for source_path in source_paths:
        xml_path = '{}.xml'.format(source_path)
        # An argument list (no shell) keeps unusual file names from being
        # reinterpreted by the shell, and check_call fails loudly when srcml
        # exits with an error instead of parsing a stale or missing XML file.
        # NOTE(review): the saved output of this cell is an lxml
        # XMLSyntaxError ("Namespace prefix pos ... is not defined");
        # presumably the `--xmlns persper` argument interferes with srcml's
        # namespace declarations -- confirm against the srcml documentation.
        subprocess.check_call(
            ['srcml', source_path, '--position', '--xmlns', 'persper', '-o', xml_path])
        roots.append(etree.parse(xml_path).getroot())
    return roots


h_files = glob.glob('./Sexain-MemController/*.h')
h_roots = _srcml_roots(h_files)

cc_files = glob.glob('./Sexain-MemController/*.cc')
c_roots = _srcml_roots(cc_files)

# Size of every function: length of the inclusive range reported by
# get_func_ranges_cpp.
sizes = {}
for cc_file in cc_files:
    with open(cc_file, 'r') as f:
        src = f.read()
    func_ids, func_ranges = get_func_ranges_cpp(src, None)
    for func_id, func_range in zip(func_ids, func_ranges):
        sizes[func_id] = func_range[1] - func_range[0] + 1

# Only the graph is needed here; indexing avoids rebinding IPython's `_`.
G = build_call_graph(h_roots, c_roots)[0]
pr = nx.pagerank(G.reverse(), alpha=0.85)
# pagerank_with_sizes is defined in a later cell of this notebook; that cell
# must have been executed before this one.
pr_size = pagerank_with_sizes(G, sizes)
pr_size_self = pagerank_with_sizes(G, sizes, count_self=True)

# Disabled: export both weighted rankings to dot files and render them as SVG.
# write_G_to_dot_with_pr(G, pr_size, 'Sexain_call_graph_v2.dot', header_lines=['nodesep=1.0;\n'])
# !unflatten -l 8 -f -o unflattened_Sexain_call_graph_v2.dot Sexain_call_graph_v2.dot
# !dot -Tsvg unflattened_Sexain_call_graph_v2.dot -o unflattened_Sexain_call_graph_v2.svg
# write_G_to_dot_with_pr(G, pr_size_self, 'Sexain_call_graph_v3.dot', header_lines=['nodesep=1.0;\n'])
# !unflatten -l 8 -f -o unflattened_Sexain_call_graph_v3.dot Sexain_call_graph_v3.dot
# !dot -Tsvg unflattened_Sexain_call_graph_v3.dot -o unflattened_Sexain_call_graph_v3.svg

XMLSyntaxError: Namespace prefix pos for tabs on unit is not defined, line 2, column 105 (<string>)

In [None]:
# Build a call graph from one header plus a standalone test source file.
# Both .xml files must already exist: this cell parses them but does not run
# srcml itself.
tree = etree.parse('./Sexain-MemController/addr_trans_controller.h.xml')
h_roots3 = [tree.getroot()]
tree = etree.parse('./test.cc.xml')
c_roots3 = [tree.getroot()]
G3, v_to_type = build_call_graph(h_roots3, c_roots3)
# NOTE(review): `write_graph_to_dot` is not imported in any visible cell (only
# `write_G_to_dot_with_pr` is) -- confirm it is in scope before running.
write_graph_to_dot(G3, 'test_call_graph.dot')
# Render the dot file to SVG with the `dot` command-line tool.
!dot -Tsvg test_call_graph.dot -o test_call_graph.svg

In [None]:
# Look up the entry stored under the DRAMStore node.
# NOTE(review): `G4` is not assigned in any visible cell -- presumably a call
# graph built elsewhere; confirm which graph is meant.
G4['AddrTransController::DRAMStore']

In [29]:
from numpy import linalg as LA
import numpy as np

def pagerank(G, alpha=0.85, epsilon=1e-5, max_iters=300):
    """Compute PageRank scores by power iteration on a dense transition matrix.

    Column ``j`` of the matrix spreads the score of node ``j`` evenly over its
    successors ``G[j]``; a node without successors spreads it evenly over all
    nodes. The teleport vector and the starting vector are both uniform.

    Args:
        G: graph supporting iteration over nodes, ``G.nodes()`` and ``G[u]``.
        alpha: damping factor applied to the transition term.
        epsilon: iteration stops once the norm of the change drops below this.
        max_iters: upper bound on the number of iterations.

    Returns:
        dict mapping every node to its score. When the iteration stops on the
        ``epsilon`` test, the scores are those of the iterate preceding the
        final step.
    """
    index_of = {node: position for position, node in enumerate(G)}
    num_nodes = len(G.nodes())

    transition = np.zeros([num_nodes, num_nodes])
    for node in G:
        column = index_of[node]
        successors = G[node]
        out_degree = len(successors)
        if out_degree == 0:
            # Dangling node: jump anywhere with equal probability.
            transition[:, column] = 1 / num_nodes
            continue
        for successor in successors:
            transition[index_of[successor], column] = 1 / out_degree

    teleport = np.ones(num_nodes) / num_nodes
    rank = np.ones(num_nodes) / num_nodes

    for _ in range(max_iters):
        candidate = alpha * np.dot(transition, rank) + (1 - alpha) * teleport
        assert(candidate.shape == (num_nodes,))
        if LA.norm(candidate - rank) < epsilon:
            break
        rank = candidate

    return {node: rank[index_of[node]] for node in G}


def pagerank_with_sizes(G, sizes, count_self=False, default_size=3, alpha=0.85, epsilon=1e-5, max_iters=300):
    """Compute PageRank scores where transitions and teleport are size-weighted.

    The score of node ``u`` is distributed over its successors ``G[u]`` in
    proportion to their sizes; a node without successors distributes it evenly
    over all nodes. The teleport vector is proportional to node size.

    Args:
        G: graph supporting iteration over nodes and ``G[u]``.
        sizes: mapping node -> size. It is only read: nodes missing from it
            are given ``default_size`` internally, and the caller's mapping is
            left untouched.
        count_self: when true, ``u`` keeps a share of its own score
            proportional to its own size, in addition to any self-loop.
        default_size: size assumed for nodes absent from ``sizes``.
        alpha: damping factor applied to the transition term.
        epsilon: iteration stops once the norm of the change drops below this.
        max_iters: upper bound on the number of iterations.

    Returns:
        dict mapping every node to its score. When the iteration stops on the
        ``epsilon`` test, the scores are those of the iterate preceding the
        final step.

    Note:
        Sizes that sum to zero (over a node's successors, or over the whole
        graph) lead to a division by zero.
    """
    ni = {u: i for i, u in enumerate(G)}
    num_nodes = len(ni)

    # Private size lookup: the original wrote default entries back into the
    # caller's `sizes` dict, silently polluting it for later use.
    size_of = {u: (sizes[u] if u in sizes else default_size) for u in G}
    universe_size = sum(size_of.values())

    P = np.zeros([num_nodes, num_nodes])
    for u in G:
        col = ni[u]
        successors = list(G[u])
        if not successors:
            # Dangling node: jump anywhere with equal probability.
            P[:, col] = 1 / num_nodes
            continue

        total_out_sizes = sum(size_of[v] for v in successors)
        if count_self:
            total_out_sizes += size_of[u]
            P[col, col] += size_of[u] / total_out_sizes
        for v in successors:
            # Accumulate instead of assigning: when `u` has a self-loop and
            # count_self is set, its size is in the denominator twice, so both
            # shares must be kept for the column to sum to 1.
            P[ni[v], col] += size_of[v] / total_out_sizes

    p = np.empty(num_nodes)
    for u in G:
        p[ni[u]] = size_of[u] / universe_size

    v = np.ones(num_nodes) / num_nodes

    for _ in range(max_iters):
        new_v = alpha * np.dot(P, v) + (1 - alpha) * p
        assert(new_v.shape == (num_nodes,))
        if LA.norm(new_v - v) < epsilon:
            break
        v = new_v

    return {u: v[ni[u]] for u in G}

In [9]:
# Display the scores computed by nx.pagerank on the reversed call graph.
pr

{'AddrTransController::AddrTransController': 0.010733974488471089,
 'AddrTransController::BeginCheckpointing': 0.019582085379620567,
 'AddrTransController::CheckValid': 0.003001572955025558,
 'AddrTransController::CopyBlockInter': 0.003001572955025558,
 'AddrTransController::CopyBlockIntra': 0.003001572955025558,
 'AddrTransController::DRAMStore': 0.03461490089774263,
 'AddrTransController::DirtyCleaner': 0.009380804220118121,
 'AddrTransController::DirtyStained': 0.009743041357773544,
 'AddrTransController::Discard': 0.006842201993183283,
 'AddrTransController::FinishCheckpointing': 0.015673614132803736,
 'AddrTransController::FreeClean': 0.012239322672063934,
 'AddrTransController::FreeLoan': 0.008719518622624808,
 'AddrTransController::FullBlock': 0.003001572955025558,
 'AddrTransController::GetAddrInfo': 0.008560479021073403,
 'AddrTransController::HideClean': 0.009124333748875288,
 'AddrTransController::HideTemp': 0.00828380683077504,
 'AddrTransController::IsDRAM': 0.003001572955

In [26]:
# Display the size-weighted scores from pagerank_with_sizes(G, sizes).
pr_size

{'AddrTransController::AddrTransController': 0.0053532151895998425,
 'AddrTransController::BeginCheckpointing': 0.0053532151895998425,
 'AddrTransController::CheckValid': 0.005456647377649693,
 'AddrTransController::CopyBlockInter': 0.0066853352611887448,
 'AddrTransController::CopyBlockIntra': 0.0073249034412626139,
 'AddrTransController::DRAMStore': 0.0065944014461980505,
 'AddrTransController::DirtyCleaner': 0.0053532151895998425,
 'AddrTransController::DirtyStained': 0.0059737519567257067,
 'AddrTransController::Discard': 0.0069979256817092172,
 'AddrTransController::FinishCheckpointing': 0.0053532151895998425,
 'AddrTransController::FreeClean': 0.0070496420361705874,
 'AddrTransController::FreeLoan': 0.006871070789163141,
 'AddrTransController::FullBlock': 0.0056210720601110112,
 'AddrTransController::GetAddrInfo': 0.0053532151895998425,
 'AddrTransController::HideClean': 0.0060125355046710734,
 'AddrTransController::HideTemp': 0.0059737519567257067,
 'AddrTransController::IsDRAM'

In [34]:
# Display the size-weighted scores computed with count_self=True.
pr_size_self

{'AddrTransController::AddrTransController': 0.0043065126313039572,
 'AddrTransController::BeginCheckpointing': 0.0037515644729885808,
 'AddrTransController::CheckValid': 0.0037425655379979494,
 'AddrTransController::CopyBlockInter': 0.0050819676041651152,
 'AddrTransController::CopyBlockIntra': 0.0058136187066251009,
 'AddrTransController::DRAMStore': 0.012850688017354164,
 'AddrTransController::DirtyCleaner': 0.0045389316444671797,
 'AddrTransController::DirtyStained': 0.0088295184156656562,
 'AddrTransController::Discard': 0.018731833597120925,
 'AddrTransController::FinishCheckpointing': 0.0049477069608402967,
 'AddrTransController::FreeClean': 0.01171459770185463,
 'AddrTransController::FreeLoan': 0.010760392650042113,
 'AddrTransController::FullBlock': 0.0040420554279740383,
 'AddrTransController::GetAddrInfo': 0.0039064551389500759,
 'AddrTransController::HideClean': 0.0094942001458155224,
 'AddrTransController::HideTemp': 0.0089927993148888816,
 'AddrTransController::IsDRAM': 0

In [32]:
# Display the function-id -> size mapping used to weight the rankings.
sizes

{'AddrTransController::AddrTransController': 3,
 'AddrTransController::BeginCheckpointing': 3,
 'AddrTransController::CheckValid': 3,
 'AddrTransController::CopyBlockInter': 3,
 'AddrTransController::CopyBlockIntra': 3,
 'AddrTransController::DRAMStore': 36,
 'AddrTransController::DirtyCleaner': 3,
 'AddrTransController::DirtyStained': 16,
 'AddrTransController::Discard': 32,
 'AddrTransController::FinishCheckpointing': 7,
 'AddrTransController::FreeClean': 19,
 'AddrTransController::FreeLoan': 17,
 'AddrTransController::FullBlock': 3,
 'AddrTransController::GetAddrInfo': 3,
 'AddrTransController::HideClean': 17,
 'AddrTransController::HideTemp': 16,
 'AddrTransController::IsDRAM': 3,
 'AddrTransController::LoadAddr': 26,
 'AddrTransController::LoanRevoker': 3,
 'AddrTransController::MigrateDRAM': 25,
 'AddrTransController::MigrateNVM': 52,
 'AddrTransController::MigratePages': 64,
 'AddrTransController::NVMStore': 75,
 'AddrTransController::Probe': 21,
 'AddrTransController::ResetClea