In [None]:
# Global import -- to be run before following cells
import glob
import os
import networkx as nx
from lxml import etree
from graphs import call_graph
from graphs.devrank import devrank
from graphs.pagerank import pagerank
from graphs.call_commit_graph import CallCommitGraph

## Call Graph

With the following cells, you can:

* build a call graph from XML files generated by srcML
* dump the call graph object to disk
* load a saved call graph back into memory

In [None]:
# Call graph -- build call graph from xml files generated by srcML
# Argument(s)
xml_dir = './repos/linux-xml/kernel'

# Expand a leading '~' so a home-relative directory works as well.
xml_dir = os.path.expanduser(xml_dir)
G = nx.DiGraph()
func_to_fname = {}

c_roots = []
# Each srcML file is parsed on its own; build_call_graph receives the shared
# graph through G=G, and the per-file function -> file name mapping is merged
# into func_to_fname (later files overwrite earlier entries for the same key).
for xml in glob.glob(xml_dir + '/**/*.[ch].xml', recursive=True):
    tree = etree.parse(xml)
    roots = [tree.getroot()]
    _, _, fc_to_fn = call_graph.c.build_call_graph(roots, G=G)
    func_to_fname.update(fc_to_fn.items())

# Summary of the resulting graph.
node_count = G.number_of_nodes()
edge_count = G.number_of_edges()
component_count = nx.number_weakly_connected_components(G)
print("Number of nodes: {}".format(node_count))
print("Number of edges: {}".format(edge_count))
print("Number of connected components: {}".format(component_count))

In [None]:
# Call graph -- run this cell if you want to save call graph G and
# the mapping between functions and source files func_to_fname
import pickle

# Open each output file in a `with` block so it is flushed and closed as soon
# as the dump finishes, instead of leaving the handle open in the kernel.
# The third argument is the pickle protocol; the original value True is kept
# (bool is an int subclass, so this selects protocol 1).
with open("call_graph.pickle", 'wb') as graph_file:
    pickle.dump(G, graph_file, True)
with open("func_to_fname.pickle", 'wb') as mapping_file:
    pickle.dump(func_to_fname, mapping_file, True)

In [None]:
# Call graph -- run this cell if you want to load G and func_to_fname from disk
import pickle

# SECURITY NOTE: pickle.load can execute arbitrary code. Only load pickle
# files that you created yourself (e.g. with the save cell of this notebook).
# The `with` blocks make sure both files are closed after reading.
with open("call_graph.pickle", 'rb') as graph_file:
    G = pickle.load(graph_file)
with open("func_to_fname.pickle", 'rb') as mapping_file:
    func_to_fname = pickle.load(mapping_file)

print("Number of nodes: {}".format(len(G.nodes())))
print("Number of edges: {}".format(len(G.edges())))
print("Number of connected components: {}".format(nx.number_weakly_connected_components(G)))

In [None]:
# Print functions -- to be run before following cells

def rel_path(path):
    """Drop from `path` the leading part it shares with the global `xml_dir`.

    The shared part is the character-wise common prefix of `xml_dir` and
    `'/' + path`; that many leading characters are removed from `path`
    itself (which has no leading '/' prepended).
    """
    # TODO: Append '/' to all func_to_file items. Use a standard form.
    shared = os.path.commonprefix([xml_dir, '/' + path])
    return path[len(shared):]

def to_csv(index, item):
    """Format one ranking entry as a single comma-separated line.

    `item` is indexed as (name, score). The line contains the index, the
    name, the score and the source file of the name; the file is looked up
    in the global `func_to_fname` and shortened with `rel_path`, or is the
    literal 'Unknown' when the name has no entry.
    """
    # Potentially, this format can be output and used by Excel
    name = item[0]
    if name in func_to_fname:
        source = rel_path(func_to_fname[name])
    else:
        source = 'Unknown'
    leading_fields = ', '.join((str(index), name, str(item[1])))
    # The last separator deliberately has no space, matching the existing output.
    return leading_fields + ',' + source

def print_top_bottom(ranks, n, exclude_unknown=False):
    """Print the n highest- and n lowest-scored entries as CSV lines.

    Args:
        ranks: iterable of (name, score) pairs, e.g. the `.items()` of a
            mapping. Entries are ordered by their second element.
        n: how many entries to list at each end. When fewer entries are
            available, all of them are listed; n <= 0 lists none.
        exclude_unknown: when True, only entries whose name is a key of the
            global `func_to_fname` are considered.

    Output goes to stdout: a 'Top n' header followed by the highest entries
    (best first), then a 'Bottom n' header followed by the lowest entries
    (worst first). Each entry is formatted by `to_csv` with a 1-based index.
    """
    ordered = sorted(ranks, key=lambda r: r[1])

    if exclude_unknown:
        # Filter once up front. The previous index-walking loops skipped the
        # very lowest entry (they started at index 1) and ran off the end of
        # the list when fewer than n known functions existed.
        ordered = [r for r in ordered if r[0] in func_to_fname]

    # Slice from a reversed copy instead of using [-n:], which would return
    # the whole list for n == 0.
    count = max(n, 0)
    top = ordered[::-1][:count]
    bottom = ordered[:count]

    print('Top ' + str(n))
    for i, r in enumerate(top):
        print(to_csv(i + 1, r))

    print('Bottom ' + str(n))
    for i, r in enumerate(bottom):
        print(to_csv(i + 1, r))

In [None]:
# PageRank with a specific alpha
# Argument(s)
alpha = 0.5
num_func = 20 # top/bottom n to list

# pagerank is imported from graphs.pagerank; its result is consumed as a
# mapping via .items(). exclude_unknown=True restricts the listing to names
# that are keys of func_to_fname.
pr = pagerank(G, alpha=alpha)
print_top_bottom(pr.items(), num_func, exclude_unknown=True)

In [None]:
# DevRank with a specific alpha
# Argument(s)
alpha = 0.5
num_func = 20 # top/bottom n to list

# devrank is imported from graphs.devrank; its result is consumed as a
# mapping via .items(). exclude_unknown=True restricts the listing to names
# that are keys of func_to_fname.
dr = devrank(G, alpha=alpha)
print_top_bottom(dr.items(), num_func, exclude_unknown=True)

In [None]:
# DevRank with a range of alpha
# Argument(s)
alpha_low = 0.1
alpha_high = 0.9
alpha_step = 0.1
num_func = 20 # top/bottom n to list

if alpha_step <= 0:
    # A non-positive step would never reach alpha_high.
    raise ValueError("alpha_step must be positive")

# Each alpha is derived from an integer step index and rounded, instead of
# repeatedly adding alpha_step to a float. Accumulated addition drifts
# (0.1 + 0.1 + 0.1 == 0.30000000000000004), which garbles the printed labels
# and makes the inclusion of alpha_high depend on rounding error.
step_index = 0
a = alpha_low
while a <= alpha_high:
    print("alpha = " + str(a))
    dr = devrank(G, alpha=a)
    print_top_bottom(dr.items(), num_func)
    step_index += 1
    a = round(alpha_low + step_index * alpha_step, 10)

In [None]:
# PageRank with a range of alpha
# Argument(s)
alpha_low = 0.1
alpha_high = 0.9
alpha_step = 0.1
num_func = 20 # top/bottom n to list

if alpha_step <= 0:
    # A non-positive step would never reach alpha_high.
    raise ValueError("alpha_step must be positive")

# Each alpha is derived from an integer step index and rounded, instead of
# repeatedly adding alpha_step to a float. Accumulated addition drifts
# (0.1 + 0.1 + 0.1 == 0.30000000000000004), which garbles the printed labels
# and makes the inclusion of alpha_high depend on rounding error.
step_index = 0
a = alpha_low
while a <= alpha_high:
    print("alpha = " + str(a))
    # Stored as `pr` (as in the single-alpha PageRank cell) so that the
    # DevRank result held in `dr` is not silently replaced by PageRank scores.
    pr = pagerank(G, alpha=a)
    print_top_bottom(pr.items(), num_func)
    step_index += 1
    a = round(alpha_low + step_index * alpha_step, 10)

In [None]:
# DevRank over the call-commit graph
# Argument(s)
alpha = 0.5
num_commits = (10, 100)
num_func = 10 # top/bottom n to list

g = CallCommitGraph('./repos/linux')
# The same CallCommitGraph object is processed once per entry of num_commits.
# NOTE(review): whether a later process() call starts over or adds to the
# state left by the previous one is not visible here -- confirm in
# CallCommitGraph before comparing the two listings.
for n in num_commits:
    g.process(from_beginning=True, num_commits=n)
    #g.process(rev='v4.10', num_commits=n)
    # NOTE(review): print_top_bottom orders its input by element [1] of each
    # entry, i.e. it expects (name, score) pairs. The result is passed here
    # without .items() -- confirm devrank_functions returns pairs and not a
    # mapping.
    dr = g.devrank_functions(alpha)
    print_top_bottom(dr, num_func)