## Problem 1

In [3]:
################################################################################
# CS 224W (Fall 2018) - HW1
# Starter code for Problem 1
# Author: praty@stanford.edu
# Last Updated: Sep 27, 2018
################################################################################

import snap
import numpy as np
import matplotlib
matplotlib.use('agg')
import matplotlib.pyplot as plt

# Setup
erdosRenyi = None
smallWorld = None
collabNet = None


In [2]:
np.arange(5)

array([0, 1, 2, 3, 4])

In [13]:
# Problem 1.1
def genErdosRenyi(N=5242, E=14484):
    """
    :param - N: number of nodes
    :param - E: number of edges

    return type: snap.PUNGraph
    return: Erdos-Renyi graph with N nodes and E edges
    """
    ############################################################################
    # TODO: Your code here!
    pairs = []
    Graph = snap.TUNGraph.New()
    for i in range(N):
        Graph.AddNode(i)
        for j in range(i + 1, N):
            pairs.append((i, j))
    idx = np.array(np.random.choice(len(pairs), E, replace=False))
    pairs = np.array(pairs)
    chosen_pairs = pairs[idx]
    
    for pair in chosen_pairs:
        Graph.AddEdge(pair[0], pair[1])
        assert(Graph.IsEdge(pair[1], pair[0]))

    assert(Graph.GetEdges() == E)
    assert(Graph.GetNodes() == N)
    ############################################################################
    return Graph



In [14]:
def genCircle(N=5242):
    """
    :param - N: number of nodes

    return type: snap.PUNGraph
    return: Circle graph with N nodes and N edges. Imagine the nodes form a
        circle and each node is connected to its two direct neighbors.
    """
    ############################################################################
    # TODO: Your code here!
    Graph = snap.TUNGraph.New()
    Graph.AddNode(0)
    for i in range(1, N):
        Graph.AddNode(i)
        Graph.AddEdge(i-1, i)
    Graph.AddEdge(N-1, 0)
    assert(Graph.IsEdge(0, 1) and Graph.IsEdge(1, 0))
    for i in range(N):
        assert(Graph.GetNI(i).GetDeg() == 2)
    ############################################################################
    return Graph


In [15]:
def connectNbrOfNbr(Graph, N=5242):
    """
    :param - Graph: snap.PUNGraph object representing a circle graph on N nodes
    :param - N: number of nodes

    return type: snap.PUNGraph
    return: Graph object with additional N edges added by connecting each node
        to the neighbors of its neighbors
    """
    ############################################################################
    # TODO: Your code here!
    pairs = []
    for i in range(N):
        Graph.AddEdge(i, (i + 2) % N)
    for i in range(N):
        assert(Graph.GetNI(i).GetDeg() == 4)
    ############################################################################
    return Graph


In [16]:
def connectRandomNodes(Graph, M=4000):
    """
    :param - Graph: snap.PUNGraph object representing an undirected graph
    :param - M: number of edges to be added

    return type: snap.PUNGraph
    return: Graph object with additional M edges added by connecting M randomly
        selected pairs of nodes not already connected.
    """
    ############################################################################
    # TODO: Your code here!
    pairs = []
    for i in range(Graph.GetNodes()):
        for j in range(i+1, Graph.GetNodes()):
            if not Graph.IsEdge(i, j):
                pairs.append((i, j))
    idx = np.array(np.random.choice(len(pairs), M, replace=False))
    pairs = np.array(pairs)
    chosen_pairs = pairs[idx]
    for pair in chosen_pairs:
        Graph.AddEdge(pair[0], pair[1])
    ############################################################################
    return Graph


In [17]:
def genSmallWorld(N=5242, E=14484):
    """
    :param - N: number of nodes
    :param - E: number of edges

    return type: snap.PUNGraph
    return: Small-World graph with N nodes and E edges
    """
    Graph = genCircle(N) #generate a ring
    Graph = connectNbrOfNbr(Graph, N) # generate regular lattice
    Graph = connectRandomNodes(Graph, 4000) #rewire edges to generate small world model
    return Graph

In [18]:
def loadCollabNet(path):
    """
    :param - path: path to edge list file

    return type: snap.PUNGraph
    return: Graph loaded from edge list at `path and self edges removed

    Do not forget to remove the self edges!
    """
    ############################################################################
    # TODO: Your code here!
    Graph = snap.LoadEdgeList(snap.PUNGraph, path, 0, 1)
    for node in Graph.Nodes():
        if Graph.IsEdge(node.GetId(), node.GetId()):
            Graph.DelEdge(node.GetId(), node.GetId())
    assert Graph.GetEdges() == 14484
    ############################################################################
    return Graph


In [19]:
def getDataPointsToPlot(Graph):
    """
    :param - Graph: snap.PUNGraph object representing an undirected graph

    return values:
    X: list of degrees
    Y: list of frequencies: Y[i] = fraction of nodes with degree X[i]
    """
    ############################################################################
    # TODO: Your code here!
    X, Y = [], []
    degree_map = {}
    for node in Graph.Nodes():
        if node.GetDeg() not in degree_map.keys():
            degree_map[node.GetDeg()] = 0
        degree_map[node.GetDeg()] += 1
    for deg in degree_map.keys():
        X.append(deg)
        Y.append(float(degree_map[deg]) / Graph.GetNodes())
    print len(X)
    ############################################################################
    return X, Y


In [20]:
def Q1_1():
    """
    Code for HW1 Q1.1
    """
    global erdosRenyi, smallWorld, collabNet
    erdosRenyi = genErdosRenyi(5242, 14484)
    smallWorld = genSmallWorld(5242, 14484)
    collabNet = loadCollabNet("ca-GrQc.txt")
    assert(smallWorld is not None)
    assert(erdosRenyi is not None)
    x_erdosRenyi, y_erdosRenyi = getDataPointsToPlot(erdosRenyi)
    plt.loglog(x_erdosRenyi, y_erdosRenyi, color = 'y', label = 'Erdos Renyi Network')

    x_smallWorld, y_smallWorld = getDataPointsToPlot(smallWorld)
    plt.loglog(x_smallWorld, y_smallWorld, linestyle = 'dashed', color = 'r', label = 'Small World Network')

    x_collabNet, y_collabNet = getDataPointsToPlot(collabNet)
    plt.loglog(x_collabNet, y_collabNet, linestyle = 'dotted', color = 'b', label = 'Collaboration Network')

    plt.xlabel('Node Degree (log)')
    plt.ylabel('Proportion of Nodes with a Given Degree (log)')
    plt.title('Degree Distribution of Erdos Renyi, Small World, and Collaboration Networks')
    plt.legend()
    plt.savefig('q1_plots.png')


# Execute code for Q1.1
Q1_1()



17
9
66


In [21]:
# Problem 1.2 - Clustering Coefficient

def calcClusteringCoefficientSingleNode(Node, Graph):
    """
    :param - Node: node from snap.PUNGraph object. Graph.Nodes() will give an
                   iterable of nodes in a graph
    :param - Graph: snap.PUNGraph object representing an undirected graph

    return type: float
    returns: local clustering coeffient of Node
    """
    ############################################################################
    # TODO: Your code here!
    C = 0.0
    if Node.GetDeg() < 2:
        return C
    ei = 0.0
    for i in range(Node.GetDeg()):
        for j in range(i + 1, Node.GetDeg()):
            if Graph.IsEdge(Node.GetNbrNId(i), Node.GetNbrNId(j)):
                ei += 1
    
    assert(ei <= (Node.GetDeg() * (Node.GetDeg() - 1)) / 2)
    C = 2.0 * ei / (Node.GetDeg() * (Node.GetDeg() - 1))
    ############################################################################
    return C

def calcClusteringCoefficient(Graph):
    """
    :param - Graph: snap.PUNGraph object representing an undirected graph

    return type: float
    returns: clustering coeffient of Graph
    """
    ############################################################################
    # TODO: Your code here! If you filled out calcClusteringCoefficientSingleNode,
    #       you'll probably want to call it in a loop here
    C = 0.0
    for node in Graph.Nodes():
        Ci = calcClusteringCoefficientSingleNode(node, Graph)
        C += Ci
    C /= float(Graph.GetNodes())
    ############################################################################
    return C

def Q1_2():
    """
    Code for Q1.2
    """
    C_erdosRenyi = calcClusteringCoefficient(erdosRenyi)
    C_smallWorld = calcClusteringCoefficient(smallWorld)
    C_collabNet = calcClusteringCoefficient(collabNet)

    print('Clustering Coefficient for Erdos Renyi Network: %f' % C_erdosRenyi)
    print('Clustering Coefficient for Small World Network: %f' % C_smallWorld)
    print('Clustering Coefficient for Collaboration Network: %f' % C_collabNet)
    
    print 'Computed using library functions...'
    
    C_erdosRenyi = snap.GetClustCf(erdosRenyi, 5242)
    C_smallWorld =  snap.GetClustCf(smallWorld, 5242)
    C_collabNet = snap.GetClustCf(collabNet, 5242)

    print('Clustering Coefficient for Erdos Renyi Network: %f' % C_erdosRenyi)
    print('Clustering Coefficient for Small World Network: %f' % C_smallWorld)
    print('Clustering Coefficient for Collaboration Network: %f' % C_collabNet)
    

# Execute code for Q1.2
Q1_2()

Clustering Coefficient for Erdos Renyi Network: 0.001480
Clustering Coefficient for Small World Network: 0.283280
Clustering Coefficient for Collaboration Network: 0.529636
Computed using library functions...
Clustering Coefficient for Erdos Renyi Network: 0.001480
Clustering Coefficient for Small World Network: 0.283280
Clustering Coefficient for Collaboration Network: 0.529636


## Problem 2

In [4]:
def load_graph(name):
    '''
    Helper function to load graphs.
    Wse "epinions" for Epinions graph and "email" for Email graph.
    Check that the respective .txt files are in the same folder as this script;
    if not, change the paths below as required.
    '''
    if name == "epinions":
        G = snap.LoadEdgeList(snap.PNGraph, "soc-Epinions1.txt", 0, 1)
    elif name == 'email':
        G = snap.LoadEdgeList(snap.PNGraph, "email-EuAll.txt", 0, 1)   
    else: 
        raise ValueError("Invalid graph: please use 'email' or 'epinions'.")
    return G



In [4]:
def q2_1():
    '''
    You will have to run the inward and outward BFS trees for the 
    respective nodes and reason about whether they are in SCC, IN or OUT.
    You may find the SNAP function GetBfsTree() to be useful here.
    '''
    def in_out_bfs_for_node(graph, node):
        print 'Starting at node: ' , str(node)
        bfs_out = snap.GetBfsTree(graph, node, True, False)
        bfs_in = snap.GetBfsTree(graph, node, False, True)
        print 'Out size = ' , str(bfs_out.GetEdges()) , ', In size = ' , str(bfs_in.GetEdges())


    def in_out_bfs(graph, name):
        print 'In/Out BFS tree size for graph: ' , name
        if name == 'email':
            in_out_bfs_for_node(graph, 2018)
        else:
            in_out_bfs_for_node(graph, 224)
                
    ##########################################################################
    #TODO: Run outward and inward BFS trees from node 2018, compare sizes 
    #and comment on where node 2018 lies.
    G = load_graph("email")
    #Your code here:
    in_out_bfs(G, 'email')
    ##########################################################################
    
    ##########################################################################
    #TODO: Run outward and inward BFS trees from node 224, compare sizes 
    #and comment on where node 224 lies.
    G = load_graph("epinions")
    #Your code here:
    in_out_bfs(G, 'epinions')
    
    ##########################################################################
    print '2.1: Done!\n'


In [5]:
q2_1()

In/Out BFS tree size for graph:  email
Starting at node:  2018
Out size =  70318 , In size =  0
In/Out BFS tree size for graph:  epinions
Starting at node:  224
Out size =  100012 , In size =  124677
2.1: Done!



In [6]:
def q2_2():
    '''
    For each graph, get 100 random nodes and find the number of nodes in their
    inward and outward BFS trees starting from each node. Plot the cumulative
    number of nodes reached in the BFS runs, similar to the graph shown in 
    Broder et al. (see Figure in handout). You will need to have 4 figures,
    one each for the inward and outward BFS for each of email and epinions.
    
    Note: You may find the SNAP function GetRndNId() useful to get random
    node IDs (for initializing BFS).
    '''
    ##########################################################################
    #TODO: See above.
    #Your code here:
    
    def reachable_analysis(graph, name):
        reached_out_cum = []
        reached_in_cum = []
        for i in range(100):
            node = graph.GetRndNId()
            bfs_out = snap.GetBfsTree(graph, node, True, False)
            bfs_in = snap.GetBfsTree(graph, node, False, True)
            reached_out_cum.append(bfs_out.GetNodes())
            reached_in_cum.append(bfs_in.GetNodes())

        reached_out_cum = sorted(reached_out_cum)
        reached_in_cum = sorted(reached_in_cum)

        y = reached_out_cum
        if True:#name == 'epinions':
            plt.plot(y, label='Reachability using outlinks for %s' % name)
            plt.yscale('log')
        #else:
        #    plt.plot(y, label='Reachability using outlinks for %s' % name)
        plt.savefig('outlinks_%s.png' % name)
        plt.close()

        y = reached_in_cum
        if True:#name == 'email':
            plt.plot(y, label='Reachability using inlinks for %s' % name)
            plt.yscale('log')
        #else:
        #    plt.plot(y, label='Reachability using inlinks for %s' % name)
        plt.savefig('inlinks_%s.png' % name)
        plt.close()

    reachable_analysis(load_graph('email'), 'email')
    reachable_analysis(load_graph('epinions'), 'epinions')
    ##########################################################################
    print '2.2: Done!\n'



In [8]:
def q2_3():
    '''
    For each graph, determine the size of the following regions:
        DISCONNECTED
        IN
        OUT
        SCC
        TENDRILS + TUBES
        
    You can use SNAP functions GetMxWcc() and GetMxScc() to get the sizes of 
    the largest WCC and SCC on each graph. 
    '''
    ##########################################################################
    #TODO: See above.
    #Your code here:
    def per_graph(graph, name):
        mxWcc = snap.GetMxWcc(graph)
        mxScc = snap.GetMxScc(graph)
        print ''
        print 'Size analysis on {}'.format(name)
        print 'Disconnected size = {}'.format(graph.GetNodes() - mxWcc.GetNodes())
        print 'SCC size = {}'.format(mxScc.GetNodes())
        
        trials = 200
        avg_reached_out = 0
        avg_reached_in = 0
        for _ in range(trials):
            nodeId = mxScc.GetRndNId()
            avg_reached_out += snap.GetBfsTree(graph, nodeId, True, False).GetNodes()
            avg_reached_in += snap.GetBfsTree(graph, nodeId, False, True).GetNodes()

        scc_out = float(avg_reached_out) / trials
        scc_in = float(avg_reached_in) / trials

        out_sz = scc_out - mxScc.GetNodes()
        in_sz = scc_in - mxScc.GetNodes()
        print 'OUT size = {}'.format(out_sz)
        print 'IN size = {}'.format(in_sz)
        print 'Tendrils/Tubes size = {}'.format(mxWcc.GetNodes() - mxScc.GetNodes() - out_sz - in_sz)
    
    per_graph(load_graph('email'), 'email')
    per_graph(load_graph('epinions'), 'epinions')

    ##########################################################################
    print '2.3: Done!\n' 



In [9]:
q2_3()


Size analysis on email
Disconnected size = 40382
SCC size = 34203
OUT size = 17900.0
IN size = 151023.0
Tendrils/Tubes size = 21706.0

Size analysis on epinions
Disconnected size = 2
SCC size = 32223
OUT size = 15453.0
IN size = 24236.0
Tendrils/Tubes size = 3965.0
2.3: Done!



In [1]:
def q2_4():
    '''
    For each graph, calculate the probability that a path exists between
    two nodes chosen uniformly from the overall graph.
    You can do this by choosing a large number of pairs of random nodes
    and calculating the fraction of these pairs which are connected.
    The following SNAP functions may be of help: GetRndNId(), GetShortPath()
    '''
    ##########################################################################
    #TODO: See above.
    #Your code here
    
    def calc_path_prob(graph):
        trials = 1000
        reachable_count = 0
        for _ in range(trials):
            node1 = graph.GetRndNId()
            node2 = graph.GetRndNId()
            shortestPath = snap.GetShortPath(graph, node1, node2, True)
            if shortestPath > 0:
                reachable_count += 1
        return float(reachable_count) / float(trials)
    
    print 'email reachable prob = %f' % calc_path_prob(load_graph('email'))
    print 'epinions reachable prob = %f' % calc_path_prob(load_graph('epinions'))
    ##########################################################################
    print '2.4: Done!\n'

In [5]:
q2_4()

email reachable prob = 0.126000
epinions reachable prob = 0.460000
2.4: Done!



## Problem 4

In [7]:
import snap
from sets import Set
import os
import numpy as np
import matplotlib
matplotlib.use('agg')
import matplotlib.pyplot as plt

  


In [None]:
def main():
    graph = load_graph()
    print_graph_stats(graph)
    make_plots(graph)    
    hdn = build_hdn(graph)
    contraction(graph, hdn)
    max_clique(graph)
    disease_sim(graph)
   
main()


In [9]:
def load_graph():
    '''
    Helper function to load graphs.
    Wse "epinions" for Epinions graph and "email" for Email graph.
    Check that the respective .txt files are in the same folder as this script;
    if not, change the paths below as required.
    '''    
    G = snap.LoadEdgeList(snap.PUNGraph, 'all_gene_disease_associations.csv', 0, 2, ',')
    assert not G.Empty() # check if graph is empty
    assert G.IsNode(200001418) # check is nodeID = 200001418 is a node of the graph
    assert G.IsNode(10) # same thing as above
    return G

def is_gene(node):
    return node.GetId() < 200E6 # 200E6 = 200000000

def print_graph_stats(graph):
    print 'nodes = {}'.format(graph.GetNodes())
    
    disease_set = set([])
    gene_set = set([])
    for node in graph.Nodes():
        if not is_gene(node):
            disease_set.add(node.GetId())
        else:
            gene_set.add(node.GetId())
    print 'num diseases = {}'.format(len(disease_set))
    print 'num genes = {}'.format(len(gene_set))
    print 'edges = {}'.format(graph.GetEdges())

def make_plots(graph):
    gene_deg_counts = {}
    disease_deg_counts = {}
    for node in graph.Nodes():
        if is_gene(node):
            if node.GetDeg() not in gene_deg_counts.keys():
                gene_deg_counts[node.GetDeg()] = 0
            gene_deg_counts[node.GetDeg()] += 1
        else:
            if node.GetDeg() not in disease_deg_counts.keys():
                disease_deg_counts[node.GetDeg()] = 0
            disease_deg_counts[node.GetDeg()] += 1
        
    deg_counts_plot(disease_deg_counts, 'blue', 'disease')
    deg_counts_plot(gene_deg_counts, 'green', 'gene')
    plt.savefig('deg_counts_plot.png')


def deg_counts_plot(deg_counts, color, label):
    x = []
    y = []
    for key in sorted(deg_counts.keys()):
        if key == 0 or deg_counts[key] == 0:
            continue
        x.append(key)
        y.append(deg_counts[key])

    x = np.log10(x)
    y = np.log10(y)
    plt.plot(x, y, color=color, linestyle="", marker="o", label = label)
    plt.legend()

def q4_1():
    graph = load_graph()
    print_graph_stats(graph)
    make_plots(graph)
    
q4_1()

nodes = 37444
num diseases = 20370
num genes = 17074
edges = 561119


In [13]:
def build_hdn(graph):
    if os.path.exists('hdn.graph'):
        fin = snap.TFIn('hdn.graph') #load exsit graph
        dgraph = snap.TUNGraph.Load(fin)
    else:
        dgraph = snap.TUNGraph.New() #disease network 'Human Disease Network'
        ggraph = snap.TUNGraph.New() #gene network
        for node in graph.Nodes():
            if is_gene(node):
                ggraph.AddNode(node.GetId())
            else:
               dgraph.AddNode(node.GetId())
        for i in dgraph.Nodes():
            print 'node {}'.format(i.GetId())
            j = dgraph.GetNI(i.GetId()) # j is an iterator
            j.Next() # node with node id next to j
            while j < dgraph.EndNI(): # EndNI is an iterator as j, two iterators are comparable.
                if i.GetId() == j.GetId():
                    continue
                if dgraph.IsEdge(i.GetId(), j.GetId()):
                    continue
                i_neighbors = get_neighbors(graph, i.GetId()) # find neighbors of node i in the bipartite network
                j_neighbors = get_neighbors(graph, j.GetId())
                if len(i_neighbors.intersection(j_neighbors)) > 0: #  project unweighted undirected network
                    dgraph.AddEdge(i.GetId(), j.GetId())
                j.Next()

        fout = snap.TFOut('hdn.graph') # save graph in binary format
        dgraph.Save(fout)
        fout.Flush()

    print ''
    print 'HDN graph'
    print 'nodes = {}'.format(dgraph.GetNodes())
    print 'edges = {}'.format(dgraph.GetEdges())
    print 'density = {}'.format(2.0 * dgraph.GetEdges() / (dgraph.GetNodes() * (dgraph.GetNodes() - 1.0)))
    print 'clust coef = {}'.format(snap.GetClustCf(dgraph, 1000))
    return dgraph

def get_neighbors(graph, nodeId, othergraph=None):
    n = set([])
    for i in range(graph.GetNI(nodeId).GetDeg()): #graph.GetNI(nodeId).GetDeg()
        nid = graph.GetNI(nodeId).GetNbrNId(i)
        if othergraph is not None:
            if not othergraph.IsNode(nid):
                continue
        n.add(nid)
    return n


graph = load_graph()
hdn = build_hdn(graph)



HDN graph
nodes = 20370
edges = 12892536
density = 0.0621452030364
clust coef = 0.793551293105


In [14]:
def max_clique(graph):
    mxcl = 0
    for node in graph.Nodes():
        if is_gene(node):
            sz = node.GetDeg()
            if sz > mxcl:
                mxcl = sz
    print ''
    print 'HDN max clique size = {}'.format(mxcl)
    
max_clique(graph)


HDN max clique size = 1686


In [15]:
def contraction(graph, hdn):
    
    supernodes = set([])

    def contract_clique(node):
        clique = get_neighbors(graph, node.GetId(), othergraph=hdn)
        supernodeId = clique.pop()
        assert(hdn.IsNode(supernodeId))
        supernodes.add(supernodeId)
        for nodeId in clique:
            if nodeId in supernodes:
                continue
            for nbrId in get_neighbors(hdn, nodeId):
                if not hdn.IsNode(nbrId):
                    continue
                assert(hdn.IsNode(supernodeId))
                assert(hdn.IsNode(nbrId))
                hdn.AddEdge(supernodeId, nbrId)
            assert(not supernodeId == nodeId)
            assert(nodeId not in supernodes)
            hdn.DelNode(nodeId)
            #graph.DelNode(nodeId)

    node = graph.BegNI()
    while node < graph.EndNI():
        if is_gene(node) and node.GetDeg() > 250: 
            contract_clique(node)
        node.Next()
        
    contracted = hdn
    clust_cf = snap.GetClustCf(contracted, int(1000)) #compute clustering coef over a sample
    print ''
    print 'Contracted graph stats'
    print 'clust cf = {}'.format(clust_cf)
    print 'density = {}'.format(2.0 * contracted.GetEdges() / (contracted.GetNodes() * (contracted.GetNodes() - 1.0)))
    print 'nodes = {}'.format(contracted.GetNodes())
    print 'edges = {}'.format(contracted.GetEdges())

contraction(graph, hdn)


Contracted graph stats
clust cf = 0.901562081638
density = 0.00433275563258
nodes = 9233
edges = 184660


In [16]:
a_node = set([])

In [21]:
a_node.add(3)

In [23]:
a_node.pop()

1

In [24]:
a_node

{2, 3}

In [25]:
def disease_sim(graph):
    crohnid = 200000000 + 10346
    leukid = 200000000 + 23418

    def top_five_sim(nid, metric):
        if nid == crohnid:
            name = 'crohn'
        else:
            name = 'leuk'
        print ''
        print 'Scores for {} using {}'.format(name, metric)
        top_five = []
        top_five_scores = []
        for node in graph.Nodes():
            if is_gene(node):
                continue
            if nid == crohnid and node.GetId() == crohnid:
                continue
            if nid == leukid and node.GetId() == leukid:
                continue
            nid_nb = get_neighbors(graph, nid)
            nb = get_neighbors(graph, node.GetId())
            if metric == 'CN':
                score = len(nid_nb.intersection(nb))
            else:
                score = len(nid_nb.intersection(nb)) / (1.0 * len(nid_nb.union(nb)))
            if len(top_five) < 5:
                top_five.append(node.GetId())
                top_five_scores.append(score)
            else:
                min_ind = np.argmin(top_five_scores)
                if score > top_five_scores[min_ind]:
                    top_five[min_ind] = node.GetId()
                    top_five_scores[min_ind] = score
        print top_five
        print top_five_scores

    assert(graph.IsNode(crohnid))
    assert(graph.IsNode(leukid))
    top_five_sim(crohnid, 'CN')
    top_five_sim(crohnid, 'JA')
    top_five_sim(leukid, 'CN')
    top_five_sim(leukid, 'JA')

disease_sim(graph)


Scores for crohn using CN
[200006142, 200009324, 200678222, 200027627, 200003873]
[549, 497, 541, 493, 492]

Scores for crohn using JA
[200009324, 200033860, 200003873, 200021390, 200009319]
[0.4034090909090909, 0.20563594821020564, 0.2188612099644128, 0.3675889328063241, 0.22916666666666666]

Scores for leuk using CN
[200027627, 200678222, 200006142, 202239176, 200596263]
[1198, 1346, 1361, 1133, 1255]

Scores for leuk using JA
[200025202, 200026764, 200024299, 200023449, 200023467]
[0.273531089560753, 0.27872670807453415, 0.32361166600079905, 0.3003613369467028, 0.41775557263643354]
