In [None]:
%run load_data.py
import sys
import multiprocessing as mp

In this document, the methods described in the methods section are implemented for the network of verdicts. The data is loaded as a `networkx` directed graph, which makes it relatively easy to work with.

In [None]:
def jaccard_predictions(G):
    """
    Score every candidate link by the Jaccard similarity of its endpoints.

    The score of a node pair (a, b) is |N(a) & N(b)| / |N(a) | N(b)|, where
    N(x) is the set of nodes reported by ``G.neighbors(x)``. Candidate pairs
    are the non-edges of the undirected version of G. Pairs whose neighbour
    sets do not overlap are left out of the result.

    The list is returned in the iteration order of ``nx.non_edges``; it is
    not sorted by score.

    parameters
    G: Directed or undirected nx graph
    returns
    list of linkbunches (u, v, {'score': s}) with the score as an attribute
    """
    undirected_view = nx.Graph(G)
    scored = []
    for pair in nx.non_edges(undirected_view):
        # Neighbourhoods are read from the original graph G, not from the
        # undirected view used to enumerate the candidate pairs.
        nbrs_first = set(G.neighbors(pair[0]))
        nbrs_second = set(G.neighbors(pair[1]))
        n_total = len(nbrs_first | nbrs_second)
        n_shared = len(nbrs_first & nbrs_second)
        if n_shared != 0 and n_total != 0:
            # 1.0 * forces float division on Python 2.
            scored.append(pair + ({'score': (1.0 * n_shared) / n_total},))
    return scored

def jaccard_prediction(non_edge, graph=None):
    """
    Score a single candidate link by the Jaccard similarity of its endpoints.

    Single-pair counterpart of jaccard_predictions, shaped so it can be
    handed to ``Pool.map``.

    parameters
    non_edge: tuple (u, v) of two nodes that are not connected
    graph: graph to read the neighbours from; when omitted, the module-level
           graph ``G`` is used
    returns
    linkbunch (u, v, {'score': s}), or None when the two nodes have no
    neighbour in common
    """
    if graph is None:
        graph = G
    u = set(graph.neighbors(non_edge[0]))
    v = set(graph.neighbors(non_edge[1]))
    uv_int = len(u & v)
    # Skip pairs without common neighbours, exactly like jaccard_predictions
    # does. A non-empty intersection also guarantees a non-empty union, so
    # the division below cannot hit zero.
    if uv_int == 0:
        return None
    uv_un = len(u | v)
    # 1.0 * forces float division on Python 2.
    return non_edge + ({'score': (1.0 * uv_int) / uv_un},)

def jaccard_mp_predictions(G):
    """
    Multiprocessing variant of jaccard_predictions.

    Scores the non-edges of the undirected version of G with
    jaccard_prediction, spread over a pool of 4 worker processes.

    NOTE: jaccard_prediction is called with only the node pair, so the
    workers read the neighbours from the module-level graph ``G``; the
    argument of this function is used to enumerate the candidate pairs.

    parameters
    G: Directed or undirected nx graph
    returns
    list of linkbunches with the score as an attribute
    """
    G_undirected = nx.Graph(G)
    pool = mp.Pool(processes=4)
    try:
        results = pool.map(jaccard_prediction, nx.non_edges(G_undirected))
    finally:
        # Always release the worker processes. try/finally is used instead
        # of a with-block because Pool is not a context manager on Python 2.
        pool.close()
        pool.join()
    # jaccard_prediction returns None for pairs it skips; drop those so the
    # result holds linkbunches only and can be sorted on x[2]['score'].
    return [result for result in results if result is not None]


In [None]:
%time p = jaccard_predictions(G)

In [None]:
sorted(p, key=lambda x: x[2]['score'], reverse=True)

In [None]:
%time pp = jaccard_mp_predictions(G)

In [None]:
G.edges('61959CJ0046')

In [None]:
nx.Graph(G).edges('61959CJ0023')

In [None]:
G['61959CJ0023']

In [None]:
# Count the node pairs of G that are not connected by an edge. The generator
# is consumed lazily, so the pairs are never all held in memory at once.
c = sum(1 for _ in nx.non_edges(G))
# Parenthesised form prints the same on Python 2 and also runs on Python 3.
print(c)