In [1]:
import numpy as np

In [19]:
# Load the sampled edge list from a comma-separated file, parsing every field as
# a 64-bit integer. Later cells index each row as edge[0] / edge[1], so each row
# is presumably one (source, target) pair of raw node ids -- verify against the CSV.
edges = np.genfromtxt('../data/flikr/edges_sampled_2K.csv', delimiter=',', dtype='int64')

In [23]:
# Forward lookup: raw node id -> dense index, where the index is the id's
# position among the sorted unique ids appearing anywhere in `edges`.
mapper = dict(
    (node_id, position)
    for position, node_id in enumerate(np.unique(edges.ravel()))
)
# Inverse lookup: dense index -> raw node id.
reverse_mapper = dict(zip(mapper.values(), mapper.keys()))

In [37]:
# Relabel every endpoint with its dense index in one vectorised step instead of
# a Python-level loop over rows. Every value in `edges` occurs in
# `np.unique(edges)` (sorted, no duplicates), so `searchsorted` returns exactly
# the position of that value -- the same index the `mapper` dictionary stores.
# The cast keeps the dtype identical to `edges`, as `np.empty_like` did before.
normalized_edges = np.searchsorted(np.unique(edges), edges).astype(edges.dtype)

In [38]:
# Show the remapped edge list; NumPy abbreviates the middle rows with '...'.
normalized_edges

array([[111, 134],
       [111, 125],
       [111, 116],
       ...,
       [ 92, 134],
       [ 92, 109],
       [ 92, 159]])

## Helpers

The helper functions below are adapted from the NetworkX betweenness centrality implementation: https://github.com/networkx/networkx/blob/master/networkx/algorithms/centrality/betweenness.py

In [61]:
def shortest_path(G, s):
    """Breadth-first search from ``s`` that counts shortest paths.

    Every edge is treated as having length 1.

    Parameters
    ----------
    G : mapping
        Adjacency structure. Iterating ``G`` must yield its nodes and ``G[v]``
        must yield the neighbours of ``v`` (a dict of lists works).
    s : hashable
        Source node; expected to be a node of ``G``.

    Returns
    -------
    S : list
        Nodes reachable from ``s`` in the order they were dequeued, i.e. in
        non-decreasing distance from ``s``.
    P : dict
        ``P[w]`` is the list of predecessors of ``w`` on shortest paths from
        ``s``. Nodes that are never reached keep an empty list.
    sigma : dict
        ``sigma[w]`` is the number of shortest paths from ``s`` to ``w`` as a
        float; 0.0 for nodes that are never reached.
    """
    # Local import so the function works regardless of which cells ran before.
    from collections import deque

    S = []
    P = {v: [] for v in G}
    sigma = dict.fromkeys(G, 0.0)    # sigma[v]=0 for v in G
    sigma[s] = 1.0
    D = {s: 0}                       # D[v] = distance from s, set on discovery
    # deque.popleft() is O(1); list.pop(0) shifts every remaining element.
    Q = deque([s])
    while Q:   # use BFS to find shortest paths
        v = Q.popleft()
        S.append(v)
        Dv = D[v]
        sigmav = sigma[v]
        for w in G[v]:
            if w not in D:           # first time w is seen
                Q.append(w)
                D[w] = Dv + 1
            if D[w] == Dv + 1:   # this is a shortest path, count paths
                sigma[w] += sigmav
                P[w].append(v)  # predecessors
    return S, P, sigma

def accumulate(betweenness, S, P, sigma, s):
    """Fold one source's pair dependencies into the running betweenness totals.

    Nodes are taken from the end of ``S`` so that a node is processed before
    any of its predecessors in ``P``.

    Parameters
    ----------
    betweenness : dict
        Running totals keyed by node; updated in place and also returned.
    S : list
        Visit order produced for source ``s``. It is consumed: the list is
        empty when the function returns.
    P : dict
        ``P[w]`` lists the predecessors of ``w``.
    sigma : dict
        Shortest-path counts keyed by node.
    s : hashable
        The source node; its own total is left untouched.

    Returns
    -------
    dict
        The same ``betweenness`` object that was passed in.
    """
    dependency = {node: 0 for node in S}
    while len(S) > 0:
        node = S.pop()
        share = (1.0 + dependency[node]) / sigma[node]
        for pred in P[node]:
            dependency[pred] += sigma[pred] * share
        if node == s:
            continue  # the source does not score against itself
        betweenness[node] += dependency[node]
    return betweenness

def rescale(betweenness, n, normalized, directed=False, k=None):
    """Multiply every betweenness value by the applicable scale factor.

    Parameters
    ----------
    betweenness : dict
        Values keyed by node; scaled in place and also returned.
    n : int
        Number of nodes in the graph.
    normalized : bool
        If true, scale by ``1 / ((n - 1) * (n - 2))``; with ``n <= 2`` no
        scaling is applied. If false, scale by 0.5 unless ``directed`` is
        true, in which case no scaling is applied.
    directed : bool, optional
        Only consulted when ``normalized`` is false.
    k : number, optional
        When given and a scale factor applies, the factor is further
        multiplied by ``n / k``.

    Returns
    -------
    dict
        The same ``betweenness`` object that was passed in.
    """
    if normalized:
        # n <= 2 means no normalization: b=0 for all nodes
        factor = None if n <= 2 else 1.0 / ((n - 1) * (n - 2))
    else:
        # rescale by 2 for undirected graphs
        factor = None if directed else 0.5

    if factor is None:
        return betweenness

    if k is not None:
        factor = factor * n / k
    for node in betweenness:
        betweenness[node] *= factor
    return betweenness

In [66]:
def betweeness(G, normalized=True):
    """Compute betweenness centrality for every node of ``G``.

    For each node as source, runs ``shortest_path`` and feeds the result to
    ``accumulate``; the summed values are then passed through ``rescale``.

    Parameters
    ----------
    G : graph
        Must support iteration over nodes, ``G[v]`` neighbour lookup,
        ``len(G)``, ``G.nodes()`` and ``G.is_directed()`` (for example a
        ``networkx`` graph).
    normalized : bool, optional
        Forwarded to ``rescale``. Defaults to ``True``, which is what this
        function always used before the parameter existed.

    Returns
    -------
    dict
        Betweenness value keyed by node.
    """
    betweenness = dict.fromkeys(G, 0.0)

    for s in G.nodes():
        S, P, sigma = shortest_path(G, s)
        betweenness = accumulate(betweenness, S, P, sigma, s)

    # k=None: every node was used as a source, so no sampling correction.
    betweenness = rescale(betweenness, len(G), normalized=normalized,
                          directed=G.is_directed(), k=None)

    return betweenness

In [42]:
import networkx as nx

In [52]:
# Start from an empty networkx Graph; edges are added in a later cell.
G = nx.Graph()

In [55]:
# Add every remapped edge to the graph (mutates G in place).
G.add_edges_from(normalized_edges)

In [68]:
# Validate the hand-written implementation against NetworkX. The values are
# floats, so compare within a small absolute tolerance rather than with `==`,
# which can fail on harmless differences in summation order.
betweenness_custom = betweeness(G)
betweenness_reference = nx.betweenness_centrality(G)
(
    set(betweenness_custom) == set(betweenness_reference)
    and all(
        abs(betweenness_custom[node] - expected) <= 1e-9
        for node, expected in betweenness_reference.items()
    )
)

True