In [16]:
import networkx as nx
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import operator

In [2]:
# Undirected graph read from the karate adjacency-list file; node labels are
# kept as strings.
undirected_G = nx.read_adjlist(
    'dataset/karate.adjlist',
    create_using=nx.Graph(),
    nodetype=str,
)

# Directed graph read from the tab-separated e-mail edge list; the extra
# column on each row is parsed as an int and stored as the 'time' edge
# attribute.
directed_G = nx.read_edgelist(
    'dataset/email_network.txt',
    delimiter='\t',
    create_using=nx.DiGraph(),
    nodetype=str,
    data=[('time', int)],
)

### Link Prediction : What new edges are likely to form?

#### Measure(1): Common Neighbors

In [17]:
# For every pair of nodes that is NOT currently linked, count the neighbors
# the two endpoints share, then rank the (u, v, count) triples from the most
# shared neighbors to the fewest. The sort is stable, so ties keep the order
# in which nx.non_edges produced them.
common_neigh = sorted(
    (
        (u, v, len(list(nx.common_neighbors(undirected_G, u, v))))
        for u, v in nx.non_edges(undirected_G)
    ),
    key=lambda scored_pair: scored_pair[2],
    reverse=True,
)

#### Measure(2): Jaccard Coefficient
#### (# of common neighbors of the 2 nodes) / (# of distinct nodes that are neighbors of at least one of the 2 nodes)

In [23]:
# Rank the (u, v, score) triples produced by the Jaccard coefficient from the
# highest score (third element) to the lowest; sorted() consumes the iterator
# directly and returns a list.
coeff = sorted(
    nx.jaccard_coefficient(undirected_G),
    key=lambda scored_pair: scored_pair[2],
    reverse=True,
)

#### Measure(3): Resource Allocation
#### A node can send 'resource' to another through common neighbors; each common neighbor w contributes 1 / degree(w)

In [27]:
# Materialise the (u, v, score) triples of the resource allocation index,
# then order them in place from the highest score (third element) to the
# lowest. list.sort is stable, so tied pairs keep their original order.
alloc = list(nx.resource_allocation_index(undirected_G))
alloc.sort(key=lambda scored_pair: scored_pair[2], reverse=True)

#### Measure(4): Adamic-Adar Index
#### Like resource allocation, but each common neighbor w contributes 1 / log(degree(w))

In [31]:
# Rank the (u, v, score) triples of the Adamic-Adar index from the highest
# score (third element) to the lowest; the result is a plain list.
adar = sorted(
    nx.adamic_adar_index(undirected_G),
    key=lambda scored_pair: scored_pair[2],
    reverse=True,
)

#### Measure(5): Preferential attachment
#### Product of the two nodes' degrees
#### Nodes with high degree tend to gain more neighbors

In [33]:
# Collect the (u, v, score) triples of the preferential attachment measure
# and order them in place, largest score (third element) first. The sort is
# stable, so pairs with equal scores keep their original relative order.
pref = list(nx.preferential_attachment(undirected_G))
pref.sort(key=lambda scored_pair: scored_pair[2], reverse=True)

## Link Prediction with community structure
#### If two nodes are in the same community, they have a high tendency to form a link between them

#### Measure(6): Community Common Neighbors
#### nx.cn_soundarajan_hopcroft(G)
#### (# of common neighbors) + (# of common neighbors that are in the same community as both nodes)

#### Measure(7): Community Resource Allocation — similar to measure(6), but uses the resource allocation index and only considers common neighbors in the same community as both nodes
#### nx.ra_index_soundarajan_hopcroft(G)