This repository has been archived by the owner on Sep 24, 2019. It is now read-only.
/
Pagerank.py
43 lines (41 loc) · 1.74 KB
/
Pagerank.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
import networkx as nx
def pagerank(G, max_iter = 100, damping = 0.85, weight = 'weight', tol = 1e-6):
'''
Calculate node's importance in G according to node's degree and edge's weight
:param G: 需要执行pagerank算法的图,执行pagerank算法之后,该图并不改变。
:param max_iter: 最大迭代次数,默认为100次
:param damping: 阻尼系数,默认为0.85
:param weight: 图中边的权,默认为原图中边的权值
:param tol: 允许的误差限度,默认为1e-6
:return: 一个键为节点,值为节点权重的字典
'''
if len(G) is 0:
return {}
# Change G to directed because stochastic_graph function deal with directed graph
if not G.is_directed():
g = G.to_directed()
else:
g = G
# copy the origin graph
gcopy = nx.stochastic_graph(g, weight=weight)
N = gcopy.number_of_nodes()
x = dict.fromkeys(gcopy, 1/N)
p = dict.fromkeys(gcopy, 1/N)
dangling_weights = p
# dangling_nodes is the node that outdegree is zero
dangling_nodes = [n for n in gcopy if gcopy.out_degree(n, weight=weight) == 0.0]
for _ in range(max_iter):
xlast = x
x = dict.fromkeys(xlast.keys(), 0)
# find out all dangling_node
danglesum = damping * sum(xlast[n] for n in dangling_nodes)
for n in x:
for nbr in gcopy[n]:
# doing a left multiply
x[nbr] += damping * xlast[n] * gcopy[n][nbr][weight]
x[n] += danglesum * dangling_weights.get(n,0) + (1.0 - damping) * p.get(n,0)
# check convergence
Errorrate = sum([abs(x[n] - xlast[n]) for n in x])
if Errorrate < N*tol:
return x
raise ValueError('The graph is not convergence')