-
Notifications
You must be signed in to change notification settings - Fork 1
/
pagerank.py
62 lines (44 loc) · 1.59 KB
/
pagerank.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import numpy as np
import matplotlib.pyplot as plt
alpha = 0.85 # 跳转因子
def draw(iter_list, pr_list):
"""
绘制收敛图
:param iter_list: 迭代次数列表
:param pr_list: 每一个向量的值
:return:
"""
plt.plot(iter_list, pr_list)
plt.show()
def pagerank(adj_matrix, pr_vec):
"""
pagerank算法:
V_{i+1} = alpha * V_i * adj + ((1 - alpha) / N) * E
收敛条件:
1. V_{i+1} == V_i
2. 迭代次数200次
就是说如果PR向量并没有发生改变,那么收敛结束,得到的PR值就是最终的PR值
但是假设迭代次数过高后且未发生收敛,那么就会陷入死循环等,根据前人总结的经验,设置为迭代20次
:param adj_matrix: 邻接矩阵
:param pr_vec: 每个节点PR值的初始向量
:return:
"""
num_nodes = np.size(adj_matrix, 1) # 获得矩阵列数(ie. 节点数)
jump_value = (1 - alpha) / num_nodes # 从其他页面跳转入所在页面的概率(标量)
jump_vec = jump_value * np.ones(num_nodes) # 向量化
# iter_list = []
# pr_list = []
for n_iter in range(1, 201):
pr_new = alpha * np.dot(pr_vec, adj_matrix) + jump_vec
print("第{0}次迭代的PR值:{1}".format(n_iter, pr_new))
# iter_list.append(n_iter)
# pr_list.append(tuple(pr_new))
if (pr_new == pr_vec).all():
break
# else:
# # 调试用
# test = pr_new - pr_vec
pr_vec = pr_new
# draw(iter_list, pr_list)
print("迭代完成!")
print("收敛值为:", pr_vec)