-
Notifications
You must be signed in to change notification settings - Fork 0
/
PageRank.py
86 lines (74 loc) · 3.3 KB
/
PageRank.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
__author__ = 's0539720'
import collections
import numpy as np
class PageRank:
def __init__(self, linkStructure):
self.__linkStructure = linkStructure
self.__d = 0.95
self.__t = 1 - self.__d
self.__delta = 0.04
self.__diffInStep = [0.0]
self.__N = len(self.__linkStructure) + 0.0
self.__pageRanks = [[1/self.__N for i in range(len(self.__linkStructure))]]
__transitions = self.__getTransitionProbabilities()
self.__calculatePageRank(__transitions)
def __getTransitionProbabilities(self):
transitionProbabilities = {}
#create transition table
for col in self.__linkStructure:
for row in self.__linkStructure:
transitionProbabilities[(col, row)] = 0
#fill transition table and order alphabetically
for site in self.__linkStructure:
outlinks = len(self.__linkStructure[site])
if outlinks > 0:
for link in self.__linkStructure[site]:
transitionProbabilities[(site, link)] = (1.0 / outlinks) * self.__d + (self.__t/self.__N)
for link in self.__linkStructure:
if transitionProbabilities[(site, link)] == 0:
transitionProbabilities[(site, link)] = self.__t/self.__N
else:
for link in self.__linkStructure:
transitionProbabilities[(site, link)] = 1.0/self.__N
transitionProbabilities = collections.OrderedDict(sorted(transitionProbabilities.items()))
#transform transitionProbabilities dictionary into a matrixlike array
transitions = []
k = 0
for i in range(len(self.__linkStructure)):
transitions.append([])
for j in range(len(self.__linkStructure)):
transitions[i].append(list(transitionProbabilities.values())[k])
k += 1
return transitions
#calculate pageranks
def __calculatePageRank(self, transitions):
diff = 1.0
step = 0
while diff > self.__delta:
step += 1
diff = 0.0
pageRank = list(np.array(np.asarray(np.matrix(self.__pageRanks[step - 1]) * np.matrix(transitions))).reshape(-1,)) # matrix to list
self.__pageRanks.append(pageRank)
for i in range(len(self.__linkStructure)):
diff += abs(self.__pageRanks[step][i] - self.__pageRanks[step-1][i])
self.__diffInStep.append(diff)
def getPageRank(self):
#create dictionary with sites as keys and pagerank as values
pageRank = {}
lastStep = len(self.__pageRanks) - 1
i = 0
for site in self.__linkStructure:
pageRank[site] = self.__pageRanks[lastStep][i]
i += 1
return pageRank
def printPageRank(self):
#print name of sites
sites = " "
for site in sorted(self.__linkStructure.keys()):
sites = sites + site + " "
sites = sites + "diff"
print(sites)
#print pageranks of sites
for i in range(len(self.__pageRanks)):
self.__pageRanks[i] = [str(format(x, '.4f')) for x in self.__pageRanks[i]]
print("step: " + str(i) + " " + str(self.__pageRanks[i]) + " " + str(format(self.__diffInStep[i], '.4f')))