In [1]:
import pandas as pd
from itertools import product

In [2]:
edges = [("A", "B"), ("A", "C"), ("A", "D"), ("B", "A"), ("B", "D"), ("C", "A"), ("D", "B"), ("D", "C")]
links = pd.DataFrame(edges,columns=['from','to'])
links

Unnamed: 0,from,to
0,A,B
1,A,C
2,A,D
3,B,A
4,B,D
5,C,A
6,D,B
7,D,C


In [3]:
links['pr'] = 1/links.groupby('from')['to'].transform('count')
links

Unnamed: 0,from,to,pr
0,A,B,0.333333
1,A,C,0.333333
2,A,D,0.333333
3,B,A,0.5
4,B,D,0.5
5,C,A,1.0
6,D,B,0.5
7,D,C,0.5


In [4]:
M = pd.pivot_table(links,index=['to'],columns=['from'],values=['pr'],fill_value=0).values
M

array([[0.        , 0.5       , 1.        , 0.        ],
       [0.33333333, 0.        , 0.        , 0.5       ],
       [0.33333333, 0.        , 0.        , 0.5       ],
       [0.33333333, 0.5       , 0.        , 0.        ]])

In [5]:
import numpy as np
x = np.array([1/4]*4)
for _ in range(27):
    x = M@x
    print(x)

[0.375      0.20833333 0.20833333 0.20833333]
[0.3125     0.22916667 0.22916667 0.22916667]
[0.34375 0.21875 0.21875 0.21875]
[0.328125   0.22395833 0.22395833 0.22395833]
[0.3359375  0.22135417 0.22135417 0.22135417]
[0.33203125 0.22265625 0.22265625 0.22265625]
[0.33398438 0.22200521 0.22200521 0.22200521]
[0.33300781 0.22233073 0.22233073 0.22233073]
[0.33349609 0.22216797 0.22216797 0.22216797]
[0.33325195 0.22224935 0.22224935 0.22224935]
[0.33337402 0.22220866 0.22220866 0.22220866]
[0.33331299 0.222229   0.222229   0.222229  ]
[0.33334351 0.22221883 0.22221883 0.22221883]
[0.33332825 0.22222392 0.22222392 0.22222392]
[0.33333588 0.22222137 0.22222137 0.22222137]
[0.33333206 0.22222265 0.22222265 0.22222265]
[0.33333397 0.22222201 0.22222201 0.22222201]
[0.33333302 0.22222233 0.22222233 0.22222233]
[0.33333349 0.22222217 0.22222217 0.22222217]
[0.33333325 0.22222225 0.22222225 0.22222225]
[0.33333337 0.22222221 0.22222221 0.22222221]
[0.33333331 0.22222223 0.22222223 0.22222223]


In [6]:
edges = [("A", "B"), ("A", "C"), ("A", "D"), ("B", "D"), ("C", "E"), ("D", "E"), ("B", "E")]
links = pd.DataFrame(edges,columns=['from','to'])
links

Unnamed: 0,from,to
0,A,B
1,A,C
2,A,D
3,B,D
4,C,E
5,D,E
6,B,E


In [7]:
deadends = product(set(links['to'])-set(links['from']), set(links['to'])|set(links['from']))
deadends = pd.DataFrame(deadends,columns=['from','to'])
links = pd.concat([links,deadends],axis=0)
links

Unnamed: 0,from,to
0,A,B
1,A,C
2,A,D
3,B,D
4,C,E
5,D,E
6,B,E
0,E,B
1,E,C
2,E,A


In [8]:
out_num = links.groupby('from').count()
out_num

Unnamed: 0_level_0,to
from,Unnamed: 1_level_1
A,3
B,2
C,1
D,1
E,5


In [9]:
in_node = links.groupby('to')['from'].agg(lambda x:set(x.values))
in_node

to
A             {E}
B          {E, A}
C          {E, A}
D       {B, E, A}
E    {B, D, E, C}
Name: from, dtype: object

In [20]:
g = pd.concat([in_node,out_num],axis=1)
g['pr'] = 1/len(g)
g

Unnamed: 0,from,to,pr
A,{E},3,0.2
B,"{E, A}",2,0.2
C,"{E, A}",1,0.2
D,"{B, E, A}",1,0.2
E,"{B, D, E, C}",5,0.2


In [11]:
import warnings
warnings.filterwarnings('ignore')

In [23]:
def pape_rank(graph,alpha=0.85,min_delta=0.00001,max_epoch=100):
    damp = (1-alpha)/len(graph)
    for i in range(max_epoch):
        graph['ave_pr'] = graph['pr']/graph['to']
        rank = lambda row: 0 if type(row['from'])!=set else graph.loc[row['from']]['ave_pr'].sum()
        graph['new_pr'] = graph.apply(rank,axis=1)*alpha + damp
        delta = (graph['new_pr']-graph['pr']).abs().sum()
        graph['pr'] = graph['new_pr']
        if delta < min_delta:
            print(f"finished in {i+1} iterations")
            break
    else:
        print("finished out of 100 iterations")
    graph.drop(['ave_pr','new_pr'],axis=1,inplace=True)

pape_rank(g)
g

finished in 1 iterations


Unnamed: 0,from,to,pr
A,{E},3,0.104318
B,"{E, A}",2,0.133874
C,"{E, A}",1,0.133874
D,"{B, E, A}",1,0.190771
E,"{B, D, E, C}",5,0.437162


In [13]:
import networkx as nx
G = nx.DiGraph()
for edge in edges:
    G.add_edge(edge[0],edge[1])

pagerank_list = nx.pagerank(G,alpha=0.85)
print("pagerank: ", pagerank_list)

pagerank:  {'A': 0.10431793757049487, 'B': 0.13387445661279748, 'C': 0.13387445661279748, 'D': 0.19077083230987668, 'E': 0.4371623168940336}
