## Pagerank

In [1]:
import numpy as np

In [2]:
def create_graph(edges,N):
    G = np.zeros((N,N))
    P = np.zeros((N,N))
    for k in range(len(edges)):
        i,j = edges[k]-1
        G[j][i] = 1
        P[j][i] = 1
    
    for j in range(N):
        Nj = G[:,j].sum()
        if Nj:
            G[:,j] = G[:,j]/Nj
            P[:,j] = P[:,j]/Nj
        else:
            P[:,j] = 1/N*np.ones(N)
    
    return G,P

edges = np.array([
    [1,2],[1,4],[1,5],
    [2,1],[2,3],[2,5],
    [3,6],
    [5,3],[5,4],[5,6],
    [6,3],[6,5]
])
N = 6
Q,P = create_graph(edges,N)
Q.round(2)

array([[0.  , 0.33, 0.  , 0.  , 0.  , 0.  ],
       [0.33, 0.  , 0.  , 0.  , 0.  , 0.  ],
       [0.  , 0.33, 0.  , 0.  , 0.33, 0.5 ],
       [0.33, 0.  , 0.  , 0.  , 0.33, 0.  ],
       [0.33, 0.33, 0.  , 0.  , 0.  , 0.5 ],
       [0.  , 0.  , 1.  , 0.  , 0.33, 0.  ]])

## Random Walk and Markov Chains

In [3]:
def create_graph(edges,N):
    G = np.zeros((N,N))
    P = np.zeros((N,N))
    for k in range(len(edges)):
        i,j = edges[k]-1
        G[j][i] = 1
        P[j][i] = 1
    
    for j in range(N):
        Nj = G[:,j].sum()
        if Nj:
            G[:,j] = G[:,j]/Nj
            P[:,j] = P[:,j]/Nj
        else:
            P[:,j] = 1/N*np.ones(N)
    
    return G,P

def transition_matrix(P,alpha):
    N = P.shape[0]
    A = alpha*P+(1-alpha)/N*np.ones((N,N))
    return A

edges = np.array([
    [1,2],[1,4],[1,5],
    [2,1],[2,3],[2,5],
    [3,6],
    [5,3],[5,4],[5,6],
    [6,3],[6,5]
])
N = 6
Q,P = create_graph(edges,N)
# A = transition_matrix(P,0.85)
P.round(2)

array([[0.  , 0.33, 0.  , 0.17, 0.  , 0.  ],
       [0.33, 0.  , 0.  , 0.17, 0.  , 0.  ],
       [0.  , 0.33, 0.  , 0.17, 0.33, 0.5 ],
       [0.33, 0.  , 0.  , 0.17, 0.33, 0.  ],
       [0.33, 0.33, 0.  , 0.17, 0.  , 0.5 ],
       [0.  , 0.  , 1.  , 0.17, 0.33, 0.  ]])

In [4]:
P = np.array([
    [0,0.5,0.5,0.5,0,0],
    [0.5,0,0.5,0,0,0],
    [0.5,0.5,0,0,0,0],
    [0,0,0,0,0,0],
    [0,0,0,0.5,0,1],
    [0,0,0,0,1,0]
])
# A = alpha+(1-alpha)/n*(e*e.T)
A = transition_matrix(P,0.85)
A.round(2)

array([[0.03, 0.45, 0.45, 0.45, 0.03, 0.03],
       [0.45, 0.03, 0.45, 0.03, 0.03, 0.03],
       [0.45, 0.45, 0.03, 0.03, 0.03, 0.03],
       [0.03, 0.03, 0.03, 0.03, 0.03, 0.03],
       [0.03, 0.03, 0.03, 0.45, 0.03, 0.88],
       [0.03, 0.03, 0.03, 0.03, 0.88, 0.03]])

In [5]:
LP = np.linalg.eig(P)[0]
LP

array([-0.5,  1. , -0.5,  1. , -1. ,  0. ])

In [6]:
L,R = np.linalg.eig(A)
L.round(3),R.round(3)

(array([ 1.   ,  0.85 ,  0.   , -0.85 , -0.425, -0.425]),
 array([[-0.447,  0.365,  0.354, -0.   , -0.816, -0.172],
        [-0.43 ,  0.365, -0.354, -0.   ,  0.408, -0.605],
        [-0.43 ,  0.365, -0.354,  0.   ,  0.408,  0.777],
        [-0.057, -0.   ,  0.707, -0.   ,  0.   ,  0.   ],
        [-0.469, -0.548, -0.   , -0.707,  0.   ,  0.   ],
        [-0.456, -0.548, -0.354,  0.707, -0.   , -0.   ]]))

In [7]:
def schmidt_orthogonalization(A):
    
    m,n = A.shape
    Beta = np.zeros((m,n))
    E = np.zeros((m,n))
    R = np.eye(n,n)
    
    for r in range(n):
        
        Beta[:,r] = A[:,r]
        tmp = 0
        for j in range(r):
            
            tmp = tmp + Beta[:,j]*(A[:,r].dot(Beta[:,j]) / (Beta[:,j].dot(Beta[:,j])))
            
            R[j,r] = (A[:,r].dot(Beta[:,j]) / (Beta[:,j].dot(Beta[:,j])))
            
        Beta[:,r] = Beta[:,r] - tmp
        E[:,r] = Beta[:,r] / np.linalg.norm(Beta[:,r],ord=2)
    return Beta,R,E

U = np.random.random((N,N))
U[:,0] = np.ones(N)/np.sqrt(N)
U = schmidt_orthogonalization(U)[0]
U.T.dot(A).dot(U).round(2),(U.T.dot(P).dot(U)*0.85).round(2)

(array([[ 1.  , -0.  ,  0.  , -0.  , -0.  , -0.  ],
        [ 0.22,  0.02,  0.16,  0.14,  0.06, -0.07],
        [-0.11,  0.27, -0.09,  0.09, -0.01, -0.03],
        [-0.09,  0.07,  0.17,  0.15, -0.04, -0.04],
        [ 0.07,  0.06, -0.05, -0.02, -0.11,  0.01],
        [-0.02, -0.07, -0.02, -0.04,  0.01, -0.07]]),
 array([[ 0.85, -0.  ,  0.  , -0.  , -0.  , -0.  ],
        [ 0.22,  0.02,  0.16,  0.14,  0.06, -0.07],
        [-0.11,  0.27, -0.09,  0.09, -0.01, -0.03],
        [-0.09,  0.07,  0.17,  0.15, -0.04, -0.04],
        [ 0.07,  0.06, -0.05, -0.02, -0.11,  0.01],
        [-0.02, -0.07, -0.02, -0.04,  0.01, -0.07]]))

In [8]:
(U.T.dot(P).dot(U)*0.85-U.T.dot(A).dot(U)).round(3)

array([[-0.15, -0.  , -0.  ,  0.  ,  0.  ,  0.  ],
       [ 0.  ,  0.  ,  0.  ,  0.  ,  0.  , -0.  ],
       [-0.  ,  0.  , -0.  ,  0.  ,  0.  ,  0.  ],
       [ 0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ],
       [ 0.  , -0.  ,  0.  ,  0.  ,  0.  , -0.  ],
       [ 0.  ,  0.  ,  0.  ,  0.  , -0.  ,  0.  ]])

In [9]:
v = np.ones(N)/N
np.linalg.norm(v,ord=1)

0.9999999999999999

In [10]:
alpha = 0.85
A = alpha*P + (1-alpha)*v.dot(np.ones(N).T)
A

array([[0.15 , 0.575, 0.575, 0.575, 0.15 , 0.15 ],
       [0.575, 0.15 , 0.575, 0.15 , 0.15 , 0.15 ],
       [0.575, 0.575, 0.15 , 0.15 , 0.15 , 0.15 ],
       [0.15 , 0.15 , 0.15 , 0.15 , 0.15 , 0.15 ],
       [0.15 , 0.15 , 0.15 , 0.575, 0.15 , 1.   ],
       [0.15 , 0.15 , 0.15 , 0.15 , 1.   , 0.15 ]])

## The Power Method for Pagerank Computation

* 迭代算法

In [11]:
def calcR(d,M,R,N):
    return d*M.dot(R)+(1-d)/n*np.ones(n)

# M = np.array([
#     [0,1/2,0,0],
#     [1/3,0,0,1/2],
#     [1/3,0,1,1/2],
#     [1/3,1/2,0,0]
# ])
M = np.array([
    [0,0,1],
    [1/2,0,0],
    [1/2,1,0],
])
d = 0.8
n = M.shape[0]
OldR = np.ones(n)/n
NewR = -OldR
diff = 1000
while diff > 1e-3:
    NewR = calcR(d,M,OldR,n)
    diff = np.linalg.norm(NewR-OldR,ord=2)
    print(diff)
    OldR = NewR
R = NewR
R.round(4)

0.1885618083164127
0.15084944665313016
0.10451156235874898
0.04827182292900171
0.019308729171600644
0.015446983337280484
0.010701983985535833
0.004943034667929754
0.0019772138671718857
0.0015817710937375401
0.0010958831601189393
0.0005061667499960654


array([0.3837, 0.22  , 0.3963])

* 幂法
$$
R = (dM+\frac{1-d}{n}E)R = AR
$$

In [12]:
def calcR(A,R):
    return A.dot(R)

def calcA(d,M,R,N):
    return d*M+(1-d)/n*np.ones((n,n))

M = np.array([
    [0,0,1],
    [1/2,0,0],
    [1/2,1,0],
])
d = 0.85
n = M.shape[0]
OldR = np.ones(n)
A = calcA(d,M,OldR,n)
NewR = OldR
diff = 1000
Rs = [OldR]

while diff > 1e-3:
    y = calcR(A,OldR)
    x = y/np.max(y)
    NewR = x
    diff = np.linalg.norm(NewR-OldR,ord=2)
    print(diff)
    OldR = NewR
    Rs.append(NewR)

R = NewR/np.sum(NewR)
R.round(4)

0.6668974669736215
0.3702313709852009
0.31099948896081986
0.1516141644327309
0.06877193803821591
0.06312619802438128
0.050162422430656446
0.020257773138106207
0.011069399314656144
0.01055877407541046
0.006611574253362566
0.0026354683485579895
0.0014448586290107324
0.001379721377072195
0.0008616913195637558


array([0.3879, 0.2147, 0.3974])

* 代数法

In [13]:
d = 0.85
M = np.array([
    [0,0,1],
    [1/2,0,0],
    [1/2,1,0],
])
n = M.shape[0]
I = np.eye(n)
R = np.linalg.pinv(I-d*M).dot((1-d)/n*np.ones(n))
R.round(4)

array([0.3878, 0.2148, 0.3974])