In [27]:
import numpy as np
from scipy.sparse import csc_matrix

# Hyper-parameters
# beta is the weight used further down, where v_new is seeded with (1 - beta) / N
beta = 0.8

# Load the two link arrays: origin page IDs and target page IDs.
# They are consumed pairwise below, so entry k of each describes one link.
ID_From, ID_To = (np.load(npy_name) for npy_name in ('ID_From.npy', 'ID_To.npy'))

# Find unique page IDs (np.unique returns them sorted)
# These pages will be both the rows and columns of our matrix M
ID_Unique = np.unique(np.concatenate((ID_From, ID_To), axis=0))
n_pages = len(ID_Unique)

# Translate raw page IDs into matrix positions 0..N-1.
# Using the raw IDs directly as indices only works when the IDs happen to be
# exactly 0..N-1; looking each ID up in the sorted unique array works for any
# set of IDs and is the identity mapping in the contiguous case.
From_Index = np.searchsorted(ID_Unique, ID_From)
To_Index = np.searchsorted(ID_Unique, ID_To)

# Count outgoing links for every page that has at least one
# This will serve as the denominator for the columns of our matrix M
# i.e., assuming even probability, 1 / (the total outgoing count) is the
# probability of going to the page of a row from the page of a column
ID_Outgoing_Nonzero, ID_Outgoing_Count = np.unique(ID_From, return_counts=True)

# Create our matrix: one unit at (target, origin) per link.
# The COO-style constructor sums repeated (row, col) pairs, so a link listed
# k times ends up as a single stored entry with value k.
M = csc_matrix((np.ones(len(ID_From)), (To_Index, From_Index)), shape=(n_pages, n_pages))

# Normalize by dividing each column by its total outgoing count
# Dead ends keep a divisor of 1: their columns have no stored entries, so they
# stay all-zero, and the 1 just avoids a division by zero / NaNs
Norm = np.ones(n_pages)
Norm[np.searchsorted(ID_Unique, ID_Outgoing_Nonzero)] = ID_Outgoing_Count

# CSC stores its data column by column, so repeating each column's divisor by
# that column's number of stored entries lines up one-to-one with M.data.
# Dividing M.data in place keeps the sparse matrix a sparse matrix.
val = np.repeat(Norm, M.getnnz(axis=0))
M.data /= val

# Sanity check: every column that has any entries must now sum to 1
Column_Sum = np.asarray(M.sum(axis=0)).ravel()
assert np.allclose(Column_Sum[Column_Sum > 0], 1.0), "non-empty columns of M must sum to 1"

# Starting vectors, one entry per unique page ID
n_pages = len(ID_Unique)

# v_old: even initial distribution, 1/N for every page
v_old = np.full(n_pages, 1.0) / n_pages

# v_new: every entry starts at (1 - beta)/N
v_new = np.full(n_pages, 1.0 - beta) / n_pages

In [25]:
# NOTE(review): unfinished sketch of a loop-based update of v_new, kept inert
# by wrapping it in a string literal. Because the string is the cell's only
# expression, the notebook simply echoes it back as the cell output.
# The sketch is not valid Python as written (empty range() and empty
# subscripts), so it cannot just be un-quoted.
# TODO: replace with a working update step or delete this cell.
'''
for i in range(M.shape[1]):
    for j in range ():
        v_new[] += beta * v_old[i]/M[]
'''

'\nfor i in range(M.shape[1]):\n    for j in range ():\n        v_new[] += beta * v_old[i]/M[]\n'

In [30]:
# Spot check: list the stored (row, column) -> value entries of column 0 of M
print(M[:, 0])

  (234, 0)	0.0051813471502590676
  (240, 0)	0.0051813471502590676
  (1365, 0)	0.0051813471502590676
  (1707, 0)	0.0051813471502590676
  (1910, 0)	0.0051813471502590676
  (2446, 0)	0.0051813471502590676
  (3072, 0)	0.0051813471502590676
  (3182, 0)	0.0051813471502590676
  (3282, 0)	0.0051813471502590676
  (3391, 0)	0.0051813471502590676
  (3549, 0)	0.0051813471502590676
  (4632, 0)	0.0051813471502590676
  (4638, 0)	0.0051813471502590676
  (4640, 0)	0.0051813471502590676
  (4833, 0)	0.0051813471502590676
  (4918, 0)	0.0051813471502590676
  (4956, 0)	0.0051813471502590676
  (5688, 0)	0.0051813471502590676
  (5745, 0)	0.0051813471502590676
  (5747, 0)	0.0051813471502590676
  (6074, 0)	0.0051813471502590676
  (6086, 0)	0.0051813471502590676
  (6287, 0)	0.0051813471502590676
  (6949, 0)	0.0051813471502590676
  (7524, 0)	0.0051813471502590676
  :	:
  (240663, 0)	0.0051813471502590676
  (240719, 0)	0.0051813471502590676
  (240853, 0)	0.0051813471502590676
  (240883, 0)	0.0051813471502590676
  