In [1]:
import scipy.sparse as sp
from standard_random_walk import construct_transition_probability_matrix, step_vector
from fast_pagerank import pagerank
import numpy as np
from sklearn.preprocessing import normalize
import random

In [2]:
# Load the sparse matrix saved in SciPy's .npz format (presumably the graph
# adjacency matrix, judging by the file name) and display its summary repr.
matrix = sp.load_npz("full_adj_matrix.npz")
matrix

<32201x32201 sparse matrix of type '<class 'numpy.float64'>'
	with 518436100 stored elements in Compressed Sparse Row format>

In [3]:
# Densify the sparse matrix so it can be added to its own transpose.
# NOTE(review): `todense()` returns an `np.matrix`, and a dense 32201x32201
# float64 matrix (the shape shown when loading) needs about 8 GB of RAM.
adjmatrix = matrix.todense()

In [4]:
# Add the transpose so that entry (i, j) equals entry (j, i).
# NOTE(review): this cell is not idempotent; running it a second time doubles
# every value because it reads and overwrites the same name.
adjmatrix = adjmatrix + adjmatrix.T

In [5]:
# Inspect the symmetrised dense matrix (NumPy abbreviates the repr).
adjmatrix

matrix([[0.        , 0.21710567, 0.20660088, ..., 0.20697376, 0.32662087,
         0.29426479],
        [0.21710567, 0.        , 0.23690168, ..., 0.23188163, 0.22041921,
         0.21828393],
        [0.20660088, 0.23690168, 0.        , ..., 0.52842369, 0.21039084,
         0.24795158],
        ...,
        [0.20697376, 0.23188163, 0.52842369, ..., 0.        , 0.21038629,
         0.20876588],
        [0.32662087, 0.22041921, 0.21039084, ..., 0.21038629, 0.        ,
         0.30809054],
        [0.29426479, 0.21828393, 0.24795158, ..., 0.20876588, 0.30809054,
         0.        ]])

In [6]:
# Root-sum-of-squares norm of the dense matrix, taken over its non-NaN entries only.
norm = np.linalg.norm(adjmatrix[np.logical_not(np.isnan(adjmatrix))])

In [7]:
# Scale the dense matrix by its norm (skipped when the norm is zero to avoid
# dividing by zero) and store the result in CSR format.
if norm != 0.:
    adj_matrix = sp.csr_matrix(adjmatrix / norm)
else:
    adj_matrix = sp.csr_matrix(adjmatrix)

In [8]:
# Drop the reference to the originally loaded sparse matrix; later cells only
# use the dense `adjmatrix` and the normalised `adj_matrix`.
del matrix

In [9]:
# Replace NaN entries of the normalised matrix with 0.
# A sparse matrix can only hold NaN in its explicitly stored values, so the
# CSR `data` buffer is scanned and patched in place. This avoids building a
# full dense copy of the matrix plus a dense boolean mask just to locate NaNs.
# `where_are_NaNs` is therefore a 1-D mask over the stored values.
where_are_NaNs = np.isnan(adj_matrix.data)
adj_matrix.data[where_are_NaNs] = 0

In [10]:
# Display the dense, un-normalised `adjmatrix`.
# NOTE(review): the previous cell cleaned `adj_matrix` (the sparse, normalised
# copy), not this object — confirm which of the two was meant to be inspected.
adjmatrix

matrix([[0.        , 0.21710567, 0.20660088, ..., 0.20697376, 0.32662087,
         0.29426479],
        [0.21710567, 0.        , 0.23690168, ..., 0.23188163, 0.22041921,
         0.21828393],
        [0.20660088, 0.23690168, 0.        , ..., 0.52842369, 0.21039084,
         0.24795158],
        ...,
        [0.20697376, 0.23188163, 0.52842369, ..., 0.        , 0.21038629,
         0.20876588],
        [0.32662087, 0.22041921, 0.21039084, ..., 0.21038629, 0.        ,
         0.30809054],
        [0.29426479, 0.21828393, 0.24795158, ..., 0.20876588, 0.30809054,
         0.        ]])

In [11]:
def random_walk_with_teleport(steps: int,
                              adj_matrix,
                              teleport_vector: np.ndarray,
                              damp_factor=0.85,
                              seed=None):
    """
    Damped power iteration ("random walk with teleport") from a random start node.

    The walk starts from a one-hot vector on a randomly chosen node and applies
    ``step = damp_factor * step @ adj_matrix.T + (1 - damp_factor) * teleport``
    ``steps`` times, where ``teleport`` is ``teleport_vector`` rescaled to sum to 1.

    :param steps: the number of power iterations to run
    :param adj_matrix: dense square 2-D array of edge weights; its first
        dimension defines the number of nodes
    :param teleport_vector: a numpy array which indicates which nodes to favor;
        it is normalised by its sum before use
    :param damp_factor: weight given to the walk term, the remainder goes to
        the teleport term (default 0.85)
    :param seed: optional seed for choosing the start node; when ``None`` the
        module-level ``random`` generator is used, so results vary between runs
    :return: numpy array holding one score per node
    :raises ValueError: if ``teleport_vector`` sums to zero or to a non-finite
        value, which would otherwise fill the result with NaN/inf
    """
    teleport_total = teleport_vector.sum(axis=0)
    if np.any(teleport_total == 0) or not np.all(np.isfinite(teleport_total)):
        raise ValueError("teleport_vector must have a finite, non-zero sum")
    normalized_teleport = teleport_vector / teleport_total

    # random start node (reproducible only when a seed is supplied)
    node_count = adj_matrix.shape[0]
    picker = random if seed is None else random.Random(seed)
    start_node = picker.randrange(0, node_count)

    step = np.zeros(node_count)
    step[start_node] = 1
    # The loop counter is unused; `_` keeps it from shadowing the start node.
    for _ in range(steps):
        step = damp_factor * np.dot(step, adj_matrix.T) + (1 - damp_factor) * normalized_teleport
    return step

In [12]:
# Run 20 iterations on the dense form of the normalised matrix, using a
# uniform (all-ones) teleport vector, then display the resulting scores.
step = random_walk_with_teleport(
    steps=20,
    adj_matrix=adj_matrix.toarray(),
    teleport_vector=np.ones(adj_matrix.shape[0]),
)
step

array([2.42176690e-05, 2.22731589e-05, 2.42695351e-05, ...,
       2.39294456e-05, 2.62488344e-05, 2.35611463e-05])

In [13]:
# Persist the score vector to disk; `np.save` appends the ".npy" extension,
# so the file written is "pr_matrix.npy".
np.save("pr_matrix", step)