# PageRank algorithm

# Analogy between PageRank and iterative eigenvector computation

# Finding eigenvectors with repeated multiplication

In [None]:
import numpy as np  # imported in this first code cell so it runs on a fresh kernel

# Power iteration demo: repeatedly multiplying a vector by a matrix and
# re-normalising it converges towards the eigenvector associated with the
# eigenvalue of largest magnitude (the "dominant" eigenvector).
matrix = np.random.rand(3,3)
v = np.random.rand(3)
# Start from a unit vector so the first reported error is comparable with the
# following ones (every later estimate is normalised inside the loop).
v = v / np.linalg.norm(v)

values, vectors = np.linalg.eig(matrix)
# np.linalg.eig does not return the eigenvalues in any particular order, so the
# dominant eigenvector has to be selected explicitly instead of taking column 0.
# np.real drops the zero imaginary part that appears when eig returns a complex
# array because some *other* eigenvalue of the matrix is complex.
eig = np.real(vectors[:, np.argmax(np.abs(values))])

print("Note: an error of 2 is like an error of 0: the vectors just point in opposite directions (v_1 = -v_2)")

for _ in range(15):
    print(f"Current estimate {v}, error from eigenvector {np.linalg.norm(v - eig)}")
    # One power-iteration step: apply the matrix, then rescale to unit length.
    r = matrix @ v
    v = r / np.linalg.norm(r)

# Reference eigenvector the estimates above are compared against.
print(eig)

Note: an error of 2 is like an error of 0: the vectors just point in opposite directions (v_1 = -v_2)
Current estimate [0.61523358 0.38094225 0.78053719], error from eigenvector 2.050740606869501
Current estimate [0.50709526 0.54028169 0.67152818], error from eigenvector 1.999041560009243
Current estimate [0.53241112 0.54640295 0.64651544], error from eigenvector 1.999820480519057
Current estimate [0.53947274 0.55333864 0.63465386], error from eigenvector 1.9999677607793125
Current estimate [0.54266685 0.55600061 0.629584  ], error from eigenvector 1.9999942420034085
Current estimate [0.54399919 0.55713514 0.62742754], error from eigenvector 1.9999989778751126
Current estimate [0.54456049 0.55761139 0.62651689], error from eigenvector 1.999999818933086
Current estimate [0.54479654 0.55781173 0.62613322], error from eigenvector 1.9999999679545128
Current estimate [0.54489582 0.55789598 0.62597174], error from eigenvector 1.9999999943307243
Current estimate [0.54493758 0.55793141 0.62590

# Applying this to adjacency matrices for PageRank

# Computational problem of huge matrix-vector multiplication

# The adjacency matrix of the Web is very large but also very sparse

# How to multiply by a very large and very sparse matrix efficiently

# Implementation of a sparse matrix data structure

In [None]:
from re import X
from dataclasses import dataclass
from pprint import pprint
from collections import OrderedDict

@dataclass
class SparseItem:
    """A stored value paired with the column index it belongs to.

    NOTE(review): this class is not referenced by the SparseMatrix code shown
    in this notebook, which keys its entries by ``(row, column)`` tuples.
    """

    # The numeric value of the entry.
    value: float
    # Column position of the entry; an index, hence an integer.
    column: int

class SparseMatrix:
    """Sparse matrix stored as a mapping ``{(row, column): value}``.

    Only the entries present in ``items`` are stored; every other position is
    treated as zero when multiplying or converting back to a dense array.
    """

    # Design note: entries are kept in a flat dict {position: value}; an
    # alternative layout would be a dict of dicts
    # {row_number: {column_number: value}}.
    def __init__(self, items, dense_shape=None):
        """Wrap an existing mapping of entries.

        Args:
            items: mapping from ``(row, column)`` tuples to the stored values.
                It is kept by reference, not copied.
            dense_shape: ``(rows, columns)`` of the equivalent dense matrix, or
                ``None`` when the shape is not known.
        """
        self.items = items
        self.dense_shape = dense_shape

    @staticmethod
    def from_dense(dense):
        """Build a SparseMatrix holding the non-zero entries of a 2-D array.

        Args:
            dense: 2-D array (or anything ``np.asarray`` can convert to one).

        Returns:
            A SparseMatrix whose items are inserted in row-major order and
            whose ``dense_shape`` is the shape of ``dense``.

        Raises:
            ValueError: if ``dense`` is not two-dimensional.
        """
        dense = np.asarray(dense)
        if dense.ndim != 2:
            raise ValueError(f"expected a 2-D array, got {dense.ndim} dimension(s)")
        new = SparseMatrix(OrderedDict(), dense.shape)
        # np.nonzero yields the indices in row-major order, i.e. the same order
        # a nested row/column loop would visit them.
        rows, cols = np.nonzero(dense)
        for y, x in zip(rows.tolist(), cols.tolist()):
            new.items[(y, x)] = dense[y, x]
        return new

    def multiply_by_vector(self, vector, verbose=False):
        """Compute the matrix-vector product touching only the stored entries.

        Args:
            vector: array whose first axis is indexed by the column numbers.
            verbose: when true, pretty-print the local variables after every
                accumulated entry (debugging / teaching aid).

        Returns:
            A float array. Its first axis has ``dense_shape[0]`` entries when
            the shape is known; otherwise it has the same shape as ``vector``
            (which is only right for square matrices).

        Raises:
            ValueError: if ``dense_shape`` is known and the length of
                ``vector`` does not match the number of columns.
        """
        vector = np.asarray(vector)
        if self.dense_shape is None:
            # Shape unknown: keep the historical behaviour of assuming that
            # the matrix is square.
            res = np.zeros(vector.shape)
        else:
            if vector.shape[:1] != (self.dense_shape[1],):
                raise ValueError(
                    f"cannot multiply a matrix of shape {tuple(self.dense_shape)} "
                    f"by a vector of shape {vector.shape}"
                )
            # One output entry per matrix row, so non-square matrices work.
            res = np.zeros((self.dense_shape[0],) + vector.shape[1:])
        for (y, x), matrix_item in self.items.items():
            vector_current_item = vector[x]
            res[y] += matrix_item * vector_current_item
            if verbose:
                pprint(locals())
        return res

    def dense_representation(self):
        """Return the matrix as a dense float array.

        When ``dense_shape`` is ``None`` the smallest shape containing every
        stored entry is used (``(0, 0)`` if there are no entries).
        """
        shape = self.dense_shape
        if shape is None:
            if self.items:
                shape = (
                    max(y for y, _ in self.items) + 1,
                    max(x for _, x in self.items) + 1,
                )
            else:
                shape = (0, 0)
        dense = np.zeros(shape)
        for (y, x), item in self.items.items():
            dense[y, x] = item
        return dense

    @staticmethod
    def random_sparse_matrix_dense_repr(size, sparsity=0.5):
        """Return a random ``size`` x ``size`` dense array with zeroed entries.

        Args:
            size: number of rows and of columns.
            sparsity: fraction of the entries to set to zero; the resulting
                count ``int(sparsity * size * size)`` is clamped to the number
                of cells of the matrix.

        Returns:
            A dense array of uniform random values in which exactly that many
            distinct positions have been set to ``0.0``.
        """
        matrix = np.random.rand(size, size)
        n_cells = size * size
        n_zeros = min(max(int(sparsity * n_cells), 0), n_cells)
        if n_zeros:
            # Sample positions WITHOUT replacement: drawing them independently
            # can hit the same cell several times and leaves the matrix less
            # sparse than requested.
            zero_positions = np.random.choice(n_cells, size=n_zeros, replace=False)
            matrix.flat[zero_positions] = 0.0
        return matrix

    def __repr__(self):
        return f"SparseMatrix({self.items})"

    def __eq__(self, other):
        """Two sparse matrices are equal when they store the same entries.

        ``dense_shape`` is deliberately not compared, so a matrix built
        directly from its items (without a shape) can equal one built with
        ``from_dense``. The comparison does not depend on insertion order.
        """
        if not isinstance(other, SparseMatrix):
            # Let Python try the reflected comparison instead of failing with
            # an AttributeError on ``other.items``.
            return NotImplemented
        return dict(self.items) == dict(other.items)

# Examples and tests

In [None]:
# Worked example: convert a small 2x2 dense matrix to its sparse form.
m = SparseMatrix.from_dense(np.array([[1, 2], [0, 3]]))

# verbose=True pretty-prints the local variables after each stored entry is
# accumulated, which shows the product being built up step by step.
res = m.multiply_by_vector(np.array([1, 2]), verbose=True)

{'matrix_item': 1,
 'res': array([1., 0.]),
 'self': SparseMatrix(OrderedDict([((0, 0), 1), ((0, 1), 2), ((1, 1), 3)])),
 'vector': array([1, 2]),
 'vector_current_item': 1,
 'verbose': True,
 'x': 0,
 'y': 0}
{'matrix_item': 2,
 'res': array([5., 0.]),
 'self': SparseMatrix(OrderedDict([((0, 0), 1), ((0, 1), 2), ((1, 1), 3)])),
 'vector': array([1, 2]),
 'vector_current_item': 2,
 'verbose': True,
 'x': 1,
 'y': 0}
{'matrix_item': 3,
 'res': array([5., 6.]),
 'self': SparseMatrix(OrderedDict([((0, 0), 1), ((0, 1), 2), ((1, 1), 3)])),
 'vector': array([1, 2]),
 'vector_current_item': 2,
 'verbose': True,
 'x': 1,
 'y': 1}


In [None]:
# Preview a random 5x5 dense matrix in which part of the entries were zeroed
# out (the sparsity argument is left at its default of 0.5).
SparseMatrix.random_sparse_matrix_dense_repr(5)

array([[0.91507125, 0.40164316, 0.        , 0.        , 0.        ],
       [0.68011146, 0.95791376, 0.60480822, 0.79392249, 0.        ],
       [0.84871928, 0.        , 0.13870472, 0.44611222, 0.75136625],
       [0.        , 0.        , 0.79288921, 0.        , 0.73546179],
       [0.55597658, 0.        , 0.26633858, 0.        , 0.        ]])

In [None]:
import numpy as np

import unittest

class TestSparseMatrix(unittest.TestCase):
    """Checks SparseMatrix conversion and multiplication against NumPy."""

    def test_simple_conversion(self):
        """from_dense keeps only the non-zero entries, keyed by (row, column)."""
        dense = np.array([[1, 2, 3], [0, 0, 0], [0, 0, 1]])
        expected = SparseMatrix(
            OrderedDict([((0, 0), 1), ((0, 1), 2), ((0, 2), 3), ((2, 2), 1)])
        )
        self.assertEqual(SparseMatrix.from_dense(dense), expected)

    def test_simple_mult(self):
        """A hand-computed product agrees with both NumPy and SparseMatrix."""
        dense = np.array([[1, 2, 3], [4, 0, 0], [0, 0, 1]])
        vec = np.array([1, 2, 3])

        # Expected result written out row by row.
        by_hand = np.array([1 * 1 + 2 * 2 + 3 * 3, 1 * 4, 3 * 1])
        via_numpy = dense @ vec
        via_sparse = SparseMatrix.from_dense(dense).multiply_by_vector(vec)

        np.testing.assert_array_almost_equal(by_hand, via_numpy)
        np.testing.assert_array_almost_equal(by_hand, via_sparse)

    def test_back_and_forth_is_equal_to_start(self):
        """dense -> sparse -> dense returns the starting matrix (20 random cases)."""
        for _ in range(20):
            original = SparseMatrix.random_sparse_matrix_dense_repr(20)
            round_tripped = SparseMatrix.from_dense(original).dense_representation()
            np.testing.assert_array_almost_equal(original, round_tripped)

    def test_multiplication_is_correct_oracle(self):
        """Sparse multiplication matches NumPy's ``@`` on 20 random 5x5 cases."""
        for _ in range(20):
            dense = SparseMatrix.random_sparse_matrix_dense_repr(5)
            vec = np.random.rand(5)

            # NumPy's dense product is the oracle for the sparse implementation.
            oracle = dense @ vec
            actual = SparseMatrix.from_dense(dense).multiply_by_vector(vec)

            np.testing.assert_array_almost_equal(actual, oracle)
    
    

# Run the test cases inside the notebook kernel: argv is overridden so that
# unittest does not try to parse the kernel's own command-line arguments, and
# exit=False stops unittest.main from calling sys.exit() when the run finishes.
unittest.main(verbosity=3, argv=['first-arg-is-ignored'], exit=False)

test_back_and_forth_is_equal_to_start (__main__.TestSparseMatrix) ... ok
test_multiplication_is_correct_oracle (__main__.TestSparseMatrix) ... ok
test_simple_conversion (__main__.TestSparseMatrix) ... ok
test_simple_mult (__main__.TestSparseMatrix) ... ok

----------------------------------------------------------------------
Ran 4 tests in 0.149s

OK


<unittest.main.TestProgram at 0x7fc965a594d0>