<a href="https://colab.research.google.com/github/Ryan-M-Smith/CS315/blob/main/InClass/pagerank.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Google's Simple PageRank

## Eiginvector Calculation

The original version of Google **never** used Simple PageRank.

## NumPy Approach

In [36]:
import numpy as np

In [37]:
A = np.array([
  [0, 0, 1],
  [0.5, 0, 0],
  [0.5, 1, 0]
])

eigenvalues, eigenvectors = np.linalg.eig(A)

print(f"Eigenvalues: {eigenvalues}")
print(f"Eigenvectors: {eigenvectors}\n")

Eigenvalues: [ 1. +0.j  -0.5+0.5j -0.5-0.5j]
Eigenvectors: [[-0.66666667+0.j         -0.70710678+0.j         -0.70710678-0.j        ]
 [-0.33333333+0.j          0.35355339+0.35355339j  0.35355339-0.35355339j]
 [-0.66666667+0.j          0.35355339-0.35355339j  0.35355339+0.35355339j]]



In [38]:
eig1 = eigenvectors[:, 0]
print(f"Eigenvector associated with 1: {eig1}\n")

Eigenvector associated with 1: [-0.66666667+0.j -0.33333333+0.j -0.66666667+0.j]



In [39]:
l1_norm = np.linalg.norm(eig1, ord=1)
print(f"L1 Norm: {l1_norm}\n")

L1 Norm: 1.6666666666666667



In [40]:
scaled_eig = eig1 / l1_norm
print(f"Scaled eigenvector: {scaled_eig}\n")

Scaled eigenvector: [-0.4+0.j -0.2+0.j -0.4+0.j]



In [41]:
print(f"Matrix times eigenvector: {A @ scaled_eig}")

Matrix times eigenvector: [-0.4+0.j -0.2+0.j -0.4+0.j]


## Iterative Approach

In [33]:
iter_vec = np.array([1, 1, 1])
print(iter_vec)

[1 1 1]


In [43]:
print(f"Target: {scaled_eig}\n")
for _ in range(10):
  iter_vec = A @ iter_vec
  l1_norm = np.linalg.norm(iter_vec, 1)
  iter_vec = iter_vec / l1_norm
  print(iter_vec)

Target: [-0.4+0.j -0.2+0.j -0.4+0.j]

[0.40002441 0.19999186 0.39998372]
[0.39998372 0.20001221 0.40000407]
[0.40000407 0.19999186 0.40000407]
[0.40000407 0.20000203 0.3999939 ]
[0.3999939  0.20000203 0.40000407]
[0.40000407 0.19999695 0.39999898]
[0.39999898 0.20000203 0.39999898]
[0.39999898 0.19999949 0.40000153]
[0.40000153 0.19999949 0.39999898]
[0.39999898 0.20000076 0.40000025]


## Example Problem

In [58]:
# The number of links from a given page
n_a = 2
n_b = 2
n_c = 3
n_d = 1
n_e = 3

In [59]:
# The ranks of each page
rank_A = [0, 0, 1 / n_c, 1, 1 / n_e]
rank_B = [1 / n_a, 0, 1 / n_c, 0, 0]
rank_C = [1 / n_a, 0, 0, 0, 1 / n_e]
rank_D = [0, 1 / n_b, 0, 0, 1 / n_e]
rank_E = [0, 1 / n_b, 1 / n_c, 0, 0]

In [60]:
# Define the PageRank matrix
pagerank_matrix = np.array([
  rank_A,
  rank_B,
  rank_C,
  rank_D,
  rank_E
])
print(f"PageRank Matrix: {pagerank_matrix}")

PageRank Matrix: [[0.         0.         0.33333333 1.         0.33333333]
 [0.5        0.         0.33333333 0.         0.        ]
 [0.5        0.         0.         0.         0.33333333]
 [0.         0.5        0.         0.         0.33333333]
 [0.         0.5        0.33333333 0.         0.        ]]


In [61]:
#
# Create the PageRank vector
#

eigenvalues, eigenvectors = np.linalg.eig(A)

print(f"Eigenvalues: {eigenvalues}")
print(f"Eigenvectors: {eigenvectors}\n")

eig1 = eigenvectors[:, 0]
print(f"Eigenvector associated with 1: {eig1}\n")

l1_norm = np.linalg.norm(eig1, ord=1)
print(f"L1 Norm: {l1_norm}\n")

scaled_eig = eig1 / l1_norm
print(f"Scaled eigenvector: {scaled_eig}\n")

print(f"Matrix times eigenvector: {A @ scaled_eig}")

Eigenvalues: [ 1. +0.j  -0.5+0.5j -0.5-0.5j]
Eigenvectors: [[-0.66666667+0.j         -0.70710678+0.j         -0.70710678-0.j        ]
 [-0.33333333+0.j          0.35355339+0.35355339j  0.35355339-0.35355339j]
 [-0.66666667+0.j          0.35355339-0.35355339j  0.35355339+0.35355339j]]

Eigenvector associated with 1: [-0.66666667+0.j -0.33333333+0.j -0.66666667+0.j]

L1 Norm: 1.6666666666666667

Scaled eigenvector: [-0.4+0.j -0.2+0.j -0.4+0.j]

Matrix times eigenvector: [-0.4+0.j -0.2+0.j -0.4+0.j]


In [62]:
#
# Converge the vector using the iterative method
#

iter_vec = np.ones(pagerank_matrix.shape[0])
print(f"Initial iterative vector: {iter_vec}")

for _ in range(100):
  iter_vec = pagerank_matrix @ iter_vec
  l1_norm = np.linalg.norm(iter_vec, ord=1)
  iter_vec = iter_vec / l1_norm

print(f"Final converged PageRank vector: {iter_vec}")

Initial iterative vector: [1. 1. 1. 1. 1.]
Final converged PageRank vector: [0.27777778 0.2037037  0.19444444 0.15740741 0.16666667]


In [63]:
#
# Sort the labels by importance
#

page_labels = ['A', 'B', 'C', 'D', 'E']
pagerank_scores = list(zip(page_labels, iter_vec))
sorted_pagerank = sorted(pagerank_scores, key=lambda item: item[1], reverse=True)

print("PageRank scores sorted by importance:")
for page, score in sorted_pagerank:
  print(f"Page {page}: {score:.6f}")

PageRank scores sorted by importance:
Page A: 0.277778
Page B: 0.203704
Page C: 0.194444
Page E: 0.166667
Page D: 0.157407
