<a href="https://colab.research.google.com/github/LeeTookey/LeeTookey/blob/main/PageRankSetup.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# PageRank by power iteration on a small, hard-coded web graph.
#
# The script builds a row-stochastic transition matrix from an adjacency
# matrix, then repeatedly applies the PageRank update until successive
# rank vectors differ by at most `threshold`, and prints the result.

# Import numpy for matrix operations
import numpy as np

# Define the adjacency matrix of the web graph
# Each row represents a webpage, and each column represents a link target
# A 1 means there is a link from the row webpage to the column webpage
# A 0 means there is no link
adjacency_matrix = np.array([
    [0, 1, 1, 0, 0],  # Webpage 1 links to 2 and 3
    [0, 0, 1, 0, 0],  # Webpage 2 links to 3
    [1, 0, 0, 1, 1],  # Webpage 3 links to 1, 4 and 5
    [0, 0, 0, 0, 1],  # Webpage 4 links to 5
    [0, 0, 0, 0, 0],  # Webpage 5 has no outbound links
])

# Derive the number of webpages from the matrix so the two cannot disagree
n = adjacency_matrix.shape[0]

# Number of outbound links per webpage, kept as a column for broadcasting
out_degree = adjacency_matrix.sum(axis=1, keepdims=True)

# Webpages with no outbound links ("dangling" pages)
is_dangling = out_degree == 0

# Normalize each row by its sum to get the transition matrix, where each
# entry is the probability of moving from the row webpage to the column one.
# Dangling rows are divided by 1 instead of 0 (avoiding a 0/0 warning and
# NaNs) and are then replaced by a uniform 1/n row, so that every row of the
# transition matrix sums to 1 (row-stochastic).
safe_out_degree = np.where(is_dangling, 1, out_degree)
transition_matrix = np.where(
    is_dangling, 1 / n, adjacency_matrix / safe_out_degree
)

# Define the damping factor: the probability of following a link from the
# current webpage. With probability (1 - damping_factor) the surfer instead
# jumps to a webpage drawn from the teleportation vector.
damping_factor = 0.85

# Define the teleportation vector, which is a uniform distribution over all webpages
teleportation_vector = np.ones(n) / n

# Define the initial pagerank vector. Any probability distribution works as a
# starting point; the uniform one is used so that runs are reproducible.
pagerank_vector = teleportation_vector.copy()

# Define the threshold for convergence, which is the maximum allowed
# Euclidean distance between successive pagerank vectors
threshold = 1e-6

# Upper bound on the number of updates, so the loop always terminates
max_iterations = 1000

# Initialize a variable to store the difference between iterations
difference = 1

# Repeat until convergence (or until the iteration cap is reached)
for iteration_count in range(1, max_iterations + 1):
    # Store the previous pagerank vector
    previous_pagerank_vector = pagerank_vector
    # Update the pagerank vector by applying the pagerank formula.
    # The matrix-vector product is parenthesized so only the resulting
    # vector is scaled, not the whole matrix on every iteration.
    pagerank_vector = (
        damping_factor * (transition_matrix.T @ pagerank_vector)
        + (1 - damping_factor) * teleportation_vector
    )
    # Compute the difference between the previous and the current pagerank vector
    difference = np.linalg.norm(pagerank_vector - previous_pagerank_vector)
    if difference <= threshold:
        break
else:
    # The for-loop ran out of iterations without hitting the break above
    print(f"Warning: PageRank did not converge within {max_iterations} iterations")

# Print the final pagerank vector
print(pagerank_vector)
