In [2]:
import numpy as np
from scipy.sparse import csc_matrix
from scipy.sparse.linalg import eigs

# Load the page titles: nodes_dict[i] = name of the Wikipedia page with id i.
# Each line is "<id>,<title>"; split on the first comma only so that titles
# which themselves contain commas stay intact.
nodes_dict = {}
with open('wisconsin_nodes.csv', "r") as nodes_file:
    for line in nodes_file:
        if not line.strip():
            continue  # skip blank lines instead of crashing in int()
        node_id, title = line.split(',', 1)
        nodes_dict[int(node_id.strip())] = title.strip()

node_count = len(nodes_dict)

# Build the adjacency matrix. Each edge line is "<from>,<to>" and is stored
# column-wise: A[to, from] = 1, so column j marks the rows that node j
# points to.
A = np.zeros((node_count, node_count))
with open('wisconsin_edges.csv', "r") as edges_file:
    for line in edges_file:
        if not line.strip():
            continue  # skip blank lines instead of crashing in int()
        fields = line.split(',')
        from_node = int(fields[0].strip())
        to_node = int(fields[1].strip())
        A[to_node, from_node] = 1.0

## Add code below to (1) prevent traps and (2) find the most important pages     
# Hint -- instead of computing the entire eigen-decomposition of the matrix A using
# s, E = np.linalg.eig(A)
# you can compute just the first eigenvector with:
# s, E = eigs(csc_matrix(A), k = 1)

# (i) Prevent traps by adding a small value to each entry of A, so every
# entry is positive and no column sums to zero.
A += 0.001

# (ii) Normalize A by column so that each column sums to 1.
A = A / A.sum(axis=0)

# (iii) Use an eigen decomposition to rank the importance of the Wikipedia pages.
# After step (i) every entry of A is nonzero, so a sparse format would store
# all n*n values plus index arrays and save nothing; eigs accepts the dense
# ndarray directly. k=1 with which='LR' requests only the eigenpair whose
# eigenvalue has the largest real part.
s, E = eigs(A, k=1, which='LR')

# eigs returns complex arrays; take the magnitudes of the principal
# eigenvector and rescale them to sum to 1 to obtain the PageRank values.
principal = np.abs(E[:, 0])
page_rank = principal / np.sum(principal)

# Sort the PageRank values in descending order and get the indices
sorted_indices = np.argsort(page_rank)[::-1]

# b) Get the title of the page ranked 1st
first_page_title = nodes_dict[sorted_indices[0]]

# c) Get the title of the page ranked 3rd
third_page_title = nodes_dict[sorted_indices[2]]

print("Title of the 1st ranked page:", first_page_title)
print("Title of the 3rd ranked page:", third_page_title)

Title of the 1st ranked page: "Wisconsin"
Title of the 3rd ranked page: "Madison, Wisconsin"
