In [None]:
import numpy as np
from scipy import sparse
import operator

from HW1.pageRank import build_sparse_link_matrix, calculate_pagerank

In [2]:
# --- Configuration ---
filename = 'hollins.dat'
# m is the random-jump (teleportation) weight, not the damping factor itself:
# the lower bound checked below is m / N, so every page gets at least that much.
# The commonly quoted damping factor is therefore 1 - m = 0.85.
m = 0.15
top_k = 10    # Number of top results to display

# 1. Build the link matrix from the edge-list file.
A_sparse, N, dangling_nodes = build_sparse_link_matrix(filename)

# 2. Calculate PageRank.
# NOTE(review): the None guard implies the builder returns None when the file
# cannot be loaded -- confirm against build_sparse_link_matrix.
if A_sparse is not None:
    pagerank_scores, iters = calculate_pagerank(A_sparse, N, dangling_nodes, m=m)

    print(f"Calculation converged in {iters} iterations.")

    # 3. Display Results
    # Page IDs are reported 1-based (assumes matrix index i is page i + 1 --
    # TODO confirm against the builder).
    page_ids = np.arange(1, N + 1)
    # Flatten so that a column-vector result formats the same way as a 1-D one
    # (the small-graph cell flattens its scores as well).
    scores_flat = np.asarray(pagerank_scores).ravel()
    results = list(zip(page_ids, scores_flat))

    # Sort by score descending; the sort is stable, so ties keep page-ID order.
    results_sorted = sorted(results, key=operator.itemgetter(1), reverse=True)

    print(f"\n--- TOP {top_k} RANKING ---")
    print(f"{'Rank':<5} | {'Page ID':<10} | {'Score':<15}")
    print("-" * 35)

    for rank, (page_id, score) in enumerate(results_sorted[:top_k], 1):
        print(f"{rank:<5} | {page_id:<10} | {score:.6f}")

    # 4. Mathematical Verification
    total_prob = np.sum(pagerank_scores)
    print("\n--- VERIFICATION ---")
    print(f"Total Probability Sum: {total_prob:.6f} (Should be 1.0)")

    # Minimum Score Check: the random jump alone guarantees m / N per page.
    min_score = scores_flat.min()
    expected_min = m / N
    print(f"Lowest PageRank Score:   {min_score:.8f}")
    print(f"Theoretical Minimum (m/N): {expected_min:.8f}")

    # Fail loudly instead of relying on a reader to eyeball the printed numbers.
    assert np.isclose(total_prob, 1.0, rtol=0.0, atol=1e-6), (
        f"PageRank scores sum to {total_prob}, expected 1.0"
    )
    assert min_score >= expected_min * (1.0 - 1e-6), (
        f"Lowest score {min_score} is below the m/N bound {expected_min}"
    )
else:
    # Without this branch a failed load leaves the cell silently empty.
    print(f"Could not build the link matrix from '{filename}'; PageRank was not computed.")

Reading file: hollins.dat...
Sparse Matrix Constructed: 6012 nodes.
Valid Non-Zero Links: 23875.
Dangling Nodes Identified: 3189.
Calculation converged in 71 iterations.

--- TOP 10 RANKING ---
Rank  | Page ID    | Score          
-----------------------------------
1     | 2          | 0.019879
2     | 37         | 0.009288
3     | 38         | 0.008610
4     | 61         | 0.008065
5     | 52         | 0.008027
6     | 43         | 0.007165
7     | 425        | 0.006583
8     | 27         | 0.005989
9     | 28         | 0.005572
10    | 4023       | 0.004452

--- VERIFICATION ---
Total Probability Sum: 1.000000 (Should be 1.0)
Lowest PageRank Score:   0.00005806
Theoretical Minimum (m/N): 0.00002495


In [3]:
# Test on the required graphs.
def run_pagerank_demo(title, link_matrix, m=0.15, prefix=""):
    """Run PageRank on a small dense link matrix and print the ranked pages.

    Parameters
    ----------
    title : str
        Heading printed between the two banner lines.
    link_matrix : numpy.ndarray
        Square link matrix; entry [i, j] is the weight of the link from
        page j + 1 to page i + 1. Every column must sum to 1.
    m : float, optional
        Value forwarded to ``calculate_pagerank`` as ``m``.
    prefix : str, optional
        Text printed immediately before the first banner line (used to put
        blank lines between consecutive demos).

    Returns
    -------
    tuple
        ``(scores, iterations, ranked)`` where ``scores`` and ``iterations``
        are returned by ``calculate_pagerank`` unchanged and ``ranked`` is a
        list of ``(page_id, score)`` pairs sorted by score, highest first.

    Raises
    ------
    AssertionError
        If some column of ``link_matrix`` does not sum to 1.
    """
    banner = "============================================="
    n_pages = link_matrix.shape[0]

    # False is passed in the dangling-nodes slot below, exactly as the original
    # calls did (presumably "no dangling nodes" -- confirm against
    # calculate_pagerank). Guard that assumption: no column may be all zeros.
    assert np.allclose(link_matrix.sum(axis=0), 1.0), (
        "link matrix must be column-stochastic (every page needs an out-link)"
    )

    print(f"{prefix}{banner}")
    print(title)
    print(banner)

    scores, iterations = calculate_pagerank(link_matrix, n_pages, False, m=m)

    # Pair 1-based page IDs with their scores and rank them, best first.
    page_indices = np.arange(1, n_pages + 1)
    ranked = sorted(
        zip(page_indices, scores.flatten()),
        key=operator.itemgetter(1),
        reverse=True,
    )

    for page_id, score in ranked:
        print(f"Page {page_id}: {score:.4f}")

    print(f"Calculation completed in {iterations} iterations.")
    return scores, iterations, ranked


# Link matrix of the 4-page web.
A_4pages = np.array([
    [0.0, 0.0, 1.0, 0.5],
    [1/3, 0.0, 0.0, 0.0],
    [1/3, 0.5, 0.0, 0.5],
    [1/3, 0.5, 0.0, 0.0]
])

pagerank_scores_4pages, iterations_4pages, results_4pages_sorted = run_pagerank_demo(
    "  EXECUTION: WEB WITH 4 PAGES (FIGURE 2.1) ", A_4pages, m=0.15
)

# Link matrix of the 5-page web. Row 5 is all zeros: no page links to page 5.
A_5pages = np.array([
    [0.0, 1.0, 0.0, 0.0, 0.0],
    [1.0, 0.0, 0.0, 0.0, 0.0],
    [0.0, 0.0, 0.0, 1.0, 0.5],
    [0.0, 0.0, 1.0, 0.0, 0.5],
    [0.0, 0.0, 0.0, 0.0, 0.0]
])

# The prefix reproduces the two blank lines that separate the two reports.
pagerank_scores_5pages, iterations_5pages, results_5pages_sorted = run_pagerank_demo(
    "  EXECUTION: WEB WITH 5 PAGES (FIGURE 2.2) ", A_5pages, m=0.15, prefix="\n\n"
)

  EXECUTION: WEB WITH 4 PAGES (FIGURE 2.1) 
Page 1: 0.3682
Page 3: 0.2880
Page 4: 0.2021
Page 2: 0.1418
Calculation completed in 21 iterations.


  EXECUTION: WEB WITH 5 PAGES (FIGURE 2.2) 
Page 3: 0.2850
Page 4: 0.2850
Page 1: 0.2000
Page 2: 0.2000
Page 5: 0.0300
Calculation completed in 2 iterations.
