In [1]:
import pandas as pd
import networkx as nx
import numpy as np

# Load dataset
data = pd.read_csv(r"D:\AIVS-Projects\2. Page Rank\data.csv")

# Assuming the first column is the person and the rest are the people they found impressive
person_column = data.columns[0]
other_columns = data.columns[1:]

# Create a directed graph
G = nx.DiGraph()

for _, row in data.iterrows():
    person_a = row[person_column]
    for person_b in row[other_columns].dropna():
        G.add_edge(person_a, person_b)


In [2]:
def random_walk_pagerank(G, alpha=0.85, max_iter=100, tol=1.0e-6):
    n = len(G)
    pr = {node: 1/n for node in G}  # Initialize the PageRank dictionary

    for _ in range(max_iter):
        new_pr = {}
        for node in G:
            rank_sum = sum(pr[neighbor] / len(list(G.neighbors(neighbor))) for neighbor in G.predecessors(node))
            new_pr[node] = (1 - alpha) / n + alpha * rank_sum
        
        # Check for convergence
        if max(abs(new_pr[node] - pr[node]) for node in G) < tol:
            break
        pr = new_pr
    
    return pr

# Get the PageRank of each node
pagerank_scores = random_walk_pagerank(G)

# Get the top 10 most important persons by PageRank
top_10_pagerank = sorted(pagerank_scores.items(), key=lambda x: x[1], reverse=True)[:10]
print("Top 10 most important persons by Random Walk (PageRank):")
for person, score in top_10_pagerank:
    print(f"{person}: {score}")


Top 10 most important persons by Random Walk (PageRank):
 : 0.014038474314533946
2023CSB1091: 0.00976348594322847
2023MCB1316: 0.009261824418364106
2023CSB1132: 0.008470311652995194
2023CSB1162: 0.008449750823022984
2023MCB1284: 0.00842087251709934
2023CSB1099: 0.008313132115449132
2023MCB1302: 0.008292890979355972
2023MCB1291: 0.00808687254722459
2023CSB1173: 0.008009268977073183


In [3]:
def equal_points_distribution(G, max_iter=100, tol=1.0e-6):
    n = len(G)
    points = {node: 1/n for node in G}  # Initialize points dictionary

    for _ in range(max_iter):
        new_points = {node: 0 for node in G}
        for node in G:
            if len(G[node]) == 0:  # Handle sinkholes
                for other in G:
                    new_points[other] += points[node] / n
            else:
                distributed_points = points[node] / len(G[node])
                for neighbor in G[node]:
                    new_points[neighbor] += distributed_points
        
        # Check for convergence
        if max(abs(new_points[node] - points[node]) for node in G) < tol:
            break
        points = new_points
    
    return points

# Get the points of each node
points_scores = equal_points_distribution(G)

# Get the top 10 most important persons by Equal Points Distribution
top_10_points = sorted(points_scores.items(), key=lambda x: x[1], reverse=True)[:10]
print("Top 10 most important persons by Equal Points Distribution:")
for person, score in top_10_points:
    print(f"{person}: {score}")


Top 10 most important persons by Equal Points Distribution:
 : 0.019383857235941528
2023CSB1091: 0.014375506147839508
2023MCB1316: 0.013625454776504625
2023MCB1284: 0.012136203127959043
2023CSB1162: 0.01198613197220061
2023CSB1132: 0.011949836233072792
2023MCB1291: 0.011808131520629083
2023MCB1302: 0.011791530481124503
2023CSB1100: 0.011477171767540052
2023CSB1099: 0.011305517132848723
