In [1]:
# !pip install openpyxl
import pandas as pd
import numpy as np

In [2]:
# Create Standardized Files


def standardize_scores(raw_df, node_from_index=False):
    """Return a frame with 'Node', 'Rank' and 'Score' columns, sorted by rank.

    Parameters
    ----------
    raw_df : pandas.DataFrame
        Scores as read from disk. When ``node_from_index`` is False the first
        column is used as the node id and the second as the score. When it is
        True the first column is the score and the row index is the node id.
    node_from_index : bool, default False
        Take the node id from the DataFrame index instead of a column.

    Returns
    -------
    pandas.DataFrame
        Columns ``['Node', 'Rank', 'Score']`` ordered by ascending rank, where
        rank 1 is the highest score.
    """
    columns = raw_df.columns
    if node_from_index:
        named = raw_df.rename(columns={columns[0]: 'Score'}).assign(Node=raw_df.index)
    else:
        named = raw_df.rename(columns={columns[0]: 'Node', columns[1]: 'Score'})

    # method='min' gives tied scores the same whole-number rank (1, 2, 2, 4).
    # The default method='average' yields fractional ranks such as 2.5, which
    # the integer cast would silently truncate.
    ranks = named['Score'].rank(ascending=False, method='min').astype(int)

    # A stable sort keeps tied nodes in their input order, so the saved
    # workbook is the same from run to run.
    return (
        named.assign(Rank=ranks)[['Node', 'Rank', 'Score']]
        .sort_values('Rank', kind='stable')
    )


leader_rank_path = 'LeaderRank_Scores.xlsx'
page_rank_path = 'pagerank_scores.xlsx'
eigenvector_centrality_path = 'eigenvector_centrality_scores.xlsx'

# Raw inputs keep their own names so they remain available for inspection
leader_rank_raw = pd.read_excel(leader_rank_path)
page_rank_raw = pd.read_excel(page_rank_path)
eigenvector_centrality_raw = pd.read_excel(eigenvector_centrality_path)

# LeaderRank has scores in a single column; the node id is taken from the
# zero-based row index (first data row, i.e. spreadsheet row 2 -> Node 0).
# NOTE(review): an earlier comment said "A2 -> 1"; confirm that zero-based ids
# are what the other two files use.
leader_rank_df = standardize_scores(leader_rank_raw, node_from_index=True)

# PageRank and Eigenvector Centrality already carry (node, score) columns
page_rank_df = standardize_scores(page_rank_raw)
eigenvector_centrality_df = standardize_scores(eigenvector_centrality_raw)

# Save
leader_rank_output_path = 'Standardized_LeaderRank.xlsx'
page_rank_output_path = 'Standardized_PageRank.xlsx'
eigenvector_centrality_output_path = 'Standardized_Eigenvector_Centrality.xlsx'

leader_rank_df.to_excel(leader_rank_output_path, index=False)
page_rank_df.to_excel(page_rank_output_path, index=False)
eigenvector_centrality_df.to_excel(eigenvector_centrality_output_path, index=False)

leader_rank_output_path, page_rank_output_path, eigenvector_centrality_output_path


('Standardized_LeaderRank.xlsx',
 'Standardized_PageRank.xlsx',
 'Standardized_Eigenvector_Centrality.xlsx')

In [3]:
# Get Top 20 Most Influential Nodes (Based on Average Rank)

TOP_N = 20               # number of nodes reported in the final table
CONSENSUS_CUTOFF = 100   # rank threshold used for the cross-method agreement check


def format_rank(value, width):
    """Right-align an integer rank in ``width`` characters, or 'N/A' if missing.

    A node that is absent from one method has NaN in that column; calling
    ``int()`` on NaN raises ValueError, so missing ranks are rendered as text.
    """
    if pd.isna(value):
        return f"{'N/A':>{width}}"
    return f"{int(value):{width}d}"


# Read the three standardized files
leader_df = pd.read_excel('Standardized_LeaderRank.xlsx', engine='openpyxl')
pagerank_df = pd.read_excel('Standardized_PageRank.xlsx', engine='openpyxl')
eigen_df = pd.read_excel('Standardized_Eigenvector_Centrality.xlsx', engine='openpyxl')

# Create a map from Node to Rank for each method
leader_map = dict(zip(leader_df['Node'], leader_df['Rank']))
pagerank_map = dict(zip(pagerank_df['Node'], pagerank_df['Rank']))
eigen_map = dict(zip(eigen_df['Node'], eigen_df['Rank']))

# Get all unique nodes
all_nodes = set(leader_df['Node']) | set(pagerank_df['Node']) | set(eigen_df['Node'])

# One row per node, in sorted node order so that ties in the average rank are
# always broken the same way. Series.map leaves NaN where a method has no rank
# for the node.
results_df = pd.DataFrame({'Node': sorted(all_nodes)}).assign(
    LeaderRank=lambda frame: frame['Node'].map(leader_map),
    PageRank=lambda frame: frame['Node'].map(pagerank_map),
    Eigenvector=lambda frame: frame['Node'].map(eigen_map),
)

# Calculate average rank (NaN entries are skipped, so a node missing from one
# method is averaged over the methods that do rank it)
results_df['Average_Rank'] = results_df[['LeaderRank', 'PageRank', 'Eigenvector']].mean(axis=1)

# Sort by average rank (lower is better since 1 is top rank)
top_nodes = results_df.nsmallest(TOP_N, 'Average_Rank')

# Print results
print(f"\nTop {TOP_N} Most Influential Nodes (Based on Average Rank):")
print("-" * 80)
print("Node    Average    LeaderRank    PageRank    Eigenvector")
print("         Rank")
print("-" * 80)
for row in top_nodes.itertuples(index=False):
    print(
        f"{int(row.Node):4d}    {row.Average_Rank:8.2f}    "
        f"{format_rank(row.LeaderRank, 10)}    "
        f"{format_rank(row.PageRank, 8)}    "
        f"{format_rank(row.Eigenvector, 10)}"
    )

# Save detailed results
top_nodes.to_excel('Most_Influential_Nodes.xlsx', index=False)
print("\nDetailed results saved to 'Most_Influential_Nodes.xlsx'")

# Show consistency stats
print(f"\nNodes appearing in top {CONSENSUS_CUTOFF} of all methods:")
top_100_leader = set(leader_df.loc[leader_df['Rank'] <= CONSENSUS_CUTOFF, 'Node'])
top_100_pagerank = set(pagerank_df.loc[pagerank_df['Rank'] <= CONSENSUS_CUTOFF, 'Node'])
top_100_eigen = set(eigen_df.loc[eigen_df['Rank'] <= CONSENSUS_CUTOFF, 'Node'])
consistent_top = top_100_leader & top_100_pagerank & top_100_eigen
print(f"Number of nodes: {len(consistent_top)}")
if consistent_top:
    print("Nodes:", sorted(consistent_top))


Top 20 Most Influential Nodes (Based on Average Rank):
--------------------------------------------------------------------------------
Node    Average    LeaderRank    PageRank    Eigenvector
         Rank
--------------------------------------------------------------------------------
3101       36.67            46          43            21
3437       57.67           102           1            70
3002       81.00            34         199            10
2966      105.67            69         212            36
2944      106.33            73         201            45
3397      111.00           173          54           106
3051      118.67           147         100           109
3456      119.33            79         230            49
2126      142.67            17         233           178
3252      148.33           119         257            69
3360      149.67           115         266            68
3136      166.33           149         254            96
3449      170.00           

In [5]:
# Kendall's Tau Correlation
from scipy import stats

# Load the three standardized ranking tables
eigenvector_df, leaderrank_df, pagerank_df = (
    pd.read_excel(workbook)
    for workbook in (
        "Standardized_Eigenvector_Centrality.xlsx",
        "Standardized_LeaderRank.xlsx",
        "Standardized_PageRank.xlsx",
    )
)

# Node -> Rank lookup for every method
eigenvector_ranks, leaderrank_ranks, pagerank_ranks = (
    dict(zip(table['Node'], table['Rank']))
    for table in (eigenvector_df, leaderrank_df, pagerank_df)
)

# Only nodes that are ranked by all three methods can be compared
common_nodes = set(eigenvector_ranks).intersection(leaderrank_ranks, pagerank_ranks)

# Fix a single node ordering, then read each method's ranks in that order so
# the three lists are aligned element by element
node_sequence = list(common_nodes)
eigenvector_ordered = list(map(eigenvector_ranks.__getitem__, node_sequence))
leaderrank_ordered = list(map(leaderrank_ranks.__getitem__, node_sequence))
pagerank_ordered = list(map(pagerank_ranks.__getitem__, node_sequence))

# Pairwise Kendall's Tau (statistic and p-value) between the methods
tau_eigen_leader, p_eigen_leader = stats.kendalltau(eigenvector_ordered, leaderrank_ordered)
tau_eigen_page, p_eigen_page = stats.kendalltau(eigenvector_ordered, pagerank_ordered)
tau_leader_page, p_leader_page = stats.kendalltau(leaderrank_ordered, pagerank_ordered)

# Report
print("Kendall's Tau Correlation Results:")
print("-" * 50)
comparisons = (
    ("Eigenvector vs LeaderRank", tau_eigen_leader, p_eigen_leader),
    ("Eigenvector vs PageRank", tau_eigen_page, p_eigen_page),
    ("LeaderRank vs PageRank", tau_leader_page, p_leader_page),
)
for label, tau, p_value in comparisons:
    print(f"{label}: {tau:.4f} (p-value: {p_value:.4e})")
print(f"\nNumber of nodes compared: {len(common_nodes)}")

Kendall's Tau Correlation Results:
--------------------------------------------------
Eigenvector vs LeaderRank: 0.3927 (p-value: 1.8113e-306)
Eigenvector vs PageRank: 0.0174 (p-value: 9.7440e-02)
LeaderRank vs PageRank: 0.0561 (p-value: 8.7969e-08)

Number of nodes compared: 4039
