In [1]:
# Imports
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os

In [2]:
# Read the combined per-node centrality table into the notebook-wide frame `df`.
df = pd.read_csv(filepath_or_buffer="output/combined_metrics.csv")

In [3]:
# --- Histograms ---
# One histogram per column after the first: 50 bins, linear x axis, log-scaled
# count axis. Each figure is written to output/hist_<column>.png and closed
# immediately so figures do not accumulate in memory.
metric_columns = df.columns[1:]
for metric in metric_columns:
    fig, ax = plt.subplots(figsize=(6, 4))
    values = df[metric].dropna()  # missing entries are excluded from the bins
    sns.histplot(values, bins=50, log_scale=(False, True), ax=ax)
    ax.set_title(f"Distribution of {metric}")
    ax.set_xlabel(metric)
    ax.set_ylabel("Frequency (log scale)")
    fig.tight_layout()
    fig.savefig(f"output/hist_{metric}.png")
    plt.close(fig)

print("✅ Histograms saved to /output/")

✅ Histograms saved to /output/


In [4]:
# --- Scatterplots (pairwise comparisons) ---
# Each (x, y) pair below becomes one scatterplot saved as
# output/scatter_<x>_vs_<y>.png. A pair is skipped without any message when
# either column is absent from `df`.
pairs = [
    ("in_degree", "pagerank"),
    ("out_degree", "pagerank"),
    ("betweenness", "hub_score"),
    ("betweenness", "authority_score"),
]

for x_metric, y_metric in pairs:
    if x_metric not in df.columns or y_metric not in df.columns:
        continue

    fig, ax = plt.subplots(figsize=(6, 6))
    sns.scatterplot(data=df, x=x_metric, y=y_metric, alpha=0.6, ax=ax)
    ax.set_title(f"{x_metric} vs. {y_metric}")
    ax.set_xlabel(x_metric)
    ax.set_ylabel(y_metric)
    fig.tight_layout()
    fig.savefig(f"output/scatter_{x_metric}_vs_{y_metric}.png")
    plt.close(fig)

print("✅ Scatterplots saved to /output/")

✅ Scatterplots saved to /output/


In [5]:
# --- Top-10 bar charts ---
# For every column after the first, draw a horizontal bar chart of the ten rows
# with the largest value in that column and save it as output/top10_<column>.png.
for col in df.columns[1:]:
    top10 = df.sort_values(col, ascending=False).head(10)
    # Node IDs appear to load from the CSV as integers. When both axes are
    # numeric, seaborn treats x (the metric) as the categorical axis and plots
    # the node ID as the bar height, merging nodes whose metric values tie.
    # Casting the IDs to strings and pinning orient="h" yields one horizontal
    # bar per node, listed in ranked order.
    top10 = top10.assign(node=top10["node"].astype(str))
    plt.figure(figsize=(8,5))
    sns.barplot(x=col, y="node", data=top10, orient="h", palette="viridis", hue=col, dodge=False, legend=False)
    plt.title(f"Top 10 nodes by {col}")
    plt.tight_layout()
    plt.savefig(f"output/top10_{col}.png")
    plt.close()  # release the figure before the next iteration

print("✅ Top-10 bar charts saved to /output/")

✅ Top-10 bar charts saved to /output/


In [6]:
# --- Correlation heatmap ---
# Pairwise correlation of every column except the "node" identifier, drawn as
# an annotated heatmap and saved to output/correlation_heatmap.png.
metrics_only = df.drop(columns=["node"])
corr = metrics_only.corr()

fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(corr, annot=True, cmap="coolwarm", fmt=".2f", ax=ax)
ax.set_title("Correlation Heatmap of Centrality Metrics")
fig.tight_layout()
fig.savefig("output/correlation_heatmap.png")
plt.close(fig)

print("✅ Correlation heatmap saved to /output/")

✅ Correlation heatmap saved to /output/


In [7]:
# Re-read the combined centrality table so this cell works on the file's contents.
df = pd.read_csv("output/combined_metrics.csv")

# For each metric present in the table, print the five rows with the largest
# value, showing only the node ID and that metric. Absent metrics are skipped.
ranked_metrics = (
    "in_degree",
    "out_degree",
    "pagerank",
    "betweenness",
    "hub_score",
    "authority_score",
)
for metric in ranked_metrics:
    if metric not in df.columns:
        continue
    print(f"\n🔹 Top 5 by {metric}:")
    ranking = df.sort_values(metric, ascending=False)
    print(ranking.head(5)[["node", metric]])


🔹 Top 5 by in_degree:
             node  in_degree
12675  4813567397   0.000037
5687   1905643476   0.000037
86061  8609853337   0.000037
56220  7488855891   0.000037
79424  8341138742   0.000037

🔹 Top 5 by out_degree:
             node  out_degree
12675  4813567397    0.000037
56220  7488855891    0.000037
86061  8609853337    0.000037
5687   1905643476    0.000037
79424  8341138742    0.000037

🔹 Top 5 by pagerank:
               node  pagerank
53993    7226570628  0.000025
2986      445252475  0.000018
139077  10183503218  0.000017
157457  12116337047  0.000017
36716    5716511033  0.000016

🔹 Top 5 by betweenness:
               node  betweenness
139527  10308025429     0.285480
139526  10308015689     0.284101
779       282895148     0.210321
139408  10246832783     0.210314
161640  12623288819     0.205905

🔹 Top 5 by hub_score:
             node  hub_score
11869  4466994312   0.084562
9692   3435239188   0.083878
9698   3435245119   0.083878
11868  4466994311   0.082495
9693  