In [None]:
# Imports
import json
import networkx as nx
import matplotlib.pyplot as plt
from networkx.algorithms import bipartite
import community as community_louvain
import pandas as pd

In [None]:
# ---------------------------
# DATASET
# ---------------------------

# Edge list read from the working directory; the "source" column holds the
# agency side and the "target" column holds the report side of each link.
df = pd.read_csv("edges.csv")
print(df.head())

# Distinct node identifiers for each side of the bipartite graph.
agencies = pd.unique(df["source"])
reports = pd.unique(df["target"])

# Organisation metadata; later cells look up the nested "type" -> "name" entry
# by the string form of each agency identifier.
with open("filtered_organisations_with_types.json", "r", encoding="utf-8") as f:
    roles_data = json.loads(f.read())

In [None]:
# ---------------------------
# Part 4: Bipartite Graph Creation
# ---------------------------

# Create bipartite graph
B = nx.Graph()

# Add both node sets, tagged with the conventional "bipartite" attribute
B.add_nodes_from(reports, bipartite=0)  # Set 0: Reports
B.add_nodes_from(agencies, bipartite=1)  # Set 1: Agencies

# Add edges from the two endpoint columns only. Feeding `df.values` straight
# into add_edges_from breaks as soon as the CSV carries any extra column,
# because a third tuple element is interpreted as an edge-attribute dict.
B.add_edges_from(zip(df["source"], df["target"]))

# Visualize bipartite graph
plt.figure(figsize=(12, 8))
pos = nx.spring_layout(B, seed=42)
# Membership test against a set is O(1); `node in reports` on the NumPy array
# rescans the whole array for every node.
report_nodes = set(reports)
colors = ["skyblue" if node in report_nodes else "lightgreen" for node in B.nodes()]
nx.draw(B, pos, with_labels=True, node_color=colors)
plt.title("Bipartite Graph: Reports ↔ Agencies")
plt.show()

In [None]:
# ---------------------------
# Part 5: Projected Agency Graph
# ---------------------------

# Project to agency-only graph. Per the networkx documentation, each edge's
# "weight" is the number of neighbors (here: reports) the two agencies share.
agency_proj = bipartite.weighted_projected_graph(B, agencies)

# Visualize weighted agency graph
plt.figure(figsize=(10, 6))
edge_weights = [agency_proj[u][v]["weight"] for u, v in agency_proj.edges()]
# Seeded so the figure is reproducible. The layout has to be handed to nx.draw
# explicitly; otherwise nx.draw ignores it and computes its own random layout.
pos = nx.spring_layout(agency_proj, seed=42)
nx.draw(agency_proj, pos, with_labels=True, width=edge_weights, node_color="lightcoral")

plt.title("Agency-Only Projected Graph")
plt.show()


In [None]:
# ---------------------------
# Part 6: Role Assignment and Role-Colored Graph
# ---------------------------

# Label used for agencies without usable role metadata
UNKNOWN_ROLE = "Unknown"

# Summarize the metadata instead of dumping the whole JSON into the output
print(f"Loaded role metadata for {len(roles_data)} organisations")

# Attach a role to every agency. The `or` fallbacks cover entries whose record,
# "type" field or "name" field is present but null, which a plain
# .get(key, default) chain does not handle.
for node in agency_proj.nodes():
    org_type = (roles_data.get(str(node)) or {}).get("type") or {}
    agency_proj.nodes[node]["role"] = org_type.get("name") or UNKNOWN_ROLE

# Check for missing roles. Every node now carries a "role" key, so the agencies
# of interest are the ones that fell back to the placeholder.
missing_roles = [n for n in agency_proj.nodes() if agency_proj.nodes[n]["role"] == UNKNOWN_ROLE]
if missing_roles:
    print("Warning: These agencies have no assigned role:", missing_roles)
    print(len(missing_roles), "agencies have no assigned role.")


# Unique roles
unique_roles = set(agency_proj.nodes[n]["role"] for n in agency_proj.nodes())
# Sorted so the role -> color pairing is identical on every run (set iteration
# order for strings changes between interpreter sessions).
ordered_roles = sorted(unique_roles, key=str)
print("Unique roles:", ordered_roles)

# Create role-color mapping
role_colors = [
    "blue",
    "red",
    "green",
    "purple",
    "orange",
    "pink",
]

if len(ordered_roles) > len(role_colors):
    print(
        f"Warning: {len(ordered_roles)} roles but only {len(role_colors)} colors; "
        "the remaining roles are drawn in gray."
    )

role_color_map = dict(zip(ordered_roles, role_colors))
# Assign colors to nodes based on roles
for node in agency_proj.nodes():
    role = agency_proj.nodes[node]["role"]
    agency_proj.nodes[node]["color"] = role_color_map.get(role, "gray")  # Default to gray if role not found

# Visualize agency graph with roles
plt.figure(figsize=(10, 6))
pos = nx.spring_layout(agency_proj, seed=42)  # seeded for a reproducible figure
colors = [agency_proj.nodes[node]["color"] for node in agency_proj.nodes()]
nx.draw(agency_proj, pos, with_labels=True, node_color=colors)
plt.title("Agency Graph with colored roles")
plt.show()

In [None]:

# ---------------------------
# Part 7: Centrality Measures
# ---------------------------
# Four centrality scores for the projected agency graph, each a mapping
# from agency to score.
degree_centrality = nx.degree_centrality(agency_proj)
betweenness_centrality = nx.betweenness_centrality(agency_proj)
closeness_centrality = nx.closeness_centrality(agency_proj)
eigenvector_centrality = nx.eigenvector_centrality(agency_proj)

# Five highest-scoring (agency, score) pairs per measure. The sort is stable,
# so tied agencies keep the order they have in the score mapping.
top_degree = sorted(degree_centrality.items(), key=lambda pair: pair[1], reverse=True)[:5]
top_betweenness = sorted(betweenness_centrality.items(), key=lambda pair: pair[1], reverse=True)[:5]
top_closeness = sorted(closeness_centrality.items(), key=lambda pair: pair[1], reverse=True)[:5]
top_eigenvector = sorted(eigenvector_centrality.items(), key=lambda pair: pair[1], reverse=True)[:5]

# Display top agencies by centrality
print("\n--- Centrality Measures ---")
for heading, ranking in (
    ("Top 5 by Degree Centrality:", top_degree),
    ("\nTop 5 by Betweenness Centrality:", top_betweenness),
    ("\nTop 5 by Closeness Centrality:", top_closeness),
    ("\nTop 5 by Eigenvector Centrality:", top_eigenvector),
):
    print(heading)
    for agency, centrality in ranking:
        print(f"{agency}: {centrality:.4f}")


# TODO: Visualize top-ranking agencies or summarize findings in a table. Highlight which agencies serve as bridges or hubs in the network

In [None]:
# ---------------------------
# Part 8: Role Assortativity
# ---------------------------
# Assortativity over the "role" node attribute: how strongly agencies are
# linked to other agencies carrying the same role label.
assortativity = nx.attribute_assortativity_coefficient(agency_proj, attribute="role")
print(f"\nRole Assortativity Coefficient: {assortativity:.3f}")

In [None]:
# ---------------------------
# Part 9: Community Detection (Louvain)
# ---------------------------

# Louvain is a randomized heuristic; a fixed seed makes the detected partition
# (and everything colored by it in later cells) reproducible across runs.
c = nx.algorithms.community.louvain_communities(agency_proj, seed=42)
print(f"\nNumber of communities detected: {len(c)}")
# One tab20 color per community index
community_colors = [plt.cm.tab20(i) for i in range(len(c))]
# Create a mapping from node to community index
node_to_community = {node: i for i, members in enumerate(c) for node in members}
# Store the community index on each node
for node in agency_proj.nodes():
    agency_proj.nodes[node]["community"] = node_to_community[node]
# Visualize communities, colored with the tab20 palette built above so that
# this figure matches the community colors used by the later cells.
plt.figure(figsize=(10, 6))
# Seeded: this layout is reused by the following visualization cells.
pos = nx.spring_layout(agency_proj, seed=42)
colors = [community_colors[agency_proj.nodes[node]["community"]] for node in agency_proj.nodes()]
nx.draw(agency_proj, pos, with_labels=True, node_color=colors)
plt.title("Agency Graph with Communities")
plt.show()


In [None]:
# ---------------------------
# Part 11: Enhanced Visualization
# ---------------------------

# --- Visualization with Centrality and Role ---
# Node size based on degree centrality (scaled)
sizes = [1000 * degree_centrality[node] for node in agency_proj.nodes()]

# Per-node colors, read from the "color" and "community" node attributes
colors_by_role = [attrs["color"] for _, attrs in agency_proj.nodes(data=True)]
colors_by_community = [plt.cm.tab20(attrs["community"]) for _, attrs in agency_proj.nodes(data=True)]

# Two side-by-side panels sharing the same layout (`pos`) and node sizes;
# only the coloring differs between them.
plt.figure(figsize=(14, 6))
panels = (
    (colors_by_role, "Agency Graph: Node Size ~ Degree Centrality, Color ~ Role"),
    (colors_by_community, "Agency Graph: Node Size ~ Degree Centrality, Color ~ Community"),
)
for panel_index, (panel_colors, panel_title) in enumerate(panels, start=1):
    plt.subplot(1, 2, panel_index)
    nx.draw(agency_proj, pos, with_labels=True, node_size=sizes, node_color=panel_colors)
    plt.title(panel_title)

plt.tight_layout()
plt.show()


In [None]:
# ---------------------------
# Part 12: Failure Simulation of Central Nodes
# ---------------------------

# Identify top 3 agencies by betweenness centrality: order the agencies by
# their score, highest first, and keep the first three.
top_3_nodes = sorted(betweenness_centrality, key=betweenness_centrality.get, reverse=True)[:3]
print("Top 3 central agencies for removal:", top_3_nodes)

# Function to compute and display network metrics
def compute_network_metrics(G, label=""):
    """Compute, print and return basic connectivity metrics for a graph.

    Parameters
    ----------
    G : graph
        Undirected networkx graph to analyse.
    label : str, optional
        Text appended to the printed section header.

    Returns
    -------
    dict
        Keys "Connected Components", "Average Shortest Path Length" and
        "Diameter". The two path-based values are computed on the whole graph
        when it is connected, on its largest connected component when it is
        not, and are None when the graph has no nodes.
    """
    metrics = {
        "Connected Components": nx.number_connected_components(G),
        "Average Shortest Path Length": None,
        "Diameter": None
    }

    note = None
    # Connectivity is undefined for a graph without nodes (nx.is_connected
    # raises), so path metrics are only attempted on a non-empty graph.
    if G.number_of_nodes() > 0:
        if nx.is_connected(G):
            target = G
        else:
            # Path lengths are infinite across components; fall back to the
            # largest component and say so in the printed output.
            largest_cc = max(nx.connected_components(G), key=len)
            target = G.subgraph(largest_cc)
            note = (
                "Note: path metrics computed on the largest connected component "
                f"({len(largest_cc)} of {G.number_of_nodes()} nodes)"
            )
        metrics["Average Shortest Path Length"] = nx.average_shortest_path_length(target)
        metrics["Diameter"] = nx.diameter(target)

    print(f"\n--- Network Metrics {label} ---")
    for k, v in metrics.items():
        print(f"{k}: {v}")
    if note:
        print(note)
    return metrics

# Compute metrics BEFORE removal
compute_network_metrics(agency_proj, label="(Before Node Removal)")

# Visualize BEFORE removal (reuses the layout and community colors from the earlier cells)
plt.figure(figsize=(8, 6))
nx.draw(agency_proj, pos, with_labels=True, node_size=600, node_color=colors_by_community)
plt.title("Before Node Removal")
plt.show()

# Clone and remove top nodes; the copy keeps the original graph intact
agency_proj_removed = agency_proj.copy()
agency_proj_removed.remove_nodes_from(top_3_nodes)

# Compute metrics AFTER removal
compute_network_metrics(agency_proj_removed, label="(After Node Removal)")

# Visualize AFTER removal
plt.figure(figsize=(8, 6))
pos_removed = nx.spring_layout(agency_proj_removed, seed=42)
# Every surviving node came from agency_proj, and the copy carries the node
# attributes along, so the community index is read from the reduced graph
# directly (the former `else "gray"` fallback could never be reached).
colors_removed = [plt.cm.tab20(agency_proj_removed.nodes[n]["community"])
                  for n in agency_proj_removed.nodes()]
nx.draw(agency_proj_removed, pos_removed, with_labels=True, node_size=600, node_color=colors_removed)
plt.title("After Node Removal of Top 3 Central Agencies")
plt.show()
