# In [None]:
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
from networkx.algorithms.community import greedy_modularity_communities
import seaborn as sns

# === LOAD AND PREPROCESS DATA ===
# Read the CSV, discard rows where either domain column is missing, then
# lower-case and trim both domain columns so that differently-cased or padded
# spellings of the same domain compare equal.
df = (
    pd.read_csv("your_email_data.csv")
    .dropna(subset=["from_domain", "to_domain"])
    .assign(
        from_domain=lambda frame: frame["from_domain"].str.lower().str.strip(),
        to_domain=lambda frame: frame["to_domain"].str.lower().str.strip(),
    )
)
def explode_urls(row):
    """Turn one ``urls`` cell into a list of URLs.

    Args:
        row: The cell value. Despite the name this is a single value, not a
            DataFrame row: a comma-separated string, an existing list, or
            anything else (e.g. NaN / None).

    Returns:
        * for a list: the very same list object, untouched;
        * for a string: the comma-separated pieces, each stripped of
          surrounding whitespace, with empty pieces dropped;
        * for any other type: an empty list.
    """
    if isinstance(row, list):
        return row
    if not isinstance(row, str):
        return []
    pieces = (piece.strip() for piece in row.split(","))
    return [piece for piece in pieces if piece]
# Replace every raw ``urls`` cell with the list returned by explode_urls
# (strings are split on commas, lists pass through, anything else becomes []).
df["urls"] = df["urls"].apply(explode_urls)

# === BUILD DOMAIN-DOMAIN GRAPH ===
# Directed graph with one node per domain and an edge from_domain -> to_domain.
# nx.DiGraph keeps at most one edge per ordered pair, so repeated rows for the
# same pair collapse into a single edge.
G = nx.DiGraph()
# Hand the two columns to add_edges_from as (from, to) tuples instead of
# looping with DataFrame.iterrows(), which builds a Series for every row just
# to read two values from it. Edges are added in the same row order as before.
G.add_edges_from(zip(df["from_domain"], df["to_domain"]))

def _community_index(communities):
    """Map every node to the position of the community that contains it.

    Args:
        communities: Sequence of node collections, such as the list built
            from ``greedy_modularity_communities``.

    Returns:
        dict of ``node -> index`` where ``index`` is the position of the
        node's community within ``communities``.
    """
    return {node: idx for idx, members in enumerate(communities) for node in members}


# === DETECT MAIN COMMUNITIES ===
# Community detection is run on an undirected copy of the domain graph.
undirected_G = G.to_undirected()
main_communities = list(greedy_modularity_communities(undirected_G))
main_community_map = _community_index(main_communities)

# === SELECT ONE MAIN COMMUNITY TO EXPLORE (e.g., community 0) ===
# networkx documents the result as sorted largest-first, so index 0 is the
# biggest community.
target_comm_index = 0
target_nodes = [
    node for node, idx in main_community_map.items() if idx == target_comm_index
]
# .copy() turns the subgraph view into an independent graph.
subG = G.subgraph(target_nodes).copy()

# === DETECT SUB-COMMUNITIES ===
# Same procedure as above, applied to the selected community only.
sub_communities = list(greedy_modularity_communities(subG.to_undirected()))
sub_community_map = _community_index(sub_communities)

# === PLOT SUB-COMMUNITY GRAPH ===
# Tunable display constants.
NODE_BASE_SIZE = 400        # marker size of a node with degree 0
NODE_SIZE_PER_EDGE = 100    # extra marker size per incident edge
LABEL_MIN_DEGREE = 3        # only nodes with at least this total degree get a label

plt.figure(figsize=(14, 11))
# Fixed seed so the layout is the same on every run.
pos = nx.spring_layout(subG, seed=42)

# Total degree (in + out) per node, computed once and reused for both the
# marker sizes and the label filter below.
degree_sum = {n: subG.in_degree(n) + subG.out_degree(n) for n in subG.nodes()}

# One colour per sub-community; each node takes the colour of its sub-community.
palette = sns.color_palette("hls", len(sub_communities)).as_hex()
node_colors = [palette[sub_community_map[n]] for n in subG.nodes()]
node_sizes = [NODE_BASE_SIZE + NODE_SIZE_PER_EDGE * degree_sum[n] for n in subG.nodes()]
nx.draw_networkx_nodes(subG, pos, node_color=node_colors, node_size=node_sizes, alpha=0.85)
nx.draw_networkx_edges(subG, pos, edge_color="gray", alpha=0.3, arrows=True, arrowsize=12)

# Label top-degree nodes only
label_nodes = {n: n for n in subG.nodes() if degree_sum[n] >= LABEL_MIN_DEGREE}
nx.draw_networkx_labels(subG, pos, labels=label_nodes, font_size=9)

# Legend
from matplotlib.patches import Patch
legend_handles = [Patch(color=palette[i], label=f"Sub-Community {i}") for i in range(len(sub_communities))]
plt.legend(handles=legend_handles, title=f"Sub-Communities in Main Community {target_comm_index}", loc="upper right")

plt.title(f"🔍 Sub-Communities from Main Community {target_comm_index}", fontsize=15)
plt.axis("off")
plt.tight_layout()
plt.show()
