In [None]:
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
from matplotlib.patches import Patch
from networkx.algorithms.community import greedy_modularity_communities
import seaborn as sns

# === LOAD & CLEAN ===
# Read the raw export and discard rows that lack either endpoint domain.
df = pd.read_csv("your_email_data.csv").dropna(subset=["from_domain", "to_domain"])

# Lower-case and trim both domain columns so that spellings differing only in
# case or surrounding whitespace end up as the same value.
for domain_column in ("from_domain", "to_domain"):
    df[domain_column] = df[domain_column].str.lower().str.strip()

# Normalize `urls`
def explode_urls(row):
    """Coerce one raw `urls` value into a list of URLs.

    Despite the parameter name, this file applies the function to individual
    values of the `urls` column rather than to whole rows.

    - A string is split on commas; every piece is stripped of surrounding
      whitespace and pieces that end up empty are dropped.
    - A list is handed back unchanged (the very same object, not a copy).
    - Any other value (e.g. None or a float NaN) yields an empty list.
    """
    if isinstance(row, list):
        return row
    if not isinstance(row, str):
        return []
    stripped_pieces = (piece.strip() for piece in row.split(","))
    return [piece for piece in stripped_pieces if piece]
# Replace each raw `urls` value with the list produced by explode_urls.
url_lists = df["urls"].apply(explode_urls)
df["urls"] = url_lists

# === BUILD GRAPH ===
# Directed graph with one edge per distinct (sender domain -> recipient domain)
# pair. DiGraph does not keep parallel edges, so repeated pairs collapse into a
# single edge and degrees taken from G count distinct partner domains, not
# message volume.
G = nx.DiGraph()
# Feed the two columns straight to add_edges_from instead of looping over
# df.iterrows(), which builds a Series for every row just to read two fields.
# Edges are added in row order, so node and edge insertion order is unchanged.
G.add_edges_from(zip(df["from_domain"], df["to_domain"]))

# === METRICS & COMMUNITY ===
# Degree tables keyed by domain. The in/out tables feed the printed rankings;
# their sum drives node size and the labelling threshold in the plot.
in_degrees, out_degrees = dict(G.in_degree()), dict(G.out_degree())
degree_sum = {
    domain: in_degrees.get(domain, 0) + out_degrees.get(domain, 0)
    for domain in G
}

# Community detection runs on an undirected copy of the graph; each node is
# then mapped to the index of the community that contains it.
undirected_G = G.to_undirected()
communities = list(greedy_modularity_communities(undirected_G))
community_map = {
    member: index
    for index, members in enumerate(communities)
    for member in members
}

# One "hls" palette entry per community. Nodes are coloured by community and
# sized by total degree on top of a base size of 500.
num_communities = len(communities)
palette = sns.color_palette("hls", num_communities).as_hex()
node_colors = [palette[community_map[domain]] for domain in G]
node_sizes = [200 * degree_sum[domain] + 500 for domain in G]

# === PLOT ===
# Spring layout with a fixed seed so the node placement is reproducible.
pos = nx.spring_layout(G, k=0.7, iterations=50, seed=42)
plt.figure(figsize=(16, 12))

# Nodes first, then faint directed edges.
nx.draw_networkx_nodes(
    G,
    pos,
    node_size=node_sizes,
    node_color=node_colors,
    alpha=0.85,
)
nx.draw_networkx_edges(
    G,
    pos,
    edge_color="gray",
    alpha=0.3,
    arrows=True,
    arrowsize=12,
)

# Only nodes whose total degree is at least 3 get a text label.
label_nodes = {
    domain: domain for domain, total in degree_sum.items() if total >= 3
}
nx.draw_networkx_labels(
    G,
    pos,
    labels=label_nodes,
    font_size=10,
    font_color="black",
)

# One legend swatch per community, in palette order.
legend_handles = [
    Patch(color=swatch, label=f"Community {index}")
    for index, swatch in enumerate(palette)
]
plt.legend(
    handles=legend_handles,
    title="Communities",
    loc="upper right",
    fontsize=10,
    title_fontsize=11,
)

plt.title("📧 Targeted Phishing: Domain-to-Domain Community Graph", fontsize=16)
plt.axis("off")
plt.tight_layout()
plt.show()

# === INSIGHTS ===
# Rank domains by out-degree (senders) and in-degree (targets) and keep the
# five largest of each. Degrees come from the graph, so they count distinct
# partner domains.
top_senders = pd.Series(out_degrees).sort_values(ascending=False).head(5)
most_targeted = pd.Series(in_degrees).sort_values(ascending=False).head(5)

print("🔺 Top Sending Domains:\n", top_senders)
print("🎯 Most Targeted Domains:\n", most_targeted)
