# Week 2 Interactive Attack-Target Graph

This notebook renders an interactive sponsor->target network from:
- `outputs/week2/attack_target_edges_v1.csv`
- `outputs/week2/attack_target_nodes_v1.csv`

It is intended for exploratory analysis of the current v1 graph.

## Optional one-time install

If your environment is missing dependencies, uncomment the install command in the next cell and run it once.

In [1]:
# Uncomment if needed. `%pip` (rather than `!pip`) installs into the environment of the running kernel.
# %pip install -q pandas networkx plotly

In [2]:
from pathlib import Path
import pandas as pd
import networkx as nx
import plotly.graph_objects as go

# Pandas display limits for this notebook: show up to 120 columns, 180-character output width.
pd.options.display.max_columns = 120
pd.options.display.width = 180

In [3]:
# Locate the analysis root relative to the current working directory.
PROJECT_ROOT = Path.cwd().resolve()

# The working directory itself is the root when it holds both `data/` and `outputs/`.
_cwd_is_root = (PROJECT_ROOT / 'data').exists() and (PROJECT_ROOT / 'outputs').exists()
# Otherwise, when running from a `notebooks/` folder whose parent holds `data/`, use the parent.
_cwd_is_notebooks_dir = (
    PROJECT_ROOT.name == 'notebooks' and (PROJECT_ROOT.parent / 'data').exists()
)
# In every remaining case fall back to the working directory.
ANALYSIS_ROOT = (
    PROJECT_ROOT.parent if (_cwd_is_notebooks_dir and not _cwd_is_root) else PROJECT_ROOT
)

# Input tables for the week-2 graph.
WEEK2_ROOT = ANALYSIS_ROOT.joinpath('outputs', 'week2')
EDGE_PATH = WEEK2_ROOT.joinpath('attack_target_edges_v1.csv')
NODE_PATH = WEEK2_ROOT.joinpath('attack_target_nodes_v1.csv')

# Report what was resolved so a wrong working directory is visible immediately.
print(f'ANALYSIS_ROOT: {ANALYSIS_ROOT}')
print(f'WEEK2_ROOT: {WEEK2_ROOT}')
print(f'EDGE_PATH exists: {EDGE_PATH.exists()}')
print(f'NODE_PATH exists: {NODE_PATH.exists()}')

ANALYSIS_ROOT: /Users/jeremyzay/Desktop/delta_lab/analysis
WEEK2_ROOT: /Users/jeremyzay/Desktop/delta_lab/analysis/outputs/week2
EDGE_PATH exists: True
NODE_PATH exists: True


In [4]:
# Read the edge and node tables from EDGE_PATH / NODE_PATH.
edges, nodes = (pd.read_csv(csv_path) for csv_path in (EDGE_PATH, NODE_PATH))

# Row counts, then a preview of the first five edge rows as the cell output.
print(f'edges: {len(edges)}')
print(f'nodes: {len(nodes)}')
edges.head()

edges: 5939
nodes: 9744


Unnamed: 0,sponsor_name,canonical_entity,mention_count,ad_count,high_confidence_mentions,medium_confidence_mentions,platform_count,party_mode,tone_mode,edge_confidence
0,Frisch for CO CD-03,jeff hurd,478,205,82,396,1,DEM,CONTRAST,medium
1,House Majority PAC,congress,455,453,48,407,1,DEM,NEGATIVE,medium
2,House Majority PAC,medicare,438,297,62,376,1,DEM,NEGATIVE,medium
3,Vote AK Before Party,nick begich,421,137,68,353,1,IND,NEGATIVE,medium
4,Frisch for CO CD-03,adam frisch,418,290,70,348,1,DEM,CONTRAST,medium


## Filter Controls

Tune these values for readability/performance.

In [5]:
# Keep only edges whose mention_count is at least this value (None disables the filter).
MIN_EDGE_MENTION_COUNT = 20
# After sorting by mention_count (descending), keep at most this many edges (None keeps all).
TOP_N_EDGES = 1200
# edge_confidence values to keep; an empty set disables the filter.
EDGE_CONF_KEEP = {'high', 'medium'}
# When True, only the largest weakly connected component of the graph is kept.
KEEP_ONLY_LARGEST_COMPONENT = True
# Passed to nx.spring_layout as its `k` argument.
LAYOUT_K = 0.35
# Passed to nx.spring_layout as its `seed` argument.
LAYOUT_SEED = 42
# Marker size given to the node(s) with the highest mention_count.
MAX_NODE_SIZE = 42
# Marker size given to the node(s) with the lowest mention_count.
MIN_NODE_SIZE = 8

# Optional: limit by sponsor subset (set None to disable)
SPONSOR_INCLUDE = None  # e.g., {'House Majority PAC', 'Congressional Leadership Fund'}

In [6]:
# 1) Edge-level filtering
# Fold every enabled filter into one boolean row mask, then apply it in a single step.
keep_row = pd.Series(True, index=edges.index)
if EDGE_CONF_KEEP:
    keep_row &= edges['edge_confidence'].isin(EDGE_CONF_KEEP)
if MIN_EDGE_MENTION_COUNT is not None:
    keep_row &= edges['mention_count'] >= MIN_EDGE_MENTION_COUNT
if SPONSOR_INCLUDE is not None:
    keep_row &= edges['sponsor_name'].isin(SPONSOR_INCLUDE)

# Most-mentioned edges first, so the optional cap keeps the strongest ones.
f = edges.loc[keep_row].sort_values('mention_count', ascending=False)
if TOP_N_EDGES is not None:
    f = f.iloc[:TOP_N_EDGES]

# 2) Build directed graph
G = nx.DiGraph()

# One sponsor -> target edge per row of the filtered table. Counts are cast to plain ints;
# the categorical attributes fall back to a placeholder when their column is absent.
for record in f.to_dict('records'):
    edge_attrs = {
        'mention_count': int(record['mention_count']),
        'ad_count': int(record['ad_count']),
        'edge_confidence': record.get('edge_confidence', 'unknown'),
        'party_mode': record.get('party_mode', 'UNKNOWN'),
        'tone_mode': record.get('tone_mode', 'UNKNOWN'),
    }
    G.add_edge(record['sponsor_name'], record['canonical_entity'], **edge_attrs)

# 3) Attach node attrs from node table where available
# Some builds may contain duplicate canonical_entity rows; collapse safely.
dup_count = int(nodes['canonical_entity'].duplicated().sum())
if dup_count:
    print(f'Note: collapsing {dup_count} duplicate canonical_entity rows from node table.')


def _dominant_label(labels):
    """Return the most frequent label in `labels`, or 'SPONSOR_OR_UNMAPPED' if there is none.

    `Series.mode()` is evaluated once; when several labels tie, the first one it returns wins.
    """
    modes = labels.mode()
    return 'SPONSOR_OR_UNMAPPED' if modes.empty else modes.iloc[0]


def _count_or_default(value, default=1):
    """Coerce a node-table count to `int`, treating None/NaN as missing.

    A blank numeric cell in the node CSV survives the `max` aggregation as NaN, and
    `int(NaN)` raises ValueError. Such values get the same default that is used for
    nodes absent from the node table.
    """
    if value is None or pd.isna(value):
        return default
    return int(value)


# One row per canonical_entity: largest value of each count, most frequent label.
node_meta = (
    nodes
    .groupby('canonical_entity', as_index=False)
    .agg(
        mention_count=('mention_count', 'max'),
        ad_count=('ad_count', 'max'),
        sponsor_count=('sponsor_count', 'max'),
        platform_count=('platform_count', 'max'),
        label_mode=('label_mode', _dominant_label)
    )
)
node_lookup = node_meta.set_index('canonical_entity').to_dict('index')

# Graph nodes without a row in the node table (presumably the sponsor names, which are
# not canonical entities) get counts of 1 and the 'SPONSOR_OR_UNMAPPED' label.
for n in list(G.nodes()):
    meta = node_lookup.get(n, {})
    for count_col in ('mention_count', 'ad_count', 'sponsor_count', 'platform_count'):
        G.nodes[n][count_col] = _count_or_default(meta.get(count_col, 1))
    G.nodes[n]['label_mode'] = meta.get('label_mode', 'SPONSOR_OR_UNMAPPED')

# 4) Keep largest weakly connected component for clarity
if KEEP_ONLY_LARGEST_COMPONENT and len(G) > 0:
    # Edge direction is ignored when grouping nodes into components.
    largest = max(nx.weakly_connected_components(G), key=len)
    # Copy so G is an independent graph rather than a view onto the unfiltered one.
    G = G.subgraph(largest).copy()

print(f'Filtered graph nodes: {G.number_of_nodes()}')
print(f'Filtered graph edges: {G.number_of_edges()}')

Note: collapsing 1 duplicate canonical_entity rows from node table.
Filtered graph nodes: 647
Filtered graph edges: 1006


In [7]:
# Nothing to lay out or draw if filtering removed every node; say which knobs to relax.
if len(G) == 0:
    raise ValueError('Graph is empty after filtering. Lower MIN_EDGE_MENTION_COUNT or increase TOP_N_EDGES.')

# Spring layout (seeded, so node positions are the same on every run)
pos = nx.spring_layout(G, seed=LAYOUT_SEED, k=LAYOUT_K)

# Node sizing
# Mention count per node (1 when the attribute is absent) and its observed range.
node_mentions = pd.Series(
    {name: attrs.get('mention_count', 1) for name, attrs in G.nodes(data=True)}
)
mn, mx = node_mentions.min(), node_mentions.max()


def scale_size(v, lo=MIN_NODE_SIZE, hi=MAX_NODE_SIZE):
    """Linearly map mention count `v` from the range [mn, mx] onto marker sizes [lo, hi].

    `mn` and `mx` are read from the notebook globals at call time. When every node has
    the same count the range is degenerate, so the midpoint of [lo, hi] is returned.
    """
    if mx != mn:
        return lo + (v - mn) * (hi - lo) / (mx - mn)
    return (lo + hi) / 2

# Colors by label
label_colors = dict(
    PERSON='#1f77b4',
    ORG='#d62728',
    GPE='#2ca02c',
    SPONSOR_OR_UNMAPPED='#7f7f7f',
)


def label_to_color(label):
    """Return the hex color for a node label; labels not in `label_colors` get grey."""
    if label in label_colors:
        return label_colors[label]
    return '#7f7f7f'

# Edge trace (single trace for speed)
# All segments share one coordinate list; a None after each pair of endpoints
# separates one segment from the next.
edge_x = []
edge_y = []
for src, dst in G.edges():
    (x0, y0), (x1, y1) = pos[src], pos[dst]
    edge_x += [x0, x1, None]
    edge_y += [y0, y1, None]

edge_trace = go.Scatter(
    x=edge_x,
    y=edge_y,
    mode='lines',
    line={'width': 0.6, 'color': 'rgba(120,120,120,0.35)'},
    hoverinfo='none',
    showlegend=False,
)

# Node trace
def _node_hover_text(name, attrs):
    """Build the hover label (lines joined with <br>) for one node from its attribute dict."""
    return (
        f"node={name}<br>label={attrs.get('label_mode','SPONSOR_OR_UNMAPPED')}"
        f"<br>mentions={attrs.get('mention_count',0):,}"
        f"<br>ads={attrs.get('ad_count',0):,}"
        f"<br>sponsors={attrs.get('sponsor_count',0):,}"
        f"<br>platforms={attrs.get('platform_count',0):,}"
    )


# Parallel per-node lists, all in G.nodes() order.
node_names = list(G.nodes())
node_x = [pos[name][0] for name in node_names]
node_y = [pos[name][1] for name in node_names]
node_size = [scale_size(G.nodes[name].get('mention_count', 1)) for name in node_names]
node_color = [
    label_to_color(G.nodes[name].get('label_mode', 'SPONSOR_OR_UNMAPPED'))
    for name in node_names
]
node_text = [_node_hover_text(name, G.nodes[name]) for name in node_names]

node_trace = go.Scatter(
    x=node_x,
    y=node_y,
    mode='markers',
    text=node_text,
    hoverinfo='text',
    marker={
        'size': node_size,
        'color': node_color,
        'line': {'width': 0.7, 'color': 'rgba(255,255,255,0.8)'},
        'opacity': 0.95,
    },
    showlegend=False,
)

# Assemble the figure; the edge trace is listed before the node trace.
figure_title = (
    f"Attack-Target Graph (filtered): {G.number_of_nodes():,} nodes, "
    f"{G.number_of_edges():,} edges"
)
# Layout coordinates carry no meaning, so both axes hide grid, zero line and tick labels.
hidden_axis = dict(showgrid=False, zeroline=False, showticklabels=False)

fig = go.Figure(data=[edge_trace, node_trace])
fig.update_layout(
    title=figure_title,
    template='plotly_white',
    hovermode='closest',
    height=850,
    margin=dict(l=10, r=10, t=60, b=10),
    xaxis=dict(hidden_axis),
    yaxis=dict(hidden_axis),
)

fig

In [8]:
# Save interactive HTML
# include_plotlyjs='cdn' makes the page load plotly.js from a CDN instead of embedding it.
html_path = WEEK2_ROOT.joinpath('attack_target_graph_interactive_v1.html')
fig.write_html(str(html_path), include_plotlyjs='cdn')
print(f'wrote: {html_path}')

wrote: /Users/jeremyzay/Desktop/delta_lab/analysis/outputs/week2/attack_target_graph_interactive_v1.html


## Quick analytic checks

Use these to inspect central actors in the currently filtered graph.

In [9]:
# Top in-degree targets in filtered graph
# (node, in_degree) pairs ordered by descending in-degree, cut to the first 20.
in_deg = list(G.in_degree())
in_deg.sort(key=lambda pair: pair[1], reverse=True)
in_deg = in_deg[:20]
pd.DataFrame(in_deg, columns=['target_node', 'in_degree'])

Unnamed: 0,target_node,in_degree
0,congress,49
1,washington,27
2,donald trump,23
3,kamala harris,18
4,medicare,14
5,joe biden,13
6,new york,11
7,california,10
8,oregon,10
9,china,9


In [10]:
# Top weighted in-degree targets (by mention_count on incoming edges)
# For each node, add up mention_count over its incoming edges (0 for a missing attribute).
weighted_in = [
    (
        node,
        sum(int(attrs.get('mention_count', 0)) for _, _, attrs in G.in_edges(node, data=True)),
    )
    for node in G.nodes()
]

weighted_in.sort(key=lambda pair: pair[1], reverse=True)
weighted_in = weighted_in[:20]
pd.DataFrame(weighted_in, columns=['target_node', 'weighted_in_degree_mentions'])

Unnamed: 0,target_node,weighted_in_degree_mentions
0,congress,2461
1,donald trump,1399
2,washington,1269
3,medicare,1186
4,mark molinaro,1087
5,kamala harris,1039
6,josh riley,918
7,nick begich,884
8,alaska,848
9,janelle bynum,806
