# Toy Tweeter Recommender Demo

Visual walkthrough of the toy pipeline:
1. Load synthetic data
2. Inspect in-network vs out-of-network split
3. Build a tiny bipartite subgraph snapshot
4. Inspect top-K scores and in-network share

Run `python recommend.py` first to generate `toy_recs.csv`.


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import networkx as nx
from collections import Counter
from pathlib import Path

# Input/output locations, all resolved against the directory the notebook runs from.
import os
BASE = Path.cwd()
TWEETS_CSV = BASE.joinpath("tweets.csv")
ENG_CSV = BASE.joinpath("engagements.csv")
RECS_CSV = BASE.joinpath("toy_recs.csv")

# Every figure produced below is written into this folder; create it up front.
PLOTS = BASE.joinpath("plots")
PLOTS.mkdir(exist_ok=True)

# One shared look for all charts in the notebook.
plt.style.use('seaborn-v0_8-darkgrid')


In [None]:
# Read the two input tables. The recommendations file is optional: when it is
# missing, `recs` is an empty frame so later cells can test `recs.empty`.
tweets = pd.read_csv(TWEETS_CSV)
eng = pd.read_csv(ENG_CSV)
if RECS_CSV.exists():
    recs = pd.read_csv(RECS_CSV)
else:
    recs = pd.DataFrame()

# Quick preview of each table.
print(tweets.head(), eng.head(), sep="\n")
if recs.empty:
    print("No recs yet (run recommend.py)")
else:
    print(recs.head())


## In-network vs Out-of-network split


In [None]:
# Count tweets per class and plot them as a two-bar chart.
#
# value_counts() orders its result by frequency, so a positional colour list
# would be attached to whichever class happens to be larger. Sorting the index
# gives a stable order (0 before 1), and the colours are looked up by label so
# each class always gets the same colour, even if only one class is present.
palette = {'Out-of-network': '#657786', 'In-network': '#1DA1F2'}
counts = (
    tweets['in_network']
    .value_counts()
    .sort_index()
    .rename({0: 'Out-of-network', 1: 'In-network'})
)
bar_colors = [palette.get(label, '#AAB8C2') for label in counts.index]

fig, ax = plt.subplots(figsize=(5,4))
counts.plot(kind='bar', color=bar_colors, ax=ax)
ax.set_ylabel('Tweets')
ax.set_title('In-network vs Out-of-network Tweets')
# Print each bar's count just above the bar, centred horizontally.
for p in ax.patches:
    ax.text(p.get_x()+p.get_width()/2, p.get_height()+10, int(p.get_height()), ha='center')
plt.tight_layout()
plt.savefig(PLOTS / "in_out_split.png", dpi=200)
plt.show()


## Recency distribution


In [None]:
# Compute each tweet's age in days and plot the distribution.
#
# Timestamps are parsed as timezone-aware UTC (naive values are taken to be
# UTC) and compared against a timezone-aware "now". Mixing a naive column with
# an aware timestamp raises TypeError, and Timestamp.utcnow() is deprecated in
# recent pandas releases.
tweets['created_at'] = pd.to_datetime(tweets['created_at'], utc=True)
tweets['age_days'] = (
    pd.Timestamp.now(tz='UTC') - tweets['created_at']
).dt.total_seconds() / 86400  # 86400 seconds per day

fig, ax = plt.subplots(figsize=(6,4))
tweets['age_days'].hist(bins=14, ax=ax, color='#1DA1F2', alpha=0.7)
ax.set_xlabel('Age (days)')
ax.set_ylabel('Tweets')
ax.set_title('Tweet Recency (age in days)')
plt.tight_layout()
plt.savefig(PLOTS / "recency_hist.png", dpi=200)
plt.show()


## Sample a tiny bipartite subgraph


In [None]:
# Draw a small user–tweet engagement graph from a reproducible random sample.
#
# Sample sizes are capped at the number of distinct ids available, because
# Series.sample() raises ValueError when asked for more items than exist.
user_pool = eng['user_id'].drop_duplicates()
sample_users = list(user_pool.sample(min(6, len(user_pool)), random_state=123))
tweet_pool = eng[eng['user_id'].isin(sample_users)]['tweet_id'].drop_duplicates()
sample_tweets = list(tweet_pool.sample(min(8, len(tweet_pool)), random_state=123))
sub_eng = eng[eng['user_id'].isin(sample_users) & eng['tweet_id'].isin(sample_tweets)]

# Users and tweets are the two node sets; the "u_"/"t_" prefixes keep their ids
# from colliding and are used below to pick each set for drawing.
G = nx.Graph()
G.add_nodes_from([f"u_{u}" for u in sample_users], bipartite=0)
G.add_nodes_from([f"t_{t}" for t in sample_tweets], bipartite=1)
# Read the two id columns directly instead of using iterrows(): iterrows()
# coerces every row to a single dtype, which can turn integer ids into floats
# ("u_1.0") and silently add nodes that do not match the ones created above.
G.add_edges_from(
    (f"u_{u}", f"t_{t}") for u, t in zip(sub_eng['user_id'], sub_eng['tweet_id'])
)

pos = nx.spring_layout(G, seed=42)  # fixed seed so the layout is reproducible
fig, ax = plt.subplots(figsize=(6,5))
nx.draw_networkx_nodes(G, pos, nodelist=[n for n in G if n.startswith('u_')], node_color='#1DA1F2', node_shape='o', label='Users')
nx.draw_networkx_nodes(G, pos, nodelist=[n for n in G if n.startswith('t_')], node_color='#17BF63', node_shape='s', label='Tweets')
nx.draw_networkx_edges(G, pos, alpha=0.4)
nx.draw_networkx_labels(G, pos, font_size=8)
plt.legend()
# The title previously contained a mis-encoded en dash ("â€“").
plt.title('Sampled User–Tweet Bipartite Graph')
plt.axis('off')
plt.tight_layout()
plt.savefig(PLOTS / "sample_graph.png", dpi=200)
plt.show()


## Top-K recommendations overview


In [None]:
# Preview the first ten recommendations and chart score against rank.
# When no recommendations were loaded, only a reminder is printed.
if not recs.empty:
    display(recs.head(10))
    fig, ax = plt.subplots(figsize=(7, 4))
    ax.bar(recs['rank'], recs['score'], color='#1DA1F2')
    ax.set(xlabel='Rank', ylabel='Score', title='Top-K Scores')
    fig.tight_layout()
    fig.savefig(PLOTS / "topk_scores.png", dpi=200)
    plt.show()
else:
    print("Run recommend.py first to create toy_recs.csv")


## In-network share within Top-K


In [None]:
# Plot the fraction of recommended tweets that are in-network next to its
# complement. Nothing is drawn when no recommendations were loaded.
if not recs.empty:
    in_share = recs['in_network'].mean()
    shares = {'In-network': in_share, 'Out-of-network': 1 - in_share}

    fig, ax = plt.subplots(figsize=(4, 4))
    bars = ax.bar(list(shares), list(shares.values()), color=['#1DA1F2', '#657786'])
    ax.set(ylim=(0, 1), ylabel='Share', title='In-network share in Top-K')

    # Write each share, to two decimals, just above its bar.
    for bar in bars:
        height = bar.get_height()
        ax.text(bar.get_x() + bar.get_width() / 2, height + 0.02, f"{height:.2f}", ha='center')

    fig.tight_layout()
    fig.savefig(PLOTS / "in_share_topk.png", dpi=200)
    plt.show()
