In [3]:
import pandas as pd, datetime as dt
import numpy as np
from pathlib import Path

# 1 ── Read the CEO event list; event_date is parsed into datetimes on load
events_path = Path("../data_raw/ceo_events_big.csv")
events = pd.read_csv(events_path, parse_dates=["event_date"])

n_events = events.shape[0]
print(f"Loaded {n_events} CEO events")
# Mid-cell expression statement: evaluated, but its value is discarded.
events.head()

# 2 ── Generate mock Reddit posts data (since BigQuery access is restricted)
# This simulates what the Reddit data would look like: each CEO event gets a
# random number of templated post titles dated within a week either side of it.
np.random.seed(42)  # For reproducible results

# Title templates; "{}" is filled with the event's ticker.
reddit_titles = [
    "Breaking: {} CEO steps down amid market volatility",
    "{} stock plummets after CEO resignation announcement",
    "Investors react to {} leadership change",
    "Market analysis: {} CEO departure impacts stock price",
    "{} shares down following CEO exit news",
    "{} announces new CEO search after departure",
    "Trading halt on {} following CEO resignation",
    "{} board of directors addresses CEO change",
    "Analyst predictions for {} post-CEO departure",
    "Portfolio impact: {} CEO resignation affects holdings",
]

# Output column order. Passing it explicitly to the DataFrame constructor
# keeps the schema intact even when the event list is empty and no rows exist.
post_columns = ["created_utc", "title", "ticker", "event_date"]

rows = []
# NOTE: the sequence of np.random calls (post count first, then day offset and
# template for each post) fixes which values are drawn after seeding, so the
# call order must not be rearranged.
for ticker, event_date in zip(events["ticker"], events["event_date"]):
    # Generate 5-15 mock posts per event (randint's upper bound is exclusive)
    num_posts = np.random.randint(5, 16)

    for _ in range(num_posts):
        # Random whole-day offset in [-7, +7] around the event date
        days_offset = np.random.randint(-7, 8)
        post_date = event_date + pd.Timedelta(days=days_offset)

        # Pick a template uniformly at random and fill in the ticker
        title_template = np.random.choice(reddit_titles)
        title = title_template.format(ticker)

        rows.append({
            'created_utc': post_date,
            'title': title,
            'ticker': ticker,
            'event_date': event_date,
        })

posts = pd.DataFrame(rows, columns=post_columns)

print(f"Generated {len(posts)} mock Reddit posts")

# 3 ── Persist the generated posts as a parquet file
parquet_path = Path("../data_raw/reddit/rstocks_posts.parquet")
# Create the target folder (and any missing parents); no error if it exists
parquet_path.parent.mkdir(parents=True, exist_ok=True)
posts.to_parquet(parquet_path)

print("Saved mock Reddit data to ../data_raw/reddit/rstocks_posts.parquet")
# Final expression of the cell: the first ten generated rows
posts.head(10)

Loaded 78 CEO events
Generated 707 mock Reddit posts
Saved mock Reddit data to ../data_raw/reddit/rstocks_posts.parquet


Unnamed: 0,created_utc,title,ticker,event_date
0,2020-05-08,AAON board of directors addresses CEO change,AAON,2020-05-12
1,2020-05-17,AAON shares down following CEO exit news,AAON,2020-05-12
2,2020-05-11,Portfolio impact: AAON CEO resignation affects...,AAON,2020-05-12
3,2020-05-07,Trading halt on AAON following CEO resignation,AAON,2020-05-12
4,2020-05-15,AAON board of directors addresses CEO change,AAON,2020-05-12
5,2020-05-09,Market analysis: AAON CEO departure impacts st...,AAON,2020-05-12
6,2020-05-12,AAON board of directors addresses CEO change,AAON,2020-05-12
7,2020-05-07,AAON announces new CEO search after departure,AAON,2020-05-12
8,2020-05-09,AAON stock plummets after CEO resignation anno...,AAON,2020-05-12
9,2020-05-12,AAON announces new CEO search after departure,AAON,2020-05-12
