In [None]:
import pandas as pd, numpy as np, seaborn as sns, matplotlib.pyplot as plt
from transformers import pipeline
import statsmodels.formula.api as smf
from pathlib import Path

# Raw r/stocks posts; later steps read the "title", "created_utc",
# "event_date" and "ticker" columns from this frame.
posts = pd.read_parquet("../data_raw/reddit/rstocks_posts.parquet")

# Sentiment classifier used by score(); truncation=True is passed through to
# the pipeline so over-long inputs are truncated rather than rejected.
sent = pipeline(
    task="sentiment-analysis",
    model="cardiffnlp/twitter-roberta-base-sentiment",
    truncation=True,
)

# Label -> numeric score, keyed by lower-cased label. The
# "cardiffnlp/twitter-roberta-base-sentiment" checkpoint reports its classes
# as LABEL_0 / LABEL_1 / LABEL_2 (negative / neutral / positive per its model
# card); the human-readable names are accepted too so that a checkpoint
# emitting "negative"/"NEGATIVE"-style labels keeps working.
_LABEL_TO_SCORE = {
    "label_0": -1,
    "label_1": 0,
    "label_2": 1,
    "negative": -1,
    "neutral": 0,
    "positive": 1,
}


def score(txt):
    """Return the sentiment of ``txt`` as -1 (negative), 0 (neutral) or 1 (positive).

    Only the first 512 characters are passed to the module-level ``sent``
    pipeline, and only the label of its first returned prediction is used.

    Args:
        txt: Text to classify.

    Returns:
        -1, 0 or 1. ``nan`` is returned when ``txt`` is not a string (for
        example a missing title), so such rows are skipped by later means
        instead of aborting the whole ``map``.

    Raises:
        KeyError: If the pipeline returns a label that is not recognised.
    """
    if not isinstance(txt, str):
        return float("nan")
    label = sent(txt[:512])[0]["label"]
    try:
        return _LABEL_TO_SCORE[label.lower()]
    except KeyError:
        raise KeyError(f"unrecognised sentiment label: {label!r}") from None

# Score every post title (one pipeline call per row).
posts["sent"] = posts["title"].map(score)

# pd.to_datetime interprets bare numbers as nanoseconds since the epoch by
# default, which would place every post in January 1970 if "created_utc" is
# stored as epoch seconds. Pass the unit explicitly for numeric columns;
# datetime or string columns are parsed exactly as before.
# NOTE(review): assumes a numeric "created_utc" is in seconds, as in Reddit's
# API -- confirm against the ingest step.
created = posts["created_utc"]
if pd.api.types.is_numeric_dtype(created):
    created = pd.to_datetime(created, unit="s")
else:
    created = pd.to_datetime(created)
posts["created_dt"] = created.dt.floor("D")

# Whole days between the post's (floored) creation day and its event date;
# negative values are posts made before the event.
posts["rel_day"] = (posts["created_dt"] - posts["event_date"]).dt.days

# parents=True so the output folder is created even if intermediate
# directories are missing.
Path("../data_proc").mkdir(parents=True, exist_ok=True)
posts.to_parquet("../data_proc/rstocks_scored.parquet")

# Aggregate to the mean title sentiment per ticker and event-relative day.
daily = posts.groupby(["ticker", "rel_day"]).sent.mean().reset_index()
# post = 1 for days strictly after the event day, 0 otherwise.
daily["post"] = daily.rel_day.gt(0).astype(int)

# OLS of daily mean sentiment on the post indicator with one dummy per ticker;
# the event day itself (rel_day == 0) is left out of the fit.
# NOTE(review): the formula is a pre/post comparison with ticker fixed
# effects and no control group is visible here -- confirm this is the
# intended "DiD" specification.
model = smf.ols("sent ~ post + C(ticker)", data=daily[daily.rel_day != 0]).fit()
print(model.summary().tables[1])

# One sentiment line per ticker, with a dashed marker at the event day.
sns.lineplot(data=daily, x="rel_day", y="sent", hue="ticker")
plt.axvline(0, ls="--")
plt.title("r/stocks sentiment around CEO change")
plt.tight_layout()
# savefig does not create missing directories, so make sure the target exists.
Path("../assets").mkdir(parents=True, exist_ok=True)
plt.savefig("../assets/sentiment_did.png", dpi=160)
