
# 🌱 ESG Stock Event Study — **Market Model** (Demo)

This notebook implements an event study using the **market model**:

$$ R_{i,t} = \alpha_i + \beta_i R_{m,t} + \epsilon_{i,t} $$

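- **Estimation window:** τ ∈ [-120, -20] calendar days relative to the event date (fit α, β per event; at least 30 overlapping observations are required)
- **Event window:** τ ∈ [-5, +5] calendar days relative to the event date (non-trading days simply have no observations)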
- **AR:** abnormal return = actual − expected (α + β·R_m)
- **AAR / CAR:** averaged and cumulative abnormal returns across events

> It reads the daily-updated CSVs generated by your pipeline in `data/latest/`.


In [None]:

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Candidate locations of the pipeline output, checked in order so the notebook
# runs both from the project folder and from the repository root.
CANDIDATES = [
    "data/latest",
    "data-science-projects-starter/data-science-projects/esg-stock-event-study/data/latest",
]

# First candidate that exists on disk, or None when the pipeline has not run.
DATA_LATEST = next(filter(os.path.exists, CANDIDATES), None)
if DATA_LATEST is None:
    raise FileNotFoundError("Could not find data/latest. Run the pipeline first.")

# Paths of the three CSVs the pipeline writes into the latest-data directory.
prices_path, returns_path, events_path = (
    os.path.join(DATA_LATEST, file_name)
    for file_name in ("prices_latest.csv", "returns_latest.csv", "esg_events_latest.csv")
)

# Returns: first CSV column is parsed as dates and used as the index.
rets = pd.read_csv(returns_path, index_col=0, parse_dates=[0])

# Events: fall back to an empty (ticker, event_date) table when the file is absent.
if os.path.exists(events_path):
    events = pd.read_csv(events_path, parse_dates=["event_date"])
else:
    events = pd.DataFrame(columns=["ticker", "event_date"])

rets.tail(), events.head()


## 1) Choose market return series
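Tries common market tickers in order: `^GSPC`, `SPY`, `VOO`, `IVV`, and uses the first one present in the returns table.  
Falls back to the **equal-weighted average** of all return columns with more than 80% non-missing observations if none is found.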


In [None]:

# Market proxies, in order of preference.
MARKET_CANDIDATES = ["^GSPC", "SPY", "VOO", "IVV"]

# First preferred proxy that is actually a column of the returns table.
market_col = next(filter(lambda name: name in rets.columns, MARKET_CANDIDATES), None)

if market_col is None:
    # No proxy available: build an equal-weighted cross-sectional mean from the
    # columns that have more than 80% non-missing observations.
    valid_cols = rets.columns[rets.notna().mean() > 0.8].tolist()
    market_col = "_EW_MARKET_"
    rets[market_col] = rets[valid_cols].mean(axis=1)

print("Using market series:", market_col)
rets.loc[:, [market_col]].dropna().tail()


## 2) Fit α, β on the estimation window and compute AR in the event window


In [None]:

from datetime import timedelta

# Window bounds are (first, last) offsets in CALENDAR days relative to the
# event date; they are applied with timedelta(days=...), not as trading days.
EST_WIN = (-120, -20)
EVT_WIN = (-5, 5)


def fit_alpha_beta(stock: pd.Series, market: pd.Series, event_date: pd.Timestamp,
                   est_win=None, min_obs: int = 30):
    """Fit the market model ``stock = alpha + beta * market`` before an event.

    Parameters
    ----------
    stock, market
        Return series indexed by date. Only dates present in both series with
        non-missing values inside the estimation window are used.
    event_date
        Date the window offsets are measured from.
    est_win
        Optional ``(first, last)`` calendar-day offsets of the estimation
        window, both inclusive. Defaults to the module-level ``EST_WIN``,
        looked up at call time so later changes to the constant are honoured.
    min_obs
        Minimum number of paired observations required for a fit.

    Returns
    -------
    tuple of float
        ``(alpha, beta)``, or ``(nan, nan)`` when fewer than ``min_obs``
        paired observations are available.
    """
    first_off, last_off = EST_WIN if est_win is None else est_win
    start = event_date + timedelta(days=first_off)
    end = event_date + timedelta(days=last_off)

    # Building the frame from two Series aligns them on the date index;
    # dropna() then keeps only dates where both returns are known.
    paired = pd.DataFrame({
        "s": stock.loc[(stock.index >= start) & (stock.index <= end)],
        "m": market.loc[(market.index >= start) & (market.index <= end)],
    }).dropna()
    if len(paired) < min_obs:
        return np.nan, np.nan  # not enough data

    # Least-squares fit via the Moore-Penrose pseudo-inverse of [1, R_m].
    # For a rank-deficient design (e.g. a constant market series) this yields
    # the minimum-norm solution instead of raising.
    design = np.column_stack([np.ones(len(paired)), paired["m"].to_numpy()])
    coef = np.linalg.pinv(design) @ paired["s"].to_numpy()
    return float(coef[0]), float(coef[1])

def compute_event_ar_table(rets_df: pd.DataFrame, events_df: pd.DataFrame, mkt_col: str,
                           evt_win=None):
    """Build a long table of abnormal returns around each event.

    For every row of ``events_df`` whose ticker is a column of ``rets_df``,
    the market model is fitted with ``fit_alpha_beta`` and abnormal returns
    ``ar = ret - (alpha + beta * mkt)`` are computed for every date of
    ``rets_df`` that falls inside the event window.

    Parameters
    ----------
    rets_df
        Returns table indexed by date, one column per ticker plus ``mkt_col``.
        It is copied, so the caller's frame is not modified.
    events_df
        Events with ``ticker`` and ``event_date`` columns.
    mkt_col
        Name of the market-return column in ``rets_df``.
    evt_win
        Optional ``(first, last)`` calendar-day offsets of the event window,
        both inclusive. Defaults to the module-level ``EVT_WIN``.

    Returns
    -------
    pd.DataFrame
        One row per (event, date) with columns ``date, ticker, event_date,
        tau, ret, mkt, exp_ret, ar, alpha, beta``; ``tau`` is the calendar-day
        offset from the event date. Events with an unknown ticker, a missing
        date, a failed fit or no data in the window are skipped. The frame is
        empty (same columns) when no event survives.
    """
    out_cols = ["date", "ticker", "event_date", "tau", "ret", "mkt",
                "exp_ret", "ar", "alpha", "beta"]
    first_off, last_off = EVT_WIN if evt_win is None else evt_win

    r = rets_df.copy()
    r.index = pd.to_datetime(r.index)

    rows = []
    for _, ev in events_df.iterrows():
        tic = ev.get("ticker")
        if tic not in r.columns:
            continue
        event_date = pd.to_datetime(ev["event_date"])
        if pd.isna(event_date):
            continue  # an event without a date cannot be placed in time
        event_date = event_date.normalize()

        alpha, beta = fit_alpha_beta(r[tic], r[mkt_col], event_date)
        if np.isnan(alpha) or np.isnan(beta):
            continue

        # Event window slice
        start = event_date + timedelta(days=first_off)
        end = event_date + timedelta(days=last_off)
        in_window = (r.index >= start) & (r.index <= end)
        if not in_window.any():
            continue

        # Pull the two series separately instead of selecting [tic, mkt_col]
        # and renaming: when the event ticker IS the market column, that
        # selection produces two identically named columns and the rename
        # leaves no "ret" column at all.
        seg = pd.DataFrame({
            "date": r.index[in_window],
            "ticker": tic,
            "event_date": event_date,
            "ret": r.loc[in_window, tic].to_numpy(),
            "mkt": r.loc[in_window, mkt_col].to_numpy(),
        })
        seg["tau"] = (seg["date"].dt.normalize() - event_date).dt.days
        seg["exp_ret"] = alpha + beta * seg["mkt"]
        seg["ar"] = seg["ret"] - seg["exp_ret"]
        seg["alpha"] = alpha
        seg["beta"] = beta
        rows.append(seg[out_cols])

    if not rows:
        return pd.DataFrame(columns=out_cols)
    return pd.concat(rows, ignore_index=True)

# Abnormal-return table for every usable event.
event_ar = compute_event_ar_table(rets_df=rets, events_df=events, mkt_col=market_col)

# Preview, number of distinct tickers that produced rows, number of input events.
(event_ar.head(), event_ar["ticker"].nunique(), len(events))


## 3) Aggregate AAR & CAR (and simple t-stats)
For each τ, we compute:
- **AAR(τ)** = mean of AR across events  
- **t(τ)** = AAR(τ) / (sd(AR_τ) / √N) — simple cross-sectional test  
- **CAR(τ)** = cumulative sum of AAR up to τ


In [None]:

def aggregate_aar_car_with_t(event_ar_df: pd.DataFrame):
    """Aggregate per-event abnormal returns into AAR, t-statistics and CAR.

    Parameters
    ----------
    event_ar_df
        Long table with at least the columns ``tau`` and ``ar``.

    Returns
    -------
    tuple of pd.DataFrame
        ``(aar, car)``, both sorted by ``tau``:

        * ``aar`` has columns ``tau, AAR, N, t`` where ``AAR`` is the mean of
          ``ar``, ``N`` the number of non-missing ``ar`` values and
          ``t = AAR / (sd / sqrt(N))`` with the sample standard deviation
          (``ddof=1``). ``t`` is NaN when it is undefined (``N <= 1`` or zero
          dispersion).
        * ``car`` has columns ``tau, AAR, CAR`` where ``CAR`` is the running
          sum of ``AAR``.

        For empty input both frames are empty but carry these same columns,
        so downstream code sees one schema in every case.
    """
    aar_columns = ["tau", "AAR", "N", "t"]
    car_columns = ["tau", "AAR", "CAR"]
    if event_ar_df.empty:
        return pd.DataFrame(columns=aar_columns), pd.DataFrame(columns=car_columns)

    # One groupby pass; groupby sorts by key, so rows come out ordered by tau.
    stats = (
        event_ar_df.groupby("tau")["ar"]
        .agg(AAR="mean", N="count", sd="std")
        .reset_index()
    )

    # Mask N == 0 so the standard error becomes NaN rather than dividing by zero.
    n_obs = stats["N"].where(stats["N"] > 0)
    stats["t"] = stats["AAR"] / (stats["sd"] / np.sqrt(n_obs))

    # A zero standard deviation produces +/-inf; report that as "undefined".
    aar = stats[aar_columns].replace([np.inf, -np.inf], np.nan)

    car = aar[["tau", "AAR"]].copy()
    car["CAR"] = car["AAR"].cumsum()
    return aar, car

# Cross-event averages (with t-stats) and their running sum.
aar, car = aggregate_aar_car_with_t(event_ar_df=event_ar)

(aar.head(), car.head())


## 4) Plots


In [None]:

def _decorate_event_axes(title, xlabel, ylabel):
    """Mark the event day (tau = 0), label the current axes and show the figure."""
    plt.axvline(0, linestyle="--")
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.grid(True)
    plt.show()


# Average abnormal return per event-time offset.
plt.figure()
plt.plot(aar["tau"], aar["AAR"], marker="o")
_decorate_event_axes(
    "Average Abnormal Return (AAR) — Market Model",
    "Tau (days relative to event)",
    "AAR",
)

# Cumulative abnormal return across the event window.
plt.figure()
plt.plot(car["tau"], car["CAR"], marker="o")
_decorate_event_axes(
    "Cumulative Abnormal Return (CAR) — Market Model",
    "Tau (days relative to event)",
    "CAR",
)

# Optional: AAR t-stats
# The former `use_line_collection=True` keyword was deprecated in
# Matplotlib 3.6 and removed in 3.8, where passing it raises TypeError.
# Stems are drawn as a LineCollection by default, so it is simply dropped.
plt.figure()
plt.stem(aar["tau"], aar["t"])
_decorate_event_axes("AAR t-stat by Tau", "Tau", "t-stat")


## 5) Inspect one example event window


In [None]:

if not event_ar.empty:
    # Up to 11 rows per (ticker, event_date) pair, ordered by event date,
    # ticker and tau.
    eg = (
        event_ar.sort_values(["event_date", "ticker", "tau"])
        .groupby(["ticker", "event_date"])
        .head(11)
    )
    # A bare expression inside an `if` suite is not echoed by the notebook
    # (only a cell's final top-level expression is), so print explicitly.
    print(eg.head(20).to_string())
else:
    print("No event AR table generated. You may need more events or adjust tickers/ESG keywords.")
