In [None]:
# SECTOR-SPECIFIC, MARKET-ADJUSTED AI EVENT STUDY

import pandas as pd
import numpy as np

np.random.seed(42)

# 1. LOAD ETF PRICE DATA

prices = pd.read_csv(
    "/Users/medharavi/Downloads/Women's Datathon 2026 Datasets/Stock Data/etfs_prices_all_since_2015.csv",
    parse_dates=["Date"]
)

prices = prices.sort_values(["ticker", "Date"])
prices = prices[["Date", "ticker", "Adj Close"]]

prices["log_return"] = (
    prices.groupby("ticker")["Adj Close"]
    .transform(lambda x: np.log(x).diff())
)

# 2. OVERALL MARKET RETURN

market = prices[prices["ticker"] == "SPY"][["Date", "log_return"]]
market = market.rename(columns={"log_return": "market_return"})

prices = prices.merge(market, on="Date", how="left")

prices["market_adj_return"] = prices["log_return"] - prices["market_return"]

# 3. DEFINE SECTOR BY ETF

ETF_TO_SECTOR = {
    "XLV": "Healthcare", "VHT": "Healthcare", "IYH": "Healthcare",
    "XLF": "Financials", "VFH": "Financials", "IYF": "Financials",
    "XLP": "Consumer Staples", "VDC": "Consumer Staples", "FSTA": "Consumer Staples",
    "XLU": "Utilities", "VPU": "Utilities", "IDU": "Utilities",
    "XLE": "Energy", "VDE": "Energy", "IYE": "Energy",
    "XLB": "Materials", "VAW": "Materials", "IYM": "Materials",
}

prices["sector"] = prices["ticker"].map(ETF_TO_SECTOR)

prices = prices[prices["sector"].notna()]

# 4. loading genai release events

genai = pd.read_csv(
    "/Users/medharavi/Downloads/Women's Datathon 2026 Datasets/Gen AI/genai_dimension.csv",
    parse_dates=["Release Date"]
)

genai = genai.rename(columns={
    "GenAI Model Company": "company",
    "GenAI Model": "model",
    "Release Date": "event_date"
})

ai_events = genai[
    genai["company"].isin(["OpenAI", "Anthropic", "Google"])
][["event_date", "company", "model"]].dropna()

TREATED_SECTORS = [
    "Healthcare",
    "Financials",
    "Consumer Staples"
]

prices["treated"] = prices["sector"].isin(TREATED_SECTORS).astype(int)

# 6. building event study panel

WINDOW = 120
rows = []

for _, ev in ai_events.iterrows():
    tmp = prices.copy()
    tmp["days_from_event"] = (tmp["Date"] - ev["event_date"]).dt.days
    tmp = tmp[tmp["days_from_event"].between(-WINDOW, WINDOW)]

    tmp["event_date"] = ev["event_date"]
    tmp["event_company"] = ev["company"]
    tmp["event_model"] = ev["model"]

    rows.append(
        tmp[[
            "event_date",
            "event_company",
            "event_model",
            "sector",
            "days_from_event",
            "treated",
            "market_adj_return"
        ]]
    )

event_panel = pd.concat(rows, ignore_index=True)

# sanity checks

assert event_panel["sector"].nunique() >= 6, "Expected multiple sectors"
assert event_panel["market_adj_return"].notna().mean() > 0.98, "Unexpected missing returns"

print("\nEvent-study panel preview:")
print(event_panel.head())

print("\nSector counts:")
print(event_panel["sector"].value_counts())

event_panel.to_csv(
    "ai_event_study_sector_specific_market_adjusted.csv",
    index=False
)

print("\nSaved: ai_event_study_sector_specific_market_adjusted.csv")



Event-study panel preview:
  event_date event_company        event_model            sector  \
0 2024-06-21     Anthropic  Claude 3.5 Sonnet  Consumer Staples   
1 2024-06-21     Anthropic  Claude 3.5 Sonnet  Consumer Staples   
2 2024-06-21     Anthropic  Claude 3.5 Sonnet  Consumer Staples   
3 2024-06-21     Anthropic  Claude 3.5 Sonnet  Consumer Staples   
4 2024-06-21     Anthropic  Claude 3.5 Sonnet  Consumer Staples   

   days_from_event  treated  market_adj_return  
0             -120        1          -0.017877  
1             -119        1           0.003424  
2             -116        1           0.002805  
3             -115        1          -0.001208  
4             -114        1           0.002187  

Sector counts:
sector
Utilities           27011
Energy              27011
Financials          27011
Healthcare          27011
Materials           27011
Consumer Staples    26995
Name: count, dtype: int64

Saved: ai_event_study_sector_specific_market_adjusted.csv


summary of daily / sectr specific / market adjusted

In [None]:
import pandas as pd

df = pd.read_csv("ai_event_study_sector_specific_market_adjusted.csv")

# Define pre/post
df["post"] = (df["days_from_event"] >= 0).astype(int)

summary = (
    df.groupby(["sector", "post"])["market_adj_return"]
    .mean()
    .unstack()
)

summary["post_minus_pre"] = summary[1] - summary[0]

summary = summary.sort_values("post_minus_pre", ascending=False)

print(summary)



post              post_minus_pre  six_month_%  one_year_%
sector                                                   
Materials               0.000347     4.473381    9.146873
Energy                  0.000093     1.180368    2.374668
Consumer Staples       -0.000009    -0.117395   -0.234652
Utilities              -0.000220    -2.738480   -5.401967
Healthcare             -0.000243    -3.010739   -5.930832
Financials             -0.000247    -3.064338   -6.034775


daily x 6mo x 12mo / sectr specific / market adjusted

In [4]:
summary["six_month_%"] = ((1 + summary["post_minus_pre"]) ** 126 - 1) * 100
summary["one_year_%"] = ((1 + summary["post_minus_pre"]) ** 252 - 1) * 100

print(summary[["post_minus_pre", "six_month_%", "one_year_%"]])

post              post_minus_pre  six_month_%  one_year_%
sector                                                   
Materials               0.000347     4.473381    9.146873
Energy                  0.000093     1.180368    2.374668
Consumer Staples       -0.000009    -0.117395   -0.234652
Utilities              -0.000220    -2.738480   -5.401967
Healthcare             -0.000243    -3.010739   -5.930832
Financials             -0.000247    -3.064338   -6.034775


sector specific graphs

In [None]:
import matplotlib.pyplot as plt

sectors = df["sector"].unique()

for sector in sectors:
    s = (
        df[df["sector"] == sector]
        .groupby("days_from_event")["market_adj_return"]
        .mean()
    )

    plt.figure(figsize=(7,4))
    plt.plot(s.index, s.values)
    plt.axvline(0, color="red", linestyle="--")
    plt.axhline(0, color="gray")
    plt.title(f"{sector}: Market-Adjusted Returns Around AI Releases")
    plt.xlabel("Days from AI Release")
    plt.ylabel("Avg Market-Adjusted Return")
    plt.tight_layout()
    plt.show()


Reduced Noise / sector specific graphs / market adjusted