##  Variable Definitions & Formulas

| Variable              | Definition / Formula |
|-----------------------|----------------------|
| `Date`                | Trading calendar date of the stock |
| `ticker`              | Ticker symbol of the stock (e.g., NVDA, PLTR) |
| `Close`               | Closing price of the stock on a given day |
| `Return`              | Daily return of the stock:  $R_{i,t} = \frac{P_{i,t} - P_{i,t-1}}{P_{i,t-1}}$ |
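| `expected_return`     | Rolling expected return from estimation window:  $\bar{R}_i = \frac{1}{T} \sum_{t=-90}^{-30} R_{i,t}$, where $t$ is measured in calendar days relative to the current date and $T$ is the number of return observations in the window |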
| `abnormal_return` (AR)| Abnormal return:  $AR_{i,t} = R_{i,t} - \bar{R}_i$ |
| `event_id`            | Event identifier (e.g., `2022_export_control`) |
| `event_date`          | Announcement date of trade policy shock |
| `event_time`          | Time relative to event, in calendar days: $t = \text{Date} - \text{event\_date}$ |
| `CAR_pre`             | Cumulative Abnormal Return pre-event: $\sum_{t=-7}^{-1} AR_{i,t}$ |
| `CAR_post`            | Cumulative Abnormal Return post-event: $\sum_{t=0}^{7} AR_{i,t}$ |
| `delta_CAR`           | Difference-in-differences outcome: $CAR_{\text{post}} - CAR_{\text{pre}}$ |
| `post`                | Event-time indicator: $1$ if $t \geq 0$, else $0$ |
| `CAR` (custom window) | Total abnormal return over user-defined window: $\sum_{t=a}^{b} AR_{i,t}$ |
| `Treatment`           | Group indicator: $1$ if firm is hardware-dependent, else $0$ |
| `Treatment × post`    | Interaction term: $Treatment_i \times post_{it}$, used for DiD |
| `firm_FE`             | Firm fixed effect (optional, for regression control of firm-level heterogeneity) |
| `event_FE`            | Event fixed effect (optional, for regression control of event-level shocks) |





In [214]:
import yfinance as yf
import pandas as pd


def daily_return_price(ticker, start_date, end_date, treatment_flag):
    """
    Download a ticker's price history and attach its simple daily return.

    Parameters:
        ticker (str): Stock symbol, e.g., 'NVDA'
        start_date (str): First date requested, 'YYYY-MM-DD'
        end_date (str): Last date requested, 'YYYY-MM-DD'
        treatment_flag (int): 1 for the hardware-dependent (treatment) group,
                              0 for the control group

    Returns:
        DataFrame: Columns = ['Date', 'ticker', 'Close', 'Return', 'Treatment'].
                   'Return' is the percentage change of 'Close' from the
                   previous row, so the first row is NaN.
    """
    history = yf.Ticker(ticker).history(start=start_date, end=end_date)

    # assign() works on a copy, leaving the downloaded frame untouched.
    enriched = history.assign(
        Return=history["Close"].pct_change(),
        ticker=ticker,
        Treatment=treatment_flag,
    )

    # reset_index() moves the date index into a regular 'Date' column.
    output_columns = ["Date", "ticker", "Close", "Return", "Treatment"]
    return enriched.reset_index()[output_columns]

In [215]:
def compute_expected_return_rolling(df, estimation_start=-90, estimation_end=-30):
    """
    Compute a rolling expected return and the resulting abnormal return per row.

    For each row, the expected return is the mean of the same ticker's 'Return'
    values whose 'Date' lies in the inclusive calendar-day range
    [Date + estimation_start, Date + estimation_end]. The abnormal return is
    'Return' minus that expected return.

    Parameters:
        df (DataFrame): Must include ['Date', 'ticker', 'Return']
        estimation_start (int): Start of look-back window in days (e.g., -90)
        estimation_end (int): End of look-back window in days (e.g., -30)

    Returns:
        DataFrame: Copy of df sorted by ['ticker', 'Date'] (original index
                   labels are kept) with two added float columns,
                   'expected_return' and 'abnormal_return'. Rows without any
                   usable history in the window get NaN in both columns.
    """
    df = df.copy()
    df["Date"] = pd.to_datetime(df["Date"])
    df = df.sort_values(["ticker", "Date"])

    # Use positional labels while computing so duplicate index labels in the
    # input cannot cause mis-assignment; the caller's labels are restored below.
    original_index = df.index
    df = df.reset_index(drop=True)

    start_offset = pd.Timedelta(days=estimation_start)
    end_offset = pd.Timedelta(days=estimation_end)

    # NaN (not pd.NA / None) keeps both result columns as plain float columns.
    df["expected_return"] = float("nan")

    for _, group in df.groupby("ticker", sort=False):
        # Rows without a valid date cannot anchor a window and stay NaN.
        dated = group.dropna(subset=["Date"])
        if dated.empty:
            continue

        # Build the per-ticker, date-indexed return history once per ticker
        # instead of looking it up again for every row.
        history = dated.set_index("Date")["Return"]

        # Label-based slicing on the sorted index includes both endpoints and
        # yields an empty slice (mean = NaN) when no observation is in range.
        means = [
            history.loc[date + start_offset : date + end_offset].mean()
            for date in dated["Date"]
        ]
        df.loc[dated.index, "expected_return"] = means

    df["abnormal_return"] = df["Return"] - df["expected_return"]

    df.index = original_index
    return df


In [216]:
def tag_event_info_all_rows(df, event_dict, window=10):
    """
    Tag event_id, event_date, and event_time for rows within ±window calendar
    days of any event, while keeping all original rows (unmatched rows get
    NaN / NaT in the event columns).

    A row that falls inside the windows of several events is returned once per
    matching event, so the result can have more rows than the input when event
    windows overlap.

    Parameters:
        df (DataFrame): Must include ['Date', 'ticker', ...]; 'Date' must be a
                        datetime column (any timezone is dropped).
        event_dict (dict): {'event_id': event date}. Dates may be pd.Timestamp
                           or anything pd.Timestamp() accepts (e.g. 'YYYY-MM-DD').
        window (int): Matching window in days (default ±10)

    Returns:
        DataFrame: Copy of the input with additional columns:
                   ['event_id', 'event_date', 'event_time']
    """
    df = df.copy()
    df["Date"] = df["Date"].dt.tz_localize(None)

    # Only the merge keys are needed to find matches; avoids copying the whole
    # frame once per event.
    keys = df[["Date", "ticker"]]

    tag_list = []
    for event_id, raw_event_date in event_dict.items():
        # Normalise the event date to a tz-naive Timestamp so it is comparable
        # with the tz-naive 'Date' column.
        event_date = pd.Timestamp(raw_event_date)
        if event_date.tzinfo is not None:
            event_date = event_date.tz_localize(None)

        offsets = (keys["Date"] - event_date).dt.days

        # Keep only matches within window (both ends inclusive)
        in_window = offsets.between(-window, window)

        matched = keys.loc[in_window].copy()
        matched["event_id_candidate"] = event_id
        matched["event_date_candidate"] = event_date
        matched["event_time_candidate"] = offsets[in_window]
        tag_list.append(matched)

    # With no events there is nothing to concatenate or merge: return the
    # input with empty event columns instead of failing in pd.concat.
    if not tag_list:
        df["event_id"] = None
        df["event_date"] = pd.NaT
        df["event_time"] = float("nan")
        return df

    # Combine all matches
    tag_df = pd.concat(tag_list, ignore_index=True)

    # Merge event info into original df; the left join keeps unmatched rows
    df = df.merge(tag_df, on=["Date", "ticker"], how="left")

    # Rename columns
    df.rename(
        columns={
            "event_id_candidate": "event_id",
            "event_date_candidate": "event_date",
            "event_time_candidate": "event_time",
        },
        inplace=True,
    )

    return df


def _sum_abnormal_returns(df, window, column_name):
    """Sum abnormal returns per (ticker, event_date) over an inclusive event-time window."""
    start_day, end_day = window
    in_window = df["event_time"].between(start_day, end_day)

    # abnormal_return may arrive as an object column holding pd.NA / None;
    # coerce to float so missing values are treated uniformly as NaN.
    abnormal = pd.to_numeric(df.loc[in_window, "abnormal_return"], errors="coerce")

    return (
        df.loc[in_window, ["ticker", "event_date"]]
        .assign(**{column_name: abnormal})
        .groupby(["ticker", "event_date"])[column_name]
        # min_count=1: a window with no valid abnormal return yields NaN
        # rather than a misleading cumulative return of 0.
        .sum(min_count=1)
        .reset_index()
    )


def compute_car_pre_post_delta_full(df, pre_window=(-10, -1), post_window=(0, 10)):
    """
    Compute CAR_pre, CAR_post, delta_CAR, CAR and the post indicator for all
    rows, without filtering any row. Only rows whose event_time lies in
    pre_window / post_window contribute to the respective sum, but every row
    is kept and receives the CAR values of its (ticker, event_date) pair.

    Parameters:
        df (DataFrame): Must include ['Date', 'ticker', 'event_date', 'abnormal_return']
        pre_window (tuple): inclusive (start_day, end_day) for CAR_pre (e.g., (-10, -1))
        post_window (tuple): inclusive (start_day, end_day) for CAR_post (e.g., (0, 10))

    Returns:
        df (DataFrame): original df with added columns:
            ['event_time', 'CAR_pre', 'CAR_post', 'delta_CAR', 'post', 'CAR'].
            Rows without an event_date, and pairs with no valid abnormal
            return in a window, get NaN for the affected CAR columns.
    """
    df = df.copy()
    df["Date"] = pd.to_datetime(df["Date"])
    df["event_date"] = pd.to_datetime(df["event_date"])

    # Step 1: Compute event_time (calendar days from the event) for all rows
    df["event_time"] = (df["Date"] - df["event_date"]).dt.days

    # Steps 2-4: Compute each windowed CAR and merge it back into the full df
    for column_name, window in (("CAR_pre", pre_window), ("CAR_post", post_window)):
        car = _sum_abnormal_returns(df, window, column_name)
        df = df.merge(car, on=["ticker", "event_date"], how="left")

    # Step 5: Compute delta_CAR and post indicator
    # (rows without an event_date compare as False and therefore get post = 0)
    df["delta_CAR"] = df["CAR_post"] - df["CAR_pre"]
    df["post"] = (df["Date"] >= df["event_date"]).astype(int)

    # Step 6: Construct CAR column = CAR_pre + CAR_post
    df["CAR"] = df["CAR_pre"] + df["CAR_post"]

    return df


In [217]:
# Earlier candidate event list, kept for reference only; it is superseded by
# the active event_dict defined below.
# event_dict = {
#     "2021_china_tech_self_reliance": pd.to_datetime("2021-03-01"),  # China releases 14th Five-Year Plan emphasizing tech self-reliance
#     "2022_wto_steel_aluminum_dispute": pd.to_datetime("2022-12-09"),  # US rejects WTO ruling on steel and aluminum tariffs
#     "2023_chip_export_controls": pd.to_datetime("2023-01-27"),  # US and EU tighten controls on semiconductor technology exports to China
#     "2023_mineral_export_restrictions": pd.to_datetime("2023-07-07"),  # US criticizes China's export restrictions on critical minerals
#     "2024_tariff_increase_ev_battery": pd.to_datetime("2024-05-14"),  # US raises tariffs on EV batteries, solar panels, and metals
#     "2024_tariff_finalization": pd.to_datetime("2024-09-13"),  # US finalizes higher tariffs on EVs, batteries, critical minerals
#     "2024_nvidia_antitrust_probe": pd.to_datetime("2024-12-10"),  # China launches antitrust investigation into Nvidia
# }

# Active event list: event_id -> announcement date of the trade-policy shock.
# Entries are not in chronological order; they are looked up by key/date only.
event_dict = {
    "2022_oct_china_chip_export_control": pd.to_datetime("2022-10-07"),  # Biden administration restricts advanced AI chip exports to China
    "2023_chip_export_controls": pd.to_datetime("2023-01-27"),  # US and EU tighten controls on semiconductor technology exports to China
    "2023_export_expansion": pd.to_datetime("2023-10-17"),  # US expands export restrictions on semiconductors

    "2023_mineral_export_restrictions": pd.to_datetime("2023-07-07"),  # China restricts critical mineral exports
    "2023_aug_investment_ban": pd.to_datetime("2023-08-09"),  # Biden signs executive order banning investments in Chinese AI/semiconductors
    "2024_march_export_tightening": pd.to_datetime("2024-03-29"),  # US tightens export licensing rules for AI semiconductors
    "2024_tariff_increase_ev_battery": pd.to_datetime("2024-05-14"),  # US raises tariffs on EV batteries, solar panels, and metals
    "2024_tariff_finalization": pd.to_datetime("2024-09-13"),  # US finalizes higher tariffs
    "2024_nvidia_antitrust_probe": pd.to_datetime("2024-12-10"),  # China launches antitrust investigation into Nvidia
}


# === 1. Download Prices and Daily Returns for Each Stock ===
# Every ticker is requested over the same date range; the last argument is the
# Treatment flag stored in the resulting frame (1 = treatment, 0 = control).

# Treatment Group (Hardware-dependent AI companies)
NVDA_df = daily_return_price("NVDA", "2020-12-31", "2025-2-1", 1)  # NVIDIA
AMD_df = daily_return_price("AMD", "2020-12-31", "2025-2-1", 1)   # AMD
INTC_df = daily_return_price("INTC", "2020-12-31", "2025-2-1", 1)  # Intel
MU_df = daily_return_price("MU", "2020-12-31", "2025-2-1", 1)      # Micron
AVGO_df = daily_return_price("AVGO", "2020-12-31", "2025-2-1", 1)  # Broadcom
QCOM_df = daily_return_price("QCOM", "2020-12-31", "2025-2-1", 1)  # Qualcomm
MRVL_df = daily_return_price("MRVL", "2020-12-31", "2025-2-1", 1)  # Marvell Technology
TSM_df = daily_return_price("TSM", "2020-12-31", "2025-2-1", 1)  # TSMC


# Control Group (Software/SaaS companies)
CRM_df = daily_return_price("CRM", "2020-12-31", "2025-2-1", 0)    # Salesforce
ORCL_df = daily_return_price("ORCL", "2020-12-31", "2025-2-1", 0)  # Oracle
ADBE_df = daily_return_price("ADBE", "2020-12-31", "2025-2-1", 0)  # Adobe
NOW_df = daily_return_price("NOW", "2020-12-31", "2025-2-1", 0)    # ServiceNow
CRWD_df = daily_return_price("CRWD", "2020-12-31", "2025-2-1", 0)  # CrowdStrike
MDB_df = daily_return_price("MDB", "2020-12-31", "2025-2-1", 0)  # MongoDB

INTU_df = daily_return_price("INTU", "2020-12-31", "2025-2-1", 0)  # Intuit
SNOW_df = daily_return_price("SNOW", "2020-12-31", "2025-2-1", 0)   # Snowflake



# Market Benchmark
# NOTE(review): QQQ is created with treatment_flag 0 and is appended to
# all_stocks below, so its rows are counted among the Treatment == 0 rows of
# final_df. Confirm it is excluded before any treatment/control comparison.
Market_df = daily_return_price("QQQ", "2020-12-31", "2025-2-1", 0)
# Redundant with the 'ticker' column daily_return_price already sets; harmless.
Market_df["ticker"] = "QQQ"

# === 2. Apply Expected Return Rolling for Each Stock ===
all_stocks = [
    NVDA_df, AMD_df, INTC_df, MU_df, AVGO_df, QCOM_df, MRVL_df,TSM_df,
    CRM_df, ORCL_df, ADBE_df,NOW_df, CRWD_df, MDB_df, INTU_df, SNOW_df, 
    Market_df
]
# Alternative stock universe kept for reference (uses CSCO_df / PLTR_df, which
# are not created in this cell).
# all_stocks = [
#     NVDA_df, AMD_df, INTC_df, MU_df, AVGO_df, QCOM_df, MRVL_df,TSM_df,
#     CRM_df, ORCL_df, ADBE_df,NOW_df, CRWD_df, MDB_df, SNOW_df, CSCO_df, PLTR_df,
#     Market_df
# ]

# Apply the rolling expected return calculation
# (each frame holds a single ticker and gains 'expected_return' and
# 'abnormal_return' columns)
all_stocks = [compute_expected_return_rolling(stock) for stock in all_stocks]

# === 3. Combine Everything into Final DataFrame ===
# ignore_index=True gives the stacked frame a fresh 0..n-1 index.
final_df = pd.concat(all_stocks , ignore_index=True)

# Quick sanity checks: overall size, rows per Treatment flag, tickers present.
print(final_df.shape)
print(final_df['Treatment'].value_counts())
print(final_df['ticker'].unique())



(17442, 7)
Treatment
0    9234
1    8208
Name: count, dtype: int64
['NVDA' 'AMD' 'INTC' 'MU' 'AVGO' 'QCOM' 'MRVL' 'TSM' 'CRM' 'ORCL' 'ADBE'
 'NOW' 'CRWD' 'MDB' 'INTU' 'SNOW' 'QQQ']


In [218]:
# Tag every row that lies within ±7 calendar days of an event; rows outside
# all event windows are kept with empty event columns.
final_tagged_df = tag_event_info_all_rows(final_df, event_dict, window=7)
# The CAR function is called with its default windows (-10..-1 and 0..10), but
# only rows tagged above carry an event_date, so the sums effectively cover
# event days -7..-1 (CAR_pre) and 0..7 (CAR_post).
final_full_df = compute_car_pre_post_delta_full(final_tagged_df)

# Write the full panel (all rows, tagged or not) to disk without the index.
final_full_df.to_csv("final_df.csv", index=False)

In [219]:
# single_tagged_df = tag_event_info_all_rows(final_df, event_dict, window=10)
# single_full_df = compute_car_pre_post_delta_full(single_tagged_df)

# single_full_df.to_csv("single_df.csv", index=False)

In [220]:
# Sanity check: list the columns of the final panel and the tickers it contains.
print(final_full_df.columns)
print("Unique tickers:", sorted(final_full_df['ticker'].unique()))


Index(['Date', 'ticker', 'Close', 'Return', 'Treatment', 'expected_return',
       'abnormal_return', 'event_id', 'event_date', 'event_time', 'CAR_pre',
       'CAR_post', 'delta_CAR', 'post', 'CAR'],
      dtype='object')
Unique tickers: ['ADBE', 'AMD', 'AVGO', 'CRM', 'CRWD', 'INTC', 'INTU', 'MDB', 'MRVL', 'MU', 'NOW', 'NVDA', 'ORCL', 'QCOM', 'QQQ', 'SNOW', 'TSM']


In [221]:
import pandas as pd

# Coverage check: every trading date should have exactly one row per ticker.
# Unparseable dates become NaT and are therefore left out of the grouping.
final_df["Date"] = pd.to_datetime(final_df["Date"], errors="coerce")

# Number of distinct tickers observed on each date
date_counts = final_df.groupby("Date")["ticker"].nunique()

print(date_counts.describe())
print(date_counts.value_counts())

# Derive the expected count from the data instead of hard-coding it, so the
# check stays valid when the stock universe changes.
expected_ticker_count = final_df["ticker"].nunique()
problem_dates = date_counts[date_counts != expected_ticker_count]
print(" Dates with missing or extra tickers:")
print(problem_dates)


count    1026.0
mean       17.0
std         0.0
min        17.0
25%        17.0
50%        17.0
75%        17.0
max        17.0
Name: ticker, dtype: float64
ticker
17    1026
Name: count, dtype: int64
 Dates with missing or extra tickers:
Series([], Name: ticker, dtype: int64)
