In [2]:
import pandas as pd

# === File paths ===
file1 = "/Users/chunghyunhan/Projects/agentics/Decision_runs_result_saving/decision_runs_consolidated.csv"
file2 = "/Users/chunghyunhan/Projects/agentics/dao_finished_proposals_stats.csv"
outfile = "/Users/chunghyunhan/Projects/agentics/Decision_runs_result_saving/decision_runs_consolidated_merged_test.csv"

# === Load CSVs ===
df1 = pd.read_csv(file1)
df2 = pd.read_csv(file2)

# === Extract proposal_id from snapshot_url in file1 ===
# expand=False returns a Series (one capture group) instead of a one-column DataFrame.
# URLs that do not contain "proposal/<hex>" yield NaN.
df1["proposal_id"] = df1["snapshot_url"].str.extract(r"proposal/([a-fA-F0-9x]+)", expand=False)

# === Merge: bring space, proposal_id, and end_iso from file2 ===
# file2 is used purely as a lookup table, so it must have exactly one row per key:
#   - duplicate proposal_id rows would multiply the matching rows of file1;
#   - pandas matches NaN keys to NaN keys, so a null proposal_id in file2 would be
#     attached to every file1 row whose URL failed to parse.
proposal_lookup = (
    df2[["proposal_id", "space", "end_iso"]]
    .dropna(subset=["proposal_id"])
    .drop_duplicates(subset="proposal_id", keep="first")
)
df_merged = df1.merge(
    proposal_lookup,
    on="proposal_id",
    how="left"
)
# A left merge against a unique-key lookup must preserve the row count of file1.
assert len(df_merged) == len(df1), "Merge changed the number of decision-run rows."

# === Save merged file ===
df_merged.to_csv(outfile, index=False)

print(f"✅ Merged file saved to: {outfile}")


✅ Merged file saved to: /Users/chunghyunhan/Projects/agentics/Decision_runs_result_saving/decision_runs_consolidated_merged_test.csv


In [11]:
# ============================== Event Study (±3d, ±7d) ==============================
# Improvements:
# - Handle abnormal data: if price/index <= 0 → NaN → forward fill
# - Price impact: compare average prices in [-k,-1] vs [+1,+k] (excludes day 0)
# - CAR (market-adjusted): sum of (token_ret - mkt_ret) over [-k,+k] (includes day 0)
# - tqdm progress bar for event loop

import pandas as pd
import numpy as np
from pathlib import Path
from tqdm import tqdm

# ===================== Paths =====================
# Inputs: merged events CSV, DAO registry (space -> CMC_ucid), daily token prices,
# and the market index workbook. Output: the per-event study results CSV.
FILE_EVENTS = Path("/Users/chunghyunhan/Projects/agentics/Decision_runs_result_saving/decision_runs_consolidated_merged_test.csv")
FILE_REG    = Path("/Users/chunghyunhan/Projects/agentics/src/agentics/assets_registry/dao_registry.csv")
FILE_PRICES = Path("/Users/chunghyunhan/Projects/agentics/cmc_historical_daily_2013_2025.parquet")
FILE_INDEX  = Path("/Users/chunghyunhan/Projects/agentics/SNP_cryptoIndex.xlsx")
OUT_CSV     = Path("/Users/chunghyunhan/Projects/agentics/Decision_runs_result_saving/decision_runs_consolidated_merged_test_eventstudy.csv")

# ===================== Load inputs =====================
events, reg = pd.read_csv(FILE_EVENTS), pd.read_csv(FILE_REG)
prices = pd.read_parquet(FILE_PRICES)
index_df = pd.read_excel(FILE_INDEX, sheet_name=0)   # first sheet of the workbook

# ---- Schema checks: fail fast if a required column is missing ----
assert {'space', 'end_iso'}.issubset(events.columns)
assert {'space', 'CMC_ucid'}.issubset(reg.columns)

# Column names used to address the prices table (edit col_close if the price column differs)
col_ucid, col_date, col_close = 'ucid', 'date', 'price_USD'
for c in (col_ucid, col_date, col_close):
    assert c in prices.columns, f"prices missing '{c}'"

# Index workbook: normalise header labels (stringify, trim, lower-case) before checking them
index_df.columns = list(map(lambda label: str(label).strip().lower(), index_df.columns))
assert {'date', 'index'}.issubset(index_df.columns)

# ===================== Preprocess events =====================
# Event day = calendar date (in UTC) of end_iso, stored as a tz-naive midnight timestamp.
# Unparseable end_iso values become NaT (errors='coerce').
events['event_dt'] = pd.to_datetime(events['end_iso'], utc=True, errors='coerce').dt.date
events['event_dt'] = pd.to_datetime(events['event_dt'])
events['event_date_str'] = events['event_dt'].dt.strftime('%Y-%m-%d')

# Attach CMC_ucid from the registry. The registry is a lookup table, so it must hold
# one row per space: duplicate 'space' rows would multiply event rows in the left merge,
# and null keys would match null 'space' values in events.
reg_lookup = reg[['space', 'CMC_ucid']].dropna(subset=['space', 'CMC_ucid'])
n_dup_spaces = int(reg_lookup['space'].duplicated().sum())
if n_dup_spaces:
    print(f"[WARN] registry has {n_dup_spaces} duplicate 'space' row(s); keeping the first CMC_ucid per space")
reg_lookup = reg_lookup.drop_duplicates(subset='space', keep='first')

n_events_before = len(events)
events = events.merge(reg_lookup, on='space', how='left')
# A left merge against a unique-key lookup must not add or drop events.
assert len(events) == n_events_before, "Registry merge changed the number of event rows."

# ===================== Preprocess prices =====================
prices = prices.copy()
prices[col_date] = pd.to_datetime(prices[col_date], utc=False).dt.normalize()
prices[col_close] = pd.to_numeric(prices[col_close], errors='coerce')

# Put every token's rows in chronological order BEFORE any order-dependent step:
# groupby().ffill() and groupby().diff() both operate on consecutive rows as stored,
# so an unsorted table would fill from / difference against the wrong day.
prices = prices.sort_values([col_ucid, col_date], kind='stable').reset_index(drop=True)

# Abnormal values (<= 0) -> NaN -> forward fill with the token's previous valid price
prices.loc[prices[col_close] <= 0, col_close] = np.nan
prices[col_close] = prices.groupby(col_ucid)[col_close].ffill()

# Daily log-return per token: log(P_t) - log(P_{t-1}); the first row of each token is NaN
prices['log_price'] = np.log(prices[col_close])
prices['token_ret'] = prices.groupby(col_ucid)['log_price'].diff()
prices = prices.drop(columns=['log_price'])

# ===================== Preprocess index =====================
index_df = index_df.rename(columns={'date': 'mkt_date', 'index': 'mkt_index'})
index_df['mkt_date']  = pd.to_datetime(index_df['mkt_date']).dt.normalize()
index_df['mkt_index'] = pd.to_numeric(index_df['mkt_index'], errors='coerce')

# Sort chronologically BEFORE forward-filling: ffill() copies the value from the
# preceding row, so filling in file order could propagate a level from the wrong date.
index_df = index_df.sort_values('mkt_date').reset_index(drop=True)

# Abnormal values (<= 0) -> NaN -> forward fill with the previous valid index level
index_df.loc[index_df['mkt_index'] <= 0, 'mkt_index'] = np.nan
index_df['mkt_index'] = index_df['mkt_index'].ffill()

# Market log-return between consecutive index rows; the first row is NaN
index_df['log_mkt'] = np.log(index_df['mkt_index'])
index_df['mkt_ret'] = index_df['log_mkt'].diff()
index_df = index_df.drop(columns=['log_mkt'])

# ===================== Helpers =====================
def token_panel(ucid: int) -> pd.DataFrame:
    """Build the daily panel for one token.

    Selects the rows of the module-level ``prices`` table whose id column equals
    ``ucid`` and left-joins the market return from ``index_df`` on the date.

    Returns a frame with columns ``date``, ``price``, ``token_ret`` and ``mkt_ret``,
    sorted by ``date`` with a fresh 0..n-1 index. Dates that have no row in
    ``index_df`` keep a NaN ``mkt_ret``. The frame is empty when no price row matches.
    """
    is_token = prices[col_ucid] == ucid
    token_rows = (
        prices.loc[is_token, [col_date, col_close, 'token_ret']]
        .rename(columns={col_date: 'date', col_close: 'price'})
    )
    with_market = token_rows.merge(
        index_df[['mkt_date', 'mkt_ret']],
        left_on='date',
        right_on='mkt_date',
        how='left',
    )
    # 'mkt_date' was only needed as the join key
    return (
        with_market
        .drop(columns=['mkt_date'])
        .sort_values('date')
        .reset_index(drop=True)
    )

def pre_post_avg_price_impact(panel: pd.DataFrame, event_date: pd.Timestamp, k: int):
    """Compare the mean price before and after an event, excluding the event day.

    Parameters
    ----------
    panel : frame with a ``date`` column and a ``price`` column.
    event_date : day 0 of the event.
    k : window half-width in calendar days.

    Returns
    -------
    (pre_avg, post_avg, impact) where ``pre_avg`` is the mean price over
    [event_date - k, event_date - 1], ``post_avg`` is the mean price over
    [event_date + 1, event_date + k], and ``impact`` is
    ``(post_avg - pre_avg) / pre_avg`` as a fraction (1.0 means +100%).
    A side with no rows yields NaN; ``impact`` is NaN when ``pre_avg`` is NaN or 0.
    """
    by_date = panel.set_index('date')
    dates = by_date.index
    one_day = pd.Timedelta(days=1)
    span = pd.Timedelta(days=k)

    before = by_date.loc[(dates >= event_date - span) & (dates <= event_date - one_day)]
    after = by_date.loc[(dates >= event_date + one_day) & (dates <= event_date + span)]

    pre_avg = np.nan if before.empty else before['price'].mean()
    post_avg = np.nan if after.empty else after['price'].mean()

    # No usable baseline -> the relative change is undefined
    if pd.isna(pre_avg) or pre_avg == 0:
        return pre_avg, post_avg, np.nan
    return pre_avg, post_avg, (post_avg - pre_avg) / pre_avg

def window_sum(series: pd.Series, start_date: pd.Timestamp, a: int, b: int):
    """Sum ``series`` over the inclusive date window [start_date + a, start_date + b].

    Parameters
    ----------
    series : values indexed by date.
    start_date : reference day (day 0).
    a, b : window bounds as calendar-day offsets from ``start_date`` (``a`` may be negative).

    Returns
    -------
    The sum of the non-NaN values inside the window, or NaN when the window
    contains no non-NaN value at all. Without ``min_count=1`` pandas returns 0.0
    for an empty / all-NaN selection, which is indistinguishable from a genuine
    zero cumulative return.
    """
    idx = series.index
    left = start_date + pd.Timedelta(days=a)
    right = start_date + pd.Timedelta(days=b)
    in_window = (idx >= left) & (idx <= right)
    return series.loc[in_window].sum(min_count=1)

def event_abnormal_returns(panel: pd.DataFrame, event_date: pd.Timestamp, a: int, b: int):
    """Cumulative market-adjusted and raw returns over an event window.

    Parameters
    ----------
    panel : frame with a ``date`` column plus ``token_ret`` and ``mkt_ret``.
    event_date : day 0 of the event.
    a, b : window bounds as day offsets from ``event_date``; day 0 is included
        whenever ``a <= 0 <= b``.

    Returns
    -------
    (car, crr): ``car`` is ``window_sum`` of ``token_ret - mkt_ret`` and ``crr``
    is ``window_sum`` of ``token_ret`` over the same window. Returns
    ``(nan, nan)`` when either return column is absent from ``panel``.
    """
    indexed = panel.set_index('date')
    needed = ('token_ret', 'mkt_ret')
    if not all(col in indexed for col in needed):
        return np.nan, np.nan

    raw_ret = indexed['token_ret']
    abnormal_ret = raw_ret - indexed['mkt_ret']   # NaN on days where either return is missing
    return (
        window_sum(abnormal_ret, event_date, a, b),
        window_sum(raw_ret, event_date, a, b),
    )

# ===================== Compute per-event =====================
# Every output row carries the same result columns, in this order, whether or not
# the event could be evaluated (unevaluated events keep NaN in all of them).
RESULT_COLS = [
    'pre_avg_price_3', 'post_avg_price_3', 'price_impact_pct_3d',
    'pre_avg_price_7', 'post_avg_price_7', 'price_impact_pct_7d',
    'CAR_madj_[-3,+3]', 'CAR_madj_[-3,+3]_pct',
    'CRR_raw_[-3,+3]', 'CRR_raw_[-3,+3]_pct',
    'CAR_madj_[-7,+7]', 'CAR_madj_[-7,+7]_pct',
    'CRR_raw_[-7,+7]', 'CRR_raw_[-7,+7]_pct',
]


def _parse_ucid(raw):
    """Return ``raw`` as an int token id, or None if it is missing or not integer-valued.

    Accepts float-formatted integers such as 1027.0 / "1027.0": a left merge that
    leaves any event unmatched upcasts the id column to float, and int("1027.0")
    would raise ValueError.
    """
    if pd.isna(raw):
        return None
    text = str(raw).strip()
    try:
        return int(text)
    except ValueError:
        pass
    try:
        value = float(text)
    except ValueError:
        return None
    # is_integer() is False for inf/nan and for ids with a fractional part
    return int(value) if value.is_integer() else None


def _log_to_pct(log_ret):
    """Convert a cumulative log return to a simple return in percent (NaN stays NaN)."""
    return np.expm1(log_ret) * 100 if pd.notna(log_ret) else np.nan


panel_cache = {}   # ucid -> token panel; many events share a token, so build each panel once
out_rows = []
for _, row in tqdm(events.iterrows(), total=len(events), desc="Event study", unit="event"):
    # Start from an all-NaN result and fill it in only if the event can be evaluated.
    result = {**row.to_dict(), **dict.fromkeys(RESULT_COLS, np.nan)}
    out_rows.append(result)

    ucid_int = _parse_ucid(row.get('CMC_ucid', np.nan))
    event_dt = row.get('event_dt', pd.NaT)
    if ucid_int is None or pd.isna(event_dt):
        continue

    if ucid_int not in panel_cache:
        panel_cache[ucid_int] = token_panel(ucid_int)
    panel = panel_cache[ucid_int]
    if panel.empty:
        continue

    event_ts = pd.to_datetime(event_dt)
    for k in (3, 7):
        pre_avg, post_avg, impact = pre_post_avg_price_impact(panel, event_ts, k)
        car, crr = event_abnormal_returns(panel, event_ts, -k, +k)
        result.update({
            f'pre_avg_price_{k}': pre_avg,
            f'post_avg_price_{k}': post_avg,
            f'price_impact_pct_{k}d': impact,
            f'CAR_madj_[-{k},+{k}]': car,
            f'CAR_madj_[-{k},+{k}]_pct': _log_to_pct(car),
            f'CRR_raw_[-{k},+{k}]': crr,
            f'CRR_raw_[-{k},+{k}]_pct': _log_to_pct(crr),
        })

out = pd.DataFrame(out_rows)

# ===================== Save =====================
print("Events input rows:", len(events))
print("Events output rows:", len(out))
assert len(out) == len(events), "Row count mismatch between input events and output results."

out.to_csv(OUT_CSV, index=False)
print(f"[OK] Saved event-study results to: {OUT_CSV}")


Event study: 100%|██████████| 319/319 [00:28<00:00, 11.16event/s]


Events input rows: 319
Events output rows: 319
[OK] Saved event-study results to: /Users/chunghyunhan/Projects/agentics/Decision_runs_result_saving/decision_runs_consolidated_merged_test_eventstudy.csv
