In [None]:
# Run this in the same session where you ran the diagnostic.
# It will clean corrupted globals, patch yfinance, and run a small smoke test then the full batch.

import importlib, sys, builtins, traceback, types, os, time as real_time
print("Cleaning bad globals and restoring modules...")

# list of global names that diagnostics reported as strings
bad_names = ["time","np","pd","download","str","start","end","sleep"]

g = globals()
removed = []
for name in bad_names:
    if name in g:
        val = g[name]
        # Only remove if it's a str or otherwise suspicious
        if isinstance(val, str) or isinstance(val, (types.FunctionType,)) and val.__class__ is str:
            try:
                del g[name]
                removed.append(name)
            except Exception as e:
                print("Could not del global", name, e)

# restore common modules into globals (safe)
import numpy as np, pandas as pd
g["np"] = np
g["pd"] = pd
g["time"] = real_time  # safe alias

# restore builtins.str if somebody replaced it in globals by a string:
# (we do NOT override builtins.str; we only remove a global named 'str' so builtin str() works)
if "str" in g:
    try:
        del g["str"]
        removed.append("str")
    except Exception as e:
        print("could not remove global 'str':", e)

print("Removed globals:", removed)
print("Globals now: np ->", type(g.get("np")), " pd ->", type(g.get("pd")), " time ->", type(g.get("time")))

# patch yfinance internals (yfinance.utils._time must be real time)
import yfinance as yf
try:
    import yfinance.utils as yutils
    if getattr(yutils, "_time", None) is None or not callable(getattr(yutils, "_time", None)) and not hasattr(getattr(yutils, "_time", None), "sleep"):
        yutils._time = real_time
        print("Patched yfinance.utils._time -> real time module")
    else:
        print("yfinance.utils._time OK:", type(yutils._time))
except Exception as e:
    print("Could not patch yfinance.utils:", e)
    traceback.print_exc()

# also patch yfinance.multi._time if present
try:
    import yfinance.multi as ym
    if getattr(ym, "_time", None) is None or isinstance(getattr(ym, "_time", None), str):
        ym._time = real_time
        print("Patched yfinance.multi._time -> real time module")
except Exception:
    pass

# ---- robust fetch function (uses your Ticker.history first) ----
import pandas as pd
def fetch_close_series_like_user(ticker: str, start=None, end=None, max_retries=3):
    """Try yf.Ticker.history first (your working path), then fallback to yf.download.
       Returns pd.Series (close/adj close) or None.
    """
    for attempt in range(1, max_retries+1):
        try:
            df_hist = yf.Ticker(ticker).history(start=start, end=end, auto_adjust=True)
            if df_hist is not None and not df_hist.empty:
                # pick Close or Adj Close if present, else first column
                if "Close" in df_hist.columns:
                    s = df_hist["Close"].astype(float)
                elif "Adj Close" in df_hist.columns:
                    s = df_hist["Adj Close"].astype(float)
                else:
                    s = pd.Series(df_hist.iloc[:,0]).astype(float)
                s = s.dropna()
                s.index = pd.to_datetime(s.index)
                if getattr(s.index, "tz", None) is not None:
                    s.index = s.index.tz_localize(None)
                return s.rename(ticker)
            # fallback to download
            df = yf.download(ticker, start=start, end=end, auto_adjust=True, progress=False)
            if df is None or df.empty:
                return None
            col = "Adj Close" if "Adj Close" in df.columns else "Close"
            s = df[col].astype(float).dropna()
            s.index = pd.to_datetime(s.index)
            if getattr(s.index, "tz", None) is not None:
                s.index = s.index.tz_localize(None)
            return s.rename(ticker)
        except Exception as e:
            msg = str(e)
            # handle delisted/timezone errors gracefully
            if "YFTzMissingError" in msg or "Quote not found" in msg or "Not Found" in msg:
                print(f"yfinance indicates missing/delisted for {ticker}: {msg}")
                return None
            wait = min(5.0, 1.5 ** attempt)
            print(f"fetch attempt {attempt} failed for {ticker}: {e}  -- retrying in {wait:.1f}s")
            real_time.sleep(wait)
    print(f"error fetching {ticker} after {max_retries} attempts")
    return None

# ---- quick smoke test using your fetch (4 tickers) ----
test_tickers = ["^NSEI", "AAPL", "MSFT", "RELIANCE.NS"]
print("\nRunning smoke test for:", test_tickers)
smoke_ok = True
for t in test_tickers:
    try:
        s = fetch_close_series_like_user(t, start="2018-01-01", end=None)
        if s is None:
            print(t, " -> NO DATA")
            smoke_ok = False
        else:
            print(t, " -> rows:", len(s), "range:", s.index[0].date(), "to", s.index[-1].date())
    except Exception as e:
        print("fetch error for", t, e)
        smoke_ok = False

if not smoke_ok:
    print("\nSmoke test failed for one or more tickers. If this persists, do a runtime reset or paste the smoke output here.")
else:
    # ---- full batch using your fetch function for all 50 tickers ----
    TICKERS = [
     "AAPL","MSFT","AMZN","GOOG","META","NVDA","TSLA","BRK-B","JPM","BAC",
     "JNJ","V","MA","PG","KO","PFE","XOM","CVX","NFLX","DIS",
     "RELIANCE.NS","TCS.NS","HDFCBANK.NS","ICICIBANK.NS","INFY.NS","LT.NS",
     "BHARTIARTL.NS","ITC.NS","SUNPHARMA.NS","MARUTI.NS","AXISBANK.NS","HINDUNILVR.NS",
     "EURUSD=X","GBPUSD=X","USDJPY=X","AUDUSD=X","USDCAD=X","USDCHF=X","NZDUSD=X","USDINR=X",
     "GC=F","SI=F","CL=F","BZ=F","NG=F","ZC=F","SUGAR=F","CATTLE=F","GLD","BTC-USD"
    ]
    outdir = "momentum_results"
    os.makedirs(outdir, exist_ok=True)
    summary_rows = []
    for i,t in enumerate(TICKERS, start=1):
        print(f"[{i}/{len(TICKERS)}] {t}", end=" ... ")
        s = fetch_close_series_like_user(t, start="2018-01-01", end=None)
        if s is None or len(s) < 250:
            print("skip (insufficient/missing)")
            summary_rows.append({"ticker":t,"status":"skip","rows": None,"cagr": float("nan")})
            continue
        # compute tanh signal (your compute_tanh_signal expects returns)
        r = s.pct_change().fillna(0.0)
        # simple tanh signal (lookback 63)
        v = r.pow(2).ewm(span=63, adjust=False, min_periods=63).mean().pow(0.5)
        u = r / (v + 1e-12)
        m = u.ewm(span=63, adjust=False, min_periods=63).mean()
        z = np.tanh(m)
        std = z.dropna().std(ddof=0) if not z.dropna().empty else 1.0
        sig = (z / std).fillna(0.0).shift(1)
        # run your capital-sizing backtest (reusing backtest in your code)
        # here replicate your backtest_capital_sizing directly:
        idx = r.index
        cap = float(100000.0)
        prev_pos = 0.0
        block = 0
        in_trade = False
        cap0 = cap
        pos_list, cost_list, pnl_list, cap_list, ret_list = [], [], [], [], []
        bps = 1.0 / 10000.0
        for t_ix in range(len(idx)):
            st = float(sig.iloc[t_ix]) if np.isfinite(sig.iloc[t_ix]) else 0.0
            if block > 0:
                pos = 0.0
                block -= 1
            else:
                pos = st * cap
            turn = abs(pos - prev_pos)
            cost = bps * turn
            rt = float(r.iloc[t_ix]) if np.isfinite(r.iloc[t_ix]) else 0.0
            pnl = pos * rt
            prev_cap = cap
            cap = cap + pnl - cost
            if pos != 0.0 and not in_trade:
                in_trade = True
                cap0 = cap
            if in_trade:
                trade_pnl = cap - cap0
                if trade_pnl <= -0.01 * cap0 or trade_pnl >= 0.06 * cap0:
                    block = 1
                    in_trade = False
            pos_list.append(pos); cost_list.append(cost); pnl_list.append(pnl); cap_list.append(cap)
            ret_list.append((pnl - cost) / max(prev_cap, 1e-12))
            prev_pos = pos
        bt = pd.DataFrame({"position_inr":pd.Series(pos_list,index=idx),"pnl_inr":pd.Series(pnl_list,index=idx),"cost_inr":pd.Series(cost_list,index=idx),"capital_inr":pd.Series(cap_list,index=idx),"strategy_ret":pd.Series(ret_list,index=idx)})
        bt["equity"] = bt["capital_inr"] / 100000.0
        # compute metrics
        total_return = bt["equity"].iloc[-1] - 1.0
        ann_ret = bt["strategy_ret"].mean() * 252
        ann_vol = bt["strategy_ret"].std(ddof=1) * (252**0.5) if len(bt) > 1 else float("nan")
        sharpe = ann_ret / ann_vol if ann_vol and ann_vol > 0 else float("nan")
        cum = bt["equity"].cummax()
        drawdown = bt["equity"] / cum - 1.0
        maxdd = drawdown.min()
        summary_rows.append({"ticker":t,"status":"ok","rows": len(s),"cagr": float((bt["equity"].iloc[-1]) ** (1.0 / (len(bt)/252)) - 1.0) if len(bt)>1 else float("nan"), "sharpe":sharpe,"total_return":total_return,"max_drawdown":maxdd})
        bt.to_csv(os.path.join(outdir, f"{t.replace('/','_')}_backtest.csv"))
        print("ok")
    # save summary
    import pandas as pd
    summary_df = pd.DataFrame(summary_rows).sort_values("cagr", ascending=False, na_position="last").reset_index(drop=True)
    summary_df.to_csv(os.path.join(outdir,"summary.csv"), index=False)
    print("\nBatch finished. Summary saved to", os.path.join(outdir,"summary.csv"))
    print(summary_df.head(20).to_string(index=False))


Cleaning bad globals and restoring modules...
Removed globals: []
Globals now: np -> <class 'module'>  pd -> <class 'module'>  time -> <class 'module'>
yfinance.utils._time OK: <class 'module'>

Running smoke test for: ['^NSEI', 'AAPL', 'MSFT', 'RELIANCE.NS']
^NSEI  -> rows: 1940 range: 2018-01-02 to 2025-11-14
AAPL  -> rows: 1980 range: 2018-01-02 to 2025-11-14
MSFT  -> rows: 1980 range: 2018-01-02 to 2025-11-14
RELIANCE.NS  -> rows: 1945 range: 2018-01-01 to 2025-11-14
[1/50] AAPL ... ok
[2/50] MSFT ... ok
[3/50] AMZN ... ok
[4/50] GOOG ... ok
[5/50] META ... ok
[6/50] NVDA ... ok
[7/50] TSLA ... ok
[8/50] BRK-B ... ok
[9/50] JPM ... ok
[10/50] BAC ... ok
[11/50] JNJ ... ok
[12/50] V ... ok
[13/50] MA ... ok
[14/50] PG ... ok
[15/50] KO ... ok
[16/50] PFE ... ok
[17/50] XOM ... ok
[18/50] CVX ... ok
[19/50] NFLX ... ok
[20/50] DIS ... ok
[21/50] RELIANCE.NS ... ok
[22/50] TCS.NS ... ok
[23/50] HDFCBANK.NS ... ok
[24/50] ICICIBANK.NS ... ok
[25/50] INFY.NS ... ok
[26/50] LT.NS ... ok


  summary_rows.append({"ticker":t,"status":"ok","rows": len(s),"cagr": float((bt["equity"].iloc[-1]) ** (1.0 / (len(bt)/252)) - 1.0) if len(bt)>1 else float("nan"), "sharpe":sharpe,"total_return":total_return,"max_drawdown":maxdd})


ok
[44/50] BZ=F ... ok
[45/50] NG=F ... ok
[46/50] ZC=F ... ok
[47/50] SUGAR=F ... 

ERROR:yfinance:HTTP Error 404: {"quoteSummary":{"result":null,"error":{"code":"Not Found","description":"Quote not found for symbol: SUGAR=F"}}}
ERROR:yfinance:$SUGAR=F: possibly delisted; no timezone found
ERROR:yfinance:HTTP Error 404: {"quoteSummary":{"result":null,"error":{"code":"Not Found","description":"Quote not found for symbol: SUGAR=F"}}}
ERROR:yfinance:
1 Failed download:
ERROR:yfinance:['SUGAR=F']: YFTzMissingError('possibly delisted; no timezone found')
ERROR:yfinance:$CATTLE=F: possibly delisted; no timezone found
ERROR:yfinance:
1 Failed download:
ERROR:yfinance:['CATTLE=F']: YFTzMissingError('possibly delisted; no timezone found')


skip (insufficient/missing)
[48/50] CATTLE=F ... skip (insufficient/missing)
[49/50] GLD ... ok
[50/50] BTC-USD ... ok

Batch finished. Summary saved to momentum_results/summary.csv
      ticker status   rows      cagr    sharpe  total_return  max_drawdown
     BTC-USD     ok 2876.0  0.263813  0.698731     13.470198     -0.744437
        NVDA     ok 1980.0  0.238613  0.667569      4.372904     -0.651042
         GLD     ok 1980.0  0.039074  0.294111      0.351430     -0.381760
         JPM     ok 1980.0  0.029541  0.261673      0.257023     -0.757367
        GC=F     ok 1981.0  0.026883  0.234066      0.231879     -0.414064
    USDJPY=X     ok 2051.0  0.011119  0.163271      0.094172     -0.216332
         PFE     ok 1980.0 -0.009341  0.087509     -0.071086     -0.509576
SUNPHARMA.NS     ok 1945.0 -0.013002  0.079995     -0.096078     -0.490087
      ITC.NS     ok 1945.0 -0.014802  0.091869     -0.108721     -0.617138
       LT.NS     ok 1945.0 -0.017292  0.132520     -0.125962     -0.

In [None]:
# Paste & run in the notebook that already has momentum_results/ created
import os, math, json, traceback
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import stats

ROOT = Path("momentum_results")
OUTP = ROOT / "analysis_plots"
OUTP.mkdir(parents=True, exist_ok=True)

# load summary
summary_path = ROOT / "summary.csv"
if not summary_path.exists():
    raise FileNotFoundError("momentum_results/summary.csv not found. Run batch first.")
summary = pd.read_csv(summary_path)
summary = summary.sort_values("cagr", ascending=False).reset_index(drop=True)

# helper: load backtest for a ticker
def load_bt(ticker):
    fname = ROOT / f"{ticker.replace('/','_')}_backtest.csv"
    if not fname.exists():
        return None
    df = pd.read_csv(fname, index_col=0, parse_dates=True)
    return df

# extract trades function (reproduced for clarity)
def extract_trades_df(pos, strat_ret):
    p = pos.fillna(0.0).astype(float)
    sr = strat_ret.fillna(0.0).astype(float)
    if p.empty:
        return pd.DataFrame(columns=["start","end","side","bars","trade_return"])
    ch = p.ne(p.shift(1)).fillna(p.ne(0.0))
    idx = p.index
    cps = list(np.where(ch.values)[0])
    if cps and cps[0] != 0:
        cps = [0] + cps
    if not cps:
        cps = [0]
    cps.append(len(p))
    trades=[]
    for i in range(len(cps)-1):
        s=cps[i]; e=cps[i+1]
        v=float(p.iloc[s])
        if v==0.0:
            continue
        period_idx = idx[s:e]
        tr = float((1.0 + sr.loc[period_idx]).prod() - 1.0)
        trades.append({"start": period_idx[0], "end": period_idx[-1], "side": "long" if v>0 else "short", "bars": int(len(period_idx)), "trade_return": tr})
    return pd.DataFrame(trades)

# bootstrap p-value helper
def bootstrap_pvalue(series, iters=2000, seed=42):
    arr = np.array(series.dropna())
    if arr.size == 0:
        return np.nan
    rng = np.random.RandomState(seed)
    obs = arr.mean()
    means = []
    for _ in range(iters):
        sample = rng.choice(arr, size=arr.size, replace=True)
        means.append(sample.mean())
    means = np.array(means)
    pval = (np.abs(means) >= np.abs(obs)).mean()
    return float(pval)

# analysis per ticker
analysis_rows = []
top_n = 6
winners = summary.dropna(subset=["cagr"]).nlargest(top_n, "cagr")["ticker"].tolist()
losers = summary.dropna(subset=["cagr"]).nsmallest(top_n, "cagr")["ticker"].tolist()

print("Top winners:", winners)
print("Top losers: ", losers)

for row in summary.itertuples(index=False):
    t = row.ticker
    bt = load_bt(t)
    if bt is None:
        analysis_rows.append({"ticker":t, "status": "missing"})
        continue
    # trades
    trades = extract_trades_df(bt["position_inr"], bt["strategy_ret"])
    n_trades = 0 if trades.empty else len(trades)
    win_rate = np.nan if trades.empty else (trades["trade_return"] > 0).mean()
    avg_trade = np.nan if trades.empty else trades["trade_return"].mean()
    median_trade = np.nan if trades.empty else trades["trade_return"].median()
    avg_hold = np.nan if trades.empty else trades["bars"].mean()
    # annualized stats (recompute to be safe)
    equity = bt["equity"]
    strat = bt["strategy_ret"]
    total_return = float(equity.iloc[-1] - 1.0)
    years = len(strat) / 252.0
    cagr = (float(equity.iloc[-1]) ** (1.0 / years) - 1.0) if years>0 else np.nan
    ann_ret = float(strat.mean() * 252.0)
    ann_vol = float(strat.std(ddof=1) * math.sqrt(252.0)) if len(strat)>1 else np.nan
    sharpe = ann_ret / ann_vol if ann_vol and ann_vol>0 else np.nan
    # bootstrap p-value for mean strategy return
    pval = bootstrap_pvalue(strat, iters=2000, seed=123)
    analysis_rows.append({
        "ticker": t,
        "status": "ok",
        "rows": len(bt),
        "n_trades": n_trades,
        "win_rate": win_rate,
        "avg_trade": avg_trade,
        "median_trade": median_trade,
        "avg_hold_bars": avg_hold,
        "total_return": total_return,
        "cagr": cagr,
        "ann_ret": ann_ret,
        "ann_vol": ann_vol,
        "sharpe": sharpe,
        "bootstrap_pval": pval,
        "max_drawdown": row.max_drawdown if "max_drawdown" in row._fields else np.nan
    })

# save analysis table
analysis_df = pd.DataFrame(analysis_rows)
analysis_df.to_csv(ROOT / "analysis_summary.csv", index=False)
print("Wrote momentum_results/analysis_summary.csv")

# plot top winners/losers equity curves and trade return histograms
def plot_equity_and_trades(ticker, bt):
    fig, ax = plt.subplots(1,2, figsize=(12,3))
    bt["equity"].plot(ax=ax[0], title=f"{ticker} equity (final {bt['equity'].iloc[-1]:.3f})")
    ax[0].set_ylabel("Equity (rebased)")
    trades = extract_trades_df(bt["position_inr"], bt["strategy_ret"])
    if not trades.empty:
        ax[1].hist(trades["trade_return"].dropna(), bins=40)
        ax[1].set_title(f"{ticker} trade returns (n={len(trades)})")
        ax[1].set_xlabel("trade return")
    else:
        ax[1].text(0.5,0.5,"no trades", ha="center")
    plt.tight_layout()
    plt.savefig(OUTP / f"{ticker}_eq_trades.png")
    plt.close()

for t in winners + losers:
    bt = load_bt(t)
    if bt is not None:
        plot_equity_and_trades(t, bt)

# summary prints
print("\nTop 10 by CAGR (summary.csv):")
print(summary.head(10).to_string(index=False))
print("\nAnalysis head (analysis_summary.csv):")
print(analysis_df.sort_values("cagr", ascending=False).head(12).to_string(index=False))

print("\nPlots saved to", OUTP)


Top winners: ['BTC-USD', 'NVDA', 'GLD', 'JPM', 'GC=F', 'USDJPY=X']
Top losers:  ['CVX', 'MA', 'NFLX', 'BAC', 'HDFCBANK.NS', 'NG=F']
Wrote momentum_results/analysis_summary.csv

Top 10 by CAGR (summary.csv):
      ticker status   rows      cagr   sharpe  total_return  max_drawdown
     BTC-USD     ok 2876.0  0.263813 0.698731     13.470198     -0.744437
        NVDA     ok 1980.0  0.238613 0.667569      4.372904     -0.651042
         GLD     ok 1980.0  0.039074 0.294111      0.351430     -0.381760
         JPM     ok 1980.0  0.029541 0.261673      0.257023     -0.757367
        GC=F     ok 1981.0  0.026883 0.234066      0.231879     -0.414064
    USDJPY=X     ok 2051.0  0.011119 0.163271      0.094172     -0.216332
         PFE     ok 1980.0 -0.009341 0.087509     -0.071086     -0.509576
SUNPHARMA.NS     ok 1945.0 -0.013002 0.079995     -0.096078     -0.490087
      ITC.NS     ok 1945.0 -0.014802 0.091869     -0.108721     -0.617138
       LT.NS     ok 1945.0 -0.017292 0.132520     -0.

In [None]:
#!/usr/bin/env python3
"""
make_report_figs.py

Load per-ticker backtests from momentum_results/ and create a small set
of publication-quality figures for the LaTeX report. Also write a LaTeX
snippet file (figures.tex) that includes the generated PNGs.

Usage:
    python make_report_figs.py

Dependencies:
    pip install pandas numpy matplotlib
"""
import os
import math
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# ---- user settings ----
RESULTS_DIR = Path("momentum_results")
OUT_DIR = Path("report_figs")
OUT_DIR.mkdir(exist_ok=True)
SUMMARY_CSV = RESULTS_DIR / "summary.csv"
TOP_N = 3   # top 3 winners
BOTTOM_N = 3  # top 3 losers
FIG_DPI = 200

# ---- helpers ----
def load_backtest(ticker: str):
    fn = RESULTS_DIR / f"{ticker.replace('/','_')}_backtest.csv"
    if not fn.exists():
        return None
    df = pd.read_csv(fn, index_col=0, parse_dates=True)
    return df

def extract_trades(pos, strat_ret):
    p = pos.fillna(0.0).astype(float)
    sr = strat_ret.fillna(0.0).astype(float)
    if p.empty:
        return pd.DataFrame(columns=["start","end","side","bars","trade_return"])
    ch = p.ne(p.shift(1)).fillna(p.ne(0.0))
    idx = p.index
    cps = list(np.where(ch.values)[0])
    if cps and cps[0] != 0:
        cps = [0] + cps
    if not cps:
        cps = [0]
    cps.append(len(p))
    trades = []
    for i in range(len(cps)-1):
        s = cps[i]; e = cps[i+1]
        v = float(p.iloc[s])
        if v == 0.0:
            continue
        period_idx = idx[s:e]
        tr = float((1.0 + sr.loc[period_idx]).prod() - 1.0)
        trades.append({"start": period_idx[0], "end": period_idx[-1], "side": "long" if v>0 else "short", "bars": int(len(period_idx)), "trade_return": tr})
    return pd.DataFrame(trades)

def safe_plot_save(fig, path):
    fig.tight_layout()
    fig.savefig(path, dpi=FIG_DPI)
    plt.close(fig)

# ---- load summary and select tickers ----
if not SUMMARY_CSV.exists():
    raise FileNotFoundError(f"summary.csv not found in {RESULTS_DIR}. Run batch first.")

summary = pd.read_csv(SUMMARY_CSV)
# ensure cagr numeric
summary['cagr'] = pd.to_numeric(summary['cagr'], errors='coerce')
top = summary.dropna(subset=['cagr']).nlargest(TOP_N, 'cagr')['ticker'].tolist()
bottom = summary.dropna(subset=['cagr']).nsmallest(BOTTOM_N, 'cagr')['ticker'].tolist()

# fallback if not enough
selected = []
for t in top:
    selected.append(t)
for t in bottom:
    if t not in selected:
        selected.append(t)

print("Selected tickers for figures:", selected)

# ---- 1) Combined equity curves for top N winners ----
def plot_top_equities(tickers, outpath):
    fig, ax = plt.subplots(figsize=(8,4))
    for t in tickers:
        bt = load_backtest(t)
        if bt is None:
            print(f"warning: backtest file missing for {t}")
            continue
        ax.plot(bt.index, bt['equity'], label=f"{t} (final {bt['equity'].iloc[-1]:.2f})", linewidth=1.2)
    ax.set_title("Top %d Tickers — Equity Curves" % len(tickers))
    ax.set_xlabel("Date"); ax.set_ylabel("Equity (relative)")
    ax.legend(loc='best', fontsize=8)
    safe_plot_save(fig, outpath)

plot_top_equities(top, OUT_DIR / "01_top3_equity.png")

# ---- 2) Drawdown plot for top N ----
def drawdown(series):
    cm = series.cummax()
    return series / cm - 1.0

def plot_top_drawdowns(tickers, outpath):
    fig, ax = plt.subplots(figsize=(8,4))
    for t in tickers:
        bt = load_backtest(t)
        if bt is None: continue
        dd = drawdown(bt['equity'])
        ax.plot(dd.index, dd, label=t, linewidth=1.2)
    ax.set_title("Top %d Tickers — Drawdowns" % len(tickers))
    ax.set_xlabel("Date"); ax.set_ylabel("Drawdown")
    ax.axhline(0, color='k', linewidth=0.6)
    ax.legend(loc='best', fontsize=8)
    safe_plot_save(fig, outpath)

plot_top_drawdowns(top, OUT_DIR / "02_top3_drawdown.png")

# ---- 3) Trade-return histograms for top N (subplot grid) ----
def plot_trade_histograms(tickers, outpath):
    n = len(tickers)
    cols = min(3, n)
    rows = math.ceil(n / cols)
    fig, axes = plt.subplots(rows, cols, figsize=(cols*4, rows*3))
    if isinstance(axes, np.ndarray):
        axes = axes.flatten()
    else:
        axes = [axes]
    for i, t in enumerate(tickers):
        bt = load_backtest(t)
        if bt is None:
            axes[i].text(0.5,0.5,"missing", ha='center')
            continue
        trades = extract_trades(bt['position_inr'], bt['strategy_ret'])
        if trades.empty:
            axes[i].text(0.5,0.5,"no trades", ha='center')
            continue
        axes[i].hist(trades['trade_return'].dropna(), bins=40)
        axes[i].set_title(t)
        axes[i].set_xlabel("trade return"); axes[i].set_ylabel("count")
    # hide unused axes
    for j in range(n, rows*cols):
        axes[j].axis('off')
    fig.suptitle("Trade Return Distributions (Top tickers)")
    safe_plot_save(fig, outpath)

plot_trade_histograms(top, OUT_DIR / "03_top3_trade_hist.png")

# ---- 4) Equal-weight portfolio equity of top N (one curve) ----
def plot_portfolio_equity(tickers, outpath):
    dfs = []
    for t in tickers:
        bt = load_backtest(t)
        if bt is None: continue
        dfs.append(bt['equity'].rename(t))
    if not dfs:
        print("no backtests for portfolio plot")
        return
    eq_all = pd.concat(dfs, axis=1).dropna(how='all')
    # rebase each to 1 at first common date
    eq_all = eq_all / eq_all.iloc[0]
    # equal-weight portfolio (simple mean)
    port = eq_all.mean(axis=1)
    fig, ax = plt.subplots(figsize=(8,4))
    ax.plot(port.index, port.values, linewidth=1.4)
    ax.set_title("Equal-weight Portfolio Equity (Top %d)" % len(tickers))
    ax.set_xlabel("Date"); ax.set_ylabel("Equity (relative)")
    safe_plot_save(fig, outpath)

plot_portfolio_equity(top, OUT_DIR / "04_portfolio_equity.png")

# ---- 5) BTC price vs tanh-signal overlay (if BTC present) ----
def plot_price_vs_signal(ticker, outpath):
    bt = load_backtest(ticker)
    if bt is None:
        print(f"no backtest for {ticker}")
        return
    # try to locate price and reconstruct signal approximately from position sign
    # We will attempt to load original price from the backtest CSV if present as 'capital_inr' or recreate placeholder
    # If actual signal series file isn't available, we plot equity and position sign as proxy.
    fig, ax1 = plt.subplots(figsize=(8,4))
    # plot equity as proxy for price movement interest
    ax1.plot(bt.index, bt['capital_inr'], label='Capital (INR)', color='black')
    ax1.set_ylabel("Capital (INR)", color='black')
    ax2 = ax1.twinx()
    pos_sign = np.sign(bt['position_inr'].fillna(0.0))
    ax2.plot(bt.index, pos_sign, label='Position sign', color='orange', alpha=0.7)
    ax2.set_ylabel("Position sign", color='orange')
    ax1.set_title(f"{ticker}: Capital and Position Sign (proxy for signal)")
    safe_plot_save(fig, outpath)

# prefer BTC-USD if present, else first top ticker
btc = "BTC-USD" if "BTC-USD" in summary['ticker'].values else (top[0] if top else None)
if btc:
    plot_price_vs_signal(btc, OUT_DIR / "05_btc_signal_price.png")

# ---- 6) Correlation heatmap of returns for selected tickers (top 3 winners + bottom 3 losers) ----
def plot_corr_heatmap(tickers, outpath):
    rets = []
    names = []
    for t in tickers:
        bt = load_backtest(t)
        if bt is None: continue
        # attempt daily returns from backtest; fallback to diff of equity
        if 'strategy_ret' in bt.columns:
            r = bt['strategy_ret']
        else:
            r = bt['equity'].pct_change().fillna(0.0)
        rets.append(r.rename(t))
        names.append(t)
    if not rets:
        print("no returns for correlation heatmap")
        return
    dfR = pd.concat(rets, axis=1).dropna(how='all')
    corr = dfR.corr()
    fig, ax = plt.subplots(figsize=(6,5))
    cax = ax.matshow(corr, cmap='RdBu_r', vmin=-1, vmax=1)
    ax.set_xticks(range(len(names))); ax.set_yticks(range(len(names)))
    ax.set_xticklabels(names, rotation=45, ha='left')
    ax.set_yticklabels(names)
    fig.colorbar(cax, fraction=0.025)
    ax.set_title("Return Correlation (selected tickers)")
    safe_plot_save(fig, outpath)

plot_corr_heatmap(selected, OUT_DIR / "06_corr_heatmap_top6.png")

# ---- write LaTeX snippet to include figures ----
tex_path = OUT_DIR / "figures.tex"
with open(tex_path, "w", encoding="utf-8") as f:
    f.write("% Auto-generated LaTeX snippet including the report figures\n")
    f.write("\\begin{figure}[htbp]\n\\centering\n")
    f.write("\\includegraphics[width=0.95\\linewidth]{" + str(OUT_DIR / "01_top3_equity.png") + "}\n")
    f.write("\\caption{Equity curves — top 3 tickers by CAGR.}\n\\end{figure}\n\n")
    f.write("\\begin{figure}[htbp]\n\\centering\n")
    f.write("\\includegraphics[width=0.95\\linewidth]{" + str(OUT_DIR / "02_top3_drawdown.png") + "}\n")
    f.write("\\caption{Drawdowns — top 3 tickers.}\n\\end{figure}\n\n")
    f.write("\\begin{figure}[htbp]\n\\centering\n")
    f.write("\\includegraphics[width=0.95\\linewidth]{" + str(OUT_DIR / "03_top3_trade_hist.png") + "}\n")
    f.write("\\caption{Trade return distributions for top tickers.}\n\\end{figure}\n\n")
    f.write("\\begin{figure}[htbp]\n\\centering\n")
    f.write("\\includegraphics[width=0.95\\linewidth]{" + str(OUT_DIR / "04_portfolio_equity.png") + "}\n")
    f.write("\\caption{Equal-weight portfolio equity of top 3 tickers.}\n\\end{figure}\n\n")
    if btc:
        f.write("\\begin{figure}[htbp]\n\\centering\n")
        f.write("\\includegraphics[width=0.95\\linewidth]{" + str(OUT_DIR / "05_btc_signal_price.png") + "}\n")
        f.write("\\caption{BTC-USD: capital and position sign (signal proxy).}\n\\end{figure}\n\n")
    f.write("\\begin{figure}[htbp]\n\\centering\n")
    f.write("\\includegraphics[width=0.95\\linewidth]{" + str(OUT_DIR / "06_corr_heatmap_top6.png") + "}\n")
    f.write("\\caption{Return correlation between selected tickers (top winners + losers).}\n\\end{figure}\n")
print("Figures written to", OUT_DIR)
print("LaTeX snippet written to", tex_path)


Selected tickers for figures: ['BTC-USD', 'NVDA', 'GLD', 'CVX', 'MA', 'NFLX']


In [9]:
# event_analysis_batch.py
"""
Event-based analysis for momentum backtests.

Expect per-ticker CSVs in: momentum_results/
Files should be named like: <TICKER>_backtest.csv (or similar that endswith '_backtest.csv')

Output:
 - event_figs/<TICKER>/<EVENT_NAME>.png  (plots)
 - momentum_results/event_analysis_summary.csv (summary table)

Usage:
    python event_analysis_batch.py
"""

import os
import glob
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime

# --------- CONFIG ---------
BACKTEST_FOLDER = "momentum_results"
OUT_FIG_FOLDER = "event_figs"
SUMMARY_CSV = os.path.join(BACKTEST_FOLDER, "event_analysis_summary.csv")

# Define event windows (you can edit/add)
EVENTS = {
    "COVID_Crash_2020": ("2020-02-01", "2020-04-01"),
    "US_Elections_2020": ("2020-10-01", "2020-12-15"),
    "Russia_Ukraine_2022": ("2022-02-01", "2022-03-15"),
    "Crypto_Crash_2021": ("2021-05-01", "2021-07-01"),
    "FTX_Collapse_2022": ("2022-10-01", "2022-12-15"),
}

# Pre/post sample size for simple diagnostics
PRE_DAYS = 20
POST_DAYS = 20

# Minimal sanity checks
MIN_DAYS_REQUIRED = 10   # skip too-short series

# --------- HELPERS ----------
def safe_read_backtest(path):
    """
    Read a backtest CSV and return a DataFrame with expected columns:
      - datetime index
      - position_inr
      - strategy_ret
      - equity (if missing, compute from capital_inr / first capital)
    Returns DataFrame or None if file not usable.
    """
    try:
        df = pd.read_csv(path, parse_dates=True, index_col=0)
    except Exception as e:
        print(f"FAILED read {path}: {e}")
        return None

    # ensure datetime index
    if not isinstance(df.index, pd.DatetimeIndex):
        try:
            df.index = pd.to_datetime(df.index)
        except Exception:
            # if first column is a date
            df = df.reset_index()
            df.iloc[:,0] = pd.to_datetime(df.iloc[:,0])
            df = df.set_index(df.columns[0])

    cols = df.columns.str.lower()
    # unify column names by lowercase variants
    df.columns = [c.lower() for c in df.columns]

    # require 'strategy_ret' or 'strategy_ret' like
    if "strategy_ret" not in df.columns and "strategy return" not in df.columns:
        # maybe there is 'strategy_ret' spelled differently: try "strategy_ret"
        if "strategy" in df.columns:
            df["strategy_ret"] = df["strategy"]
        else:
            print(f"File {path} missing 'strategy_ret' column. Skipping.")
            return None

    # require 'position_inr'
    if "position_inr" not in df.columns and "position" not in df.columns:
        # try to infer from notional-like columns
        possible = [c for c in df.columns if "position" in c or "notional" in c]
        if possible:
            df["position_inr"] = df[possible[0]]
        else:
            print(f"File {path} missing 'position_inr' column. Skipping.")
            return None

    # compute 'equity' if missing (try capital_inr)
    if "equity" not in df.columns:
        if "capital_inr" in df.columns:
            try:
                initial = df["capital_inr"].iloc[0]
                df["equity"] = df["capital_inr"] / float(initial) if initial != 0 else df["capital_inr"]
            except Exception:
                df["equity"] = (1 + df["strategy_ret"].fillna(0)).cumprod()
        else:
            df["equity"] = (1 + df["strategy_ret"].fillna(0)).cumprod()

    # ensure numeric
    for c in ["strategy_ret", "position_inr", "equity"]:
        if c in df.columns:
            df[c] = pd.to_numeric(df[c], errors='coerce')

    # drop rows with NA index
    df = df[~df.index.isna()]

    if len(df) < MIN_DAYS_REQUIRED:
        print(f"File {path} has only {len(df)} rows (less than {MIN_DAYS_REQUIRED}) - skipping.")
        return None

    # add pos sign column
    df["pos"] = np.sign(df["position_inr"].fillna(0.0))

    return df

def event_window_slice(df, start, end):
    """Return df slice from start..end inclusive (dates are strings 'YYYY-MM-DD')."""
    s = pd.to_datetime(start)
    e = pd.to_datetime(end)
    # inclusive slice using loc
    try:
        return df.loc[(df.index >= s) & (df.index <= e)].copy()
    except Exception:
        return pd.DataFrame()

def make_event_plot(df_win, ticker, event_name, out_path):
    """
    Create a plot for the event window:
      - equity (left axis)
      - pos sign (right axis as filled area)
    Saves to out_path (PNG)
    """
    if df_win.empty:
        return False
    fig, ax1 = plt.subplots(figsize=(10,4))
    ax1.plot(df_win.index, df_win["equity"], color="black", linewidth=1.0, label="Equity (rel)")
    ax1.set_ylabel("Equity (relative)")
    ax1.set_title(f"{ticker}: {event_name.replace('_', ' ')}")

    ax2 = ax1.twinx()
    # draw pos as a filled area between -1 and 1 (scale to visible)
    ax2.fill_between(df_win.index, df_win["pos"], alpha=0.25, color="orange", step='mid')
    ax2.set_ylabel("Position sign", color="orange")

    ax1.grid(True, alpha=0.3)
    fig.tight_layout()
    try:
        fig.savefig(out_path, dpi=200)
        plt.close(fig)
        return True
    except Exception as e:
        plt.close(fig)
        print(f"Failed saving plot {out_path}: {e}")
        return False

def compute_event_stats(df, start, end, pre_days=PRE_DAYS, post_days=POST_DAYS):
    """
    Compute basic pre-event, event-window, post-event statistics.
    Returns dict with numeric entries.
    """
    s = pd.to_datetime(start)
    e = pd.to_datetime(end)
    win = df.loc[(df.index >= s) & (df.index <= e)]

    # pre-window: last pre_days before s (strictly before s)
    pre_df = df.loc[df.index < s].tail(pre_days)
    # post-window: first post_days after e (strictly after e)
    post_df = df.loc[df.index > e].head(post_days)

    # compute event compounded return
    if win.empty:
        event_total = np.nan
    else:
        # product(1 + r) - 1
        event_total = (1.0 + win["strategy_ret"].fillna(0.0)).prod() - 1.0

    return {
        "pre_avg_daily_ret": float(pre_df["strategy_ret"].mean()) if not pre_df.empty else np.nan,
        "event_total_ret": float(event_total),
        "post_avg_daily_ret": float(post_df["strategy_ret"].mean()) if not post_df.empty else np.nan,
        "n_days_pre": int(len(pre_df)),
        "n_days_event": int(len(win)),
        "n_days_post": int(len(post_df))
    }

# --------- MAIN BATCH PROCESS ----------
def main():
    os.makedirs(OUT_FIG_FOLDER, exist_ok=True)
    # collect all backtest csv files
    pattern = os.path.join(BACKTEST_FOLDER, "*_backtest.csv")
    files = sorted(glob.glob(pattern))
    if not files:
        print(f"No files found with pattern {pattern}. Check your BACKTEST_FOLDER and filenames.")
        return

    rows = []  # summary rows
    print(f"Found {len(files)} backtest files. Running event analysis...")

    for path in files:
        ticker = os.path.basename(path).replace("_backtest.csv", "")
        print(f"\nProcessing {ticker} ...")

        df = safe_read_backtest(path)
        if df is None:
            print(f"Skipping {ticker} (could not read or insufficient data).")
            continue

        # create ticker folder for plots
        ticker_fig_dir = os.path.join(OUT_FIG_FOLDER, ticker.replace("/", "_"))
        os.makedirs(ticker_fig_dir, exist_ok=True)

        for ev_name, (start, end) in EVENTS.items():
            win_df = event_window_slice(df, start, end)
            # save plot
            out_png = os.path.join(ticker_fig_dir, f"{ticker}_{ev_name}.png")
            plotted = make_event_plot(win_df, ticker, ev_name, out_png)

            stats = compute_event_stats(df, start, end)
            row = {
                "ticker": ticker,
                "event": ev_name,
                "event_start": start,
                "event_end": end,
                "plotted": plotted,
                **stats
            }
            rows.append(row)

    # save summary CSV
    if rows:
        summary_df = pd.DataFrame(rows)
        summary_df = summary_df.sort_values(["event", "ticker"]).reset_index(drop=True)
        summary_df.to_csv(SUMMARY_CSV, index=False)
        print(f"\nEvent analysis summary written to: {SUMMARY_CSV}")
    else:
        print("No event rows to save.")

    print("\nDone. Check the folder 'event_figs/' for plots and the CSV summary file.")

if __name__ == "__main__":
    main()


Found 48 backtest files. Running event analysis...

Processing AAPL ...

Processing AMZN ...

Processing AUDUSD=X ...

Processing AXISBANK.NS ...

Processing BAC ...

Processing BHARTIARTL.NS ...

Processing BRK-B ...

Processing BTC-USD ...

Processing BZ=F ...

Processing CL=F ...


  return umr_prod(a, axis, dtype, out, keepdims, initial, where)



Processing CVX ...

Processing DIS ...

Processing EURUSD=X ...

Processing GBPUSD=X ...

Processing GC=F ...

Processing GLD ...

Processing GOOG ...

Processing HDFCBANK.NS ...

Processing HINDUNILVR.NS ...

Processing ICICIBANK.NS ...

Processing INFY.NS ...

Processing ITC.NS ...

Processing JNJ ...

Processing JPM ...

Processing KO ...

Processing LT.NS ...

Processing MARUTI.NS ...

Processing MA ...

Processing META ...

Processing MSFT ...

Processing NFLX ...

Processing NG=F ...

Processing NVDA ...

Processing NZDUSD=X ...

Processing PFE ...

Processing PG ...

Processing RELIANCE.NS ...

Processing SI=F ...

Processing SUNPHARMA.NS ...

Processing TCS.NS ...

Processing TSLA ...

Processing USDCAD=X ...

Processing USDCHF=X ...

Processing USDINR=X ...

Processing USDJPY=X ...

Processing V ...

Processing XOM ...

Processing ZC=F ...

Event analysis summary written to: momentum_results/event_analysis_summary.csv

Done. Check the folder 'event_figs/' for plots and the CS

In [10]:
# analysis_from_csv.py
"""
Analysis script that reads:
  - momentum_results/summary.csv
  - momentum_results/event_analysis_summary.csv

Produces figures and tables under report_figs/ and momentum_results/.
Generates a LaTeX snippet to include in your report with escaped underscores.
"""
import os
import math
import textwrap
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# -------- CONFIG --------
BACKROOT = "momentum_results"
SUMMARY_CSV = os.path.join(BACKROOT, "summary.csv")
EVENT_SUMMARY_CSV = os.path.join(BACKROOT, "event_analysis_summary.csv")

OUT_DIR = "report_figs"
os.makedirs(OUT_DIR, exist_ok=True)

# analysis parameters
TOP_N = 6   # top tickers to show in some plots
LATEX_SNIPPET = os.path.join(OUT_DIR, "event_analysis_section.tex")

# -------- helpers --------
def safe_load_csv(path):
    if not os.path.exists(path):
        print(f"ERROR: missing file: {path}")
        return None
    try:
        return pd.read_csv(path)
    except Exception as e:
        print(f"ERROR reading {path}: {e}")
        return None

def savefig(fig, fname, dpi=200):
    try:
        fig.savefig(fname, dpi=dpi, bbox_inches="tight")
        plt.close(fig)
    except Exception as e:
        print("Failed to save", fname, e)

def escape_tex(s: str):
    # escape underscores for \texttt and file names
    return s.replace("_", "\\_")

# -------- load data --------
summary = safe_load_csv(SUMMARY_CSV)
event = safe_load_csv(EVENT_SUMMARY_CSV)

if summary is None or event is None:
    raise SystemExit("Missing required CSVs. Run the backtest & event script first.")

# normalize column names
summary.columns = [c.lower() for c in summary.columns]
event.columns = [c.lower() for c in event.columns]

# ensure numeric columns exist
for col in ["cagr", "sharpe", "total_return", "max_drawdown"]:
    if col not in summary.columns:
        summary[col] = np.nan

# basic distributions: CAGR histogram
fig = plt.figure(figsize=(8,4))
ax = fig.add_subplot(1,1,1)
ax.hist(summary['cagr'].dropna(), bins=30, edgecolor='k')
ax.set_title("Distribution of CAGR (per-ticker)")
ax.set_xlabel("CAGR")
ax.set_ylabel("Count")
savefig(fig, os.path.join(OUT_DIR, "hist_cagr.png"))

# scatter: sharpe vs cagr
fig = plt.figure(figsize=(7,5))
ax = fig.add_subplot(1,1,1)
ax.scatter(summary['cagr'], summary['sharpe'], alpha=0.7)
ax.set_xlabel("CAGR")
ax.set_ylabel("Sharpe")
ax.grid(True, alpha=0.2)
ax.set_title("Sharpe vs CAGR (per-ticker)")
savefig(fig, os.path.join(OUT_DIR, "scatter_sharpe_cagr.png"))

# Top winners/losers overall
summary_sorted = summary.sort_values("cagr", ascending=False).reset_index(drop=True)
top_winners = summary_sorted.head(TOP_N)
top_losers = summary_sorted.tail(TOP_N).iloc[::-1]

fig, ax = plt.subplots(figsize=(9,4))
ax.barh(top_winners["ticker"].astype(str), top_winners["cagr"], color="tab:blue")
ax.set_xlabel("CAGR")
ax.set_title(f"Top {TOP_N} tickers by CAGR")
savefig(fig, os.path.join(OUT_DIR, "topN_cagr.png"))

fig, ax = plt.subplots(figsize=(9,4))
ax.barh(top_losers["ticker"].astype(str), top_losers["cagr"], color="tab:orange")
ax.set_xlabel("CAGR")
ax.set_title(f"Bottom {TOP_N} tickers by CAGR")
savefig(fig, os.path.join(OUT_DIR, "bottomN_cagr.png"))

# ---- Event-based aggregations ----
# For each event, show top 10 winners and losers (by event_total_ret)
if "event" not in event.columns or "event_total_ret" not in event.columns:
    print("Event summary missing expected columns (event, event_total_ret).")
else:
    events = sorted(event['event'].unique())
    event_plots = []
    for ev in events:
        ev_df = event[event['event'] == ev].dropna(subset=["event_total_ret"])
        if ev_df.empty:
            continue
        ev_sorted = ev_df.sort_values("event_total_ret", ascending=False).reset_index(drop=True)
        top = ev_sorted.head(10)
        bot = ev_sorted.tail(10)

        # bar chart top winners
        fig, ax = plt.subplots(figsize=(8,4))
        ax.barh(top['ticker'].astype(str), top['event_total_ret'], color="tab:green")
        ax.set_title(f"Top 10 winners during {ev}")
        ax.set_xlabel("event total return")
        savefig(fig, os.path.join(OUT_DIR, f"event_{ev}_top10.png"))

        # bar chart top losers
        fig, ax = plt.subplots(figsize=(8,4))
        ax.barh(bot['ticker'].astype(str), bot['event_total_ret'], color="tab:red")
        ax.set_title(f"Top 10 losers during {ev}")
        ax.set_xlabel("event total return")
        savefig(fig, os.path.join(OUT_DIR, f"event_{ev}_bottom10.png"))

        event_plots.append((ev, top, bot))

    # summary table across events: median event return per ticker (pivot)
    pivot = event.pivot_table(index="ticker", columns="event", values="event_total_ret", aggfunc="median")
    # save pivot CSV
    pivot.to_csv(os.path.join(BACKROOT, "event_median_returns_by_ticker.csv"))

    # heatmap of median event returns for top tickers by overall CAGR (top 12)
    top12 = summary_sorted.head(12)['ticker'].astype(str).tolist()
    pivot_top12 = pivot.reindex(top12).fillna(0.0)

    # plot heatmap with matplotlib (no seaborn dependency)
    fig, ax = plt.subplots(figsize=(8,6))
    im = ax.imshow(pivot_top12.values, aspect='auto', cmap='RdYlGn', vmin=-1, vmax=1)
    ax.set_xticks(np.arange(len(pivot_top12.columns)))
    ax.set_xticklabels([c.replace("_", " ") for c in pivot_top12.columns], rotation=45, ha='right')
    ax.set_yticks(np.arange(len(pivot_top12.index)))
    ax.set_yticklabels(pivot_top12.index)
    ax.set_title("Median event returns (top 12 tickers by CAGR)")
    cbar = fig.colorbar(im, ax=ax, fraction=0.046, pad=0.04)
    cbar.set_label("Median event return")
    savefig(fig, os.path.join(OUT_DIR, "heatmap_median_event_returns_top12.png"))

# ---- Pre/Event/Post comparison for top N tickers ----
N = 6
topN = summary_sorted.head(N)['ticker'].astype(str).tolist()
rows = []
for t in topN:
    # grab all events for ticker
    te = event[event['ticker'].astype(str) == t]
    for _, r in te.iterrows():
        rows.append({
            "ticker": t,
            "event": r['event'],
            "n_days_pre": r.get('n_days_pre', np.nan),
            "pre_avg_daily_ret": r.get('pre_avg_daily_ret', np.nan),
            "event_total_ret": r.get('event_total_ret', np.nan),
            "post_avg_daily_ret": r.get('post_avg_daily_ret', np.nan)
        })
comp_df = pd.DataFrame(rows)
comp_df.to_csv(os.path.join(BACKROOT, f"top{N}_pre_event_post_table.csv"), index=False)

# save a simple latex table snippet for top N
def df_to_latex_table(df, caption, label):
    # uses simple tabular, escapes underscores
    colnames = df.columns.tolist()
    header = " & ".join([escape_tex(c) for c in colnames]) + r" \\"
    lines = [r"\begin{table}[H]", r"\centering", r"\small", rf"\caption{{{caption}}}", rf"\label{{{label}}}", r"\begin{tabular}{%s}" % ("l" * len(colnames)), r"\toprule", header, r"\midrule"]
    for _, r in df.iterrows():
        vals = []
        for c in colnames:
            v = r[c]
            if pd.isna(v):
                vals.append("")
            elif isinstance(v, (int, np.integer)):
                vals.append(str(int(v)))
            elif isinstance(v, (float, np.floating)):
                vals.append(f"{v:.4g}")
            else:
                vals.append(escape_tex(str(v)))
        lines.append(" & ".join(vals) + r" \\")
    lines.extend([r"\bottomrule", r"\end{tabular}", r"\end{table}"])
    return "\n".join(lines)

latex_tables = []
if not comp_df.empty:
    # keep only a few columns for readability
    display_df = comp_df[["ticker", "event", "n_days_pre", "pre_avg_daily_ret", "event_total_ret", "post_avg_daily_ret"]].copy()
    # small formatting
    latex_tables.append(df_to_latex_table(display_df.head(20), f"Pre/Event/Post metrics for top {N} tickers", "tab:pre_event_topN"))
    # save snippet file
    with open(LATEX_SNIPPET, "w", encoding="utf-8") as f:
        f.write("% LaTeX snippet auto-generated by analysis_from_csv.py\n")
        f.write("% Put this file into your report with \\input{report_figs/event_analysis_section.tex}\n\n")
        # include event figures (top winners/losers)
        f.write("\\section*{Event-based Analysis}\n")
        f.write("\\begin{figure}[H]\n\\centering\n")
        # include a couple of images if they exist
        cand_imgs = [
            "hist_cagr.png",
            "scatter_sharpe_cagr.png",
            "topN_cagr.png",
            "bottomN_cagr.png",
            "heatmap_median_event_returns_top12.png"
        ]
        for im in cand_imgs:
            p = os.path.join(OUT_DIR, im)
            if os.path.exists(p):
                f.write(r"\includegraphics[width=0.48\linewidth]{" + escape_tex(p) + "}\n")
        f.write("\\caption{Selected diagnostic plots for event & per-ticker performance.}\n\\end{figure}\n\n")
        # add topN table
        f.write(latex_tables[0])
        f.write("\n\n% End of auto-generated event analysis snippet\n")

    print("Wrote LaTeX snippet to:", LATEX_SNIPPET)
else:
    print("No pre/post comparison table generated (comp_df empty).")

# ---- Save additional CSVs for manual inspection ----
summary_sorted.to_csv(os.path.join(BACKROOT, "summary_sorted_by_cagr.csv"), index=False)
print("Saved summary_sorted_by_cagr.csv")
if 'pivot' in locals():
    # pivot from earlier
    pivot.fillna(0.0).to_csv(os.path.join(BACKROOT, "event_median_returns_by_ticker.csv"))
    print("Saved event_median_returns_by_ticker.csv")

print("\nAll analysis outputs saved under:", OUT_DIR, "and", BACKROOT)
print("You can include the LaTeX snippet:", LATEX_SNIPPET, "in your report (it escapes underscores).")


Wrote LaTeX snippet to: report_figs/event_analysis_section.tex
Saved summary_sorted_by_cagr.csv
Saved event_median_returns_by_ticker.csv

All analysis outputs saved under: report_figs and momentum_results
You can include the LaTeX snippet: report_figs/event_analysis_section.tex in your report (it escapes underscores).
