In [1]:
import os
import re
import cv2
import numpy as np
import pandas as pd
from tqdm import tqdm

In [2]:
def read_data_and_load_model(model_name):
    """Load the out-of-sample close prices and the trained Keras model.

    Every ``*.csv`` file in ``../_csv/out_of_sample/`` is read (columns
    ``Date`` and ``Close`` only) and outer-merged on ``Date`` into one wide
    frame whose price columns are named after the file stem.

    Parameters
    ----------
    model_name : str
        File stem of the model; ``../models/<model_name>.keras`` is loaded.

    Returns
    -------
    (pd.DataFrame, model)
        The merged price frame (``Date`` plus one column per CSV) and the
        object returned by ``load_model``.

    Raises
    ------
    FileNotFoundError
        If the CSV folder contains no CSV files.
    """
    # Read Data
    csv_folder_dir = "../_csv/out_of_sample/"
    # Only real CSV files, in sorted order so the column layout is reproducible
    # (os.listdir returns entries in arbitrary order).
    csv_list = sorted(
        x for x in os.listdir(csv_folder_dir)
        if not x.startswith(".") and x.lower().endswith(".csv")
    )
    if not csv_list:
        raise FileNotFoundError(f"No CSV files found in '{csv_folder_dir}'.")

    prices_df = None

    for csv_name in csv_list:
        csv_dir = os.path.join(csv_folder_dir, csv_name)
        # File stem becomes the column name (robust to any extension length).
        column_name = os.path.splitext(csv_name)[0]
        current_df = pd.read_csv(
            csv_dir,
            usecols=['Date', 'Close'],
            parse_dates=['Date'],
        ).rename(columns={'Close': column_name})

        # Merge the current DataFrame with the final DataFrame
        if prices_df is None:
            prices_df = current_df  # Initialize with the first DataFrame
        else:
            prices_df = pd.merge(prices_df, current_df, on='Date', how='outer')  # Merge on 'Date'

    # Load Model
    # Imported lazily so the (slow) TensorFlow import only happens when a model is needed.
    # Alternative loader kept from the original notes:
    #   keras.config.set_backend("tensorflow"); keras.models.load_model(path, compile=False)
    from tensorflow.keras.models import load_model

    model = load_model(f"../models/{model_name}.keras")
    model.summary()
    return prices_df, model

In [3]:
def build_probs_df(model_name, model, levered = False):
    """Predict on every out-of-sample image and return a Date-aligned frame.

    Each sub-folder of the image directory is treated as one stock. Its
    ``.png`` files are scored with ``model.predict`` and the results are
    outer-merged on ``Date`` into one frame with a ``probability_<folder>``
    column per stock.

    Parameters
    ----------
    model_name : str
        Sub-folder of ``../_images/`` that holds the images for this model.
    model : object
        Loaded model; only ``model.predict(X, verbose=0)`` is used.
    levered : bool, optional
        Read from ``out_of_sample_levered/`` instead of ``out_of_sample/``.

    Returns
    -------
    pd.DataFrame or None
        Frame sorted by ``Date``; ``None`` when no folder yielded any image.

    Raises
    ------
    ValueError
        If the model does not return exactly one value per image.
    """
    # STEP 3 — Build a Date-aligned predictions DataFrame (one column per stock)
    if levered:
        image_dir = f"../_images/{model_name}/out_of_sample_levered/"
    else:
        image_dir = f"../_images/{model_name}/out_of_sample/"
    # Sorted so the column order of the result is reproducible across runs.
    dirlist = sorted(x for x in os.listdir(image_dir) if not x.startswith("."))
    # First 8-digit run followed by "_". NOTE: `search` is used below, so the
    # date does not have to be at the very start of the file name.
    date_regex = re.compile(r"(?P<date>\d{8})_")

    # Helper: extract pandas.Timestamp from filename
    def extract_date_from_filename(fname: str) -> pd.Timestamp | None:
        m = date_regex.search(fname)
        if not m:
            return None
        # YYYYMMDD -> Timestamp (NaT when the digits are not a valid date)
        return pd.to_datetime(m.group("date"), format="%Y%m%d", errors="coerce")

    # Helper: batched model prediction to keep memory steady
    def predict_in_batches(img_paths, batch_size=256):
        preds = []
        for i in range(0, len(img_paths), batch_size):
            batch_paths = img_paths[i:i+batch_size]
            batch_imgs = []
            valid_idx = []
            for j, p in enumerate(batch_paths):
                img = cv2.imread(p, cv2.IMREAD_GRAYSCALE)
                if img is None:
                    continue  # skip unreadable
                # If your model expects channels-last with 1 channel, expand dims:
                # img = np.expand_dims(img, axis=-1)
                batch_imgs.append(img)
                valid_idx.append(j)
            if not batch_imgs:
                preds.extend([np.nan] * len(batch_paths))
                continue
            X = np.array(batch_imgs)
            # If your training normalized inputs, apply the same here
            batch_pred = model.predict(X, verbose=0).flatten()
            # One value per image is required; otherwise the positional mapping
            # below would silently pair images with the wrong numbers.
            if batch_pred.size != len(valid_idx):
                raise ValueError(
                    f"Model returned {batch_pred.size} values for {len(valid_idx)} images; "
                    "expected exactly one prediction per image."
                )
            # Map predictions back to overall positions (skipped files stay NaN)
            out = np.full(len(batch_paths), np.nan, dtype=float)
            out[valid_idx] = batch_pred
            preds.extend(out.tolist())
        return np.array(preds, dtype=float)

    # Build the date-aligned predictions DataFrame
    preds_df = None

    for folder_name in tqdm(dirlist, desc="Predicting"):
        folder_dir = os.path.join(image_dir, folder_name)
        if not os.path.isdir(folder_dir):
            continue

        # Sorted file names; the final frame is sorted by Date again at the end.
        files = sorted(f for f in os.listdir(folder_dir) if f.endswith(".png"))

        # Parse dates and keep only files with a valid date
        dates = []
        file_paths = []
        for f in files:
            d = extract_date_from_filename(f)
            if d is None or pd.isna(d):
                continue
            dates.append(d)
            file_paths.append(os.path.join(folder_dir, f))

        if not file_paths:
            continue

        # Predict
        probs = predict_in_batches(file_paths, batch_size=256)

        # Build per-stock DataFrame
        stock_col = f"probability_{folder_name}"
        stock_df = pd.DataFrame({"Date": dates, stock_col: probs})

        # If several images map to the same Date, keep the last one
        stock_df = stock_df.drop_duplicates(subset=["Date"], keep="last")

        # Merge into the big predictions df (outer on Date)
        if preds_df is None:
            preds_df = stock_df
        else:
            preds_df = pd.merge(preds_df, stock_df, on="Date", how="outer")

    # Final tidy-up: sort by Date
    if preds_df is not None:
        preds_df = preds_df.sort_values("Date").reset_index(drop=True)
    return preds_df

In [4]:
def trade(preds_df, prices_df, model_name, levered = False, top_n=50, save_dir="backtest_output"):
    """
    Backtests a date-aligned probability signal using a delayed-entry scheme:
      - Signals on `signal_date`
      - Enter at `signal_date + R` trading days
      - Exit  at `signal_date + 2R` trading days
    where R is parsed from model_name like "I5-R20" -> R=20. "Trading days"
    are rows of `prices_df` after sorting by Date.

    On each signal date the `k` highest probabilities are bought and the `k`
    lowest are sold short, with `k = min(top_n, n_available // 2)` so the two
    legs are the same size and never contain the same ticker. Dates on which
    fewer than two tickers have a probability are skipped.

    Parameters
    ----------
    preds_df : pd.DataFrame
        Columns: ["Date", "probability_<TICKER>", ...]; rows only on signal dates.
    prices_df : pd.DataFrame
        Columns: ["Date", <TICKER1>, <TICKER2>, ...]; daily close prices, sorted or not.
    model_name : str
        e.g., "I5-R5", "I20-R60" (pattern I<image_days>-R<response_days>).
    levered : bool, optional
        Only selects the output file name (`backtest_levered_<model>.csv`).
    top_n : int, optional
        Maximum number of names per leg. Defaults to 50.
    save_dir : str, optional
        Directory to save CSV output. Defaults to "backtest_output".

    Returns
    -------
    results_df : pd.DataFrame
        Columns: model_name, image_days, response_days, signal_date, entry_date,
        exit_date, n_long, n_short, long_return, short_return, combined_return.
        Empty (but with these columns) when no signal date could be traded.

    Raises
    ------
    ValueError
        If `model_name` is malformed or no ticker appears in both inputs.
    """
    # --- Parse model_name like "I5-R20"
    m = re.fullmatch(r"[Ii](\d+)-[Rr](\d+)", model_name.strip())
    if not m:
        raise ValueError(f"model_name '{model_name}' must look like 'I5-R20'.")

    image_days = int(m.group(1))       # not used in trading logic, only recorded
    response_days = int(m.group(2))    # this is the holding window size R
    window_size = response_days

    # --- Copy & index prep (inputs are never mutated)
    prices_df = prices_df.copy()
    preds_df  = preds_df.copy()

    prices_df["Date"] = pd.to_datetime(prices_df["Date"])
    preds_df["Date"]  = pd.to_datetime(preds_df["Date"])

    prices_df = prices_df.sort_values("Date").set_index("Date")
    preds_df  = preds_df.sort_values("Date").set_index("Date")

    # --- Align tickers
    prob_cols = [c for c in preds_df.columns if c.startswith("probability_")]
    tickers = [c.replace("probability_", "") for c in prob_cols]
    preds_as_tickers = preds_df.rename(columns=dict(zip(prob_cols, tickers)))

    # Keep only tickers that exist in prices_df
    valid_tickers = [t for t in tickers if t in prices_df.columns]
    if not valid_tickers:
        raise ValueError("No overlapping tickers between preds_df and prices_df.")
    preds_as_tickers = preds_as_tickers[valid_tickers]

    # Fixed schema so an empty result still has every column.
    result_columns = [
        "model_name", "image_days", "response_days",
        "signal_date", "entry_date", "exit_date",
        "n_long", "n_short",
        "long_return", "short_return", "combined_return",
    ]
    results = []

    # --- Iterate each signal date
    for signal_date in preds_as_tickers.index:
        if signal_date not in prices_df.index:
            # Signal dates without a price row (non-trading days) are skipped
            continue

        sig_idx   = prices_df.index.get_loc(signal_date)
        entry_idx = sig_idx + window_size        # delayed entry at +R
        exit_idx  = sig_idx + 2 * window_size    # exit at +2R

        if exit_idx >= len(prices_df.index):
            break  # dates are sorted, so every later signal also lacks future data

        entry_date = prices_df.index[entry_idx]
        exit_date  = prices_df.index[exit_idx]

        # Current probs for this signal date
        current_probs = preds_as_tickers.loc[signal_date].dropna()

        # Cap each leg at half the universe so long and short never overlap.
        k = min(top_n, len(current_probs) // 2)
        if k <= 0:
            continue

        # One ranking, sliced at both ends: disjoint even when probabilities tie.
        ranked = current_probs.sort_values(ascending=False)
        to_buy  = ranked.index[:k].tolist()
        to_sell = ranked.index[-k:][::-1].tolist()

        # Prices at entry/exit
        px_entry_long  = prices_df.loc[entry_date, to_buy]
        px_exit_long   = prices_df.loc[exit_date,  to_buy]
        px_entry_short = prices_df.loc[entry_date, to_sell]
        px_exit_short  = prices_df.loc[exit_date,  to_sell]

        # Returns (short leg profits when the price falls)
        long_rets  = (px_exit_long - px_entry_long) / px_entry_long
        short_rets = -(px_exit_short - px_entry_short) / px_entry_short

        # Equal-weight means; names with a missing price are ignored
        long_portfolio_return  = np.nanmean(long_rets.values)  if len(long_rets)  else np.nan
        short_portfolio_return = np.nanmean(short_rets.values) if len(short_rets) else np.nan
        combined_return = np.nanmean([long_portfolio_return, short_portfolio_return])

        results.append({
            "model_name": model_name,
            "image_days": image_days,
            "response_days": response_days,
            "signal_date": signal_date,
            "entry_date": entry_date,
            "exit_date": exit_date,
            "n_long": len(to_buy),
            "n_short": len(to_sell),
            "long_return": long_portfolio_return,
            "short_return": short_portfolio_return,
            "combined_return": combined_return
        })

    results_df = (
        pd.DataFrame(results, columns=result_columns)
        .sort_values("signal_date")
        .reset_index(drop=True)
    )

    # Save
    os.makedirs(save_dir, exist_ok=True)
    if levered:
        out_path = os.path.join(save_dir, f"backtest_levered_{model_name}.csv")
    else:
        out_path = os.path.join(save_dir, f"backtest_{model_name}.csv")
    results_df.to_csv(out_path, index=False)

    return results_df

In [5]:
def trade_with_costs(preds_df, prices_df, model_name, levered=False, top_n=50, save_dir="backtest_output",
          commission_bps=0.0, spread_bps=0.0, slippage_bps=0.0, short_borrow_bps_ann=0.0,
          cost_label="low"):
    """
    Delayed-entry long/short backtest (signal at t, enter at t+R, exit at t+2R
    rows of `prices_df`) with transaction-cost modeling:
      - Round-trip trading cost (bps): 2 * (commission_bps + spread_bps + slippage_bps)
      - Short borrow cost (annualized bps) applied pro-rata over holding window
        (R / 252 of a year), short leg only.
    Returns remain NET of costs in long_return / short_return / combined_return.
    Gross returns are also stored for diagnostics.

    On each signal date the `k` highest probabilities are bought and the `k`
    lowest are sold short, with `k = min(top_n, n_available // 2)` so the legs
    are equal-sized and never share a ticker. Dates with fewer than two
    available tickers are skipped.

    Parameters
    ----------
    preds_df : pd.DataFrame
        Columns: ["Date", "probability_<TICKER>", ...].
    prices_df : pd.DataFrame
        Columns: ["Date", <TICKER1>, ...]; close prices.
    model_name : str
        Pattern I<image_days>-R<response_days>, e.g. "I5-R20".
    levered : bool, optional
        Recorded as metadata and used in the output file name.
    top_n : int, optional
        Maximum number of names per leg.
    save_dir : str, optional
        Directory for the CSV output.
    commission_bps, spread_bps, slippage_bps : float, optional
        Per-side trading costs in basis points.
    short_borrow_bps_ann : float, optional
        Annualized short borrow fee in basis points.
    cost_label : str, optional
        Tag placed in the CSV file name (`..._with_costs_<cost_label>_...`).
        Defaults to "low", which reproduces the historical file name; pass a
        distinct label per scenario to avoid overwriting earlier runs.

    Returns
    -------
    pd.DataFrame
        One row per traded signal date; empty (with all columns) if none.

    Raises
    ------
    ValueError
        If `model_name` is malformed or no ticker appears in both inputs.
    """
    m = re.fullmatch(r"[Ii](\d+)-[Rr](\d+)", model_name.strip())
    if not m:
        raise ValueError(f"model_name '{model_name}' must look like 'I5-R20'.")
    image_days = int(m.group(1))
    response_days = int(m.group(2))
    window_size = response_days

    # Work on copies so the caller's frames are untouched
    prices_df = prices_df.copy()
    preds_df  = preds_df.copy()
    prices_df["Date"] = pd.to_datetime(prices_df["Date"])
    preds_df["Date"]  = pd.to_datetime(preds_df["Date"])
    prices_df = prices_df.sort_values("Date").set_index("Date")
    preds_df  = preds_df.sort_values("Date").set_index("Date")

    prob_cols = [c for c in preds_df.columns if c.startswith("probability_")]
    tickers = [c.replace("probability_", "") for c in prob_cols]
    preds_as_tickers = preds_df.rename(columns=dict(zip(prob_cols, tickers)))
    valid_tickers = [t for t in tickers if t in prices_df.columns]
    if not valid_tickers:
        raise ValueError("No overlapping tickers between preds_df and prices_df.")
    preds_as_tickers = preds_as_tickers[valid_tickers]

    # --- Cost constants (in return space) ---
    per_side_cost_bps = float(commission_bps) + float(spread_bps) + float(slippage_bps)
    roundtrip_cost_return = 2.0 * per_side_cost_bps / 10_000.0
    borrow_cost_return_per_trade = (float(short_borrow_bps_ann) / 10_000.0) * (response_days / 252.0)

    leverage_factor = 5 if levered else 1

    # Fixed schema so an empty result still has every column.
    result_columns = [
        "model_name", "image_days", "response_days",
        "signal_date", "entry_date", "exit_date",
        "n_long", "n_short",
        "long_return", "short_return", "combined_return",
        "gross_long_return", "gross_short_return", "gross_combined_return",
        "commission_bps", "spread_bps", "slippage_bps",
        "roundtrip_cost_bps", "short_borrow_bps_ann", "borrow_cost_return_per_trade",
        "levered", "leverage_factor", "effective_step_days",
    ]

    results = []
    for signal_date in preds_as_tickers.index:
        if signal_date not in prices_df.index:
            continue
        sig_idx   = prices_df.index.get_loc(signal_date)
        entry_idx = sig_idx + window_size
        exit_idx  = sig_idx + 2 * window_size
        if exit_idx >= len(prices_df.index):
            break  # dates are sorted, so every later signal also lacks future data

        entry_date = prices_df.index[entry_idx]
        exit_date  = prices_df.index[exit_idx]

        current_probs = preds_as_tickers.loc[signal_date].dropna()

        # Cap each leg at half the universe so long and short never overlap.
        k = min(top_n, len(current_probs) // 2)
        if k <= 0:
            continue

        # One ranking, sliced at both ends: disjoint even when probabilities tie.
        ranked = current_probs.sort_values(ascending=False)
        to_buy  = ranked.index[:k].tolist()
        to_sell = ranked.index[-k:][::-1].tolist()

        px_entry_long  = prices_df.loc[entry_date, to_buy]
        px_exit_long   = prices_df.loc[exit_date,  to_buy]
        px_entry_short = prices_df.loc[entry_date, to_sell]
        px_exit_short  = prices_df.loc[exit_date,  to_sell]

        # --- Gross returns per name
        gross_long_rets  = (px_exit_long - px_entry_long) / px_entry_long
        gross_short_rets = -(px_exit_short - px_entry_short) / px_entry_short

        # --- Net returns after costs
        # Trading costs hit both long and short; borrow hits the short leg only.
        net_long_rets  = gross_long_rets  - roundtrip_cost_return
        net_short_rets = gross_short_rets - (roundtrip_cost_return + borrow_cost_return_per_trade)

        long_portfolio_return_gross  = np.nanmean(gross_long_rets.values)  if len(gross_long_rets)  else np.nan
        short_portfolio_return_gross = np.nanmean(gross_short_rets.values) if len(gross_short_rets) else np.nan
        combined_return_gross = np.nanmean([long_portfolio_return_gross, short_portfolio_return_gross])

        long_portfolio_return  = np.nanmean(net_long_rets.values)  if len(net_long_rets)  else np.nan
        short_portfolio_return = np.nanmean(net_short_rets.values) if len(net_short_rets) else np.nan
        combined_return = np.nanmean([long_portfolio_return, short_portfolio_return])

        results.append({
            "model_name": model_name,
            "image_days": image_days,
            "response_days": response_days,
            "signal_date": signal_date,
            "entry_date": entry_date,
            "exit_date": exit_date,
            "n_long": len(to_buy),
            "n_short": len(to_sell),

            # NET returns (used by the report)
            "long_return":  long_portfolio_return,
            "short_return": short_portfolio_return,
            "combined_return": combined_return,

            # Diagnostics
            "gross_long_return":  long_portfolio_return_gross,
            "gross_short_return": short_portfolio_return_gross,
            "gross_combined_return": combined_return_gross,

            # Cost assumptions actually applied
            "commission_bps": commission_bps,
            "spread_bps": spread_bps,
            "slippage_bps": slippage_bps,
            "roundtrip_cost_bps": 2 * per_side_cost_bps,
            "short_borrow_bps_ann": short_borrow_bps_ann,
            "borrow_cost_return_per_trade": borrow_cost_return_per_trade,

            # Leverage metadata
            "levered": bool(levered),
            "leverage_factor": leverage_factor,
            "effective_step_days": response_days / leverage_factor,
        })

    results_df = (
        pd.DataFrame(results, columns=result_columns)
        .sort_values("signal_date")
        .reset_index(drop=True)
    )

    os.makedirs(save_dir, exist_ok=True)
    out_path = os.path.join(
        save_dir,
        f"backtest_{'levered_' if levered else ''}_with_costs_{cost_label}_{model_name}.csv",
    )
    results_df.to_csv(out_path, index=False)
    return results_df

In [6]:
def backtest_report(results_df, model_name, levered = False, pfd_name_add = "", save_dir="backtest_reports", rolling_window_trades=26, n_boot=5000, alpha=0.05):
    """
    Create a PDF backtest report for a given model's results.

    Annualization uses R trading days per trade when levered=False,
    and uses an *effective step* of R/5 trading days when levered=True
    (implicit 5x frequency via overlapping cohorts).

    Parameters
    ----------
    results_df : pd.DataFrame
        Needs the columns signal_date, long_return, short_return,
        combined_return, n_long and n_short (one row per trade period).
    model_name : str
        Pattern I<image_days>-R<response_days>, e.g. "I20-R60".
    levered : bool, optional
        Switches the annualization step to R/5 and the file name prefix.
    pfd_name_add : str, optional
        Extra text inserted into the PDF file name before the model name.
    save_dir : str, optional
        Directory the PDF is written to (created if missing).
    rolling_window_trades : int, optional
        Window length, in trade periods, for the rolling Sharpe plots.
    n_boot : int, optional
        Number of bootstrap resamples for the mean confidence interval.
    alpha : float, optional
        Two-sided significance level of that interval.

    Returns
    -------
    None
        The report is written to disk; its path is printed. Nothing is written
        when `results_df` is empty.

    Raises
    ------
    ValueError
        If `model_name` does not match the expected pattern.
    """
    # matplotlib is only needed here, so it is imported locally.
    import matplotlib.pyplot as plt
    from matplotlib.backends.backend_pdf import PdfPages

    # --- Parse model_name like "I20-R60"
    m = re.fullmatch(r"[Ii](\d+)-[Rr](\d+)", model_name.strip())
    if not m:
        raise ValueError(f"model_name '{model_name}' must look like 'I5-R20'.")

    image_days = int(m.group(1))
    response_days = int(m.group(2))
    window_size = response_days  # holding window (R) in trading days

    # Nothing to plot or summarise without at least one trade period.
    if results_df is None or len(results_df) == 0:
        print(f"No trades to report for {model_name}; skipping PDF.")
        return

    # Leverage + effective step: the step between entries is R/5 when
    # levered=True; it drives periods/year in annualization & Sharpe.
    leverage_factor = 5 if levered else 1
    effective_step = max(1.0, window_size / leverage_factor)

    os.makedirs(save_dir, exist_ok=True)
    if levered:
        pdf_path = os.path.join(save_dir, f"backtest_levered_{pfd_name_add}{model_name}.pdf")
    else:
        pdf_path = os.path.join(save_dir, f"backtest_{pfd_name_add}{model_name}.pdf")

    color_map = {
        "Long Only": "#1f77b4",
        "Short Only": "#d62728",
        "Long-Short": "#2ca02c"
    }

    df = results_df.copy().sort_values("signal_date").reset_index(drop=True)

    # === METRIC HELPERS ===
    def annualized_return(overall_ret: float, n_periods: int) -> float:
        """overall_ret is total return over n_periods trades.
        Elapsed trading days ~= n_periods * effective_step.
        """
        if n_periods <= 0 or pd.isna(overall_ret):
            return np.nan
        growth = 1.0 + overall_ret
        if growth <= 0.0:
            # Capital wiped out: a non-positive base raised to a fractional
            # power would give a complex number, so report -100% instead.
            return -1.0
        return growth ** (252.0 / (n_periods * effective_step)) - 1.0

    def annualized_sharpe(ret: pd.Series) -> float:
        """Scale Sharpe by sqrt(periods_per_year)."""
        r = ret.dropna().values
        if r.size < 2 or np.isclose(r.std(ddof=0), 0):
            return np.nan
        periods_per_year = 252.0 / effective_step
        return (r.mean() / r.std(ddof=0)) * np.sqrt(periods_per_year)

    def equity_curve(ret: pd.Series) -> pd.Series:
        # Missing period returns are treated as flat (0%).
        return (1.0 + ret.fillna(0.0)).cumprod()

    def max_drawdown(eq: pd.Series) -> float:
        if eq.empty:
            return np.nan
        roll_max = eq.cummax()
        dd = eq / roll_max - 1.0
        return dd.min()

    def confusion_counts(ret: pd.Series):
        # A period with exactly zero return counts as a loss.
        r = ret.dropna()
        wins = int((r > 0).sum())
        losses = int((r <= 0).sum())
        return wins, losses

    def compute_all(label, ret_series, n_trades):
        eq = equity_curve(ret_series)
        overall = float(eq.iloc[-1] - 1.0) if len(eq) else np.nan
        ann_ret = annualized_return(overall, len(ret_series))
        sharpe = annualized_sharpe(ret_series)
        mdd = max_drawdown(eq)
        wins, losses = confusion_counts(ret_series)
        win_rate = wins / (wins + losses) if (wins + losses) else np.nan
        out = {
            "Portfolio": label,
            "Overall": overall,
            "Ann. Return": ann_ret,
            "Ann. Sharpe": sharpe,
            "Max DD": mdd,
            "Trades": int(n_trades),
            "Wins": wins,
            "Losses": losses,
            "Win %": win_rate,
            "Mean": float(ret_series.mean()) if len(ret_series) else np.nan,
            "Std": float(ret_series.std(ddof=0)) if len(ret_series) else np.nan,
        }
        return out, eq

    def bootstrap_mean_ci(x: pd.Series, n_boot=n_boot, alpha=alpha, random_state=42):
        """Mean of x with a percentile-bootstrap (1 - alpha) interval."""
        rng = np.random.default_rng(random_state)
        a = x.dropna().values
        if a.size == 0:
            return np.nan, np.nan, np.nan
        boots = rng.choice(a, size=(n_boot, a.size), replace=True).mean(axis=1)
        lo, hi = np.quantile(boots, [alpha/2, 1 - alpha/2])
        return a.mean(), float(lo), float(hi)

    def rolling_sharpe(ret: pd.Series, window_trades: int) -> pd.Series:
        def _sharpe(window_vals):
            w = window_vals[~np.isnan(window_vals)]
            if w.size < 2 or np.isclose(w.std(ddof=0), 0):
                return np.nan
            periods_per_year = 252.0 / effective_step
            return (w.mean() / w.std(ddof=0)) * np.sqrt(periods_per_year)
        return ret.rolling(window_trades, min_periods=window_trades).apply(_sharpe, raw=True)

    # === BUILD METRICS ===
    long_series   = df["long_return"]
    short_series  = df["short_return"]
    comb_series   = df["combined_return"]

    # "Trades" counts positions (names held), not trade periods.
    long_stats,  long_eq  = compute_all("Long Only",  long_series,  int(df["n_long"].sum()))
    short_stats, short_eq = compute_all("Short Only", short_series, int(df["n_short"].sum()))
    comb_stats,  comb_eq  = compute_all("Long-Short", comb_series,  int(df["n_long"].sum() + df["n_short"].sum()))

    summary_df = pd.DataFrame([long_stats, short_stats, comb_stats])
    display_cols = ["Portfolio","Overall","Ann. Return","Ann. Sharpe","Max DD","Trades","Wins","Losses","Win %","Mean","Std"]

    def _fmt_cell(val, col):
        if isinstance(val, str): return val
        if col in ("Overall","Ann. Return","Win %","Max DD"):
            return "–" if pd.isna(val) else f"{val:.2%}"
        if col in ("Ann. Sharpe","Mean","Std"):
            return "–" if pd.isna(val) else f"{val:.2f}"
        if col in ("Trades","Wins","Losses"):
            return "–" if pd.isna(val) else f"{int(val)}"
        return str(val)

    table_data = [[_fmt_cell(row[c], c) for c in display_cols] for _, row in summary_df[display_cols].iterrows()]

    # round() rather than int(): (1 - alpha) * 100 can land just below a whole number.
    ci_pct = int(round((1 - alpha) * 100))

    # === PLOTTING & PDF EXPORT ===
    with PdfPages(pdf_path) as pp:
        # 0) Header page
        fig, ax = plt.subplots(figsize=(11, 1.95))
        ax.axis("off")
        ax.text(0.01, 0.72, f"Backtest Report — {model_name}", fontsize=16, weight="bold")
        ax.text(0.01, 0.36,
                f"I (image): {image_days}d   |   R (response/hold): {response_days}d   |   Levered: {levered}",
                fontsize=11)
        # Call out the effective step used for annualization
        if levered:
            ax.text(0.01, 0.10,
                    f"Implicit leverage via overlapping cohorts: 5× frequency → effective step = R/5 = {effective_step:.2f} trading days "
                    f"(Annualization uses 252/{effective_step:.2f}).",
                    fontsize=10, color="dimgray")
        else:
            ax.text(0.01, 0.10,
                    f"Annualization uses step = R = {window_size} trading days (periods/year = 252/{window_size}).",
                    fontsize=10, color="dimgray")
        pp.savefig(fig, bbox_inches="tight"); plt.close(fig)

        # 1) Summary table
        fig, ax = plt.subplots(figsize=(11, 3.0))
        ax.axis("off")
        col_widths = [0.12, 0.10, 0.12, 0.12, 0.10, 0.08, 0.08, 0.09, 0.08, 0.06, 0.05]
        tbl = ax.table(cellText=table_data, colLabels=display_cols, colWidths=col_widths, loc="center")
        tbl.auto_set_font_size(False); tbl.set_fontsize(9); tbl.scale(1, 1.25)
        ax.set_title("Backtest Summary", pad=10)
        pp.savefig(fig, bbox_inches="tight"); plt.close(fig)

        # 2) Equity curves — combined
        fig, ax = plt.subplots(figsize=(11, 5))
        ax.plot(df["signal_date"], long_eq,  label="Long Only",   color=color_map["Long Only"])
        ax.plot(df["signal_date"], short_eq, label="Short Only",  color=color_map["Short Only"])
        ax.plot(df["signal_date"], comb_eq,  label="Long-Short",  color=color_map["Long-Short"])
        ax.set_xlabel("Date"); ax.set_ylabel("Equity (relative)")
        ax.set_title("Equity Curves — All Portfolios"); ax.grid(True); ax.legend()
        pp.savefig(fig, bbox_inches="tight"); plt.close(fig)

        # 2b) individual curves
        for name, eq in [("Long Only", long_eq), ("Short Only", short_eq), ("Long-Short", comb_eq)]:
            fig, ax = plt.subplots(figsize=(11, 4.5))
            ax.plot(df["signal_date"], eq, label=name, color=color_map[name])
            ax.set_xlabel("Date"); ax.set_ylabel("Equity (relative)")
            ax.set_title(f"Equity Curve — {name}"); ax.grid(True); ax.legend()
            pp.savefig(fig, bbox_inches="tight"); plt.close(fig)

        # 3) Histograms with bootstrap CI of the mean
        for name, series in [("Long Only", long_series), ("Short Only", short_series), ("Long-Short", comb_series)]:
            mu, lo, hi = bootstrap_mean_ci(series)
            fig, ax = plt.subplots(figsize=(10, 4))
            ax.hist(series.dropna(), bins=30, edgecolor="black", color=color_map[name], alpha=0.7)
            if pd.notna(mu): ax.axvline(mu, linestyle="--", linewidth=2, color="black", label=f"Mean = {mu:.4f}")
            if pd.notna(lo) and pd.notna(hi):
                ax.axvspan(lo, hi, alpha=0.15, color="grey", label=f"{ci_pct}% CI [{lo:.4f}, {hi:.4f}]")
            ax.set_title(f"{name} Trade Returns — Histogram with Mean CI")
            ax.set_xlabel("Return"); ax.set_ylabel("Frequency"); ax.grid(True); ax.legend()
            pp.savefig(fig, bbox_inches="tight"); plt.close(fig)

        # 4) Rolling Sharpe — combined (uses effective_step inside)
        long_roll_sharpe  = rolling_sharpe(long_series,  rolling_window_trades)
        short_roll_sharpe = rolling_sharpe(short_series, rolling_window_trades)
        comb_roll_sharpe  = rolling_sharpe(comb_series,  rolling_window_trades)

        fig, ax = plt.subplots(figsize=(11, 5))
        ax.plot(df["signal_date"], long_roll_sharpe,  label=f"Long (win={rolling_window_trades})",  color=color_map["Long Only"])
        ax.plot(df["signal_date"], short_roll_sharpe, label=f"Short (win={rolling_window_trades})", color=color_map["Short Only"])
        ax.plot(df["signal_date"], comb_roll_sharpe,  label=f"Long-Short (win={rolling_window_trades})", color=color_map["Long-Short"])
        ax.axhline(0, linewidth=1, color="black")
        ax.set_title("Rolling Annualized Sharpe — All Portfolios")
        ax.set_xlabel("Date"); ax.set_ylabel("Sharpe"); ax.grid(True); ax.legend()
        pp.savefig(fig, bbox_inches="tight"); plt.close(fig)

        # 4b) Per-leg rolling Sharpe
        for name, series in [("Long Only", long_series), ("Short Only", short_series), ("Long-Short", comb_series)]:
            rs = rolling_sharpe(series, rolling_window_trades)
            fig, ax = plt.subplots(figsize=(11, 4.5))
            ax.plot(df["signal_date"], rs, label=f"{name} (win={rolling_window_trades})", color=color_map[name])
            ax.axhline(0, linewidth=1, color="black")
            ax.set_title(f"Rolling Annualized Sharpe — {name}")
            ax.set_xlabel("Date"); ax.set_ylabel("Sharpe"); ax.grid(True); ax.legend()
            pp.savefig(fig, bbox_inches="tight"); plt.close(fig)

    print("Saved:")
    print(f"- PDF Report:  {pdf_path}")
    print()

In [7]:
def backtest2(model_code, scenario, levered = False):
    """Run the cost-aware backtest for one model under a named cost scenario.

    Loads prices and the model, builds the prediction frame, runs
    `trade_with_costs` with the scenario's cost assumptions and writes the PDF
    report with the scenario name as file-name prefix.

    Parameters
    ----------
    model_code : str
        Model identifier such as "I5-R20".
    scenario : str
        One of "cost_low", "cost_med", "cost_high".
    levered : bool, optional
        Use the levered image set and levered annualization.

    Raises
    ------
    ValueError
        If `scenario` is not one of the known names.
    """
    # Cost assumptions per scenario (all in basis points; borrow is annualized).
    cost_scenarios = {
        "cost_low":  dict(commission_bps=0.10, spread_bps=0.50, slippage_bps=0.50, short_borrow_bps_ann=10),
        "cost_med":  dict(commission_bps=0.30, spread_bps=1.00, slippage_bps=1.00, short_borrow_bps_ann=25),
        "cost_high": dict(commission_bps=0.50, spread_bps=2.00, slippage_bps=2.00, short_borrow_bps_ann=100),
    }
    # Validate before the expensive model load and prediction pass.
    if scenario not in cost_scenarios:
        raise ValueError(f"Unknown scenario: {scenario}")

    prices_df, model = read_data_and_load_model(model_code)
    preds_df = build_probs_df(model_code, model, levered)

    # Use this function's own argument, not a notebook-level `model_name` global.
    # NOTE(review): the CSV file name written by trade_with_costs does not
    # include the scenario, so runs for different scenarios of the same model
    # overwrite each other's CSV; the PDF reports are kept apart by the prefix.
    results_df = trade_with_costs(
        preds_df, prices_df, model_code, levered, top_n=50,
        **cost_scenarios[scenario]
    )
    backtest_report(results_df, model_code, levered, pfd_name_add = f"{scenario}_")


def backtest(model_code, levered = False):
    """Run the cost-free backtest for one model and write its PDF report.

    Loads prices and the model, builds the predictions frame, simulates
    trading with ``trade``, and hands the result to ``backtest_report``.

    Parameters
    ----------
    model_code : str
        Model identifier (e.g. ``"I5-R5"``); used to locate the model and
        image folders and forwarded to the trading and reporting helpers.
    levered : bool, default False
        Forwarded unchanged to ``build_probs_df``, ``trade`` and
        ``backtest_report``.
    """
    prices_df, model = read_data_and_load_model(model_code)
    preds_df = build_probs_df(model_code, model, levered)
    # Pass the function's own `model_code`. The previous code passed the
    # notebook-global `model_name`, which only worked while a driver loop
    # happened to define a variable with that exact name.
    results_df = trade(preds_df, prices_df, model_code, levered)
    # Feed the results straight into backtest_report (per the original note,
    # the report is already annualization-aware for the levered case).
    backtest_report(results_df, model_code, levered)


In [None]:
# Model variants to backtest; the I60-* variants are currently disabled.
model_names = [
    "I5-R5", "I5-R20", "I5-R60",
    "I20-R5", "I20-R20", "I20-R60",
    # "I60-R5", "I60-R20", "I60-R60",
]

# Run each model twice: unlevered first, then levered.
# The loop variable stays `model_name` because it is a module-level name
# that other cells may read.
for model_name in model_names:
    for levered in (False, True):
        backtest(model_name, levered=levered)

In [8]:
# Model variants to backtest; the I60-* variants are currently disabled.
model_names = [
    "I5-R5", "I5-R20", "I5-R60",
    "I20-R5", "I20-R20", "I20-R60",
    # "I60-R5", "I60-R20", "I60-R60",
]

# For each model: all three cost scenarios unlevered (low, med, high),
# then the same three scenarios levered.
# The loop variable stays `model_name` because it is a module-level name
# that other cells may read.
for model_name in model_names:
    for levered in (False, True):
        for scenario in ("cost_low", "cost_med", "cost_high"):
            backtest2(model_name, scenario=scenario, levered=levered)

2025-08-16 23:32:20.087394: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M1 Pro
2025-08-16 23:32:20.087421: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 16.00 GB
2025-08-16 23:32:20.087426: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 5.33 GB
2025-08-16 23:32:20.087443: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2025-08-16 23:32:20.087458: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


Predicting:   0%|          | 0/489 [00:00<?, ?it/s]2025-08-16 23:32:20.416691: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.
Predicting: 100%|██████████| 489/489 [00:39<00:00, 12.23it/s]


Saved:
- PDF Report:  backtest_reports/backtest_cost_low_I5-R5.pdf



Predicting: 100%|██████████| 489/489 [00:39<00:00, 12.42it/s]


Saved:
- PDF Report:  backtest_reports/backtest_cost_med_I5-R5.pdf



Predicting: 100%|██████████| 489/489 [00:38<00:00, 12.75it/s]


Saved:
- PDF Report:  backtest_reports/backtest_cost_high_I5-R5.pdf



Predicting: 100%|██████████| 489/489 [02:58<00:00,  2.74it/s]


Saved:
- PDF Report:  backtest_reports/backtest_levered_cost_low_I5-R5.pdf



Predicting: 100%|██████████| 489/489 [02:49<00:00,  2.89it/s]


Saved:
- PDF Report:  backtest_reports/backtest_levered_cost_med_I5-R5.pdf



Predicting: 100%|██████████| 489/489 [02:47<00:00,  2.92it/s]


Saved:
- PDF Report:  backtest_reports/backtest_levered_cost_high_I5-R5.pdf



Predicting: 100%|██████████| 489/489 [00:22<00:00, 21.42it/s]


Saved:
- PDF Report:  backtest_reports/backtest_cost_low_I5-R20.pdf



Predicting: 100%|██████████| 489/489 [00:22<00:00, 21.53it/s]


Saved:
- PDF Report:  backtest_reports/backtest_cost_med_I5-R20.pdf



Predicting: 100%|██████████| 489/489 [00:22<00:00, 21.58it/s]


Saved:
- PDF Report:  backtest_reports/backtest_cost_high_I5-R20.pdf



Predicting: 100%|██████████| 489/489 [00:46<00:00, 10.44it/s]


Saved:
- PDF Report:  backtest_reports/backtest_levered_cost_low_I5-R20.pdf



Predicting: 100%|██████████| 489/489 [00:44<00:00, 10.96it/s]


Saved:
- PDF Report:  backtest_reports/backtest_levered_cost_med_I5-R20.pdf



Predicting: 100%|██████████| 489/489 [00:45<00:00, 10.77it/s]


Saved:
- PDF Report:  backtest_reports/backtest_levered_cost_high_I5-R20.pdf



Predicting: 100%|██████████| 489/489 [00:19<00:00, 25.67it/s]


Saved:
- PDF Report:  backtest_reports/backtest_cost_low_I5-R60.pdf



Predicting: 100%|██████████| 489/489 [00:18<00:00, 26.35it/s]


Saved:
- PDF Report:  backtest_reports/backtest_cost_med_I5-R60.pdf



Predicting: 100%|██████████| 489/489 [00:20<00:00, 23.50it/s]


Saved:
- PDF Report:  backtest_reports/backtest_cost_high_I5-R60.pdf



Predicting: 100%|██████████| 489/489 [00:28<00:00, 17.30it/s]


Saved:
- PDF Report:  backtest_reports/backtest_levered_cost_low_I5-R60.pdf



Predicting: 100%|██████████| 489/489 [00:28<00:00, 17.34it/s]


Saved:
- PDF Report:  backtest_reports/backtest_levered_cost_med_I5-R60.pdf



Predicting: 100%|██████████| 489/489 [00:27<00:00, 17.48it/s]


Saved:
- PDF Report:  backtest_reports/backtest_levered_cost_high_I5-R60.pdf



Predicting: 100%|██████████| 489/489 [02:06<00:00,  3.86it/s]


Saved:
- PDF Report:  backtest_reports/backtest_cost_low_I20-R5.pdf



Predicting: 100%|██████████| 489/489 [02:06<00:00,  3.88it/s]


Saved:
- PDF Report:  backtest_reports/backtest_cost_med_I20-R5.pdf



Predicting: 100%|██████████| 489/489 [02:06<00:00,  3.87it/s]


Saved:
- PDF Report:  backtest_reports/backtest_cost_high_I20-R5.pdf



Predicting: 100%|██████████| 489/489 [08:42<00:00,  1.07s/it]


Saved:
- PDF Report:  backtest_reports/backtest_levered_cost_low_I20-R5.pdf



Predicting: 100%|██████████| 489/489 [08:53<00:00,  1.09s/it]


Saved:
- PDF Report:  backtest_reports/backtest_levered_cost_med_I20-R5.pdf



Predicting: 100%|██████████| 489/489 [08:52<00:00,  1.09s/it]


Saved:
- PDF Report:  backtest_reports/backtest_levered_cost_high_I20-R5.pdf



Predicting: 100%|██████████| 489/489 [00:55<00:00,  8.78it/s]


Saved:
- PDF Report:  backtest_reports/backtest_cost_low_I20-R20.pdf



Predicting: 100%|██████████| 489/489 [00:56<00:00,  8.65it/s]


Saved:
- PDF Report:  backtest_reports/backtest_cost_med_I20-R20.pdf



Predicting: 100%|██████████| 489/489 [00:57<00:00,  8.57it/s]


Saved:
- PDF Report:  backtest_reports/backtest_cost_high_I20-R20.pdf



Predicting: 100%|██████████| 489/489 [02:54<00:00,  2.79it/s]


Saved:
- PDF Report:  backtest_reports/backtest_levered_cost_low_I20-R20.pdf



Predicting: 100%|██████████| 489/489 [02:53<00:00,  2.82it/s]


Saved:
- PDF Report:  backtest_reports/backtest_levered_cost_med_I20-R20.pdf



Predicting: 100%|██████████| 489/489 [02:53<00:00,  2.82it/s]


Saved:
- PDF Report:  backtest_reports/backtest_levered_cost_high_I20-R20.pdf



Predicting: 100%|██████████| 489/489 [00:31<00:00, 15.32it/s]


Saved:
- PDF Report:  backtest_reports/backtest_cost_low_I20-R60.pdf



Predicting: 100%|██████████| 489/489 [00:31<00:00, 15.45it/s]


Saved:
- PDF Report:  backtest_reports/backtest_cost_med_I20-R60.pdf



Predicting: 100%|██████████| 489/489 [00:31<00:00, 15.45it/s]


Saved:
- PDF Report:  backtest_reports/backtest_cost_high_I20-R60.pdf



Predicting: 100%|██████████| 489/489 [01:09<00:00,  7.01it/s]


Saved:
- PDF Report:  backtest_reports/backtest_levered_cost_low_I20-R60.pdf



Predicting: 100%|██████████| 489/489 [01:12<00:00,  6.75it/s]


Saved:
- PDF Report:  backtest_reports/backtest_levered_cost_med_I20-R60.pdf



Predicting: 100%|██████████| 489/489 [01:15<00:00,  6.47it/s]


Saved:
- PDF Report:  backtest_reports/backtest_levered_cost_high_I20-R60.pdf

