In [1]:
import os
import re
import cv2
import numpy as np
import pandas as pd
from tqdm import tqdm


In [2]:
csv_folder_dir = "../_csv/out_of_sample/"
csv_list = [x for x in os.listdir(csv_folder_dir) if not x.startswith(".")]

prices_df = None

for csv_name in csv_list:
    csv_dir = os.path.join(csv_folder_dir, csv_name)
    current_df = pd.read_csv(csv_dir,
                             usecols=['Date', 'Close'],
                             parse_dates=['Date'])
    current_df.rename(columns={'Close': f"{csv_name[:-4]}"}, inplace=True)

    # Merge the current DataFrame with the final DataFrame
    if prices_df is None:
        prices_df = current_df  # Initialize with the first DataFrame
    else:
        prices_df = pd.merge(prices_df, current_df, on='Date', how='outer')  # Merge on 'Date'

In [3]:
prices_df.head(3)

Unnamed: 0,Date,CSCO,UAL,TROW,ISRG,NVR,TPR,DVN,CE,MRO,...,WM,DOV,CRM,PGR,WAT,IEX,BWA,LRCX,BLK,PPL
0,2021-01-04,43.959999,41.630001,147.690002,266.666656,4040.649902,32.709999,16.129999,125.93,6.83,...,114.830002,123.339996,220.309998,97.129997,250.149994,195.309998,33.503521,47.801998,710.820007,27.58
1,2021-01-05,43.98,43.400002,149.089996,268.350006,4008.909912,32.52,17.360001,131.600006,7.45,...,115.209999,124.059998,221.520004,96.949997,255.429993,194.929993,34.295776,49.455002,714.580017,27.610001
2,2021-01-06,44.400002,43.540001,151.529999,264.263336,3966.48999,34.32,18.32,136.550003,7.5,...,117.760002,127.559998,216.149994,98.790001,262.470001,202.179993,35.360916,49.648998,734.960022,28.110001


In [4]:
from tensorflow.keras.models import load_model

model = load_model("../models/I5-R5.keras")
model.summary()

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

2025-08-14 12:05:01.676960: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M1 Pro
2025-08-14 12:05:01.677001: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 16.00 GB
2025-08-14 12:05:01.677007: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 5.33 GB
I0000 00:00:1755165901.677017 1645937 pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
I0000 00:00:1755165901.677041 1645937 pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [5]:
# STEP 3 — Build a Date-aligned predictions DataFrame (one column per stock)
image_dir = "../_images/I5-R5/out_of_sample/"
dirlist = [x for x in os.listdir(image_dir) if not x.startswith(".")]
date_regex = re.compile(r"(?P<date>\d{8})_")  # matches leading YYYYMMDD_

# Helper: extract pandas.Timestamp from filename
def extract_date_from_filename(fname: str) -> pd.Timestamp | None:
    m = date_regex.search(fname)
    if not m:
        return None
    s = m.group("date")
    # YYYYMMDD -> Timestamp
    return pd.to_datetime(s, format="%Y%m%d", errors="coerce")

# Helper: batched model prediction to keep memory steady
def predict_in_batches(img_paths, batch_size=256):
    preds = []
    for i in range(0, len(img_paths), batch_size):
        batch_paths = img_paths[i:i+batch_size]
        batch_imgs = []
        valid_idx = []
        for j, p in enumerate(batch_paths):
            img = cv2.imread(p, cv2.IMREAD_GRAYSCALE)
            if img is None:
                continue  # skip unreadable
            # If your model expects channels-last with 1 channel, expand dims:
            # img = np.expand_dims(img, axis=-1)
            batch_imgs.append(img)
            valid_idx.append(j)
        if not batch_imgs:
            preds.extend([np.nan] * len(batch_paths))
            continue
        X = np.array(batch_imgs)
        # If your training normalized inputs, apply the same here (kept identical to your snippet)
        batch_pred = model.predict(X, verbose=0).flatten()
        # Map predictions back to overall positions (including skipped files as NaN)
        out = [np.nan] * len(batch_paths)
        vi = 0
        for j in valid_idx:
            out[j] = float(batch_pred[vi])
            vi += 1
        preds.extend(out)
    return np.array(preds, dtype=float)

# Build the date-aligned predictions DataFrame
preds_df = None

for folder_name in tqdm(dirlist, desc="Predicting"):
    folder_dir = os.path.join(image_dir, folder_name)
    if not os.path.isdir(folder_dir):
        continue

    # Collect and sort files so dates are in chronological (or lexicographic) order
    files = [f for f in os.listdir(folder_dir) if f.endswith(".png")]
    # sort by filename ensures date order because filenames start with YYYYMMDD
    files.sort()

    # Parse dates and keep only files with a valid date
    dates = []
    file_paths = []
    for f in files:
        d = extract_date_from_filename(f)
        if d is None or pd.isna(d):
            continue
        dates.append(d)
        file_paths.append(os.path.join(folder_dir, f))

    if not file_paths:
        continue

    # Predict
    probs = predict_in_batches(file_paths, batch_size=256)

    # Build per-stock DataFrame
    stock_col = f"probability_{folder_name}"
    stock_df = pd.DataFrame({"Date": dates, stock_col: probs})

    # If multiple images end up with the same Date (shouldn’t, but just in case), keep the last
    stock_df = stock_df.drop_duplicates(subset=["Date"], keep="last")

    # Merge into the big predictions df (outer on Date)
    if preds_df is None:
        preds_df = stock_df
    else:
        preds_df = pd.merge(preds_df, stock_df, on="Date", how="outer")

# Final tidy-up: sort by Date
if preds_df is not None:
    preds_df = preds_df.sort_values("Date").reset_index(drop=True)
    preds_df.head()


Predicting:   0%|          | 0/489 [00:00<?, ?it/s]2025-08-14 12:05:05.565102: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.
Predicting: 100%|██████████| 489/489 [00:30<00:00, 15.81it/s]


In [6]:
preds_df

Unnamed: 0,Date,probability_CTAS,probability_WELL,probability_VZ,probability_AMZN,probability_CNP,probability_RCL,probability_CAT,probability_TFC,probability_AAPL,...,probability_EXPE,probability_HUM,probability_HST,probability_NVR,probability_STT,probability_CCI,probability_SCHW,probability_STZ,probability_MSCI,probability_GLW
0,2021-01-11,0.498343,0.486565,0.499991,0.498986,0.492247,0.490965,0.518488,0.511927,0.490899,...,0.505449,0.499859,0.492374,0.499156,0.511070,0.491936,0.508257,0.503673,0.486864,0.498540
1,2021-01-19,0.504290,0.498588,0.487630,0.503112,0.492746,0.503999,0.503580,0.512859,0.487501,...,0.500289,0.491441,0.512069,0.503711,0.507998,0.492546,0.511539,0.507200,0.505053,0.508221
2,2021-01-26,0.511994,0.504261,0.514476,0.506739,0.519300,0.496375,0.515325,0.515583,0.495755,...,0.519286,0.516248,0.492152,0.517656,0.502617,0.496430,0.510478,0.514990,0.505942,0.509338
3,2021-02-02,0.494167,0.510391,0.501416,0.512718,0.505688,0.499253,0.485832,0.493430,0.502219,...,0.486505,0.492840,0.498293,0.483255,0.492060,0.505049,0.501363,0.500384,0.498487,0.500942
4,2021-02-09,0.525641,0.497162,0.500880,0.525362,0.491255,0.500005,0.502237,0.495327,0.512932,...,0.505948,0.511199,0.488511,0.497563,0.505309,0.491800,0.485599,0.509081,0.488883,0.488248
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
188,2024-10-07,0.488139,0.504276,0.495304,0.480899,0.515069,0.502691,0.505293,0.488762,0.500422,...,0.493888,0.491846,0.506359,0.514577,0.492403,0.507855,0.483401,0.491047,0.500546,0.479983
189,2024-10-14,0.491754,0.497187,0.501729,0.486752,0.483485,0.507830,0.501827,0.480563,0.488825,...,0.474772,0.505810,0.493077,0.472544,0.524038,0.492515,0.507215,0.490757,0.498892,0.514996
190,2024-10-21,0.504646,0.507723,0.510243,0.514317,0.495003,0.518010,0.515312,0.498006,0.525581,...,0.511537,0.495354,0.500296,0.508566,0.496302,0.510577,0.499886,0.503347,0.509443,0.514533
191,2024-10-28,0.501408,0.504086,0.493322,0.488404,0.509505,0.515989,0.503000,0.485217,0.491554,...,0.485554,0.504880,0.490439,0.514672,0.494620,0.516013,0.498442,0.505762,0.520342,0.482837


In [7]:
import pandas as pd
import numpy as np

# --- Assumptions:
# prices_df: columns = ["Date", <tickers...>], one row per trading day, sorted ascending by Date
# preds_df:  columns = ["Date", "probability_<TICKER>", ...], rows only on signal dates (image days)
# window_size = 5  # I5-R5

window_size = 5

# Ensure proper datetime and indexing
prices_df = prices_df.copy()
preds_df  = preds_df.copy()

prices_df["Date"] = pd.to_datetime(prices_df["Date"])
preds_df["Date"]  = pd.to_datetime(preds_df["Date"])

prices_df = prices_df.sort_values("Date").set_index("Date")
preds_df  = preds_df.sort_values("Date").set_index("Date")

# Map preds_df columns -> tickers by stripping "probability_"
prob_cols = [c for c in preds_df.columns if c.startswith("probability_")]
tickers_from_probs = [c.replace("probability_", "") for c in prob_cols]

# Build a helper Series to rename prob columns to pure tickers
rename_map = dict(zip(prob_cols, tickers_from_probs))
preds_as_tickers = preds_df.rename(columns=rename_map)


In [8]:
preds_as_tickers

Unnamed: 0_level_0,CTAS,WELL,VZ,AMZN,CNP,RCL,CAT,TFC,AAPL,PANW,...,EXPE,HUM,HST,NVR,STT,CCI,SCHW,STZ,MSCI,GLW
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2021-01-11,0.498343,0.486565,0.499991,0.498986,0.492247,0.490965,0.518488,0.511927,0.490899,0.498776,...,0.505449,0.499859,0.492374,0.499156,0.511070,0.491936,0.508257,0.503673,0.486864,0.498540
2021-01-19,0.504290,0.498588,0.487630,0.503112,0.492746,0.503999,0.503580,0.512859,0.487501,0.518294,...,0.500289,0.491441,0.512069,0.503711,0.507998,0.492546,0.511539,0.507200,0.505053,0.508221
2021-01-26,0.511994,0.504261,0.514476,0.506739,0.519300,0.496375,0.515325,0.515583,0.495755,0.510725,...,0.519286,0.516248,0.492152,0.517656,0.502617,0.496430,0.510478,0.514990,0.505942,0.509338
2021-02-02,0.494167,0.510391,0.501416,0.512718,0.505688,0.499253,0.485832,0.493430,0.502219,0.487240,...,0.486505,0.492840,0.498293,0.483255,0.492060,0.505049,0.501363,0.500384,0.498487,0.500942
2021-02-09,0.525641,0.497162,0.500880,0.525362,0.491255,0.500005,0.502237,0.495327,0.512932,0.507451,...,0.505948,0.511199,0.488511,0.497563,0.505309,0.491800,0.485599,0.509081,0.488883,0.488248
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-10-07,0.488139,0.504276,0.495304,0.480899,0.515069,0.502691,0.505293,0.488762,0.500422,0.492199,...,0.493888,0.491846,0.506359,0.514577,0.492403,0.507855,0.483401,0.491047,0.500546,0.479983
2024-10-14,0.491754,0.497187,0.501729,0.486752,0.483485,0.507830,0.501827,0.480563,0.488825,0.518239,...,0.474772,0.505810,0.493077,0.472544,0.524038,0.492515,0.507215,0.490757,0.498892,0.514996
2024-10-21,0.504646,0.507723,0.510243,0.514317,0.495003,0.518010,0.515312,0.498006,0.525581,0.520499,...,0.511537,0.495354,0.500296,0.508566,0.496302,0.510577,0.499886,0.503347,0.509443,0.514533
2024-10-28,0.501408,0.504086,0.493322,0.488404,0.509505,0.515989,0.503000,0.485217,0.491554,0.502963,...,0.485554,0.504880,0.490439,0.514672,0.494620,0.516013,0.498442,0.505762,0.520342,0.482837


In [9]:
# Keep only tickers that exist in prices_df
valid_tickers = [t for t in tickers_from_probs if t in prices_df.columns]

In [10]:
pred

NameError: name 'pred' is not defined

In [11]:
# Keep only tickers that exist in prices_df
valid_tickers = [t for t in tickers_from_probs if t in prices_df.columns]
# preds_as_tickers = preds_as_tickers[["probability_"+t if ("probability_"+t) in preds_df.columns else t for t in []]]  # no-op line for clarity
preds_as_tickers = preds_as_tickers[valid_tickers]

results = []

# Iterate over each signal date available in preds_df
for signal_date in preds_as_tickers.index:
    # If the signal date is not a trading day in prices_df, skip
    if signal_date not in prices_df.index:
        continue

    # Compute entry and exit indices using trading-day offsets
    sig_idx = prices_df.index.get_loc(signal_date)
    entry_idx = sig_idx + window_size
    exit_idx  = sig_idx + 2 * window_size

    # If we don't have enough future data, stop
    if exit_idx >= len(prices_df.index):
        break

    entry_date = prices_df.index[entry_idx]
    exit_date  = prices_df.index[exit_idx]

    # Current probabilities (as a Series indexed by ticker)
    current_probs = preds_as_tickers.loc[signal_date].dropna()

    if current_probs.empty:
        continue

    # Select top 50 to buy, bottom 50 to short (reduce if fewer available)
    k = min(50, len(current_probs))
    to_buy  = current_probs.nlargest(k).index.tolist()
    to_sell = current_probs.nsmallest(k).index.tolist()

    # Slice prices for entry/exit
    px_entry_long  = prices_df.loc[entry_date, to_buy]
    px_exit_long   = prices_df.loc[exit_date,  to_buy]

    px_entry_short = prices_df.loc[entry_date, to_sell]
    px_exit_short  = prices_df.loc[exit_date,  to_sell]

    # Compute returns; handle NaNs robustly
    long_rets  = (px_exit_long - px_entry_long) / px_entry_long
    short_rets = -(px_exit_short - px_entry_short) / px_entry_short

    long_portfolio_return  = np.nanmean(long_rets.values)  if len(long_rets)  else np.nan
    short_portfolio_return = np.nanmean(short_rets.values) if len(short_rets) else np.nan
    combined_return = np.nanmean([long_portfolio_return, short_portfolio_return])

    results.append({
        "signal_date": signal_date,
        "entry_date": entry_date,
        "exit_date": exit_date,
        "n_long": len(to_buy),
        "n_short": len(to_sell),
        "long_return": long_portfolio_return,
        "short_return": short_portfolio_return,
        "combined_return": combined_return
    })

results_df = pd.DataFrame(results).sort_values("signal_date").reset_index(drop=True)

# results_df now contains per-period portfolio returns you can cumulate for an equity curve,
# sum/mean for overall return, and use for Sharpe ratio computation

In [12]:
results_df

Unnamed: 0,signal_date,entry_date,exit_date,n_long,n_short,long_return,short_return,combined_return
0,2021-01-11,2021-01-19,2021-01-26,50,50,-0.036204,-0.006116,-0.021160
1,2021-01-19,2021-01-26,2021-02-02,50,50,-0.002377,0.015865,0.006744
2,2021-01-26,2021-02-02,2021-02-09,50,50,0.024560,-0.021562,0.001499
3,2021-02-02,2021-02-09,2021-02-17,50,50,0.003913,-0.011453,-0.003770
4,2021-02-09,2021-02-17,2021-02-24,50,50,0.015484,-0.016524,-0.000520
...,...,...,...,...,...,...,...,...
188,2024-10-07,2024-10-14,2024-10-21,50,50,-0.020388,-0.001256,-0.010822
189,2024-10-14,2024-10-21,2024-10-28,50,50,-0.005781,0.003256,-0.001262
190,2024-10-21,2024-10-28,2024-11-04,50,50,-0.020932,0.017183,-0.001874
191,2024-10-28,2024-11-04,2024-11-11,50,50,0.040316,-0.060466,-0.010075


In [19]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages

# === CONFIG ===
window_size = 5            # holding period in trading days (I5-R5)
pdf_path = "backtest_report.pdf"
csv_path = "backtest_summary.csv"
rolling_window_trades = 26 # trades per rolling Sharpe window
n_boot = 5000              # bootstrap samples for histogram CI
alpha = 0.05               # 1 - confidence level

# Fixed colors for portfolios
color_map = {
    "Long Only": "#1f77b4",     # blue
    "Short Only": "#d62728",    # red
    "Long-Short": "#2ca02c"     # green
}

# === INPUT ===
df = results_df.copy().sort_values("signal_date").reset_index(drop=True)

# === METRIC HELPERS ===
def annualized_return(overall_ret: float, n_periods: int, window_size: int) -> float:
    if n_periods <= 0:
        return np.nan
    return (1.0 + overall_ret) ** (252.0 / (n_periods * window_size)) - 1.0

def annualized_sharpe(ret: pd.Series, window_size: int) -> float:
    r = ret.dropna().values
    if r.size < 2 or np.isclose(r.std(ddof=0), 0):
        return np.nan
    periods_per_year = 252.0 / window_size
    return (r.mean() / r.std(ddof=0)) * np.sqrt(periods_per_year)

def equity_curve(ret: pd.Series) -> pd.Series:
    return (1.0 + ret.fillna(0.0)).cumprod()

def max_drawdown(eq: pd.Series) -> float:
    if eq.empty:
        return np.nan
    roll_max = eq.cummax()
    dd = eq / roll_max - 1.0
    return dd.min()

def confusion_counts(ret: pd.Series):
    r = ret.dropna()
    wins = int((r > 0).sum())
    losses = int((r <= 0).sum())
    return wins, losses

def compute_all(label, ret_series, n_trades):
    eq = equity_curve(ret_series)
    overall = float(eq.iloc[-1] - 1.0) if len(eq) else np.nan
    ann_ret = annualized_return(overall, len(ret_series), window_size)
    sharpe = annualized_sharpe(ret_series, window_size)
    mdd = max_drawdown(eq)
    wins, losses = confusion_counts(ret_series)
    win_rate = wins / (wins + losses) if (wins + losses) else np.nan
    out = {
        "Portfolio": label,
        "Overall": overall,
        "Ann. Return": ann_ret,
        "Ann. Sharpe": sharpe,
        "Max DD": mdd,
        "Trades": int(n_trades),
        "Wins": wins,
        "Losses": losses,
        "Win %": win_rate,
        "Mean": float(ret_series.mean()) if len(ret_series) else np.nan,
        "Std": float(ret_series.std(ddof=0)) if len(ret_series) else np.nan,
    }
    return out, eq

# === BOOTSTRAP CI (for mean) ===
def bootstrap_mean_ci(x: pd.Series, n_boot=5000, alpha=0.05, random_state=42):
    rng = np.random.default_rng(random_state)
    a = x.dropna().values
    if a.size == 0:
        return np.nan, np.nan, np.nan
    boots = rng.choice(a, size=(n_boot, a.size), replace=True).mean(axis=1)
    lo, hi = np.quantile(boots, [alpha/2, 1 - alpha/2])
    return a.mean(), float(lo), float(hi)

# === ROLLING SHARPE (over trades) ===
def rolling_sharpe(ret: pd.Series, window_trades: int, window_size: int) -> pd.Series:
    def _sharpe(window_vals):
        w = window_vals[~np.isnan(window_vals)]
        if w.size < 2 or np.isclose(w.std(ddof=0), 0):
            return np.nan
        periods_per_year = 252.0 / window_size
        return (w.mean() / w.std(ddof=0)) * np.sqrt(periods_per_year)
    return ret.rolling(window_trades, min_periods=window_trades).apply(_sharpe, raw=True)

# === BUILD METRICS ===
long_series   = df["long_return"]
short_series  = df["short_return"]
comb_series   = df["combined_return"]

long_stats,  long_eq  = compute_all("Long Only",  long_series,  int(df["n_long"].sum()))
short_stats, short_eq = compute_all("Short Only", short_series, int(df["n_short"].sum()))
comb_stats,  comb_eq  = compute_all("Long-Short", comb_series,  int(df["n_long"].sum() + df["n_short"].sum()))

summary_df = pd.DataFrame([long_stats, short_stats, comb_stats])
summary_df.to_csv(csv_path, index=False)

# Pre-format a clean-looking table
display_cols = ["Portfolio","Overall","Ann. Return","Ann. Sharpe","Max DD","Trades","Wins","Losses","Win %","Mean","Std"]

def _fmt_cell(val, col):
    if isinstance(val, str):
        return val
    if col in ("Overall","Ann. Return","Win %","Max DD"):
        return "–" if pd.isna(val) else f"{val:.2%}"
    if col in ("Ann. Sharpe","Mean","Std"):
        return "–" if pd.isna(val) else f"{val:.2f}"
    if col in ("Trades","Wins","Losses"):
        return "–" if pd.isna(val) else f"{int(val)}"
    return str(val)

table_data = [[_fmt_cell(row[c], c) for c in display_cols] for _, row in summary_df[display_cols].iterrows()]

# === PLOTTING & PDF EXPORT ===
pp = PdfPages(pdf_path)

# 1) Summary table
fig, ax = plt.subplots(figsize=(11, 3.0))
ax.axis("off")
col_widths = [0.12, 0.10, 0.12, 0.12, 0.10, 0.08, 0.08, 0.09, 0.08, 0.06, 0.05]
tbl = ax.table(
    cellText=table_data,
    colLabels=display_cols,
    colWidths=col_widths,
    loc="center"
)
tbl.auto_set_font_size(False)
tbl.set_fontsize(9)
tbl.scale(1, 1.25)
ax.set_title("Backtest Summary", pad=10)
pp.savefig(fig, bbox_inches="tight"); plt.close(fig)

# 2) Equity curves — combined
fig, ax = plt.subplots(figsize=(11, 5))
ax.plot(df["signal_date"], long_eq,  label="Long Only", color=color_map["Long Only"])
ax.plot(df["signal_date"], short_eq, label="Short Only", color=color_map["Short Only"])
ax.plot(df["signal_date"], comb_eq,  label="Long-Short", color=color_map["Long-Short"])
ax.set_xlabel("Date"); ax.set_ylabel("Equity (relative)")
ax.set_title("Equity Curves — All Portfolios"); ax.grid(True); ax.legend()
pp.savefig(fig, bbox_inches="tight"); plt.close(fig)

# 2b) Equity curves — single pages with same colors
for name, eq in [("Long Only", long_eq), ("Short Only", short_eq), ("Long-Short", comb_eq)]:
    fig, ax = plt.subplots(figsize=(11, 4.5))
    ax.plot(df["signal_date"], eq, label=name, color=color_map[name])
    ax.set_xlabel("Date"); ax.set_ylabel("Equity (relative)")
    ax.set_title(f"Equity Curve — {name}"); ax.grid(True); ax.legend()
    pp.savefig(fig, bbox_inches="tight"); plt.close(fig)

# 3) Histograms — with consistent colors & mean CI
for name, series in [("Long Only", long_series), ("Short Only", short_series), ("Long-Short", comb_series)]:
    mu, lo, hi = bootstrap_mean_ci(series, n_boot=n_boot, alpha=alpha)
    fig, ax = plt.subplots(figsize=(10, 4))
    counts, bins, patches = ax.hist(series.dropna(), bins=30, edgecolor="black", color=color_map[name], alpha=0.7)
    if pd.notna(mu):
        ax.axvline(mu, linestyle="--", linewidth=2, color="black", label=f"Mean = {mu:.4f}")
    if pd.notna(lo) and pd.notna(hi):
        ax.axvspan(lo, hi, alpha=0.15, color="grey", label=f"{int((1-alpha)*100)}% CI [{lo:.4f}, {hi:.4f}]")
    ax.set_title(f"{name} Trade Returns — Histogram with Mean CI")
    ax.set_xlabel("Return"); ax.set_ylabel("Frequency"); ax.grid(True); ax.legend()
    pp.savefig(fig, bbox_inches="tight"); plt.close(fig)

# 4) Rolling Sharpe — combined
long_roll_sharpe  = rolling_sharpe(long_series,  rolling_window_trades, window_size)
short_roll_sharpe = rolling_sharpe(short_series, rolling_window_trades, window_size)
comb_roll_sharpe  = rolling_sharpe(comb_series,  rolling_window_trades, window_size)

fig, ax = plt.subplots(figsize=(11, 5))
ax.plot(df["signal_date"], long_roll_sharpe,  label=f"Long (win={rolling_window_trades})", color=color_map["Long Only"])
ax.plot(df["signal_date"], short_roll_sharpe, label=f"Short (win={rolling_window_trades})", color=color_map["Short Only"])
ax.plot(df["signal_date"], comb_roll_sharpe,  label=f"Long-Short (win={rolling_window_trades})", color=color_map["Long-Short"])
ax.axhline(0, linewidth=1, color="black")
ax.set_title("Rolling Annualized Sharpe — All Portfolios")
ax.set_xlabel("Date"); ax.set_ylabel("Sharpe"); ax.grid(True); ax.legend()
pp.savefig(fig, bbox_inches="tight"); plt.close(fig)

# 4b) Rolling Sharpe — single pages with same colors
for name, series in [("Long Only", long_series), ("Short Only", short_series), ("Long-Short", comb_series)]:
    rs = rolling_sharpe(series, rolling_window_trades, window_size)
    fig, ax = plt.subplots(figsize=(11, 4.5))
    ax.plot(df["signal_date"], rs, label=f"{name} (win={rolling_window_trades})", color=color_map[name])
    ax.axhline(0, linewidth=1, color="black")
    ax.set_title(f"Rolling Annualized Sharpe — {name}")
    ax.set_xlabel("Date"); ax.set_ylabel("Sharpe"); ax.grid(True); ax.legend()
    pp.savefig(fig, bbox_inches="tight"); plt.close(fig)

pp.close()

print("Saved:")
print(f"- Summary CSV: {csv_path}")
print(f"- PDF Report:  {pdf_path}")
print()
print(summary_df[display_cols])

Saved:
- Summary CSV: backtest_summary.csv
- PDF Report:  backtest_report.pdf

    Portfolio   Overall  Ann. Return  Ann. Sharpe    Max DD  Trades  Wins  \
0   Long Only  0.471146     0.106068     0.682585 -0.221451    9650   115   
1  Short Only -0.390874    -0.121426    -0.670434 -0.403935    9650    80   
2  Long-Short -0.004318    -0.001129     0.005344 -0.101338   19300   101   

   Losses     Win %      Mean       Std  
0      78  0.595855  0.002288  0.023795  
1     113  0.414508 -0.002277  0.024107  
2      92  0.523316  0.000006  0.007471  
