In [None]:
import json
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
from IPython.display import display

# Notebook-wide display defaults: whitegrid plot theme, at most 20 rows shown
# per frame, and floats rendered with three decimals.
plt.style.use("seaborn-v0_8-whitegrid")
pd.set_option("display.max_rows", 20)
pd.set_option("display.float_format", lambda x: f"{x:0.3f}")

# IPython line magic (not plain Python): render matplotlib figures inline in the cell output.
%matplotlib inline

In [None]:
def resolve_repo_root() -> Path:
    """Locate the repository root by walking up from the working directory.

    The root is the nearest directory (the resolved working directory itself,
    then each ancestor in turn) that contains ``docs/modeling/predictions``.

    Returns:
        The first directory on the way up that contains the marker folder.

    Raises:
        FileNotFoundError: If neither the working directory nor any ancestor
            contains ``docs/modeling/predictions``.
    """
    marker = Path("docs", "modeling", "predictions")
    start = Path.cwd().resolve()
    # Check the starting directory first, then its parents from nearest to farthest.
    for base in (start, *start.parents):
        if (base / marker).exists():
            return base
    raise FileNotFoundError("Unable to locate docs/modeling/predictions in parent folders")

# Anchor every path to the discovered repository root so the notebook can be
# launched from any working directory inside the repo.
REPO_ROOT = resolve_repo_root()
PREDICTIONS_DIR = REPO_ROOT.joinpath("docs", "modeling", "predictions")

# Split name -> location of that split's PnL report JSON.
PNL_PATHS = {
    split_name: PREDICTIONS_DIR / filename
    for split_name, filename in (
        ("full_window_test", "intraday_v2_tcn_full_window_test_pnl.json"),
        ("q4_split_1_test", "intraday_v2_tcn_q4_split_1_test_pnl.json"),
        ("q4_split_2_test", "intraday_v2_tcn_q4_split_2_test_pnl.json"),
    )
}

# Echo the mapping as the cell output.
PNL_PATHS

In [None]:
def load_pnl_report(path: Path) -> dict:
    """Read a PnL report from a UTF-8 encoded JSON file.

    Args:
        path: Location of the JSON file to read.

    Returns:
        The decoded JSON document.
    """
    raw_text = path.read_text(encoding="utf-8")
    return json.loads(raw_text)

def make_threshold_frame(report: dict, split: str) -> pd.DataFrame:
    """Flatten the top-level fields of a PnL report into one row per entry.

    Args:
        report: Mapping of threshold label -> payload dict.
        split: Name stored in the ``split`` column of every row.

    Returns:
        A frame with the columns ``split``, ``threshold_label`` and the eight
        fields listed in ``metric_fields`` below, in that order. A field that a
        payload does not provide is stored as ``None``. The columns are present
        even when ``report`` is empty, so callers can sort or concatenate the
        result without checking for data first.
    """
    metric_fields = (
        "threshold",
        "signals",
        "win_rate",
        "avg_r",
        "median_r",
        "expectancy",
        "pnl_std",
        "max_drawdown",
    )
    rows = [
        {
            "split": split,
            "threshold_label": label,
            **{field: payload.get(field) for field in metric_fields},
        }
        for label, payload in report.items()
    ]
    # Pin the schema explicitly: with no rows pandas would otherwise return a
    # frame without any columns and downstream sort_values would raise KeyError.
    return pd.DataFrame(rows, columns=["split", "threshold_label", *metric_fields])

def make_nested_frame(report: dict, split: str, key: str) -> pd.DataFrame:
    """Flatten a nested list of records found under ``key`` in each payload.

    Args:
        report: Mapping of threshold label -> payload dict.
        split: Name stored in the ``split`` column of every row.
        key: Payload key whose value is a list of record dicts
            (the notebook uses ``"by_secid"`` and ``"by_day"``).

    Returns:
        One row per nested record, with ``split`` and ``threshold_label``
        placed first, followed by the record's own fields. If a record defines
        ``split`` or ``threshold_label`` itself, the record's value wins. When
        no records exist the frame is empty but still has the ``split`` and
        ``threshold_label`` columns.
    """
    records = [
        {"split": split, "threshold_label": label, **item}
        for label, payload in report.items()
        # `or []` also covers a key that is present but null in the JSON,
        # which `.get(key, [])` would hand back as None and fail to iterate.
        for item in (payload.get(key) or [])
    ]
    if not records:
        # Keep the identifying columns so concatenation and grouping still work.
        return pd.DataFrame(columns=["split", "threshold_label"])
    return pd.DataFrame(records)

def make_equity_frame(report: dict, split: str) -> pd.DataFrame:
    """Expand each payload's ``equity_curve`` list into long-format rows.

    Args:
        report: Mapping of threshold label -> payload dict.
        split: Name stored in the ``split`` column of every row.

    Returns:
        A frame with the columns ``split``, ``threshold_label``, ``step`` and
        ``equity``. ``step`` is the 1-based position of the value within its
        curve. The columns are present even when no curve has any points.
    """
    rows = [
        {"split": split, "threshold_label": label, "step": step, "equity": value}
        for label, payload in report.items()
        # `or []` treats a missing curve and an explicit JSON null the same way.
        for step, value in enumerate(payload.get("equity_curve") or [], start=1)
    ]
    # Fixed schema so an empty result can still be sorted and plotted downstream.
    return pd.DataFrame(rows, columns=["split", "threshold_label", "step", "equity"])

In [None]:
# Load every split's PnL report and flatten it into four long-format frames:
# threshold-level summary, per-security records, per-day records, equity curves.
reports = {}
threshold_frames, secid_frames, day_frames, equity_frames = [], [], [], []

for split, path in PNL_PATHS.items():
    data = load_pnl_report(path)
    reports[split] = data
    threshold_frames.append(make_threshold_frame(data, split))
    secid_frames.append(make_nested_frame(data, split, "by_secid"))
    day_frames.append(make_nested_frame(data, split, "by_day"))
    equity_frames.append(make_equity_frame(data, split))

# Sorting returns new frames instead of mutating in place; the original row
# labels from the concatenation are kept, exactly as an in-place sort would.
threshold_df = pd.concat(threshold_frames, ignore_index=True).sort_values(["split", "threshold"])

secid_df = pd.concat(secid_frames, ignore_index=True)
# make_nested_frame only adds "split" and "threshold_label"; a "threshold"
# column exists only if the by_secid records carry one themselves. Fall back
# to the label so the sort cannot fail with a KeyError.
secid_threshold_key = "threshold" if "threshold" in secid_df.columns else "threshold_label"
secid_df = secid_df.sort_values(["split", secid_threshold_key, "secid"])

day_df = pd.concat(day_frames, ignore_index=True)
day_df["date"] = pd.to_datetime(day_df["date"])

equity_df = pd.concat(equity_frames, ignore_index=True).sort_values(
    ["split", "threshold_label", "step"]
)

threshold_df

In [None]:
# Headline metrics per split and threshold, ordered for side-by-side reading.
display(
    threshold_df[["split", "threshold", "signals", "win_rate", "avg_r", "max_drawdown"]]
    .sort_values(["split", "threshold"])
)

# For each split, keep the single row with the highest avg_r.
best_thresholds = threshold_df.sort_values("avg_r", ascending=False).groupby("split").head(1)
display(best_thresholds)
In [None]:

# Average the per-security metrics over each (split, threshold label, security) group.
ticker_keys = ["split", "threshold_label", "secid"]
ticker_metrics = ["signals", "win_rate", "avg_r"]
ticker_view = secid_df.groupby(ticker_keys)[ticker_metrics].mean().reset_index()
display(ticker_view.head(20))

# Five rows with the lowest avg_r within each split.
ticker_ranking = ticker_view.sort_values("avg_r")
worst_tickers = ticker_ranking.groupby("split").head(5)
display(worst_tickers)
In [None]:

# Work on a copy so the per-day frame loaded earlier stays untouched.
daily_window = day_df.copy()

# "rolling_pnl" is the running total of pnl_sum within each (split, threshold
# label) group. Accumulate in date order rather than in whatever order the
# JSON listed the days; the result is aligned back by row label, so the frame
# keeps its existing row order (this relies on the row labels being unique).
daily_window["rolling_pnl"] = (
    daily_window.sort_values("date", kind="stable")
    .groupby(["split", "threshold_label"])["pnl_sum"]
    .cumsum()
)

# Five rows with the lowest daily pnl_sum within each split.
drawdown_drivers = daily_window.sort_values("pnl_sum").groupby("split").head(5)
display(drawdown_drivers[["split", "threshold_label", "date", "signals", "win_rate", "avg_r", "pnl_sum"]])

display(daily_window.tail(10))
In [None]:

# Draw one line per (split, threshold label) pair on a shared axis.
fig, ax = plt.subplots(figsize=(11, 5))
curve_groups = equity_df.groupby(["split", "threshold_label"])
for (split, label), curve in curve_groups:
    ax.plot(curve["step"], curve["equity"], label=f"{split} | {label}")

ax.set(
    title="Equity Curves per Threshold",
    xlabel="Step (per-day cumulative order)",
    ylabel="Cumulative R",
)
# Anchor the legend outside the axes so it does not cover the curves.
ax.legend(bbox_to_anchor=(1, 1), loc="upper left")
fig.tight_layout()
plt.show()
Python 3
python
python3
python
3.11
4
5