# Fix Equity Exports Notebook

This notebook reconstructs correct equity curves from trade logs and overwrites the previously flat `best_equity_*.csv` and `best_equity_scaled_100000_*.csv` files. It also includes a full data-handling scaffold that follows the required outline sections listed below.

Sections:
1. Environment verification
2. Imports
3. Configure paths & load data
4. Data audit
5. Missing / invalid handling
6. Feature engineering (add derived metrics)
7. Aggregations / pivoting
8. Visualization
9. Utility functions
10. Unit tests
11. Performance timing
12. Save outputs
13. Logging & capture
14. Reproducibility snapshot

The `RUN_SUFFIX` parameter (set in section 3) selects which run's files are processed; it is the run timestamp suffix of the file names (e.g. `20251028_094345`).

In [None]:
# 1. Set Up Environment and Verify Runtime
import sys, os, subprocess, json, platform, textwrap
from pathlib import Path
# Report the interpreter, its version and the working directory of this kernel.
root = Path.cwd()
runtime_lines = (
    f"Python executable: {sys.executable}",
    f"Python version   : {platform.python_version()}",
    f"Working dir      : {os.getcwd()}",
)
for runtime_line in runtime_lines:
    print(runtime_line)

# List at most the first 25 entries of the working directory.
print("Root listing (truncated):")
for position, entry in enumerate(root.iterdir()):
    if position >= 25:
        break
    print(" -", entry)

# Show key package versions (best-effort): a package that cannot be imported
# is reported as missing instead of aborting the cell.
for package_name in ("pandas", "numpy", "matplotlib", "seaborn"):
    try:
        module = __import__(package_name)
    except ImportError:
        print(f"{package_name}: NOT INSTALLED")
    else:
        print(f"{package_name}: {getattr(module, '__version__', 'N/A')}")


In [None]:
# 2. Import Libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
from typing import Optional, Tuple
import logging

# Notebook-wide plotting defaults: seaborn "talk" context, "whitegrid" style,
# and a default matplotlib figure size of (11, 5).
sns.set_context(context="talk")
sns.set_style(style="whitegrid")
plt.rcParams.update({"figure.figsize": (11, 5)})


In [None]:
# 3. Configure Paths and Load Data
from datetime import datetime

# Parameters
RUN_SUFFIX = "20251028_094345"  # e.g., 20251028_094345; override per run
# NOTE(review): the two START_EQUITY_* values are not used in this cell;
# presumably later cells use them as the starting points of the normalized
# and dollar-scaled curves -- confirm against those cells.
START_EQUITY_NORM = 1.0
START_EQUITY_SCALED = 100000.0

repo = Path.cwd()
pt_dir = repo / "paper_trading_outputs"

# File candidates constructed from suffix
best_trade_log = pt_dir / f"best_trade_log_{RUN_SUFFIX}.csv"
best_equity_norm = pt_dir / f"best_equity_{RUN_SUFFIX}.csv"
best_equity_scaled = pt_dir / f"best_equity_scaled_100000_{RUN_SUFFIX}.csv"

print("Using files:")
print(" - trade_log :", best_trade_log)
print(" - equity    :", best_equity_norm)
print(" - equity$   :", best_equity_scaled)

# Columns each input file must provide before it is processed any further.
required_trade_cols = {"ts", "equity_prev", "equity_next", "cum_pnl_$"}
required_equity_cols = {"ts", "equity"}


def _load_ts_csv(path: Path, label: str, required_cols: set) -> pd.DataFrame:
    """Load a CSV keyed by a ``ts`` column, validated, sorted and de-duplicated.

    Args:
        path: Location of the CSV file.
        label: Human-readable name of the file, used in error messages.
        required_cols: Column names that must be present in the header.

    Returns:
        The file contents sorted by ``ts`` with one row per timestamp and a
        fresh 0..n-1 index.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
        ValueError: If any of ``required_cols`` is absent from the header.
    """
    # Read only the header first: asking read_csv to parse a "ts" column that
    # does not exist fails with a generic pandas error, which would hide the
    # clearer "missing required columns" message below.
    try:
        header = pd.read_csv(path, nrows=0)
    except FileNotFoundError as err:
        raise FileNotFoundError(f"{label} not found: {path}") from err

    absent = set(required_cols) - set(header.columns)
    if absent:
        raise ValueError(f"{label} missing required columns: {sorted(absent)}")

    # infer_datetime_format is deprecated in pandas 2.x (format inference is
    # the default), so it is no longer passed.
    frame = pd.read_csv(path, parse_dates=["ts"])

    # A stable sort keeps rows that share a timestamp in file order, so
    # keep="last" deterministically retains the row written last.
    return (
        frame.sort_values("ts", kind="mergesort")
        .drop_duplicates(subset=["ts"], keep="last")
        .reset_index(drop=True)
    )


df_trades = _load_ts_csv(best_trade_log, "Trade log", required_trade_cols)
df_equity_proto = _load_ts_csv(best_equity_norm, "Equity CSV", required_equity_cols)


In [None]:
# 4. Quick Data Audit
# Each entry pairs one loaded frame with the exact labels printed for it, so
# the trade log and the equity prototype go through the same inspection steps.
_audit_targets = (
    ("Trades head/tail:", "df_trades.info() ->", "NA counts (trades):\n", df_trades),
    ("Equity proto head/tail:", "df_equity_proto.info() ->", "NA counts (equity):\n", df_equity_proto),
)

# First and last three rows of each frame.
for _preview_label, _, _, _frame in _audit_targets:
    print(_preview_label)
    display(_frame.head(3))
    display(_frame.tail(3))

# (rows, columns) of both frames on a single line.
print("Shapes:", *(_frame.shape for *_, _frame in _audit_targets))

# Column dtypes and non-null counts, as reported by DataFrame.info().
for _, _info_label, _, _frame in _audit_targets:
    print(_info_label)
    _frame.info()

# Number of missing values per column.
for _, _, _na_label, _frame in _audit_targets:
    _na_counts = _frame.isna().sum()
    print(_na_label, _na_counts)
