In [5]:
import os
import glob
import numpy as np
import pandas as pd
from pandas.tseries.offsets import MonthBegin

# ============================================================
# CONFIG
# ============================================================

# Root folder containing all strategy folders.
# The main loop at the bottom of this cell processes every sub-directory of it.
ROOT_DIR = r"D:\work\Trade Analysis\Monthly Selection"

# Pattern to find your monthly stats file (CSV only).
# It is joined onto each strategy folder and handed to glob; the first match is read.
MONTHLY_CSV_PATTERN = "monthly_stats_all.csv"

# Rolling window = 5 years = 60 months.
# Also the length of the weight vector built by build_weights().
WINDOW_MONTHS = 60

# Sharpe filter: only rows with wf_sharpe >= MIN_SHARPE reach the selection file.
MIN_SHARPE = 1.0

# Recency weight configuration.
# Each value is the TOTAL weight of its bucket; build_weights() spreads it evenly
# over the months in that bucket and then renormalises the whole vector.
# NOTE(review): the values below add up to 1.01, not 1.00, so every bucket is
# effectively scaled by 1/1.01 (e.g. 0.20 -> 0.198020). Confirm which bucket was
# meant to be smaller.
WEIGHT_CONFIG = {
    "recent_3m": 0.20,   # last 3 months
    "rest_year1": 0.20,  # months 4–12
    "year2": 0.25,       # months 13–24
    "year3": 0.12,       # months 25–36
    "year4": 0.12,       # months 37–48
    "year5": 0.12        # months 49–60
}


# ============================================================
# BUILD WEIGHTS
# ============================================================

def build_weights():
    """Build the per-month recency weights for the rolling window.

    Every month in the window is assigned to a recency bucket by its age
    (0 = newest month). A bucket's configured total weight is shared equally
    by the months it nominally contains (3, 9, 12, 12, 12, 12), and the
    resulting vector is rescaled so that it sums to 1.

    Returns:
        numpy array of length WINDOW_MONTHS, ordered oldest -> newest.
    """
    n_months = WINDOW_MONTHS

    # (largest age in months that still belongs to the bucket, weight per month)
    tiers = (
        (2, WEIGHT_CONFIG["recent_3m"] / 3),
        (11, WEIGHT_CONFIG["rest_year1"] / 9),
        (23, WEIGHT_CONFIG["year2"] / 12),
        (35, WEIGHT_CONFIG["year3"] / 12),
        (47, WEIGHT_CONFIG["year4"] / 12),
    )
    # Anything older than 47 months falls into the last bucket.
    oldest_per_month = WEIGHT_CONFIG["year5"] / 12

    def weight_for_age(age):
        for max_age, per_month in tiers:
            if age <= max_age:
                return per_month
        return oldest_per_month

    # Position 0 is the oldest month, so its age is n_months - 1.
    weights = np.array(
        [weight_for_age(n_months - 1 - pos) for pos in range(n_months)],
        dtype=float,
    )
    return weights / weights.sum()


# Weight vector (oldest -> newest), built once here and reused for every rolling window.
WEIGHTS_60 = build_weights()

# ----------------------------
# Weight sanity-check (calendar-month buckets)
# ----------------------------
def summarize_weights(w):
    """Total the weight vector over each recency bucket.

    Args:
        w: sequence of WINDOW_MONTHS weights, ordered oldest -> newest.

    Returns:
        dict mapping bucket label -> summed weight, newest bucket first.
    """
    weights = np.asarray(w, dtype=float)
    assert len(weights) == WINDOW_MONTHS

    # (bucket label, slice into the oldest -> newest vector)
    layout = (
        ("last_3m", slice(-3, None)),
        ("months_4_to_12", slice(-12, -3)),      # 9 months
        ("months_13_to_24", slice(-24, -12)),    # 12 months
        ("months_25_to_36", slice(-36, -24)),    # 12 months
        ("months_37_to_48", slice(-48, -36)),    # 12 months
        ("months_49_to_60", slice(None, 12)),    # 12 months (oldest)
    )
    return {label: weights[span].sum() for label, span in layout}

WEIGHT_BUCKETS = summarize_weights(WEIGHTS_60)

# Print the effective (post-normalisation) weights so a mis-set config is visible.
print("WEIGHTS_60 sum:", WEIGHTS_60.sum())
print("Weight buckets (effective, after normalization):")
# Pad labels to the longest bucket name. A fixed width of 14 is one character
# short of names such as "months_13_to_24" and leaves the value column ragged.
_label_width = max((len(name) for name in WEIGHT_BUCKETS), default=0)
for k, v in WEIGHT_BUCKETS.items():
    print(f"  {k:>{_label_width}s}: {v:.6f}")

# Warn (without aborting) when the configured bucket weights do not add up to 1.0,
# because build_weights() silently rescales them in that case.
cfg_sum = sum(WEIGHT_CONFIG.values())
if abs(cfg_sum - 1.0) > 1e-9:
    print(f"WARNING: WEIGHT_CONFIG sums to {cfg_sum:.6f} (not 1.0). "
          f"Effective bucket weights are scaled by 1/{cfg_sum:.6f}.")



# ============================================================
# WALK-FORWARD CALC
# ============================================================

def expand_to_calendar_months(group: pd.DataFrame) -> pd.DataFrame:
    """
    Ensure *calendar* month continuity (month-end frequency) for one param combo.

    Every ``month_end`` stamp is first snapped to the last calendar day of its
    month (time of day removed), so rows dated e.g. on the last trading day are
    kept instead of being dropped by the reindex and replaced with zeros.

    Missing months (no trading activity) are inserted with:
      - monthly_return = 0
      - monthly_trades = 0

    Identifier columns are forward/back filled; ``year`` and ``month`` are always
    recomputed from the calendar ``month_end``.

    Args:
        group: rows of a single param combo; must contain a ``month_end`` column
            that ``pd.to_datetime`` can parse.

    Returns:
        A new DataFrame with one row per calendar month between the first and
        last observed month, ``month_end`` as a regular column. An empty input
        is returned as an (empty) copy.

    Raises:
        ValueError: if two rows fall into the same calendar month.
    """
    g = group.copy()
    if g.empty:
        # Nothing to expand; min()/max() of an empty column would be NaT.
        return g

    # Snap to calendar month-end. MonthEnd(0) leaves dates that already are a
    # month-end untouched and rolls any other day forward within its month.
    g["month_end"] = (
        pd.to_datetime(g["month_end"]).dt.normalize() + pd.offsets.MonthEnd(0)
    )

    if g["month_end"].duplicated().any():
        raise ValueError(
            "expand_to_calendar_months: more than one row falls in the same "
            "calendar month"
        )

    g = g.sort_values("month_end").set_index("month_end")

    # Full calendar month-end index between first and last observed month.
    # An offset object is used instead of the "ME" alias, which only exists in
    # recent pandas releases.
    full_idx = pd.date_range(
        g.index.min(), g.index.max(), freq=pd.offsets.MonthEnd()
    )

    g = g.reindex(full_idx)
    g.index.name = "month_end"

    # Fill missing performance months with zeros
    for c in ["monthly_return", "monthly_trades"]:
        if c in g.columns:
            g[c] = g[c].fillna(0)

    # Forward/back fill identifiers (year/month are rebuilt below, so they are
    # not filled here)
    id_cols = [
        "iteration_id", "strategy_name",
        "symbol", "side", "strategy", "base_combo_key", "param_combo_key"
    ]
    for c in id_cols:
        if c in g.columns:
            g[c] = g[c].ffill().bfill()

    # Recompute year/month from calendar month_end (authoritative)
    g["year"] = g.index.year
    g["month"] = g.index.month

    return g.reset_index()


def compute_wf_metrics(group):
    """Score one param combo over every rolling WINDOW_MONTHS-month window.

    The combo's rows are first expanded to a gap-free calendar-month series.
    For each window the recency-weighted mean and standard deviation of
    ``monthly_return`` are computed with WEIGHTS_60, and the ratio is
    annualised with sqrt(12).

    Args:
        group: monthly rows of a single param combo.

    Returns:
        DataFrame with one row per window end. Each row carries the columns of
        the window's last month plus ``eval_month``, ``trade_month`` (the next
        month start after ``eval_month``), ``wf_pnl``, ``wf_sharpe`` (NaN when
        the weighted std is not positive) and ``wf_trades`` (trades summed over
        the window). An empty DataFrame is returned when the expanded series is
        shorter than WINDOW_MONTHS.
    """
    # Expand to calendar months first (fills non-trading months with 0s)
    monthly = (
        expand_to_calendar_months(group)
        .sort_values("month_end")
        .reset_index(drop=True)
    )

    n_months = len(monthly)
    if n_months < WINDOW_MONTHS:
        return pd.DataFrame()

    annualise = np.sqrt(12)
    records = []

    for end_pos in range(WINDOW_MONTHS - 1, n_months):
        start_pos = end_pos - WINDOW_MONTHS + 1
        win = monthly.iloc[start_pos : end_pos + 1]

        returns = win["monthly_return"].values
        mean_w = np.sum(WEIGHTS_60 * returns)
        std_w = np.sqrt(np.sum(WEIGHTS_60 * (returns - mean_w) ** 2))

        if std_w > 0:
            sharpe_w = (mean_w / std_w) * annualise
        else:
            sharpe_w = np.nan

        window_end = monthly.loc[end_pos, "month_end"]
        next_month_start = (pd.to_datetime(window_end) + MonthBegin(1)).normalize()

        # Start from the last month's own columns, then attach the window scores.
        records.append({
            **monthly.loc[end_pos].to_dict(),
            "eval_month": window_end,
            "trade_month": next_month_start,
            "wf_pnl": mean_w,
            "wf_sharpe": sharpe_w,
            "wf_trades": win["monthly_trades"].sum(),
        })

    return pd.DataFrame(records)


# ============================================================
# LOAD MONTHLY STATS ALL
# ============================================================

def load_monthly_stats(strategy_folder):
    """Read the monthly stats CSV of one strategy folder.

    Args:
        strategy_folder: path of the strategy folder to search.

    Returns:
        DataFrame read from the first file (in sorted order) matching
        MONTHLY_CSV_PATTERN inside the folder, or None when nothing matches.
    """
    # Escape only the folder part: a folder name containing glob metacharacters
    # ([, ], *, ?) must be taken literally, while the file-name part stays a pattern.
    pattern = os.path.join(glob.escape(strategy_folder), MONTHLY_CSV_PATTERN)
    # Sorted so the chosen file is deterministic if the pattern matches several.
    files = sorted(glob.glob(pattern))

    if not files:
        print(f"  No monthly stats file found in: {strategy_folder}")
        return None

    f = files[0]
    print(f"  Found monthly stats: {f}")
    return pd.read_csv(f)


# ============================================================
# PROCESS INDIVIDUAL STRATEGY FOLDER
# ============================================================

def process_strategy(strategy_folder):
    """Run the walk-forward scoring and selection for one strategy folder.

    Steps:
      1. Load the monthly stats CSV and check the mandatory columns.
      2. Derive symbol / side / strategy and the combo keys from ``iteration_id``.
      3. Score every param combo with ``compute_wf_metrics`` and save all scores
         to ``walk_forward_scores_all.csv``.
      4. Per (base combo, eval month) keep the single best-ranked row (highest
         ``wf_sharpe``, then highest ``wf_pnl``), keep those with
         ``wf_sharpe >= MIN_SHARPE`` and save them to
         ``walk_forward_selection_sharpe_ge_1.csv``.

    Args:
        strategy_folder: path of the strategy folder; outputs are written into it.

    Returns:
        None. Progress and skip reasons are printed.
    """
    print(f"\nProcessing: {strategy_folder}")

    df = load_monthly_stats(strategy_folder)
    if df is None:
        return

    # Mandatory columns
    req = ["year", "month", "month_end", "monthly_return",
           "monthly_trades", "iteration_id", "strategy_name"]

    if any(col not in df.columns for col in req):
        print("  Missing required cols → skipping.")
        return

    # Force calendar month-end based on (year, month) so we align to true calendar months
    df["month_end"] = pd.to_datetime(dict(year=df["year"], month=df["month"], day=1)) + pd.offsets.MonthEnd(0)

    # Extract symbol + side from iteration_id (first and last "_"-separated token)
    parts = df["iteration_id"].str.split("_")
    df["symbol"] = parts.str[0]
    df["side"]   = parts.str[-1]
    df["strategy"] = df["strategy_name"]

    df["base_combo_key"]  = df["symbol"] + "|" + df["strategy"] + "|" + df["side"]
    df["param_combo_key"] = df["iteration_id"]

    # WALK FORWARD per param combo
    folder_name = os.path.basename(strategy_folder)
    all_res = []
    for _, g in df.groupby("param_combo_key"):
        res = compute_wf_metrics(g)
        if not res.empty:
            res["strategy_folder"] = folder_name
            all_res.append(res)

    if not all_res:
        print("  No combos with ≥60 months → skipping.")
        return

    scores_df = pd.concat(all_res, ignore_index=True)

    # Save ALL
    path_all = os.path.join(strategy_folder, "walk_forward_scores_all.csv")
    scores_df.to_csv(path_all, index=False)
    print(f"  Saved ALL scores → {path_all}")

    # Select best param per base combo + eval_month
    group_keys = ["base_combo_key", "eval_month"]
    scores_sorted = scores_df.sort_values(
        group_keys + ["wf_sharpe", "wf_pnl"],
        ascending=[True, True, False, False]
    )

    # Keep the top-ranked ROW of each group. groupby().first() must not be used
    # here: it returns the first non-null value per column, so a NaN in the best
    # row would be filled with a value taken from a different param combo.
    best = (
        scores_sorted
        .dropna(subset=group_keys)
        .drop_duplicates(subset=group_keys, keep="first")
    )
    # Group keys first, then the remaining columns in their existing order.
    best = best[group_keys + [c for c in best.columns if c not in group_keys]]

    # Filter Sharpe >= 1
    final_sel = best[best["wf_sharpe"] >= MIN_SHARPE].copy()

    if final_sel.empty:
        # Nothing passed the Sharpe filter; no selection file is written.
        print("Not selected")
        return

    # Expand base columns
    parts = final_sel["base_combo_key"].str.split("|", expand=True)
    final_sel["symbol"] = parts[0]
    final_sel["strategy"] = parts[1]
    final_sel["side"] = parts[2]

    path_sel = os.path.join(strategy_folder, "walk_forward_selection_sharpe_ge_1.csv")
    try:
        final_sel.to_csv(path_sel, index=False)
    except OSError as exc:
        # Keep going with the other folders, but say what actually went wrong.
        print(f"  Could not save selection → {path_sel}: {exc}")
        return
    print(f"  Saved SELECTION (Sharpe ≥ 1) → {path_sel}")


# ============================================================
# MAIN LOOP
# ============================================================

# Run the walk-forward selection on every sub-directory of ROOT_DIR;
# plain files sitting in the root are ignored.
for entry in os.listdir(ROOT_DIR):
    candidate = os.path.join(ROOT_DIR, entry)
    if not os.path.isdir(candidate):
        continue
    process_strategy(candidate)

print("\nDONE — Walk-forward monthly selection completed.")


WEIGHTS_60 sum: 1.0
Weight buckets (effective, after normalization):
         last_3m: 0.198020
  months_4_to_12: 0.198020
  months_13_to_24: 0.247525
  months_25_to_36: 0.118812
  months_37_to_48: 0.118812
  months_49_to_60: 0.118812

Processing: D:\work\Trade Analysis\Monthly Selection\old
  Found monthly stats: D:\work\Trade Analysis\Monthly Selection\old\monthly_stats_all.csv
  Saved ALL scores → D:\work\Trade Analysis\Monthly Selection\old\walk_forward_scores_all.csv
  Saved SELECTION (Sharpe ≥ 1) → D:\work\Trade Analysis\Monthly Selection\old\walk_forward_selection_sharpe_ge_1.csv

DONE — Walk-forward monthly selection completed.


In [4]:
import os
import pandas as pd

# Root folder where all strategy folders live; main() visits each sub-directory.
ROOT_DIR = r"D:\work\Trade Analysis\Monthly Selection"

# Name of the selection file created by Stage 2.
# Folders that do not contain a file with this name are skipped.
SELECTION_FILE_NAME = "walk_forward_selection_sharpe_ge_1.csv"

# Name of the deduplicated output file, written next to the selection file.
OUTPUT_FILE_NAME = "walk_forward_selection_sharpe_ge_1_unique.csv"


def find_column(df, candidates, what_for, required=True):
    """
    Resolve a column of ``df`` from a list of acceptable names, ignoring case.

    Candidates are tried in order and the first one that matches wins; the
    column name is returned exactly as it is spelled in ``df``.

    Args:
        df: object exposing ``.columns`` (string labels).
        candidates: names to try, in priority order.
        what_for: description used in the error message.
        required: when True a missing column raises, otherwise None is returned.

    Raises:
        ValueError: no candidate matched and ``required`` is True.
    """
    by_lower = {}
    for actual in df.columns:
        by_lower[actual.lower()] = actual

    for name in candidates:
        hit = by_lower.get(name.lower())
        if hit is not None:
            return hit

    if not required:
        return None

    raise ValueError(
        f"Could not find column for {what_for}. "
        f"Tried: {candidates}. Available: {list(df.columns)}"
    )


def process_strategy_folder(folder_path: str):
    """Deduplicate the selection file of one strategy folder.

    Reads SELECTION_FILE_NAME from ``folder_path`` (returns silently when it is
    absent), keeps one row per (strategy, side, month) — the row with the
    highest Sharpe, ties broken by the highest PnL — and writes the result to
    OUTPUT_FILE_NAME in the same folder.

    Args:
        folder_path: path of the strategy folder.

    Raises:
        ValueError: a required column cannot be found (see ``find_column``).
    """
    sel_path = os.path.join(folder_path, SELECTION_FILE_NAME)
    if not os.path.exists(sel_path):
        # No selection file in this folder
        return

    print(f"Processing selection file: {sel_path}")
    df = pd.read_csv(sel_path)

    # --- Detect key columns (be a bit flexible on names) ---
    # Strategy column
    strategy_col = find_column(
        df,
        ["strategy_name", "strategy", "strat_name"],
        what_for="strategy name"
    )
    # Side column
    side_col = find_column(
        df,
        ["side", "long_short", "direction"],
        what_for="side (Long/Short)"
    )
    # Month column (prefer trade_month, else eval_month)
    if "trade_month" in df.columns:
        month_col = "trade_month"
    else:
        month_col = find_column(
            df,
            ["eval_month", "month", "month_end"],
            what_for="month"
        )

    # Ensure month as datetime
    df[month_col] = pd.to_datetime(df[month_col])

    # Sharpe / PnL columns
    sharpe_col = find_column(
        df,
        ["wf_sharpe", "sharpe", "sr", "sharp"],
        what_for="walk-forward Sharpe"
    )
    pnl_col = find_column(
        df,
        ["wf_pnl", "pnl", "monthly_return", "return"],
        what_for="walk-forward PnL"
    )

    # --- Sort so best row comes first in each group ---
    group_cols = [strategy_col, side_col, month_col]
    df_sorted = df.sort_values(
        group_cols + [sharpe_col, pnl_col],
        ascending=[True, True, True, False, False]
    )

    # --- Keep the best (first) ROW per (strategy, side, month) ---
    # groupby().first() is deliberately avoided: it takes the first non-null
    # value column by column, so blanks in the best row would be filled with
    # values from a lower-ranked row. Rows with a missing key are dropped.
    unique_df = (
        df_sorted
        .dropna(subset=group_cols)
        .drop_duplicates(subset=group_cols, keep="first")
    )
    # Key columns first, then the remaining columns in their existing order.
    unique_df = unique_df[
        group_cols + [c for c in unique_df.columns if c not in group_cols]
    ]

    # --- Save deduplicated file ---
    out_path = os.path.join(folder_path, OUTPUT_FILE_NAME)
    unique_df.to_csv(out_path, index=False)
    print(f"  Saved unique selection to: {out_path}")


def main():
    """Deduplicate the selection file in every sub-directory of ROOT_DIR.

    A failure in one folder is reported and does not stop the remaining folders.
    """
    candidates = (os.path.join(ROOT_DIR, entry) for entry in os.listdir(ROOT_DIR))
    for strategy_folder in candidates:
        if os.path.isdir(strategy_folder):
            try:
                process_strategy_folder(strategy_folder)
            except Exception as e:
                print(f"Error processing {strategy_folder}: {e}")

    print("\nDone: deduplicated selection files for all strategy folders.")


if __name__ == "__main__":
    main()



Done: deduplicated selection files for all strategy folders.


In [17]:
# import os
# import glob
# import numpy as np
# import pandas as pd
# from pandas.tseries.offsets import MonthBegin

# # ============================================================
# # CONFIG – ADJUST THESE PATHS
# # ============================================================

# # Root folder where all strategy folders (with unique selection files) live
# SELECTION_ROOT = r"E:\WSG Markets\Bactesting 1 (Nov 2025) Year\New Selection tech\Results\Monthly Selection"

# # Root folder where BT results (trade level data) live
# # Inside: BT_RESULTS_ROOT / <bt_strategy_folder_name> / <trade-level-subfolder> / *<parent_combo_key>*.parquet
# BT_RESULTS_ROOT = r"E:\WSG Markets\Bactesting 1 (Nov 2025) Year\BT results"

# # Master price file (your big CSV with all symbols)
# MASTER_PRICE_FILE = r"E:\WSG Markets\Bactesting 1 (Nov 2025) Year\Raw Data\Master_Data_5yrs_till_Nov2025_Clean.csv"

# # Name of the *deduped* selection file inside each strategy folder
# SELECTION_FILE_NAME = "walk_forward_selection_sharpe_ge_1_unique.csv"

# # Ex-ante volatility lookback (days)
# VOL_LOOKBACK_DAYS = 90

# # Target ex-ante portfolio fall (e.g., 20%)
# TARGET_FALL = 0.20

# # Candidate names for the trade-level-data subfolder under each BT strategy folder
# TRADE_SUBFOLDER_CANDIDATES = [
#     "trade level data",
#     "Trade level data",
#     "Trade Level Data",
#     "Trade_Level_Data",
#     "TradeLevelData"
# ]

# # ============================================================
# # UTILITIES
# # ============================================================

# def find_column(df: pd.DataFrame, candidates, what_for: str, required: bool = True):
#     """Case-insensitive column finder."""
#     lower_map = {c.lower(): c for c in df.columns}
#     for cand in candidates:
#         if cand.lower() in lower_map:
#             return lower_map[cand.lower()]
#     if required:
#         raise ValueError(
#             f"Could not find column for {what_for}. "
#             f"Tried: {candidates}. Available: {list(df.columns)}"
#         )
#     return None

# # ============================================================
# # MASTER PRICE DATA + VOL
# # ============================================================

# _master_price_df = None
# _price_cache = {}  # per-symbol cache


# def load_master_price():
#     """Load and prepare master price file once."""
#     global _master_price_df
#     if _master_price_df is not None:
#         return _master_price_df

#     df = pd.read_csv(MASTER_PRICE_FILE)

#     # Parse date as day-first (per your file)
#     df["date"] = pd.to_datetime(df["date"], dayfirst=True)

#     # Use mid-close as price
#     df["mid_close"] = (df["askclose"] + df["bidclose"]) / 2.0

#     # Sort and compute daily returns per org_symbol
#     df = df.sort_values(["org_symbol", "date"]).reset_index(drop=True)
#     df["daily_return"] = (
#         df.groupby("org_symbol")["mid_close"]
#           .pct_change()
#           .values
#     )

#     _master_price_df = df
#     return _master_price_df


# def load_price_data_for_symbol(symbol: str) -> pd.DataFrame:
#     """
#     Return price series for one symbol (org_symbol) with:
#       - date
#       - mid_close
#       - daily_return
#     """
#     if symbol in _price_cache:
#         return _price_cache[symbol]

#     master = load_master_price()
#     sub = master[master["org_symbol"].astype(str) == str(symbol)].copy()

#     if sub.empty:
#         print(f"WARNING: No price data found in master for symbol '{symbol}'")
#         _price_cache[symbol] = pd.DataFrame(columns=["date", "mid_close", "daily_return"])
#         return _price_cache[symbol]

#     sub = sub[["date", "mid_close", "daily_return"]].sort_values("date").reset_index(drop=True)
#     _price_cache[symbol] = sub
#     return sub


# def compute_ex_ante_vol(symbol: str) -> pd.DataFrame:
#     """
#     Compute 90-day rolling ex-ante volatility for a symbol.
#     Returns:
#       - date
#       - vol_90
#     """
#     px = load_price_data_for_symbol(symbol)
#     if px.empty:
#         return pd.DataFrame(columns=["date", "vol_90"])

#     vol_df = px[["date", "daily_return"]].copy()
#     vol_df["vol_90"] = vol_df["daily_return"].rolling(VOL_LOOKBACK_DAYS).std()
#     return vol_df[["date", "vol_90"]]

# # ============================================================
# # TRADE-LEVEL DATA HELPERS
# # ============================================================

# def find_trade_subfolder(bt_strategy_folder: str):
#     """
#     Find 'trade level data' subfolder under a BT strategy folder:
#         BT_RESULTS_ROOT / <bt_strategy_folder_name> / <trade-level-subfolder>
#     """
#     for cand in TRADE_SUBFOLDER_CANDIDATES:
#         path = os.path.join(bt_strategy_folder, cand)
#         if os.path.isdir(path):
#             return path
#     return None


# def load_trade_level_data_for_selection(bt_strategy_folder_name: str,
#                                         parent_combo_key: str,
#                                         symbol: str,
#                                         trade_month: pd.Timestamp) -> pd.DataFrame:
#     """
#     Load trade-level parquet for a selection row using:
#         BT_RESULTS_ROOT / <bt_strategy_folder_name> / <trade-level-subfolder> / *<parent_combo_key>*.parquet

#     Then filter it to trades active in the given trade_month.

#     Uses:
#       - 'date'          -> entry_date
#       - 'Closing Date'  -> exit_date
#     """
#     # 1) Find BT strategy folder
#     bt_strategy_folder = os.path.join(BT_RESULTS_ROOT, bt_strategy_folder_name)
#     if not os.path.isdir(bt_strategy_folder):
#         print(f"  WARNING: BT strategy folder not found: {bt_strategy_folder}")
#         return pd.DataFrame()

#     # 2) Find 'trade level data' subfolder
#     trade_subfolder = find_trade_subfolder(bt_strategy_folder)
#     if trade_subfolder is None:
#         print(f"  WARNING: No 'trade level data' subfolder under {bt_strategy_folder}")
#         return pd.DataFrame()

#     # 3) Find parquet file via parent_combo_key
#     pattern = os.path.join(trade_subfolder, f"*{parent_combo_key}*.parquet")
#     matches = [p for p in glob.glob(pattern)]
#     if not matches:
#         print(f"  WARNING: No parquet file for parent_combo_key '{parent_combo_key}' in {trade_subfolder}")
#         return pd.DataFrame()

#     parquet_path = matches[0]
#     trades = pd.read_parquet(parquet_path)

#     # ---- Map entry/exit explicitly for your schema ----
#     # entry: 'date', exit: 'Closing Date'
#     if "date" not in trades.columns or "Closing Date" not in trades.columns:
#         raise ValueError(
#             f"Expected 'date' and 'Closing Date' in trade-level file {parquet_path}, "
#             f"found: {list(trades.columns)}"
#         )

#     trades["entry_date"] = pd.to_datetime(trades["date"], dayfirst=True, errors="coerce")
#     trades["exit_date"] = pd.to_datetime(trades["Closing Date"], dayfirst=True, errors="coerce")

#     # Symbol
#     if "org_symbol" in trades.columns:
#         trades["symbol"] = trades["org_symbol"].astype(str)
#     elif "Symbol" in trades.columns:
#         trades["symbol"] = trades["Symbol"].astype(str)
#     elif "symbol" in trades.columns:
#         trades["symbol"] = trades["symbol"].astype(str)
#     else:
#         trades["symbol"] = symbol

#     # Use parent_combo_key as iteration_id for reference
#     trades["iteration_id"] = parent_combo_key

#     # Side (we will overwrite from selection anyway)
#     if "side" in trades.columns:
#         trades["side"] = trades["side"].astype(str)
#     else:
#         trades["side"] = "Long"

#     # Trade ID
#     if "trade_id" in trades.columns:
#         trades["trade_id"] = trades["trade_id"].astype(str)
#     else:
#         trades["trade_id"] = (
#             trades["iteration_id"].astype(str) + "_" +
#             trades["entry_date"].dt.strftime("%Y%m%d") + "_" +
#             trades["exit_date"].dt.strftime("%Y%m%d")
#         )

#     # Filter to trades active in that trade_month
#     month_start = trade_month.replace(day=1)
#     month_end = (month_start + MonthBegin(1)) + pd.offsets.MonthEnd(0)

#     mask = (trades["entry_date"] <= month_end) & (trades["exit_date"] >= month_start)
#     trades = trades.loc[mask].copy()

#     return trades

# # ============================================================
# # LOAD DEDUPED SELECTIONS
# # ============================================================

# def load_all_selections(selection_root: str) -> pd.DataFrame:
#     frames = []
#     for name in os.listdir(selection_root):
#         strat_path = os.path.join(selection_root, name)
#         if not os.path.isdir(strat_path):
#             continue

#         sel_path = os.path.join(strat_path, SELECTION_FILE_NAME)
#         if not os.path.exists(sel_path):
#             continue

#         df = pd.read_csv(sel_path)
#         df["selection_folder"] = strat_path
#         frames.append(df)

#     if not frames:
#         print("No deduped selection files found.")
#         return pd.DataFrame()

#     sel = pd.concat(frames, ignore_index=True)

#     # Strategy folder name for BT results (column S)
#     bt_strategy_folder_col = find_column(
#         sel,
#         ["strategy_folder", "strategy_folder_name", "bt_strategy_folder", "bt_folder"],
#         what_for="BT strategy folder name"
#     )

#     # Instrument symbol
#     symbol_col = find_column(
#         sel,
#         ["symbol", "org_symbol", "ticker"],
#         what_for="symbol (instrument)"
#     )

#     # Side
#     side_col = find_column(
#         sel,
#         ["side", "long_short", "direction"],
#         what_for="side (Long/Short)"
#     )

#     # Trade month
#     trade_month_col = find_column(
#         sel,
#         ["trade_month"],
#         what_for="trade month"
#     )
#     sel["trade_month"] = pd.to_datetime(sel[trade_month_col])

#     # Parent combo key
#     parent_combo_col = find_column(
#         sel,
#         ["parent_combo_key", "param_combo_key", "combination_key", "combination"],
#         what_for="parent combo key"
#     )

#     # Store column names once in the DataFrame
#     sel["_bt_strategy_folder_col"] = bt_strategy_folder_col
#     sel["_symbol_col"] = symbol_col
#     sel["_side_col"] = side_col
#     sel["_parent_combo_col"] = parent_combo_col
#     sel["_trade_month_col"] = trade_month_col

#     return sel

# # ============================================================
# # EXPAND TRADES TO DAILY USING MASTER PRICE
# # ============================================================

# def expand_trades_to_daily(trades_df: pd.DataFrame) -> pd.DataFrame:
#     """
#     For each trade, expand into daily rows using price data.

#     trades_df must have:
#       - symbol (org_symbol; e.g. ACN)
#       - iteration_id (we use parent_combo_key here)
#       - entry_date
#       - exit_date
#       - side (Long / Short)
#     """
#     if trades_df.empty:
#         return pd.DataFrame(columns=["date", "symbol", "iteration_id", "trade_id", "trade_daily_return"])

#     all_days = []

#     for (symbol, iteration_id), group in trades_df.groupby(["symbol", "iteration_id"]):
#         price_df = load_price_data_for_symbol(symbol)
#         if price_df.empty:
#             continue

#         for _, tr in group.iterrows():
#             entry = tr["entry_date"]
#             exit_ = tr["exit_date"]

#             mask = (price_df["date"] >= entry) & (price_df["date"] <= exit_)
#             px = price_df.loc[mask, ["date", "daily_return"]].copy()
#             if px.empty:
#                 continue

#             side = str(tr["side"]).lower()
#             sign = 1.0 if "long" in side else -1.0
#             px["trade_daily_return"] = sign * px["daily_return"]

#             px["symbol"] = symbol
#             px["iteration_id"] = iteration_id
#             px["trade_id"] = tr["trade_id"]
#             all_days.append(px)

#     if not all_days:
#         return pd.DataFrame(columns=["date", "symbol", "iteration_id", "trade_id", "trade_daily_return"])

#     daily_trades = pd.concat(all_days, ignore_index=True)
#     return daily_trades

# # ============================================================
# # BUILD PORTFOLIO (1/vol SIZING + 20% TARGET FALL)
# # ============================================================

# def build_portfolio_pnl(daily_trades: pd.DataFrame) -> pd.DataFrame:
#     if daily_trades.empty:
#         return pd.DataFrame()

#     daily_trades["date"] = pd.to_datetime(daily_trades["date"])

#     # Compute vol_90 for each symbol
#     vol_frames = []
#     for symbol in daily_trades["symbol"].unique():
#         vol_df = compute_ex_ante_vol(symbol)
#         vol_df["symbol"] = symbol
#         vol_frames.append(vol_df)

#     if not vol_frames:
#         return pd.DataFrame()

#     vol_all = pd.concat(vol_frames, ignore_index=True)

#     df = daily_trades.merge(vol_all, on=["symbol", "date"], how="left")
#     df = df.dropna(subset=["vol_90"]).copy()

#     portfolio_rows = []
#     for date, grp in df.groupby("date"):
#         g = grp.copy()
#         g["base_size"] = 1.0 / g["vol_90"]
#         g.replace([np.inf, -np.inf], np.nan, inplace=True)
#         g = g.dropna(subset=["base_size"])

#         if g.empty:
#             continue

#         n_active = len(g)
#         k = TARGET_FALL / n_active  # scaling so total ex-ante fall ≈ 20%

#         g["position_size"] = k * g["base_size"]
#         g["pnl_contribution"] = g["position_size"] * g["trade_daily_return"]

#         portfolio_rows.append({
#             "date": date,
#             "portfolio_return": g["pnl_contribution"].sum(),
#             "num_trades_active": g["trade_id"].nunique(),
#             "num_combos_active": g["iteration_id"].nunique(),
#             "avg_vol_90": g["vol_90"].mean(),
#             "avg_position_size": g["position_size"].mean()
#         })

#     if not portfolio_rows:
#         return pd.DataFrame()

#     port_df = pd.DataFrame(portfolio_rows).sort_values("date").reset_index(drop=True)
#     return port_df

# # ============================================================
# # MAIN
# # ============================================================

# def main():
#     # 1) Load deduped selections
#     selections = load_all_selections(SELECTION_ROOT)
#     if selections.empty:
#         print("No selections found. Exiting.")
#         return

#     print(f"Total selected rows (after dedupe): {len(selections)}")

#     # Resolve actual column names from stored markers
#     bt_strategy_folder_col = selections["_bt_strategy_folder_col"].iloc[0]
#     symbol_col = selections["_symbol_col"].iloc[0]
#     side_col = selections["_side_col"].iloc[0]
#     parent_combo_col = selections["_parent_combo_col"].iloc[0]
#     trade_month_col = selections["_trade_month_col"].iloc[0]

#     trade_frames = []

#     # 2) Load trade-level data for all selections
#     for _, row in selections.iterrows():
#         bt_strategy_folder_name = row[bt_strategy_folder_col]
#         symbol = row[symbol_col]
#         side = row[side_col]
#         parent_combo_key = row[parent_combo_col]
#         trade_month = row[trade_month_col]

#         trades = load_trade_level_data_for_selection(
#             bt_strategy_folder_name=bt_strategy_folder_name,
#             parent_combo_key=parent_combo_key,
#             symbol=symbol,
#             trade_month=trade_month
#         )
#         if trades.empty:
#             continue

#         # Ensure side matches selection side
#         trades["side"] = side
#         trade_frames.append(trades)

#     if not trade_frames:
#         print("No trade-level data loaded. Exiting.")
#         return

#     all_trades = pd.concat(trade_frames, ignore_index=True)
#     print(f"Total trades loaded (rows): {len(all_trades)}")

#     # 3) Expand to daily
#     daily_trades = expand_trades_to_daily(all_trades)
#     if daily_trades.empty:
#         print("No daily trades generated. Exiting.")
#         return

#     print(f"Total daily trade rows: {len(daily_trades)}")

#     # 4) Build portfolio
#     portfolio_df = build_portfolio_pnl(daily_trades)
#     if portfolio_df.empty:
#         print("No portfolio P&L created. Exiting.")
#         return

#     print(f"Portfolio rows: {len(portfolio_df)}")

#     # 5) Save outputs
#     out_portfolio = os.path.join(SELECTION_ROOT, "portfolio_daily_pnl.csv")
#     portfolio_df.to_csv(out_portfolio, index=False)
#     print(f"Saved portfolio daily P&L → {out_portfolio}")

#     # Some basic counts
#     print("\nSUMMARY:")
#     print(f"  Unique symbols: {all_trades['symbol'].nunique()}")
#     print(f"  Unique iteration_ids (parent_combo_keys): {all_trades['iteration_id'].nunique()}")
#     print(f"  Unique trades: {all_trades['trade_id'].nunique()}")


# if __name__ == "__main__":
#     main()


In [19]:
# import os
# import time
# import numpy as np
# import pandas as pd

# # =========================================
# # CONFIG – EDIT THESE 3 PATHS
# # =========================================

# # 1) Trade-level file for ONE strategy (the file you shared)
# TRADE_FILE = r"E:\WSG Markets\Backtesting V2 Nov 2025\Monthly Selection\Stairs strategy\selected_trade_level_all_months.csv"

# # 2) Raw daily price file
# RAW_PRICE_FILE = r"E:\WSG Markets\Backtesting V2 Nov 2025\Raw Data\Master_Data till 31 0ct 2025.csv"

# # 3) Output file for the daily expansion of this strategy
# OUTPUT_FILE = r"E:\WSG Markets\Backtesting V2 Nov 2025\Portfolios-Summary\TradeDaily_OneStrategy_Test.csv"

# # Capital used per trade to compute quantity
# PER_TRADE_CAPITAL = 100000.0

# # =========================================
# # HELPERS
# # =========================================

# _price_master = None
# _price_cache = {}

# def log(msg):
#     print(f"[{time.strftime('%H:%M:%S')}] {msg}")

# def load_price_master():
#     """
#     Load raw price file once and prepare for mapping:
#     trade['Currency']  <->  price['Symbol'] (capital S).
#     """
#     global _price_master
#     if _price_master is not None:
#         return _price_master

#     log(f"Loading raw price file: {RAW_PRICE_FILE}")
#     df = pd.read_csv(RAW_PRICE_FILE)

#     if "date" not in df.columns:
#         raise ValueError("Raw price file must have a 'date' column")

#     df["date"] = pd.to_datetime(df["date"], dayfirst=True, errors="coerce")

#     if "Symbol" not in df.columns:
#         raise ValueError("Raw price file must have a 'Symbol' column (capital S)")

#     # Mid close as daily price
#     if {"askclose", "bidclose"}.issubset(df.columns):
#         df["price"] = (df["askclose"] + df["bidclose"]) / 2.0
#     else:
#         raise ValueError("Raw price file must contain 'askclose' and 'bidclose' columns")

#     df = df.sort_values(["Symbol", "date"]).reset_index(drop=True)
#     _price_master = df
#     log("Raw price file loaded.")
#     return _price_master

# def get_price_series(currency_value: str) -> pd.DataFrame:
#     """
#     Return price series (date, price) for given trade Currency.
#     Mapping: Currency (trade) <-> Symbol (price).
#     """
#     global _price_cache
#     if currency_value in _price_cache:
#         return _price_cache[currency_value]

#     master = load_price_master()
#     sub = master[master["Symbol"].astype(str) == str(currency_value)].copy()
#     if sub.empty:
#         log(f"  WARNING: No price series found for Currency='{currency_value}'")
#         _price_cache[currency_value] = pd.DataFrame()
#         return _price_cache[currency_value]

#     sub = sub[["date", "price"]].sort_values("date").reset_index(drop=True)
#     _price_cache[currency_value] = sub
#     return sub

# # =========================================
# # LOAD TRADE LEVEL (ONE STRATEGY)
# # =========================================

# def load_trades_one_strategy() -> pd.DataFrame:
#     """
#     Load selected_trade_level_all_months for one strategy and prepare:
#       - entry_date, exit_date
#       - entry_price
#       - Currency, Strategy, Timeframe, Condition, side
#     """
#     log(f"Loading trade-level file: {TRADE_FILE}")
#     df = pd.read_csv(TRADE_FILE)

#     required = ["date", "Closing Date", "Currency", "Strategy",
#                 "Timeframe", "Condition", "Trading Price Point"]
#     for c in required:
#         if c not in df.columns:
#             raise ValueError(f"Missing required column '{c}' in trade file.")

#     # Parse dates from trade file
#     df["entry_date"] = pd.to_datetime(df["date"], dayfirst=True, errors="coerce")
#     df["exit_date"] = pd.to_datetime(df["Closing Date"], dayfirst=True, errors="coerce")

#     # Core fields
#     df["Currency"] = df["Currency"].astype(str)
#     df["Strategy"] = df["Strategy"].astype(str)
#     df["Timeframe"] = df["Timeframe"].astype(str)
#     df["Condition"] = df["Condition"].astype(str)
#     df["entry_price"] = df["Trading Price Point"].astype(float)

#     # side (default Long if not present)
#     if "side" in df.columns:
#         df["side"] = df["side"].astype(str)
#     else:
#         df["side"] = "Long"

#     # Drop incomplete rows
#     df = df.dropna(subset=["entry_date", "exit_date", "entry_price"])
#     df = df.sort_values(["Currency", "entry_date", "exit_date"]).reset_index(drop=True)

#     # Create TradeID (0,1,2,...) for this strategy
#     df["TradeID"] = np.arange(len(df), dtype=int)

#     log(f"Loaded {len(df)} trades. "
#         f"Earliest entry: {df['entry_date'].min().date()}, "
#         f"latest entry: {df['entry_date'].max().date()}")
#     return df

# # =========================================
# # EXPAND TRADES TO DAILY
# # =========================================

# def expand_trades_to_daily(trades: pd.DataFrame) -> pd.DataFrame:
#     """
#     For each trade in this strategy, create daily rows:

#       - Dates: every date that has a row in the price series between
#         entry_date and exit_date (both ends inclusive).
#       - One row per TradeID per date.
#       - Every output date satisfies entry_date <= date <= exit_date.
#     """
#     if trades.empty:
#         return pd.DataFrame()

#     rows = []
#     t0 = time.time()
#     n_trades = len(trades)
#     log(f"Expanding {n_trades} trades to daily rows...")

#     for i, tr in trades.iterrows():
#         if (i + 1) % 50 == 0 or i == n_trades - 1:
#             log(f"  Trade {i+1}/{n_trades} ({tr['Currency']} {tr['Strategy']})")

#         ccy = tr["Currency"]
#         px = get_price_series(ccy)
#         if px.empty:
#             continue

#         entry_date = tr["entry_date"]
#         exit_date = tr["exit_date"]

#         # Keep only price rows with entry_date <= date <= exit_date (both ends inclusive)
#         mask = (px["date"] >= entry_date) & (px["date"] <= exit_date)
#         sub = px.loc[mask].copy()
#         if sub.empty:
#             # No prices inside this window → skip
#             continue

#         sub = sub.sort_values("date").reset_index(drop=True)

#         entry_price = tr["entry_price"]
#         trade_capital = PER_TRADE_CAPITAL
#         qty = trade_capital / entry_price

#         side_str = tr["side"].lower()
#         direction = 1 if "long" in side_str else -1

#         # Price & price_prev
#         sub["price"] = sub["price"].astype(float)
#         sub["price_prev"] = sub["price"].shift(1)

#         # daily_pnl: Qty * (price - price_prev) * Direction; first day pnl = 0
#         sub["daily_pnl"] = qty * (sub["price"] - sub["price_prev"].fillna(sub["price"])) * direction
#         sub.loc[sub.index[0], "daily_pnl"] = 0.0

#         sub["trade_capital"] = trade_capital
#         sub["daily_return_vs_trade_capital"] = sub["daily_pnl"] / trade_capital

#         # For reference: position value & exposure vs trade capital (like your sample)
#         sub["position_value"] = qty * sub["price"] * direction
#         sub["exposure_vs_trade_capital"] = sub["position_value"].abs() / trade_capital
#         sub["exposure_vs_portfolio"] = sub["exposure_vs_trade_capital"]  # can be changed later

#         # Final_PnL = sum over this TradeID
#         final_pnl = sub["daily_pnl"].sum()
#         sub["Final_PnL"] = final_pnl

#         # Repeat opening/closing dates and metadata
#         sub["Opening Date"] = entry_date
#         sub["Closing Date"] = exit_date
#         sub["Qty"] = qty
#         sub["Direction"] = direction
#         sub["TradeID"] = tr["TradeID"]
#         sub["Strategy"] = tr["Strategy"]
#         sub["Currency"] = tr["Currency"]
#         sub["Timeframe"] = tr["Timeframe"]
#         sub["Condition"] = tr["Condition"]

#         # Combination label, as in your sample:
#         #   <Strategy>_<Currency with "/" replaced by "_">_<Condition>_<Timeframe>
#         # e.g. "Stairs strategy_ACN_USD_Long_D1"
#         ccy_clean = tr["Currency"].replace("/", "_")
#         combo = f"{tr['Strategy']}_{ccy_clean}_{tr['Condition']}_{tr['Timeframe']}"
#         sub["Combination"] = combo

#         # Data_Type constant for now (you can change)
#         sub["Data_Type"] = "SL_Updated"

#         rows.append(sub)

#     if not rows:
#         log("No daily rows were generated.")
#         return pd.DataFrame()

#     daily = pd.concat(rows, ignore_index=True)
#     daily = daily.sort_values(["TradeID", "date"]).reset_index(drop=True)

#     # Order columns similar to your TradeDaily_All sample
#     cols_order = [
#         "date", "price", "TradeID", "Strategy", "Currency", "Timeframe",
#         "Condition", "Combination", "Data_Type", "Qty", "Direction",
#         "trade_capital", "Opening Date", "Closing Date", "Final_PnL",
#         "price_prev", "position_value", "exposure_vs_trade_capital",
#         "exposure_vs_portfolio", "daily_pnl", "daily_return_vs_trade_capital",
#     ]
#     # Keep extra columns at the end if any
#     cols_order = [c for c in cols_order if c in daily.columns] + \
#                  [c for c in daily.columns if c not in cols_order]

#     daily = daily[cols_order]
#     return daily

# # =========================================
# # MAIN
# # =========================================

# def main():
#     os.makedirs(os.path.dirname(OUTPUT_FILE), exist_ok=True)

#     trades = load_trades_one_strategy()
#     if trades.empty:
#         log("No trades loaded – check TRADE_FILE path / content.")
#         return

#     daily = expand_trades_to_daily(trades)
#     log(f"Daily rows generated: {len(daily)}")

#     daily.to_csv(OUTPUT_FILE, index=False)
#     log(f"Saved daily expansion for this strategy → {OUTPUT_FILE}")

#     # Quick sanity check on dates:
#     if not daily.empty:
#         log(f"Earliest daily date in output: {daily['date'].min()}")
#         log(f"Latest daily date in output:   {daily['date'].max()}")
#         log(f"Earliest trade entry date:     {trades['entry_date'].min()}")
#         log(f"Latest trade entry date:       {trades['entry_date'].max()}")

# if __name__ == "__main__":
#     main()
