In [None]:
# DO NOT RUN LOCALLY - was still running after 65 minutes
# ================================================================
# Optimal-Stopping MDP Panel Builder + Backward Induction Targets
# ================================================================
# - Expands each trade into daily decision states t=1..H (default H=10)
# - Guarantees NO LEAKAGE: only uses info available up to time t
# - Computes SELL (immediate exercise) payoff at each t
# - Runs backward induction to produce V_target (no risk adjustments)
# - Saves:
#     1) mdp_trajectory_panel.csv  (full panel w/ identifiers)
#     2) mdp_features_for_value_model.csv (numeric features for model)
# ================================================================

import re
import numpy as np
import pandas as pd

from sklearn.impute import SimpleImputer
from sklearn.ensemble import GradientBoostingRegressor

# -------------------
# Config
# -------------------
# >>> EDIT THIS <<<
# Path of the trade-level input table. load_any() reads paths ending in
# ".xlsx" with pd.read_excel and everything else with pd.read_csv.
INPUT_PATH = "oip_mega_boost1.csv"   # your big CSV
# or, if you're testing the provided example Excel:
# INPUT_PATH = r"/mnt/data/oip_mega_boost1_example_r30859.xlsx"

# Output locations (written relative to the current working directory)
OUTPUT_PANEL_CSV    = "mdp_trajectory_panel.csv"
OUTPUT_FEATURES_CSV = "mdp_features_for_value_model.csv"

# Horizon (max days after entry to consider).
# The effective horizon H used below is min(H_DEFAULT, largest forward
# offset found among the p_p{k}_td / v_p{k}_td / ret_p_p{k}_td columns).
H_DEFAULT = 10

# Train/Test splits by entry date (mebuydate)
# NOTE(review): TRAIN_END and TEST_START are the same date. label_set()
# tests `d <= TRAIN_END` first, so trades entered exactly on that date are
# labelled "train" and the test window effectively starts the day after.
# Confirm this is the intended boundary.
TRAIN_END  = pd.Timestamp("2024-02-21")
TEST_START = pd.Timestamp("2024-02-21")
TEST_END   = pd.Timestamp("2025-07-22")

# -------------------
# Load
# -------------------
def load_any(INPUT_PATH: str) -> pd.DataFrame:
    """Read the input table from disk.

    Paths ending in ".xlsx" (case-insensitive) are read with
    ``pd.read_excel``; every other path is read with ``pd.read_csv``
    using default options.
    """
    is_excel = INPUT_PATH.lower().endswith(".xlsx")
    reader = pd.read_excel if is_excel else pd.read_csv
    return reader(INPUT_PATH)

df = load_any(INPUT_PATH).copy()

# Parse dates (mm/dd/yyyy or m/d/yyyy tolerated).
# Values that cannot be parsed become NaT (errors="coerce"); rows with a NaT
# entry date are later labelled "drop" by label_set().
# `infer_datetime_format` is intentionally not passed: it is deprecated since
# pandas 2.0 (format inference is the default there) and only emitted a
# warning for each column parsed.
for c in ["filing_date", "trade_date", "mebuydate"]:
    if c in df.columns:
        df[c] = pd.to_datetime(df[c], errors="coerce")

# -------------------
# Identify forward-looking columns (must NOT be used for features at time t)
# -------------------
# Column-name patterns for post-entry data; the captured group is the
# trading-day offset k.
re_price_fwd = re.compile(r"^p_p(\d+)_td$")        # p_p{k}_td: price at +k TD
re_vol_fwd   = re.compile(r"^v_p(\d+)_td$")        # v_p{k}_td: volume at +k TD
re_ret_fwd   = re.compile(r"^ret_p_p(\d+)_td$")    # ret_p_p{k}_td: entry->+k TD return


def _offset_columns(pattern):
    """Return sorted (offset, column_name) pairs for df columns matching pattern."""
    hits = []
    for name in df.columns:
        found = pattern.match(name)
        if found:
            hits.append((int(found.group(1)), name))
    return sorted(hits)


fwd_price_cols = _offset_columns(re_price_fwd)
fwd_vol_cols   = _offset_columns(re_vol_fwd)
fwd_ret_cols   = _offset_columns(re_ret_fwd)

# Largest forward offset present in any of the three families (0 if none).
max_fwd = max(
    (
        offset
        for family in (fwd_price_cols, fwd_vol_cols, fwd_ret_cols)
        for offset, _ in family
    ),
    default=0,
)

# Cap the horizon at what the data actually provides; keep the default when
# no forward columns with a positive offset were found.
H = min(H_DEFAULT, max_fwd) if max_fwd > 0 else H_DEFAULT

def is_forward_col(c: str) -> bool:
    """Return True if column name ``c`` matches any forward-looking pattern
    (price, volume or return at +k trading days)."""
    forward_patterns = (re_price_fwd, re_vol_fwd, re_ret_fwd)
    return any(pattern.match(c) for pattern in forward_patterns)

# -------------------
# Base features (safe at entry): exclude forward-look columns
# We'll keep id-like columns separate (not in the model feature set)
# -------------------
# Identifier / bookkeeping columns that are carried for reference only and
# never enter the base feature set.
id_like = {
    "ticker",
    "company_name",
    "insider_name",
    "title",
    "trade_type",
    "year",
    "filing_date",
    "trade_date",
    "mebuydate",
    "mebuy_price",
    "filing_price",
}

# Everything that is neither forward-looking nor identifier-like, in the
# original column order.
base_feature_cols = [
    col
    for col in df.columns
    if not is_forward_col(col) and col not in id_like
]

# -------------------
# Time-varying features up to (and including) day t
# Uses only info allowed as-of t
# -------------------
def build_time_features(row: pd.Series, t: int) -> dict:
    """Build the day-``t`` path features for one trade.

    Reads only columns indexed at ``t`` or ``t-1`` plus the entry price.

    Returns a dict with:
      - "cum_ret_t": value of ``ret_p_p{t}_td`` (NaN if the column is absent).
      - "ret_1d": price at ``t`` relative to the previous reference price
        (entry price for t == 1, price at ``t-1`` otherwise). For t == 1 the
        cumulative return is used when prices are unusable; otherwise NaN.
    """
    cum_ret = row.get(f"ret_p_p{t}_td", np.nan)
    px_now = row.get(f"p_p{t}_td", np.nan)

    if t == 1:
        # First day is measured against the entry price; the cumulative
        # return is an equivalent fallback when prices are unusable.
        px_ref = row.get("mebuy_price", np.nan)
        fallback = cum_ret if pd.notna(cum_ret) else np.nan
    else:
        px_ref = row.get(f"p_p{t-1}_td", np.nan) if t - 1 >= 1 else np.nan
        fallback = np.nan

    prices_usable = pd.notna(px_now) and pd.notna(px_ref) and px_ref != 0
    one_day = px_now / px_ref - 1.0 if prices_usable else fallback

    # (Optional) Extend here with additional *as-of-t* features,
    # e.g., path-dependent stats built only from info up to t.
    return {"cum_ret_t": cum_ret, "ret_1d": one_day}

# -------------------
# Build the trajectory panel
# -------------------
# Expand each trade (one input row) into up to H decision rows, one per
# trading day t = 1..H for which a price or a return is available.
panel_rows = []
df_reset = df.reset_index(drop=True)

for idx, row in df_reset.iterrows():
    # The positional row number doubles as the trade identifier.
    trade_id = idx
    entry_dt = row.get("mebuydate", pd.NaT)
    entry_px = row.get("mebuy_price", np.nan)
    ticker   = row.get("ticker", None)

    # Entry-time features do not depend on t, so build them once per trade
    # instead of once per (trade, t) pair. They are unpacked into a fresh
    # dict for every panel row below, so no state is shared between rows.
    base_feats = row[base_feature_cols].to_dict()

    for t in range(1, H+1):
        price_t = row.get(f"p_p{t}_td", np.nan)
        ret_t   = row.get(f"ret_p_p{t}_td", np.nan)

        # Skip if we don't have *either* the price or the return for day t
        if pd.isna(price_t) and pd.isna(ret_t):
            continue

        # Time-varying features as of day t
        tv_feats  = build_time_features(row, t)
        days_left = H - t

        # Immediate SELL payoff at time t (relative to entry): prefer the
        # provided cumulative return, otherwise derive it from prices.
        # Stays NaN when neither source is usable.
        payoff_t = ret_t
        if pd.isna(payoff_t) and pd.notna(price_t) and pd.notna(entry_px) and entry_px != 0:
            payoff_t = price_t / entry_px - 1.0

        # Later keys win on collision: time-varying features override base
        # features, which override the identifier fields listed first.
        panel_rows.append({
            "trade_id": trade_id,
            "ticker": ticker,
            "entry_date": entry_dt,
            "t": t,
            "days_left": days_left,
            "payoff_t": payoff_t,
            "SELL_value": payoff_t,
            **base_feats,
            **tv_feats
        })

panel = pd.DataFrame(panel_rows)

# -------------------
# Train/Test split by entry_date
# -------------------
def label_set(d: pd.Timestamp) -> str:
    """Map an entry date to "train", "test" or "drop".

    Dates on or before TRAIN_END are "train"; remaining dates inside
    [TEST_START, TEST_END] are "test"; missing dates and anything else are
    "drop". The train check runs first, so a date satisfying both
    conditions is labelled "train".
    """
    if pd.notna(d):
        if d <= TRAIN_END:
            return "train"
        if TEST_START <= d <= TEST_END:
            return "test"
    return "drop"

# Tag every panel row with its split, then discard rows that fall outside
# both the train and test windows (or have no entry date).
panel["set"] = panel["entry_date"].apply(label_set)
keep_rows = panel["set"] != "drop"
panel = panel[keep_rows].copy()

# -------------------
# Backward induction to derive V_target (no risk adjustments)
# - numeric-only features + median imputation for the learner
# -------------------
# Work on a copy so `panel` itself is left without the V_target column.
panel_bi = panel.copy()
panel_bi["V_target"] = np.nan

# Numeric-only feature matrix (exclude identifiers / target-like fields)
exclude_feats = {"trade_id","ticker","entry_date","set","V_target","payoff_t","SELL_value"}
numeric_cols = panel_bi.select_dtypes(include=[np.number]).columns.tolist()
numeric_features = [c for c in numeric_cols if c not in exclude_feats]

# Ensure time coordinates included
if "t" not in numeric_features:
    numeric_features = ["t"] + numeric_features
if "days_left" not in numeric_features:
    numeric_features = ["days_left"] + [c for c in numeric_features if c != "days_left"]

# Initialize terminal step: must sell at t=H
panel_bi.loc[panel_bi["t"] == H, "V_target"] = panel_bi.loc[panel_bi["t"] == H, "payoff_t"]

# One imputer object is reused; it is re-fit on the training rows of each
# step before being applied to that step's prediction rows.
imputer = SimpleImputer(strategy="median")

# Work backwards t = H-1 ... 1
for k in range(H-1, 0, -1):
    # Fit on every row from a later step (t >= k+1), whose V_target has
    # already been filled by the terminal initialisation or an earlier pass.
    # NOTE(review): the mask does not filter on "set", so rows labelled
    # "test" are also used to fit the continuation model — confirm intended.
    train_mask = panel_bi["t"] >= (k+1)
    X_train = panel_bi.loc[train_mask, numeric_features]
    y_train = panel_bi.loc[train_mask, "V_target"]

    # If insufficient data or missing targets (e.g., very sparse tails), fallback to SELL_value
    # In this branch hold_pred is payoff_t, the value SELL_value was built
    # from, so V_target at step k reduces to the immediate payoff.
    if (train_mask.sum() < 50) or y_train.isna().any():
        hold_pred = panel_bi.loc[panel_bi["t"] == k, "payoff_t"].values
    else:
        X_train_imp = imputer.fit_transform(X_train)
        # A fresh regressor is trained at every step (H-1 fits in total).
        gbr = GradientBoostingRegressor(
            random_state=42, n_estimators=500, learning_rate=0.05,
            max_depth=3, subsample=0.8
        )
        gbr.fit(X_train_imp, y_train.values)

        # NOTE(review): the model is fit on states with t >= k+1 and then
        # evaluated on states with t == k, so the "t"/"days_left" inputs at
        # prediction time lie outside the fitted range — verify this is the
        # intended estimator of the continuation (HOLD) value.
        X_k = panel_bi.loc[panel_bi["t"] == k, numeric_features]
        X_k_imp = imputer.transform(X_k)
        hold_pred = gbr.predict(X_k_imp)

    # Value at step k = better of selling now vs. predicted value of holding.
    # NOTE(review): np.maximum propagates NaN, so a NaN SELL_value yields a
    # NaN V_target; y_train.isna().any() is then true for every earlier step
    # and the fallback branch is taken from there on.
    sell_val = panel_bi.loc[panel_bi["t"] == k, "SELL_value"].values
    v_k = np.maximum(sell_val, hold_pred)
    panel_bi.loc[panel_bi["t"] == k, "V_target"] = v_k

# -------------------
# Save outputs
# -------------------
# 1) Full panel with identifiers and V_target (good for auditing & backtesting)
panel_bi.to_csv(OUTPUT_PANEL_CSV, index=False)

# 2) Trimmed table for modeling: identifiers, split label, SELL_value and
#    V_target first, followed by the numeric model features.
meta_cols = ["trade_id","ticker","entry_date","t","days_left","set","SELL_value","V_target"]
time_coords = {"t","days_left"}  # already listed in meta_cols
keep_cols = meta_cols + [c for c in numeric_features if c not in time_coords]

# De-duplicate while preserving first-seen order, and keep only columns that
# actually exist in the panel.
ordered_keep = [c for c in dict.fromkeys(keep_cols) if c in panel_bi.columns]

feat_df = panel_bi[ordered_keep].copy()
feat_df.to_csv(OUTPUT_FEATURES_CSV, index=False)

# Run summary
n_train = int((panel_bi["set"] == "train").sum())
n_test = int((panel_bi["set"] == "test").sum())
print(f"Done. Horizon H={H}")
print(f"Panel rows: {len(panel_bi):,}")
print(f"Train rows: {n_train:,}")
print(f"Test rows : {n_test:,}")
print(f"Wrote: {OUTPUT_PANEL_CSV}")
print(f"Wrote: {OUTPUT_FEATURES_CSV}")


  df[c] = pd.to_datetime(df[c], errors="coerce", infer_datetime_format=True)
  df[c] = pd.to_datetime(df[c], errors="coerce", infer_datetime_format=True)
  df[c] = pd.to_datetime(df[c], errors="coerce", infer_datetime_format=True)


KeyboardInterrupt: 