In [6]:
import pandas as pd
import pandas as pd
import numpy as np
from pathlib import Path
import re

# The notebook is expected to run from a subdirectory (notebooks/), so the
# parent of the current working directory is treated as the project root.
BASE_DIR = Path.cwd().parent
# Input CSVs are read from <project root>/data.
DATA_DIR = BASE_DIR.joinpath("data")

In [7]:

# Read the live portfolio export and the proposed trade list from DATA_DIR.
port = pd.read_csv(DATA_DIR.joinpath("port.csv"))
prop = pd.read_csv(DATA_DIR.joinpath("proposed_trades.csv"))

# Sanity check: column names of each frame, then both row counts on one line.
print("PORT columns:", port.columns.tolist())
print("PROP columns:", prop.columns.tolist())
print("Rows: port =", len(port), "prop =", len(prop))


PORT columns: ['Financial Instrument', 'Ticker Action', 'Position', 'Avg Price', 'Market Value', 'Daily P&L', 'Bid Size', 'Bid', 'Ask', 'Ask Size', 'Last', 'Change', 'Change %', 'In The Money', 'Unnamed: 14']
PROP columns: ['ETF', 'Underlying', 'Leverage', 'cagr_port_hist', 'LevType', 'borrow_current', 'shares_available', 'borrow_spiking', 'borrow_leq_cap', 'borrow_gt_cap', 'whitelisted', 'exclude_borrow_gt_cap', 'exclude_no_shares', 'exclude_borrow_spike', 'long_usd', 'short_usd', 'strategy_tag']
Rows: port = 100 prop = 102


In [8]:
import pandas as pd
import numpy as np
import re

def to_num(x):
    """Convert one cell of a broker export to a float; NaN when it cannot be parsed.

    Parameters
    ----------
    x : any scalar
        A raw cell value: a number, a string such as ``"1,234.50"``,
        ``"-1'000"`` or ``"C12.5"`` (leading letters are dropped), or a
        missing value.

    Returns
    -------
    float
        The parsed value, or ``np.nan`` for missing, empty, non-finite, or
        unparseable input.
    """
    if pd.isna(x):
        return np.nan

    # Cells that are already numeric (read_csv parses clean columns as numbers)
    # must not be round-tripped through str(): the character stripping below
    # destroys scientific notation, e.g. 1e-05 -> "1-05" -> NaN and
    # 1e+16 -> "116" -> 116.0. bool is excluded so True/False keep mapping to NaN.
    if isinstance(x, (int, float, np.integer, np.floating)) and not isinstance(x, bool):
        value = float(x)
        # Non-finite numbers were never parseable by the text path; keep them NaN.
        return value if np.isfinite(value) else np.nan

    # Text path: drop thousands separators, then a leading alphabetic prefix,
    # then every character that cannot be part of a plain decimal number.
    s = str(x).strip().replace(",", "").replace("'", "")
    s = re.sub(r"^[A-Za-z]+", "", s)
    s = re.sub(r"[^0-9\.\-]", "", s)
    if s in ("", "-", ".", "-."):
        return np.nan
    try:
        return float(s)
    except ValueError:
        # Leftovers such as "1-2" or "1.2.3" are not valid numbers.
        return np.nan

def norm_ticker(x):
    """Return ``x`` rendered as text, upper-cased, with surrounding whitespace removed."""
    text = str(x)
    return text.upper().strip()

# ============================
# 1) LIVE portfolio (port -> port_agg)
# ============================
# Derive the normalized ticker and the numeric quantity / price / market-value
# columns on a copy of the raw export; `port` itself is left untouched.
# IBKR Market Value is usually signed (shorts negative), so it is kept as the
# notional whenever it parsed; otherwise fall back to quantity * last price.
port2 = port.assign(
    ticker=lambda d: d["Financial Instrument"].astype(str).map(norm_ticker),
    qty_live=lambda d: d["Position"].apply(to_num).fillna(0.0),
    px_live=lambda d: d["Last"].apply(to_num),
    mv_live=lambda d: d["Market Value"].apply(to_num),
    notional_live=lambda d: np.where(
        d["mv_live"].notna(),
        d["mv_live"],
        d["qty_live"] * d["px_live"],
    ),
)

# One row per ticker: quantities and notionals add up, the price is the last
# one seen for that ticker.
port_agg = port2.groupby("ticker", as_index=False).agg(
    qty_live=pd.NamedAgg(column="qty_live", aggfunc="sum"),
    px_live=pd.NamedAgg(column="px_live", aggfunc="last"),
    notional_live=pd.NamedAgg(column="notional_live", aggfunc="sum"),
)

# Independent copy used as the lookup table for the pair-level joins.
live_by_ticker = port_agg.copy()

# ============================
# 2) PROPOSED trades (prop -> prop2)
# ============================
prop2 = prop.copy()

# Fail fast if the proposal file lacks any column this build relies on.
req = ["Underlying", "ETF", "long_usd", "short_usd", "Leverage"]
missing = [name for name in req if name not in prop2.columns]
if missing:
    raise ValueError(f"proposed_trades.csv missing columns: {missing}")

# Tickers go through the same normalization as the live portfolio so the
# later joins compare like with like.
prop2["ETF"] = prop2["ETF"].astype(str).map(norm_ticker)
prop2["Underlying"] = prop2["Underlying"].astype(str).map(norm_ticker)

# Dollar amounts: anything blank or unparseable counts as 0.
prop2["short_usd"] = pd.to_numeric(prop2["short_usd"], errors="coerce").fillna(0.0)
prop2["long_usd"] = pd.to_numeric(prop2["long_usd"], errors="coerce").fillna(0.0)

# Leverage multiple: blank / unparseable -> 1, and an explicit 0 is also
# replaced by 1.
prop2["leverage"] = (
    pd.to_numeric(prop2["Leverage"], errors="coerce")
      .fillna(1)
      .astype(float)
      .where(lambda s: s != 0, 1.0)
)

# ---- SIGN-SAFE MAGNITUDES ----
# long_usd / short_usd are treated as magnitudes, whatever sign convention the
# CSV happens to use.
prop2["short_usd_mag"] = prop2["short_usd"].abs()
prop2["long_usd_mag"] = prop2["long_usd"].abs()

# Effective hedge exposure: leverage-scaled short size, entered as a negative
# contribution.
prop2["etf_usd_effective"] = -(prop2["leverage"] * prop2["short_usd_mag"])

# ============================
# 3) PAIR-LEVEL proposed aggregation
# ============================
# Collapse the proposal to one row per (Underlying, ETF) pair.
pair_prop = prop2.groupby(["Underlying", "ETF"], as_index=False).agg(
    proposed_underlying_usd=pd.NamedAgg(column="long_usd_mag", aggfunc="sum"),
    proposed_etf_short_usd=pd.NamedAgg(column="short_usd_mag", aggfunc="sum"),
    # etf_usd_effective is already negative, so the sum stays negative
    proposed_etf_usd_effective=pd.NamedAgg(column="etf_usd_effective", aggfunc="sum"),
    lever_mult_min=pd.NamedAgg(column="leverage", aggfunc="min"),
    lever_mult_max=pd.NamedAgg(column="leverage", aggfunc="max"),
    n_rows=pd.NamedAgg(column="leverage", aggfunc="size"),
)

# Flag pairs whose source rows disagree on the leverage multiple.
pair_prop["leverage_inconsistent"] = pair_prop["lever_mult_min"].ne(
    pair_prop["lever_mult_max"]
)

# Proposed net delta: long underlying plus the (negative) effective ETF hedge.
pair_prop["proposed_net_delta"] = pair_prop["proposed_underlying_usd"].add(
    pair_prop["proposed_etf_usd_effective"]
)

# ============================
# 4) Attach LIVE positions to each pair
# ============================
# Left-join the per-ticker live table onto each pair twice: once keyed on the
# underlying leg, once keyed on the ETF leg. Pairs with no live position keep
# their row and get NaN in the joined columns.
pair_true = pd.merge(
    pd.merge(
        pair_prop,
        live_by_ticker.rename(columns={
            "ticker": "Underlying",
            "notional_live": "true_underlying_notional_live",
            "qty_live": "true_underlying_qty_live",
            "px_live": "true_underlying_px_live",
        }),
        how="left",
        on="Underlying",
    ),
    live_by_ticker.rename(columns={
        "ticker": "ETF",
        "notional_live": "true_etf_notional_live",
        "qty_live": "true_etf_qty_live",
        "px_live": "true_etf_px_live",
    }),
    how="left",
    on="ETF",
)

# No live position means zero notional / quantity (prices are left as NaN).
for c in [
    "true_underlying_notional_live",
    "true_etf_notional_live",
    "true_underlying_qty_live",
    "true_etf_qty_live",
]:
    pair_true[c] = pair_true[c].fillna(0.0)

# Leverage applied to the LIVE hedge: when a pair's rows disagree, the max is
# used and the pair stays flagged via leverage_inconsistent.
pair_true["lever_mult_for_live"] = pair_true["lever_mult_max"].astype(float)

# Effective live hedge is magnitude-based and always entered as negative.
# NOTE(review): abs() means a LONG live ETF position is also counted as a
# short hedge here - confirm that is intended.
pair_true["true_etf_usd_effective_live"] = -(
    pair_true["lever_mult_for_live"] * pair_true["true_etf_notional_live"].abs()
)

# True net delta (delta-like): signed underlying notional plus the effective
# hedge. To make the underlying magnitude-based as well, apply abs() to it here.
pair_true["true_net_delta_live"] = pair_true["true_underlying_notional_live"].add(
    pair_true["true_etf_usd_effective_live"]
)

# ============================
# 5) Gaps + status
# ============================
# Gap between what was proposed and what is live, signed and absolute.
pair_true["gap_net_delta"] = pair_true["proposed_net_delta"].sub(
    pair_true["true_net_delta_live"]
)
pair_true["abs_gap_net_delta"] = pair_true["gap_net_delta"].abs()

# A leg counts as missing when its live notional is (numerically) zero.
USD_EPS = 1e-6
pair_true["missing_underlying_live"] = (
    pair_true["true_underlying_notional_live"].abs().lt(USD_EPS)
)
pair_true["missing_etf_live"] = pair_true["true_etf_notional_live"].abs().lt(USD_EPS)

# The three explicit cases are mutually exclusive; whatever is left has both
# legs live.
pair_true["status"] = np.select(
    condlist=[
        pair_true["missing_underlying_live"] & pair_true["missing_etf_live"],
        pair_true["missing_etf_live"] & ~pair_true["missing_underlying_live"],
        pair_true["missing_underlying_live"] & ~pair_true["missing_etf_live"],
    ],
    choicelist=["MISSING_BOTH_LEGS", "MISSING_ETF_LEG", "MISSING_UNDERLYING_LEG"],
    default="HAS_BOTH_LEGS",
)

# Largest absolute gaps first.
pair_true = pair_true.sort_values(by="abs_gap_net_delta", ascending=False)

# Column selection for reviewing the pair-level table.
cols = [
    # identity
    "Underlying",
    "ETF",
    "n_rows",
    # leverage diagnostics
    "lever_mult_min",
    "lever_mult_max",
    "leverage_inconsistent",
    # proposed side
    "proposed_underlying_usd",
    "proposed_etf_short_usd",
    "proposed_etf_usd_effective",
    "proposed_net_delta",
    # live side
    "true_underlying_notional_live",
    "true_etf_notional_live",
    "true_etf_usd_effective_live",
    "true_net_delta_live",
    # gap + classification
    "gap_net_delta",
    "abs_gap_net_delta",
    "status",
]


In [9]:
import numpy as np
import pandas as pd

USD_EPS = 25.0  # USD notional below which a leg counts as missing; tune (25-100 is usually reasonable)

# Requires `pair_true` from the pair-level build, with at least these columns:
#   Underlying, ETF,
#   proposed_underlying_usd, proposed_etf_short_usd, proposed_net_delta,
#   lever_mult_max, true_underlying_notional_live, true_etf_notional_live
df = pair_true.copy()

# --- 1) ETF "fill ratio" per pair, compared on magnitudes
# A short ETF leg is expected to show up as a negative live notional, hence
# abs(). The ratio is capped to [0, 1.5]; pairs with no proposed short get 0.
df["etf_fill_ratio"] = (
    df["true_etf_notional_live"].abs()
      .div(df["proposed_etf_short_usd"])
      .clip(lower=0.0, upper=1.5)
      .where(df["proposed_etf_short_usd"] > 0, 0.0)
)

# --- 2) Split each underlying's live notional across its pairs
# Pairs are weighted by how filled their ETF leg looks, so an underlying
# position created by a different pair does not make this pair look executed.
# NOTE(review): when no pair of an underlying has any ETF fill, every weight
# is 0 and the underlying notional is allocated to nobody - confirm intended.
w_sum = df.groupby("Underlying")["etf_fill_ratio"].transform("sum")

df["alloc_weight"] = df["etf_fill_ratio"].div(w_sum).where(w_sum > 0, 0.0)

df["true_underlying_notional_live_alloc"] = (
    df.groupby("Underlying")["true_underlying_notional_live"]
      .transform("first")
      .mul(df["alloc_weight"])
)

# Same allocation for quantities, only when the quantity column was merged in.
if "true_underlying_qty_live" in df.columns:
    df["true_underlying_qty_live_alloc"] = (
        df.groupby("Underlying")["true_underlying_qty_live"]
          .transform("first")
          .mul(df["alloc_weight"])
    )

# --- 3) Pair execution status from the ETF leg + the ALLOCATED underlying
df["missing_etf_leg"] = df["true_etf_notional_live"].abs().lt(USD_EPS)
df["missing_underlying_alloc"] = df["true_underlying_notional_live_alloc"].abs().lt(USD_EPS)

# A pair counts as executed unless BOTH its ETF leg and its allocated
# underlying are missing - even if the underlying is held because of other pairs.
df["pair_executed"] = ~df["missing_etf_leg"] | ~df["missing_underlying_alloc"]

df["pair_status"] = np.select(
    condlist=[
        df["missing_etf_leg"] & df["missing_underlying_alloc"],
        df["missing_etf_leg"] & ~df["missing_underlying_alloc"],
        df["missing_underlying_alloc"] & ~df["missing_etf_leg"],
    ],
    choicelist=[
        "NOT_EXECUTED_BOTH_LEGS",
        "UNDERLYING_ONLY_ALLOCATED_(HEDGE_MISSING)",
        "ETF_ONLY_(ODD)_CHECK",
    ],
    default="EXECUTED_BOTH_LEGS_(LIKELY)",
)

# --- 4) (Optional) net deltas on the allocated underlying, for ranking misses
# Reuse the live leverage column when present; otherwise fall back to the max.
if "lever_mult_for_live" not in df.columns:
    df["lever_mult_for_live"] = df["lever_mult_max"].astype(float)

# The live ETF notional enters SIGNED here (a short leg subtracts).
df["true_net_delta_live_alloc"] = df["true_underlying_notional_live_alloc"].add(
    df["lever_mult_for_live"].mul(df["true_etf_notional_live"])
)

df["gap_net_delta_alloc"] = df["proposed_net_delta"].sub(df["true_net_delta_live_alloc"])
df["abs_gap_net_delta_alloc"] = df["gap_net_delta_alloc"].abs()

# --- 5) Display: rank the pairs most likely skipped / broken
out_cols = [
    "Underlying",
    "ETF",
    "proposed_underlying_usd",
    "proposed_etf_short_usd",
    "lever_mult_max",
    "true_etf_notional_live",
    "etf_fill_ratio",
    "true_underlying_notional_live",
    "true_underlying_notional_live_alloc",
    "alloc_weight",
    "pair_status",
    "gap_net_delta_alloc",
    "abs_gap_net_delta_alloc",
]

# Group by status (alphabetical), biggest absolute gap first within each status.
df = df.sort_values(
    by=["pair_status", "abs_gap_net_delta_alloc"],
    ascending=[True, False],
).loc[:, out_cols]
df

Unnamed: 0,Underlying,ETF,proposed_underlying_usd,proposed_etf_short_usd,lever_mult_max,true_etf_notional_live,etf_fill_ratio,true_underlying_notional_live,true_underlying_notional_live_alloc,alloc_weight,pair_status,gap_net_delta_alloc,abs_gap_net_delta_alloc
23,CELH,CELT,23529.41,11764.71,2.0,-12733.0,1.082305,0.0,0.0,1.0,ETF_ONLY_(ODD)_CHECK,25465.99,25465.99
21,BRK-B,BRKU,23529.41,11764.71,2.0,-11597.0,0.985745,0.0,0.0,1.0,ETF_ONLY_(ODD)_CHECK,23193.99,23193.99
18,BKNG,BKNU,23529.41,11764.71,2.0,-1416.0,0.120360,0.0,0.0,1.0,ETF_ONLY_(ODD)_CHECK,2831.99,2831.99
3,AI,AIYY,17647.06,17647.06,1.0,-2131.0,0.120757,0.0,0.0,1.0,ETF_ONLY_(ODD)_CHECK,2131.00,2131.00
89,SPY,XYLD,17647.06,17647.06,1.0,-2127.0,0.120530,0.0,0.0,1.0,ETF_ONLY_(ODD)_CHECK,2127.00,2127.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...
87,SPY,JEPI,17647.06,17647.06,1.0,0.0,0.000000,0.0,0.0,0.0,NOT_EXECUTED_BOTH_LEGS,0.00,0.00
90,SPY,XYLG,17647.06,17647.06,1.0,0.0,0.000000,0.0,0.0,0.0,NOT_EXECUTED_BOTH_LEGS,0.00,0.00
88,SPY,SPYI,17647.06,17647.06,1.0,0.0,0.000000,0.0,0.0,0.0,NOT_EXECUTED_BOTH_LEGS,0.00,0.00
97,TSM,TSMY,17647.06,17647.06,1.0,0.0,0.000000,0.0,0.0,0.0,NOT_EXECUTED_BOTH_LEGS,0.00,0.00


In [10]:
# Column selection for the final pair tracker, assembled group by group.
final_cols = (
    # identity
    ["Underlying", "ETF"]
    # proposed intent
    + ["proposed_underlying_usd", "proposed_etf_short_usd", "lever_mult_max"]
    # what actually happened (pair-specific)
    + ["true_etf_notional_live", "true_underlying_notional_live_alloc"]
    # execution diagnostics
    + ["etf_fill_ratio", "pair_status"]
    # risk impact
    + ["gap_net_delta_alloc", "abs_gap_net_delta_alloc"]
)


In [11]:
import numpy as np
import pandas as pd

USD_EPS = 25.0  # USD notional below which a leg counts as missing; tune (25-100 is usually reasonable)

# Requires `pair_true` from the pair-level build, with at least these columns:
#   Underlying, ETF,
#   proposed_underlying_usd, proposed_etf_short_usd, proposed_net_delta,
#   lever_mult_max, true_underlying_notional_live, true_etf_notional_live
df = pair_true.copy()

# --- 1) ETF "fill ratio" per pair, compared on magnitudes
# A short ETF leg is expected to show up as a negative live notional, hence
# abs(). The ratio is capped to [0, 1.5]; pairs with no proposed short get 0.
df["etf_fill_ratio"] = (
    df["true_etf_notional_live"].abs()
      .div(df["proposed_etf_short_usd"])
      .clip(lower=0.0, upper=1.5)
      .where(df["proposed_etf_short_usd"] > 0, 0.0)
)

# --- 2) Split each underlying's live notional across its pairs
# Pairs are weighted by how filled their ETF leg looks, so an underlying
# position created by a different pair does not make this pair look executed.
# NOTE(review): when no pair of an underlying has any ETF fill, every weight
# is 0 and the underlying notional is allocated to nobody - confirm intended.
w_sum = df.groupby("Underlying")["etf_fill_ratio"].transform("sum")

df["alloc_weight"] = df["etf_fill_ratio"].div(w_sum).where(w_sum > 0, 0.0)

df["true_underlying_notional_live_alloc"] = (
    df.groupby("Underlying")["true_underlying_notional_live"]
      .transform("first")
      .mul(df["alloc_weight"])
)

# Same allocation for quantities, only when the quantity column was merged in.
if "true_underlying_qty_live" in df.columns:
    df["true_underlying_qty_live_alloc"] = (
        df.groupby("Underlying")["true_underlying_qty_live"]
          .transform("first")
          .mul(df["alloc_weight"])
    )

# --- 3) Pair execution status from the ETF leg + the ALLOCATED underlying
df["missing_etf_leg"] = df["true_etf_notional_live"].abs().lt(USD_EPS)
df["missing_underlying_alloc"] = df["true_underlying_notional_live_alloc"].abs().lt(USD_EPS)

# A pair counts as executed unless BOTH its ETF leg and its allocated
# underlying are missing - even if the underlying is held because of other pairs.
df["pair_executed"] = ~df["missing_etf_leg"] | ~df["missing_underlying_alloc"]

df["pair_status"] = np.select(
    condlist=[
        df["missing_etf_leg"] & df["missing_underlying_alloc"],
        df["missing_etf_leg"] & ~df["missing_underlying_alloc"],
        df["missing_underlying_alloc"] & ~df["missing_etf_leg"],
    ],
    choicelist=[
        "NOT_EXECUTED_BOTH_LEGS",
        "UNDERLYING_ONLY_ALLOCATED_(HEDGE_MISSING)",
        "ETF_ONLY_(ODD)_CHECK",
    ],
    default="EXECUTED_BOTH_LEGS_(LIKELY)",
)

# --- 4) (Optional) net deltas on the allocated underlying, for ranking misses
# Reuse the live leverage column when present; otherwise fall back to the max.
if "lever_mult_for_live" not in df.columns:
    df["lever_mult_for_live"] = df["lever_mult_max"].astype(float)

# The live ETF notional enters SIGNED here (a short leg subtracts).
df["true_net_delta_live_alloc"] = df["true_underlying_notional_live_alloc"].add(
    df["lever_mult_for_live"].mul(df["true_etf_notional_live"])
)

df["gap_net_delta_alloc"] = df["proposed_net_delta"].sub(df["true_net_delta_live_alloc"])
df["abs_gap_net_delta_alloc"] = df["gap_net_delta_alloc"].abs()

# --- 5) Display: rank the pairs most likely skipped / broken
out_cols = [
    "Underlying",
    "ETF",
    "proposed_underlying_usd",
    "proposed_etf_short_usd",
    "lever_mult_max",
    "true_etf_notional_live",
    "etf_fill_ratio",
    "true_underlying_notional_live",
    "true_underlying_notional_live_alloc",
    "alloc_weight",
    "pair_status",
    "gap_net_delta_alloc",
    "abs_gap_net_delta_alloc",
]

# Group by status (alphabetical), biggest absolute gap first within each status.
df = df.sort_values(
    by=["pair_status", "abs_gap_net_delta_alloc"],
    ascending=[True, False],
).loc[:, out_cols]
df


Unnamed: 0,Underlying,ETF,proposed_underlying_usd,proposed_etf_short_usd,lever_mult_max,true_etf_notional_live,etf_fill_ratio,true_underlying_notional_live,true_underlying_notional_live_alloc,alloc_weight,pair_status,gap_net_delta_alloc,abs_gap_net_delta_alloc
23,CELH,CELT,23529.41,11764.71,2.0,-12733.0,1.082305,0.0,0.0,1.0,ETF_ONLY_(ODD)_CHECK,25465.99,25465.99
21,BRK-B,BRKU,23529.41,11764.71,2.0,-11597.0,0.985745,0.0,0.0,1.0,ETF_ONLY_(ODD)_CHECK,23193.99,23193.99
18,BKNG,BKNU,23529.41,11764.71,2.0,-1416.0,0.120360,0.0,0.0,1.0,ETF_ONLY_(ODD)_CHECK,2831.99,2831.99
3,AI,AIYY,17647.06,17647.06,1.0,-2131.0,0.120757,0.0,0.0,1.0,ETF_ONLY_(ODD)_CHECK,2131.00,2131.00
89,SPY,XYLD,17647.06,17647.06,1.0,-2127.0,0.120530,0.0,0.0,1.0,ETF_ONLY_(ODD)_CHECK,2127.00,2127.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...
87,SPY,JEPI,17647.06,17647.06,1.0,0.0,0.000000,0.0,0.0,0.0,NOT_EXECUTED_BOTH_LEGS,0.00,0.00
90,SPY,XYLG,17647.06,17647.06,1.0,0.0,0.000000,0.0,0.0,0.0,NOT_EXECUTED_BOTH_LEGS,0.00,0.00
88,SPY,SPYI,17647.06,17647.06,1.0,0.0,0.000000,0.0,0.0,0.0,NOT_EXECUTED_BOTH_LEGS,0.00,0.00
97,TSM,TSMY,17647.06,17647.06,1.0,0.0,0.000000,0.0,0.0,0.0,NOT_EXECUTED_BOTH_LEGS,0.00,0.00


In [12]:
# Write the ranked pair table to CSV without the index column. The path is
# relative, so the file lands in the current working directory.
# NOTE(review): every column of `df` is exported; `final_cols` is not applied
# here - confirm that is intended.
df.to_csv("pair_tracker.csv", index=False)

print("Saved pair_tracker.csv")


Saved pair_tracker.csv
