# ML Trading Pipeline - Classification-Based Implementation

## 1. Import Required Libraries

In [27]:
# Imports
import pandas as pd, numpy as np, warnings, os, json, joblib
from datetime import datetime, timedelta
from collections import defaultdict, deque
import scipy.stats as stats
from scipy.optimize import minimize_scalar
from sklearn.ensemble import HistGradientBoostingClassifier, RandomForestClassifier, HistGradientBoostingRegressor, RandomForestRegressor, ExtraTreesRegressor
from sklearn.linear_model import LinearRegression, Ridge, Lasso, LassoCV, ElasticNetCV, HuberRegressor, LogisticRegression
from sklearn.model_selection import KFold, TimeSeriesSplit, cross_val_score
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error, accuracy_score, classification_report, confusion_matrix, log_loss
from sklearn.preprocessing import StandardScaler, RobustScaler
from sklearn.pipeline import Pipeline
from sklearn.feature_selection import SelectKBest
from sklearn.base import clone
from typing import Dict, List, Tuple, Optional, Union
import random
warnings.filterwarnings('ignore')

# Deterministic seeds
np.random.seed(42)
random.seed(42)

In [28]:
# Single-source threshold & cost config for allocator-only mode
# These globals are consumed by consolidated arms and backtest cells.
try:
    S_MIN
except NameError:
    S_MIN = 0.12
try:
    M_MIN
except NameError:
    M_MIN = 0.12
try:
    CONF_MIN
except NameError:
    CONF_MIN = 0.60
try:
    ALPHA_MIN
except NameError:
    ALPHA_MIN = 0.10
try:
    COOLDOWN
except NameError:
    COOLDOWN = 1
try:
    COST_BP
except NameError:
    COST_BP = 5.0
try:
    IMPACT_K
except NameError:
    IMPACT_K = 0.0

# Risk and execution controls
try:
    SIGMA_TARGET
except NameError:
    SIGMA_TARGET = 0.20  # per-bar target scaler proxy
try:
    POS_MAX
except NameError:
    POS_MAX = 1.0
try:
    DD_STOP
except NameError:
    DD_STOP = 0.05
try:
    LATENCY_BARS
except NameError:
    LATENCY_BARS = 0
try:
    SLIPPAGE_BPS
except NameError:
    SLIPPAGE_BPS = 0.0
try:
    COST_CONVENTION
except NameError:
    COST_CONVENTION = 'per_transition'  # or 'per_roundtrip'
try:
    SMOOTH_BETA
except NameError:
    SMOOTH_BETA = 0.0

## Data Loading and Processing

In [29]:
# Minimal robust merge of OHLCV + funding + cohort-filtered fills (keep all columns)
import os

# Cohorts
cohort_top = pd.read_csv('top_cohort.csv') if os.path.exists('top_cohort.csv') else pd.DataFrame()
cohort_bot = pd.read_csv('bottom_cohort.csv') if os.path.exists('bottom_cohort.csv') else pd.DataFrame()
for cdf in (cohort_top, cohort_bot):
    if not cdf.empty and 'user' not in cdf.columns:
        col = next((c for c in ['Account', 'address', 'user', 'addr'] if c in cdf.columns), None)
        if col: cdf['user'] = cdf[col].astype(str)
cohort_top_users = set(cohort_top.get('user', pd.Series(dtype=str)).dropna().astype(str))
cohort_bot_users = set(cohort_bot.get('user', pd.Series(dtype=str)).dropna().astype(str))
cohort_addresses = cohort_top_users | cohort_bot_users

# Fills -> keep all columns, add normalized helpers
fills = pd.read_csv('historical_trades_btc.csv', low_memory=False) if os.path.exists('historical_trades_btc.csv') else pd.DataFrame()
if not fills.empty:
    if 'user' not in fills.columns:
        if 'Account' in fills.columns: fills['user'] = fills['Account'].astype(str)
    # Timestamp parsing (multiple possibilities)
    ts_col = 'Timestamp' if 'Timestamp' in fills.columns else (next((c for c in ['timestamp', 'ts'] if c in fills.columns), None))
    ts_raw = pd.to_numeric(fills[ts_col], errors='coerce') if ts_col else pd.Series(dtype='float64')
    if ts_raw.notna().any():
        med = ts_raw.dropna().median(); unit = 'ns' if med>1e14 else ('ms' if med>1e12 else 's')
        fills['timestamp'] = pd.to_datetime(ts_raw, unit=unit, errors='coerce')
    else:
        time_col = next((c for c in ['Timestamp IST', 'time'] if c in fills.columns), None)
        fills['timestamp'] = pd.to_datetime(fills[time_col], errors='coerce') if time_col else pd.NaT
    # Map helpers (retain originals)
    if 'Execution Price' in fills.columns and 'px' not in fills.columns:
        fills['px'] = pd.to_numeric(fills['Execution Price'], errors='coerce')
    if 'Size Tokens' in fills.columns and 'sz' not in fills.columns:
        fills['sz'] = pd.to_numeric(fills['Size Tokens'], errors='coerce')
    if 'Size USD' in fills.columns and 'notional' not in fills.columns:
        fills['notional'] = pd.to_numeric(fills['Size USD'], errors='coerce')
    if 'Fee' in fills.columns and 'fee' not in fills.columns:
        fills['fee'] = pd.to_numeric(fills['Fee'], errors='coerce')
    if 'Side' in fills.columns and 'side' not in fills.columns:
        fills['side'] = fills['Side'].map({'BUY':'B','SELL':'A'}).fillna(fills['Side'].astype(str))
    # Cohort flags
    if 'user' in fills.columns:
        u = fills['user'].astype(str)
        fills['is_top_cohort'] = u.isin(cohort_top_users).astype(int)
        fills['is_bottom_cohort'] = u.isin(cohort_bot_users).astype(int)
    # Filter by cohorts if available
    if cohort_addresses and 'user' in fills.columns:
        fills = fills[fills['user'].astype(str).isin(cohort_addresses)]
    fills_aligned = fills.dropna(subset=['timestamp']).sort_values('timestamp')
else:
    fills_aligned = pd.DataFrame(columns=['timestamp'])

# Funding (keep premium if present)
funding = pd.read_csv('funding_btc.csv', low_memory=False) if os.path.exists('funding_btc.csv') else pd.DataFrame()
if not funding.empty:
    if 'timestamp' not in funding.columns:
        cand = next((c for c in ['ts','Timestamp'] if c in funding.columns), None)
        if cand: funding = funding.rename(columns={cand:'timestamp'})
    if 'funding_rate' not in funding.columns:
        cand = next((c for c in ['fundingRate','rate','funding'] if c in funding.columns), None)
        if cand: funding = funding.rename(columns={cand:'funding_rate'})
    ts_raw = pd.to_numeric(funding['timestamp'], errors='coerce')
    med = ts_raw.dropna().median() if ts_raw.notna().any() else 0
    unit = 'ns' if med>1e14 else ('ms' if med>1e12 else 's')
    funding['timestamp'] = pd.to_datetime(ts_raw, unit=unit, errors='coerce')
    if 'funding_rate' in funding.columns:
        funding['funding_rate'] = pd.to_numeric(funding['funding_rate'], errors='coerce')
    if 'premium' in funding.columns:
        funding['premium'] = pd.to_numeric(funding['premium'], errors='coerce')
    funding = funding.dropna(subset=['timestamp']).drop_duplicates('timestamp').sort_values('timestamp')

# OHLCV (headerless fallback)
ohlcv_path = 'ohlc_btc_5m.csv'
ohlcv = pd.read_csv(ohlcv_path, low_memory=False, header=0) if os.path.exists(ohlcv_path) else pd.DataFrame()
if 'timestamp' not in ohlcv.columns or 'open' not in ohlcv.columns:
    ohlcv_raw = pd.read_csv(ohlcv_path, header=None)
    ohlcv = ohlcv_raw[[0,2,3,4,5,6]].copy(); ohlcv.columns=['timestamp','open','high','low','close','volume']
ts_raw = pd.to_numeric(ohlcv['timestamp'], errors='coerce')
med = ts_raw.dropna().median(); unit = 'ns' if med>1e14 else ('ms' if med>1e12 else 's')
ohlcv['timestamp'] = pd.to_datetime(ts_raw, unit=unit, errors='coerce')
for c in ['open','high','low','close','volume']: ohlcv[c]=pd.to_numeric(ohlcv[c], errors='coerce')
ohlcv = ohlcv.dropna(subset=['timestamp','open','high','low','close','volume']).drop_duplicates('timestamp').sort_values('timestamp')

# Merge funding columns into OHLCV
fund_cols = ['funding_rate','premium']
avail_fund_cols = [c for c in fund_cols if c in funding.columns]
if avail_fund_cols:
    df = pd.merge_asof(ohlcv, funding[['timestamp']+avail_fund_cols].sort_values('timestamp'), on='timestamp', direction='backward')
    for c in avail_fund_cols:
        df[c] = df[c].fillna(method='ffill').fillna(0.0)
else:
    df = ohlcv.copy()
    for c in fund_cols: df[c]=0.0

# Aligned frames for features
ohlcv_aligned = df[['timestamp','open','high','low','close','volume']].copy()
funding_aligned = df[['timestamp']+avail_fund_cols] if avail_fund_cols else df[['timestamp']+fund_cols]

# Optional wide merge with last known fill row (keeps all fill columns)
if not fills_aligned.empty:
    df_merged = pd.merge_asof(df.sort_values('timestamp'), fills_aligned.sort_values('timestamp'), on='timestamp', direction='backward')
else:
    df_merged = df.copy()


In [30]:
# Dynamic daily cohorts (Top/Bottom 5%) with reliability and cohort signals (robust to column variations)
import pandas as pd
import numpy as np

# Resolve bars_df and bar timestamps
bars_df = bt if 'bt' in globals() else (ohlcv if 'ohlcv' in globals() else price_bars if 'price_bars' in globals() else df)
if 'timestamp' in bars_df.columns:
    bar_ts_series = pd.to_datetime(bars_df['timestamp'], errors='coerce')
elif hasattr(bars_df.index, 'dtype') and str(bars_df.index.dtype).startswith('datetime64'):
    bar_ts_series = pd.to_datetime(bars_df.index)
elif 'df' in globals() and isinstance(df, pd.DataFrame) and 'timestamp' in df.columns:
    bar_ts_series = pd.to_datetime(df['timestamp'], errors='coerce')
else:
    bar_ts_series = pd.Series(pd.NaT, index=range(len(bars_df)))

# Price series and returns aligned to bar_ts_series
if 'close' in bars_df.columns and len(bars_df) == len(bar_ts_series):
    price_series = pd.Series(bars_df['close'].values, index=bar_ts_series)
else:
    # fallback to df
    if 'df' in globals() and 'close' in df.columns and 'timestamp' in df.columns:
        price_series = pd.Series(df['close'].values, index=pd.to_datetime(df['timestamp'], errors='coerce'))
    else:
        # minimal fallback
        price_series = pd.Series(bars_df.iloc[:, 0].values, index=bar_ts_series)

price_series = price_series.dropna()
bar_ts_series = price_series.index  # ensure consistency
returns_5m = price_series.pct_change()
r_next = returns_5m.shift(-1)

# Build a normalized fills frame with canonical columns: ts, address, sign, notional
fraw = fills.copy() if 'fills' in globals() else pd.DataFrame()
if isinstance(fraw, pd.DataFrame) and not fraw.empty:
    fr = fraw.copy()
    # Timestamp -> ts (datetime)
    if 'ts' in fr.columns:
        ts = pd.to_datetime(fr['ts'], errors='coerce')
    else:
        cand_ts = next((c for c in ['timestamp','Timestamp','time','date'] if c in fr.columns), None)
        if cand_ts is not None:
            ts_raw = fr[cand_ts]
            if np.issubdtype(getattr(ts_raw, 'dtype', object), np.number):
                med = pd.to_numeric(ts_raw, errors='coerce').dropna().median()
                unit = 'ns' if (pd.notna(med) and med>1e14) else ('ms' if (pd.notna(med) and med>1e12) else 's')
                ts = pd.to_datetime(pd.to_numeric(ts_raw, errors='coerce'), unit=unit, errors='coerce')
            else:
                ts = pd.to_datetime(ts_raw, errors='coerce')
        else:
            ts = pd.to_datetime(pd.NaT)
    # Address/user id -> address
    if 'address' in fr.columns:
        addr = fr['address'].astype(str)
    else:
        cand_addr = next((c for c in ['user','User','Account','addr'] if c in fr.columns), None)
        addr = fr[cand_addr].astype(str) if cand_addr is not None else pd.Series([], dtype=str)
    # Trade direction -> sign (+1 buy, -1 sell)
    if 'sign' in fr.columns:
        sgn = pd.to_numeric(fr['sign'], errors='coerce')
    else:
        if 'side' in fr.columns:
            sgn = fr['side'].map({'B':1,'A':-1,'BUY':1,'SELL':-1}).fillna(0).astype(float)
        elif 'Side' in fr.columns:
            sgn = fr['Side'].map({'BUY':1,'SELL':-1}).fillna(0).astype(float)
        else:
            sgn = pd.Series(0.0, index=fr.index)
    # Notional -> notional (fallback px*sz)
    if 'notional' in fr.columns:
        notional = pd.to_numeric(fr['notional'], errors='coerce')
    else:
        if {'px','sz'}.issubset(fr.columns):
            notional = pd.to_numeric(fr['px'], errors='coerce') * pd.to_numeric(fr['sz'], errors='coerce')
        elif 'Size USD' in fr.columns:
            notional = pd.to_numeric(fr['Size USD'], errors='coerce')
        else:
            notional = pd.Series(0.0, index=fr.index)
    f = pd.DataFrame({'ts': ts, 'address': addr, 'sign': sgn, 'notional': notional})
    f = f.dropna(subset=['ts'])
    # Discard zero/not-a-number rows
    f['sign'] = pd.to_numeric(f['sign'], errors='coerce').fillna(0.0)
    f['notional'] = pd.to_numeric(f['notional'], errors='coerce').fillna(0.0)
    f = f[(f['notional']>0) & (f['sign']!=0)]
else:
    f = pd.DataFrame(columns=['ts','address','sign','notional'])

if not f.empty:
    f = f.sort_values('ts')
    # Map each fill to its bar and next-bar return using datetime keys
    base = pd.DataFrame({'bar_ts': pd.to_datetime(bar_ts_series, errors='coerce')}).dropna().sort_values('bar_ts').drop_duplicates('bar_ts')
    f['ts'] = pd.to_datetime(f['ts'], errors='coerce')
    f = f.dropna(subset=['ts'])
    f = pd.merge_asof(f.sort_values('ts'), base, left_on='ts', right_on='bar_ts', direction='backward', allow_exact_matches=True)
    f['date'] = f['ts'].dt.floor('D')
    # r_next keyed by bar_ts index
    r_next_by_ts = pd.Series(r_next.values, index=price_series.index)
    f['r_next'] = f['bar_ts'].map(r_next_by_ts)
    f['pnl_proxy'] = f['sign'].astype(float) * f['notional'].astype(float) * f['r_next'].fillna(0.0)
    # Daily user metrics
    daily_user = f.groupby(['address','date'], as_index=False).agg(
        notional_sum=('notional','sum'),
        trades_count=('notional','size'),
        pnl_sum=('pnl_proxy','sum')
    )
    daily_user['pnl_bps'] = 1e4 * daily_user['pnl_sum'] / daily_user['notional_sum'].replace(0.0, np.nan)
    daily_user['pnl_bps'] = daily_user['pnl_bps'].fillna(0.0)
    daily_user = daily_user.sort_values(['address','date'])
    # Rolling Sharpe (30d) and activity gates (60d)
    def _rolling_features(g: pd.DataFrame) -> pd.DataFrame:
        g = g.sort_values('date').copy()
        g['active'] = (g['trades_count'] > 0).astype(int)
        g['sh_30d'] = g['pnl_bps'].rolling(window=30, min_periods=10).mean() / (g['pnl_bps'].rolling(30, min_periods=10).std().replace(0.0,np.nan))
        g['sh_30d'] = g['sh_30d'].replace([np.inf, -np.inf], np.nan).fillna(0.0)
        g['tr_60d'] = g['trades_count'].rolling(60, min_periods=10).sum()
        g['ad_60d'] = g['active'].rolling(60, min_periods=10).sum()
        # recency: days since last trade
        last_trade = g['date'].where(g['trades_count']>0)
        last_trade = last_trade.ffill()
        g['days_since'] = (g['date'] - last_trade).dt.days.fillna(999)
        return g
    daily_user = daily_user.groupby('address', group_keys=False).apply(_rolling_features)
    # Daily cohort selection
    def _select_day(df_day: pd.DataFrame) -> pd.DataFrame:
        eligible = df_day[(df_day['tr_60d']>=200) & (df_day['ad_60d']>=30)]
        if eligible.empty:
            return pd.DataFrame(columns=['date','address','cohort','rho'])
        q_top = eligible['sh_30d'].quantile(0.95)
        q_bot = eligible['sh_30d'].quantile(0.05)
        top = eligible[eligible['sh_30d']>=q_top].copy()
        bot = eligible[eligible['sh_30d']<=q_bot].copy()
        if top.empty and bot.empty:
            return pd.DataFrame(columns=['date','address','cohort','rho'])
        tau = 10.0
        lam = np.exp(-1.0/tau)
        rows = []
        if not top.empty:
            w_rec = np.power(lam, top['days_since'].clip(lower=0))
            w_stab = top['sh_30d'].clip(lower=0.0, upper=2.0)
            rho = (w_rec * w_stab).clip(lower=0.0, upper=2.0)
            rows.append(pd.DataFrame({'date': top['date'], 'address': top['address'], 'cohort':'top', 'rho': rho}))
        if not bot.empty:
            w_rec = np.power(lam, bot['days_since'].clip(lower=0))
            w_stab = (-bot['sh_30d']).clip(lower=0.0, upper=2.0)
            rho = (w_rec * w_stab).clip(lower=0.0, upper=2.0)
            rows.append(pd.DataFrame({'date': bot['date'], 'address': bot['address'], 'cohort':'bot', 'rho': rho}))
        return pd.concat(rows, axis=0, ignore_index=True) if rows else pd.DataFrame(columns=['date','address','cohort','rho'])
    cohorts = daily_user.groupby('date', group_keys=False).apply(_select_day)
    # Map cohorts to fills per day and aggregate flows to 5m
    if not cohorts.empty:
        fills_day = f[['ts','bar_ts','date','address','sign','notional']].copy()
        fills_day = fills_day.merge(cohorts, on=['date','address'], how='inner')
        # Normalize rho within day×cohort
        denom = fills_day.groupby(['date','cohort'])['rho'].transform('sum').replace(0.0, np.nan)
        fills_day['alpha'] = (fills_day['rho'] / denom).fillna(0.0)
        fills_day['contrib'] = fills_day['alpha'] * fills_day['sign'].astype(float) * fills_day['notional'].astype(float)
        top_flow = fills_day.loc[fills_day['cohort']=='top'].groupby('bar_ts')['contrib'].sum()
        bot_flow = fills_day.loc[fills_day['cohort']=='bot'].groupby('bar_ts')['contrib'].sum()
        F_top_series = top_flow.reindex(bar_ts_series).fillna(0.0)
        F_bot_series = bot_flow.reindex(bar_ts_series).fillna(0.0)
        # ADV20 for normalization keyed by bar_ts
        def compute_adv20_ts(bars_df_local: pd.DataFrame, bar_ts_local: pd.Series) -> pd.Series:
            if {'close','volume'}.issubset(bars_df_local.columns) and len(bars_df_local) == len(bar_ts_local):
                adv = (bars_df_local['close'] * bars_df_local['volume']).rolling(20, min_periods=1).mean()
                return pd.Series(adv.values, index=bar_ts_local)
            if 'df' in globals() and {'close','volume','timestamp'}.issubset(df.columns):
                adv = (df['close'] * df['volume']).rolling(20, min_periods=1).mean()
                return pd.Series(adv.values, index=pd.to_datetime(df['timestamp'], errors='coerce'))
            return pd.Series(1.0, index=bar_ts_local)
        adv20_by_ts = compute_adv20_ts(bars_df, bar_ts_series)
        def cohort_signals_from_flows(F_top_s: pd.Series, F_bot_s: pd.Series, adv_s: pd.Series, clip_k: float=3.0, decay: float=0.98):
            idx = F_top_s.index
            adv_s = adv_s.reindex(idx).replace(0.0, np.nan).fillna(method='ffill').fillna(1.0)
            nt = (F_top_s / adv_s).fillna(0.0)
            nb = (F_bot_s / adv_s).fillna(0.0)
            S_top_s = np.tanh(nt.clip(-clip_k, clip_k))
            S_bot_s = -np.tanh(nb.clip(-clip_k, clip_k))
            S_top_s = S_top_s.ewm(alpha=(1-decay), adjust=False).mean()
            S_bot_s = S_bot_s.ewm(alpha=(1-decay), adjust=False).mean()
            return S_top_s, S_bot_s
        S_top, S_bot = cohort_signals_from_flows(F_top_series, F_bot_series, adv20_by_ts)


## 3. Feature Engineering

### 3.1 Price-Based Features (A Features)

In [31]:
# Cohort flow features (compact): use S_top, S_bot computed from dynamic cohorts (Cell 7) to avoid duplication
import pandas as pd
import numpy as np

flow_features = df[['timestamp']].copy()

# If dynamic cohort signals exist, reuse them; else fall back to zeroes
if 'S_top' in globals() and 'S_bot' in globals():
    # Align to df timestamps
    ts_idx = pd.to_datetime(df['timestamp'], errors='coerce')
    S_top_aligned = S_top.reindex(ts_idx).values if isinstance(S_top, pd.Series) else np.zeros(len(ts_idx))
    S_bot_aligned = S_bot.reindex(ts_idx).values if isinstance(S_bot, pd.Series) else np.zeros(len(ts_idx))
    flow_features['S_top'] = S_top_aligned
    flow_features['S_bot'] = S_bot_aligned
    flow_features['flow_diff'] = flow_features['S_top'] - (-flow_features['S_bot'])
else:
    # No cohort signals available; keep neutral
    flow_features['S_top'] = 0.0
    flow_features['S_bot'] = 0.0
    flow_features['flow_diff'] = 0.0

flow_features = flow_features[['timestamp','S_top','S_bot','flow_diff']]


### 3.2 Microstructure Features (B Features)

In [32]:
microstructure_features = df[['timestamp']].copy()
if 'volume' in df.columns:
    microstructure_features['volume_roc_1'] = df['volume'].pct_change()
    volume_ma_20 = df['volume'].rolling(20, min_periods=10).mean()
    microstructure_features['volume_intensity'] = df['volume'] / (volume_ma_20 + 1e-8)
    volume_std_20 = df['volume'].rolling(20, min_periods=10).std()
    microstructure_features['volume_spike'] = ((df['volume'] - volume_ma_20) / (volume_std_20 + 1e-8)).clip(-3, 3)
else:
    for c in ['volume_roc_1','volume_intensity','volume_spike']:
        microstructure_features[c]=np.nan
if {'high','low','close'}.issubset(df.columns):
    microstructure_features['price_efficiency'] = ((df['close'] - df['low']) / (df['high'] - df['low'] + 1e-8)).replace([np.inf,-np.inf], np.nan)
    price_range = df['high'] - df['low']
    microstructure_features['price_impact_proxy'] = (price_range / (df['volume'] + 1e-8)).rolling(5, min_periods=3).mean()
    returns_volatility = df['close'].pct_change().rolling(20, min_periods=10).std()
    microstructure_features['vol_adj_volume'] = df['volume'] / (returns_volatility + 1e-8)
else:
    microstructure_features['price_efficiency']=np.nan
    microstructure_features['price_impact_proxy']=np.nan
    microstructure_features['vol_adj_volume']=np.nan
if {'volume','close'}.issubset(df.columns):
    price_change = df['close'].pct_change()
    microstructure_features['vwap_momentum'] = ((price_change * df['volume']).rolling(5, min_periods=3).sum() / (df['volume'].rolling(5, min_periods=3).sum() + 1e-8))
    microstructure_features['volume_pressure'] = np.tanh(price_change * np.log1p(df['volume']))
else:
    microstructure_features['vwap_momentum']=np.nan
    microstructure_features['volume_pressure']=np.nan
if {'volume','high','low','close'}.issubset(df.columns):
    microstructure_features['turnover_proxy'] = np.log1p(df['volume'])
    microstructure_features['price_volume_corr'] = df['close'].rolling(10, min_periods=5).corr(df['volume'])
    price_volatility = (df['high'] - df['low']) / df['close']
    microstructure_features['depth_proxy'] = np.log1p(df['volume']) / (price_volatility + 1e-8)
else:
    microstructure_features['turnover_proxy']=np.nan
    microstructure_features['price_volume_corr']=np.nan
    microstructure_features['depth_proxy']=np.nan

### 3.3 Price Action Features (C Features)

In [33]:
price_bars = df[['timestamp', 'open', 'high', 'low', 'close', 'volume']].copy()

technical_cols = ['hl_range', 'oc_range', 'typical_price', 'weighted_price', 'true_range',
                 'body_size', 'upper_shadow', 'lower_shadow', 'direction', 'price_change', 
                 'price_change_pct', 'range_pct']

for col in technical_cols:
    if col in df.columns:
        price_bars[col] = df[col]

price_bars['price'] = price_bars['close']

price_bars = price_bars.dropna(subset=['price'])

if len(price_bars) > 0:
    price_bars['returns'] = price_bars['price'].pct_change()
    
    if 'true_range' in price_bars.columns:
        price_bars['atr_14'] = price_bars['true_range'].rolling(window=14, min_periods=7).mean()
        price_bars['vol_50'] = price_bars['atr_14'] / price_bars['price']
    else:
        price_bars['vol_50'] = price_bars['returns'].rolling(window=50, min_periods=10).std()
    
    price_bars['vol_200'] = price_bars['returns'].rolling(window=200, min_periods=20).std()
    
    for h in [1, 3, 6]:
        if 'price_change_pct' in price_bars.columns and h == 1:
            price_bars[f'mom_{h}'] = price_bars['price_change_pct']
        else:
            price_bars[f'mom_{h}'] = price_bars['price'].pct_change(periods=h)
    
    price_bars['ema20'] = price_bars['price'].ewm(span=20, adjust=False).mean()
    price_bars['mr_ema20'] = (price_bars['price'] - price_bars['ema20']) / price_bars['ema20']
    
    rolling_mean = price_bars['price'].rolling(window=100, min_periods=20).mean()
    rolling_std = price_bars['price'].rolling(window=100, min_periods=20).std()
    price_bars['mr_ema20_z'] = (price_bars['price'] - rolling_mean) / (rolling_std + 1e-8)
    
    if 'true_range' in price_bars.columns:
        price_bars['rv_1h'] = price_bars['true_range'].rolling(window=12, min_periods=6).sum() / price_bars['price']
        price_bars['rv_15m'] = price_bars['true_range'].rolling(window=3, min_periods=2).sum() / price_bars['price']
        price_bars['rv_1d'] = price_bars['true_range'].rolling(window=288, min_periods=50).sum() / price_bars['price']
    else:
        price_bars['rv_1h'] = price_bars['returns'].rolling(window=12, min_periods=6).apply(lambda x: (x**2).sum())
        price_bars['rv_15m'] = price_bars['returns'].rolling(window=3, min_periods=2).apply(lambda x: (x**2).sum())
        price_bars['rv_1d'] = price_bars['returns'].rolling(window=288, min_periods=50).apply(lambda x: (x**2).sum())
    
    rv_threshold = price_bars['rv_1h'].rolling(window=100, min_periods=25).quantile(0.75)
    price_bars['regime_high_vol'] = (price_bars['rv_1h'] > rv_threshold).astype(int)
    
    price_bars['price_velocity'] = price_bars['price'].diff().ewm(span=5, adjust=False).mean()
    price_bars['price_acceleration'] = price_bars['price_velocity'].diff()
    
    price_bars['price_normalized'] = (price_bars['price'] - price_bars['price'].rolling(window=1000, min_periods=100).mean()) / (price_bars['price'].rolling(window=1000, min_periods=100).std() + 1e-8)
    price_bars['price_velocity_norm'] = np.tanh(price_bars['price_velocity'] / (price_bars['price'].rolling(window=100).std() + 1e-8))
    price_bars['price_acceleration_norm'] = np.tanh(price_bars['price_acceleration'] / (price_bars['price_velocity'].rolling(window=100).std() + 1e-8))
    
    price_action_cols = [
        'timestamp', 'mom_1', 'mom_3', 'mr_ema20_z', 'rv_1h', 'regime_high_vol'
    ]
    
    available_cols = [col for col in price_action_cols if col in price_bars.columns]
    price_action_features = price_bars[available_cols].copy()
    
else:
    price_action_features = pd.DataFrame()

### 3.4 Volatility Features (D Features)

In [34]:
# Volatility features from price_action_features if available, else compute minimal fallback
if 'price_action_features' in locals() and len(price_action_features) > 0:
    volatility_features = price_action_features[['timestamp']].copy()
    if 'rv_1h' in price_action_features.columns:
        volatility_features['rv_1h'] = price_action_features['rv_1h']
    if 'regime_high_vol' in price_action_features.columns:
        volatility_features['regime_high_vol'] = price_action_features['regime_high_vol']
else:
    volatility_features = df[['timestamp']].copy()
    returns = df['close'].pct_change()
    volatility_features['rv_1h'] = returns.rolling(12, min_periods=6).apply(lambda x: (x**2).sum())
    vol_threshold = volatility_features['rv_1h'].rolling(100, min_periods=25).quantile(0.75)
    volatility_features['regime_high_vol'] = (volatility_features['rv_1h'] > vol_threshold).astype('float')
# no ffill/bfill here; NaNs remain for early bars and will be dropped later

In [35]:
enhanced_vol_features = df[['timestamp']].copy()
if {'high','low','close','open'}.issubset(df.columns):
    gk_vol = np.log(df['high']/df['low'])**2 / 2 - (2*np.log(2)-1) * np.log(df['close']/df['open'])**2
    enhanced_vol_features['gk_volatility'] = gk_vol.rolling(12, min_periods=6).mean()
    parkinson_vol = np.log(df['high']/df['low'])**2 / (4 * np.log(2))
    enhanced_vol_features['parkinson_volatility'] = parkinson_vol.rolling(12, min_periods=6).mean()
    vol_percentile_20 = enhanced_vol_features['gk_volatility'].rolling(100, min_periods=50).quantile(0.2)
    vol_percentile_80 = enhanced_vol_features['gk_volatility'].rolling(100, min_periods=50).quantile(0.8)
    enhanced_vol_features['vol_regime_low'] = (enhanced_vol_features['gk_volatility'] <= vol_percentile_20).astype('float')
    enhanced_vol_features['vol_regime_high'] = (enhanced_vol_features['gk_volatility'] >= vol_percentile_80).astype('float')
else:
    for c in ['gk_volatility','parkinson_volatility','vol_regime_low','vol_regime_high']:
        enhanced_vol_features[c]=np.nan
if 'close' in df.columns:
    returns = df['close'].pct_change(); return_std = returns.rolling(50, min_periods=25).std()
    enhanced_vol_features['jump_indicator'] = (np.abs(returns) > 3 * return_std).astype('float')
    enhanced_vol_features['jump_magnitude'] = np.abs(returns) / (return_std + 1e-8)
else:
    enhanced_vol_features['jump_indicator']=np.nan
    enhanced_vol_features['jump_magnitude']=np.nan
if 'gk_volatility' in enhanced_vol_features.columns:
    vol_ma_short = enhanced_vol_features['gk_volatility'].rolling(5, min_periods=3).mean()
    vol_ma_long = enhanced_vol_features['gk_volatility'].rolling(20, min_periods=10).mean()
    enhanced_vol_features['vol_momentum'] = (vol_ma_short - vol_ma_long) / (vol_ma_long + 1e-8)
    enhanced_vol_features['vol_mean_reversion'] = (enhanced_vol_features['gk_volatility'] - vol_ma_long) / (vol_ma_long + 1e-8)
# no ffill/bfill; early NaNs are allowed and will be dropped later

### 3.5 Funding Features (E Features)

In [36]:
funding_5m = df[['timestamp']].copy()

if len(funding_aligned) > 0:
    funding_5m = pd.merge_asof(funding_5m, funding_aligned, on='timestamp', direction='backward')
    # use as-is; do not forward/back fill here to keep strict causality
    use_real_funding = True
else:
    funding_5m['funding_rate'] = np.nan
    use_real_funding = False

if use_real_funding and len(funding_5m) > 0 and 'funding_rate' in funding_5m.columns:
    funding_5m['funding_momentum_1h'] = funding_5m['funding_rate'].diff(12)
    funding_5m['funding_momentum_4h'] = funding_5m['funding_rate'].diff(48)
    
    funding_ma = funding_5m['funding_rate'].rolling(window=288, min_periods=50).mean()
    funding_5m['funding_mr'] = (funding_5m['funding_rate'] - funding_ma) / (funding_ma.abs() + 1e-8)
    
    funding_5m['funding_vol'] = funding_5m['funding_rate'].rolling(window=288, min_periods=50).std()
    
    funding_features = funding_5m[['timestamp', 'funding_rate', 'funding_momentum_1h', 
                                  'funding_momentum_4h', 'funding_mr', 'funding_vol']].copy()
    
else:
    funding_features = df[['timestamp']].copy()
    funding_features['funding_rate'] = np.nan
    funding_features['funding_momentum_1h'] = np.nan
    funding_features['funding_momentum_4h'] = np.nan
    funding_features['funding_mr'] = np.nan
    funding_features['funding_vol'] = np.nan

### 3.8 Smart Trader Cohort Features & Final Dataset Assembly

In [37]:
# Assemble a compact feature set (no duplicate columns)

def create_target_labels(df_in, col_name, thresholds):
    lower, upper = thresholds
    x = df_in[col_name]
    # 0: down, 1: neutral, 2: up (to match 3-class classifier)
    return np.where(x > upper, 2, np.where(x < lower, 0, 1))

base_timestamps = ohlcv_aligned[['timestamp', 'close']].copy()

# Keep list: single source per feature; avoid duplicates downstream
features_keep = [
    'mom_1','mom_3','mr_ema20_z',               # momentum/price action
    'rv_1h','gk_volatility','jump_magnitude','regime_high_vol',  # vol/jumps
    'volume_intensity','price_efficiency','price_volume_corr','vwap_momentum','depth_proxy',  # micro/liquidity
    'funding_rate','funding_momentum_1h',       # funding
    'flow_diff','S_top','S_bot'                 # flow
]

# Source priority controls which slice "wins" if multiple slices carry the same feature
sources = [
    locals().get('price_action_features', pd.DataFrame()),
    locals().get('volatility_features', pd.DataFrame()),           # prefer this for rv_1h / regime_high_vol
    locals().get('enhanced_vol_features', pd.DataFrame()),         # skip duplicates already taken
    locals().get('microstructure_features', pd.DataFrame()),
    locals().get('funding_features', pd.DataFrame()),
    locals().get('interaction_features', pd.DataFrame()),
    locals().get('flow_features', pd.DataFrame()),
]

# Build slices with de-duplication by priority order
selected = set()
slices = []
for df_src in sources:
    if isinstance(df_src, pd.DataFrame) and len(df_src) > 0:
        cols = [c for c in features_keep if c in df_src.columns and c not in selected]
        if cols:
            keep = ['timestamp'] + cols
            slices.append(df_src[keep].copy())
            selected.update(cols)

# Merge all unique feature slices
final_dataset = base_timestamps.copy()
for sl in slices:
    final_dataset = final_dataset.merge(sl, on='timestamp', how='inner', suffixes=('', '_dup'))

# Clean up any residual suffixes (robustness if upstream added overlaps)
# 1) Handle "_dup" from merges above
_dup_cols = [c for c in final_dataset.columns if c.endswith('_dup')]
for col in _dup_cols:
    base = col[:-4]
    if base not in final_dataset.columns:
        final_dataset[base] = final_dataset[col]
if _dup_cols:
    final_dataset.drop(columns=_dup_cols, inplace=True, errors='ignore')

# 2) Handle stray "_x"/"_y" if prior cells introduced them
for suf in ('_x', '_y'):
    suf_cols = [c for c in final_dataset.columns if c.endswith(suf)]
    for col in suf_cols:
        base = col[:-2]
        if base not in final_dataset.columns:
            final_dataset[base] = final_dataset[col]
    if suf_cols:
        final_dataset.drop(columns=suf_cols, inplace=True, errors='ignore')

# Ensure rv_1h and regime_high_vol exist by fallback if missing
if 'rv_1h' not in final_dataset.columns:
    if 'volatility_features' in locals() and isinstance(volatility_features, pd.DataFrame) and 'rv_1h' in volatility_features.columns:
        final_dataset = final_dataset.merge(volatility_features[['timestamp','rv_1h']], on='timestamp', how='left')
    else:
        returns = df['close'].pct_change()
        rv_1h_fallback = returns.rolling(12, min_periods=6).apply(lambda x: (x**2).sum())
        final_dataset = final_dataset.merge(pd.DataFrame({'timestamp': df['timestamp'], 'rv_1h': rv_1h_fallback}), on='timestamp', how='left')
if 'regime_high_vol' not in final_dataset.columns:
    if 'volatility_features' in locals() and isinstance(volatility_features, pd.DataFrame) and 'regime_high_vol' in volatility_features.columns:
        final_dataset = final_dataset.merge(volatility_features[['timestamp','regime_high_vol']], on='timestamp', how='left')
    else:
        vol_threshold = final_dataset['rv_1h'].rolling(100, min_periods=25).quantile(0.75)
        final_dataset['regime_high_vol'] = (final_dataset['rv_1h'] > vol_threshold).astype(float)

# Targets (horizon aligned to next bar on 5m data)
if 'returns_3min_bps' not in final_dataset.columns:
    final_dataset['returns_3min_bps'] = (final_dataset['close'].shift(-1) / final_dataset['close'] - 1) * 10000
# Adjust label thresholds for 5m horizon to reduce over-neutralization
final_dataset['direction_confidence_3min'] = create_target_labels(final_dataset, 'returns_3min_bps', (-5, 5))
final_dataset['profitable_opportunity'] = (final_dataset['returns_3min_bps'].abs() > 5).astype(int)

TARGET_COLS = ['direction_confidence_3min', 'returns_3min_bps', 'profitable_opportunity']

# Strict causality: do not ffill/bfill; drop any rows with missing features
feature_cols = [c for c in final_dataset.columns if c not in ['timestamp','close'] + TARGET_COLS]
final_dataset = final_dataset.dropna(subset=feature_cols)

# Drop rows with missing targets
if final_dataset[TARGET_COLS].isnull().any(axis=None):
    final_dataset = final_dataset[final_dataset[TARGET_COLS].notna().all(axis=1)].copy()

# Lightweight causality audit of predictors
_predictor_cols = [c for c in final_dataset.columns if c not in ['timestamp','close'] + TARGET_COLS]
_suspect = [c for c in _predictor_cols if c.endswith('_tplus')]
if len(_suspect) > 0:
    _suspect  # intentionally no print/log

In [38]:
# Create exactly two datasets from final_dataset: train_data (80%) and backtest_data (20%)
# Minimal, deterministic split; no extra variables.

if 'final_dataset' not in globals():
    print("final_dataset not found. Please run cells 5–18 to build it before splitting.")
else:
    n = len(final_dataset)
    split_idx = max(1, int(n * 0.8))
    train_data = final_dataset.iloc[:split_idx].copy()
    backtest_data = final_dataset.iloc[split_idx:].copy()
    print(f"Split complete | train_data: {train_data.shape} | backtest_data: {backtest_data.shape}")

Split complete | train_data: (8290, 22) | backtest_data: (2073, 22)


## 4. Model Training and Calibration

In [39]:
class EnhancedMetaClassifier:
    """Enhanced Meta-Learner for Classification"""
    
    def __init__(self, meta_C=1.0, random_state=42, n_folds=5, 
                 embargo_pct=0.01, purge_pct=0.02, min_train_samples=1000, n_classes=3):
        from sklearn.model_selection import TimeSeriesSplit
        from sklearn.linear_model import LogisticRegression
        from sklearn.ensemble import HistGradientBoostingClassifier, RandomForestClassifier
        from sklearn.preprocessing import RobustScaler
        from sklearn.metrics import accuracy_score, log_loss
        from sklearn.pipeline import Pipeline
        
        self.meta_C = meta_C
        self.random_state = random_state
        self.n_folds = n_folds
        self.embargo_pct = embargo_pct
        self.purge_pct = purge_pct
        self.min_train_samples = min_train_samples
        self.n_classes = n_classes
        
        self.base_models = {}
        self.meta_model = None
        self.scaler = RobustScaler()
        self.is_fitted = False
        self.meta_score = 0.0
        self.cv_scores = {}
        self.fold_scores = {}
    
    def get_params(self, deep=True):
        return {
            'meta_C': self.meta_C,
            'random_state': self.random_state,
            'n_folds': self.n_folds,
            'embargo_pct': self.embargo_pct,
            'purge_pct': self.purge_pct,
            'min_train_samples': self.min_train_samples,
            'n_classes': self.n_classes
        }
    
    def set_params(self, **params):
        for param, value in params.items():
            if hasattr(self, param):
                setattr(self, param, value)
            else:
                raise ValueError(f"Invalid parameter {param} for estimator {type(self).__name__}")
        return self
        
    def _get_base_models(self):
        from sklearn.preprocessing import QuantileTransformer
        from sklearn.ensemble import ExtraTreesClassifier, GradientBoostingClassifier
        from sklearn.naive_bayes import GaussianNB
        from sklearn.linear_model import LogisticRegression
        from sklearn.ensemble import HistGradientBoostingClassifier, RandomForestClassifier
        from sklearn.pipeline import Pipeline
        from sklearn.preprocessing import RobustScaler

        return {
            'histgb': HistGradientBoostingClassifier(
                max_iter=180,
                learning_rate=0.07,
                max_depth=7,
                min_samples_leaf=20,
                l2_regularization=1e-2,
                max_bins=255,
                validation_fraction=0.1,
                random_state=self.random_state
            ),
            'randomforest': RandomForestClassifier(
                n_estimators=300,
                max_depth=16,
                min_samples_split=4,
                min_samples_leaf=2,
                max_features='sqrt',
                bootstrap=True,
                class_weight='balanced_subsample',
                random_state=self.random_state,
                n_jobs=-1
            ),
            'extratrees': ExtraTreesClassifier(
                n_estimators=300,
                max_depth=16,
                min_samples_split=4,
                min_samples_leaf=2,
                class_weight='balanced',
                random_state=self.random_state,
                n_jobs=-1
            ),
            'gb_classifier': GradientBoostingClassifier(
                n_estimators=200,
                learning_rate=0.05,
                max_depth=3,
                subsample=0.7,
                random_state=self.random_state
            ),
            'logistic_scaled': Pipeline([
                ('quantile', QuantileTransformer(n_quantiles=800, output_distribution='normal')),
                ('scaler', RobustScaler()),
                ('classifier', LogisticRegression(random_state=self.random_state, max_iter=1500, C=1.0, solver='lbfgs', multi_class='auto', class_weight='balanced'))
            ]),
            'naive_bayes_scaled': Pipeline([
                ('scaler', RobustScaler()),
                ('classifier', GaussianNB())
            ])
        }
    
    def _create_purged_splits(self, X, y):
        n_samples = len(X)
        embargo_periods = max(1, int(n_samples * self.embargo_pct))
        purge_periods = max(1, int(n_samples * self.purge_pct))
        
        class_counts = y.value_counts()
        
        min_required_samples = self.min_train_samples + embargo_periods + purge_periods + 100
        if n_samples < min_required_samples:
            raise ValueError(f"INSUFFICIENT DATA: Need at least {min_required_samples} samples for purged CV, got {n_samples}")
        
        min_class_count = class_counts.min()
        
        splits = []
        step_size = n_samples // (self.n_folds + 1)
        
        for i in range(self.n_folds):
            train_end = (i + 1) * step_size
            train_start = 0
            
            train_end_purged = max(train_start + self.min_train_samples, train_end - purge_periods)
            
            val_start = train_end + embargo_periods
            val_end = min(val_start + step_size, n_samples)
            
            train_size = train_end_purged - train_start
            val_size = val_end - val_start
            
            if train_size >= self.min_train_samples and val_size >= 50:
                train_idx = list(range(train_start, train_end_purged))
                val_idx = list(range(val_start, val_end))
                
                train_classes = np.unique(y.iloc[train_idx])
                val_classes = np.unique(y.iloc[val_idx])
                
                if len(train_classes) >= 2 and len(val_classes) >= 2:
                    if hasattr(X, 'index'):
                        try:
                            train_times = X.index[train_idx]
                            val_times = X.index[val_idx]
                            if len(train_times) > 0 and len(val_times) > 0:
                                train_max_time = max(train_times)
                                val_min_time = min(val_times)
                                if train_max_time >= val_min_time:
                                    continue
                        except Exception:
                            pass
                    
                    splits.append((train_idx, val_idx))
        
        if len(splits) == 0:
            raise ValueError("No valid classification splits created")
        
        if len(splits) < 2:
            raise ValueError("Only {len(splits)} valid folds created, need at least 2")
        
        return splits
    
    def fit(self, X, y):
        unique_classes = np.unique(y)
        if len(unique_classes) < 2:
            raise ValueError(f"Need at least 2 classes for classification, found: {unique_classes}")
        
        self.base_models = self._get_base_models()
        
        purged_splits = self._create_purged_splits(X, y)
        
        oof_preds = np.zeros((len(X), len(self.base_models)))
        oof_probs = np.zeros((len(X), len(self.base_models) * self.n_classes))
        
        model_names = list(self.base_models.keys())
        
        for model_idx, (name, model) in enumerate(self.base_models.items()):
            fold_scores = []
            
            for fold, (train_idx, val_idx) in enumerate(purged_splits):
                if hasattr(X, 'index') and len(X.index) > 0:
                    try:
                        train_max_time = X.index[train_idx].max()
                        val_min_time = X.index[val_idx].min()
                        if train_max_time >= val_min_time:
                            continue
                    except:
                        pass
                
                X_train_fold, X_val_fold = X.iloc[train_idx], X.iloc[val_idx]
                y_train_fold, y_val_fold = y.iloc[train_idx], y.iloc[val_idx]
                
                model_clone = clone(model)
                try:
                    model_clone.fit(X_train_fold, y_train_fold)
                    val_preds = model_clone.predict(X_val_fold)
                    val_probs = model_clone.predict_proba(X_val_fold)
                    
                    oof_preds[val_idx, model_idx] = val_preds
                    
                    prob_start = model_idx * self.n_classes
                    prob_end = prob_start + self.n_classes
                    
                    if val_probs.shape[1] == self.n_classes:
                        oof_probs[val_idx, prob_start:prob_end] = val_probs
                    else:
                        temp_probs = np.zeros((len(val_idx), self.n_classes))
                        classes = model_clone.classes_
                        for i, cls in enumerate(classes):
                            if cls < self.n_classes:
                                temp_probs[:, cls] = val_probs[:, i]
                        oof_probs[val_idx, prob_start:prob_end] = temp_probs
                    
                    fold_accuracy = accuracy_score(y_val_fold, val_preds)
                    fold_scores.append(fold_accuracy)
                    
                except Exception:
                    most_frequent_class = y_train_fold.mode().iloc[0]
                    oof_preds[val_idx, model_idx] = most_frequent_class
                    uniform_prob = 1.0 / self.n_classes
                    prob_start = model_idx * self.n_classes
                    prob_end = prob_start + self.n_classes
                    oof_probs[val_idx, prob_start:prob_end] = uniform_prob
                    fold_scores.append(0.0)
            
            self.cv_scores[name] = np.mean(fold_scores) if fold_scores else 0.0
            self.fold_scores[name] = fold_scores
        
        oof_probs_scaled = self.scaler.fit_transform(oof_probs)
        
        # Meta logistic: balanced classes and higher max_iter for stable convergence
        self.meta_model = LogisticRegression(
            C=self.meta_C,
            random_state=self.random_state,
            max_iter=1500,
            solver='lbfgs',
            multi_class='auto',
            class_weight='balanced'
        )
        self.meta_model.fit(oof_probs_scaled, y)
        
        meta_pred = self.meta_model.predict(oof_probs_scaled)
        self.meta_score = accuracy_score(y, meta_pred)
        
        for name, model in self.base_models.items():
            model.fit(X, y)
        
        self.is_fitted = True
        return self
    
    def predict(self, X):
        if not self.is_fitted:
            return np.zeros(len(X))
        
        probs = self.predict_proba(X)
        return np.argmax(probs, axis=1)
    
    def predict_proba(self, X):
        if not self.is_fitted:
            return np.full((len(X), self.n_classes), 1.0/self.n_classes)
        
        base_probs = np.zeros((len(X), len(self.base_models) * self.n_classes))
        
        for model_idx, (name, model) in enumerate(self.base_models.items()):
            try:
                model_probs = model.predict_proba(X)
                prob_start = model_idx * self.n_classes
                prob_end = prob_start + self.n_classes
                
                if model_probs.shape[1] == self.n_classes:
                    base_probs[:, prob_start:prob_end] = model_probs
                else:
                    temp_probs = np.zeros((len(X), self.n_classes))
                    classes = model.classes_
                    for i, cls in enumerate(classes):
                        if cls < self.n_classes:
                            temp_probs[:, cls] = model_probs[:, i]
                    base_probs[:, prob_start:prob_end] = temp_probs
                    
            except Exception:
                prob_start = model_idx * self.n_classes
                prob_end = prob_start + self.n_classes
                base_probs[:, prob_start:prob_end] = 1.0 / self.n_classes
        
        base_probs_scaled = self.scaler.transform(base_probs)
        return self.meta_model.predict_proba(base_probs_scaled)
    
    def get_model_info(self):
        if not self.is_fitted:
            return {'type': 'enhanced_meta_classifier', 'fitted': False}
        
        feature_importance = {}
        model_names = list(self.base_models.keys())
        
        for class_idx in range(self.n_classes):
            feature_importance[f'class_{class_idx}'] = {}
            for model_idx, name in enumerate(model_names):
                prob_indices = list(range(model_idx * self.n_classes, (model_idx + 1) * self.n_classes))
                coefs = self.meta_model.coef_[class_idx] if self.n_classes > 2 else self.meta_model.coef_[0]
                feature_importance[f'class_{class_idx}'][name] = np.mean(np.abs(coefs[prob_indices]))
        
        return {
            'type': 'enhanced_meta_classifier',
            'fitted': True,
            'meta_score': self.meta_score,
            'feature_importance': feature_importance,
            'meta_C': self.meta_C,
            'cv_scores': self.cv_scores,
            'fold_scores': self.fold_scores,
            'n_folds': self.n_folds,
            'embargo_pct': self.embargo_pct,
            'purge_pct': self.purge_pct,
            'n_classes': self.n_classes
        }

In [40]:
from sklearn.isotonic import IsotonicRegression

class CustomClassificationCalibrator:
    """Isotonic-only calibration for the meta-learner classifier (no CV; fits on full calibration window)."""

    def __init__(self, base_estimator):
        self.base_estimator = base_estimator
        self.calibrators = {}
        self.is_fitted = False

    def fit(self, X, y):
        """Fit isotonic calibration on the full calibration set probabilities (per class)."""
        import numpy as np

        # Get uncalibrated probabilities from the already-fitted base estimator
        uncal_probas = self.base_estimator.predict_proba(X)
        n_classes = uncal_probas.shape[1]
        y_arr = np.asarray(y)

        for class_idx in range(n_classes):
            class_probas = uncal_probas[:, class_idx]
            class_labels = (y_arr == class_idx).astype(int)

            # Isotonic requires both positive and negative examples; fallback if class absent
            pos = class_labels.sum()
            neg = len(class_labels) - pos
            if pos == 0 or neg == 0:
                # Not enough signal to fit isotonic; keep calibrator as None to pass through uncalibrated probs
                self.calibrators[class_idx] = None
                continue

            calibrator = IsotonicRegression(out_of_bounds='clip')
            calibrator.fit(class_probas, class_labels)
            self.calibrators[class_idx] = calibrator

        self.is_fitted = True
        return self

    def predict_proba(self, X):
        """Return isotonic-calibrated probabilities (rows normalized to sum to 1)."""
        if not self.is_fitted:
            raise ValueError("Calibrator must be fitted before making predictions")

        import numpy as np
        uncal_probas = self.base_estimator.predict_proba(X)
        cal_probas = uncal_probas.copy()

        n_classes = uncal_probas.shape[1]
        for class_idx in range(n_classes):
            calibrator = self.calibrators.get(class_idx, None)
            if calibrator is not None:
                cal_probas[:, class_idx] = calibrator.transform(uncal_probas[:, class_idx])
            # else: leave uncalibrated values as-is

        # Normalize rows to sum to 1 to get a proper probability distribution
        row_sums = cal_probas.sum(axis=1, keepdims=True)
        row_sums[row_sums == 0] = 1.0
        cal_probas = cal_probas / row_sums
        return cal_probas

    def predict(self, X):
        """Return calibrated class predictions"""
        import numpy as np
        probas = self.predict_proba(X)
        return np.argmax(probas, axis=1)

In [41]:
if 'final_dataset' in locals() and not final_dataset.empty:
    train_dataset = final_dataset.copy()
    
    feature_cols = [col for col in train_dataset.columns 
                   if col not in TARGET_COLS + ['timestamp', 'close']]

    # No global imputation here to avoid leakage; ensure clean by dropping any residual NaNs
    if train_dataset[feature_cols].isnull().any(axis=None):
        train_dataset = train_dataset.dropna(subset=feature_cols)
    
    proper_feature_cols = feature_cols.copy()

    # Three-way time-ordered split: 64% train (fit), 16% calibration (held-out), 20% test/backtest (held-out)
    n = len(train_dataset)
    train_end = int(n * 0.64)
    calib_end = int(n * 0.80)

    train_data = train_dataset.iloc[:train_end].copy()
    calib_data = train_dataset.iloc[train_end:calib_end].copy()
    test_data = train_dataset.iloc[calib_end:].copy()

    # Feature matrices
    X_train_model_clean = train_data[feature_cols].copy()
    X_train_calib_clean = calib_data[feature_cols].copy()
    X_test_model_clean = test_data[feature_cols].copy()

    classification_target_available = True

else:
    proper_feature_cols = []
    X_train_model_clean = pd.DataFrame()
    X_train_calib_clean = pd.DataFrame()
    classification_target_available = False

In [42]:
classification_target = 'direction_confidence_3min'

if classification_target in train_dataset.columns:
    y_train_classification = train_dataset[classification_target].loc[X_train_model_clean.index]
    y_calib_classification = train_dataset[classification_target].loc[X_train_calib_clean.index]
    # Ensure test labels exist for Cell 25
    if 'X_test_model_clean' in globals():
        y_test_model_clean = train_dataset[classification_target].loc[X_test_model_clean.index]

    class_dist = y_train_classification.value_counts().sort_index()
    min_class_size = class_dist.min()

    try:
        meta_classifier = EnhancedMetaClassifier(
            meta_C=1.0,
            random_state=42,
            n_folds=3,
            embargo_pct=0.01,
            purge_pct=0.02,
            min_train_samples=1000,
            n_classes=3
        )
        meta_classifier.fit(X_train_model_clean, y_train_classification)
        # Explicit fitted flag for downstream guards
        setattr(meta_classifier, 'fitted_', True)

        meta_class_info = meta_classifier.get_model_info()

    except Exception:
        meta_classifier = None

    if meta_classifier is not None:
        try:
            meta_class_probs_uncal = meta_classifier.predict_proba(X_train_calib_clean)
            meta_class_preds_uncal = meta_classifier.predict(X_train_calib_clean)

            # Isotonic-only calibrator (no method arg)
            meta_class_calibrator = CustomClassificationCalibrator(
                base_estimator=meta_classifier
            )
            meta_class_calibrator.fit(X_train_calib_clean, y_calib_classification)
            # Explicit fitted flag for downstream guards
            setattr(meta_class_calibrator, 'fitted_', True)

            meta_class_probs_cal = meta_class_calibrator.predict_proba(X_train_calib_clean)
            meta_class_preds_cal = meta_class_calibrator.predict(X_train_calib_clean)

            from sklearn.metrics import accuracy_score, log_loss

            meta_acc_uncal = accuracy_score(y_calib_classification, meta_class_preds_uncal)
            meta_acc_cal = accuracy_score(y_calib_classification, meta_class_preds_cal)

            try:
                meta_logloss_uncal = log_loss(y_calib_classification, meta_class_probs_uncal)
                meta_logloss_cal = log_loss(y_calib_classification, meta_class_probs_cal)
            except Exception:
                meta_logloss_uncal = meta_logloss_cal = float('inf')

        except Exception:
            meta_class_calibrator = None
else:
    meta_classifier = None
    meta_class_calibrator = None

In [43]:
# Lightweight BMA-style classifier (reference): trains multiple base models, blends by OOF accuracy/consistency/recency
from sklearn.base import clone
from sklearn.metrics import accuracy_score
from sklearn.ensemble import HistGradientBoostingClassifier, RandomForestClassifier, ExtraTreesClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import RobustScaler, QuantileTransformer
import numpy as np
import pandas as pd

class BMAStackerClassifier:
    def __init__(self, n_folds=5, random_state=42, embargo_pct=0.01, purge_pct=0.02, min_train_samples=1000, n_classes=3):
        self.n_folds = n_folds
        self.random_state = random_state
        self.embargo_pct = embargo_pct
        self.purge_pct = purge_pct
        self.min_train_samples = min_train_samples
        self.n_classes = n_classes
        self.base_models = {}
        self.weights_ = {}
        self.is_fitted = False

    def _get_base_models(self):
        return {
            'histgb': HistGradientBoostingClassifier(max_iter=100, learning_rate=0.1, max_depth=6, random_state=self.random_state),
            'randomforest': RandomForestClassifier(n_estimators=200, max_depth=12, random_state=self.random_state, n_jobs=-1, class_weight='balanced_subsample'),
            'extratrees': ExtraTreesClassifier(n_estimators=200, max_depth=12, random_state=self.random_state, n_jobs=-1, class_weight='balanced'),
            'logistic_scaled': Pipeline([
                ('quantile', QuantileTransformer(n_quantiles=800, output_distribution='normal')),
                ('scaler', RobustScaler()),
                ('classifier', LogisticRegression(random_state=self.random_state, max_iter=1000, class_weight='balanced'))
            ]),
        }

    def _create_purged_splits(self, X, y):
        n = len(X)
        embargo = max(1, int(n * self.embargo_pct))
        purge = max(1, int(n * self.purge_pct))
        step = max(1, n // (self.n_folds + 1))
        splits = []
        for i in range(self.n_folds):
            train_end = (i + 1) * step
            train_end_purged = max(self.min_train_samples, train_end - purge)
            val_start = train_end + embargo
            val_end = min(val_start + step, n)
            if train_end_purged > 0 and val_end - val_start >= 50 and train_end_purged >= self.min_train_samples:
                train_idx = list(range(0, train_end_purged))
                val_idx = list(range(val_start, val_end))
                # Basic temporal check
                if hasattr(X, 'index') and len(X.index) > 0:
                    try:
                        if X.index[train_idx].max() >= X.index[val_idx].min():
                            continue
                    except Exception:
                        pass
                # Ensure class diversity
                if len(np.unique(y.iloc[train_idx])) >= 2 and len(np.unique(y.iloc[val_idx])) >= 2:
                    splits.append((train_idx, val_idx))
        if len(splits) < 2:
            raise ValueError('Insufficient valid purged splits for BMAStackerClassifier')
        return splits

    def fit(self, X, y):
        self.base_models = self._get_base_models()
        splits = self._create_purged_splits(X, y)
        m = len(self.base_models)
        oof_preds = np.zeros((len(X), m), dtype=float)
        oof_probs = np.zeros((len(X), m * self.n_classes), dtype=float)

        names = list(self.base_models.keys())
        acc_scores = {n: [] for n in names}

        for j, (name, model) in enumerate(self.base_models.items()):
            for (tr_idx, va_idx) in splits:
                mdl = clone(model)
                Xtr, Xva = X.iloc[tr_idx], X.iloc[va_idx]
                ytr, yva = y.iloc[tr_idx], y.iloc[va_idx]
                try:
                    mdl.fit(Xtr, ytr)
                    pv = mdl.predict_proba(Xva)
                    yp = mdl.predict(Xva)
                    acc_scores[name].append(accuracy_score(yva, yp))
                    oof_preds[va_idx, j] = yp
                    ps, pe = j * self.n_classes, (j + 1) * self.n_classes
                    temp = np.zeros((len(va_idx), self.n_classes))
                    if pv.shape[1] == self.n_classes:
                        temp = pv
                    else:
                        for ii, cls in enumerate(getattr(mdl, 'classes_', [])):
                            if int(cls) < self.n_classes:
                                temp[:, int(cls)] = pv[:, ii]
                    oof_probs[va_idx, ps:pe] = temp
                except Exception:
                    acc_scores[name].append(0.0)
                    ps, pe = j * self.n_classes, (j + 1) * self.n_classes
                    oof_probs[va_idx, ps:pe] = 1.0 / self.n_classes

        # Composite weights: accuracy mean as simple proxy (normalize to sum 1)
        raw = np.array([np.mean(acc_scores[n]) if len(acc_scores[n]) else 0.0 for n in names], dtype=float)
        raw = np.clip(raw, 0.0, None)
        s = raw.sum()
        w = (raw / s) if s > 0 else np.ones_like(raw) / max(len(raw), 1)
        self.weights_ = {n: float(wi) for n, wi in zip(names, w)}

        # Fit final bases on full data
        for name, model in self.base_models.items():
            try:
                model.fit(X, y)
            except Exception:
                pass
        self.is_fitted = True
        return self

    def predict_proba(self, X):
        if not self.is_fitted:
            return np.full((len(X), self.n_classes), 1.0 / self.n_classes)
        names = list(self.base_models.keys())
        probs = np.zeros((len(X), self.n_classes), dtype=float)
        for name in names:
            mdl = self.base_models[name]
            try:
                pv = mdl.predict_proba(X)
                # align class columns if needed
                aligned = np.zeros_like(probs)
                if pv.shape[1] == self.n_classes:
                    aligned = pv
                else:
                    for ii, cls in enumerate(getattr(mdl, 'classes_', [])):
                        cls = int(cls)
                        if 0 <= cls < self.n_classes:
                            aligned[:, cls] = pv[:, ii]
                probs += self.weights_.get(name, 0.0) * aligned
            except Exception:
                probs += (self.weights_.get(name, 0.0) * (1.0 / self.n_classes))
        # Row-normalize
        row_sums = probs.sum(axis=1, keepdims=True)
        row_sums[row_sums == 0] = 1.0
        return probs / row_sums

# Train optional BMA classifier and its calibrator if training data available
bma_classifier = None
bma_class_calibrator = None
try:
    if 'X_train_model_clean' in globals() and 'y_train_classification' in globals():
        bma_classifier = BMAStackerClassifier(n_folds=3, random_state=42, embargo_pct=0.01, purge_pct=0.02, min_train_samples=800, n_classes=3)
        bma_classifier.fit(X_train_model_clean, y_train_classification)
        # Reuse isotonic calibrator wrapper defined earlier
        if 'CustomClassificationCalibrator' in globals():
            bma_class_calibrator = CustomClassificationCalibrator(base_estimator=bma_classifier)
            bma_class_calibrator.fit(X_train_calib_clean, y_calib_classification)
            setattr(bma_class_calibrator, 'fitted_', True)
except Exception:
    bma_classifier = None
    bma_class_calibrator = None


### Optional BMA classifier (reference only)

We add a lightweight BMA-style classification ensemble (trained with purged, embargoed splits) as a reference implementation. It will not replace the existing meta-classifier; instead, we expose its calibrated signal as an additional arm `bma_model` for the allocator.


In [44]:
# Persist trained model, calibrator, and feature schema
import os
import json
import joblib
import hashlib
from datetime import datetime

# Optional: pandas only used for fallbacks; safe to import
try:
    import pandas as pd  # noqa: F401
except Exception:
    pd = None


def _schema_hash(cols):
    j = json.dumps(list(cols), separators=(",", ":"), ensure_ascii=False)
    return hashlib.sha256(j.encode("utf-8")).hexdigest()[:12]


def _is_fitted(obj) -> bool:
    try:
        if getattr(obj, "fitted_", False):
            return True
        if getattr(obj, "is_fitted", False):
            return True
        # Common sklearn indicator
        if hasattr(obj, "classes_"):
            return True
        # Our calibrator may have inner calibrators
        if hasattr(obj, "calibrators_"):
            return True
    except Exception:
        pass
    return False


def save_artifacts(meta_model, calibrator, feature_cols, target_name="direction_confidence_3min"):
    assert meta_model is not None and _is_fitted(meta_model), "Meta model not fitted"
    assert calibrator is not None and _is_fitted(calibrator), "Calibrator not fitted"

    out_dir = os.path.join("paper_trading_outputs", "models")
    os.makedirs(out_dir, exist_ok=True)

    stamp = datetime.utcnow().strftime("%Y%m%d_%H%M%S")
    schema_sig = _schema_hash(feature_cols)

    meta_path = os.path.join(out_dir, f"meta_classifier_{stamp}_{schema_sig}.joblib")
    cal_path  = os.path.join(out_dir, f"calibrator_{stamp}_{schema_sig}.joblib")
    cols_path = os.path.join(out_dir, f"feature_columns_{stamp}_{schema_sig}.json")
    info_path = os.path.join(out_dir, f"training_meta_{stamp}_{schema_sig}.json")
    manifest  = os.path.join(out_dir, "LATEST.json")

    # Dump models
    joblib.dump(meta_model, meta_path, compress=3)
    joblib.dump(calibrator, cal_path, compress=3)

    # Dump feature columns
    with open(cols_path, "w", encoding="utf-8") as f:
        json.dump({"feature_cols": list(feature_cols), "schema_hash": schema_sig}, f)

    # Training info (best-effort)
    info = {}
    try:
        if hasattr(meta_model, "get_model_info"):
            info = meta_model.get_model_info()
    except Exception:
        info = {}
    train_info = {
        "timestamp_utc": stamp,
        "target": target_name,
        "n_features": int(len(feature_cols)),
        "schema_hash": schema_sig,
        "class_mapping": {"down": 0, "neutral": 1, "up": 2},
        "meta_score_in_sample": float(info.get("meta_score", 0.0)) if isinstance(info, dict) else 0.0,
        "cv_scores": info.get("cv_scores", {}) if isinstance(info, dict) else {},
        "folds": int(info.get("n_folds", 0)) if isinstance(info, dict) else 0,
        "embargo_pct": float(info.get("embargo_pct", 0.0)) if isinstance(info, dict) else 0.0,
        "purge_pct": float(info.get("purge_pct", 0.0)) if isinstance(info, dict) else 0.0,
    }
    with open(info_path, "w", encoding="utf-8") as f:
        json.dump(train_info, f)

    # Update LATEST manifest
    latest = {
        "meta_classifier": os.path.basename(meta_path),
        "calibrator": os.path.basename(cal_path),
        "feature_columns": os.path.basename(cols_path),
        "training_meta": os.path.basename(info_path),
    }
    with open(manifest, "w", encoding="utf-8") as f:
        json.dump(latest, f)

    print("Saved model artifacts:")
    print(" -", meta_path)
    print(" -", cal_path)
    print(" -", cols_path)
    print(" -", info_path)
    print("LATEST ->", manifest)


# Resolve feature cols and target name from globals
_feature_cols = None
for name in ("proper_feature_cols",):
    if name in globals() and globals()[name] is not None:
        _feature_cols = list(globals()[name])
        break
if _feature_cols is None:
    for name in ("X_train_model_clean", "X_train_calib_clean", "X_test_model_clean"):
        if name in globals() and getattr(globals()[name], "columns", None) is not None:
            _feature_cols = list(globals()[name].columns)
            break
if _feature_cols is None:
    raise RuntimeError("Could not determine feature columns to save. Ensure training cells ran.")

_target_name = globals().get("classification_target", "direction_confidence_3min")

# Save once after fitting/calibration
if "meta_classifier" in globals() and "meta_class_calibrator" in globals():
    if _is_fitted(meta_classifier) and _is_fitted(meta_class_calibrator):
        save_artifacts(meta_classifier, meta_class_calibrator, _feature_cols, target_name=_target_name)
    else:
        print("Artifacts not saved: model or calibrator not fitted.")
else:
    print("Artifacts not saved: meta_classifier or meta_class_calibrator missing.")

Saved model artifacts:
 - paper_trading_outputs\models\meta_classifier_20251028_101254_d7a9e9fb3a42.joblib
 - paper_trading_outputs\models\calibrator_20251028_101254_d7a9e9fb3a42.joblib
 - paper_trading_outputs\models\feature_columns_20251028_101254_d7a9e9fb3a42.json
 - paper_trading_outputs\models\training_meta_20251028_101254_d7a9e9fb3a42.json
LATEST -> paper_trading_outputs\models\LATEST.json


In [45]:
# Cell 25 — Test-holdout validation and artifact checks
import numpy as np
from sklearn.metrics import accuracy_score, log_loss

# Guards
assert 'meta_classifier' in globals() and getattr(meta_classifier, 'fitted_', False), "Model not fitted. Run training cells."
assert 'meta_class_calibrator' in globals() and getattr(meta_class_calibrator, 'fitted_', False), "Calibrator not fitted. Run calibration."
assert 'X_test_model_clean' in globals() and 'y_test_model_clean' in globals(), "Test split missing."

X_test = X_test_model_clean
y_test = y_test_model_clean

# Uncalibrated predictions
proba_uncal = meta_classifier.predict_proba(X_test)
preds_uncal = np.argmax(proba_uncal, axis=1)

# Calibrated predictions (calibrator wraps the model internally)
proba_cal = meta_class_calibrator.predict_proba(X_test)
preds_cal = meta_class_calibrator.predict(X_test)

# Metrics
acc_uncal = accuracy_score(y_test, preds_uncal)
acc_cal = accuracy_score(y_test, preds_cal)

# Handle potential single-class edge case for log_loss
try:
    ll_uncal = log_loss(y_test, proba_uncal, labels=np.unique(y_test))
    ll_cal = log_loss(y_test, proba_cal, labels=np.unique(y_test))
except ValueError:
    ll_uncal, ll_cal = np.nan, np.nan

print(f"test_acc_uncal={acc_uncal:.4f} test_acc_cal={acc_cal:.4f}")
print(f"test_logloss_uncal={ll_uncal:.4f} test_logloss_cal={ll_cal:.4f}")
print(f"proba_shapes uncal={proba_uncal.shape} cal={proba_cal.shape}; rowsum≈1 check cal={np.allclose(proba_cal.sum(axis=1)[:5], 1.0, atol=1e-6)}")

test_acc_uncal=0.3333 test_acc_cal=0.3734
test_logloss_uncal=1.1892 test_logloss_cal=1.1002
proba_shapes uncal=(2073, 3) cal=(2073, 3); rowsum≈1 check cal=True


## 5. Trading Signal Generation

### Advanced Signal Pipeline with Bandit Strategy Selection

In [46]:
# Bandit allocator: class and backtest function (tri-state side + flat fallback)
import numpy as np
import pandas as pd
from typing import Tuple, Optional

class SimpleThompsonBandit:
    def __init__(self, n_arms: int):
        self.counts = np.zeros(n_arms, dtype=float)
        self.means = np.zeros(n_arms, dtype=float)
        self.vars = np.ones(n_arms, dtype=float)

    def select(self, eligible_mask: np.ndarray) -> int:
        masked_means = self.means.copy()
        masked_vars = self.vars.copy()
        masked_means[~eligible_mask] = -np.inf
        masked_vars[~eligible_mask] = 1.0
        samples = np.where(eligible_mask, np.random.normal(masked_means, np.sqrt(masked_vars + 1e-9)), -np.inf)
        return int(np.argmax(samples))

    def update(self, arm: int, reward: float):
        c = self.counts[arm] + 1.0
        mu = self.means[arm]
        new_mu = mu + (reward - mu) / c
        delta = reward - mu
        delta2 = reward - new_mu
        var = self.vars[arm] + (delta * delta2 - self.vars[arm]) / c
        self.counts[arm] = c
        self.means[arm] = new_mu
        self.vars[arm] = max(var, 1e-6)

def _compute_sortino(returns: pd.Series, annualizer: float) -> float:
    if returns is None or returns.empty:
        return np.nan
    downside = returns[returns < 0]
    denom = downside.std()
    if denom == 0 or np.isnan(denom):
        return np.nan
    mu = returns.mean()
    return float(annualizer * mu / denom)


def _compute_turnover(pos_series: pd.Series) -> float:
    if pos_series is None or pos_series.empty:
        return np.nan
    return float(np.abs(pos_series.diff().fillna(0.0)).sum())


def _compute_hit_rate(trades_df: pd.DataFrame) -> float:
    if trades_df is None or trades_df.empty:
        return np.nan
    if 'pnl_$' not in trades_df.columns:
        return np.nan
    wins = (trades_df['pnl_$'] > 0).sum()
    return float(wins / max(len(trades_df), 1))


def run_allocator_backtest(
    bt_df: pd.DataFrame,
    arm_signals: np.ndarray,
    arm_eligible: np.ndarray,
    adv_series: pd.Series,
    cooldown_bars: int,
    cost_bp: float,
    impact_k: float,
    side_eps_vec: Optional[np.ndarray] = None,
    eps: float = 1e-12
) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame, dict]:
    price_series = bt_df['close'] if 'close' in bt_df.columns else bt_df.iloc[:, 0]
    price = price_series.to_numpy(dtype=float)
    n = len(price)

    rets = np.zeros_like(price, dtype=float)
    denom = np.where(price[:-1] == 0, 1.0, price[:-1])
    rets[1:] = (price[1:] - price[:-1]) / denom

    adv_valid = isinstance(adv_series, pd.Series) and not adv_series.dropna().empty
    adv_arr = adv_series.reindex(bt_df.index).to_numpy() if adv_valid else np.ones(n, dtype=float)
    impact_k_eff = impact_k if adv_valid else 0.0

    bandit = SimpleThompsonBandit(n_arms=arm_signals.shape[1])

    pos = 0.0
    pos_smooth = 0.0
    last_flip_idx = -10**9

    records_eq = []
    records_tr = []
    records_bu = []

    cum_equity = 1.0
    equity_series = np.ones(n, dtype=float)

    sigma_target = float(globals().get('SIGMA_TARGET', 0.20))
    pos_max = float(globals().get('POS_MAX', 1.0))
    dd_stop = float(globals().get('DD_STOP', 0.05))
    latency_k = int(globals().get('LATENCY_BARS', 0))
    slippage_bps = float(globals().get('SLIPPAGE_BPS', 0.0))
    cost_convention = str(globals().get('COST_CONVENTION', 'per_transition'))
    smooth_beta = float(globals().get('SMOOTH_BETA', 0.0))

    vol_window = 50
    rets_series = pd.Series(rets, index=bt_df.index)
    realized_vol = rets_series.rolling(vol_window, min_periods=1).std().fillna(0.0)

    warmup_count = rets_series.expanding().count()
    warmup_done = (warmup_count >= vol_window).astype(float)

    from collections import deque
    exec_pos_buffer = deque([0.0] * max(1, latency_k + 1), maxlen=max(1, latency_k + 1))

    last_exec_pos = 0.0

    for t in range(n):
        elig = arm_eligible[t] if t < len(arm_eligible) else np.array([False]*arm_signals.shape[1])
        desired_side = pos
        chosen = None

        if np.any(elig):
            chosen = bandit.select(elig)
            raw_val = float(arm_signals[t, chosen])
            th = float(side_eps_vec[chosen]) if (side_eps_vec is not None) else 0.0
            if abs(raw_val) < th:
                desired_side = 0.0
            else:
                desired_side = 1.0 if raw_val > 0 else -1.0
        else:
            if (t - last_flip_idx) >= cooldown_bars:
                desired_side = 0.0

        rv = float(realized_vol.iloc[t])
        scaler_raw = (sigma_target / max(rv, 1e-8)) if sigma_target > 0 else 1.0
        scaler = float(warmup_done.iloc[t]) * scaler_raw + (1.0 - float(warmup_done.iloc[t])) * 1.0
        target_pos = np.clip(desired_side * scaler, -pos_max, pos_max)

        if smooth_beta > 0.0:
            pos_smooth = smooth_beta * target_pos + (1.0 - smooth_beta) * pos_smooth
            target_pos = np.clip(pos_smooth, -pos_max, pos_max)

        exec_pos_buffer.append(target_pos)
        exec_pos = exec_pos_buffer[0] if latency_k > 0 else target_pos

        cost_bps = 0.0
        if exec_pos != last_exec_pos:
            delta = abs(exec_pos - last_exec_pos)
            impact = (impact_k_eff * (delta / max(adv_arr[t], eps))) if impact_k_eff > 0 else 0.0
            trans_cost = cost_bp + slippage_bps + impact
            if cost_convention == 'per_roundtrip':
                trans_cost *= 0.5
            cost_bps = trans_cost
            records_tr.append((bt_df.index[t], float(last_exec_pos), float(exec_pos), float(cost_bps)))
            last_exec_pos = exec_pos
            last_flip_idx = t

        pnl = rets[t] * (exec_pos_buffer[0] if latency_k > 0 else exec_pos)
        pnl -= (cost_bps / 10000.0) if cost_bps > 0 else 0.0
        cum_equity *= (1.0 + pnl)
        equity_series[t] = cum_equity
        records_eq.append((bt_df.index[t], cum_equity))

        if chosen is not None:
            bandit.update(chosen, pnl)
            records_bu.append((bt_df.index[t], int(chosen), float(pnl)))

        if dd_stop > 0:
            peak = np.max(equity_series[:t+1])
            if peak > 0 and (cum_equity / peak - 1.0) < -dd_stop:
                exec_pos_buffer.append(0.0)
                last_exec_pos = 0.0

    Eq = pd.DataFrame.from_records(records_eq, columns=['ts', 'equity']).set_index('ts')
    Tr = pd.DataFrame.from_records(records_tr, columns=['ts', 'from_pos', 'to_pos', 'cost_bps'])
    Bu = pd.DataFrame.from_records(records_bu, columns=['ts', 'chosen', 'reward'])

    notional = float(globals().get('NOTIONAL', 100000.0))
    if not Tr.empty:
        Tr['ts'] = pd.to_datetime(Tr['ts'], errors='coerce')
        Tr = Tr.sort_values('ts').reset_index(drop=True)
        eq_series2 = Eq['equity'].copy()
        eq_series2.index = pd.to_datetime(eq_series2.index, errors='coerce')
        eq_series2 = eq_series2.sort_index()

        eq_df_reset = eq_series2.reset_index()
        eq_df_reset.columns = ['ts','equity']
        eq_prev_reset = eq_df_reset[['ts']].copy()
        eq_prev_reset['equity_prev'] = eq_df_reset['equity'].shift(1).values

        Tr = pd.merge_asof(Tr, eq_df_reset, on='ts', direction='backward')
        Tr = pd.merge_asof(Tr, eq_prev_reset, on='ts', direction='backward')
        Tr['equity_prev'] = Tr['equity_prev'].fillna(1.0)

        Tr['cost_$'] = (Tr['equity_prev'] * notional) * (Tr['cost_bps'] / 10000.0)

        next_ts = Tr['ts'].shift(-1)
        next_ts.iloc[-1] = eq_df_reset['ts'].iloc[-1]
        next_df = pd.DataFrame({'ts': next_ts}).sort_values('ts').reset_index(drop=True)
        next_df = pd.merge_asof(next_df, eq_df_reset, on='ts', direction='backward')
        Tr['equity_next'] = next_df['equity'].values

        Tr['pnl_$'] = (Tr['equity_next'] - Tr['equity']) * notional - Tr['cost_$']
        Tr['cum_pnl_$'] = Tr['pnl_$'].cumsum()

    eq_rets = Eq['equity'].pct_change().dropna()
    annualizer = float(globals().get('ANNUALIZER', 1.0))
    sharpe = float(annualizer * eq_rets.mean() / (eq_rets.std() if eq_rets.std() != 0 else np.nan)) if len(eq_rets) else np.nan
    sortino = _compute_sortino(eq_rets, annualizer)
    maxdd = float(-(Eq['equity'] / Eq['equity'].cummax() - 1.0).min()) if not Eq.empty else np.nan
    turnover = _compute_turnover(pd.Series(Tr['to_pos'].astype(float))) if not Tr.empty else 0.0
    hit_rate = _compute_hit_rate(Tr) if not Tr.empty else np.nan

    assumptions = {
        'vol_warmup': 'no_target_until_window',
        'adv_fallback': (not adv_valid),
        'impact_k_effective': float(impact_k_eff),
        'equity_prev_first_trade': 1.0,
    }

    metrics = {
        'final_equity': float(Eq['equity'].iloc[-1]) if len(Eq) else 1.0,
        'n_trades': int(len(Tr)),
        'sharpe': sharpe,
        'sortino': sortino,
        'maxDD': maxdd,
        'turnover': float(turnover),
        'hit_rate': float(hit_rate) if not np.isnan(hit_rate) else np.nan,
        'assumptions': assumptions,
    }
    return Eq, Tr, Bu, metrics

In [47]:
# Bind backtest data (bt) and features (X_bt) to the test holdout deterministically
# Also provide a consistent ADV20 series aligned by timestamp, and enforce calibrated-only usage.
import pandas as pd
import numpy as np

# Use the three-way split test holdout as the canonical backtest set
assert 'test_data' in globals() and isinstance(test_data, pd.DataFrame) and len(test_data) > 0, "test_data not available; run the training/split cells first."
assert 'X_test_model_clean' in globals() and isinstance(X_test_model_clean, pd.DataFrame), "X_test_model_clean missing; build features and splits first."

# Backtest price frame with datetime index for stable alignment
bt = test_data[['timestamp', 'close']].copy()
bt = bt.dropna(subset=['timestamp', 'close'])
bt['timestamp'] = pd.to_datetime(bt['timestamp'], errors='coerce')
bt = bt.dropna(subset=['timestamp']).set_index('timestamp').sort_index()

# Feature matrix for model probabilities on the same rows
X_bt = X_test_model_clean.copy()

# ADV20 series, prefer cohort-derived adv20_by_ts, else compute from df
if 'adv20_by_ts' in globals() and isinstance(adv20_by_ts, pd.Series) and len(adv20_by_ts) > 0:
    adv20 = adv20_by_ts.copy()
elif 'df' in globals() and {'timestamp','close','volume'}.issubset(df.columns):
    tmp = df[['timestamp','close','volume']].copy()
    tmp['timestamp'] = pd.to_datetime(tmp['timestamp'], errors='coerce')
    tmp = tmp.dropna(subset=['timestamp']).sort_values('timestamp')
    tmp['adv20'] = (tmp['close'] * tmp['volume']).rolling(20, min_periods=1).mean()
    adv20 = pd.Series(tmp['adv20'].values, index=tmp['timestamp'])
else:
    adv20 = pd.Series(1.0, index=bt.index)

# Provide adv20_by_ts alias for downstream code
adv20_by_ts = adv20

# Class index convention: 0=down, 2=up
up_idx = 2
down_idx = 0

# Force calibrated-only path downstream
USE_CALIBRATED = True
assert 'meta_class_calibrator' in globals() and getattr(meta_class_calibrator, 'is_fitted', False), "Calibrator not fitted; run calibration cells first."


In [22]:
# Consolidated arm signals and eligibility (allocator-only)
import numpy as np
import pandas as pd

# Resolve backtest price index from bt set earlier
assert 'bt' in globals() and isinstance(bt, pd.DataFrame) and 'close' in bt.columns, "bt not bound; run the binding cell above."
px = bt['close'].astype(float)
idx_bt = px.index

# Cohort signals (default to 0 if missing), aligned to bt index
S_top_series = (S_top.reindex(idx_bt).fillna(0.0) if 'S_top' in globals() else pd.Series(0.0, index=idx_bt))
S_bot_series = (S_bot.reindex(idx_bt).fillna(0.0) if 'S_bot' in globals() else pd.Series(0.0, index=idx_bt))

# Mood signal from price (tanh blend of 3-bar momentum and EMA-based mean reversion)
mom = px.pct_change(3)
mr = -(px / px.rolling(50, min_periods=10).mean() - 1.0)
S_mood_series = np.tanh(0.6*mom.fillna(0.0) + 0.4*mr.fillna(0.0))
S_mood_series = pd.Series(S_mood_series, index=idx_bt).fillna(0.0)

# Model directional score from calibrated probabilities on X_bt (enforced above)
assert 'X_bt' in globals() and len(X_bt) == len(idx_bt), "X_bt not bound to bt rows."
assert 'USE_CALIBRATED' in globals() and USE_CALIBRATED, "Calibrated-only required."
P_bt = meta_class_calibrator.predict_proba(X_bt)
up_idx_local = globals().get('up_idx', 2)
down_idx_local = globals().get('down_idx', 0)
p_up_bt = P_bt[:, up_idx_local]
p_dn_bt = P_bt[:, down_idx_local]
S_model_series = pd.Series(p_up_bt - p_dn_bt, index=idx_bt).fillna(0.0)

# Optional: BMA classifier calibrated probabilities → additional arm
if 'bma_class_calibrator' in globals() and bma_class_calibrator is not None and getattr(bma_class_calibrator, 'is_fitted', False):
    try:
        P_bma_bt = bma_class_calibrator.predict_proba(X_bt)
        p_up_bma = P_bma_bt[:, up_idx_local]
        p_dn_bma = P_bma_bt[:, down_idx_local]
        S_bma_series = pd.Series(p_up_bma - p_dn_bma, index=idx_bt).fillna(0.0)
    except Exception:
        S_bma_series = pd.Series(0.0, index=idx_bt)
else:
    S_bma_series = pd.Series(0.0, index=idx_bt)

# Assemble signals DataFrame
arm_signals_df = pd.DataFrame({
    'S_top': S_top_series,
    'S_bot': S_bot_series,
    'S_mood': S_mood_series,
    'S_model': S_model_series,
    'S_bma_model': S_bma_series,
}, index=idx_bt)

# Eligibility thresholds (with safe defaults)
S_MIN = globals().get('S_MIN', 0.25)
M_MIN = globals().get('M_MIN', 0.15)
CONF_MIN = globals().get('CONF_MIN', 0.60)
ALPHA_MIN = globals().get('ALPHA_MIN', 0.10)

arm_eligible_df = pd.DataFrame(False, index=idx_bt, columns=['pros','amateurs','mood','model','bma_model'])
arm_eligible_df['pros'] = arm_signals_df['S_top'].abs() >= S_MIN
arm_eligible_df['amateurs'] = arm_signals_df['S_bot'].abs() >= S_MIN
arm_eligible_df['mood'] = arm_signals_df['S_mood'].abs() >= M_MIN

# Confidence/alpha for model arms
conf_bt = np.maximum(p_up_bt, p_dn_bt)
alpha_bt = np.abs(p_up_bt - p_dn_bt)
arm_eligible_df['model'] = (conf_bt >= CONF_MIN) & (alpha_bt >= ALPHA_MIN)

# For BMA arm, compute from its own probs if available else mirror model eligibility
if 'P_bma_bt' in locals():
    conf_bma = np.maximum(p_up_bma, p_dn_bma)
    alpha_bma = np.abs(p_up_bma - p_dn_bma)
    arm_eligible_df['bma_model'] = (conf_bma >= CONF_MIN) & (alpha_bma >= ALPHA_MIN)
else:
    arm_eligible_df['bma_model'] = arm_eligible_df['model']

# Export to ndarray for backtest
arm_signals = arm_signals_df[['S_top','S_bot','S_mood','S_model','S_bma_model']].values
arm_eligible = arm_eligible_df[['pros','amateurs','mood','model','bma_model']].values
arm_names = ['pros','amateurs','mood','model','bma_model']


In [23]:
# Metrics helpers + single-run wrapper (flip_map default enabled)
import numpy as np
import pandas as pd
from typing import Dict, Optional

ANNUALIZER = np.sqrt(365*24*12)

# Default flips learned from diagnostics (can be overridden per-call)
DEFAULT_FLIP_MAP = {"Mood": True, "Model": True}

def sharpe_ratio(returns: pd.Series) -> float:
    if returns is None or returns.empty:
        return np.nan
    mu = returns.mean()
    sd = returns.std()
    if sd == 0 or np.isnan(sd):
        return np.nan
    return float(ANNUALIZER * mu / sd)


def sortino_ratio(returns: pd.Series) -> float:
    if returns is None or returns.empty:
        return np.nan
    downside = returns[returns < 0]
    denom = downside.std()
    if denom == 0 or np.isnan(denom):
        return np.nan
    mu = returns.mean()
    return float(ANNUALIZER * mu / denom)


def max_drawdown(equity: pd.Series) -> float:
    if equity is None or equity.empty:
        return np.nan
    dd = (equity / equity.cummax() - 1.0).min()
    return float(-dd) if pd.notna(dd) else np.nan


def turnover_from_trades(trades: pd.DataFrame) -> float:
    if trades is None or trades.empty:
        return 0.0
    return float(np.abs(trades['to_pos'].astype(float) - trades['from_pos'].astype(float)).sum())


def hit_rate_from_trades(trades: pd.DataFrame) -> float:
    if trades is None or trades.empty or 'pnl_$' not in trades.columns:
        return np.nan
    return float((trades['pnl_$'] > 0).sum() / max(len(trades), 1))


def _make_side_eps_vec(columns: list, cfg: Dict) -> np.ndarray:
    """Map per-arm thresholds dynamically based on arm names."""
    S_MIN_val = float(cfg.get('S_MIN', globals().get('S_MIN', 0.25)))
    M_MIN_val = float(cfg.get('M_MIN', globals().get('M_MIN', 0.15)))
    ALPHA_MIN_val = float(cfg.get('ALPHA_MIN', globals().get('ALPHA_MIN', 0.10)))
    vec = []
    for name in columns:
        lname = name.lower()
        if 'top' in lname or 'bot' in lname:
            vec.append(S_MIN_val)
        elif 'mood' in lname:
            vec.append(M_MIN_val)
        else:  # model-like arms
            vec.append(ALPHA_MIN_val)
    return np.array(vec, dtype=float)


def single_run_metrics(config: Dict, flip_map: Optional[Dict[str, bool]] = None) -> Dict:
    if flip_map is None:
        flip_map = DEFAULT_FLIP_MAP

    S_MIN_val = config.get('S_MIN', S_MIN)
    M_MIN_val = config.get('M_MIN', M_MIN)
    CONF_MIN_val = config.get('CONF_MIN', CONF_MIN)
    ALPHA_MIN_val = config.get('ALPHA_MIN', ALPHA_MIN)
    COOLDOWN_val = config.get('COOLDOWN', COOLDOWN)
    COST_BP_val = config.get('COST_BP', COST_BP)
    IMPACT_K_val = config.get('IMPACT_K', IMPACT_K)

    signals_df = arm_signals_df
    if flip_map:
        signals_df = apply_polarity_flip(signals_df, flip_map)

    side_eps_vec = _make_side_eps_vec(list(signals_df.columns), config)

    eq_df, tl_df, _, _metrics = run_allocator_backtest(
        bt_df=bt,
        arm_signals=signals_df.values,
        arm_eligible=arm_eligible,
        adv_series=adv20_by_ts,
        cooldown_bars=COOLDOWN_val,
        cost_bp=COST_BP_val,
        impact_k=IMPACT_K_val,
        side_eps_vec=side_eps_vec,
        eps=1e-12,
    )

    rets = eq_df['equity'].pct_change().dropna() if eq_df is not None else pd.Series(dtype=float)
    out = dict(
        final_equity=float(eq_df['equity'].iloc[-1]) if eq_df is not None else np.nan,
        n_trades=int(tl_df.shape[0]) if tl_df is not None else 0,
        sharpe=float(ANNUALIZER * rets.mean() / (rets.std() if rets.std() != 0 else np.nan)) if len(rets) else np.nan,
        sortino=sortino_ratio(rets),
        maxDD=max_drawdown(eq_df['equity']) if eq_df is not None else np.nan,
        turnover=turnover_from_trades(tl_df),
        hit_rate=hit_rate_from_trades(tl_df),
    )
    return out


## Extended gating and grid

We add:
- Consensus/advantage gating on top of per-arm thresholds
- Gross vs net attribution (run with/without costs)
- Extended grid over COOLDOWN and CONF_MIN

In [48]:
# Gating utilities: consensus and advantage
from typing import Dict, Optional, Tuple
import numpy as np
import pandas as pd


def compute_consensus_and_advantage(
    signals_df: pd.DataFrame,
    weights: Optional[Dict[str, float]] = None
) -> Tuple[pd.Series, pd.Series]:
    """
    Contract:
    - Input: signals_df with columns named for each arm; values in [-1, 1] directional scores.
    - Output:
        consensus: integer in {-3,-2,-1,0,1,2,3,4,5} (sum of signed votes across arms)
        advantage: float >= 0 representing margin between top-1 and top-2 weighted absolute signal strengths
    Notes:
    - Vote is sign(signal) with 0 treated as 0 (abstain)
    - Advantage compares |w_i * s_i| magnitudes
    - NaNs are treated as 0 for both vote and magnitude
    """
    if weights is None:
        weights = {c: 1.0 for c in signals_df.columns}

    vote = np.sign(signals_df.clip(-1, 1)).fillna(0.0)
    weighted = pd.DataFrame({c: weights.get(c, 1.0) * signals_df[c].abs().fillna(0.0) for c in signals_df.columns})

    consensus = vote.sum(axis=1)

    vals = weighted.values
    if vals.shape[1] == 0:
        advantage = pd.Series(0.0, index=signals_df.index)
    else:
        sorted_mag = np.sort(vals, axis=1)
        top1 = sorted_mag[:, -1]
        top2 = sorted_mag[:, -2] if weighted.shape[1] >= 2 else np.zeros(len(signals_df))
        advantage = pd.Series(top1 - top2, index=signals_df.index)

    return consensus.astype(float), advantage.astype(float)


def apply_polarity_flip(signals_df: pd.DataFrame, flip_map: Optional[Dict[str, bool]] = None) -> pd.DataFrame:
    if not flip_map:
        return signals_df
    out = signals_df.copy()
    for k, v in flip_map.items():
        if v:
            for c in out.columns:
                if k.lower() in c.lower():
                    out[c] = -out[c]
    return out


def apply_gating(
    signals_df: pd.DataFrame,
    eligible_df: pd.DataFrame,
    consensus_min: int = 2,
    advantage_min: float = 0.0
) -> Tuple[pd.DataFrame, pd.DataFrame]:
    consensus, advantage = compute_consensus_and_advantage(signals_df)
    gate_mask = (consensus.abs() >= int(consensus_min)) & (advantage.fillna(0.0) >= float(advantage_min))
    gated_eligible = eligible_df.copy()
    for c in eligible_df.columns:
        gated_eligible[c] = eligible_df[c] & gate_mask
    return signals_df.copy(), gated_eligible


def with_gating(signals_df: pd.DataFrame, eligible_df: pd.DataFrame, gate_cfg: Optional[Dict] = None):
    if not gate_cfg:
        return signals_df, eligible_df
    return apply_gating(
        signals_df,
        eligible_df,
        consensus_min=gate_cfg.get('consensus_min', 2),
        advantage_min=float(gate_cfg.get('advantage_min', 0.0))
    )


def _flip_signals(df: pd.DataFrame, fmap: Optional[Dict[str, bool]]) -> pd.DataFrame:
    if not fmap:
        return df
    out = df.copy()
    for k, v in fmap.items():
        if v:
            for c in out.columns:
                if k.lower() in c.lower():
                    out[c] = -out[c]
    return out


def _make_side_eps_vec(columns: list, cfg: Dict) -> np.ndarray:
    S_MIN_val = float(cfg.get('S_MIN', globals().get('S_MIN', 0.25)))
    M_MIN_val = float(cfg.get('M_MIN', globals().get('M_MIN', 0.15)))
    ALPHA_MIN_val = float(cfg.get('ALPHA_MIN', globals().get('ALPHA_MIN', 0.10)))
    vec = []
    for name in columns:
        lname = name.lower()
        if 'top' in lname or 'bot' in lname:
            vec.append(S_MIN_val)
        elif 'mood' in lname:
            vec.append(M_MIN_val)
        else:
            vec.append(ALPHA_MIN_val)
    return np.array(vec, dtype=float)


def run_allocator_once(cfg: Dict, gate_cfg: Optional[Dict] = None) -> Dict:
    if 'set_config_and_rebuild' in globals():
        try:
            set_config_and_rebuild(
                S_MIN_val=cfg.get('S_MIN', globals().get('S_MIN', 0.25)),
                M_MIN_val=cfg.get('M_MIN', globals().get('M_MIN', 0.15)),
                CONF_MIN_val=cfg.get('CONF_MIN', globals().get('CONF_MIN', 0.60)),
                ALPHA_MIN_val=cfg.get('ALPHA_MIN', globals().get('ALPHA_MIN', 0.10)),
                IMPACT_K_val=cfg.get('IMPACT_K', globals().get('IMPACT_K', 0.0)),
            )
        except TypeError:
            pass

    base_signals_df = globals().get('arm_signals_df', None)
    base_eligible_df = globals().get('arm_eligible_df', None)
    if base_signals_df is None or base_eligible_df is None:
        raise RuntimeError('arm_signals_df or arm_eligible_df not available')

    fmap = cfg.get('flip_map', globals().get('DEFAULT_FLIP_MAP', {}))

    sig_for_gate = _flip_signals(base_signals_df, fmap)

    if gate_cfg:
        _, gated_elig_df = with_gating(sig_for_gate, base_eligible_df, gate_cfg)
    else:
        gated_elig_df = base_eligible_df

    COOLDOWN_val = int(cfg.get('COOLDOWN', globals().get('COOLDOWN', 1)))
    COST_BP_val = float(cfg.get('COST_BP', globals().get('COST_BP', 0.0)))
    IMPACT_K_val = float(cfg.get('IMPACT_K', globals().get('IMPACT_K', 0.0)))

    side_eps_vec = _make_side_eps_vec(list(sig_for_gate.columns), cfg)

    Eq, Tr, Bu, met = run_allocator_backtest(
        bt_df=globals().get('bt'),
        arm_signals=sig_for_gate.values,
        arm_eligible=gated_elig_df.values,
        adv_series=globals().get('adv20_by_ts'),
        cooldown_bars=COOLDOWN_val,
        cost_bp=COST_BP_val,
        impact_k=IMPACT_K_val,
        side_eps_vec=side_eps_vec,
        eps=1e-12,
    )

    rets = Eq['equity'].pct_change().dropna() if Eq is not None else pd.Series(dtype=float)
    out = dict(
        final_equity=float(Eq['equity'].iloc[-1]) if Eq is not None else np.nan,
        n_trades=int(Tr.shape[0]) if Tr is not None else 0,
        sharpe=float(globals().get('ANNUALIZER', 1.0) * rets.mean() / (rets.std() if rets.std() != 0 else np.nan)) if len(rets) else np.nan,
        sortino=_compute_sortino(rets, float(globals().get('ANNUALIZER', 1.0))),
        maxDD=float(-(Eq['equity'] / Eq['equity'].cummax() - 1.0).min()) if Eq is not None and not Eq.empty else np.nan,
        turnover=float(np.abs(Tr['to_pos'].astype(float) - Tr['from_pos'].astype(float)).sum()) if Tr is not None and not Tr.empty else 0.0,
        hit_rate=float((Tr['pnl_$'] > 0).sum() / max(len(Tr), 1)) if Tr is not None and 'pnl_$' in Tr.columns else np.nan,
    )
    return out


def run_backtest_net_and_gross(cfg: Dict, gate_cfg: Optional[Dict] = None) -> Dict:
    net_res = run_allocator_once(cfg, gate_cfg=gate_cfg)

    gross_cfg = dict(cfg)
    gross_cfg['COST_BP'] = 0.0
    gross_cfg['IMPACT_K'] = 0.0
    gross_res = run_allocator_once(gross_cfg, gate_cfg=gate_cfg)

    out = {
        **{f"net_{k}": v for k, v in net_res.items()},
        **{f"gross_{k}": v for k, v in gross_res.items()},
    }
    try:
        out['cost_drag'] = (out.get('gross_final_equity', np.nan) - out.get('net_final_equity', np.nan))
        denom_n = max(int(out.get('net_n_trades', 0)), 1)
        denom_g = max(int(out.get('gross_n_trades', 0)), 1)
        out['net_per_trade'] = out.get('net_final_equity', np.nan) / denom_n
        out['gross_per_trade'] = out.get('gross_final_equity', np.nan) / denom_g
    except Exception:
        pass
    return out


In [49]:
# Extended grid with cooldown and confidence sweep, optional gating, and gross/net metrics
import itertools
from datetime import datetime

COOLDOWN_GRID = [1, 3, 5, 10]
CONF_MIN_GRID_EXT = [0.6, 0.7, 0.8]
FEE_BPS_GRID = [3.0, 5.0, 10.0]
SIGMA_TARGET_GRID = [0.10, 0.20, 0.30]

GATE_VARIANTS = [
    None,
    {'name': 'cons2_adv0', 'consensus_min': 2, 'advantage_min': 0.0},
    {'name': 'cons3_adv0', 'consensus_min': 3, 'advantage_min': 0.0},
    {'name': 'cons2_adv0p02', 'consensus_min': 2, 'advantage_min': 0.02},
]


def run_extended_grid():
    rows = []
    stamp = datetime.utcnow().strftime('%Y%m%d_%H%M%S')

    for cooldown, conf_min, fee_bps, sig_t in itertools.product(COOLDOWN_GRID, CONF_MIN_GRID_EXT, FEE_BPS_GRID, SIGMA_TARGET_GRID):
        for gate in GATE_VARIANTS:
            cfg = {
                'S_MIN': S_MIN,
                'M_MIN': M_MIN,
                'ALPHA_MIN': ALPHA_MIN,
                'CONF_MIN': conf_min,
                'COOLDOWN': cooldown,
                'COST_BP': fee_bps,
                'IMPACT_K': IMPACT_K,
                'flip_map': DEFAULT_FLIP_MAP,
            }
            globals()['SIGMA_TARGET'] = sig_t
            try:
                res = run_backtest_net_and_gross(cfg, gate_cfg=gate if gate else None)
                base = {
                    'COOLDOWN': cooldown,
                    'CONF_MIN': conf_min,
                    'GATE': (gate['name'] if gate else 'none'),
                    'FEE_BPS': fee_bps,
                    'SIGMA_TARGET': sig_t,
                }
                row = {**base, **res}
                rows.append(row)
            except Exception as e:
                rows.append({
                    'COOLDOWN': cooldown,
                    'CONF_MIN': conf_min,
                    'GATE': (gate['name'] if gate else 'none'),
                    'FEE_BPS': fee_bps,
                    'SIGMA_TARGET': sig_t,
                    'error': str(e)
                })

    grid_df_ext = pd.DataFrame(rows)
    sort_cols = [
        'net_final_equity', 'net_sharpe', 'net_sortino', 'net_n_trades',
        'gross_final_equity', 'gross_sharpe', 'gross_sortino'
    ]
    sort_cols = [c for c in sort_cols if c in grid_df_ext.columns]
    if sort_cols:
        grid_df_ext = grid_df_ext.sort_values(by=sort_cols, ascending=[False] + [False]*(len(sort_cols)-1))

    out_dir = 'paper_trading_outputs'
    os.makedirs(out_dir, exist_ok=True)
    out_path = f"{out_dir}/extended_grid_{stamp}.csv"
    grid_df_ext.to_csv(out_path, index=False)

    try:
        display(grid_df_ext.head(20))
    except Exception:
        pass

    try:
        print(f"Saved extended grid to: {out_path}")
    except Exception:
        pass

try:
    _ = run_extended_grid()
except Exception:
    pass

Unnamed: 0,COOLDOWN,CONF_MIN,GATE,FEE_BPS,SIGMA_TARGET,net_final_equity,net_n_trades,net_sharpe,net_sortino,net_maxDD,...,gross_final_equity,gross_n_trades,gross_sharpe,gross_sortino,gross_maxDD,gross_turnover,gross_hit_rate,cost_drag,net_per_trade,gross_per_trade
0,1,0.6,none,3.0,0.1,1.0,0,,,-0.0,...,1.0,0,,,-0.0,0.0,,0.0,1.0,1.0
1,1,0.6,cons2_adv0,3.0,0.1,1.0,0,,,-0.0,...,1.0,0,,,-0.0,0.0,,0.0,1.0,1.0
2,1,0.6,cons3_adv0,3.0,0.1,1.0,0,,,-0.0,...,1.0,0,,,-0.0,0.0,,0.0,1.0,1.0
3,1,0.6,cons2_adv0p02,3.0,0.1,1.0,0,,,-0.0,...,1.0,0,,,-0.0,0.0,,0.0,1.0,1.0
4,1,0.6,none,3.0,0.2,1.0,0,,,-0.0,...,1.0,0,,,-0.0,0.0,,0.0,1.0,1.0
5,1,0.6,cons2_adv0,3.0,0.2,1.0,0,,,-0.0,...,1.0,0,,,-0.0,0.0,,0.0,1.0,1.0
6,1,0.6,cons3_adv0,3.0,0.2,1.0,0,,,-0.0,...,1.0,0,,,-0.0,0.0,,0.0,1.0,1.0
7,1,0.6,cons2_adv0p02,3.0,0.2,1.0,0,,,-0.0,...,1.0,0,,,-0.0,0.0,,0.0,1.0,1.0
8,1,0.6,none,3.0,0.3,1.0,0,,,-0.0,...,1.0,0,,,-0.0,0.0,,0.0,1.0,1.0
9,1,0.6,cons2_adv0,3.0,0.3,1.0,0,,,-0.0,...,1.0,0,,,-0.0,0.0,,0.0,1.0,1.0


Saved extended grid to: paper_trading_outputs/extended_grid_20251028_101758.csv


## Apply best config to main run

We set CONF_MIN=0.60 and COOLDOWN=1 (no extra gating), rebuild eligibility, and re-run the allocator with default flips.

In [50]:
# Apply best config: CONF_MIN=0.60, COOLDOWN=1, GATE=None
import os
from datetime import datetime

BEST_CFG = dict(
    S_MIN=S_MIN,
    M_MIN=M_MIN,
    CONF_MIN=0.60,
    ALPHA_MIN=ALPHA_MIN,
    COOLDOWN=1,
    COST_BP=COST_BP,
    IMPACT_K=IMPACT_K,
    flip_map=DEFAULT_FLIP_MAP,
 )

if 'set_config_and_rebuild' in globals():
    try:
        set_config_and_rebuild(
            S_MIN_val=BEST_CFG['S_MIN'],
            M_MIN_val=BEST_CFG['M_MIN'],
            CONF_MIN_val=BEST_CFG['CONF_MIN'],
            ALPHA_MIN_val=BEST_CFG['ALPHA_MIN'],
            IMPACT_K_val=BEST_CFG['IMPACT_K'],
        )
    except TypeError:
        pass

# Run allocator net and gross without extra gating
res = run_backtest_net_and_gross(BEST_CFG, gate_cfg=None)

summary_cols = [
    'net_final_equity','net_n_trades','net_sharpe','net_sortino','net_maxDD','net_turnover','net_hit_rate',
    'gross_final_equity','gross_n_trades','gross_sharpe','gross_sortino','gross_maxDD','gross_turnover','gross_hit_rate',
    'cost_drag','net_per_trade','gross_per_trade'
 ]

out_dir = 'paper_trading_outputs'
os.makedirs(out_dir, exist_ok=True)
stamp = datetime.utcnow().strftime('%Y%m%d_%H%M%S')
summary_path = f"{out_dir}/best_config_summary_{stamp}.csv"

# Prepare signals and eligibility as used in execution
base_signals_df = arm_signals_df.copy()
signals_for_bt = _flip_signals(base_signals_df, BEST_CFG.get('flip_map', DEFAULT_FLIP_MAP))
eligible_df = arm_eligible_df.copy()

# Build per-arm thresholds dynamically
side_eps_vec = _make_side_eps_vec(list(signals_for_bt.columns), BEST_CFG)

Eq_df, Tr_df, Bu_df, met = run_allocator_backtest(
    bt_df=bt,
    arm_signals=signals_for_bt.values,
    arm_eligible=eligible_df.values,
    adv_series=adv20_by_ts,
    cooldown_bars=int(BEST_CFG.get('COOLDOWN', COOLDOWN)),
    cost_bp=float(BEST_CFG.get('COST_BP', COST_BP)),
    impact_k=float(BEST_CFG.get('IMPACT_K', IMPACT_K)),
    side_eps_vec=side_eps_vec,
    eps=1e-12,
 )

# Write summary with assumptions
assumptions_str = ';'.join([f"{k}={v}" for k, v in (met.get('assumptions', {}) or {}).items()])
summary_row = {**BEST_CFG, **{k: res.get(k) for k in summary_cols}, 'assumptions': assumptions_str}
pd.DataFrame([summary_row]).to_csv(summary_path, index=False)

# Enrich trade log with $ fields using robust asof merges
NOTIONAL = 100_000.0
Tr_enriched = Tr_df.copy()
if Tr_enriched is not None and not Tr_enriched.empty:
    Tr_enriched['ts'] = pd.to_datetime(Tr_enriched['ts'], errors='coerce')
    Tr_enriched = Tr_enriched.sort_values('ts').reset_index(drop=True)

    eq_series = Eq_df['equity'].copy()
    eq_series.index = pd.to_datetime(eq_series.index, errors='coerce')
    eq_series = eq_series.sort_index()

    eq_df_reset = eq_series.reset_index()
    eq_df_reset.columns = ['ts','equity']
    eq_prev_reset = eq_df_reset[['ts']].copy()
    eq_prev_reset['equity_prev'] = eq_df_reset['equity'].shift(1).values

    Tr_enriched = pd.merge_asof(Tr_enriched, eq_df_reset, on='ts', direction='backward')
    if 'equity' not in Tr_enriched.columns:
        Tr_enriched['equity'] = eq_series.reindex(Tr_enriched['ts']).fillna(method='ffill').fillna(1.0).values

    Tr_enriched = pd.merge_asof(Tr_enriched, eq_prev_reset, on='ts', direction='backward')
    if 'equity_prev' not in Tr_enriched.columns:
        Tr_enriched['equity_prev'] = eq_series.reindex(Tr_enriched['ts']).shift(1).fillna(1.0).values

    Tr_enriched['equity_prev'] = Tr_enriched['equity_prev'].fillna(1.0)
    Tr_enriched['cost_$'] = (Tr_enriched['equity_prev'] * NOTIONAL) * (Tr_enriched['cost_bps'] / 10000.0)

    next_ts = Tr_enriched['ts'].shift(-1)
    if len(eq_df_reset) > 0:
        next_ts.iloc[-1] = eq_df_reset['ts'].iloc[-1]
    next_df = pd.DataFrame({'ts': next_ts}).sort_values('ts').reset_index(drop=True)
    next_df = pd.merge_asof(next_df, eq_df_reset, on='ts', direction='backward')
    Tr_enriched['equity_next'] = next_df['equity'].fillna(eq_series.iloc[-1] if len(eq_series) else 1.0).values

    Tr_enriched['pnl_$'] = (Tr_enriched['equity_next'] - Tr_enriched['equity']) * NOTIONAL - Tr_enriched['cost_$']
    Tr_enriched['cum_pnl_$'] = Tr_enriched['pnl_$'].cumsum()

    Tr_export = Tr_enriched.rename(columns={'equity': 'equity_at_trade'})[
        ['ts','from_pos','to_pos','cost_bps','cost_$','pnl_$','cum_pnl_$','equity_at_trade','equity_prev','equity_next']
    ]
else:
    Tr_export = Tr_df

Eq_path = f"{out_dir}/best_equity_{stamp}.csv"
Tr_path = f"{out_dir}/best_trade_log_{stamp}.csv"
Bu_path = f"{out_dir}/best_bandit_updates_{stamp}.csv"
Eq_df.to_csv(Eq_path)
Tr_export.to_csv(Tr_path, index=False)
Bu_df.to_csv(Bu_path, index=False)

scaled = Eq_df.copy()
scaled['equity_$'] = scaled['equity'] * NOTIONAL
scaled['pnl_$'] = (scaled['equity'] - 1.0) * NOTIONAL

if Tr_export is not None and not Tr_export.empty:
    tr_aug = Tr_export.copy()
    tr_aug['equity_prev_$'] = tr_aug['equity_prev'] * NOTIONAL
else:
    tr_aug = None

scaled_path = f"{out_dir}/best_equity_scaled_{int(NOTIONAL)}_{stamp}.csv"
scaled.to_csv(scaled_path)
if tr_aug is not None:
    tr_scaled_path = f"{out_dir}/best_trade_log_scaled_{int(NOTIONAL)}_{stamp}.csv"
    tr_aug.to_csv(tr_scaled_path, index=False)


# BMA vs MetaStacker: split-arm comparison and outputs

This section computes and compares calibrated probabilities and simple metrics for two model arms:

- MetaStacker (calibrated meta-classifier you already trained)
- BMA (Bayesian model averaging over base learners, using validation-derived weights)

What you'll get after running Cells 2–5 below:
- CSV: per-row comparison of true label, p_meta, p_bma, and hard predictions
- JSON: summary metrics (accuracy, log loss, Brier) for both arms
- JSON: BMA weights used for the combination

Assumptions:
- Earlier cells have already prepared your eval set (X_backtest/y_backtest or X_test/y_test)
- The notebook already saved model artifacts to `paper_trading_outputs/models/LATEST.json`

In [None]:
# Load artifacts and evaluation data
import os, json, joblib, numpy as np
from datetime import datetime

# Resolve models directory and LATEST pointers
root_dir = os.getcwd()
models_dir = os.path.join(root_dir, "paper_trading_outputs", "models")
latest_fp = os.path.join(models_dir, "LATEST.json")
assert os.path.exists(latest_fp), f"LATEST.json not found at {latest_fp}. Run the training cells above first."

with open(latest_fp, "r") as f:
    latest = json.load(f)

# Load meta classifier and calibrator (already trained/saved earlier)
meta_fp = os.path.join(models_dir, latest.get("meta_classifier", ""))
calib_fp = os.path.join(models_dir, latest.get("calibrator", ""))
feat_fp = os.path.join(models_dir, latest.get("feature_columns", ""))
train_meta_fp = os.path.join(models_dir, latest.get("training_meta", ""))

meta = joblib.load(meta_fp)
calibrator = joblib.load(calib_fp) if os.path.exists(calib_fp) else None

# Load feature columns (JSON)
feature_columns = None
if os.path.exists(feat_fp):
    try:
        if feat_fp.lower().endswith('.json'):
            with open(feat_fp, 'r') as f:
                feature_columns = json.load(f)
        else:
            feature_columns = joblib.load(feat_fp)
    except Exception as e:
        print(f"[WARN] Failed to load feature columns from {os.path.basename(feat_fp)}: {e}")

# Load training meta (JSON)
train_meta = None
if os.path.exists(train_meta_fp):
    try:
        with open(train_meta_fp, "r") as f:
            train_meta = json.load(f)
    except Exception as e:
        print(f"[WARN] Failed to load training_meta JSON: {e}")

# Resolve evaluation split prepared earlier
X_eval = None
y_eval = None
for X_name, y_name in [
    ("X_bt","y_test"),
    ("X_backtest","y_backtest"),
    ("X_test_model_clean","y_test_model_clean"),
    ("X_test","y_test"),
    ("X_val","y_val"),
]:
    if X_name in globals() and y_name in globals():
        X_eval = globals()[X_name]
        y_eval = globals()[y_name]
        break

# Fallback: if no X_eval but we already have precomputed probabilities and y, allow later cells to use them
precomputed_ok = False
if X_eval is None or y_eval is None:
    if 'P_bt' in globals() and 'y_test' in globals():
        y_eval = globals()['y_test']
        precomputed_ok = True

if not precomputed_ok:
    assert X_eval is not None and y_eval is not None, (
        "Could not find evaluation data (tried X_bt/y_test, X_backtest/y_backtest, "
        "X_test_model_clean/y_test_model_clean, X_test/y_test, X_val/y_val). "
        "Please run earlier cells."
    )

# Ensure column order matches training
try:
    import pandas as pd
    if feature_columns is not None and 'pd' in globals() and X_eval is not None and isinstance(X_eval, pd.DataFrame):
        missing = [c for c in feature_columns if c not in X_eval.columns]
        assert not missing, f"Missing features in eval: {missing}"
        X_eval = X_eval[feature_columns]
except Exception as e:
    print(f"[WARN] Feature alignment skipped: {e}")

print(f"Eval ready: X_eval={getattr(X_eval, 'shape', None)} y_eval={getattr(y_eval, 'shape', None)} precomputed_ok={precomputed_ok}")
print(f"Artifacts loaded: meta={os.path.basename(meta_fp)} calibrator={os.path.basename(calib_fp) if calib_fp else 'None'} features={os.path.basename(feat_fp) if feat_fp else 'None'}")

[WARN] Feature alignment skipped: Missing features in eval: ['feature_cols', 'schema_hash']
Eval ready: X_eval=(2073, 17) y_eval=(2073,) precomputed_ok=False
Artifacts loaded: meta=meta_classifier_20251028_101254_d7a9e9fb3a42.joblib calibrator=calibrator_20251028_101254_d7a9e9fb3a42.joblib features=feature_columns_20251028_101254_d7a9e9fb3a42.json


In [53]:
# Extract base/BMA probabilities and compute BMA weights (prefer existing bma_classifier if available)
import numpy as np

base_probs = None
base_names = []
weights = None
reason = "equal"
classes_ = None

p_bma_full = None  # may be set here if bma_classifier or precomputed arrays exist

# Use bma_classifier if present and X_eval available
if 'bma_classifier' in globals() and X_eval is not None:
    try:
        p_bma_full = bma_classifier.predict_proba(X_eval)
        classes_ = getattr(bma_classifier, 'classes_', None)
        # Optional: if the classifier exposes internal base names/weights
        if hasattr(bma_classifier, 'base_names_'):
            base_names = list(getattr(bma_classifier, 'base_names_'))
        elif hasattr(bma_classifier, 'base_learners_'):
            base_names = [type(m).__name__ for m in getattr(bma_classifier, 'base_learners_')]
        if hasattr(bma_classifier, 'weights_'):
            w = np.asarray(getattr(bma_classifier, 'weights_'), dtype=float)
            weights = w / w.sum() if w.sum() != 0 else None
            reason = 'model_internal'
        print("[INFO] Used existing bma_classifier to compute p_bma_full.")
    except Exception as e:
        print(f"[WARN] bma_classifier available but failed to predict: {e}. Will try fallback methods.")

# If precomputed probabilities exist in the notebook, use them
if p_bma_full is None and 'P_bma_bt' in globals():
    p_bma_full = globals()['P_bma_bt']
    # classes: try meta or bma calibrator
    if classes_ is None:
        if 'bma_class_calibrator' in globals() and hasattr(bma_class_calibrator, 'classes_'):
            classes_ = getattr(bma_class_calibrator, 'classes_')
        elif hasattr(meta, 'classes_'):
            classes_ = getattr(meta, 'classes_')
    base_names = ['bma_combined']
    weights = np.array([1.0])
    reason = 'precomputed'
    print("[INFO] Using precomputed P_bma_bt from earlier cells.")

# If neither bma_classifier nor precomputed arrays, fallback to combining base estimators from meta
if p_bma_full is None:
    # Try to discover base estimators stored inside the meta model
    base_estimators = None
    for attr in ["base_learners", "base_models", "base_estimators", "estimators_"]:
        if hasattr(meta, attr):
            base_estimators = getattr(meta, attr)
            break
    if base_estimators is None and hasattr(meta, "estimators_") and isinstance(meta.estimators_, list):
        base_estimators = [est for _, est in meta.estimators_]

    assert base_estimators is not None and len(base_estimators) > 0, "Could not locate base estimators on the meta model. Expose them or save separately."

    # Compute per-base probabilities on the eval set
    base_prob_list = []
    base_names = []
    for i, est in enumerate(base_estimators):
        name = getattr(est, "__class__", type(est)).__name__
        base_names.append(name)
        if hasattr(est, "predict_proba"):
            probs = est.predict_proba(X_eval)
        else:
            if hasattr(est, "decision_function"):
                df = est.decision_function(X_eval)
                if df.ndim == 1:
                    p1 = 1.0/(1.0+np.exp(-df))
                    probs = np.vstack([1-p1, p1]).T
                else:
                    ex = np.exp(df - np.max(df, axis=1, keepdims=True))
                    probs = ex/np.sum(ex, axis=1, keepdims=True)
            else:
                raise RuntimeError(f"Base estimator {name} has neither predict_proba nor decision_function")
        base_prob_list.append(probs)

    # Determine class ordering from meta or any base
    for obj in [meta] + base_estimators:
        if hasattr(obj, "classes_"):
            classes_ = getattr(obj, "classes_")
            break

    # Compute BMA weights from training_meta if possible
    if train_meta is not None:
        try:
            logloss_by_name = {}
            cv_info = train_meta.get("cv", {})
            for nm in base_names:
                candidates = [nm, nm.lower(), nm.replace("Classifier",""), nm.replace(" ", "").lower()]
                found = None
                for key in cv_info.keys():
                    if key in ("base_models", "base_learners", "models") and isinstance(cv_info[key], dict):
                        for k2, v2 in cv_info[key].items():
                            if k2 == nm or k2 in candidates or k2.lower() in candidates:
                                if isinstance(v2, dict) and "logloss" in v2:
                                    found = v2["logloss"]
                                    break
                    elif key == nm and isinstance(cv_info[key], dict) and "logloss" in cv_info[key]:
                        found = cv_info[key]["logloss"]
                if found is not None:
                    logloss_by_name[nm] = float(found)
            if len(logloss_by_name) >= 1:
                ws = []
                for nm in base_names:
                    ll = logloss_by_name.get(nm, None)
                    ws.append(np.exp(-ll) if ll is not None else 1.0)
                weights = np.asarray(ws, dtype=float)
                weights = weights/weights.sum()
                reason = "validation_logloss"
        except Exception as e:
            print(f"[WARN] Failed to use training_meta for weights: {e}")

    if weights is None:
        weights = np.ones(len(base_prob_list), dtype=float)/len(base_prob_list)

    base_probs = np.stack(base_prob_list, axis=0)

print("Base/BMA selection:")
print(" - base_names:", base_names if base_names else '[not applicable]')
print(" - weights:", ( {nm: float(w) for nm, w in zip(base_names, weights)} if weights is not None and len(base_names)==len(weights) else (weights if weights is not None else None) ))
print(" - reason:", reason)
print(" - classes_ set:", classes_ is not None)

[WARN] bma_classifier available but failed to predict: float() argument must be a string or a real number, not 'dict'. Will try fallback methods.
Base/BMA selection:
 - base_names: [not applicable]
 - weights: None
 - reason: equal
 - classes_ set: False


In [55]:
# Compute p_meta and p_bma on the eval set
import numpy as np

# MetaStacker calibrated probability: prefer fresh compute on X_eval; fallback to precomputed if no X_eval
if 'X_eval' in globals() and X_eval is not None:
    try:
        p_meta_full = meta.predict_proba(X_eval)
        classes_meta = getattr(meta, 'classes_', None)
    except Exception as e:
        raise RuntimeError(f"Meta predict_proba failed: {e}")
elif 'P_bt' in globals() and isinstance(P_bt, np.ndarray):
    p_meta_full = P_bt
    if 'meta_classifier' in globals() and hasattr(meta_classifier, 'classes_'):
        classes_meta = getattr(meta_classifier, 'classes_')
    else:
        classes_meta = getattr(meta, 'classes_', None)
else:
    raise RuntimeError("No path to get Meta probabilities: need X_eval with meta model or precomputed P_bt.")

# BMA combined probability
if 'p_bma_full' in globals() and p_bma_full is not None:
    pass  # already computed in previous cell via bma_classifier or precomputed
elif 'P_bma_bt' in globals() and isinstance(P_bma_bt, np.ndarray):
    p_bma_full = P_bma_bt
else:
    if 'base_probs' in globals() and base_probs is not None and 'weights' in globals() and weights is not None:
        # base_probs shape: (n_models, n_samples, n_classes)
        p_bma_full = np.tensordot(weights, base_probs, axes=(0,0))  # -> (n_samples, n_classes)
    else:
        raise RuntimeError("No path to compute BMA probabilities: need bma_classifier, precomputed P_bma_bt, or base_probs+weights.")

# Determine canonical classes for downstream handling
classes_ = classes_meta if 'classes_' not in globals() or classes_ is None else classes_
if classes_ is None:
    if 'bma_class_calibrator' in globals() and hasattr(bma_class_calibrator, 'classes_'):
        classes_ = getattr(bma_class_calibrator, 'classes_')
    elif 'y_eval' in globals() and y_eval is not None:
        vals = np.unique(np.asarray(y_eval))
        assert len(vals) >= 2, "Need at least 2 classes"
        classes_ = vals

# For binary tasks, distill to positive-class probability (class == 1 if present)
classes_arr = np.array(classes_)
if p_meta_full.shape[1] == 2 and p_bma_full.shape[1] == 2:
    if 1 in classes_arr:
        pos_idx = int(np.where(classes_arr == 1)[0][0])
    else:
        pos_idx = int(np.argmax(classes_arr))
    p_meta = p_meta_full[:, pos_idx]
    p_bma = p_bma_full[:, pos_idx]
else:
    p_meta = p_meta_full
    p_bma = p_bma_full

print("Computed p_meta and p_bma.")
print("Shapes:", getattr(p_meta, 'shape', None), getattr(p_bma, 'shape', None))

Computed p_meta and p_bma.
Shapes: (2073, 3) (2073, 3)


In [57]:
# Metrics and exports: CSV + JSON
import os, json
from datetime import datetime
from sklearn.metrics import accuracy_score, log_loss, brier_score_loss
import numpy as np

# Prepare y_true in numeric form
y_true = np.asarray(y_eval)

# Determine binary vs multiclass flows
is_multiclass = (p_meta.ndim == 2 and p_meta.shape[1] > 2) or (p_bma.ndim == 2 and p_bma.shape[1] > 2)

# Helper metrics
metrics = {}
if not is_multiclass:
    # Binary: probabilities are 1D (positive class)
    y_pred_meta = (p_meta >= 0.5).astype(int)
    y_pred_bma = (p_bma >= 0.5).astype(int)

    # For log_loss, need probs for both classes
    meta_prob_2c = np.vstack([1 - p_meta, p_meta]).T
    bma_prob_2c = np.vstack([1 - p_bma, p_bma]).T

    metrics["meta"] = {
        "accuracy": float(accuracy_score(y_true, y_pred_meta)),
        "log_loss": float(log_loss(y_true, meta_prob_2c, labels=[0,1])),
        "brier": float(brier_score_loss(y_true, p_meta)),
    }
    metrics["bma"] = {
        "accuracy": float(accuracy_score(y_true, y_pred_bma)),
        "log_loss": float(log_loss(y_true, bma_prob_2c, labels=[0,1])),
        "brier": float(brier_score_loss(y_true, p_bma)),
    }
else:
    # Multiclass
    y_pred_meta = np.argmax(p_meta, axis=1)
    y_pred_bma = np.argmax(p_bma, axis=1)

    metrics["meta"] = {
        "accuracy": float(accuracy_score(y_true, y_pred_meta)),
        "log_loss": float(log_loss(y_true, p_meta)),
    }
    metrics["bma"] = {
        "accuracy": float(accuracy_score(y_true, y_pred_bma)),
        "log_loss": float(log_loss(y_true, p_bma)),
    }

# Export per-row comparison
ts = datetime.utcnow().strftime("%Y%m%d_%H%M%S")
out_dir = os.path.join(root_dir, "paper_trading_outputs")
os.makedirs(out_dir, exist_ok=True)

comp_csv = os.path.join(out_dir, f"bma_meta_comparison_{ts}.csv")
summary_json = os.path.join(out_dir, f"bma_meta_comparison_summary_{ts}.json")
weights_json = os.path.join(out_dir, f"bma_weights_{ts}.json")

try:
    import pandas as pd
    df = pd.DataFrame({
        "y_true": y_true,
        "p_meta": p_meta if p_meta.ndim == 1 else p_meta.tolist(),
        "p_bma": p_bma if p_bma.ndim == 1 else p_bma.tolist(),
        "y_pred_meta": y_pred_meta,
        "y_pred_bma": y_pred_bma,
    })
    # Flatten arrays into JSON-friendly strings for multiclass
    if is_multiclass:
        df["p_meta"] = df["p_meta"].apply(lambda x: json.dumps(x))
        df["p_bma"] = df["p_bma"].apply(lambda x: json.dumps(x))
    df.to_csv(comp_csv, index=False)
except Exception as e:
    print(f"[WARN] Failed to write CSV via pandas ({e}), writing minimal CSV manually.")
    with open(comp_csv, "w") as f:
        header = ["y_true","p_meta","p_bma","y_pred_meta","y_pred_bma"]
        f.write(",".join(header) + "\n")
        for i in range(len(y_true)):
            pm = p_meta[i] if p_meta.ndim == 1 else json.dumps(p_meta[i].tolist())
            pb = p_bma[i] if p_bma.ndim == 1 else json.dumps(p_bma[i].tolist())
            f.write(f"{y_true[i]},{pm},{pb},{y_pred_meta[i]},{y_pred_bma[i]}\n")

# Export summary metrics and weights
with open(summary_json, "w") as f:
    json.dump({"metrics": metrics, "is_multiclass": is_multiclass}, f, indent=2)

# Weights may be unavailable when using precomputed P_bma_bt
weights_payload = {}
if 'weights' in globals() and weights is not None and 'base_names' in globals() and base_names and len(base_names) == len(weights):
    weights_payload = {nm: float(w) for nm, w in zip(base_names, weights)}
else:
    source = 'precomputed' if ('P_bma_bt' in globals()) else 'unknown'
    weights_payload = {"source": source, "weights": None}

with open(weights_json, "w") as f:
    json.dump(weights_payload, f, indent=2)

print("Saved:")
print(" -", comp_csv)
print(" -", summary_json)
print(" -", weights_json)
print("Metrics:", json.dumps(metrics, indent=2))

Saved:
 - c:\Users\vyshn\OneDrive\Desktop\MetaStacker\BotV2-LSTM\paper_trading_outputs\bma_meta_comparison_20251028_102144.csv
 - c:\Users\vyshn\OneDrive\Desktop\MetaStacker\BotV2-LSTM\paper_trading_outputs\bma_meta_comparison_summary_20251028_102144.json
 - c:\Users\vyshn\OneDrive\Desktop\MetaStacker\BotV2-LSTM\paper_trading_outputs\bma_weights_20251028_102144.json
Metrics: {
  "meta": {
    "accuracy": 0.3333333333333333,
    "log_loss": 1.189169603978172
  },
  "bma": {
    "accuracy": 0.35407621804148576,
    "log_loss": 1.1180699129878833
  }
}
