In [None]:
"""
Hull Tactical — Market Prediction: Strong baseline notebook
What this notebook does
- Loads local train/test (no internet)
- Time-series-safe preprocessing and light feature engineering
- Walk-forward (expanding window) cross-validation with LightGBM
- Volatility-aware allocation mapping in [0, 2] with a practical risk cap
- Diagnostics: CV RMSE, feature importances, simple backtest and Sharpe stats
- Trains final ensemble and writes submission.csv with columns ['date_id','allocation']

Notes & cautions
- This is a reproducible, efficient baseline designed to run < 1 hour in Kaggle.
- To reach top leaderboard: engineer better features (macro, cross-asset, seasonalities),
  robust stacking/ensembling, volatility forecasting, and careful walk-forward tuning.
"""

import warnings
warnings.filterwarnings('ignore')

import os
import gc
import time
from pathlib import Path

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error
import lightgbm as lgb

# Global seed: used for NumPy here and passed to LightGBM via lgb_params below.
SEED = 42
np.random.seed(SEED)

# Input locations, resolved relative to the current working directory.
TRAIN_PATH = 'train.csv'
TEST_PATH  = 'test.csv'
EVAL_DIR   = 'kaggle_evaluation'  # present in competition env; not required here

# Wall-clock reference for the "Total runtime" log line at the end of this cell.
start_time = time.time()

# LOGGER is normally created by get_logger() in the "Production configuration"
# cell, which appears AFTER this cell in the file. Without a fallback, running
# the notebook top-to-bottom in a fresh kernel fails with NameError on the first
# LOGGER call. Bootstrap an equivalent logger (same name, level and format) so
# both execution orders work; get_logger() later reuses it because it only adds
# a handler when none is attached.
if 'LOGGER' not in globals():
    import logging

    LOGGER = logging.getLogger('hull_prod')
    if not LOGGER.handlers:
        LOGGER.setLevel(logging.INFO)
        _bootstrap_handler = logging.StreamHandler()
        _bootstrap_handler.setLevel(logging.INFO)
        _bootstrap_handler.setFormatter(
            logging.Formatter('[%(asctime)s] %(levelname)s:%(name)s: %(message)s')
        )
        LOGGER.addHandler(_bootstrap_handler)

# 1) Load data
LOGGER.info('Loading data...')
train = pd.read_csv(TRAIN_PATH)
test  = pd.read_csv(TEST_PATH)

# Sort by time to ensure proper temporal order; every lag/rolling feature and
# the walk-forward splits below rely on rows being in ascending date_id order.
if 'date_id' in train.columns:
    train = train.sort_values('date_id').reset_index(drop=True)
if 'date_id' in test.columns:
    test = test.sort_values('date_id').reset_index(drop=True)

LOGGER.info(f'Train shape: {train.shape}')
LOGGER.info(f'Test  shape: {test.shape}')

# 2) Target selection (excess returns preferred).
# The target is always the column 'market_forward_excess_returns': taken as-is
# when the file provides it, otherwise derived as forward_returns - risk_free_rate.
# If neither route is possible the notebook stops with a ValueError.
if 'market_forward_excess_returns' in train.columns:
    LOGGER.info('Using TARGET = market_forward_excess_returns')
elif 'forward_returns' in train.columns and 'risk_free_rate' in train.columns:
    train['market_forward_excess_returns'] = train['forward_returns'] - train['risk_free_rate']
    LOGGER.info('Computed TARGET = forward_returns - risk_free_rate')
else:
    raise ValueError('Target not found: need market_forward_excess_returns or (forward_returns and risk_free_rate).')
TARGET = 'market_forward_excess_returns'

# Realized return series for the toy backtest, in order of preference:
# raw forward returns, excess returns + risk-free rate, excess returns alone.
# None means no usable series was found and the backtest is skipped.
if 'forward_returns' in train.columns:
    realized_returns = train['forward_returns'].copy()
elif 'market_forward_excess_returns' not in train.columns:
    realized_returns = None
elif 'risk_free_rate' in train.columns:
    realized_returns = train['market_forward_excess_returns'] + train['risk_free_rate']
else:
    realized_returns = train['market_forward_excess_returns'].copy()

# 3) Feature selection and preprocessing
# Candidate features are every numeric train column that is neither an
# identifier nor (a component of) the target.
exclude_cols = {'date_id', TARGET, 'forward_returns', 'risk_free_rate'}
num_cols = train.select_dtypes(include=[np.number]).columns.tolist()
feature_cols = [name for name in num_cols if name not in exclude_cols]
LOGGER.info(f'Initial numeric feature count: {len(feature_cols)}')

# Stack train on top of test so that lags/rolling windows at the start of test
# can look back into the tail of train (test is assumed to follow train in time).
train_rows = train.shape[0]
DF = pd.concat(
    [
        frame[feature_cols + (['date_id'] if 'date_id' in frame.columns else [])]
        for frame in (train, test)
    ],
    axis=0,
    ignore_index=True,
)

# Turn +/-inf into NaN, then impute base features with medians computed on the
# train rows only (statistics from test never enter the imputation).
for col in feature_cols:
    DF[col] = DF[col].replace([np.inf, -np.inf], np.nan)
base_medians = train[feature_cols].median()
DF[feature_cols] = DF[feature_cols].fillna(base_medians)

# Light, fast feature engineering. Both derived columns are shifted by one row,
# so the value on row t only uses information up to row t-1:
#   <col>_lag1   : previous row's value
#   <col>_rmean5 : mean of the previous (up to) five rows
lag_cols = []
for col in feature_cols:
    lag1, rmean5 = f'{col}_lag1', f'{col}_rmean5'
    previous_value = DF[col].shift(1)
    trailing_mean = DF[col].rolling(window=5, min_periods=1).mean()
    DF[lag1] = previous_value
    DF[rmean5] = trailing_mean.shift(1)
    lag_cols += [lag1, rmean5]

# The shift leaves NaNs at the top; fill them with train-only medians as well.
lag_medians = DF.iloc[:train_rows][lag_cols].median()
DF[lag_cols] = DF[lag_cols].fillna(lag_medians)

# Re-split into engineered train/test (copies, so later column additions on
# train_fe/test_fe never write back into DF).
train_fe = DF.iloc[:train_rows].copy()
test_fe  = DF.iloc[train_rows:].copy()

# Restore target and date_id (aligned positionally with the sorted frames).
train_fe[TARGET] = train[TARGET].values
if 'date_id' in train.columns:
    train_fe['date_id'] = train['date_id'].values
if 'date_id' in test.columns:
    test_fe['date_id'] = test['date_id'].values

# Final feature list: everything in train_fe except the identifier and target.
# It is frozen here, so columns added to train_fe later are not model inputs.
features = [name for name in train_fe.columns if name not in ('date_id', TARGET)]
LOGGER.info(f'Final features count: {len(features)}')

# Quick EDA prints
LOGGER.info('Basic EDA:')
LOGGER.info(f"Target mean/std: {train_fe[TARGET].mean():.6f} {train_fe[TARGET].std():.6f}")
# Measured on the untouched `train` frame, i.e. before any imputation.
missing_rate = train[feature_cols].isna().mean().mean()
LOGGER.info(f'Average missing rate (pre-impute) over base features: {missing_rate:.4f}')

# 4) Time-series CV: expanding window / walk-forward

def expanding_walk_forward_splits(n_samples: int,
                                  n_splits: int = 5,
                                  min_train_ratio: float = 0.6,
                                  val_size_ratio: float = 0.1,
                                  min_train: int = 252,
                                  min_val: int = 120):
    """Yield expanding-window (walk-forward) train/validation index pairs.

    Every fold trains on rows ``[0, s)`` and validates on the block
    ``[s, s + val_size)`` that immediately follows, so validation data is
    always strictly later than training data. The start positions ``s`` are
    spread evenly between the minimum training size and the last position that
    still leaves a full validation block.

    Args:
        n_samples: Total number of time-ordered rows.
        n_splits: Number of start positions requested; fewer folds are yielded
            when positions coincide or are unusable.
        min_train_ratio: Minimum training size as a fraction of ``n_samples``.
        val_size_ratio: Validation block size as a fraction of ``n_samples``.
        min_train: Absolute lower bound for the training size.
        min_val: Absolute lower bound for the validation block size.

    Yields:
        ``(train_idx, val_idx)`` tuples of integer position arrays. Both arrays
        are non-empty and contain only positions in ``[0, n_samples)``.
    """
    min_train_size = max(int(n_samples * min_train_ratio), min_train)
    val_size = max(int(n_samples * val_size_ratio), min_val)
    if min_train_size + val_size >= n_samples:
        # fallback to ensure at least one fold
        min_train_size = max(min_train, n_samples - 2 * min_val)
        val_size = min_val
    starts = np.linspace(min_train_size, n_samples - val_size, num=n_splits, dtype=int)
    seen = set()
    for s in starts:
        s = int(s)
        # On short inputs the fallback above can still produce starts outside
        # the data: s <= 0 means an empty training set (and, for negative s,
        # negative validation indices that would wrap around under iloc), while
        # s >= n_samples leaves nothing to validate on. Skip both.
        if s < 1 or s >= n_samples:
            continue
        # Integer truncation of linspace can repeat a start; emit each once.
        if s in seen:
            continue
        seen.add(s)
        tr_idx = np.arange(0, s)
        val_end = min(s + val_size, n_samples)
        val_idx = np.arange(s, val_end)
        if len(val_idx) > 0:
            yield tr_idx, val_idx

# Materialise the walk-forward folds once; the same list is reused for model
# training, OOF assembly, and as the fallback of walk_forward_by_dates().
n_samples = train_fe.shape[0]
splits = list(expanding_walk_forward_splits(n_samples, n_splits=5))
LOGGER.info(f'CV folds: {len(splits)}')

# 5) Train LightGBM models on each fold
# Shared booster parameters. The final-ensemble cell copies this dict and only
# overrides 'seed' per model.
lgb_params = {
    'objective': 'regression',
    'metric': 'rmse',
    'boosting_type': 'gbdt',
    'learning_rate': 0.02,
    'num_leaves': 64,
    'max_depth': -1,
    'min_data_in_leaf': 50,
    'feature_fraction': 0.7,
    'bagging_fraction': 0.8,
    'bagging_freq': 5,
    'seed': SEED,
    'verbosity': -1,
    'num_threads': -1,
}

# Per-fold accumulators filled by the training loop below:
models = []                            # one trained booster per fold, in fold order
val_scores = []                        # validation RMSE per fold
feature_importance_df = pd.DataFrame() # long table: feature / importance (gain) / fold

# Train one booster per walk-forward fold, recording its validation RMSE and
# gain-based feature importances.
for fold, (tr_idx, val_idx) in enumerate(splits):
    X_tr = train_fe.iloc[tr_idx][features]
    y_tr = train_fe.iloc[tr_idx][TARGET]
    X_val = train_fe.iloc[val_idx][features]
    y_val = train_fe.iloc[val_idx][TARGET]

    LOGGER.info(f'Fold {fold+1}/{len(splits)} — train {len(tr_idx)} val {len(val_idx)}')
    dtrain = lgb.Dataset(X_tr, label=y_tr)
    dvalid = lgb.Dataset(X_val, label=y_val)

    # Early stopping and periodic evaluation logging are configured through
    # callbacks: the `early_stopping_rounds` / `verbose_eval` keyword arguments
    # were removed from lgb.train() in LightGBM 4.0, so passing them raises
    # TypeError on current releases. The callback form also works on 3.3+.
    bst = lgb.train(
        params=lgb_params,
        train_set=dtrain,
        num_boost_round=2000,
        valid_sets=[dtrain, dvalid],
        valid_names=['train', 'valid'],
        callbacks=[
            lgb.early_stopping(stopping_rounds=200),
            lgb.log_evaluation(period=200),
        ],
    )

    models.append(bst)

    val_pred = bst.predict(X_val, num_iteration=bst.best_iteration)
    # RMSE computed explicitly: the `squared=False` argument of
    # mean_squared_error was deprecated in scikit-learn 1.4 and later removed.
    rmse = float(np.sqrt(mean_squared_error(y_val, val_pred)))
    LOGGER.info(f'Fold {fold+1} RMSE: {rmse:.6f}')
    val_scores.append(rmse)

    # Importances are aligned with `features` because the Dataset was built
    # from exactly those columns, in that order.
    fi = pd.DataFrame({
        'feature': features,
        'importance': bst.feature_importance(importance_type='gain'),
        'fold': fold,
    })
    feature_importance_df = pd.concat([feature_importance_df, fi], axis=0)

    # Release per-fold frames/datasets before the next (larger) fold is built.
    del X_tr, y_tr, X_val, y_val, dtrain, dvalid
    gc.collect()

LOGGER.info(f"CV RMSE mean: {float(np.mean(val_scores)):.6f}")
LOGGER.info(f"CV RMSE std : {float(np.std(val_scores)):.6f}")

# 6) OOF predictions for diagnostics/backtest
# Each fold's model predicts only its own validation rows. Validation blocks
# of different folds may overlap, so predictions are summed and then divided
# by the number of folds that covered each row.
oof = np.zeros(n_samples, dtype=float)
counts = np.zeros(n_samples, dtype=float)
for fold, (tr_idx, val_idx) in enumerate(splits):
    bst = models[fold]
    oof[val_idx] += bst.predict(train_fe.iloc[val_idx][features], num_iteration=bst.best_iteration)
    counts[val_idx] += 1

mask = counts > 0
oof[mask] /= counts[mask]
# NOTE(review): rows never used for validation (counts == 0, i.e. everything
# before the first fold's validation block) keep a prediction of exactly 0.0
# rather than NaN. Downstream volatility estimates and backtests treat those
# zeros as real predictions — confirm this is intended.
train_fe['pred'] = oof

# Feature importances (mean over folds)
fi_mean = (feature_importance_df.groupby('feature')['importance']
           .mean().sort_values(ascending=False))
LOGGER.info('Top 20 features by average gain:')
LOGGER.info(f"\n{fi_mean.head(20)}")

# Horizontal bar chart; re-sorted ascending so the largest bar ends up on top.
plt.figure(figsize=(8, 6))
fi_mean.head(20).sort_values().plot(kind='barh')
plt.title('Top 20 feature importances (avg gain)')
plt.tight_layout()
plt.show()

# 7) Simple volatility-aware mapping from predictions to allocation in [0, 2]
# Estimate rolling prediction volatility to scale signals. The warm-up NaNs are
# filled with the median of the first available window estimates.
pred_sigma = pd.Series(train_fe['pred']).rolling(window=20, min_periods=5).std()
pred_sigma = pred_sigma.fillna(pred_sigma.iloc[5:25].median() if pred_sigma.notna().any() else 1.0)

risk_k = 0.8  # aggressiveness; tune in walk-forward if time allows
# Allocation = neutral weight 1.0 plus k times the prediction in sigma units.
# A zero sigma is replaced by a tiny positive number to avoid division by zero.
alloc_oof = 1.0 + risk_k * (train_fe['pred'] / (pred_sigma.replace(0, np.nan).fillna(1e-6)))
alloc_oof = alloc_oof.clip(0.0, 2.0)
train_fe['alloc'] = alloc_oof

# Enforce vol cap: realized strategy vol <= 1.2 * market vol (rolling)
if realized_returns is not None:
    train_fe['realized_returns'] = realized_returns.values
    strat_ret_raw = train_fe['alloc'] * train_fe['realized_returns']
    # 180-row rolling volatilities; the warm-up period falls back to the
    # full-sample standard deviation.
    strat_vol_180 = strat_ret_raw.rolling(180, min_periods=30).std().fillna(strat_ret_raw.std())
    market_vol_180 = train_fe['realized_returns'].rolling(180, min_periods=30).std().fillna(train_fe['realized_returns'].std())

    # The scale factor multiplies the deviation from the neutral weight (1.0),
    # not the allocation itself.
    scale = (1.2 * market_vol_180) / (strat_vol_180.replace(0, np.nan).fillna(1e-6))
    scale = scale.clip(0.0, 2.0)
    alloc_scaled = 1.0 + (train_fe['alloc'] - 1.0) * scale
    train_fe['alloc_scaled'] = alloc_scaled.clip(0.0, 2.0)

    # Simple transaction cost model: one-way cost on changes in allocation.
    # CONFIG is defined in the "Production configuration" cell, which appears
    # after this one in the file; look it up defensively so a top-to-bottom run
    # does not fail with NameError. When CONFIG is not available yet the cost
    # defaults to 0 bps, the same default already used for a missing key.
    tc = globals().get('CONFIG', {}).get('transaction_cost_bps', 0.0) / 10000.0
    alloc_change_raw = train_fe['alloc'].diff().abs().fillna(0.0)
    alloc_change_scaled = train_fe['alloc_scaled'].diff().abs().fillna(0.0)
    tc_raw = tc * alloc_change_raw
    tc_scaled = tc * alloc_change_scaled

    # Backtest diagnostics (after costs)
    train_fe['strat_ret_raw'] = strat_ret_raw - tc_raw
    train_fe['strat_ret_scaled'] = (train_fe['alloc_scaled'] * train_fe['realized_returns']) - tc_scaled

    # Compounded growth of one unit for the market and both strategy variants.
    train_fe['cum_sp500'] = (1.0 + train_fe['realized_returns']).cumprod()
    train_fe['cum_raw']   = (1.0 + train_fe['strat_ret_raw']).cumprod()
    train_fe['cum_scaled']= (1.0 + train_fe['strat_ret_scaled']).cumprod()

    plt.figure(figsize=(10, 6))
    x_axis = train_fe['date_id'] if 'date_id' in train_fe.columns else np.arange(len(train_fe))
    plt.plot(x_axis, train_fe['cum_sp500'], label='Market (buy-hold)')
    plt.plot(x_axis, train_fe['cum_raw'], label='Strategy (raw)')
    plt.plot(x_axis, train_fe['cum_scaled'], label='Strategy (vol scaled)')
    plt.legend(); plt.title('Cumulative performance (toy backtest)'); plt.tight_layout(); plt.show()

    def ann_sharpe(returns: pd.Series, days_per_year: int = 252):
        """Return the annualised mean/std ratio of a per-period return series.

        No risk-free rate is subtracted. A small epsilon in the denominator
        keeps the result finite for a constant series.
        """
        mu = returns.mean() * days_per_year
        sd = returns.std() * np.sqrt(days_per_year)
        return float(mu / (sd + 1e-9))

    LOGGER.info(f"Ann Sharpe raw   (toy): {ann_sharpe(train_fe['strat_ret_raw'].fillna(0)):.4f}")
    LOGGER.info(f"Ann Sharpe scaled(toy): {ann_sharpe(train_fe['strat_ret_scaled'].fillna(0)):.4f}")
    LOGGER.info(f"Ann Sharpe market      : {ann_sharpe(train_fe['realized_returns'].fillna(0)):.4f}")
else:
    LOGGER.warning('Skipping backtest: realized forward returns not available in train.')

# 8) Train final ensemble on full train and predict test
# `features` was frozen before 'pred'/'alloc'/backtest columns were added to
# train_fe, so none of those diagnostics leak into the model inputs.
X_full = train_fe[features]
y_full = train_fe[TARGET]

# No validation set exists when training on all rows, so the number of boosting
# rounds is fixed to the mean early-stopped iteration of the CV models, clamped
# to [200, 2000]; 1000 is used when no CV model is available.
avg_best_iter = int(np.clip(np.mean([m.best_iteration for m in models]), 200, 2000)) if len(models) else 1000
LOGGER.info(f'Using avg_best_iter: {avg_best_iter}')

final_models = []
N_FINAL = 3  # small ensemble for stability
for i in range(N_FINAL):
    # Same hyper-parameters for every member; only the seed differs.
    params = lgb_params.copy()
    params['seed'] = SEED + 13 * i
    dtrain = lgb.Dataset(X_full, label=y_full)
    # `verbose_eval` was removed from lgb.train() in LightGBM 4.0 and raises
    # TypeError there. Output is already silenced by 'verbosity': -1 in the
    # parameters and there is no validation set to report on.
    bst = lgb.train(params, dtrain, num_boost_round=avg_best_iter)
    final_models.append(bst)
    del dtrain

def rank_normalize(arr: np.ndarray) -> np.ndarray:
    """Map values to their rank scaled into [0, 1].

    The smallest element becomes 0.0 and the largest 1.0. Equal values receive
    distinct consecutive ranks in whatever order ``np.argsort`` returns them.
    Inputs with zero or one element come back as an all-zero float array.
    """
    size = len(arr)
    if size <= 1:
        return np.zeros_like(arr, dtype=float)
    # Inverting the sorting permutation gives every element its rank; this is
    # the same result as applying argsort twice.
    order = np.argsort(arr)
    positions = np.empty(size, dtype=order.dtype)
    positions[order] = np.arange(size, dtype=order.dtype)
    denominator = max(size - 1, 1)
    return positions.astype(float) / denominator

# Predict test with rank-based ensembling (often more robust for Sharpe-like metrics)
X_test = test_fe[features]
preds_list = []
for bst in final_models:
    # The final models were trained without a validation set; best_iteration is
    # passed through unchanged from the booster.
    preds_list.append(bst.predict(X_test, num_iteration=bst.best_iteration))

# Average of ranks (0..1), then center roughly around 0 by subtracting 0.5 for mapping
# Ranks are computed within the test batch, so preds_test lies in [-0.5, 0.5]
# and depends on which rows are present in test.
ranked_preds = np.stack([rank_normalize(p) for p in preds_list], axis=1).mean(axis=1)
preds_test = ranked_preds - 0.5

# Allocation mapping in [0, 2]
# Use sigma estimated from recent prediction volatility on train as a proxy
# NOTE(review): sigma_est is the rolling std of RAW out-of-fold predictions,
# while preds_test is on the centered-rank scale ([-0.5, 0.5]). The two are in
# different units, so the ratio below is not a z-score comparable to the one
# used on train — verify the allocations are not simply saturating at 0 or 2.
pred_sigma_train = pd.Series(train_fe['pred']).rolling(window=20, min_periods=5).std()
if pred_sigma_train.notna().any():
    # Prefer the median over the last 50 rows; fall back to the overall median.
    sigma_est = float(pred_sigma_train.iloc[-50:].median()) if pred_sigma_train.iloc[-50:].notna().any() else float(pred_sigma_train.dropna().median())
else:
    sigma_est = float(train_fe['pred'].std() if 'pred' in train_fe else 1.0)

k = 0.8  # same aggressiveness factor used above
alloc_test = 1.0 + k * (preds_test / (sigma_est + 1e-9))
alloc_test = np.clip(alloc_test, 0.0, 2.0)

# Apply a global downscale if recent historical strategy vol would breach cap
if realized_returns is not None:
    # Compare strategy vs market volatility over the last (up to) 180 train rows.
    hist_window = min(len(train_fe), 180)
    hist_strat_vol = (train_fe['alloc'].iloc[-hist_window:] * train_fe['realized_returns'].iloc[-hist_window:]).std()
    hist_mkt_vol   = train_fe['realized_returns'].iloc[-hist_window:].std()
    # Capped at 1.0: this step can only shrink the deviation from the neutral
    # allocation of 1.0, never amplify it.
    scale_global = min(1.0, (1.2 * hist_mkt_vol) / (hist_strat_vol + 1e-9)) if hist_strat_vol > 0 else 1.0
    alloc_test = 1.0 + (alloc_test - 1.0) * scale_global
    alloc_test = np.clip(alloc_test, 0.0, 2.0)

# Prepare submission
# Falls back to a 0..n-1 row counter when test has no date_id column.
submission = pd.DataFrame({
    'date_id': test_fe['date_id'].values if 'date_id' in test_fe.columns else np.arange(len(test_fe)),
    'allocation': alloc_test.astype(float)
})
submission.to_csv('submission.csv', index=False)
LOGGER.info(f"Wrote submission.csv with {submission.shape[0]} rows")
LOGGER.debug(f"Submission head:\n{submission.head()}")

# Save artifacts
# Best effort: any failure (e.g. joblib missing) is logged and does not abort the run.
try:
    import joblib
    joblib.dump(final_models, 'final_lgb_models.pkl')
    LOGGER.info('Saved final_lgb_models.pkl')
except Exception as e:
    LOGGER.warning(f'Model save skipped: {e}')

# start_time was captured right after the imports at the top of this cell.
elapsed = time.time() - start_time
LOGGER.info(f'Total runtime: {elapsed/60:.1f} min')



In [None]:
# === Production configuration, logging, and utilities ===
import logging, json, random, platform

def get_logger(name: str = 'hull_prod', level: int = logging.INFO) -> logging.Logger:
    """Return the logger called *name*, attaching a stream handler on first use.

    A logger that already has at least one handler is returned untouched (its
    level is not changed either), so calling this repeatedly — e.g. when a
    notebook cell is re-run — never produces duplicated output lines.

    Args:
        name: Logger name passed to ``logging.getLogger``.
        level: Level applied to both the logger and the new handler.

    Returns:
        The configured ``logging.Logger`` instance.
    """
    log = logging.getLogger(name)
    if log.handlers:
        return log

    stream = logging.StreamHandler()
    stream.setLevel(level)
    stream.setFormatter(logging.Formatter('[%(asctime)s] %(levelname)s:%(name)s: %(message)s'))

    log.setLevel(level)
    log.addHandler(stream)
    return log

# Shared logger used by every cell in this notebook.
LOGGER = get_logger()

# Central configuration for the risk-calibration and artifact cells. The whole
# dict is also written to metadata.json by the artifact cell.
CONFIG = {
    'random_seed': 42,
    'oof_pred_vol_halflife': 10,     # days for EWMA pred volatility
    'realized_vol_halflife': 60,     # days for EWMA realized volatility
    'vol_cap_multiple': 1.2,         # cap strategy vol to X * market vol
    'risk_k_grid': [0.4, 0.6, 0.8, 1.0, 1.2],
    'neutral_band_grid': [0.0, 0.05, 0.10, 0.15],  # deadband in z-score units
    'artifacts_dir': 'artifacts',
'transaction_cost_bps': 1.0,   # one-way cost in basis points, applied on allocation change
}

# Deterministic seeds
# NOTE(review): PYTHONHASHSEED is set on the already-running process here;
# it presumably only influences interpreters started afterwards — confirm.
os.environ['PYTHONHASHSEED'] = str(CONFIG['random_seed'])
random.seed(CONFIG['random_seed'])
np.random.seed(CONFIG['random_seed'])

# Import probe: binds `_lgb` and `_` when LightGBM is importable and silently
# does nothing otherwise. Neither name is read again in the visible code.
try:
    import lightgbm as _lgb
    _ = _lgb.__version__
except Exception:
    pass

# Record library versions so a run can be reproduced from its log.
LOGGER.info('Environment info:')
LOGGER.info(f"Python: {platform.python_version()}")
LOGGER.info(f"NumPy: {np.__version__}, Pandas: {pd.__version__}")
try:
    # `lgb` is the module alias imported by the first cell of the notebook.
    LOGGER.info(f"LightGBM: {lgb.__version__}")
except Exception:
    LOGGER.info('LightGBM version: N/A')

# Helpers
from pathlib import Path

def ensure_dir(path: str):
    """Create directory *path*, including missing parents.

    Does nothing when the directory already exists.
    """
    target = Path(path)
    target.mkdir(exist_ok=True, parents=True)

# Create the artifacts directory up front so later cells can write into it.
ensure_dir(CONFIG['artifacts_dir'])
LOGGER.info('Production utilities configured.')


In [None]:
# === Date-based CV helper and validation checks ===
from typing import Iterator, Tuple

def assert_required_columns(df: pd.DataFrame, cols: list):
    """Check that every name in *cols* is a column of *df*.

    Args:
        df: Frame to inspect.
        cols: Column names that must be present.

    Raises:
        ValueError: If one or more names are absent; the message lists them in
            the order they appear in *cols*.
    """
    available = df.columns
    absent = [name for name in cols if name not in available]
    if not absent:
        return
    raise ValueError(f"Missing required columns: {absent}")

# Ensure date_id, when present, is sorted ascending in both frames. The check
# is non-strict (repeated date_ids pass) and does not verify the dtype.
# These are plain `assert` statements, so they are skipped under `python -O`.
if 'date_id' in train.columns:
    assert train['date_id'].is_monotonic_increasing, 'train.date_id must be sorted ascending'
if 'date_id' in test.columns:
    assert test['date_id'].is_monotonic_increasing, 'test.date_id must be sorted ascending'

# Every model input and the target must exist in the engineered train frame.
assert_required_columns(train_fe, features + [TARGET])

# Optionally allow date-aware split sizes (fixed by unique dates rather than rows)
def walk_forward_by_dates(df: pd.DataFrame,
                          date_col: str,
                          n_splits: int = 5,
                          min_train_days: int = 252,
                          val_days: int = 120) -> Iterator[Tuple[np.ndarray, np.ndarray]]:
    """Yield expanding-window folds whose sizes are counted in unique dates.

    Each fold trains on all rows dated up to and including an anchor date and
    validates on the rows of the following ``val_days`` unique dates. Anchors
    are spread evenly between ``min_train_days`` and the last position that
    still leaves ``val_days`` dates for validation.

    When ``df`` has fewer than ``min_train_days + val_days + 1`` unique dates,
    the row-based folds stored in the module-level ``splits`` list are yielded
    instead (that list must already exist in that case).

    Args:
        df: Frame sorted by ``date_col``.
        date_col: Name of the date identifier column.
        n_splits: Number of anchors requested; fewer folds are yielded when
            anchors coincide.
        min_train_days: Minimum number of unique dates in the first training set.
        val_days: Number of unique dates per validation block.

    Yields:
        ``(train_idx, val_idx)`` tuples of integer row positions.
    """
    dates = df[date_col].values
    unique_dates = np.unique(dates)
    if len(unique_dates) < (min_train_days + val_days + 1):
        # fallback to row-based splits already built
        yield from splits
        return
    anchors = np.linspace(min_train_days, len(unique_dates) - val_days, n_splits, dtype=int)
    seen = set()
    for a in anchors:
        a = int(a)
        # Integer truncation of linspace can repeat an anchor when the usable
        # range is narrower than n_splits; emitting the same fold twice would
        # double-count it in any CV average, so each anchor is used once (as in
        # expanding_walk_forward_splits). a < 1 would make `a - 1` wrap around
        # to the last date and leave nothing to validate on.
        if a < 1 or a in seen:
            continue
        seen.add(a)
        train_last_date = unique_dates[a - 1]
        val_last_date = unique_dates[min(a + val_days - 1, len(unique_dates) - 1)]
        tr_idx = np.where(dates <= train_last_date)[0]
        val_idx = np.where((dates > train_last_date) & (dates <= val_last_date))[0]
        if len(val_idx) > 0:
            yield tr_idx, val_idx

# If date_id exists, we can produce alternative date-based splits for diagnostics
# date_splits is only built and logged here; the models above were trained on
# the row-based `splits`.
if 'date_id' in train_fe.columns:
    date_splits = list(walk_forward_by_dates(train_fe, 'date_id'))
    LOGGER.info(f"Date-based splits prepared: {len(date_splits)} folds")
else:
    date_splits = None


In [None]:
# === EWMA volatility functions and risk calibration ===

def ewma_std(series: pd.Series, halflife: int) -> pd.Series:
    """Return the exponentially weighted standard deviation of *series*.

    The estimate needs ``max(5, halflife // 2)`` observations before it is
    defined. The undefined warm-up values are back-filled from the first
    available estimate, and any remaining gaps are forward-filled.

    Args:
        series: Input values.
        halflife: EWMA half-life, in rows.

    Returns:
        A float Series on the same index as *series*. It is all-NaN when the
        input is empty or contains only NaNs.
    """
    if series.isna().all():
        return pd.Series(index=series.index, dtype=float)
    # Use ewm variance then sqrt
    v = series.ewm(halflife=halflife, min_periods=max(5, halflife//2)).var()
    # .bfill()/.ffill() replace fillna(method=...), which is deprecated since
    # pandas 2.1 and no longer accepted by pandas 3.
    return np.sqrt(v).bfill().ffill()

# Calibrate risk parameters using OOF predictions and realized returns
# Grid search over k (aggressiveness) and neutral band (dead zone around 0 signal)

def calibrate_k_and_band(oof_pred: pd.Series,
                         realized: pd.Series,
                         pred_halflife: int,
                         realized_halflife: int,
                         k_grid: list,
                         band_grid: list,
                         vol_cap_multiple: float = 1.2,
                         annualization: int = 252) -> dict:
    """Grid-search the aggressiveness ``k`` and neutral band of the allocation rule.

    For each ``(k, band)`` pair the predictions are turned into a z-score-like
    signal (prediction divided by its EWMA volatility), signals with absolute
    value below ``band`` are zeroed, and the allocation ``1 + k * signal`` is
    clipped to [0, 2]. The deviation from 1.0 is then rescaled so strategy
    volatility stays near ``vol_cap_multiple`` times market volatility, and
    the pair is scored with the annualised mean/std ratio of the resulting
    returns. Transaction costs are not part of the score.

    Args:
        oof_pred: Out-of-fold predictions, aligned with ``realized``.
        realized: Realized returns for the same rows.
        pred_halflife: EWMA half-life for prediction volatility.
        realized_halflife: EWMA half-life for market and strategy volatility.
        k_grid: Candidate aggressiveness values.
        band_grid: Candidate neutral-band widths, in z-score units.
        vol_cap_multiple: Target ratio of strategy to market volatility.
        annualization: Periods per year used to annualise the score.

    Returns:
        ``{'score', 'k', 'band'}`` for the best pair; ties keep the first pair
        encountered. ``k`` and ``band`` stay ``None`` (score ``-inf``) when no
        pair produced a valid score.

    Raises:
        AssertionError: If the two series differ in length.
    """
    assert len(oof_pred) == len(realized)
    oof_pred = oof_pred.astype(float)
    realized = realized.astype(float)

    pred_sigma = ewma_std(oof_pred, pred_halflife)
    mkt_vol = ewma_std(realized, realized_halflife)

    # The z-score-like signal depends on neither k nor band, and the banded
    # signal depends only on band: compute both once instead of on every grid
    # iteration. Results are identical to recomputing them in the loop.
    z = oof_pred / (pred_sigma.replace(0, np.nan).fillna(1e-6))
    banded_signals = [(band, z.where(z.abs() >= band, 0.0)) for band in band_grid]

    best = {'score': -np.inf, 'k': None, 'band': None}

    for k in k_grid:
        for band, z_band in banded_signals:
            alloc = (1.0 + k * z_band).clip(0.0, 2.0)
            strat_ret = alloc * realized
            strat_vol = ewma_std(strat_ret, realized_halflife)
            # Scale the deviation from the neutral weight, guarding against a
            # zero strategy volatility.
            cap = (vol_cap_multiple * mkt_vol) / (strat_vol.replace(0, np.nan).fillna(1e-6))
            cap = cap.clip(0.0, 2.0)
            alloc_scaled = (1.0 + (alloc - 1.0) * cap).clip(0.0, 2.0)
            strat_ret_scaled = alloc_scaled * realized
            # Sharpe-like objective
            mu = strat_ret_scaled.mean() * annualization
            sd = strat_ret_scaled.std() * np.sqrt(annualization)
            score = (mu / (sd + 1e-9)) if sd > 0 else -np.inf
            # Strict '>' keeps the earliest pair on ties.
            if score > best['score']:
                best = {'score': float(score), 'k': float(k), 'band': float(band)}
    return best

# Calibrate on the OOF predictions, then rebuild the train allocation series
# with the winning parameters. Skipped when there are no OOF predictions or no
# realized return series.
if 'pred' in train_fe.columns and realized_returns is not None:
    best_risk = calibrate_k_and_band(
        oof_pred=train_fe['pred'],
        realized=train_fe['realized_returns'] if 'realized_returns' in train_fe.columns else realized_returns,
        pred_halflife=CONFIG['oof_pred_vol_halflife'],
        realized_halflife=CONFIG['realized_vol_halflife'],
        k_grid=CONFIG['risk_k_grid'],
        band_grid=CONFIG['neutral_band_grid'],
        vol_cap_multiple=CONFIG['vol_cap_multiple'],
    )
    LOGGER.info(f"Calibrated risk params: k={best_risk['k']}, band={best_risk['band']}, score={best_risk['score']:.4f}")

    # Recompute allocation series using calibrated params
    # Same steps as inside calibrate_k_and_band, for the winning (k, band) only.
    # NOTE(review): from here on train_fe['realized_returns'] is read directly,
    # without the fallback used in the call above — it assumes the backtest
    # cell already created that column.
    pred_sigma = ewma_std(train_fe['pred'], CONFIG['oof_pred_vol_halflife'])
    z = train_fe['pred'] / (pred_sigma.replace(0, np.nan).fillna(1e-6))
    z_band = z.where(z.abs() >= best_risk['band'], 0.0)
    alloc_calib = (1.0 + best_risk['k'] * z_band).clip(0.0, 2.0)
    mkt_vol = ewma_std(train_fe['realized_returns'], CONFIG['realized_vol_halflife'])
    strat_vol = ewma_std(alloc_calib * train_fe['realized_returns'], CONFIG['realized_vol_halflife'])
    cap = (CONFIG['vol_cap_multiple'] * mkt_vol) / (strat_vol.replace(0, np.nan).fillna(1e-6))
    cap = cap.clip(0.0, 2.0)
    train_fe['alloc_calibrated'] = (1.0 + (alloc_calib - 1.0) * cap).clip(0.0, 2.0)

    # Apply transaction costs on calibrated series
    # Cost = bps / 10000 times the absolute change in allocation between rows.
    tc = CONFIG.get('transaction_cost_bps', 0.0) / 10000.0
    alloc_change_cal = train_fe['alloc_calibrated'].diff().abs().fillna(0.0)
    tc_cal = tc * alloc_change_cal

    # Diagnostics
    # Compounded growth of one unit: calibrated strategy (after costs) vs market.
    strat_cal = (train_fe['alloc_calibrated'] * train_fe['realized_returns']) - tc_cal
    cum_cal = (1.0 + strat_cal).cumprod()
    cum_mkt = (1.0 + train_fe['realized_returns']).cumprod()

    plt.figure(figsize=(10, 5))
    x_axis = train_fe['date_id'] if 'date_id' in train_fe.columns else np.arange(len(train_fe))
    plt.plot(x_axis, cum_mkt, label='Market (buy-hold)')
    plt.plot(x_axis, cum_cal, label='Strategy (calibrated)')
    plt.legend(); plt.title('Cumulative performance — calibrated'); plt.tight_layout(); plt.show()



In [None]:
# === Apply calibrated risk to TEST predictions (if available) and save artifacts ===

# If we calibrated best_risk, we also map test predictions with the same parameters.
# We reuse test predictions already computed (ranked_preds -> preds_test) and
# estimate sigma from recent train OOF prediction EWMA.

# NOTE(review): `artifacts` is initialised here but never read or filled in
# the visible code.
artifacts = {}

if 'pred' in train_fe.columns:
    # Median EWMA prediction volatility over the last 50 train rows.
    pred_sigma_train = ewma_std(train_fe['pred'], CONFIG['oof_pred_vol_halflife'])
    sigma_est = float(pred_sigma_train.iloc[-50:].median()) if pred_sigma_train.notna().any() else float(train_fe['pred'].std())
else:
    # 'pred' is absent in this branch, so this always evaluates to 1.0.
    sigma_est = float(train_fe['pred'].std()) if 'pred' in train_fe else 1.0

# Prefer calibrated params if present
# best_risk only exists when the calibration cell ran; otherwise use k=0.8, band=0.0.
k_use = best_risk['k'] if 'best_risk' in globals() and best_risk['k'] is not None else 0.8
band_use = best_risk['band'] if 'best_risk' in globals() and best_risk['band'] is not None else 0.0

# Center preds_test (already centered earlier). Apply band and scaling.
# NOTE(review): preds_test is on the centered-rank scale ([-0.5, 0.5]) whereas
# sigma_est comes from raw OOF predictions, so z_test is not in the same units
# as the z-scores that k and band were calibrated on — verify that the
# resulting allocations are not saturating at the 0/2 bounds.
z_test = preds_test / (sigma_est + 1e-9)
z_test_band = np.where(np.abs(z_test) >= band_use, z_test, 0.0)
alloc_test_cal = 1.0 + k_use * z_test_band
alloc_test_cal = np.clip(alloc_test_cal, 0.0, 2.0)

# Optional global downscale using recent historical cap (after costs)
if realized_returns is not None and 'alloc_calibrated' in train_fe.columns:
    hist_window = min(len(train_fe), 180)
    tc = CONFIG.get('transaction_cost_bps', 0.0) / 10000.0
    alloc_change_hist = train_fe['alloc_calibrated'].iloc[-hist_window:].diff().abs().fillna(0.0)
    tc_hist = tc * alloc_change_hist
    strat_hist = (train_fe['alloc_calibrated'].iloc[-hist_window:] * train_fe['realized_returns'].iloc[-hist_window:]) - tc_hist
    # Latest EWMA volatility of the strategy and of the market over that window.
    hist_strat_vol = float(ewma_std(strat_hist, CONFIG['realized_vol_halflife']).iloc[-1]) if len(strat_hist) else strat_hist.std()
    hist_mkt_vol   = float(ewma_std(train_fe['realized_returns'].iloc[-hist_window:], CONFIG['realized_vol_halflife']).iloc[-1])
    # Capped at 1.0: this can only shrink the deviation from the neutral allocation.
    scale_global = min(1.0, (CONFIG['vol_cap_multiple'] * hist_mkt_vol) / (hist_strat_vol + 1e-9)) if hist_strat_vol > 0 else 1.0
    alloc_test_cal = 1.0 + (alloc_test_cal - 1.0) * scale_global
    alloc_test_cal = np.clip(alloc_test_cal, 0.0, 2.0)

# Overwrite submission with calibrated allocations (if available)
# This replaces the submission.csv written by the first cell; when no
# calibration ran, the default k/band above are what gets written.
submission_cal = pd.DataFrame({
    'date_id': test_fe['date_id'].values if 'date_id' in test_fe.columns else np.arange(len(test_fe)),
    'allocation': alloc_test_cal.astype(float)
})
submission_cal.to_csv('submission.csv', index=False)
LOGGER.info('Overwrote submission.csv with calibrated allocations')

# Save artifacts for reproducibility
ensure_dir(CONFIG['artifacts_dir'])

# CV metrics: mean/std of the per-fold validation RMSE and the number of fold
# models (None when no fold was trained).
cv_metrics = {
    'cv_rmse_mean': float(np.mean(val_scores)) if len(val_scores) else None,
    'cv_rmse_std': float(np.std(val_scores)) if len(val_scores) else None,
    'n_models': int(len(models)),
}
with open(os.path.join(CONFIG['artifacts_dir'], 'cv_metrics.json'), 'w') as f:
    json.dump(cv_metrics, f, indent=2)

# OOF predictions
if 'pred' in train_fe.columns:
    # date_id is optional everywhere else in this notebook; export it only when
    # present instead of failing with KeyError on the column selection.
    oof_cols = [c for c in ('date_id', TARGET, 'pred') if c in train_fe.columns]
    train_fe[oof_cols].to_csv(os.path.join(CONFIG['artifacts_dir'], 'oof_predictions.csv'), index=False)

# Feature importances (mean gain over folds, one row per feature)
if not feature_importance_df.empty:
    fi_mean.reset_index().rename(columns={'index':'feature','importance':'importance_mean'}) \
        .to_csv(os.path.join(CONFIG['artifacts_dir'], 'feature_importances.csv'), index=False)

# Metadata: full configuration plus the values that determine the final models.
metadata = {
    'config': CONFIG,
    'avg_best_iter': int(avg_best_iter),
    'seed': int(SEED),
}
with open(os.path.join(CONFIG['artifacts_dir'], 'metadata.json'), 'w') as f:
    json.dump(metadata, f, indent=2)

LOGGER.info('Artifacts saved to artifacts/.')
