In [1]:
import numpy as np
import pandas as pd

# =======================
# Utils (indicators w/o leakage) for T+7
# =======================
def rsi(series: pd.Series, window: int = 14):
    """Relative Strength Index built from simple rolling means of gains and losses.

    Args:
        series: Price series to compute the indicator on.
        window: Number of one-step differences averaged per value; the first
            ``window`` outputs are NaN because ``min_periods=window``.

    Returns:
        Series aligned with ``series``. A window that contains gains but no
        losses yields 100.0 (the RSI upper bound). A completely flat window
        (no gains and no losses) stays NaN because the ratio is undefined.
    """
    delta = series.diff()
    avg_gain = delta.clip(lower=0).rolling(window, min_periods=window).mean()
    avg_loss = (-delta.clip(upper=0)).rolling(window, min_periods=window).mean()

    # A zero average loss would divide by zero; mask it here and patch below.
    rs = avg_gain / avg_loss.replace(0, np.nan)
    out = 100 - (100 / (1 + rs))

    # Gains with no losses is the saturated case, not a missing value:
    # leaving it NaN makes downstream dropna() discard strong up-trend rows.
    only_gains = (avg_loss == 0) & (avg_gain > 0)
    return out.mask(only_gains, 100.0)

def ema(series: pd.Series, span: int):
    """Exponentially weighted mean of ``series`` with the given ``span``.

    Uses the recursive (``adjust=False``) form of the weighting.
    """
    smoother = series.ewm(span=span, adjust=False)
    return smoother.mean()

def macd(close: pd.Series, fast=12, slow=26, signal=9):
    """MACD triple for a price series.

    Returns:
        ``(macd_line, signal_line, histogram)`` where the MACD line is the
        fast EMA minus the slow EMA, the signal line is an EMA of the MACD
        line, and the histogram is their difference.
    """
    fast_ema = ema(close, fast)
    slow_ema = ema(close, slow)
    line = fast_ema - slow_ema
    smoothed = ema(line, signal)
    histogram = line - smoothed
    return line, smoothed, histogram

def bollinger(close: pd.Series, window=20, n_std=2.0):
    """Bollinger bands over a full rolling window.

    Returns:
        ``(ma, upper, lower, width, pb)``: the rolling mean, the bands at
        ``n_std`` rolling standard deviations around it, the band width
        relative to the mean (a zero mean gives NaN), and the position of
        ``close`` inside the band (%B).
    """
    roll = close.rolling(window, min_periods=window)
    ma = roll.mean()
    offset = n_std * roll.std()

    upper = ma + offset
    lower = ma - offset
    band = upper - lower

    width = band / ma.replace(0, np.nan)
    pb = (close - lower) / band
    return ma, upper, lower, width, pb

def true_range(high, low, close):
    """Row-wise true range.

    Takes the largest of: the high-low span, the distance from the high to
    the previous close, and the distance from the low to the previous close.
    The row-wise max skips NaN, so the first row (no previous close) falls
    back to ``high - low``.
    """
    prior_close = close.shift(1)
    candidates = [
        high - low,
        (high - prior_close).abs(),
        (low - prior_close).abs(),
    ]
    stacked = pd.concat(candidates, axis=1)
    return stacked.max(axis=1)

def atr(high, low, close, window=14):
    """Average true range: simple rolling mean of ``true_range`` over a full window."""
    tr = true_range(high, low, close)
    return tr.rolling(window, min_periods=window).mean()

def safe_log1p(s: pd.Series):
    """Return ``log(1 + x)`` element-wise after casting the series to float."""
    as_float = s.astype(float)
    return np.log1p(as_float)

def cyclical_time_features(dt_index: pd.Series):
    """Encode weekday and month of a datetime Series as sine/cosine pairs.

    Returns:
        DataFrame with columns ``dow_sin``, ``dow_cos``, ``mon_sin``,
        ``mon_cos`` sharing the index of ``dt_index``. Weekday is mapped onto
        a 7-step circle and month (shifted to start at 0) onto a 12-step circle.
    """
    full_turn = 2 * np.pi
    dow_angle = full_turn * dt_index.dt.weekday / 7
    mon_angle = full_turn * (dt_index.dt.month - 1) / 12

    columns = {
        "dow_sin": np.sin(dow_angle),
        "dow_cos": np.cos(dow_angle),
        "mon_sin": np.sin(mon_angle),
        "mon_cos": np.cos(mon_angle),
    }
    return pd.DataFrame(columns, index=dt_index.index)


In [3]:
import os, json
from pathlib import Path
import numpy as np
import pandas as pd

# === Mapping of raw CSV files used exclusively by the T+7 pipeline ===
# If your BTC file sits where the T+1 code expects it, leave this as is:
ASSETS_RAW = dict(
    BTC="data/btc_1d_data_2018_to_2025.csv",
    ETH="data/ETH_cleaned.csv",
    TSLA="data/TSLA_cleaned.csv",
    AAPL="data/AAPL_cleaned.csv",
)

# Output directory for generated feature files; created on first run.
OUT_DIR = Path("data")
OUT_DIR.mkdir(parents=True, exist_ok=True)

# =======================
# IO helpers specific to T+7
# (minimal copy of the T+1 code)
# =======================
# Raw exchange-style column headers -> snake_case names used by the pipeline.
RENAME_MAP = {
    "Open time":"open_time","Open":"open","High":"high","Low":"low","Close":"close","Volume":"volume",
    "Close time":"close_time","Quote asset volume":"quote_asset_volume","Number of trades":"num_trades",
    "Taker buy base asset volume":"taker_buy_base","Taker buy quote asset volume":"taker_buy_quote",
    "Ignore":"ignore","Symbol":"symbol"
}


def load_and_standardize(path: str) -> pd.DataFrame:
    """Read a raw OHLCV CSV and return it with standard names, sorted by time.

    Args:
        path: Location of the CSV file.

    Returns:
        DataFrame with columns renamed through ``RENAME_MAP``, time columns
        parsed as UTC datetimes, rows with an unparseable ``close_time``
        removed, sorted by ``close_time`` with duplicate timestamps dropped
        (first occurrence kept) and a fresh 0..n-1 index.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
        ValueError: If any of open/high/low/close/volume is missing, or if
            neither ``close_time`` nor ``open_time`` is present.
    """
    p = Path(path)
    if not p.exists():
        raise FileNotFoundError(f"Файл не найден: {p} (cwd={Path.cwd()})")
    df = pd.read_csv(p)
    df = df.rename(columns=RENAME_MAP)

    # Require at least OHLCV plus one time column.
    required = ["open","high","low","close","volume"]
    miss = [c for c in required if c not in df.columns]
    if miss:
        raise ValueError(f"{p}: отсутствуют обязательные столбцы: {miss}")

    # Time: close_time is preferred, open_time is the fallback.
    if "close_time" not in df.columns:
        if "open_time" not in df.columns:
            raise ValueError(f"{p}: нужен хотя бы один столбец времени: 'close_time' или 'open_time'")
        df["close_time"] = df["open_time"]

    for c in ["open_time","close_time"]:
        if c in df.columns:
            df[c] = pd.to_datetime(df[c], utc=True, errors="coerce")

    # errors="coerce" turns bad stamps into NaT. Without this drop, one NaT row
    # survives the dedup, sorts to the end and becomes the "future" bar that
    # shift(-h) targets are computed from.
    df = df.dropna(subset=["close_time"])

    df = df.sort_values("close_time").drop_duplicates(subset=["close_time"]).reset_index(drop=True)
    return df


# =======================
# Core feature pipeline T+7 (per asset)
# =======================
def build_features_tplus7(df: pd.DataFrame, horizon: int = 7) -> pd.DataFrame:
    """
    Build the feature table with the target shifted ``horizon`` rows ahead.

    Same features as the T+1 pipeline; ``horizon=7`` gives T+7.
    Expects ema/rsi/macd/bollinger/atr/safe_log1p/cyclical_time_features
    to be defined in earlier cells.

    Returns a copy of the input with target and feature columns appended and
    every row that contains a NaN removed (rolling warm-up rows and the last
    ``horizon`` rows, which have no target).
    """
    h = int(horizon)
    df = df.copy()
    close = df["close"]

    def _rolling_mean_and_z(col: pd.Series, win: int):
        # Full-window rolling mean plus the z-score of the current value against it.
        roll = col.rolling(win, min_periods=win)
        mean = roll.mean()
        return mean, (col - mean) / (roll.std() + 1e-12)

    # Targets (T+h)
    future_col = f"close_tplus{h}"
    ahead_col = f"ret_{h}d_ahead"
    df[future_col] = close.shift(-h)
    df[ahead_col] = (df[future_col] - close) / close
    df[f"y{h}_class"] = (df[ahead_col] > 0).astype(int)

    # Base returns / lags
    ret_1d = close.pct_change()
    df["ret_1d"] = ret_1d
    for periods in (2, 3, 5, 10):
        df[f"ret_{periods}d"] = close.pct_change(periods)
    for lag in (1, 2, 3, 5, 10):
        df[f"close_lag_{lag}"] = close.shift(lag)

    # Rolling stats
    for win in (5, 10, 20):
        price_roll = close.rolling(win, min_periods=win)
        df[f"sma_{win}"] = price_roll.mean()
        df[f"std_{win}"] = price_roll.std()
        df[f"ret_std_{win}"] = ret_1d.rolling(win, min_periods=win).std()

    # Trend/momentum
    df["ema_12"] = ema(close, 12)
    df["ema_26"] = ema(close, 26)
    df["rsi_14"] = rsi(close, 14)
    macd_parts = macd(close, 12, 26, 9)
    for col_name, values in zip(("macd_line", "macd_signal", "macd_hist"), macd_parts):
        df[col_name] = values

    # Bands / Volatility
    bb_ma, _, _, bb_w, bb_pb = bollinger(close, 20, 2.0)
    df["bb_ma_20"] = bb_ma
    df["bb_width_20"] = bb_w
    df["bb_percent_b_20"] = bb_pb
    high, low = df["high"], df["low"]
    df["hl2"] = (high + low) / 2.0
    df["hl_spread"] = (high - low) / (close + 1e-12)
    df["atr_14"] = atr(high, low, close, 14)

    # Volume transforms
    volume = df["volume"]
    df["volume_log"] = safe_log1p(volume)
    for win in (5, 20):
        df[f"vol_sma_{win}"], df[f"volume_z_{win}"] = _rolling_mean_and_z(volume, win)

    # Optional microstructure
    if "num_trades" in df.columns:
        trades = df["num_trades"]
        df["num_trades_log"] = safe_log1p(trades)
        for win in (5, 20):
            df[f"trades_sma_{win}"], df[f"trades_z_{win}"] = _rolling_mean_and_z(trades, win)

    for taker_col in ("taker_buy_base", "taker_buy_quote"):
        if taker_col in df.columns:
            df[f"{taker_col}_log"] = safe_log1p(df[taker_col])

    # Time features
    time_feats = cyclical_time_features(df["close_time"])
    df = pd.concat([df, time_feats], axis=1)

    # Drop NaNs (rolling warm-up rows + the last h rows without a target)
    return df.dropna().copy()


def select_feature_columns_tplus7(df_feat: pd.DataFrame, horizon: int = 7):
    """Split the columns of a feature table into model inputs and targets.

    Args:
        df_feat: Feature table (as produced by the T+``horizon`` builder).
        horizon: Horizon used in the target column names.

    Returns:
        ``(feature_cols, target_cols)``. Features are the numeric columns of
        ``df_feat``, in frame order, excluding timestamps, bookkeeping
        columns, the raw close and every target-derived column. Targets are
        ``[f"y{h}_class", f"ret_{h}d_ahead"]``.
    """
    h = int(horizon)
    target_cols = [f"y{h}_class", f"ret_{h}d_ahead"]
    # Target-derived columns are listed here so they can never leak into the inputs.
    exclude_cols = {
        "open_time", "close_time", "ignore", "symbol", "close",
        f"close_tplus{h}", *target_cols,
    }
    # Check for numeric dtypes positively: `dtype != "O"` also lets datetime,
    # categorical and dedicated string dtypes through, none of which can be
    # fed to the model as float features.
    feature_cols = [
        c for c in df_feat.columns
        if c not in exclude_cols and pd.api.types.is_numeric_dtype(df_feat[c])
    ]
    return feature_cols, target_cols


def process_asset_tplus7(symbol: str, csv_path: str, horizon: int = 7):
    """Run the T+``horizon`` feature pipeline for one asset and persist the result.

    Loads the raw CSV, builds features, validates them, writes
    ``{symbol}_features_tplus{h}.parquet`` (best effort) and ``.csv`` into
    ``OUT_DIR``, prints a JSON summary and a tail preview.

    Args:
        symbol: Asset label used in file names and messages.
        csv_path: Path to the raw OHLCV CSV.
        horizon: Target horizon in rows.

    Returns:
        Summary dict (row counts, date range, feature count, output files).
        ``files["parquet"]`` is ``None`` when the Parquet file could not be
        written.

    Raises:
        ValueError: If no rows survive feature construction, or if NaN values
            remain in the feature or target columns.
    """
    h = int(horizon)

    df = load_and_standardize(csv_path)
    df_feat = build_features_tplus7(df, horizon=h)
    feature_cols, target_cols = select_feature_columns_tplus7(df_feat, horizon=h)

    # Explicit raises instead of assert: asserts vanish under `python -O`.
    if df_feat.empty:
        # Otherwise the date_range lookup below dies with an opaque IndexError.
        raise ValueError(f"{symbol} T+{h}: после dropna не осталось ни одной строки")
    if df_feat[feature_cols].isna().sum().sum() != 0:
        raise ValueError(f"NaN в фичах T+{h} для {symbol}")
    if df_feat[target_cols].isna().sum().sum() != 0:
        raise ValueError(f"NaN в таргетах T+{h} для {symbol}")

    out_parquet = OUT_DIR / f"{symbol}_features_tplus{h}.parquet"
    out_csv     = OUT_DIR / f"{symbol}_features_tplus{h}.csv"

    # dict.fromkeys de-duplicates while preserving order.
    cols_out = ["close_time", "close"] + feature_cols + target_cols
    cols_out = list(dict.fromkeys(cols_out))
    df_out = df_feat[cols_out].copy()

    parquet_written = True
    try:
        df_out.to_parquet(out_parquet, index=False)
    except Exception as e:
        parquet_written = False
        print(f"[warn] {symbol} T+{h}: Parquet не записан ({e}). Продолжаем с CSV.")
        # A Parquet left over from an earlier run (or a partial write) would be
        # picked up by readers that prefer Parquet over the fresh CSV.
        try:
            out_parquet.unlink(missing_ok=True)
        except OSError as cleanup_err:
            print(f"[warn] {symbol} T+{h}: не удалось удалить устаревший Parquet ({cleanup_err}).")
    df_out.to_csv(out_csv, index=False)

    report = {
        "symbol": symbol,
        "horizon": h,
        "rows_total_raw": int(len(df)),
        "rows_after_dropna": int(len(df_out)),
        "date_range": [str(df_out["close_time"].iloc[0]), str(df_out["close_time"].iloc[-1])],
        "n_features": len(feature_cols),
        "target_columns": target_cols,
        "files": {
            "parquet": str(out_parquet) if parquet_written else None,
            "csv": str(out_csv),
        },
    }
    print(f"=== {symbol} T+{h} FEATURE PIPELINE — SUMMARY ===")
    print(json.dumps(report, indent=2, ensure_ascii=False))

    # Tail preview: rich display inside a notebook, plain text otherwise.
    preview_cols = ["close_time", "close"] + target_cols + feature_cols[:8]
    try:
        from IPython.display import display
        display(df_out.tail(5)[preview_cols])
    except Exception:
        print(df_out.tail(5)[preview_cols].to_string(index=False))

    return report


# =======================
# Run T+7 for all assets (on the raw data)
# =======================
all_reports_tplus7 = {}
H = 7

# One failing asset must not stop the others: log the error and move on.
for sym, csv_path in ASSETS_RAW.items():
    try:
        report = process_asset_tplus7(sym, csv_path, horizon=H)
    except Exception as e:
        print(f"[error] {sym} T+{H}: {e}")
    else:
        all_reports_tplus7[sym] = report

# Persist the per-asset summaries as a single dashboard file.
dashboard_path = OUT_DIR / f"features_dashboard_tplus{H}.json"
with open(dashboard_path, "w", encoding="utf-8") as f:
    json.dump(all_reports_tplus7, f, ensure_ascii=False, indent=2)

print(f"Saved T+{H} dashboard:", dashboard_path)

=== BTC T+7 FEATURE PIPELINE — SUMMARY ===
{
  "symbol": "BTC",
  "horizon": 7,
  "rows_total_raw": 2846,
  "rows_after_dropna": 2819,
  "date_range": [
    "2018-01-21 23:59:59.999000+00:00",
    "2025-10-09 23:59:59.999000+00:00"
  ],
  "n_features": 55,
  "target_columns": [
    "y7_class",
    "ret_7d_ahead"
  ],
  "files": {
    "parquet": "data\\BTC_features_tplus7.parquet",
    "csv": "data\\BTC_features_tplus7.csv"
  }
}


Unnamed: 0,close_time,close,y7_class,ret_7d_ahead,open,high,low,volume,quote_asset_volume,num_trades,taker_buy_base,taker_buy_quote
2834,2025-10-05 23:59:59.999000+00:00,122244.41,0,-0.099837,122390.99,122410.0,122157.7,261.13591,31927900.0,52378,114.73755,14028650.0
2835,2025-10-06 23:59:59.999000+00:00,123584.95,0,-0.067171,123482.32,124358.46,123084.0,1474.47086,182297400.0,382277,761.57611,94176010.0
2836,2025-10-07 23:59:59.999000+00:00,125046.94,0,-0.082656,124658.54,125082.27,124600.0,535.4452,66830440.0,135654,308.91524,38567390.0
2837,2025-10-08 23:59:59.999000+00:00,121832.92,0,-0.074741,121332.96,122089.68,121276.0,790.37194,96216090.0,270056,421.40156,51301650.0
2838,2025-10-09 23:59:59.999000+00:00,122700.8,0,-0.09532,123306.01,123348.32,122583.0,528.48578,64981980.0,131044,188.80364,23210570.0


=== ETH T+7 FEATURE PIPELINE — SUMMARY ===
{
  "symbol": "ETH",
  "horizon": 7,
  "rows_total_raw": 2557,
  "rows_after_dropna": 2530,
  "date_range": [
    "2018-11-12 00:00:00+00:00",
    "2025-10-15 00:00:00+00:00"
  ],
  "n_features": 44,
  "target_columns": [
    "y7_class",
    "ret_7d_ahead"
  ],
  "files": {
    "parquet": "data\\ETH_features_tplus7.parquet",
    "csv": "data\\ETH_features_tplus7.csv"
  }
}


Unnamed: 0,close_time,close,y7_class,ret_7d_ahead,open,high,low,volume,ret_1d,ret_2d,ret_3d,ret_5d
2545,2025-10-11 00:00:00+00:00,3750.611572,1,0.037256,3840.960449,3882.241455,3652.790039,62475475938,-0.024043,-0.141568,-0.17162,-0.199916
2546,2025-10-12 00:00:00+00:00,4164.427734,0,-0.04317,3750.946045,4195.397461,3701.478271,61216174681,0.110333,0.083637,-0.046855,-0.064416
2547,2025-10-13 00:00:00+00:00,4245.467773,0,-0.062351,4164.049316,4292.845703,4061.224609,50253782420,0.01946,0.13194,0.104725,-0.062324
2548,2025-10-14 00:00:00+00:00,4125.412109,0,-0.060272,4245.372559,4265.105469,3895.973633,67094148347,-0.028279,-0.009369,0.099931,-0.055785
2549,2025-10-15 00:00:00+00:00,3987.459473,0,-0.044975,4125.361328,4213.855957,3935.161377,50462889453,-0.03344,-0.060773,-0.042495,0.037588


=== TSLA T+7 FEATURE PIPELINE — SUMMARY ===
{
  "symbol": "TSLA",
  "horizon": 7,
  "rows_total_raw": 1255,
  "rows_after_dropna": 1228,
  "date_range": [
    "2020-11-20 00:00:00+00:00",
    "2025-10-13 00:00:00+00:00"
  ],
  "n_features": 44,
  "target_columns": [
    "y7_class",
    "ret_7d_ahead"
  ],
  "files": {
    "parquet": "data\\TSLA_features_tplus7.parquet",
    "csv": "data\\TSLA_features_tplus7.csv"
  }
}


Unnamed: 0,close_time,close,y7_class,ret_7d_ahead,open,high,low,volume,ret_1d,ret_2d,ret_3d,ret_5d
1243,2025-10-07 00:00:00+00:00,433.089996,0,-0.010021,447.820007,452.679993,432.450012,102296100,-0.044479,0.007584,-0.006674,-0.026151
1244,2025-10-08 00:00:00+00:00,438.690002,1,0.001413,437.570007,441.329987,425.230011,71192100,0.01293,-0.032124,0.020613,-0.045205
1245,2025-10-09 00:00:00+00:00,435.540009,1,0.027299,431.809998,436.350006,426.179993,69339900,-0.00718,0.005657,-0.039073,-0.001055
1246,2025-10-10 00:00:00+00:00,413.48999,1,0.070401,436.540009,443.130005,411.450012,112107900,-0.050627,-0.057444,-0.045256,-0.038015
1247,2025-10-13 00:00:00+00:00,435.899994,1,0.007043,423.529999,436.890015,419.700012,79552800,0.054197,0.000827,-0.00636,-0.038279


=== AAPL T+7 FEATURE PIPELINE — SUMMARY ===
{
  "symbol": "AAPL",
  "horizon": 7,
  "rows_total_raw": 1255,
  "rows_after_dropna": 1228,
  "date_range": [
    "2020-11-20 00:00:00+00:00",
    "2025-10-13 00:00:00+00:00"
  ],
  "n_features": 44,
  "target_columns": [
    "y7_class",
    "ret_7d_ahead"
  ],
  "files": {
    "parquet": "data\\AAPL_features_tplus7.parquet",
    "csv": "data\\AAPL_features_tplus7.csv"
  }
}


Unnamed: 0,close_time,close,y7_class,ret_7d_ahead,open,high,low,volume,ret_1d,ret_2d,ret_3d,ret_5d
1243,2025-10-07 00:00:00+00:00,256.480011,0,-0.035207,256.809998,257.399994,255.429993,31955800,-0.000818,-0.005968,-0.002528,0.007265
1244,2025-10-08 00:00:00+00:00,258.059998,0,-0.022359,256.519989,258.519989,256.109985,36496900,0.00616,0.005337,0.000155,0.010217
1245,2025-10-09 00:00:00+00:00,254.039993,1,0.032278,257.809998,258.0,253.139999,38322000,-0.015578,-0.009513,-0.010324,-0.012017
1246,2025-10-10 00:00:00+00:00,245.270004,1,0.07135,254.940002,256.380005,244.0,61999100,-0.034522,-0.049562,-0.043707,-0.049415
1247,2025-10-13 00:00:00+00:00,247.660004,1,0.043568,249.380005,249.690002,245.559998,38142900,0.009744,-0.025114,-0.040301,-0.035179


Saved T+7 dashboard: data\features_dashboard_tplus7.json


In [4]:
import os, json, time, warnings
from pathlib import Path
import numpy as np
import pandas as pd
from pandas.api.types import is_datetime64_any_dtype
from sklearn.metrics import (
    accuracy_score, balanced_accuracy_score, f1_score,
    precision_score, recall_score, roc_auc_score
)
from sklearn.isotonic import IsotonicRegression
import xgboost as xgb

warnings.filterwarnings("ignore")

# ==================== CONFIG ====================
# Per-asset feature file and trading calendar.
ASSETS_T7 = {
    "BTC": {
        "features": "data/BTC_features_tplus7.parquet",
        "calendar": "crypto",
    },
    "ETH": {
        "features": "data/ETH_features_tplus7.parquet",
        "calendar": "crypto",
    },
    "TSLA": {
        "features": "data/TSLA_features_tplus7.parquet",
        "calendar": "equity",
    },
    "AAPL": {
        "features": "data/AAPL_features_tplus7.parquet",
        "calendar": "equity",
    },
}

DATA_DIR = Path("data")
DATA_DIR.mkdir(parents=True, exist_ok=True)
MODELS_DIR = Path("models")
MODELS_DIR.mkdir(parents=True, exist_ok=True)

MODEL_VERSION_T7 = "hybrid_xgb_multi_tplus7_classic_gate_v10_1_v11ux"
RANDOM_STATE = 42

# splits
TEST_LEN = 180
VAL_LEN = 120
EMBARGO_DAYS = 5
LOOKBACK_YEARS = 2

# epsilon (label dead-zone) and rolling z-score window
EPSILON_T7 = 0.004
Z_WIN = 180
Z_MINP = 90

# sample weights, v10.1
HALF_LIFE_DAYS = 540
MAG_ALPHA = 60.0
MAG_CAP = 0.03

# decision constraints
MIN_POS_RATE = 0.25
MAX_POS_RATE = 0.75
TRADE_GATE_Q = 0.70
REGIME_EMA = 200

# UX
UX_SOFT_THR = 0.29
UX_CONF_BAND = 0.05

# models
CLF_PARAMS = {
    "objective": "binary:logistic",
    "eval_metric": "auc",
    "learning_rate": 0.02,
    "max_depth": 2,
    "min_child_weight": 8,
    "subsample": 0.65,
    "colsample_bytree": 0.65,
    "reg_lambda": 9.0,
    "reg_alpha": 1.0,
    "gamma": 2.0,
    "tree_method": "hist",
    "seed": RANDOM_STATE,
}

REG_PARAMS = {
    "objective": "reg:squarederror",
    "eval_metric": "rmse",
    "learning_rate": 0.02,
    "max_depth": 3,
    "min_child_weight": 6,
    "subsample": 0.7,
    "colsample_bytree": 0.7,
    "reg_lambda": 7.0,
    "reg_alpha": 0.5,
    "gamma": 1.0,
    "tree_method": "hist",
    "seed": RANDOM_STATE,
}

N_ROUNDS_CLF = 7000
N_ROUNDS_REG = 7000
EARLY_STOP = 300
STEP_TPLUS7 = 7
# ==================== HELPERS ====================
def load_features_t7(path_parquet_or_csv: str) -> pd.DataFrame:
    """Load a saved feature table from Parquet or CSV, sorted by ``close_time``.

    A ``.parquet`` path that does not exist falls back to the ``.csv`` file
    with the same stem. ``close_time`` is parsed as UTC when it is not
    already a datetime column.

    Raises:
        ValueError: For a suffix other than ``.parquet`` / ``.csv``.
        FileNotFoundError: When neither the Parquet file nor its CSV fallback exists.
        KeyError: When ``close_time`` or ``close`` is missing.
    """
    p = Path(path_parquet_or_csv)
    suffix = p.suffix.lower()

    if suffix == ".csv":
        df = pd.read_csv(p)
    elif suffix != ".parquet":
        raise ValueError(f"Формат не поддержан: {p}")
    elif p.exists():
        df = pd.read_parquet(p)
    else:
        csv = p.with_suffix(".csv")
        if not csv.exists():
            raise FileNotFoundError(f"Нет {p} и fallback {csv}")
        df = pd.read_csv(csv)

    if "close_time" not in df.columns:
        raise KeyError("Нет 'close_time'")
    stamps = df["close_time"]
    if not is_datetime64_any_dtype(stamps):
        df["close_time"] = pd.to_datetime(stamps, utc=True, errors="coerce")

    df = df.sort_values("close_time").reset_index(drop=True)
    if "close" not in df.columns:
        raise KeyError("Нет 'close'")
    return df


def next_trading_day(dt: pd.Timestamp, calendar: str) -> pd.Timestamp:
    """Return midnight of the next trading day after ``dt``.

    For ``calendar == "crypto"`` every day trades, so this is simply the next
    calendar day. Any other calendar skips Saturdays and Sundays; holidays
    are not taken into account.
    """
    one_day = pd.Timedelta(days=1)
    if calendar == "crypto":
        return (dt + one_day).normalize()

    candidate = dt.normalize() + one_day
    # weekday(): Monday=0 ... Sunday=6, so 5 and 6 are the weekend.
    while candidate.weekday() >= 5:
        candidate = candidate + one_day
    return candidate


def nth_trading_day(dt: pd.Timestamp, calendar: str, n: int) -> pd.Timestamp:
    """Advance ``dt`` by ``n`` trading days on the given calendar.

    Applies ``next_trading_day`` ``n`` times; with ``n == 0`` the input is
    returned unchanged.
    """
    current = dt
    for _step in range(n):
        current = next_trading_day(current, calendar)
    return current


def rolling_z(s: pd.Series, win=Z_WIN, minp=Z_MINP):
    """Rolling z-score of ``s`` against statistics of the preceding window.

    Mean and standard deviation are shifted by one row, so the current value
    is excluded from its own normalisation. Infinite results (zero standard
    deviation) are turned into NaN.
    """
    window = s.rolling(win, min_periods=minp)
    prev_mean = window.mean().shift(1)
    prev_std = window.std().shift(1)
    scores = (s - prev_mean) / prev_std
    return scores.replace([np.inf, -np.inf], np.nan)


def rolling_mad(x):
    """Median absolute deviation of ``x`` around its median."""
    center = np.median(x)
    deviations = np.abs(x - center)
    return np.median(deviations)


def rolling_robust_z(s: pd.Series, win=Z_WIN, minp=Z_MINP):
    """Robust rolling z-score based on the median and the MAD.

    Both statistics come from the window shifted by one row, so the current
    value does not influence its own score. The MAD is scaled by 1.4826; a
    zero MAD is treated as missing, and infinite results become NaN.
    """
    window = s.rolling(win, min_periods=minp)
    prev_median = window.median().shift(1)
    prev_mad = window.apply(rolling_mad, raw=True).shift(1)
    scale = 1.4826 * prev_mad.replace(0, np.nan)
    scores = (s - prev_median) / scale
    return scores.replace([np.inf, -np.inf], np.nan)


def cum_return(x):
    """Compounded total return of a sequence of simple returns (0.0 when empty)."""
    returns = np.asarray(x, float)
    if not returns.size:
        return 0.0
    growth = np.prod(1.0 + returns)
    return float(growth - 1.0)


def sharpe(x, days_per_year=365):
    """Annualised Sharpe-style ratio of a sequence of per-period returns.

    Args:
        x: Per-period returns.
        days_per_year: Number of periods per year used for the ``sqrt``
            annualisation factor.

    Returns:
        ``mean / sample_std * sqrt(days_per_year)`` as a float, or 0.0 when
        fewer than two observations are available.
    """
    x = np.asarray(x, float)
    # The sample std (ddof=1) needs at least two points; with one it is NaN,
    # which would propagate into the reported metrics.
    if x.size < 2:
        return 0.0
    mu, sd = np.mean(x), np.std(x, ddof=1)
    # The small epsilon keeps a constant series from dividing by zero.
    return float(mu / (sd + 1e-12) * np.sqrt(days_per_year))


def win_rate_on_trades(x):
    """Share of strictly positive returns in ``x`` (0.0 when there are no trades)."""
    trades = np.asarray(x, float)
    if not trades.size:
        return 0.0
    return float(np.mean(trades > 0))


def priorsafe_threshold(scores, y_true, min_pos=MIN_POS_RATE, max_pos=MAX_POS_RATE):
    """Score threshold that flags roughly the observed share of positives.

    The positive rate of ``y_true`` is clipped to ``[min_pos, max_pos]`` and
    the threshold is the matching upper quantile of ``scores``.
    """
    positive_share = float((y_true == 1).mean())
    target_rate = float(np.clip(positive_share, min_pos, max_pos))
    cutoff = np.quantile(scores, 1.0 - target_rate)
    return float(cutoff)


def train_adv(X_train, X_test, feats):
    """Adversarial validation: fit a classifier that separates train rows from test rows.

    Rows are labelled 0 (train) / 1 (test), shuffled with a seeded generator
    and split 80/20 into fit and validation parts.

    Returns:
        ``(auc, importance, booster)``: the ROC AUC on the validation part,
        the booster's gain-based feature importance, and the booster itself.
    """
    from sklearn.metrics import roc_auc_score as _auc

    # Origin labels: 0 for train rows, 1 for test rows.
    X = np.vstack([X_train, X_test])
    y = np.concatenate([
        np.zeros(len(X_train), dtype=int),
        np.ones(len(X_test), dtype=int),
    ])

    # Seeded shuffle so the 80/20 split mixes both origins reproducibly.
    order = np.random.default_rng(RANDOM_STATE).permutation(len(y))
    X, y = X[order], y[order]

    cut = int(0.8 * len(y))
    dtr = xgb.DMatrix(X[:cut], label=y[:cut], feature_names=feats)
    dva = xgb.DMatrix(X[cut:], label=y[cut:], feature_names=feats)

    params = {
        "objective": "binary:logistic",
        "eval_metric": "auc",
        "max_depth": 2,
        "min_child_weight": 6,
        "eta": 0.05,
        "subsample": 0.8,
        "colsample_bytree": 0.8,
        "reg_lambda": 4.0,
        "reg_alpha": 0.5,
        "tree_method": "hist",
        "seed": RANDOM_STATE,
    }
    bst = xgb.train(
        params,
        dtr,
        num_boost_round=800,
        evals=[(dtr, "train"), (dva, "valid")],
        early_stopping_rounds=100,
        verbose_eval=False,
    )

    # Score with the trees up to the early-stopping optimum.
    best_it = getattr(bst, "best_iteration", 0)
    valid_pred = bst.predict(dva, iteration_range=(0, best_it + 1))
    auc = _auc(y[cut:], valid_pred)
    imp = bst.get_score(importance_type="gain")
    return auc, imp, bst


# ==================== MAIN LOOP ====================
dashboard_t7 = {}

for SYM, cfg in ASSETS_T7.items():
    try:
        calendar = cfg["calendar"]
        fp = cfg["features"]
        print(f"\n==================== {SYM} T+7 ====================")
        df = load_features_t7(fp)

        # EMA200 для риск-on фильтра
        if REGIME_EMA:
            df["ema_200"] = df["close"].ewm(span=REGIME_EMA, adjust=False).mean()
        else:
            df["ema_200"] = np.nan

        time_col = "close_time"
        for col in ["y7_class", "ret_7d_ahead", "ret_1d"]:
            if col not in df.columns:
                raise KeyError(f"{SYM}: нет '{col}'")

        # ===== Stationarization =====
        ban_cols = {
            "y7_class", "ret_7d_ahead", time_col, "close",
            "open_time", "close_time", "ignore", "symbol"
        }
        num_cols = [
            c for c in df.columns
            if c not in ban_cols and np.issubdtype(df[c].dtype, np.number)
        ]

        Z = pd.DataFrame(index=df.index)
        for c in num_cols:
            Z[f"z{Z_WIN}_{c}"] = rolling_z(df[c].astype(float))

        heavy = [
            c for c in [
                "volume", "num_trades", "taker_buy_base", "taker_buy_quote",
                "volume_log", "num_trades_log", "taker_buy_base_log", "taker_buy_quote_log"
            ] if c in df.columns
        ]
        for c in heavy:
            Z[f"rz{Z_WIN}_{c}"] = rolling_robust_z(df[c].astype(float))

        work = pd.concat(
            [df[[time_col, "y7_class", "ret_7d_ahead", "close", "ema_200"]], Z],
            axis=1
        ).dropna().reset_index(drop=True)

        # ===== Labels & splits =====
        y_raw   = work["y7_class"].astype(int).to_numpy()
        y_ret   = work["ret_7d_ahead"].to_numpy()
        times   = work[time_col].to_numpy()

        # ε-разметка: игнорируем "маленькие" движения
        y_eps = np.full_like(y_raw, -1)
        y_eps[y_ret >=  EPSILON_T7] = 1
        y_eps[y_ret <= -EPSILON_T7] = 0

        feat_cols_all = [
            c for c in work.columns
            if c not in {time_col, "y7_class", "ret_7d_ahead"}
        ]
        X_all = work[feat_cols_all].to_numpy(dtype=np.float32)

        if len(work) < (TEST_LEN + VAL_LEN + 300 + EMBARGO_DAYS):
            warnings.warn(f"{SYM}: данных мало для надёжной валидации T+7.")

        test_start = len(work) - TEST_LEN
        cutoff_time = work.iloc[test_start][time_col] - pd.Timedelta(days=365 * LOOKBACK_YEARS)
        train_mask_time = work[time_col] >= cutoff_time
        train_mask_time.iloc[test_start:] = False

        pool_idx = np.where(train_mask_time & (y_eps != -1))[0]
        if len(pool_idx) < (VAL_LEN + 300):
            warnings.warn(f"{SYM}: мало train после ε-фильтра T+7.")
        val_idx   = pool_idx[-VAL_LEN:]
        emb_start = max(0, pool_idx[-VAL_LEN] - EMBARGO_DAYS)
        train_idx = pool_idx[pool_idx < emb_start]
        test_idx  = np.arange(test_start, len(work))

        X_tr0, y_tr0, t_tr0, r_tr0 = (
            X_all[train_idx],
            y_eps[train_idx],
            times[train_idx],
            y_ret[train_idx],
        )
        X_va0, y_va0, t_va0, r_va0 = (
            X_all[val_idx],
            y_eps[val_idx],
            times[val_idx],
            y_ret[val_idx],
        )

        # ===== Adversarial pruning + IPW =====
        X_adv_train = X_all[np.where(train_mask_time)[0]]
        X_adv_test  = X_all[test_idx]

        ADV_TARGET_AUC  = 0.85
        SHIFT_DROP_MAX  = 60
        MIN_FEATS       = 15

        feat_cols = feat_cols_all.copy()
        adv_auc, adv_imp, adv_bst = train_adv(X_adv_train, X_adv_test, feat_cols)
        adv_auc_initial = float(adv_auc)
        drop_list, iters = [], 0

        while adv_auc > ADV_TARGET_AUC and iters < SHIFT_DROP_MAX and len(feat_cols) > MIN_FEATS:
            if not adv_imp:
                break
            top_feat = sorted(adv_imp.items(), key=lambda x: -x[1])[0][0]
            drop_list.append((top_feat, float(adv_auc)))
            feat_cols.remove(top_feat)
            cols_idx_tmp = [feat_cols_all.index(f) for f in feat_cols]
            adv_auc, adv_imp, adv_bst = train_adv(
                X_adv_train[:, cols_idx_tmp],
                X_adv_test[:, cols_idx_tmp],
                feat_cols,
            )
            iters += 1

        cols_idx = [feat_cols_all.index(f) for f in feat_cols]
        X_tr0, X_va0 = X_tr0[:, cols_idx], X_va0[:, cols_idx]
        X_test_full  = X_all[test_idx][:, cols_idx]
        adv_auc_final = float(adv_auc)

        # IPW
        dadv_pool = xgb.DMatrix(X_tr0, feature_names=feat_cols)
        p_testlike = adv_bst.predict(
            dadv_pool,
            iteration_range=(0, getattr(adv_bst, "best_iteration", 0) + 1)
        )
        ipw = p_testlike / np.clip(1.0 - p_testlike, 1e-6, None)
        ipw = np.clip(ipw, 0.2, 5.0)
        ipw = ipw / ipw.mean()

        # ===== Weights =====
        days_from_end = (t_tr0.max() - t_tr0).astype('timedelta64[D]').astype(int)
        w_time = 0.5 ** (days_from_end / HALF_LIFE_DAYS)
        w_mag  = 1.0 + MAG_ALPHA * np.clip(np.abs(r_tr0), 0, MAG_CAP)
        w_tr   = (w_time * w_mag * ipw).astype(np.float32)

        # ===== Classifier =====
        pos, neg = int((y_tr0 == 1).sum()), int((y_tr0 == 0).sum())
        scale_pos_weight = (neg / max(pos, 1)) if pos > 0 else 1.0
        dtr_clf = xgb.DMatrix(X_tr0, label=y_tr0, weight=w_tr, feature_names=feat_cols)
        dva_clf = xgb.DMatrix(X_va0, label=y_va0, feature_names=feat_cols)
        clf_params = {**CLF_PARAMS, "scale_pos_weight": scale_pos_weight}
        bst_clf = xgb.train(
            clf_params, dtr_clf, num_boost_round=N_ROUNDS_CLF,
            evals=[(dtr_clf, "train"), (dva_clf, "valid")],
            early_stopping_rounds=EARLY_STOP, verbose_eval=False
        )
        it_clf = getattr(bst_clf, "best_iteration", None)
        proba_va_raw = bst_clf.predict(
            dva_clf,
            iteration_range=(0, int(it_clf) + 1) if it_clf is not None else (0, 0)
        )

        # Isotonic calibration
        cw0 = (len(y_va0) / (2 * max(1, (y_va0 == 0).sum())))
        cw1 = (len(y_va0) / (2 * max(1, (y_va0 == 1).sum())))
        val_w = np.where(y_va0 == 1, cw1, cw0).astype(np.float32)
        iso = IsotonicRegression(out_of_bounds="clip")
        iso.fit(proba_va_raw, y_va0, sample_weight=val_w)
        proba_va = iso.transform(proba_va_raw)

        # ===== Regressor (magnitude) =====
        reg_idx_all = np.where(train_mask_time)[0]
        X_reg_all = X_all[reg_idx_all][:, cols_idx]
        y_reg_all = y_ret[reg_idx_all]
        X_va_reg  = X_all[val_idx][:, cols_idx]
        y_va_reg  = y_ret[val_idx]

        dtr_reg = xgb.DMatrix(
            X_reg_all[:-(VAL_LEN + EMBARGO_DAYS)],
            label=y_reg_all[:-(VAL_LEN + EMBARGO_DAYS)],
            feature_names=feat_cols
        )
        dva_reg = xgb.DMatrix(X_va_reg, label=y_va_reg, feature_names=feat_cols)
        bst_reg = xgb.train(
            REG_PARAMS, dtr_reg, num_boost_round=N_ROUNDS_REG,
            evals=[(dtr_reg, "train"), (dva_reg, "valid")],
            early_stopping_rounds=EARLY_STOP, verbose_eval=False
        )
        it_reg = getattr(bst_reg, "best_iteration", None)
        ret_va_pred = bst_reg.predict(
            dva_reg,
            iteration_range=(0, int(it_reg) + 1) if it_reg is not None else (0, 0)
        )
        scale_vol = max(1e-6, np.std(y_va_reg))
        ret_score_va = np.tanh(ret_va_pred / (3 * scale_vol))

        # ===== Threshold search =====
        alphas = np.linspace(0.0, 1.0, 11)
        thr_grid = np.linspace(0.2, 0.8, 121)
        best = {"bal_acc": -1, "alpha": 0.0, "thr": 0.5}

        for a in alphas:
            blend = (1 - a) * proba_va + a * (ret_score_va * 0.5 + 0.5)
            for thr in thr_grid:
                y_hat = (blend >= thr).astype(int)
                pos_rate = y_hat.mean()
                if pos_rate < MIN_POS_RATE or pos_rate > MAX_POS_RATE:
                    continue
                ba = balanced_accuracy_score(y_va0, y_hat)
                if ba > best["bal_acc"]:
                    best = {"bal_acc": float(ba), "alpha": float(a), "thr": float(thr)}
            # priorsafe
            thr_p = priorsafe_threshold(blend, y_va0)
            y_hat_p = (blend >= thr_p).astype(int)
            pos_rate_p = y_hat_p.mean()
            if MIN_POS_RATE <= pos_rate_p <= MAX_POS_RATE:
                ba_p = balanced_accuracy_score(y_va0, y_hat_p)
                if ba_p > best["bal_acc"]:
                    best = {"bal_acc": float(ba_p), "alpha": float(a), "thr": float(thr_p)}

        alpha_final = best["alpha"]
        thr_final   = best["thr"]

        # ===== HOLDOUT =====
        dtest = xgb.DMatrix(X_test_full, feature_names=feat_cols)
        proba_test_raw = bst_clf.predict(
            dtest,
            iteration_range=(0, int(it_clf) + 1) if it_clf is not None else (0, 0)
        )
        proba_test = iso.transform(proba_test_raw)

        ret_test_pred = bst_reg.predict(
            dtest,
            iteration_range=(0, int(it_reg) + 1) if it_reg is not None else (0, 0)
        )
        ret_score_test = np.tanh(ret_test_pred / (3 * scale_vol))

        blend_test = (1 - alpha_final) * proba_test + alpha_final * (ret_score_test * 0.5 + 0.5)

        y_test_raw = y_raw[test_idx]
        dates_test = pd.to_datetime(times[test_idx])

        # trade-gate по модулю прогнозируемого ret_score
        gate = float(np.quantile(np.abs(ret_score_va), TRADE_GATE_Q))
        trade_mask = (np.abs(ret_score_test) >= gate)

        # regime filter (EMA200)
        if REGIME_EMA:
            close_test = work.loc[test_idx, "close"].to_numpy()
            ema200_test = work.loc[test_idx, "ema_200"].to_numpy()
            regime_on = close_test > ema200_test
        else:
            regime_on = np.ones_like(trade_mask, dtype=bool)

        decision_long = ((blend_test >= thr_final) & trade_mask & regime_on).astype(int)

        # ===== Metrics (directional) =====
        auc_test = roc_auc_score(y_test_raw, blend_test) if len(np.unique(y_test_raw)) == 2 else None
        metrics_raw = {
            "acc": float(accuracy_score(y_test_raw, decision_long)),
            "bal_acc": float(balanced_accuracy_score(y_test_raw, decision_long)),
            "f1": float(f1_score(y_test_raw, decision_long, zero_division=0)),
            "precision": float(precision_score(y_test_raw, decision_long, zero_division=0)),
            "recall": float(recall_score(y_test_raw, decision_long, zero_division=0)),
            "auc": float(auc_test) if auc_test is not None else None,
            "pred_pos_rate": float(decision_long.mean()),
        }

        # ===== Business metrics: non-overlapping 7d trades =====
        r_test = y_ret[test_idx].astype(float)   # this is ret_7d_ahead
        dec_all = decision_long
        gate_all = trade_mask

        # Step 7: every STEP_TPLUS7 days we open a new 7-day trade, so only every
        # STEP_TPLUS7-th test row is sampled below.
        idx_non_overlap = np.arange(0, len(r_test), STEP_TPLUS7)

        r_test_step      = r_test[idx_non_overlap]
        dec_step         = dec_all[idx_non_overlap]
        gate_step        = gate_all[idx_non_overlap]

        # Buy&Hold: take every sampled return
        ret_bh_step      = r_test_step
        # Strategy: only where decision_long == 1 (other periods contribute 0)
        ret_strat_step   = dec_step * r_test_step

        bh_total     = cum_return(ret_bh_step)
        strat_total  = cum_return(ret_strat_step)
        excess_total = strat_total - bh_total

        # Confident
        # NOTE(review): decision_long is already AND-ed with trade_mask where it
        # is built, so gate_step & exec_mask_all equals exec_mask_all and the
        # "confident" executed figures repeat the plain executed ones — confirm
        # whether a stricter gate was intended here.
        exec_mask_all   = (dec_step == 1)
        exec_mask_conf  = gate_step & exec_mask_all

        ret_exec_all    = r_test_step[exec_mask_all]
        ret_exec_conf   = r_test_step[exec_mask_conf]

        # Benchmark for the confident variant: hold through every sampled period
        # that passes the gate, whether or not a long was actually taken.
        bh_conf_total   = cum_return(r_test_step[gate_step])
        conf_total      = cum_return(ret_exec_conf)
        excess_conf     = conf_total - bh_conf_total

        # "Sharpe (all, long-only)" is computed on the full stepped series
        # (zeros for skipped periods), while "Sharpe (confident executed)" uses
        # only the executed trades, so the two are not directly comparable.
        biz = {
            "symbol": SYM,
            "test_period": [str(dates_test.min()), str(dates_test.max())],
            "Strategy Return (Long-only T+7 non-overlap)": strat_total,
            "Buy&Hold Return (T+7 non-overlap)": bh_total,
            "Excess Return": excess_total,
            "Confident Return (gated, non-overlap)": conf_total,
            "Confident Excess Return": excess_conf,
            "Win Rate (executed)": win_rate_on_trades(ret_exec_all),
            "Win Rate (confident executed)": win_rate_on_trades(ret_exec_conf),
            "Sharpe (all, long-only)": sharpe(ret_strat_step),
            "Sharpe (confident executed)": sharpe(ret_exec_conf),
            "Coverage (gate_non_overlap)": float(gate_step.mean()),
            "Executed trades (all_non_overlap)": int(exec_mask_all.sum()),
            "Executed trades (confident_non_overlap)": int(exec_mask_conf.sum()),
        }

        # ===== UX export =====
        # The UX view uses proba_test directly rather than the blended score.
        up_prob_full = proba_test
        # 1 where the probability reaches the UX_SOFT_THR threshold, else 0.
        ux_soft_buy  = (up_prob_full >= UX_SOFT_THR).astype(int)

        def ux_verdict(p):
            """Translate an up-probability into a three-way UX label.

            Values at or above ``0.5 + UX_CONF_BAND`` map to the buy label,
            values at or below ``0.5 - UX_CONF_BAND`` map to the caution label,
            and everything else (including NaN) maps to the neutral label.
            """
            buy_cutoff = 0.5 + UX_CONF_BAND
            caution_cutoff = 0.5 - UX_CONF_BAND
            if p >= buy_cutoff:
                verdict = "Покупай"
            elif p <= caution_cutoff:
                verdict = "Осторожно"
            else:
                verdict = "Нейтрально"
            return verdict

        # Label every test-row probability with its three-way UX verdict.
        ux_labels = np.array([ux_verdict(p) for p in up_prob_full])

        # ===== EXPORT =====
        # Artifact locations, keyed by symbol and model version.
        pred_trading_path = DATA_DIR / f"predictions_trading_{SYM}_{MODEL_VERSION_T7}.csv"
        pred_ux_path      = DATA_DIR / f"predictions_ux_{SYM}_{MODEL_VERSION_T7}.csv"
        clf_path          = MODELS_DIR / f"{SYM}_{MODEL_VERSION_T7}.clf.json"
        reg_path          = MODELS_DIR / f"{SYM}_{MODEL_VERSION_T7}.reg.json"
        meta_path         = MODELS_DIR / f"{SYM}_{MODEL_VERSION_T7}.meta.json"

        # Both CSVs carry the same target dates (7 steps ahead of each as-of
        # date per nth_trading_day), so resolve them once instead of walking the
        # whole test set twice.
        pred_dates = [nth_trading_day(dt, calendar, 7) for dt in dates_test]

        # For trading we keep every test date, as in T+1: daily signals for T+7.
        pd.DataFrame({
            "symbol": SYM,
            "asof_time": dates_test,
            "pred_date": pred_dates,
            "blend_score": blend_test,
            "gate": gate_all.astype(int),
            "regime_on": regime_on.astype(int),
            "decision_long": dec_all,
            "actual_ret_tplus7": r_test,
        }).to_csv(pred_trading_path, index=False)

        # UX export: probability, soft-buy flag and verdict label per as-of date.
        pd.DataFrame({
            "symbol": SYM,
            "asof_time": dates_test,
            "pred_date": pred_dates,
            "up_prob": up_prob_full,
            "ux_soft_buy": ux_soft_buy,
            "ux_verdict": ux_labels,
        }).to_csv(pred_ux_path, index=False)

        # Persist both models and a JSON sidecar that records how this run was
        # configured and how it scored on the hold-out.
        ts = int(time.time())  # creation time as a whole-second Unix timestamp
        bst_clf.save_model(clf_path)
        bst_reg.save_model(reg_path)
        # NOTE(review): json.dump emits non-standard NaN/Infinity tokens for
        # non-finite floats; confirm consumers tolerate that if a metric in
        # metrics_raw or biz can be NaN.
        with open(meta_path, "w", encoding="utf-8") as f:
            json.dump({
                "symbol": SYM,
                "version": MODEL_VERSION_T7,
                "created_at": ts,
                "alpha": float(alpha_final),
                "thr": float(thr_final),
                "gate_q": TRADE_GATE_Q,
                "regime_ema": REGIME_EMA,
                "features": feat_cols,
                "n_features": len(feat_cols),
                "adv_auc_initial": float(adv_auc_initial),
                "adv_auc_final": float(adv_auc_final),
                # only the first element of each drop_list entry is stored
                "shift_dropped": [d[0] for d in drop_list],
                "holdout_metrics_raw": metrics_raw,
                "business": biz,
            }, f, ensure_ascii=False, indent=2)

        # Console summary for this symbol. Float business values are rounded to
        # 4 decimals for display only; biz itself is left untouched.
        biz_for_display = {
            biz_key: (round(biz_val, 4) if isinstance(biz_val, float) else biz_val)
            for biz_key, biz_val in biz.items()
        }
        artifact_paths = {
            "pred_trading_csv": str(pred_trading_path),
            "pred_ux_csv": str(pred_ux_path),
            "clf_model": str(clf_path),
            "reg_model": str(reg_path),
            "meta": str(meta_path),
        }
        run_summary = {
            "symbol": SYM,
            "val_best": {"alpha": float(alpha_final), "thr": float(thr_final)},
            "holdout_metrics_raw": metrics_raw,
            "business": biz_for_display,
            "artifacts": artifact_paths,
        }
        print(json.dumps(run_summary, indent=2, ensure_ascii=False))

        # Compact per-symbol row for the dashboard; AUC stays None when it was
        # not computed for this symbol.
        holdout_auc = metrics_raw["auc"]
        dashboard_t7[SYM] = {
            "symbol": SYM,
            "holdout_bal_acc": round(metrics_raw["bal_acc"], 4),
            "holdout_auc": None if holdout_auc is None else round(holdout_auc, 4),
            "pos_rate": round(metrics_raw["pred_pos_rate"], 4),
        }

    # Best-effort handling: any failure for this symbol is reported on stdout
    # and not re-raised.
    # NOTE(review): only str(e) is printed, so the traceback is lost — consider
    # logging it if failures need to be diagnosed.
    except Exception as e:
        print(f"[ERROR] {SYM} T+7: {e}")

# Write the collected per-symbol dashboard rows to one JSON file and report
# where it was saved.
dashboard_path = DATA_DIR / f"dashboard_{MODEL_VERSION_T7}.json"
with open(dashboard_path, "w", encoding="utf-8") as dashboard_file:
    json.dump(dashboard_t7, dashboard_file, ensure_ascii=False, indent=2)

print("\nALL DONE T+7. Dashboard:", dashboard_path)


{
  "symbol": "BTC",
  "val_best": {
    "alpha": 0.0,
    "thr": 0.45500000000000007
  },
  "holdout_metrics_raw": {
    "acc": 0.46111111111111114,
    "bal_acc": 0.5010337252875048,
    "f1": 0.4121212121212121,
    "precision": 0.6071428571428571,
    "recall": 0.3119266055045872,
    "auc": 0.4947021579015377,
    "pred_pos_rate": 0.3111111111111111
  },
  "business": {
    "symbol": "BTC",
    "test_period": [
      "2025-04-13 23:59:59.999000+00:00",
      "2025-10-09 23:59:59.999000+00:00"
    ],
    "Strategy Return (Long-only T+7 non-overlap)": 0.2695,
    "Buy&Hold Return (T+7 non-overlap)": 0.3138,
    "Excess Return": -0.0443,
    "Confident Return (gated, non-overlap)": 0.2695,
    "Confident Excess Return": -0.0443,
    "Win Rate (executed)": 0.75,
    "Win Rate (confident executed)": 0.75,
    "Sharpe (all, long-only)": 5.4668,
    "Sharpe (confident executed)": 10.4565,
    "Coverage (gate_non_overlap)": 1.0,
    "Executed trades (all_non_overlap)": 8,
    "Executed t