# 03 — Модель и бэктест (валидация + диагностика)

Порог и полярность подбираются только на ВАЛИДАЦИИ, тест — финальная честная оценка.
Диагностика trade_count и экспозиции помогает понять, почему сделок мало.

Полярность (pred vs -pred) рассматриваем как гиперпараметр,
поэтому выбираем её на валидации. Порог также фиксируется по валидации.
Это защищает от утечки информации из теста.

In [1]:
from pathlib import Path
import json
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import pickle

# Locate the project root: the first of (cwd, cwd's parent) that contains a
# `src` directory, falling back to cwd when neither does.
PROJECT_ROOT = Path.cwd().resolve()
ROOT = next(
    (
        candidate
        for candidate in (PROJECT_ROOT, PROJECT_ROOT.parent)
        if (candidate / "src").exists()
    ),
    PROJECT_ROOT,
)

# Put the root at the front of sys.path (once) so `src.*` imports resolve.
if sys.path.count(str(ROOT)) == 0:
    sys.path.insert(0, str(ROOT))

from src.features import (
    get_feature_columns,
    add_target,
    drop_na_for_training,
    check_target_alignment,
)
from src.model import TrainConfig, StandardScaler, train_mlp_model, predict, compute_regression_metrics
from src.backtest import backtest_long_short_horizon

# Input feature table and the directory that receives saved artifacts.
DATA_PATH = ROOT / "data" / "eurusd_features.parquet"
ARTIFACT_DIR = ROOT / "data" / "artifacts"
ARTIFACT_DIR.mkdir(parents=True, exist_ok=True)

# Load the features and put them in chronological order with a fresh 0..n-1 index.
df_raw = pd.read_parquet(DATA_PATH).sort_values("time").reset_index(drop=True)


In [2]:
# Cost passed as `cost_bps` to every backtest call (presumably basis points — confirm in src.backtest).
COST_BPS = 0.5
MIN_TRADES = 5  # relaxed to allow low-frequency but positive configs
# Drawdown floor applied by the legacy (single-pass) ranking.
MAX_DRAWDOWN = -0.12
# True: score each config per fold of the validation slice; False: one pass over the whole slice.
use_walkforward_selection = True
# Number of contiguous folds the validation slice is split into.
WF_FOLDS = 3
# Worst-fold drawdown floor applied by the walk-forward ranking.
WF_MAX_DRAWDOWN = -0.03
WF_MIN_PROFIT_FACTOR = 1.0  # relaxed to allow any positive fold performance
# Horizons to sweep; each value is used both as the target horizon and as the holding period.
candidate_hold_bars = [2, 3, 5]

# Every positional split below relies on the rows being in chronological order.
assert df_raw["time"].is_monotonic_increasing, "Time must be sorted"


In [3]:
# Moved into horizon sweep below


In [4]:
# Moved into horizon sweep below


In [5]:
# Moved into horizon sweep below


In [6]:
def split_folds(df_slice, pred, k_folds):
    """Split a frame and its predictions into contiguous, ordered folds.

    Rows are partitioned by position into at most ``k_folds`` consecutive
    chunks (``np.array_split`` semantics, so chunk sizes differ by at most one).

    Args:
        df_slice: DataFrame whose rows are split; each fold gets a fresh
            0..n-1 index.
        pred: One prediction per row of ``df_slice`` (array, list or Series).
            It is converted to a NumPy array so slicing is always positional,
            regardless of any pandas index it carries.
        k_folds: Requested number of folds; capped at ``len(df_slice)``.

    Returns:
        List of ``(fold_df, fold_pred)`` tuples in row order, where
        ``fold_pred`` is a NumPy array aligned with ``fold_df``.

    Raises:
        ValueError: If ``pred`` does not have exactly one value per row, or
            if there is no data to form at least one fold.
    """
    # Positional array: a Series with a non-default index would otherwise be
    # sliced by label here and re-aligned (to NaN) by a later ``assign``.
    pred_arr = np.asarray(pred)
    n_rows = len(df_slice)
    if pred_arr.ndim == 0 or pred_arr.shape[0] != n_rows:
        raise ValueError(
            f"pred must have one value per row: got shape {pred_arr.shape} "
            f"for {n_rows} rows."
        )

    k_folds = min(k_folds, n_rows)
    if k_folds < 1:
        raise ValueError("Not enough data for walk-forward folds.")

    return [
        (df_slice.iloc[idx].reset_index(drop=True), pred_arr[idx])
        for idx in np.array_split(np.arange(n_rows), k_folds)
        if len(idx)
    ]


def sweep_configs_single(
    df_slice,
    pred,
    thresholds,
    regimes,
    sizing_modes,
    target_ann_vols,
    hold_bars,
    cost_bps,
):
    """Backtest every configuration once on the whole slice.

    The grid is the cross product of thresholds, regimes, sizing modes and
    target annual vols, except that "discrete" sizing is only combined with
    ``target_ann_vol=None``.

    Returns:
        DataFrame with one row per evaluated configuration: the four grid
        parameters followed by selected entries of the backtest result's
        ``metrics`` and ``debug`` dictionaries.
    """
    metric_fields = (
        "sharpe",
        "total_return",
        "max_drawdown",
        "trade_count",
        "hit_rate",
        "payoff_ratio",
        "profit_factor",
        "monthly_return_est",
        "mean_monthly",
        "median_monthly",
        "pct_positive_months",
        "worst_month",
        "best_month",
    )
    debug_fields = ("consistency_abs", "pred_abs_p90", "pred_abs_p95", "pred_std")

    # Lazily enumerate the grid in the same order as four nested loops.
    grid = (
        (th, reg, sm, tav)
        for th in thresholds
        for reg in regimes
        for sm in sizing_modes
        for tav in target_ann_vols
        if sm != "discrete" or tav is None
    )

    records = []
    for th, reg, sm, tav in grid:
        result = backtest_long_short_horizon(
            df_slice.assign(pred=pred),
            threshold=th,
            hold_bars=hold_bars,
            cost_bps=cost_bps,
            regime=reg,
            sizing_mode=sm,
            target_ann_vol=tav,
        )
        metrics, debug = result.metrics, result.debug

        record = {
            "threshold": float(th),
            "regime": reg,
            "sizing_mode": sm,
            "target_ann_vol": tav,
        }
        record.update((name, metrics[name]) for name in metric_fields)
        record.update((name, debug[name]) for name in debug_fields)
        counts = debug["signal_counts"]
        record.update(
            signal_long=counts["long"],
            signal_short=counts["short"],
            signal_none=counts["none"],
        )
        records.append(record)
    return pd.DataFrame(records)


def sweep_configs_walkforward(
    df_slice,
    pred,
    thresholds,
    regimes,
    sizing_modes,
    target_ann_vols,
    hold_bars,
    cost_bps,
    folds,
):
    """Backtest every configuration on each fold and aggregate across folds.

    The grid matches the single-pass sweep: "discrete" sizing is only
    combined with ``target_ann_vol=None``. ``df_slice`` and ``pred`` are
    accepted for signature parity but not read; the data comes from
    ``folds``, a sequence of ``(fold_df, fold_pred)`` pairs.

    Returns:
        DataFrame with one row per configuration holding the grid parameters
        and per-fold aggregates (min / median / worst / best) of the backtest
        metrics, plus the largest ``consistency_abs`` seen in any fold.
    """

    def across_folds(dicts, key):
        # One float per fold for the requested entry.
        return np.array([entry[key] for entry in dicts], dtype=float)

    grid = (
        (th, reg, sm, tav)
        for th in thresholds
        for reg in regimes
        for sm in sizing_modes
        for tav in target_ann_vols
        if sm != "discrete" or tav is None
    )

    records = []
    for th, reg, sm, tav in grid:
        per_fold_metrics = []
        per_fold_debug = []
        for fold_df, fold_pred in folds:
            result = backtest_long_short_horizon(
                fold_df.assign(pred=fold_pred),
                threshold=th,
                hold_bars=hold_bars,
                cost_bps=cost_bps,
                regime=reg,
                sizing_mode=sm,
                target_ann_vol=tav,
            )
            per_fold_metrics.append(result.metrics)
            per_fold_debug.append(result.debug)

        sharpe = across_folds(per_fold_metrics, "sharpe")
        monthly_est = across_folds(per_fold_metrics, "monthly_return_est")
        profit_factor = across_folds(per_fold_metrics, "profit_factor")
        trade_count = across_folds(per_fold_metrics, "trade_count")
        max_drawdown = across_folds(per_fold_metrics, "max_drawdown")
        mean_monthly = across_folds(per_fold_metrics, "mean_monthly")
        median_monthly = across_folds(per_fold_metrics, "median_monthly")
        pct_positive = across_folds(per_fold_metrics, "pct_positive_months")
        worst_month = across_folds(per_fold_metrics, "worst_month")
        best_month = across_folds(per_fold_metrics, "best_month")
        consistency = across_folds(per_fold_debug, "consistency_abs")

        records.append(
            {
                "threshold": float(th),
                "regime": reg,
                "sizing_mode": sm,
                "target_ann_vol": tav,
                "sharpe_fold_min": float(sharpe.min()),
                "sharpe_fold_median": float(np.median(sharpe)),
                "monthly_return_est_fold_min": float(monthly_est.min()),
                "monthly_return_est_fold_median": float(np.median(monthly_est)),
                "profit_factor_fold_min": float(profit_factor.min()),
                "profit_factor_fold_median": float(np.median(profit_factor)),
                "trade_count_fold_min": float(trade_count.min()),
                "max_drawdown_fold_worst": float(max_drawdown.min()),
                "mean_monthly_fold_median": float(np.median(mean_monthly)),
                "median_monthly_fold_median": float(np.median(median_monthly)),
                "pct_positive_months_fold_median": float(np.median(pct_positive)),
                "worst_month_fold_worst": float(worst_month.min()),
                "best_month_fold_best": float(best_month.max()),
                "consistency_abs_fold_max": float(consistency.max()),
            }
        )
    return pd.DataFrame(records)


def filter_val_table_by_regimes(val_table, regimes):
    """Return a copy of the rows whose ``regime`` is one of ``regimes``.

    A ``None`` entry in ``regimes`` selects rows whose regime is missing.
    """
    regime_col = val_table["regime"]
    # One boolean mask per requested regime; missing values need isna().
    per_regime = [
        regime_col.isna() if reg is None else regime_col == reg
        for reg in regimes
    ]
    selected = pd.Series(False, index=val_table.index)
    for regime_mask in per_regime:
        selected = selected | regime_mask
    return val_table.loc[selected].copy()


def prune_regimes(val_table, regimes, use_walkforward):
    """Keep only regimes that show a positive result somewhere in the sweep.

    A regime survives when, over its rows, the best value of each of two
    "must be positive" columns is > 0 and the best profit factor is >= 1.0.
    The columns checked depend on ``use_walkforward`` (fold minima vs.
    single-pass metrics). Regimes with no rows are dropped.

    Returns:
        The surviving regimes in input order, or the original ``regimes``
        object unchanged when nothing survives.
    """
    if use_walkforward:
        positive_cols = ("sharpe_fold_min", "monthly_return_est_fold_min")
        profit_col = "profit_factor_fold_min"
    else:
        positive_cols = ("sharpe", "total_return")
        profit_col = "profit_factor"

    survivors = []
    for reg in regimes:
        regime_col = val_table["regime"]
        rows = val_table[regime_col.isna() if reg is None else regime_col == reg]
        if rows.empty:
            continue
        has_edge = (
            all(rows[col].max() > 0 for col in positive_cols)
            and rows[profit_col].max() >= 1.0
        )
        if has_edge:
            survivors.append(reg)
    return survivors or regimes


def rank_configs_legacy(val_table, min_trades, max_drawdown):
    """Rank single-pass sweep results by a tiered selection score.

    Rows must have at least ``min_trades`` trades and a drawdown no worse
    than ``max_drawdown``. Rows with positive return and profit factor > 1
    get a large bonus so they always outrank the rest; within a tier the
    order is driven by sharpe, then total return, then drawdown.

    Returns:
        The eligible rows with added ``priority`` and ``selection_score``
        columns, best first.

    Raises:
        ValueError: If no row satisfies the trade-count / drawdown limits.
    """
    enough_trades = val_table["trade_count"] >= min_trades
    drawdown_ok = val_table["max_drawdown"] >= max_drawdown
    eligible = val_table.loc[enough_trades & drawdown_ok].copy()
    if eligible.empty:
        raise ValueError("No configs meet MIN_TRADES / max drawdown constraints.")

    profitable = (eligible["total_return"] > 0) & (eligible["profit_factor"] > 1)
    eligible["priority"] = profitable

    # Terms are added left to right in decreasing order of weight.
    score = eligible["priority"].astype(int) * 1_000_000_000
    score = score + eligible["sharpe"] * 1_000_000
    score = score + eligible["total_return"] * 1_000
    score = score + eligible["max_drawdown"] * 100
    eligible["selection_score"] = score

    return eligible.sort_values("selection_score", ascending=False)


def rank_configs_walkforward(val_table, min_trades, max_drawdown, min_profit_factor):
    """Rank walk-forward sweep results by a weighted robustness score.

    A row satisfies the constraints when its worst fold has at least
    ``min_trades`` trades, a drawdown no worse than ``max_drawdown``, a
    profit factor of at least ``min_profit_factor`` and non-negative sharpe
    and monthly return estimate. If no row qualifies, all rows are ranked
    instead and flagged with ``constraints_ok=False``.

    Returns:
        The ranked rows (qualifying ones, or all of them in the fallback
        case) with added ``constraints_ok`` and ``robust_score`` columns,
        best first.
    """
    meets_constraints = (
        (val_table["trade_count_fold_min"] >= min_trades)
        & (val_table["max_drawdown_fold_worst"] >= max_drawdown)
        & (val_table["profit_factor_fold_min"] >= min_profit_factor)
        & (val_table["sharpe_fold_min"] >= 0)
        & (val_table["monthly_return_est_fold_min"] >= 0)
    )
    qualifying = val_table[meets_constraints]
    any_qualify = not qualifying.empty

    # Fall back to the full table when the constraints eliminate everything.
    candidates = (qualifying if any_qualify else val_table).copy()
    candidates["constraints_ok"] = any_qualify
    # Weights put each term in a comparable range for typical monthly returns/sharpes.
    candidates["robust_score"] = (
        1000 * candidates["monthly_return_est_fold_median"]
        + 200 * candidates["sharpe_fold_median"]
        + 100 * (candidates["profit_factor_fold_median"] - 1.0)
        - 300 * candidates["max_drawdown_fold_worst"].abs()
        + 500 * candidates["worst_month_fold_worst"]
    )
    return candidates.sort_values("robust_score", ascending=False)


In [7]:
# Quantile levels of |validation prediction| that become candidate entry thresholds.
quantiles = [0.0, 0.1, 0.2, 0.3, 0.5, 0.7, 0.8, 0.9, 0.95, 0.97]
# Position-sizing modes forwarded to the backtest as `sizing_mode`.
sizing_modes = ["discrete", "continuous"]
# Candidate `target_ann_vol` values; the sweeps pair "discrete" sizing only with None.
target_ann_vols = [None, 0.08, 0.12]


def run_sweep(
    df_val,
    pred,
    thresholds,
    regimes,
    hold_bars,
):
    """Sweep all configurations on the validation slice and pick the best one.

    Uses the walk-forward path (per-fold sweep, regime pruning, robust
    ranking) when the module-level ``use_walkforward_selection`` flag is set,
    otherwise the single-pass sweep with the legacy ranking.

    Returns:
        Tuple ``(val_table, ranked, best, score)``: the sweep table actually
        ranked, the ranked table, its top row, and that row's score as a float
        (``robust_score`` or ``selection_score`` depending on the path).
    """
    if not use_walkforward_selection:
        table = sweep_configs_single(
            df_val,
            pred,
            thresholds,
            regimes,
            sizing_modes,
            target_ann_vols,
            hold_bars=hold_bars,
            cost_bps=COST_BPS,
        )
        ranking = rank_configs_legacy(table, MIN_TRADES, MAX_DRAWDOWN)
        winner = ranking.iloc[0]
        return table, ranking, winner, float(winner["selection_score"])

    fold_pairs = split_folds(df_val, pred, WF_FOLDS)
    table = sweep_configs_walkforward(
        df_val,
        pred,
        thresholds,
        regimes,
        sizing_modes,
        target_ann_vols,
        hold_bars=hold_bars,
        cost_bps=COST_BPS,
        folds=fold_pairs,
    )

    # Narrow the table to regimes that showed some edge, unless that empties it.
    surviving = prune_regimes(table, regimes, use_walkforward_selection)
    if surviving != regimes:
        narrowed = filter_val_table_by_regimes(table, surviving)
        if len(narrowed) == 0:
            print("WARNING: regime pruning removed all configs; keeping full set.")
        else:
            table = narrowed
            print(f"Regimes kept after pruning: {surviving}")

    ranking = rank_configs_walkforward(
        table,
        MIN_TRADES,
        WF_MAX_DRAWDOWN,
        WF_MIN_PROFIT_FACTOR,
    )
    winner = ranking.iloc[0]
    return table, ranking, winner, float(winner["robust_score"])


def print_monthly_report(label, metrics):
    """Print a one-line summary of the monthly return statistics in ``metrics``."""
    fields = [
        f"{label} monthly: mean={metrics['mean_monthly']:.6f}",
        f"median={metrics['median_monthly']:.6f}",
        f"pos%={metrics['pct_positive_months']:.2f}",
        f"worst={metrics['worst_month']:.6f}",
        f"best={metrics['best_month']:.6f}",
    ]
    print(" ".join(fields))


# One entry per horizon in candidate_hold_bars; consumed by the final horizon selection.
candidate_results = []

# For each horizon: build the target, train a model, select polarity/threshold/regime/sizing
# on the validation slice, then backtest the frozen config on validation and test.
for hold_bars in candidate_hold_bars:
    print()
    print(f"=== HOLD_BARS={hold_bars} ===")
    # Rebuild the target for this horizon from the raw frame and restore time order.
    df_h = add_target(df_raw, horizon=hold_bars)
    df_h = df_h.sort_values("time").reset_index(drop=True)
    # Sanity check on the target construction; the returned difference must be ~0.
    alignment_diff = check_target_alignment(df_h, horizon=hold_bars)
    assert alignment_diff < 1e-12

    df_h = drop_na_for_training(df_h)
    feature_cols = get_feature_columns()
    X = df_h[feature_cols].values
    y = df_h["target"].values

    assert np.isfinite(X).all(), "NaNs or infs in features"
    assert df_h["time"].is_monotonic_increasing, "Time must be sorted"

    # Chronological 70% / 15% / 15% split by row position (no shuffling).
    n = len(df_h)
    train_end = int(n * 0.70)
    val_end = int(n * 0.85)

    X_train, y_train = X[:train_end], y[:train_end]
    X_val, y_val = X[train_end:val_end], y[train_end:val_end]
    X_test, y_test = X[val_end:], y[val_end:]

    df_val = df_h.iloc[train_end:val_end].reset_index(drop=True)
    df_test = df_h.iloc[val_end:].reset_index(drop=True)

    # The scaler is fitted on the training rows only and then applied to val/test.
    scaler = StandardScaler()
    X_train_s = scaler.fit_transform(X_train)
    X_val_s = scaler.transform(X_val)
    X_test_s = scaler.transform(X_test)

    # NOTE(review): no random seed is set in the visible cells, so results may differ
    # between runs unless train_mlp_model seeds internally — confirm.
    cfg = TrainConfig(epochs=200, batch_size=1024, lr=1e-3, weight_decay=1e-4, patience=5)
    model, history = train_mlp_model(X_train_s, y_train, X_val_s, y_val, cfg)

    pred_val = predict(model, X_val_s)
    pred_test = predict(model, X_test_s)

    # Candidate thresholds: de-duplicated quantiles of |prediction| on validation only.
    abs_pred = np.abs(pred_val)
    thresholds = sorted(set([float(np.quantile(abs_pred, q)) for q in quantiles]))

    regimes = [None, "adx", "h1_align", "adx_and_h1"]

    # Polarity is a hyperparameter: sweep the predictions as-is and sign-flipped.
    val_table_pos, ranked_pos, best_pos, score_pos = run_sweep(
        df_val,
        pred_val,
        thresholds,
        regimes,
        hold_bars,
    )
    val_table_neg, ranked_neg, best_neg, score_neg = run_sweep(
        df_val,
        -pred_val,
        thresholds,
        regimes,
        hold_bars,
    )

    # Ties go to the unflipped polarity.
    polarity = 1 if score_pos >= score_neg else -1
    # NOTE(review): in walk-forward mode the values printed here are robust_score,
    # although the label says selection_score.
    print("VAL best selection_score pos:", score_pos, "neg:", score_neg)
    print("Chosen polarity:", polarity)

    # Apply the chosen sign to both validation and test predictions.
    if polarity == 1:
        ranked_val = ranked_pos
        best_row = best_pos
        pred_val_final = pred_val
        pred_test_final = pred_test
    else:
        ranked_val = ranked_neg
        best_row = best_neg
        pred_val_final = -pred_val
        pred_test_final = -pred_test

    # Columns to display and the score column depend on the ranking path used.
    if use_walkforward_selection:
        display_cols = [
            "constraints_ok",
            "robust_score",
            "threshold",
            "regime",
            "sizing_mode",
            "target_ann_vol",
            "sharpe_fold_median",
            "sharpe_fold_min",
            "monthly_return_est_fold_median",
            "monthly_return_est_fold_min",
            "profit_factor_fold_median",
            "profit_factor_fold_min",
            "trade_count_fold_min",
            "max_drawdown_fold_worst",
            "mean_monthly_fold_median",
            "pct_positive_months_fold_median",
            "worst_month_fold_worst",
            "best_month_fold_best",
            "consistency_abs_fold_max",
        ]
        score_col = "robust_score"
    else:
        display_cols = [
            "selection_score",
            "priority",
            "threshold",
            "regime",
            "sizing_mode",
            "target_ann_vol",
            "sharpe",
            "total_return",
            "max_drawdown",
            "profit_factor",
            "trade_count",
            "hit_rate",
            "monthly_return_est",
            "mean_monthly",
            "pct_positive_months",
            "consistency_abs",
        ]
        score_col = "selection_score"

    print(ranked_val[display_cols].head(10))

    # The top-ranked row must be the score argmax, and its backtest consistency
    # diagnostic must be numerically zero.
    selection_argmax = ranked_val[score_col].idxmax()
    assert selection_argmax == best_row.name
    if use_walkforward_selection:
        assert float(best_row["consistency_abs_fold_max"]) < 1e-8
    else:
        assert float(best_row["consistency_abs"]) < 1e-8

    best_threshold = float(best_row["threshold"])
    best_regime = best_row["regime"]
    best_sizing_mode = best_row["sizing_mode"]
    best_target_ann_vol = best_row["target_ann_vol"]
    # A missing target vol is turned back into None before it is reused and saved.
    if pd.isna(best_target_ann_vol):
        best_target_ann_vol = None
    else:
        best_target_ann_vol = float(best_target_ann_vol)

    if use_walkforward_selection and not bool(best_row.get("constraints_ok", True)):
        print("WARNING: no configs met walk-forward constraints; using best robust_score anyway.")

    print("Selected (VAL):", best_threshold, best_regime, best_sizing_mode, best_target_ann_vol)

    selected_config = {
        "polarity": polarity,
        "threshold": best_threshold,
        "regime": best_regime,
        "sizing_mode": best_sizing_mode,
        "target_ann_vol": best_target_ann_vol,
        "hold_bars": hold_bars,
    }

    metrics_val = compute_regression_metrics(pred_val_final, y_val)
    metrics_test = compute_regression_metrics(pred_test_final, y_test)

    # Backtest the frozen configuration on the whole validation slice...
    bt_val = backtest_long_short_horizon(
        df_val.assign(pred=pred_val_final),
        threshold=best_threshold,
        hold_bars=hold_bars,
        cost_bps=COST_BPS,
        regime=best_regime,
        sizing_mode=best_sizing_mode,
        target_ann_vol=best_target_ann_vol,
    )

    # ...and, with identical parameters, on the held-out test slice.
    bt_test = backtest_long_short_horizon(
        df_test.assign(pred=pred_test_final),
        threshold=best_threshold,
        hold_bars=hold_bars,
        cost_bps=COST_BPS,
        regime=best_regime,
        sizing_mode=best_sizing_mode,
        target_ann_vol=best_target_ann_vol,
    )

    # Fail if the test backtest traded but its consistency diagnostic is not ~0.
    if bt_test.metrics["trade_count"] > 0 and bt_test.debug["consistency_abs"] >= 1e-5:
        raise AssertionError(
            f"TEST consistency_abs too large: {bt_test.debug['consistency_abs']}"
        )

    print_monthly_report("VAL", bt_val.metrics)
    print_monthly_report("TEST", bt_test.metrics)

    # Keep everything needed to pick a horizon later and to save its artifacts.
    candidate_results.append(
        {
            "hold_bars": hold_bars,
            "selected_config": selected_config,
            "selection_score": float(best_row[score_col]),
            "metrics_val": metrics_val,
            "metrics_test": metrics_test,
            "bt_val": bt_val,
            "bt_test": bt_test,
            "alignment_diff": alignment_diff,
            "df_val": df_val,
            "df_test": df_test,
            "model": model,
            "scaler": scaler,
        }
    )


# Backtest whose metrics drive horizon selection. The horizon is a
# hyperparameter like threshold and polarity, so it is chosen on validation;
# choosing it on the test backtest would bias the reported TEST metrics.
SELECTION_SPLIT = "bt_val"


def horizon_key(item, split=SELECTION_SPLIT):
    """Return the sort key used to compare horizon candidates (higher is better).

    Args:
        item: One entry of ``candidate_results``.
        split: Key of the backtest result to read metrics from
            (``"bt_val"`` by default).

    Returns:
        Tuple ordered by priority: worst month, max drawdown, profit factor,
        sharpe, total return.
    """
    metrics = item[split].metrics
    return (
        metrics["worst_month"],
        metrics["max_drawdown"],
        metrics["profit_factor"],
        metrics["sharpe"],
        metrics["total_return"],
    )


def _meets_positive_criteria(item, split=SELECTION_SPLIT):
    """Return True when sharpe and total return are > 0 and profit factor is > 1."""
    metrics = item[split].metrics
    return (
        metrics["sharpe"] > 0
        and metrics["total_return"] > 0
        and metrics["profit_factor"] > 1
    )


if not candidate_results:
    raise ValueError("No horizon candidates were evaluated.")

stable = [item for item in candidate_results if _meets_positive_criteria(item)]

# Prefer candidates that are profitable on validation; otherwise fall back to
# the best key among all candidates. On ties the earliest candidate wins.
if stable:
    final = max(stable, key=horizon_key)
    print()
    print("Selected horizon by VAL stability criteria.")
else:
    final = max(candidate_results, key=horizon_key)
    print()
    print("WARNING: No horizon met all positive VAL criteria; selecting best stability score anyway.")

# Unpack the winning candidate into the module-level names used by later cells.
(
    HOLD_BARS,
    selected_config,
    selection_score,
    metrics_val,
    metrics_test,
    bt_val,
    bt_test,
    target_alignment_diff,
    df_val_final,
    df_test_final,
    model,
    scaler,
) = (
    final[field]
    for field in (
        "hold_bars",
        "selected_config",
        "selection_score",
        "metrics_val",
        "metrics_test",
        "bt_val",
        "bt_test",
        "alignment_diff",
        "df_val",
        "df_test",
        "model",
        "scaler",
    )
)

# Individual settings of the selected configuration.
polarity, best_threshold, best_regime, best_sizing_mode, best_target_ann_vol = (
    selected_config[field]
    for field in ("polarity", "threshold", "regime", "sizing_mode", "target_ann_vol")
)

print()
print(f"=== FINAL SELECTION (HOLD_BARS={HOLD_BARS}) ===")
print("Selected config:", selected_config)

# Persist the configuration, the model object and the fitted scaler.
with (ARTIFACT_DIR / "selected_config.json").open("w", encoding="utf-8") as f:
    json.dump(selected_config, f, ensure_ascii=False, indent=2)

torch.save(model, ARTIFACT_DIR / "model.pt")
with (ARTIFACT_DIR / "scaler.pkl").open("wb") as f:
    pickle.dump(scaler, f)
print("Saved artifacts to", ARTIFACT_DIR)



=== HOLD_BARS=2 ===
max_abs_diff: 0.0


Regimes kept after pruning: ['h1_align']
VAL best selection_score pos: 815.2292984566529 neg: 646.0818175006362
Chosen polarity: 1
     constraints_ok  robust_score  threshold      regime sizing_mode  \
111            True    815.229298    0.00001  adx_and_h1  continuous   
110            True    813.871269    0.00001  adx_and_h1  continuous   
103            True    807.053445    0.00001         adx  continuous   
102            True    805.695401    0.00001         adx  continuous   
107            True    517.574526    0.00001    h1_align  continuous   
99             True    517.065988    0.00001        None  continuous   
106            True    514.481603    0.00001    h1_align  continuous   
98             True    514.141007    0.00001        None  continuous   
105            True    481.274478    0.00001    h1_align  continuous   
97             True    480.997313    0.00001        None  continuous   

     target_ann_vol  sharpe_fold_median  sharpe_fold_min  \
111            0

Regimes kept after pruning: [None, 'h1_align']


VAL best selection_score pos: 478.51433144314063 neg: 662.2515600784469
Chosen polarity: -1
     constraints_ok  robust_score  threshold    regime sizing_mode  \
91            False    662.251560   0.000017  h1_align  continuous   
90            False    662.136579   0.000017  h1_align  continuous   
88            False    546.962775   0.000017  h1_align    discrete   
89            False    544.870048   0.000017  h1_align  continuous   
104           False    395.866214   0.000018  h1_align    discrete   
106           False    383.657197   0.000018  h1_align  continuous   
107           False    381.882726   0.000018  h1_align  continuous   
105           False    372.510468   0.000018  h1_align  continuous   
72            False    231.972436   0.000015  h1_align    discrete   
73            False    219.786825   0.000015  h1_align  continuous   

     target_ann_vol  sharpe_fold_median  sharpe_fold_min  \
91             0.12            3.169094        -1.135766   
90             0.

Regimes kept after pruning: ['h1_align', 'adx_and_h1']


Regimes kept after pruning: ['h1_align']
VAL best selection_score pos: 578.8688828554075 neg: 349.5444535275651
Chosen polarity: 1
     constraints_ok  robust_score  threshold      regime sizing_mode  \
91             True    578.868883   0.000041    h1_align  continuous   
90             True    576.887485   0.000041    h1_align  continuous   
88             True    445.158882   0.000041    h1_align    discrete   
89             True    441.203110   0.000041    h1_align  continuous   
104            True    313.482713   0.000044    h1_align    discrete   
106            True    293.045087   0.000044    h1_align  continuous   
107            True    292.785412   0.000044    h1_align  continuous   
105            True    269.875965   0.000044    h1_align  continuous   
158            True    163.281667   0.000055  adx_and_h1  continuous   
159            True    162.237144   0.000055  adx_and_h1  continuous   

     target_ann_vol  sharpe_fold_median  sharpe_fold_min  \
91             0

In [8]:
# Regression and backtest metrics of the selected horizon, validation then test.
for title, payload in (
    ("VAL metrics:", metrics_val),
    ("TEST metrics:", metrics_test),
    ("VAL backtest metrics:", bt_val.metrics),
    ("TEST backtest metrics:", bt_test.metrics),
):
    print(title, payload)

# Subset of the test backtest's debug diagnostics.
print(
    "TEST debug:",
    {
        key: bt_test.debug[key]
        for key in ("equity_log", "trade_log_sum", "consistency_abs", "nonzero_pos_bars")
    },
)

print_monthly_report("VAL", bt_val.metrics)
print_monthly_report("TEST", bt_test.metrics)

test_bt_metrics = bt_test.metrics
print(
    "Final TEST metrics – "
    f"Sharpe: {test_bt_metrics['sharpe']:.2f}, "
    f"Total Return: {test_bt_metrics['total_return']:.2%}, "
    f"Profit Factor: {test_bt_metrics['profit_factor']:.2f}, "
    f"Max Drawdown: {test_bt_metrics['max_drawdown']:.2%}, "
    f"Worst Month: {test_bt_metrics['worst_month']:.2%}"
)

# Warn unless sharpe and total return are > 0 and profit factor is > 1.
if not all(
    test_bt_metrics[name] > floor
    for name, floor in (("sharpe", 0), ("total_return", 0), ("profit_factor", 1))
):
    print("WARNING: Final TEST metrics do not meet all positive criteria.")

# Notebook display: first ten trades of the test backtest.
bt_test.trades.head(10)


VAL metrics: {'mse': 1.251238970993822e-06, 'mae': 0.0007471335385277136, 'corr': -0.02388646032639334, 'dir_acc': 0.5082298974635726}
TEST metrics: {'mse': 5.762164870692993e-07, 'mae': 0.0005360157902948541, 'corr': 0.004318864930095248, 'dir_acc': 0.5070821529745042}
VAL backtest metrics: {'total_return': 0.03436040144495922, 'annualized_return': 0.12199460794251027, 'annualized_vol': 0.06041313869435261, 'sharpe': 1.8553852896210243, 'max_drawdown': -0.017178759978339952, 'hit_rate': 0.4929906542056075, 'trade_count': 428.0, 'avg_trade_return': 8.018990473964559e-05, 'avg_win': 0.0012468584573961604, 'avg_loss': 0.0010542205312535555, 'payoff_ratio': 1.1827301966229138, 'profit_factor': 1.1500279802906215, 'monthly_return_est': 0.009638487331373513, 'mean_monthly': 0.008567892821693734, 'median_monthly': 0.009022996695485008, 'pct_positive_months': 0.75, 'worst_month': -0.004598680147431589, 'best_month': 0.02082425804323651}
TEST backtest metrics: {'total_return': 0.01655812852062

Unnamed: 0,entry_idx,exit_idx,entry_time,exit_time,direction,entry_price,exit_price,size,trade_return,pnl
0,62,67,2025-09-04 08:45:00+00:00,2025-09-04 10:00:00+00:00,1,1.16578,1.16574,2.001739,-0.002175,-0.002175
1,131,136,2025-09-05 02:00:00+00:00,2025-09-05 03:15:00+00:00,1,1.16726,1.16718,2.015174,-0.00053,-0.00053
2,138,143,2025-09-05 03:45:00+00:00,2025-09-05 05:00:00+00:00,1,1.16712,1.16688,2.030099,-0.001003,-0.001003
3,145,150,2025-09-05 05:30:00+00:00,2025-09-05 06:45:00+00:00,1,1.16718,1.16692,2.09823,-3.1e-05,-3.1e-05
4,152,157,2025-09-05 07:15:00+00:00,2025-09-05 08:30:00+00:00,1,1.16718,1.16776,1.93792,0.001334,0.001334
5,159,164,2025-09-05 09:00:00+00:00,2025-09-05 10:15:00+00:00,1,1.16898,1.16944,2.117815,0.00073,0.00073
6,166,171,2025-09-05 10:45:00+00:00,2025-09-05 12:00:00+00:00,1,1.16914,1.16996,2.291301,-0.001698,-0.001698
7,173,178,2025-09-05 12:30:00+00:00,2025-09-05 13:45:00+00:00,1,1.1735,1.17489,2.217187,0.006483,0.006483
8,180,185,2025-09-05 14:15:00+00:00,2025-09-05 15:30:00+00:00,1,1.17338,1.17524,1.656281,-0.000167,-0.000167
9,187,192,2025-09-05 16:00:00+00:00,2025-09-05 17:15:00+00:00,1,1.17574,1.1732,1.585912,-0.004094,-0.004094


In [9]:
# Baseline: all-zero predictions with an infinite threshold on the test slice,
# using the selected holding horizon and the same cost.
baseline_kwargs = dict(
    threshold=float("inf"),
    hold_bars=HOLD_BARS,
    cost_bps=COST_BPS,
    regime=None,
    sizing_mode="discrete",
    target_ann_vol=None,
)
baseline_bt = backtest_long_short_horizon(
    df_test_final.assign(pred=0.0),
    **baseline_kwargs,
)
print("Baseline pred=0 metrics:", baseline_bt.metrics)


Baseline pred=0 metrics: {'total_return': 0.0, 'annualized_return': 0.0, 'annualized_vol': 0.0, 'sharpe': 0.0, 'max_drawdown': 0.0, 'hit_rate': 0.0, 'trade_count': 0.0, 'avg_trade_return': 0.0, 'avg_win': 0.0, 'avg_loss': 0.0, 'payoff_ratio': 0.0, 'profit_factor': 0.0, 'monthly_return_est': 0.0, 'mean_monthly': 0.0, 'median_monthly': 0.0, 'pct_positive_months': 0.0, 'worst_month': 0.0, 'best_month': 0.0}


In [10]:
# baseline computed above


Печатаем критерии остановки и возможные причины провала.
Если trade_count низкий, вероятно порог слишком высокий или предсказания почти нулевые.

In [11]:
# Shorthand for the selected horizon's test backtest outputs.
bt_stats = bt_test.metrics
bt_debug = bt_test.debug

# Selected configuration plus headline metrics, saved as results.json.
results_payload = dict(
    polarity=polarity,
    threshold=best_threshold,
    regime=best_regime,
    sizing_mode=best_sizing_mode,
    target_ann_vol=best_target_ann_vol,
    hold_bars=HOLD_BARS,
    selection_score=float(selection_score),
    val_metrics=metrics_val,
    test_metrics=metrics_test,
    test_backtest_metrics=bt_stats,
)
with (ARTIFACT_DIR / "results.json").open("w", encoding="utf-8") as f:
    json.dump(results_payload, f, ensure_ascii=False, indent=2)

# Pipeline sanity checks.
print("GOOD")
print(f"alignment_check_pass: {target_alignment_diff < 1e-12} (max_abs_diff={target_alignment_diff})")
print(f"consistency_abs: {bt_debug['consistency_abs']}")
print(f"trade_count: {bt_stats['trade_count']}")

# Headline performance figures.
print("BAD")
print(
    f"sharpe: {bt_stats['sharpe']:.3f} total_return: {bt_stats['total_return']:.3f} "
    f"profit_factor: {bt_stats['profit_factor']:.3f} hit_rate: {bt_stats['hit_rate']:.3f}"
)

# Rule-based hints; each message is built only when its rule fires.
print("NEXT")
next_lines = []
if bt_stats["profit_factor"] < 1:
    next_lines.append(
        f"profit_factor<1: avg_win={bt_stats['avg_win']:.6f} avg_loss={bt_stats['avg_loss']:.6f}; "
        "try lower thresholds or adjust regime filters."
    )
if abs(metrics_test["dir_acc"] - 0.50) <= 0.01 and abs(metrics_test["corr"]) <= 0.02:
    next_lines.append("model signal weak for this horizon; next step is strategy/rules or features (not metrics)")
if bt_stats["trade_count"] < MIN_TRADES:
    next_lines.append(
        f"too few trades: lower threshold quantiles; pred_abs_p90={bt_debug['pred_abs_p90']:.6f} "
        f"pred_abs_p95={bt_debug['pred_abs_p95']:.6f} pred_std={bt_debug['pred_std']:.6f}"
    )
if bt_stats["profit_factor"] < 1.05 or bt_stats["mean_monthly"] < 0:
    next_lines.append("NEXT: improve signal edge (threshold asymmetry / entry rules)")

if not next_lines:
    print("No immediate issues flagged by rules.")
else:
    print(*next_lines, sep="\n")


GOOD
alignment_check_pass: True (max_abs_diff=0.0)
consistency_abs: 1.6479873021779667e-15
trade_count: 283.0
BAD
sharpe: 0.924 total_return: 0.017 profit_factor: 1.094 hit_rate: 0.491
NEXT
model signal weak for this horizon; next step is strategy/rules or features (not metrics)
