# 03 — Модель и бэктест (валидация + диагностика)

Порог, полярность, режим-фильтр и способ сайзинга позиции (включая целевую волатильность) подбираются только на ВАЛИДАЦИИ; тест — финальная честная оценка.
Диагностика trade_count и экспозиции помогает понять, почему сделок мало.

Полярность (pred vs -pred) рассматриваем как гиперпараметр,
поэтому выбираем её на валидации. Порог также фиксируется по валидации.
Это защищает от утечки информации из теста.

In [1]:
from pathlib import Path
import json
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Resolve the repository root: the first of the working directory or its
# parent that contains a "src" folder; otherwise fall back to the working
# directory itself.
PROJECT_ROOT = Path.cwd().resolve()
ROOT = next(
    (base for base in (PROJECT_ROOT, PROJECT_ROOT.parent) if (base / "src").exists()),
    PROJECT_ROOT,
)

# Make the root importable so that the `src.*` imports resolve.
if str(ROOT) not in sys.path:
    sys.path.insert(0, str(ROOT))

from src.features import get_feature_columns, drop_na_for_training, check_target_alignment
from src.model import TrainConfig, StandardScaler, train_mlp_model, predict, compute_regression_metrics
from src.backtest import backtest_long_short_horizon

# Input feature file and the folder that later cells write artifacts into.
DATA_PATH = ROOT / "data" / "eurusd_features.parquet"
ARTIFACT_DIR = ROOT / "data" / "artifacts"
ARTIFACT_DIR.mkdir(parents=True, exist_ok=True)

# Load, clean with the project helper, then order rows by the "time" column
# and renumber the index from zero.
df = (
    drop_na_for_training(pd.read_parquet(DATA_PATH))
    .sort_values("time")
    .reset_index(drop=True)
)


In [2]:
# Backtest settings shared by every sweep and by the final evaluation.
HOLD_BARS = 3
COST_BPS = 0.5

# Constraints handed to rank_configs_legacy (single-slice selection).
MIN_TRADES = 50
MAX_DRAWDOWN = -0.12

# Walk-forward selection: switch, fold count and the constraints handed to
# rank_configs_walkforward.
use_walkforward_selection = True
WF_FOLDS = 3
WF_MAX_DRAWDOWN = -0.03
WF_MIN_PROFIT_FACTOR = 1.05

# Model inputs and regression target as plain arrays.
feature_cols = get_feature_columns()
X, y = df[feature_cols].values, df["target"].values

# Sanity checks before any modelling.
assert np.isfinite(X).all(), "NaNs or infs in features"
assert df["time"].is_monotonic_increasing, "Time must be sorted"

# The project helper returns a difference that must be (numerically) zero.
target_alignment_diff = check_target_alignment(df, horizon=HOLD_BARS)
assert target_alignment_diff < 1e-12


max_abs_diff: 0.0


In [3]:
# Positional 70% / 15% / 15% split into train / validation / test
# (df was sorted by "time" when it was loaded, so the split is in time order).
n = len(df)
train_end, val_end = int(n * 0.70), int(n * 0.85)

X_train, X_val, X_test = X[:train_end], X[train_end:val_end], X[val_end:]
y_train, y_val, y_test = y[:train_end], y[train_end:val_end], y[val_end:]

# Frames used by the backtests, re-indexed from zero.
df_val, df_test = (
    part.reset_index(drop=True)
    for part in (df.iloc[train_end:val_end], df.iloc[val_end:])
)


In [4]:
# Fit the scaler on the training split only, then apply it to val and test.
scaler = StandardScaler()
X_train_s = scaler.fit_transform(X_train)
X_val_s, X_test_s = (scaler.transform(part) for part in (X_val, X_test))


In [5]:
# Training configuration; the validation split is handed to the trainer too.
cfg = TrainConfig(
    epochs=200,
    batch_size=1024,
    lr=1e-3,
    weight_decay=1e-4,
    patience=5,
)
model, history = train_mlp_model(X_train_s, y_train, X_val_s, y_val, cfg)

# Predictions for the validation and test splits, in that order.
pred_val, pred_test = (predict(model, part) for part in (X_val_s, X_test_s))


In [6]:
def split_folds(df_slice, pred, k_folds):
    """Split a frame and its predictions into contiguous, ordered folds.

    Args:
        df_slice: DataFrame to split by row position.
        pred: Predictions aligned row-for-row with ``df_slice``. Any
            one-dimensional array-like (ndarray, list, pandas Series).
        k_folds: Requested number of folds; capped at the number of rows.

    Returns:
        List of ``(fold_df, fold_pred)`` tuples in row order. ``fold_df`` has
        a fresh 0..m-1 index and ``fold_pred`` is an ndarray of the same
        length.

    Raises:
        ValueError: If ``pred`` and ``df_slice`` differ in length, or if no
            fold can be formed (empty input or ``k_folds < 1``).
    """
    # Coerce to ndarray so that indexing below is positional. A pandas Series
    # would be indexed by label and would later be re-aligned by index when
    # assigned to the re-indexed fold frame.
    pred = np.asarray(pred)
    n_rows = len(df_slice)
    if len(pred) != n_rows:
        raise ValueError(
            f"pred has {len(pred)} rows but df_slice has {n_rows}; they must match."
        )

    k_folds = min(k_folds, n_rows)
    if k_folds < 1:
        raise ValueError("Not enough data for walk-forward folds.")

    # k_folds <= n_rows, so array_split never yields an empty chunk here.
    return [
        (df_slice.iloc[idx].reset_index(drop=True), pred[idx])
        for idx in np.array_split(np.arange(n_rows), k_folds)
    ]


def sweep_configs_single(
    df_slice,
    pred,
    thresholds,
    regimes,
    sizing_modes,
    target_ann_vols,
    hold_bars,
    cost_bps,
):
    """Backtest every grid configuration on one slice and tabulate the results.

    The grid is the Cartesian product of ``thresholds``, ``regimes``,
    ``sizing_modes`` and ``target_ann_vols`` (iterated in that nesting order).
    Combinations pairing the "discrete" sizing mode with a non-None target
    volatility are skipped.

    Args:
        df_slice: Frame passed to the backtest with ``pred`` attached as a
            "pred" column.
        pred: Predictions for ``df_slice``.
        thresholds, regimes, sizing_modes, target_ann_vols: Grid axes.
        hold_bars, cost_bps: Forwarded unchanged to every backtest call.

    Returns:
        DataFrame with one row per evaluated configuration: the configuration
        values, selected entries of the backtest's ``metrics`` and ``debug``
        mappings, and the long/short/none signal counts.
    """
    metric_keys = (
        "sharpe",
        "total_return",
        "max_drawdown",
        "trade_count",
        "hit_rate",
        "payoff_ratio",
        "profit_factor",
        "monthly_return_est",
        "mean_monthly",
        "median_monthly",
        "pct_positive_months",
        "worst_month",
        "best_month",
    )
    debug_keys = ("consistency_abs", "pred_abs_p90", "pred_abs_p95", "pred_std")

    # Lazily enumerate the admissible grid points in the original loop order.
    grid = (
        (th, reg, sm, tav)
        for th in thresholds
        for reg in regimes
        for sm in sizing_modes
        for tav in target_ann_vols
        if not (sm == "discrete" and tav is not None)
    )

    records = []
    for th, reg, sm, tav in grid:
        result = backtest_long_short_horizon(
            df_slice.assign(pred=pred),
            threshold=th,
            hold_bars=hold_bars,
            cost_bps=cost_bps,
            regime=reg,
            sizing_mode=sm,
            target_ann_vol=tav,
        )
        metrics, debug = result.metrics, result.debug
        signal_counts = debug["signal_counts"]

        record = {
            "threshold": float(th),
            "regime": reg,
            "sizing_mode": sm,
            "target_ann_vol": tav,
        }
        record.update({key: metrics[key] for key in metric_keys})
        record.update({key: debug[key] for key in debug_keys})
        record.update(
            signal_long=signal_counts["long"],
            signal_short=signal_counts["short"],
            signal_none=signal_counts["none"],
        )
        records.append(record)
    return pd.DataFrame(records)


def sweep_configs_walkforward(
    df_slice,
    pred,
    thresholds,
    regimes,
    sizing_modes,
    target_ann_vols,
    hold_bars,
    cost_bps,
    folds,
):
    """Backtest every grid configuration on each fold and aggregate per config.

    The grid is the Cartesian product of ``thresholds``, ``regimes``,
    ``sizing_modes`` and ``target_ann_vols`` (iterated in that nesting order).
    Combinations pairing the "discrete" sizing mode with a non-None target
    volatility are skipped.

    Args:
        df_slice, pred: Accepted for signature parity with the single-slice
            sweep; the body only uses ``folds``.
        thresholds, regimes, sizing_modes, target_ann_vols: Grid axes.
        hold_bars, cost_bps: Forwarded unchanged to every backtest call.
        folds: Iterable of ``(fold_df, fold_pred)`` pairs.

    Returns:
        DataFrame with one row per evaluated configuration holding
        min / median / max aggregates of the per-fold metrics.
    """
    metric_keys = (
        "sharpe",
        "monthly_return_est",
        "profit_factor",
        "trade_count",
        "max_drawdown",
        "mean_monthly",
        "median_monthly",
        "pct_positive_months",
        "worst_month",
        "best_month",
    )

    def as_array(records, key):
        # One float per fold for the given key.
        return np.array([rec[key] for rec in records], dtype=float)

    grid = (
        (th, reg, sm, tav)
        for th in thresholds
        for reg in regimes
        for sm in sizing_modes
        for tav in target_ann_vols
        if not (sm == "discrete" and tav is not None)
    )

    table = []
    for th, reg, sm, tav in grid:
        results = [
            backtest_long_short_horizon(
                fold_df.assign(pred=fold_pred),
                threshold=th,
                hold_bars=hold_bars,
                cost_bps=cost_bps,
                regime=reg,
                sizing_mode=sm,
                target_ann_vol=tav,
            )
            for fold_df, fold_pred in folds
        ]
        fold_metrics = [res.metrics for res in results]
        fold_debug = [res.debug for res in results]

        stat = {key: as_array(fold_metrics, key) for key in metric_keys}
        consistency = as_array(fold_debug, "consistency_abs")

        # "worst" means the minimum for drawdown and worst_month.
        table.append(
            {
                "threshold": float(th),
                "regime": reg,
                "sizing_mode": sm,
                "target_ann_vol": tav,
                "sharpe_fold_min": float(stat["sharpe"].min()),
                "sharpe_fold_median": float(np.median(stat["sharpe"])),
                "monthly_return_est_fold_min": float(stat["monthly_return_est"].min()),
                "monthly_return_est_fold_median": float(np.median(stat["monthly_return_est"])),
                "profit_factor_fold_min": float(stat["profit_factor"].min()),
                "profit_factor_fold_median": float(np.median(stat["profit_factor"])),
                "trade_count_fold_min": float(stat["trade_count"].min()),
                "max_drawdown_fold_worst": float(stat["max_drawdown"].min()),
                "mean_monthly_fold_median": float(np.median(stat["mean_monthly"])),
                "median_monthly_fold_median": float(np.median(stat["median_monthly"])),
                "pct_positive_months_fold_median": float(np.median(stat["pct_positive_months"])),
                "worst_month_fold_worst": float(stat["worst_month"].min()),
                "best_month_fold_best": float(stat["best_month"].max()),
                "consistency_abs_fold_max": float(consistency.max()),
            }
        )
    return pd.DataFrame(table)


def rank_configs_legacy(val_table, min_trades, max_drawdown):
    """Filter single-slice sweep results and rank them by a composite score.

    Args:
        val_table: Sweep table with "trade_count", "max_drawdown",
            "total_return", "profit_factor" and "sharpe" columns.
        min_trades: Minimum admissible trade count (inclusive).
        max_drawdown: Lowest admissible max drawdown (inclusive).

    Returns:
        Copy of the admissible rows with added "priority" and
        "selection_score" columns, sorted by score, best first.

    Raises:
        ValueError: If no row satisfies both constraints.
    """
    enough_trades = val_table["trade_count"] >= min_trades
    drawdown_ok = val_table["max_drawdown"] >= max_drawdown
    candidates = val_table[enough_trades & drawdown_ok].copy()
    if candidates.empty:
        raise ValueError("No configs meet MIN_TRADES / max drawdown constraints.")

    # Rows that made money with profit factor above one get priority.
    candidates["priority"] = (candidates["total_return"] > 0) & (
        candidates["profit_factor"] > 1
    )
    # Widely separated weights: priority first, then sharpe, then total
    # return, then drawdown.
    candidates["selection_score"] = (
        candidates["priority"].astype(int) * 1_000_000_000
        + candidates["sharpe"] * 1_000_000
        + candidates["total_return"] * 1_000
        + candidates["max_drawdown"] * 100
    )
    return candidates.sort_values("selection_score", ascending=False)


def rank_configs_walkforward(val_table, min_trades, max_drawdown, min_profit_factor):
    """Rank walk-forward sweep results by a robust composite score.

    Rows must satisfy all three per-fold constraints (inclusive bounds). If
    none does, the whole table is ranked instead and flagged through the
    "constraints_ok" column.

    Args:
        val_table: Table with the ``*_fold_min`` / ``*_fold_median`` /
            ``*_fold_worst`` columns read below.
        min_trades: Lower bound for "trade_count_fold_min".
        max_drawdown: Lower bound for "max_drawdown_fold_worst".
        min_profit_factor: Lower bound for "profit_factor_fold_min".

    Returns:
        Copy of the ranked rows with added "constraints_ok" and
        "robust_score" columns, sorted by score, best first.
    """
    passes = (
        (val_table["trade_count_fold_min"] >= min_trades)
        & (val_table["max_drawdown_fold_worst"] >= max_drawdown)
        & (val_table["profit_factor_fold_min"] >= min_profit_factor)
    )
    candidates = val_table[passes].copy()
    constraints_ok = not candidates.empty
    if not constraints_ok:
        # Nothing qualifies: fall back to ranking every configuration.
        candidates = val_table.copy()
    candidates["constraints_ok"] = constraints_ok

    # Scale weights so each term is in a comparable range for typical
    # monthly returns / sharpes; drawdown magnitude is a penalty.
    candidates["robust_score"] = (
        1000 * candidates["monthly_return_est_fold_median"]
        + 200 * candidates["sharpe_fold_median"]
        + 100 * (candidates["profit_factor_fold_median"] - 1.0)
        - 300 * candidates["max_drawdown_fold_worst"].abs()
    )
    return candidates.sort_values("robust_score", ascending=False)


In [7]:
# Candidate thresholds: the distinct values of these quantiles of the absolute
# validation predictions, in ascending order.
quantiles = [0.0, 0.1, 0.2, 0.3, 0.5, 0.7, 0.8, 0.9, 0.95, 0.97]
abs_pred = np.abs(pred_val)
thresholds = sorted({float(np.quantile(abs_pred, q)) for q in quantiles})

# Remaining grid axes; None means "not set" for regime and target volatility.
regimes = [None, "adx", "h1_align", "adx_and_h1"]
sizing_modes = ["discrete", "continuous"]
target_ann_vols = [None, 0.08, 0.12]


def run_sweep(pred):
    """Sweep the validation grid for ``pred`` and pick the top-ranked config.

    Uses the walk-forward sweep and ranking when the module-level
    ``use_walkforward_selection`` flag is set, otherwise the single-slice
    sweep with the legacy ranking. Grid axes and constraints are read from
    module-level names.

    Args:
        pred: Predictions aligned with ``df_val``.

    Returns:
        Tuple ``(val_table, ranked, best, score)``: the raw sweep table, the
        ranked table, its first row, and that row's score as a float
        ("robust_score" or "selection_score" depending on the mode).
    """
    grid_axes = (thresholds, regimes, sizing_modes, target_ann_vols)

    if not use_walkforward_selection:
        val_table = sweep_configs_single(
            df_val,
            pred,
            *grid_axes,
            hold_bars=HOLD_BARS,
            cost_bps=COST_BPS,
        )
        ranked = rank_configs_legacy(val_table, MIN_TRADES, MAX_DRAWDOWN)
        score_key = "selection_score"
    else:
        val_table = sweep_configs_walkforward(
            df_val,
            pred,
            *grid_axes,
            hold_bars=HOLD_BARS,
            cost_bps=COST_BPS,
            folds=split_folds(df_val, pred, WF_FOLDS),
        )
        ranked = rank_configs_walkforward(
            val_table,
            MIN_TRADES,
            WF_MAX_DRAWDOWN,
            WF_MIN_PROFIT_FACTOR,
        )
        score_key = "robust_score"

    # Both rankers return rows sorted best-first.
    best = ranked.iloc[0]
    return val_table, ranked, best, float(best[score_key])


# Sweep the validation grid once for the raw predictions and once for their
# negation. A sweep that raises ValueError scores -inf.
try:
    val_table_pos, ranked_pos, best_pos, score_pos = run_sweep(pred_val)
except ValueError as exc:
    print("WARNING: positive polarity sweep failed:", exc)
    ranked_pos = best_pos = None
    score_pos = float("-inf")

try:
    val_table_neg, ranked_neg, best_neg, score_neg = run_sweep(-pred_val)
except ValueError as exc:
    print("WARNING: negative polarity sweep failed:", exc)
    ranked_neg = best_neg = None
    score_neg = float("-inf")

if np.isneginf(score_pos) and np.isneginf(score_neg):
    raise ValueError("No valid configs for either polarity.")

# Ties go to the positive polarity.
polarity = 1 if score_pos >= score_neg else -1
print("VAL best selection_score pos:", score_pos, "neg:", score_neg)
print("Chosen polarity:", polarity)

# Carry the winning polarity through to both prediction arrays.
ranked_val, best_row, pred_val_final, pred_test_final = (
    (ranked_pos, best_pos, pred_val, pred_test)
    if polarity == 1
    else (ranked_neg, best_neg, -pred_val, -pred_test)
)

# Columns shown for the top-ranked validation configs. The score column name
# depends on the selection mode; the grid columns are common to both.
_grid_cols = ["threshold", "regime", "sizing_mode", "target_ann_vol"]

if use_walkforward_selection:
    score_col = "robust_score"
    display_cols = [
        "constraints_ok",
        score_col,
        *_grid_cols,
        "sharpe_fold_median",
        "sharpe_fold_min",
        "monthly_return_est_fold_median",
        "monthly_return_est_fold_min",
        "profit_factor_fold_median",
        "profit_factor_fold_min",
        "trade_count_fold_min",
        "max_drawdown_fold_worst",
        "mean_monthly_fold_median",
        "pct_positive_months_fold_median",
        "worst_month_fold_worst",
        "best_month_fold_best",
        "consistency_abs_fold_max",
    ]
else:
    score_col = "selection_score"
    display_cols = [
        score_col,
        "priority",
        *_grid_cols,
        "sharpe",
        "total_return",
        "max_drawdown",
        "profit_factor",
        "trade_count",
        "hit_rate",
        "monthly_return_est",
        "mean_monthly",
        "pct_positive_months",
        "consistency_abs",
    ]

print(ranked_val[display_cols].head(10))

# The chosen row must be the one holding the maximum score.
selection_argmax = ranked_val[score_col].idxmax()
assert selection_argmax == best_row.name

# The backtest's consistency diagnostic must be numerically zero for the
# chosen configuration.
_consistency_col = (
    "consistency_abs_fold_max" if use_walkforward_selection else "consistency_abs"
)
assert float(best_row[_consistency_col]) < 1e-8

# Unpack the winning configuration; a missing target volatility becomes None.
best_threshold = float(best_row["threshold"])
best_regime, best_sizing_mode = best_row["regime"], best_row["sizing_mode"]
best_target_ann_vol = best_row["target_ann_vol"]
best_target_ann_vol = (
    None if pd.isna(best_target_ann_vol) else float(best_target_ann_vol)
)

if use_walkforward_selection and not bool(best_row.get("constraints_ok", True)):
    print("WARNING: no configs met walk-forward constraints; using best robust_score anyway.")

print("Selected (VAL):", best_threshold, best_regime, best_sizing_mode, best_target_ann_vol)

# Persist the selection in the artifacts folder.
selected_config = dict(
    polarity=polarity,
    threshold=best_threshold,
    regime=best_regime,
    sizing_mode=best_sizing_mode,
    target_ann_vol=best_target_ann_vol,
)
with (ARTIFACT_DIR / "selected_config.json").open("w", encoding="utf-8") as f:
    json.dump(selected_config, f, ensure_ascii=False, indent=2)


VAL best selection_score pos: 752.342831091413 neg: 386.732420843753
Chosen polarity: 1
     constraints_ok  robust_score     threshold      regime sizing_mode  \
147           False    752.342831  4.450459e-04        None  continuous   
146           False    752.292647  4.450459e-04        None  continuous   
144           False    699.151667  4.450459e-04        None    discrete   
40            False    697.531771  2.384243e-05    h1_align    discrete   
145           False    697.278223  4.450459e-04        None  continuous   
24            False    593.741199  1.156110e-05    h1_align    discrete   
8             False    560.006438  1.309672e-09    h1_align    discrete   
31            False    487.045411  1.156110e-05  adx_and_h1  continuous   
30            False    486.520933  1.156110e-05  adx_and_h1  continuous   
11            False    412.387914  1.309672e-09    h1_align  continuous   

     target_ann_vol  sharpe_fold_median  sharpe_fold_min  \
147            0.12       

In [8]:
# Regression metrics for the polarity-adjusted predictions.
metrics_val = compute_regression_metrics(pred_val_final, y_val)
metrics_test = compute_regression_metrics(pred_test_final, y_test)

# One configuration (selected on validation) is replayed on both splits.
_selected_bt_kwargs = dict(
    threshold=best_threshold,
    hold_bars=HOLD_BARS,
    cost_bps=COST_BPS,
    regime=best_regime,
    sizing_mode=best_sizing_mode,
    target_ann_vol=best_target_ann_vol,
)
bt_val = backtest_long_short_horizon(
    df_val.assign(pred=pred_val_final), **_selected_bt_kwargs
)
bt_test = backtest_long_short_horizon(
    df_test.assign(pred=pred_test_final), **_selected_bt_kwargs
)

# Only enforce the consistency diagnostic when the test run actually traded.
if bt_test.metrics["trade_count"] > 0 and bt_test.debug["consistency_abs"] >= 1e-5:
    raise AssertionError(
        f"TEST consistency_abs too large: {bt_test.debug['consistency_abs']}"
    )

print("VAL metrics:", metrics_val)
print("TEST metrics:", metrics_test)
print("VAL backtest metrics:", bt_val.metrics)
print("TEST backtest metrics:", bt_test.metrics)
print(
    "TEST debug:",
    {
        key: bt_test.debug[key]
        for key in ("equity_log", "trade_log_sum", "consistency_abs", "nonzero_pos_bars")
    },
)


def print_monthly_report(label, metrics):
    """Print a one-line summary of the monthly statistics in ``metrics``.

    Args:
        label: Prefix identifying the data split (e.g. "VAL").
        metrics: Mapping with "mean_monthly", "median_monthly",
            "pct_positive_months", "worst_month" and "best_month".
    """
    fields = [
        f"mean={metrics['mean_monthly']:.6f}",
        f"median={metrics['median_monthly']:.6f}",
        f"pos%={metrics['pct_positive_months']:.2f}",
        f"worst={metrics['worst_month']:.6f}",
        f"best={metrics['best_month']:.6f}",
    ]
    print(f"{label} monthly: " + " ".join(fields))

# One-line monthly summaries for the validation and test backtests.
print_monthly_report("VAL", bt_val.metrics)
print_monthly_report("TEST", bt_test.metrics)

# Last expression of the cell: the notebook renders the first ten test trades.
bt_test.trades.head(10)


VAL metrics: {'mse': 7.84031370978747e-07, 'mae': 0.0005907900847507259, 'corr': -0.01571018121189088, 'dir_acc': 0.4965600971266694}
TEST metrics: {'mse': 3.881453472701456e-07, 'mae': 0.00044484396116244737, 'corr': 0.043606844718273134, 'dir_acc': 0.5}
VAL backtest metrics: {'total_return': 0.024095878525798975, 'annualized_return': 0.0845002055042956, 'annualized_vol': 0.04311062091406424, 'sharpe': 1.8237842147245222, 'max_drawdown': -0.007925142832512222, 'hit_rate': 0.4883720930232558, 'trade_count': 86.0, 'avg_trade_return': 0.0002812558663192111, 'avg_win': 0.0015208010356261873, 'avg_loss': 0.000901946340746539, 'payoff_ratio': 1.6861324950676015, 'profit_factor': 1.6094901106720725, 'monthly_return_est': 0.006782836675364168, 'mean_monthly': 0.005376206069149653, 'median_monthly': -7.266969378572075e-05, 'pct_positive_months': 0.5, 'worst_month': -0.0020878656248638494, 'best_month': 0.023738029289033902}
TEST backtest metrics: {'total_return': 0.0026717802453670725, 'annual

Unnamed: 0,entry_idx,exit_idx,entry_time,exit_time,direction,entry_price,exit_price,size,trade_return,pnl
0,140,143,2025-09-05 05:30:00+00:00,2025-09-05 06:15:00+00:00,1,1.16718,1.16716,2.020015,-0.000116,-0.000116
1,146,149,2025-09-05 07:00:00+00:00,2025-09-05 07:45:00+00:00,1,1.16718,1.1678,2.132618,0.000627,0.000627
2,156,159,2025-09-05 09:30:00+00:00,2025-09-05 10:15:00+00:00,1,1.16849,1.16944,2.203364,-0.000485,-0.000485
3,161,164,2025-09-05 10:45:00+00:00,2025-09-05 11:30:00+00:00,1,1.16914,1.16832,2.3017,-0.002394,-0.002394
4,166,169,2025-09-05 12:00:00+00:00,2025-09-05 12:45:00+00:00,1,1.16996,1.17344,2.298505,0.009159,0.009159
5,190,193,2025-09-05 18:00:00+00:00,2025-09-05 18:45:00+00:00,1,1.17228,1.17172,1.556394,-0.001536,-0.001536
6,261,264,2025-09-08 11:45:00+00:00,2025-09-08 12:30:00+00:00,1,1.17291,1.17348,1.460596,-0.000507,-0.000507
7,270,273,2025-09-08 14:00:00+00:00,2025-09-08 14:45:00+00:00,1,1.17454,1.17454,1.899542,-0.001838,-0.001838
8,275,278,2025-09-08 15:15:00+00:00,2025-09-08 16:00:00+00:00,1,1.17464,1.17489,2.001073,7.2e-05,7.2e-05
9,280,283,2025-09-08 16:30:00+00:00,2025-09-08 17:15:00+00:00,1,1.17383,1.17463,2.20784,-0.001518,-0.001518


In [9]:
# Reference run on the test split: all-zero predictions against an infinite
# threshold, with no regime filter and no volatility target.
_baseline_kwargs = dict(
    threshold=float("inf"),
    hold_bars=HOLD_BARS,
    cost_bps=COST_BPS,
    regime=None,
    sizing_mode="discrete",
    target_ann_vol=None,
)
baseline_bt = backtest_long_short_horizon(df_test.assign(pred=0.0), **_baseline_kwargs)
print("Baseline pred=0 metrics:", baseline_bt.metrics)


Baseline pred=0 metrics: {'total_return': 0.0, 'annualized_return': 0.0, 'annualized_vol': 0.0, 'sharpe': 0.0, 'max_drawdown': 0.0, 'hit_rate': 0.0, 'trade_count': 0.0, 'avg_trade_return': 0.0, 'avg_win': 0.0, 'avg_loss': 0.0, 'payoff_ratio': 0.0, 'profit_factor': 0.0, 'monthly_return_est': 0.0, 'mean_monthly': 0.0, 'median_monthly': 0.0, 'pct_positive_months': 0.0, 'worst_month': 0.0, 'best_month': 0.0}


In [10]:
# baseline computed above


Печатаем критерии остановки и возможные причины провала.
Если trade_count низкий, вероятно порог слишком высокий или предсказания почти нулевые.

In [11]:
# Persist the selected configuration together with val/test metrics.
_score_key = "robust_score" if use_walkforward_selection else "selection_score"
results_payload = dict(
    polarity=polarity,
    threshold=best_threshold,
    regime=best_regime,
    sizing_mode=best_sizing_mode,
    target_ann_vol=best_target_ann_vol,
    selection_score=float(best_row[_score_key]),
    val_metrics=metrics_val,
    test_metrics=metrics_test,
    test_backtest_metrics=bt_test.metrics,
)
with (ARTIFACT_DIR / "results.json").open("w", encoding="utf-8") as f:
    json.dump(results_payload, f, ensure_ascii=False, indent=2)

# Integrity checks.
print("GOOD")
print(f"alignment_check_pass: {target_alignment_diff < 1e-12} (max_abs_diff={target_alignment_diff})")
print(f"consistency_abs: {bt_test.debug['consistency_abs']}")
print(f"trade_count: {bt_test.metrics['trade_count']}")

# Headline test performance (printed unconditionally).
print("BAD")
print(
    f"sharpe: {bt_test.metrics['sharpe']:.3f} total_return: {bt_test.metrics['total_return']:.3f} "
    f"profit_factor: {bt_test.metrics['profit_factor']:.3f} hit_rate: {bt_test.metrics['hit_rate']:.3f}"
)

# Rule-based hints; each rule is independent, so several may fire.
print("NEXT")
next_lines = []
if bt_test.metrics["profit_factor"] < 1:
    next_lines.append(
        f"profit_factor<1: avg_win={bt_test.metrics['avg_win']:.6f} avg_loss={bt_test.metrics['avg_loss']:.6f}; "
        "try lower thresholds or adjust regime filters."
    )
if abs(metrics_test["dir_acc"] - 0.50) <= 0.01 and abs(metrics_test["corr"]) <= 0.02:
    next_lines.append("model signal weak for this horizon; next step is strategy/rules or features (not metrics)")
if bt_test.metrics["trade_count"] < MIN_TRADES:
    next_lines.append(
        f"too few trades: lower threshold quantiles; pred_abs_p90={bt_test.debug['pred_abs_p90']:.6f} "
        f"pred_abs_p95={bt_test.debug['pred_abs_p95']:.6f} pred_std={bt_test.debug['pred_std']:.6f}"
    )
if bt_test.metrics["profit_factor"] < 1.05 or bt_test.metrics["mean_monthly"] < 0:
    next_lines.append("NEXT: improve signal edge (threshold asymmetry / entry rules)")

for line in next_lines or ["No immediate issues flagged by rules."]:
    print(line)


GOOD
alignment_check_pass: True (max_abs_diff=0.0)
consistency_abs: 7.632783294297951e-16
trade_count: 158.0
BAD
sharpe: 0.248 total_return: 0.003 profit_factor: 1.031 hit_rate: 0.475
NEXT
NEXT: improve signal edge (threshold asymmetry / entry rules)
