# 03 — Модель и бэктест

Разбиваем данные по времени (70/15/15), масштабируем признаки по статистикам train-части,
обучаем линейную регрессию с L2-регуляризацией и ранней остановкой (early stopping).
Торговая логика: сигнал строится по предсказанию на шаге t, вход — на шаге t+1, позиция удерживается horizon=3 шага.

In [None]:
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from src.features import get_feature_columns
from src.model import (
    StandardScaler,
    TrainConfig,
    train_linear_model,
    predict,
    mse_baseline_zero,
    mse_baseline_mean,
)
from src.backtest import backtest_long_short

# Paths are resolved against the current working directory.
PROJECT_ROOT = Path(".").resolve()
DATA_PATH = PROJECT_ROOT.joinpath("data", "eurusd_features.parquet")

# Load the feature table, order rows by the "time" column and renumber the
# index from 0 so that positional slices follow that ordering.
df = (
    pd.read_parquet(DATA_PATH)
    .sort_values("time")
    .reset_index(drop=True)
)


In [None]:
# Feature matrix (columns named by get_feature_columns) and the "target"
# column, both pulled out of the frame as raw arrays.
feature_cols = get_feature_columns()
X = df.loc[:, feature_cols].values
y = df.loc[:, "target"].values


In [None]:
# Positional 70/15/15 split: the first 70% of rows train the model, the next
# 15% are validation, the remainder is the test set.
n = len(df)
train_end, val_end = int(n * 0.70), int(n * 0.85)

train_rows = slice(None, train_end)
val_rows = slice(train_end, val_end)
test_rows = slice(val_end, None)

X_train, y_train = X[train_rows], y[train_rows]
X_val, y_val = X[val_rows], y[val_rows]
X_test, y_test = X[test_rows], y[test_rows]

# Matching data-frame pieces, each re-indexed from 0.
df_train = df.iloc[train_rows].reset_index(drop=True)
df_val = df.iloc[val_rows].reset_index(drop=True)
df_test = df.iloc[test_rows].reset_index(drop=True)


In [None]:
# The scaler is fitted on the training split only; the same fitted instance
# is then applied unchanged to the validation and test features.
scaler = StandardScaler()
X_train_s = scaler.fit_transform(X_train)
X_val_s, X_test_s = scaler.transform(X_val), scaler.transform(X_test)


In [None]:
# Hyper-parameters handed to train_linear_model via TrainConfig.
config = TrainConfig(
    seed=42,
    device="cpu",
    epochs=200,
    batch_size=1024,
    lr=1e-3,
    weight_decay=1e-4,
    patience=15,
)
model, history = train_linear_model(
    X_train_s,
    y_train,
    X_val_s,
    y_val,
    config=config,
)

# Training vs. validation loss curves taken from the returned history.
plt.figure(figsize=(8, 4))
for history_key, curve_label in (("train_loss", "train"), ("val_loss", "val")):
    plt.plot(history[history_key], label=curve_label)
plt.title("Loss (MSE)")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.grid(True)
plt.legend()
plt.tight_layout()
plt.show()


In [None]:
# Model predictions for the scaled validation and test features.
pred_val, pred_test = predict(model, X_val_s), predict(model, X_test_s)

def eval_regression(pred, y_true, label):
    """Print regression diagnostics for one data split and return its MSE.

    Both inputs are flattened to 1-D before any arithmetic, so column-shaped
    predictions of shape (n, 1) are compared element-wise with a 1-D target
    instead of being broadcast into an (n, n) matrix.

    Args:
        pred: Predicted values; array-like with one value per sample.
        y_true: Observed target values with the same number of samples.
        label: Text printed in front of the metrics (e.g. the split name).

    Returns:
        Mean squared error between ``pred`` and ``y_true``.

    Raises:
        ValueError: If the inputs do not hold the same number of values.
    """
    pred = np.asarray(pred).ravel()
    y_true = np.asarray(y_true).ravel()
    if pred.shape != y_true.shape:
        raise ValueError(
            f"pred and y_true must have the same number of values, "
            f"got {pred.size} and {y_true.size}"
        )

    err = pred - y_true
    mse = np.mean(err ** 2)
    mae = np.mean(np.abs(err))
    # Pearson correlation between predictions and targets.
    corr = np.corrcoef(pred, y_true)[0, 1]
    # Share of samples whose predicted sign equals the realised sign
    # (a zero only matches another zero).
    dir_acc = np.mean(np.sign(pred) == np.sign(y_true))
    print(f"{label} | MSE={mse:.8f} MAE={mae:.8f} Corr={corr:.4f} DirAcc={dir_acc:.4f}")
    return mse

# Model error on each held-out split (every call also prints its metric line).
mse_val = eval_regression(pred_val, y_val, "VAL")
mse_test = eval_regression(pred_test, y_test, "TEST")

# Reference errors from the two baseline helpers in src.model
# (presumably a constant-zero and a train-mean predictor - confirm there).
mse_zero_val, mse_mean_val = (
    mse_baseline_zero(y_val),
    mse_baseline_mean(y_train, y_val),
)
mse_zero_test, mse_mean_test = (
    mse_baseline_zero(y_test),
    mse_baseline_mean(y_train, y_test),
)

print(f"VAL baselines | zero={mse_zero_val:.8f} mean={mse_mean_val:.8f}")
print(f"TEST baselines | zero={mse_zero_test:.8f} mean={mse_mean_test:.8f}")


In [None]:
# Attach the validation predictions to a copy of the validation rows so the
# backtester can read them from the "pred" column.
df_val_bt = df_val.copy()
df_val_bt["pred"] = pred_val

# Candidate signal thresholds; each one is backtested on validation with
# identical cost / horizon / hold settings.
thresholds = [0.0, 0.00005, 0.0001, 0.0002, 0.0003]
val_table = []
for th in thresholds:
    bt = backtest_long_short(
        df_val_bt,
        pred_col="pred",
        price_col="close",
        threshold=th,
        cost_bps=0.5,
        horizon=3,
        enforce_hold=True,
    )
    m = bt.metrics
    val_table.append(
        {
            "threshold": th,
            "sharpe": m["sharpe"],
            "total_return": m["total_return"],
            "max_drawdown": m["max_drawdown"],
            "trade_count": m["trade_count"],
        }
    )

val_table_df = pd.DataFrame(val_table)

# Rank by Sharpe, best first. Equal Sharpe values are ordered by smaller
# drawdown and then by fewer trades, so the first row is deterministic.
# The drawdown magnitude is used so the ordering does not depend on whether
# drawdown is reported as a negative or a positive number. Rows with a NaN
# Sharpe go to the bottom (sort_values default).
tie_drawdown_col = "_abs_drawdown"
val_table_df[tie_drawdown_col] = val_table_df["max_drawdown"].abs()
val_table_df = (
    val_table_df.sort_values(
        ["sharpe", tie_drawdown_col, "trade_count"],
        ascending=[False, True, True],
    )
    .drop(columns=tie_drawdown_col)
    .reset_index(drop=True)
)
val_table_df


Выбираем порог с максимальным Sharpe на валидации (при равном Sharpe — с меньшей просадкой, затем с меньшим числом сделок).
Финальную оценку делаем на тесте с выбранным порогом.

In [None]:
# The first row of the ranked validation table supplies the threshold.
best_row = val_table_df.iloc[0]
best_threshold = float(best_row["threshold"])
print(f"Best threshold on VAL: {best_threshold}")

# Test rows plus the model's test predictions in a "pred" column.
df_test_bt = df_test.copy()
df_test_bt["pred"] = pred_test

# Backtest settings for the test run; only the threshold comes from validation.
test_bt_kwargs = {
    "pred_col": "pred",
    "price_col": "close",
    "threshold": best_threshold,
    "cost_bps": 0.5,
    "horizon": 3,
    "enforce_hold": True,
}
bt_test = backtest_long_short(df_test_bt, **test_bt_kwargs)
print(bt_test.metrics)

# Equity curve of the test backtest.
plt.figure(figsize=(10, 4))
bt_test.equity_curve.plot()
plt.title("Equity Curve (Test)")
plt.grid(True)
plt.tight_layout()
plt.show()

# Keep this expression last so the notebook displays the first trades.
bt_test.trades.head(10)
