# 03 — Модель и бэктест

Обучаем линейную регрессию на M15-признаках, разделяем данные по времени на train/val/test (70/15/15),
масштабируем признаки скейлером, обученным только на train, оцениваем качество на test и выполняем простой
лонг/шорт-бэктест с исполнением на следующем баре и учётом комиссии.

In [None]:
from pathlib import Path
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Locate the project root: the nearest directory, starting at the current
# working directory and walking up through all of its ancestors, that contains
# a "src" folder. Checking only the cwd and its direct parent breaks as soon as
# the notebook is launched from a deeper sub-folder (`import src` then fails
# with ModuleNotFoundError).
PROJECT_ROOT = Path.cwd().resolve()
ROOT = next(
    (
        candidate
        for candidate in (PROJECT_ROOT, *PROJECT_ROOT.parents)
        if (candidate / "src").is_dir()
    ),
    PROJECT_ROOT,  # no "src" anywhere above: fall back to the working directory
)

# Put the root first on sys.path so that `from src... import ...` resolves
# regardless of where the notebook kernel was started.
if str(ROOT) not in sys.path:
    sys.path.insert(0, str(ROOT))

from src.features import get_feature_columns
from src.model import StandardScaler, TrainConfig, train_linear_model, predict
from src.backtest import backtest_long_short

# Location of the prepared feature table, relative to the project root.
DATA_PATH = ROOT.joinpath("data", "eurusd_features.parquet")

# Load the table, order the rows by the "time" column and give them a fresh
# 0..n-1 index so that row position follows the sort order.
df = (
    pd.read_parquet(DATA_PATH)
    .sort_values("time")
    .reset_index(drop=True)
)



ModuleNotFoundError: No module named 'src'

In [None]:
# Names of the model input columns, as provided by the project's feature module.
feature_cols = get_feature_columns()

# Model inputs (selected feature columns) and the "target" column as array
# values, both in the row order of df.
X, y = df[feature_cols].values, df["target"].values


In [None]:
# Position-based 70/15/15 split: rows are taken in df order, no shuffling.
n = len(df)
train_end, val_end = int(n * 0.70), int(n * 0.85)

# One slice per partition, reused for both the features and the target.
train_slice = slice(None, train_end)
val_slice = slice(train_end, val_end)
test_slice = slice(val_end, None)

X_train, X_val, X_test = (X[part] for part in (train_slice, val_slice, test_slice))
y_train, y_val, y_test = (y[part] for part in (train_slice, val_slice, test_slice))

# Test rows of the full frame, re-indexed from 0 so they align positionally
# with X_test / y_test.
df_test = df.iloc[test_slice].reset_index(drop=True)


In [None]:
# The scaler is fitted on the training features only; the validation and test
# features are then transformed with that same fitted scaler.
scaler = StandardScaler()
X_train_s = scaler.fit_transform(X_train)
X_val_s, X_test_s = (scaler.transform(part) for part in (X_val, X_test))


In [None]:
# Training settings passed to the project's trainer.
config = TrainConfig(epochs=200, batch_size=512, lr=1e-3, seed=42, device="cpu")
model, history = train_linear_model(
    X_train_s,
    y_train,
    X_val_s,
    y_val,
    config=config,
)

# Learning curves: one line per entry of the training history.
fig, ax = plt.subplots(figsize=(8, 4))
for history_key, curve_label in (("train_loss", "train"), ("val_loss", "val")):
    ax.plot(history[history_key], label=curve_label)
ax.set_title("Loss (MSE)")
ax.set_xlabel("Epoch")
ax.set_ylabel("Loss")
ax.grid(True)
ax.legend()
fig.tight_layout()
plt.show()


In [None]:
# Flatten predictions and targets to 1-D before comparing them. If the model
# returned a column of shape (n, 1) while the target is (n,), the expression
# `pred - y` would silently broadcast to an (n, n) matrix and every metric
# below would be wrong without any error being raised.
pred_test = np.asarray(predict(model, X_test_s)).ravel()
y_true = np.asarray(y_test).ravel()
if pred_test.shape != y_true.shape:
    raise ValueError(
        f"prediction/target length mismatch: {pred_test.shape} vs {y_true.shape}"
    )

# Element-wise residuals shared by the error metrics.
residual = pred_test - y_true
mse = np.mean(residual ** 2)
mae = np.mean(np.abs(residual))
# Pearson correlation between predictions and targets.
corr = np.corrcoef(pred_test, y_true)[0, 1]
# Share of samples where prediction and target have the same sign
# (a zero on only one side counts as a miss).
dir_acc = np.mean(np.sign(pred_test) == np.sign(y_true))

print(f"MSE: {mse:.8f}")
print(f"MAE: {mae:.8f}")
print(f"Corr: {corr:.4f}")
print(f"Directional accuracy: {dir_acc:.4f}")


In [None]:
# Copy of the test rows with the model predictions attached as a "pred" column.
df_bt = df_test.assign(pred=pred_test)

# Baseline run of the project's long/short backtest with a zero threshold.
# NOTE(review): cost_bps is presumably a per-trade cost in basis points;
# confirm against src.backtest.
bt = backtest_long_short(
    df_bt,
    pred_col="pred",
    price_col="close",
    threshold=0.0,
    cost_bps=0.5,
)
print(bt.metrics)

# Equity curve of the backtest on the test rows.
plt.figure(figsize=(10, 4))
bt.equity_curve.plot()
equity_ax = plt.gca()
equity_ax.set_title("Equity Curve (Test)")
equity_ax.grid(True)
plt.gcf().tight_layout()
plt.show()

# Last expression of the cell: the notebook displays the first trades.
bt.trades.head()


Порог предсказаний следует выбирать на валидационной выборке, а не на тестовой.
Ниже — лишь иллюстративный обзор нескольких значений порога на тестовой выборке; использовать его для выбора порога нельзя.

In [None]:
# Candidate prediction thresholds to compare.
thresholds = [0.0, 5e-5, 1e-4, 2e-4]

# Arguments that stay fixed across runs; only the threshold varies.
fixed_bt_args = dict(pred_col="pred", price_col="close", cost_bps=0.5)

for th in thresholds:
    bt_th = backtest_long_short(df_bt, threshold=th, **fixed_bt_args)
    metrics = bt_th.metrics
    # One summary line per threshold.
    print(
        f"th={th:.5f} | sharpe={metrics['sharpe']:.3f} | "
        f"total_return={metrics['total_return']:.4f} | "
        f"maxDD={metrics['max_drawdown']:.4f}"
    )
