# 02 — Признаки и целевая переменная

Загружаем данные M15 и H1, считаем индикаторы на M15, формируем трендовый признак на H1,
объединяем признаки, строим целевую доходность за 3 бара (со входом на следующей свече) и сохраняем итоговый датасет.

In [1]:
from pathlib import Path
import sys
import pandas as pd

# Locate the repository root so that `src` can be imported whether the notebook
# is started from the repository root or from a direct subdirectory of it.
PROJECT_ROOT = Path.cwd().resolve()

# First candidate that contains a `src` directory wins; if neither the working
# directory nor its parent has one, fall back to the working directory.
ROOT = next(
    (
        candidate
        for candidate in (PROJECT_ROOT, PROJECT_ROOT.parent)
        if (candidate / "src").exists()
    ),
    PROJECT_ROOT,
)

# Put the root at the front of the import path, but only once, so re-running
# the cell does not stack duplicate entries.
_root_entry = str(ROOT)
if _root_entry not in sys.path:
    sys.path.insert(0, _root_entry)

from src.features import (
    build_m15_features,
    build_h1_trend_features,
    merge_m15_with_h1,
    add_target,
    drop_na_for_training,
)

# All datasets live in the `data` directory under the detected project root.
DATA_DIR = ROOT.joinpath("data")

# Inputs: raw EURUSD bars on the M15 and H1 timeframes.
m15_path = DATA_DIR.joinpath("eurusd_M15.parquet")
h1_path = DATA_DIR.joinpath("eurusd_H1.parquet")

# Output: the merged feature/target table written at the end of this notebook.
output_path = DATA_DIR.joinpath("eurusd_features.parquet")

SyntaxError: unexpected character after line continuation character (features.py, line 100)

In [None]:
# Load each timeframe, make sure `time` is a datetime column, and order the
# bars chronologically with a fresh 0..n-1 index.
df_m15 = (
    pd.read_parquet(m15_path)
    .assign(time=lambda bars: pd.to_datetime(bars["time"]))
    .sort_values("time")
    .reset_index(drop=True)
)
df_h1 = (
    pd.read_parquet(h1_path)
    .assign(time=lambda bars: pd.to_datetime(bars["time"]))
    .sort_values("time")
    .reset_index(drop=True)
)


In [None]:
# Build the M15 feature frame. A copy is passed so the helper works on its own
# frame instead of being handed df_m15 itself.
df_m15_feat = build_m15_features(df_m15.copy())
# Trailing expression of the cell: the notebook renders the first rows.
df_m15_feat.head()


In [None]:
# Build the H1 trend feature frame. A copy is passed so the helper works on its
# own frame instead of being handed df_h1 itself.
df_h1_trend = build_h1_trend_features(df_h1.copy())
# Trailing expression of the cell: the notebook renders the first rows.
df_h1_trend.head()


In [None]:
# Combine the M15 feature frame with the H1 trend frame.
# NOTE(review): the join key and alignment rule live in merge_m15_with_h1 —
# confirm that an M15 row never receives values from an H1 bar that has not
# closed yet.
df_merged = merge_m15_with_h1(df_m15_feat, df_h1_trend)
# Trailing expression of the cell: the notebook renders the first rows.
df_merged.head()


In [None]:
# Target horizon in bars; also reused by the alignment check later in this notebook.
HOLD_BARS = 3
# NOTE(review): entry_shift=1 presumably means the position is opened on the
# bar after the signal bar — the alignment check further down compares `target`
# against close[t + 1 + HOLD_BARS] / close[t + 1] - 1; confirm in add_target.
df_with_target = add_target(df_merged, horizon=HOLD_BARS, entry_shift=1)
# Trailing expression of the cell: show time, close and target for the first 10 rows.
df_with_target[["time", "close", "target"]].head(10)


Проверка выравнивания таргета с фактической доходностью исполнения (вход на следующей свече).


In [None]:
# Sanity check: recompute the return of a trade entered on the next bar and held
# for HOLD_BARS bars, then compare it with the `target` column.
# The literal 1 below must stay in sync with the entry_shift passed to add_target.
exec_ret = df_merged["close"].shift(-(1 + HOLD_BARS)) / df_merged["close"].shift(-1) - 1

# Compare only rows where both series are defined (the tail rows have no future
# bars, so they are NaN in at least one of them).
mask = df_with_target["target"].notna() & exec_ret.notna()
max_abs_diff = (df_with_target.loc[mask, "target"] - exec_ret.loc[mask]).abs().max()
print("Alignment max_abs_diff:", max_abs_diff)
print("NaNs in target:", int(df_with_target["target"].isna().sum()))

# .max() over an empty selection yields NaN (never None), and NaN > tol is
# False, so an empty comparison must be reported explicitly instead of being
# mistaken for a successful check.
if pd.isna(max_abs_diff):
    print("WARNING: no overlapping rows to verify target alignment")
elif max_abs_diff > 1e-10:
    print("WARNING: target misalignment detected")


In [None]:
# Remove the rows that cannot be used for training (see drop_na_for_training).
df_final = drop_na_for_training(df_with_target)

# A notebook cell auto-displays only its last expression, and this cell ends
# with a print, so each diagnostic is printed explicitly; as bare expressions
# they would be evaluated and silently discarded.
print("Final shape:", df_final.shape)
print("Columns with the most remaining NaNs:")
print(df_final.isna().sum().sort_values(ascending=False).head(10))
print("Summary statistics of the target and key features:")
print(df_final[["target", "log_ret_1", "log_ret_1_norm", "roll_vol_20", "trend_strength_m15"]].describe())
print("Tail quantiles of log_ret_1_norm:")
print(df_final["log_ret_1_norm"].quantile([0.001, 0.01, 0.5, 0.99, 0.999]))

# Persist the dataset without the positional index.
df_final.to_parquet(output_path, index=False)
print(f"Saved final feature dataset to: {output_path}")


Итоговый датасет `eurusd_features.parquet` будет использоваться в ноутбуке 03 для обучения модели
и проверки стратегии. Признаки и целевая переменная готовы к обучению.