# 02 — Признаки и целевая переменная

Загружаем M15 и H1 данные, считаем индикаторы на M15, формируем трендовый признак на H1,
объединяем признаки, строим 3-барную целевую доходность и сохраняем итоговый датасет.

In [1]:
from pathlib import Path
import sys
import pandas as pd

PROJECT_ROOT = Path.cwd().resolve()
if (PROJECT_ROOT / "src").exists():
    ROOT = PROJECT_ROOT
elif (PROJECT_ROOT.parent / "src").exists():
    ROOT = PROJECT_ROOT.parent
else:
    ROOT = PROJECT_ROOT

if str(ROOT) not in sys.path:
    sys.path.insert(0, str(ROOT))

from src.features import (
    build_m15_features,
    build_h1_trend_features,
    merge_m15_with_h1,
    add_target,
    check_target_alignment,
    drop_na_for_training,
)

DATA_DIR = ROOT / "data"
m15_path = DATA_DIR / "eurusd_M15.parquet"
h1_path = DATA_DIR / "eurusd_H1.parquet"
output_path = DATA_DIR / "eurusd_features.parquet"

In [2]:
df_m15 = pd.read_parquet(m15_path)
df_h1 = pd.read_parquet(h1_path)

df_m15["time"] = pd.to_datetime(df_m15["time"])
df_h1["time"] = pd.to_datetime(df_h1["time"])

df_m15 = df_m15.sort_values("time").reset_index(drop=True)
df_h1 = df_h1.sort_values("time").reset_index(drop=True)


In [3]:
df_m15_feat = build_m15_features(df_m15.copy())
df_m15_feat.head()


Unnamed: 0,time,open,high,low,close,volume,ema_20,ema_50,ema_20_50_diff,rsi_14,...,log_ret_3,roll_vol_20,log_ret_1_norm,atr_14_norm,trend_strength_m15,hour,minute,sin_hour,cos_hour,adx_above_threshold
0,2023-12-22 02:00:00+00:00,1.09984,1.10002,1.09974,1.09996,505,,,,,...,,,,0.0,,2,0,0.5,0.866025,0
1,2023-12-22 02:15:00+00:00,1.09996,1.09996,1.09956,1.09982,504,,,,,...,,,,0.0,,2,15,0.5,0.866025,0
2,2023-12-22 02:30:00+00:00,1.0998,1.09995,1.0996,1.09976,505,,,,,...,,,,0.0,,2,30,0.5,0.866025,0
3,2023-12-22 02:45:00+00:00,1.09974,1.10008,1.09974,1.10002,439,,,,,...,5.5e-05,,,0.0,,2,45,0.5,0.866025,0
4,2023-12-22 03:00:00+00:00,1.10002,1.1003,1.0999,1.10027,521,,,,,...,0.000409,,,0.0,,3,0,0.707107,0.707107,0


In [4]:
df_h1_trend = build_h1_trend_features(df_h1.copy())
df_h1_trend.head()


Unnamed: 0,time,ema_50_h1,h1_trend_flag,h1_trend_distance
0,2023-12-22 02:00:00+00:00,,0,
1,2023-12-22 03:00:00+00:00,,0,
2,2023-12-22 04:00:00+00:00,,0,
3,2023-12-22 05:00:00+00:00,,0,
4,2023-12-22 06:00:00+00:00,,0,


In [5]:
df_merged = merge_m15_with_h1(df_m15_feat, df_h1_trend)
df_merged.head()


Unnamed: 0,time,open,high,low,close,volume,ema_20,ema_50,ema_20_50_diff,rsi_14,...,atr_14_norm,trend_strength_m15,hour,minute,sin_hour,cos_hour,adx_above_threshold,ema_50_h1,h1_trend_flag,h1_trend_distance
0,2023-12-22 02:00:00+00:00,1.09984,1.10002,1.09974,1.09996,505,,,,,...,0.0,,2,0,0.5,0.866025,0,,0,
1,2023-12-22 02:15:00+00:00,1.09996,1.09996,1.09956,1.09982,504,,,,,...,0.0,,2,15,0.5,0.866025,0,,0,
2,2023-12-22 02:30:00+00:00,1.0998,1.09995,1.0996,1.09976,505,,,,,...,0.0,,2,30,0.5,0.866025,0,,0,
3,2023-12-22 02:45:00+00:00,1.09974,1.10008,1.09974,1.10002,439,,,,,...,0.0,,2,45,0.5,0.866025,0,,0,
4,2023-12-22 03:00:00+00:00,1.10002,1.1003,1.0999,1.10027,521,,,,,...,0.0,,3,0,0.707107,0.707107,0,,0,


In [6]:
HOLD_BARS = 3
df_with_target = add_target(df_merged, horizon=HOLD_BARS)
df_with_target[["time", "close", "target"]].head(10)


Unnamed: 0,time,close,target
0,2023-12-22 02:00:00+00:00,1.09996,0.000409
1,2023-12-22 02:15:00+00:00,1.09982,0.000491
2,2023-12-22 02:30:00+00:00,1.09976,-0.000309
3,2023-12-22 02:45:00+00:00,1.10002,-0.000436
4,2023-12-22 03:00:00+00:00,1.10027,-0.000273
5,2023-12-22 03:15:00+00:00,1.1003,0.000309
6,2023-12-22 03:30:00+00:00,1.09968,0.000318
7,2023-12-22 03:45:00+00:00,1.09979,5.5e-05
8,2023-12-22 04:00:00+00:00,1.1,-0.000127
9,2023-12-22 04:15:00+00:00,1.10002,-0.000255


Проверка выравнивания таргета с фактической доходностью исполнения (вход на следующей свече).


In [7]:
max_abs_diff = check_target_alignment(df_with_target, horizon=HOLD_BARS)
print("NaNs in target:", int(df_with_target["target"].isna().sum()))


max_abs_diff: 0.0
NaNs in target: 4


In [8]:
df_final = drop_na_for_training(df_with_target)
df_final.shape
df_final.isna().sum().sort_values(ascending=False).head(10)
df_final[["target", "log_ret_1", "log_ret_1_norm", "roll_vol_20", "trend_strength_m15"]].describe()
df_final["log_ret_1_norm"].quantile([0.001, 0.01, 0.5, 0.99, 0.999])


df_final.to_parquet(output_path, index=False)
print(f"Saved final feature dataset to: {output_path}")


Saved final feature dataset to: /Users/oleksandrpc/Desktop/my projects/EUR_USD_MODEL/data/eurusd_features.parquet


Итоговый датасет `eurusd_features.parquet` будет использоваться в ноутбуке 03 для обучения модели
и проверки стратегии. Признаки и целевая переменная готовы к обучению.