In [1]:
from __future__ import annotations

import os
import subprocess
import sys
from pathlib import Path

# Locate the project root: the closest directory, starting at the current
# working directory and moving upwards, that contains a 'src' folder.
launch_dir = Path.cwd().resolve()
project_root = next(
    (folder for folder in (launch_dir, *launch_dir.parents) if (folder / "src").is_dir()),
    None,
)
if project_root is None:
    # Reached the filesystem root without finding a 'src' directory.
    raise RuntimeError("Projektwurzel mit 'src' nicht gefunden.")

# Work relative to the project root and put it first on the import path
# (only once, so re-running the cell does not add duplicates).
os.chdir(project_root)
root_entry = str(project_root)
if root_entry not in sys.path:
    sys.path.insert(0, root_entry)

print("Projektwurzel:", project_root)
print("Arbeitsverzeichnis:", Path.cwd())


Projektwurzel: /Users/jeremynathan/Documents/GitHub/hs2025_ml_project/hs2025_ml_project
Arbeitsverzeichnis: /Users/jeremynathan/Documents/GitHub/hs2025_ml_project/hs2025_ml_project


In [2]:
# ==============================
# ZENTRALE EXPERIMENT-CONFIG (ein Ort!)
# ==============================
#
# Ziel:
# - Alle Variablen nur hier ändern
# - EXP_ID wird aus derselben Config gebaut
# - Pipeline + PDF lesen dieselbe Config/Outputs (keine Magic-Ints)

from copy import deepcopy

from src.experiments.v2_config import save_experiment_config

BASE = "at_v2"  # free-form prefix; becomes part of the EXP_ID
TEST_START = "2025-01-01"

# Trades are placed at the end of the day -> entry at that day's close
ENTRY_MODE = "close"  # 'close' | 'next_open'

# --------------------------------
# (A) Data sources (Yahoo vs EODHD)
# --------------------------------
# 'date_shift_days' is a simple test to approximate differences in the daily cut.
# For real live trading, the cut used by your broker is what matters.
DATA_VARIANTS = [
    {
        "name": source,
        "data": {
            "price_source": source,
            "drop_weekends": drop_weekends,
            "date_shift_days": 0,
            "cut": "as_is",
        },
    }
    # One variant per (price source, drop-weekends flag) pair.
    for source, drop_weekends in (("yahoo", False), ("eodhd", True))
]

# --------------------------------
# (B) Label variants (test both)
# --------------------------------
# 1) close_path = the previous labelling scheme (close path + optional max_adverse_move_pct)
# 2) tp_sl      = closer to real trading (TP/SL via high/low, optional ATR)

COMMON_HORIZON_DAYS = 15
TP_PCT = 0.02
UP_THR = 0.02
DOWN_THR = -0.02

# Several max_adverse_move_pct values can be tested for 'close_path'
ADV_LIST = [0.004, 0.01]  # 0.4% and 1%

# Both stop flavours can be tested for 'tp_sl':
FIXED_SL_LIST = [0.01, 0.015]  # 1% / 1.5%
ATR_WINDOW = 14
ATR_MULT_LIST = [1.0, 1.5]


def _tp_sl_label(stop_settings):
    """Return a 'tp_sl' label config; ``stop_settings`` is inserted right after 'tp_pct'."""
    return {
        "mode": "tp_sl",
        "horizon_days": COMMON_HORIZON_DAYS,
        "entry": ENTRY_MODE,
        "tp_pct": TP_PCT,
        **stop_settings,
        "intraday_tie_breaker": "stop",
        "conflict_policy": "first",
    }


LABEL_VARIANTS = [
    # close_path variants: one per adverse-move limit
    *(
        {
            "name": f"close_path_adv{adverse_pct}",
            "label": {
                "mode": "close_path",
                "horizon_days": COMMON_HORIZON_DAYS,
                "up_threshold": UP_THR,
                "down_threshold": DOWN_THR,
                "strict_monotonic": False,
                "max_adverse_move_pct": adverse_pct,
                "hit_within_horizon": True,
                "first_hit_wins": True,
            },
        }
        for adverse_pct in ADV_LIST
    ),
    # tp_sl variants with a fixed percentage stop
    *(
        {
            "name": f"tp_sl_fixed{stop_pct}",
            "label": _tp_sl_label({"sl_mode": "fixed_pct", "sl_pct": stop_pct}),
        }
        for stop_pct in FIXED_SL_LIST
    ),
    # tp_sl variants with an ATR-based stop
    *(
        {
            "name": f"tp_sl_atr{atr_mult}",
            "label": _tp_sl_label(
                {"sl_mode": "atr", "atr_window": ATR_WINDOW, "atr_mult": atr_mult}
            ),
        }
        for atr_mult in ATR_MULT_LIST
    ),
]

# --------------------------------
# (C) Model / threshold search
# --------------------------------
# Market-driven: no artificial re-balancing.
# min_pred_down/up is only a guardrail against degenerate thresholds ("only up").
MODEL_CFG = dict(
    test_start=TEST_START,
    train_frac_within_pretest=0.8,
    thr_min=0.3,
    thr_max=0.7,
    thr_step=0.05,
    min_pred_down=1,
    min_pred_up=1,
    stake_up=100.0,
    stake_down=100.0,
    # Cost model for the threshold optimisation (only used to pick the thresholds)
    tp_cost_pct=TP_PCT,
    # Set conservatively; for ATR labels this deliberately stays a "cost" proxy.
    sl_cost_pct=0.01,
)

# --------------------------------
# (D) GENERATE EXPERIMENTS
# --------------------------------
# One experiment per (data variant, label variant) combination, data variants
# in the outer position so the ids come out grouped by price source.
variant_pairs = (
    (data_variant, label_variant)
    for data_variant in DATA_VARIANTS
    for label_variant in LABEL_VARIANTS
)

exp_ids = []
for data_variant, label_variant in variant_pairs:
    # Deep copies keep every saved config independent of the shared templates.
    cfg = {
        "base": BASE,
        "data": deepcopy(data_variant["data"]),
        "label": deepcopy(label_variant["label"]),
        "model": deepcopy(MODEL_CFG),
        "notes": {
            "source_name": data_variant["name"],
            "label_variant": label_variant["name"],
        },
    }
    cfg_obj = save_experiment_config(cfg)
    exp_ids.append(cfg_obj.exp_id)
    print("[ok] EXP_ID:", cfg_obj.exp_id)


[ok] EXP_ID: at_v2__yahoo__h15__thr2pct__hit__first__adv0p4pct
[ok] EXP_ID: at_v2__yahoo__h15__thr2pct__hit__first__adv1pct
[ok] EXP_ID: at_v2__yahoo__h15__thr0pct__tp_sl__slfixed_pct__sl1pct__tp2pct
[ok] EXP_ID: at_v2__yahoo__h15__thr0pct__tp_sl__slfixed_pct__sl1p5pct__tp2pct
[ok] EXP_ID: at_v2__yahoo__h15__thr0pct__tp_sl__slatr__tp2pct__atr14__atrm1
[ok] EXP_ID: at_v2__yahoo__h15__thr0pct__tp_sl__slatr__tp2pct__atr14__atrm1p5
[ok] EXP_ID: at_v2__eodhd__h15__thr2pct__hit__first__adv0p4pct
[ok] EXP_ID: at_v2__eodhd__h15__thr2pct__hit__first__adv1pct
[ok] EXP_ID: at_v2__eodhd__h15__thr0pct__tp_sl__slfixed_pct__sl1pct__tp2pct
[ok] EXP_ID: at_v2__eodhd__h15__thr0pct__tp_sl__slfixed_pct__sl1p5pct__tp2pct
[ok] EXP_ID: at_v2__eodhd__h15__thr0pct__tp_sl__slatr__tp2pct__atr14__atrm1
[ok] EXP_ID: at_v2__eodhd__h15__thr0pct__tp_sl__slatr__tp2pct__atr14__atrm1p5


In [3]:
# ==============================
# RUN THE PIPELINE
# ==============================
# Outputs for each EXP_ID are written below data/processed/v2/...
#
# Each experiment runs in its own child process. A failing run does not stop
# the remaining experiments, but it is recorded and reported at the end so a
# broken pipeline cannot go unnoticed.

failed_exp_ids = []
for exp_id in exp_ids:
    print("\n=== RUN ===", exp_id)
    # sys.executable is the kernel's own interpreter, so the child uses the same
    # environment as this notebook (a bare 'python3' is resolved via PATH and
    # may be a different installation). The argument list bypasses the shell,
    # so the id is passed verbatim. '-u' disables output buffering in the child
    # so its log lines stream into the cell while it runs.
    command = [
        sys.executable,
        "-u",
        "-m",
        "scripts.run_two_stage_experiment_v2",
        "--exp-id",
        exp_id,
    ]
    with subprocess.Popen(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,  # interleave errors with regular output
        text=True,
    ) as pipeline_run:
        for output_line in pipeline_run.stdout:
            print(output_line, end="")
    # Leaving the 'with' block waits for the child, so returncode is set here.
    if pipeline_run.returncode != 0:
        print(f"[error] exit code {pipeline_run.returncode}: {exp_id}")
        failed_exp_ids.append(exp_id)

if failed_exp_ids:
    raise RuntimeError(
        f"{len(failed_exp_ids)} of {len(exp_ids)} pipeline run(s) failed: {failed_exp_ids}"
    )



=== RUN === at_v2__yahoo__h15__thr2pct__hit__first__adv0p4pct
[ok] v2 pipeline fertig:
   config: data/processed/v2/experiments/at_v2__yahoo__h15__thr2pct__hit__first__adv0p4pct_config.json
   labels: data/processed/v2/fx/eurusd_labels__at_v2__yahoo__h15__thr2pct__hit__first__adv0p4pct.csv
   dataset: data/processed/v2/datasets/eurusd_price_training__at_v2__yahoo__h15__thr2pct__hit__first__adv0p4pct.csv
   results: data/processed/v2/results/two_stage__at_v2__yahoo__h15__thr2pct__hit__first__adv0p4pct.json
   preds  : data/processed/v2/results/two_stage__at_v2__yahoo__h15__thr2pct__hit__first__adv0p4pct_predictions.csv
   report : data/processed/v2/reports/two_stage__at_v2__yahoo__h15__thr2pct__hit__first__adv0p4pct.pdf
   manifest: data/processed/v2/results/two_stage__at_v2__yahoo__h15__thr2pct__hit__first__adv0p4pct_manifest.json

=== RUN === at_v2__yahoo__h15__thr2pct__hit__first__adv1pct
[ok] v2 pipeline fertig:
   config: data/processed/v2/experiments/at_v2__yahoo__h15__thr2pct__h