# 2 – Training (Final, MT5 H1)

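Dieses Notebook trainiert für eine gegebene `EXP_ID` das Zwei-Stufen-
XGBoost-Modell (Signal + Richtung). Es liest den Trainingsdatensatz
aus `data/processed/datasets/...`. Der Feature-Mode wird nicht aus dem
Namen der `EXP_ID` abgeleitet: Er wird über `USE_NEWS` gesetzt und,
falls die Data-Prep-Config `<EXP_ID>_config.json` ein gültiges
`feature_mode` enthält, durch diesen Wert überschrieben.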


In [7]:
import sys
from pathlib import Path
import os

# Locate the project root: the closest directory, starting at the current
# working directory and walking upwards, that contains a 'src' folder.
cwd = Path.cwd()
project_root = next(
    (candidate for candidate in (cwd, *cwd.parents) if (candidate / 'src').is_dir()),
    None,
)
if project_root is None:
    raise RuntimeError("Projektwurzel mit 'src' nicht gefunden.")

print('Erkannte Projektwurzel:', project_root)

# Make the root importable without adding a duplicate sys.path entry.
root_entry = str(project_root)
if root_entry not in sys.path:
    sys.path.append(root_entry)

# From here on the kernel's working directory is the project root.
os.chdir(project_root)
print('Arbeitsverzeichnis gesetzt auf:', Path.cwd())


Erkannte Projektwurzel: /Users/jeremynathan/Documents/GitHub/hs2025_ml_project/hs2025_ml_project
Arbeitsverzeichnis gesetzt auf: /Users/jeremynathan/Documents/GitHub/hs2025_ml_project/hs2025_ml_project


In [8]:
# Set EXP_ID explicitly; it must match the EXP_ID used in the data-prep notebook.
EXP_ID = 'flex_5'
assert EXP_ID != 'CHANGE_ME', 'Bitte EXP_ID oben setzen.'

# True = with news merge, False = prices only
USE_NEWS = True
FEATURE_MODE = 'news+price' if USE_NEWS else 'price_only'

# Splits / threshold tuning
TRAIN_FRAC_PRETEST = 0.8  # set to 0.70 later for step 3
# USE_VALIDATION=False: no val split (train uses everything before test_start)
USE_VALIDATION = True
# 'val' (default) is clean; 'train' is optimistic; 'test' = leakage (not recommended)
TUNE_THRESHOLDS_ON = 'val'

# Thresholding logic (stage 2)
# - True: allows a neutral zone (prob between DOWN/UP)
# - False: if signal==1, always decide up/down (no neutral zone)
# NOTE: this is only a default; the TRADE_PROFILE selected below overwrites it.
ALLOW_DIRECTION_NEUTRAL = True

# Optional: soft penalty so the threshold search does not pick "no trades" too aggressively.
# TARGET_TRADE_RATE refers to trades per signal trade in the val split (0..1). Example: 0.6
# TRADE_RATE_PENALTY is in "CHF" points on the same scale as the P&L cost function.
TARGET_TRADE_RATE = None
TRADE_RATE_PENALTY = 0.0

# Optimisation objective for the thresholds (combined 3-class decision on val)
# - 'pnl':      maximises a simplified P&L (may deliberately choose few/no trades)
# - 'macro_f1': maximises macro-F1 over {neutral, up, down} (forces up/down not to be ignored completely)
THRESH_OPT_OBJECTIVE = 'macro_f1'

# Optimisation objective for DIR_THRESHOLD (only used for reporting the direction metrics)
# 'macro_f1' avoids the degenerate 'always up' / 'always down'.
DIR_THR_OPT_OBJECTIVE = 'macro_f1'

# Optional: use fixed thresholds without validation/optimisation (baseline)
# Trade profile: quick comparison between 'more trades' and 'more precision'
TRADE_PROFILE = 'more_trades'  # 'more_trades'|'balanced'|'more_precision'
AUTO_FIXED_DIR_THRESHOLDS = True  # sets DIR_THR_DOWN/UP via quantiles from the tuning split
FIXED_DIR_Q_SINGLE = 0.50  # used when ALLOW_DIRECTION_NEUTRAL=False (one single DIR threshold)
FIXED_DIR_Q_DOWN = 0.25
FIXED_DIR_Q_UP = 0.75
MIN_DIR_GAP = 0.01

# Tip: 0.5 everywhere requires ALLOW_DIRECTION_NEUTRAL=False. Because the trade
# profile overwrites that flag (and FIXED_SIGNAL_TRADE_THRESHOLD), select it via
# TRADE_PROFILE rather than by editing the defaults here.
USE_FIXED_THRESHOLDS = True
FIXED_SIGNAL_TRADE_THRESHOLD = 0.55
FIXED_DIR_THRESHOLD = 0.5
# With ALLOW_DIRECTION_NEUTRAL=True a band is required (otherwise DOWN/UP collide).
FIXED_DIR_THRESHOLD_DOWN = 0.48
FIXED_DIR_THRESHOLD_UP = 0.52


def trade_profile_overrides(profile: str) -> dict:
    """Return the settings a trade profile forces, keyed by setting name.

    Settings a profile does not mention keep the defaults assigned above.

    Raises:
        ValueError: if ``profile`` is not one of the known profiles. Previously
            any unknown value (e.g. a typo) was silently treated as 'balanced'.
    """
    profiles = {
        # many trades: low signal filter + no neutral zone (always up/down if signal==1)
        'more_trades': {
            'ALLOW_DIRECTION_NEUTRAL': False,
            'FIXED_SIGNAL_TRADE_THRESHOLD': 0.45,
            'FIXED_DIR_Q_SINGLE': 0.50,
        },
        # fewer trades: higher signal filter + wider neutral zone
        'more_precision': {
            'ALLOW_DIRECTION_NEUTRAL': True,
            'FIXED_SIGNAL_TRADE_THRESHOLD': 0.65,
            'FIXED_DIR_Q_DOWN': 0.30,
            'FIXED_DIR_Q_UP': 0.70,
        },
        'balanced': {
            'ALLOW_DIRECTION_NEUTRAL': True,
            'FIXED_SIGNAL_TRADE_THRESHOLD': 0.55,
            'FIXED_DIR_Q_DOWN': 0.35,
            'FIXED_DIR_Q_UP': 0.65,
        },
    }
    if profile not in profiles:
        raise ValueError(
            f"Unbekanntes TRADE_PROFILE: {profile!r} (erlaubt: {sorted(profiles)})"
        )
    return dict(profiles[profile])


# Apply the profile on top of the defaults above.
_profile = trade_profile_overrides(TRADE_PROFILE)
ALLOW_DIRECTION_NEUTRAL = _profile.get('ALLOW_DIRECTION_NEUTRAL', ALLOW_DIRECTION_NEUTRAL)
FIXED_SIGNAL_TRADE_THRESHOLD = _profile.get('FIXED_SIGNAL_TRADE_THRESHOLD', FIXED_SIGNAL_TRADE_THRESHOLD)
FIXED_DIR_Q_SINGLE = _profile.get('FIXED_DIR_Q_SINGLE', FIXED_DIR_Q_SINGLE)
FIXED_DIR_Q_DOWN = _profile.get('FIXED_DIR_Q_DOWN', FIXED_DIR_Q_DOWN)
FIXED_DIR_Q_UP = _profile.get('FIXED_DIR_Q_UP', FIXED_DIR_Q_UP)

print('[settings] EXP_ID:', EXP_ID)
print('[settings] TRADE_PROFILE:', TRADE_PROFILE)
print('[settings] USE_VALIDATION:', USE_VALIDATION, 'TRAIN_FRAC_PRETEST:', TRAIN_FRAC_PRETEST)
print('[settings] USE_FIXED_THRESHOLDS:', USE_FIXED_THRESHOLDS, 'AUTO_FIXED_DIR_THRESHOLDS:', AUTO_FIXED_DIR_THRESHOLDS)
print('[settings] ALLOW_DIRECTION_NEUTRAL:', ALLOW_DIRECTION_NEUTRAL)
print('[settings] FIXED_SIGNAL_TRADE_THRESHOLD:', FIXED_SIGNAL_TRADE_THRESHOLD)
print('[settings] FIXED_DIR_Q_SINGLE:', FIXED_DIR_Q_SINGLE, 'FIXED_DIR_Q_DOWN/UP:', FIXED_DIR_Q_DOWN, FIXED_DIR_Q_UP, 'MIN_DIR_GAP:', MIN_DIR_GAP)


# Optional: override XGBoost parameters (e.g. max_depth)
# Note: a max_depth that is too high often leads to overfitting (train good, val/test worse).
SIGNAL_XGB_PARAMS = {'max_depth': 2, 'min_child_weight': 5, 'subsample': 0.8, 'colsample_bytree': 0.8, 'reg_lambda': 2.0}
DIRECTION_XGB_PARAMS = {'max_depth': 2, 'min_child_weight': 5, 'subsample': 0.8, 'colsample_bytree': 0.8, 'reg_lambda': 2.0}

# Optional: additionally train a direct 3-class model (neutral/up/down) as a baseline
TRAIN_MULTICLASS_BASELINE = True


[settings] EXP_ID: flex_5
[settings] TRADE_PROFILE: more_trades
[settings] USE_VALIDATION: True TRAIN_FRAC_PRETEST: 0.8
[settings] USE_FIXED_THRESHOLDS: True AUTO_FIXED_DIR_THRESHOLDS: True
[settings] ALLOW_DIRECTION_NEUTRAL: False
[settings] FIXED_SIGNAL_TRADE_THRESHOLD: 0.45
[settings] FIXED_DIR_Q_SINGLE: 0.5 FIXED_DIR_Q_DOWN/UP: 0.25 0.75 MIN_DIR_GAP: 0.01


In [9]:
import json
from pathlib import Path

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

from src.utils.io import DATA_PROCESSED
from src.models.train_xgboost_two_stage import (
    split_train_val_test,
    build_signal_targets,
    build_direction_targets,
    train_xgb_binary,
    get_feature_cols,
)

plt.style.use('seaborn-v0_8')

# Load the dataset.
# Take the feature mode from the data-prep config (prevents a mismatch between 1_data_prep and 2_train).
exp_meta_dir = DATA_PROCESSED / 'experiments'
exp_config_path = exp_meta_dir / f'{EXP_ID}_config.json'
if not exp_config_path.exists():
    raise FileNotFoundError(
        f'Experiment-Config nicht gefunden: {exp_config_path}\n'
        '→ Bitte zuerst notebooks/final_two_stage_h1/1_data_prep_h1.ipynb mit derselben EXP_ID ausführen.'
    )
with exp_config_path.open('r', encoding='utf-8') as f:
    exp_config = json.load(f)
cfg_feature_mode = exp_config.get('feature_mode')
# Only a recognised config value replaces the FEATURE_MODE from the settings
# cell; a missing or unknown value leaves FEATURE_MODE untouched.
if cfg_feature_mode in {'price_only', 'news+price'}:
    if cfg_feature_mode != FEATURE_MODE:
        print(f"[info] FEATURE_MODE überschrieben durch Config: {FEATURE_MODE} -> {cfg_feature_mode}")
    FEATURE_MODE = cfg_feature_mode

# The dataset file name is built from the feature mode ('news'/'price') and the EXP_ID.
ds_kind = 'news' if FEATURE_MODE=='news+price' else 'price'
ds_filename = f"eurusd_{ds_kind}_training__{EXP_ID}.csv"
ds_path = DATA_PROCESSED / 'datasets' / ds_filename
print('Verwende Datensatz:', ds_path)
if not ds_path.exists():
    raise FileNotFoundError(
        f"Datensatz nicht gefunden: {ds_path}\n"
        "→ Bitte zuerst das Data-Prep-Notebook (notebooks/final_two_stage_h1/1_data_prep_h1.ipynb) mit genau derselben EXP_ID ausführen."
    )
# Rows are sorted by date and re-indexed before any splitting happens.
df = pd.read_csv(ds_path, parse_dates=['date']).sort_values('date').reset_index(drop=True)
print(df.shape)
base_cols = get_feature_cols(df)
if FEATURE_MODE == 'price_only':
    # Price-only mode: drop every column prefixed 'news_' plus the aggregate
    # columns listed below.
    news_cols = {
        'article_count','avg_polarity','avg_neg','avg_neu','avg_pos','pos_share','neg_share'
    }
    feature_cols = [c for c in base_cols if not c.startswith('news_') and c not in news_cols]
else:
    feature_cols = base_cols

print('Feature-Spalten:', len(feature_cols))


Verwende Datensatz: data/processed/datasets/eurusd_news_training__flex_5.csv
(1169, 60)
Feature-Spalten: 44


In [10]:
# Chronological splits: test_start marks the beginning of the test period and
# train_frac_pretest is handed to the splitter as the train share of the
# pre-test data (1.0 when no validation split is wanted).
test_start = '2025-01-01'
if USE_VALIDATION:
    train_frac_pretest = float(TRAIN_FRAC_PRETEST)
else:
    train_frac_pretest = 1.0

test_start_ts = pd.to_datetime(test_start)
splits = split_train_val_test(df, test_start_ts, train_frac_within_pretest=train_frac_pretest)

# One summary line per split: first date, last date, row count.
for name, split_df in splits.items():
    split_dates = split_df['date']
    print(name, split_dates.min().date(), '->', split_dates.max().date(), 'n=', len(split_df))


train 2020-04-14 -> 2024-04-09 n= 755
val 2024-04-10 -> 2024-12-31 n= 189
test 2025-01-02 -> 2025-11-12 n= 225


In [11]:
# Optional debug cell (only needed when you want to inspect something).
# Note: this cell requires the dataset and the splits from the cells above.
if 'splits' not in globals() or 'feature_cols' not in globals():
    raise RuntimeError(
        "Bitte zuerst die Zellen oben ausführen (Datensatz laden + Splits + feature_cols)."
    )

# Always rebuild the direction targets from the *current* train split.
# Reusing whatever X_train_dir/y_train_dir happen to be in the kernel would
# print stale numbers after EXP_ID, the dataset or the splits were changed.
X_train_dir, y_train_dir = build_direction_targets(splits['train'], feature_cols=feature_cols)

print('X_train_dir', getattr(X_train_dir, 'shape', None))
print('y_train_dir unique:', np.unique(y_train_dir, return_counts=True))
print('train signal counts:', splits['train']['signal'].value_counts().to_dict())
print('train direction counts (signal==1):', splits['train'].query('signal==1')['direction'].value_counts().to_dict())


X_train_dir (186, 44)
y_train_dir unique: (array([0, 1]), array([108,  78]))
train signal counts: {0: 569, 1: 186}
train direction counts (signal==1): {0.0: 108, 1.0: 78}


In [12]:
from pathlib import Path
from sklearn.metrics import classification_report, confusion_matrix
import json

# Threshold for the signal model (stage 1).
# Higher values -> higher precision, lower recall.
SIGNAL_THRESHOLD = 0.5

# --- Train the signal model ---
y_train_signal = build_signal_targets(splits['train'])
y_val_signal   = build_signal_targets(splits['val'])
y_test_signal  = build_signal_targets(splits['test'])

X_train_signal = splits['train'][feature_cols]
X_val_signal   = splits['val'][feature_cols]
X_test_signal  = splits['test'][feature_cols]

# Account for the class imbalance of the signal model explicitly:
# ratio of negatives to positives (denominator floored at 1 to avoid division by zero).
n_pos_signal = int((y_train_signal == 1).sum())
n_neg_signal = int((y_train_signal == 0).sum())
scale_pos_weight_signal = n_neg_signal / max(n_pos_signal, 1)
print('Signal scale_pos_weight:', scale_pos_weight_signal)

model_signal = train_xgb_binary(
    X_train_signal,
    y_train_signal,
    X_val_signal,
    y_val_signal,
    scale_pos_weight=scale_pos_weight_signal,
    xgb_params=SIGNAL_XGB_PARAMS,
)
print('[ok] Signal-Modell trainiert.')
# Best-effort diagnostics: a failure here is only reported, it must not abort the cell.
try:
    _b = model_signal.get_booster()
    print('[debug] Signal boosted rounds:', _b.num_boosted_rounds())
    if hasattr(model_signal, 'best_iteration'):
        print('[debug] Signal best_iteration:', getattr(model_signal, 'best_iteration', None))
    if hasattr(model_signal, 'best_score'):
        print('[debug] Signal best_score:', getattr(model_signal, 'best_score', None))
    print('[debug] Signal scale_pos_weight (used):', model_signal.get_xgb_params().get('scale_pos_weight'))
except Exception as e:
    print('[warn] Konnte Signal-Booster-Infos nicht lesen:', e)

# --- Train the direction model ---
# NOTE(review): build_direction_targets appears to keep only the signal==1 rows
# (186 of 755 train rows in the debug output above) — confirm in the helper.
X_train_dir, y_train_dir = build_direction_targets(splits['train'], feature_cols=feature_cols)
X_val_dir,   y_val_dir   = build_direction_targets(splits['val'],   feature_cols=feature_cols)
X_test_dir,  y_test_dir  = build_direction_targets(splits['test'],  feature_cols=feature_cols)

def _count01(y):
    if y is None or len(y) == 0:
        return {}
    u, c = np.unique(y, return_counts=True)
    return {int(uu): int(cc) for uu, cc in zip(u, c)}

# Class counts per split for both targets (quick sanity check before training).
print('[debug] Signal train counts:', _count01(y_train_signal))
print('[debug] Signal val counts  :', _count01(y_val_signal))
print('[debug] Signal test counts :', _count01(y_test_signal))
print('[debug] Direction train X/y:', getattr(X_train_dir, 'shape', None), _count01(y_train_dir))
print('[debug] Direction val   X/y:', getattr(X_val_dir, 'shape', None), _count01(y_val_dir))
print('[debug] Direction test  X/y:', getattr(X_test_dir, 'shape', None), _count01(y_test_dir))
if len(X_val_dir) == 0:
    print("[warn] Val-Split hat 0 Bewegungstage (signal==1) → DIR_THRESHOLD kann nicht val-basiert optimiert werden.")
if len(X_test_dir) == 0:
    print("[warn] Test-Split hat 0 Bewegungstage (signal==1) → Direction-Metriken sind leer.")

# Account for the class imbalance of the direction model
# (positive class = up=1, negative class = down=0)
n_pos_dir = int((y_train_dir == 1).sum())
n_neg_dir = int((y_train_dir == 0).sum())
scale_pos_weight_dir = n_neg_dir / max(n_pos_dir, 1)
print('Direction scale_pos_weight:', scale_pos_weight_dir)

model_dir = train_xgb_binary(
    X_train_dir,
    y_train_dir,
    X_val_dir,
    y_val_dir,
    scale_pos_weight=scale_pos_weight_dir,
    xgb_params=DIRECTION_XGB_PARAMS,
)
print('[ok] Richtungs-Modell trainiert.')
# Best-effort diagnostics: a failure here is only reported, it must not abort the cell.
try:
    _b = model_dir.get_booster()
    print('[debug] Direction boosted rounds:', _b.num_boosted_rounds())
    if hasattr(model_dir, 'best_iteration'):
        print('[debug] Direction best_iteration:', getattr(model_dir, 'best_iteration', None))
    if hasattr(model_dir, 'best_score'):
        print('[debug] Direction best_score:', getattr(model_dir, 'best_score', None))
    print('[debug] Direction scale_pos_weight (used):', model_dir.get_xgb_params().get('scale_pos_weight'))
except Exception as e:
    print('[warn] Konnte Direction-Booster-Infos nicht lesen:', e)

# --- Metriken berechnen und speichern ---

def binary_metrics_dict(y_true, y_prob, threshold, target_names):
    """Build the metrics dict for one binary model on one split.

    Args:
        y_true: true labels encoded as 0/1; may be ``None`` or empty.
        y_prob: predicted probability of the positive class, aligned with ``y_true``.
        threshold: a sample is predicted positive when ``y_prob >= threshold``.
        target_names: display names for class 0 and class 1, in that order.

    Returns:
        ``{'threshold', 'report', 'confusion_matrix'}``. ``report`` is the
        ``classification_report`` dict and ``confusion_matrix`` a 2x2 nested
        list; both are empty when ``y_true`` is ``None`` or empty.
    """
    if y_true is None or len(y_true) == 0:
        return {
            'threshold': float(threshold),
            'report': {},
            'confusion_matrix': [],
        }
    y_pred = (np.asarray(y_prob) >= threshold).astype(int)
    # Pin both classes explicitly. Without `labels`, sklearn derives the classes
    # from the data: if only one class occurs in y_true and y_pred (small split,
    # degenerate threshold), the two `target_names` no longer match and
    # classification_report raises, while confusion_matrix shrinks to 1x1.
    class_ids = [0, 1]
    report = classification_report(
        y_true,
        y_pred,
        labels=class_ids,
        target_names=target_names,
        output_dict=True,
        digits=3,
        zero_division=0,  # same convention as the other reports in this notebook
    )
    cm = confusion_matrix(y_true, y_pred, labels=class_ids).tolist()
    return {
        'threshold': float(threshold),
        'report': report,
        'confusion_matrix': cm,
    }

def proba_pos(model, X):
    """Return P(positive class) as a 1-D array; an empty array when ``X`` is empty or ``None``.

    Raises:
        ValueError: if ``model.predict_proba`` does not return a 2-D array with
            at least two columns.
    """
    has_rows = X is not None and len(X) > 0
    if not has_rows:
        return np.array([])

    class_probs = model.predict_proba(X)
    is_matrix = getattr(class_probs, 'ndim', 0) == 2
    if not (is_matrix and class_probs.shape[1] >= 2):
        raise ValueError(
            f"predict_proba lieferte unerwartete Form {getattr(class_probs, 'shape', None)}. "
            "Das Modell ist evtl. degeneriert (z.B. Training hatte nur 1 Klasse oder leere Daten)."
        )
    # Column 1 holds the probability of the positive class.
    return class_probs[:, 1]

# Probabilities
p_train_signal = proba_pos(model_signal, X_train_signal)
p_val_signal   = proba_pos(model_signal, X_val_signal)
p_test_signal  = proba_pos(model_signal, X_test_signal)

signal_metrics = {
    'train': binary_metrics_dict(y_train_signal, p_train_signal, SIGNAL_THRESHOLD, ['neutral', 'move']),
    'val':   binary_metrics_dict(y_val_signal,   p_val_signal,   SIGNAL_THRESHOLD, ['neutral', 'move']),
    'test':  binary_metrics_dict(y_test_signal,  p_test_signal,  SIGNAL_THRESHOLD, ['neutral', 'move']),
}

p_train_dir = proba_pos(model_dir, X_train_dir)
p_val_dir   = proba_pos(model_dir, X_val_dir)
p_test_dir  = proba_pos(model_dir, X_test_dir)

# Optimise the threshold of the direction model (down vs up) on the val split.
# Note: DIR_THRESHOLD only affects the *reporting* metrics of the direction model.
# The combined 3-class decision uses SIG_THR_TRADE + DIR_THR_DOWN/UP instead.
from sklearn.metrics import f1_score

DIR_THRESHOLD = 0.5
best_score_dir = None
# The search needs val samples of both classes; otherwise the 0.5 default is kept.
if y_val_dir is not None and len(y_val_dir) > 0 and len(np.unique(y_val_dir)) > 1:
    thr_grid = np.linspace(0.2, 0.8, 31)
    best_thr = 0.5
    best_score_dir = -1.0
    for thr in thr_grid:
        y_val_pred = (p_val_dir >= thr).astype(int)
        if DIR_THR_OPT_OBJECTIVE == 'f1_up':
            rep = classification_report(
                y_val_dir,
                y_val_pred,
                target_names=['down', 'up'],
                output_dict=True,
                digits=3,
                zero_division=0,
            )
            score = float(rep.get('up', {}).get('f1-score', -1.0))
        else:
            # macro_f1 (default) -> prevents degenerate solutions
            score = float(f1_score(y_val_dir, y_val_pred, average='macro', zero_division=0))

        # Strict '>' keeps the first (lowest) threshold among ties.
        if score > best_score_dir:
            best_score_dir = score
            best_thr = thr

    DIR_THRESHOLD = float(best_thr)
    print('Richtungs-Schwelle (val-basiert):', DIR_THRESHOLD, f"{DIR_THR_OPT_OBJECTIVE}(val):", best_score_dir)
else:
    print('[warn] Val-Split hat zu wenig Direction-Samples (0 oder nur 1 Klasse) → DIR_THRESHOLD=0.5')

direction_metrics = {
    'train': binary_metrics_dict(y_train_dir, p_train_dir, DIR_THRESHOLD, ['down', 'up']),
    'val':   binary_metrics_dict(y_val_dir,   p_val_dir,   DIR_THRESHOLD, ['down', 'up']),
    'test':  binary_metrics_dict(y_test_dir,  p_test_dir,  DIR_THRESHOLD, ['down', 'up']),
}

# --- Determine cost-based thresholds for the direction model ---
# The label parameters are re-read from the experiment config file here.
from src.utils.io import DATA_PROCESSED  # for access to the experiment config
exp_meta_dir = DATA_PROCESSED / 'experiments'
exp_config_path = exp_meta_dir / f'{EXP_ID}_config.json'
with exp_config_path.open('r', encoding='utf-8') as f:
    _cfg = json.load(f)
label_params = _cfg.get('label_params', {})
up_thr_label = float(label_params.get('up_threshold', 0.0))
down_thr_label = float(label_params.get('down_threshold', 0.0))
# Falls back to 0.01 when the key is missing or its value is falsy (None / 0).
max_adv_label = label_params.get('max_adverse_move_pct', 0.01) or 0.01

# Stake sizes for the cost function (must match strategy A in the report)
stake_up = 100.0
stake_down = 100.0

def cost_per_trade(true_label: str, pred_label: str) -> float:
    """Approximate P&L in CHF of one decision under strategy A.

    Simplified assumptions:
    - No trade (prediction 'neutral') costs and earns nothing.
    - A correct trade earns roughly label threshold * stake.
    - A wrong trade, or any trade on a truly neutral day, loses roughly
      max_adverse_move_pct * stake.
    """
    actual = str(true_label)
    predicted = str(pred_label)

    if predicted == 'neutral':
        return 0.0

    if actual == 'neutral':
        # Conservative: a trade on a neutral day always hits the stop-loss.
        stake = stake_up if predicted == 'up' else stake_down
        return -stake * max_adv_label

    if predicted == 'up':
        stake, gain = stake_up, up_thr_label
    elif predicted == 'down':
        # down_thr_label is negated so that a correct short counts as a gain.
        stake, gain = stake_down, -down_thr_label
    else:
        return 0.0

    return stake * gain if actual == predicted else -stake * max_adv_label

# --- Thresholds (signal + direction) ---
TUNE_SPLIT = None
# If USE_FIXED_THRESHOLDS=True: use fixed values (baseline, no tuning).
# Otherwise: joint optimisation on the chosen tuning split.
if USE_FIXED_THRESHOLDS:
    SIG_THR_TRADE = float(FIXED_SIGNAL_TRADE_THRESHOLD)
    # The auto-quantiles need a split that actually contains data.
    split_for_fixed = 'val' if (USE_VALIDATION and len(splits.get('val', [])) > 0) else 'train'
    TUNE_SPLIT = f'fixed:{split_for_fixed}'

    if ALLOW_DIRECTION_NEUTRAL:
        # Either a fixed band or automatically via quantiles of the direction probabilities
        if AUTO_FIXED_DIR_THRESHOLDS:
            p_sig_map = {'train': p_train_signal, 'val': p_val_signal, 'test': p_test_signal}
            p_sig = p_sig_map[split_for_fixed]
            p_dir = proba_pos(model_dir, splits[split_for_fixed][feature_cols])
            # Quantiles are taken only over rows that pass the signal trade threshold.
            sig_trade = (p_sig >= SIG_THR_TRADE)
            p_trade = p_dir[sig_trade] if len(p_dir) else np.array([])
            if len(p_trade):
                qd = float(np.quantile(p_trade, float(FIXED_DIR_Q_DOWN)))
                qu = float(np.quantile(p_trade, float(FIXED_DIR_Q_UP)))
                # make sure there is a real neutral zone
                if qu - qd < float(MIN_DIR_GAP):
                    mid = 0.5 * (qd + qu)
                    qd = float(max(0.0, mid - 0.5 * float(MIN_DIR_GAP)))
                    qu = float(min(1.0, mid + 0.5 * float(MIN_DIR_GAP)))
                DIR_THR_DOWN = qd
                DIR_THR_UP = qu
            else:
                # No row passes the signal filter -> fall back to the fixed band.
                DIR_THR_DOWN = float(FIXED_DIR_THRESHOLD_DOWN)
                DIR_THR_UP = float(FIXED_DIR_THRESHOLD_UP)
        else:
            DIR_THR_DOWN = float(FIXED_DIR_THRESHOLD_DOWN)
            DIR_THR_UP = float(FIXED_DIR_THRESHOLD_UP)
    else:
        # No neutral zone: one single threshold (P(up) >= thr => up, otherwise down)
        if AUTO_FIXED_DIR_THRESHOLDS:
            p_sig_map = {'train': p_train_signal, 'val': p_val_signal, 'test': p_test_signal}
            p_sig = p_sig_map[split_for_fixed]
            p_dir = proba_pos(model_dir, splits[split_for_fixed][feature_cols])
            sig_trade = (p_sig >= SIG_THR_TRADE)
            p_trade = p_dir[sig_trade] if len(p_dir) else np.array([])
            if len(p_trade):
                thr = float(np.quantile(p_trade, float(FIXED_DIR_Q_SINGLE)))
            else:
                thr = float(FIXED_DIR_THRESHOLD)
        else:
            thr = float(FIXED_DIR_THRESHOLD)
        DIR_THR_DOWN = thr
        DIR_THR_UP = thr

    print('[fixed] TRADE_PROFILE:', TRADE_PROFILE)
    print('[fixed] SIG_THR_TRADE:', SIG_THR_TRADE, 'DIR_THR_DOWN/UP:', DIR_THR_DOWN, DIR_THR_UP, 'split:', split_for_fixed)

    def _combined_counts(p_sig: np.ndarray, p_dir: np.ndarray) -> dict:
        """Count how the rows of one split are decided under the current thresholds.

        Returns a dict with the keys 'n', 'signal_trade', 'up', 'down' and
        'neutral'; reads SIG_THR_TRADE, DIR_THR_DOWN/UP and
        ALLOW_DIRECTION_NEUTRAL from the enclosing namespace.
        """
        if p_sig is None or len(p_sig) == 0:
            return {'n': 0, 'signal_trade': 0, 'up': 0, 'down': 0, 'neutral': 0}
        mask = (p_sig >= SIG_THR_TRADE)
        n_signal = int(mask.sum())
        if p_dir is None or len(p_dir) == 0:
            return {'n': int(len(p_sig)), 'signal_trade': n_signal, 'up': 0, 'down': 0, 'neutral': int(len(p_sig) - n_signal)}

        if not ALLOW_DIRECTION_NEUTRAL:
            # Partition without a neutral zone: >= thr => up, < thr => down (no overlap)
            up = int(np.sum(mask & (p_dir >= DIR_THR_UP)))
            down = int(n_signal - up)
            neutral = int(len(p_sig) - n_signal)
        else:
            up = int(np.sum(mask & (p_dir >= DIR_THR_UP)))
            down = int(np.sum(mask & (p_dir <= DIR_THR_DOWN)))
            neutral = int(len(p_sig) - up - down)
        return {'n': int(len(p_sig)), 'signal_trade': n_signal, 'up': up, 'down': down, 'neutral': neutral}

    for _split, _p_sig, _df_split in [
        ('train', p_train_signal, splits['train']),
        ('val', p_val_signal, splits['val']),
        ('test', p_test_signal, splits['test']),
    ]:
        _p_dir = proba_pos(model_dir, _df_split[feature_cols])
        print('[fixed][counts]', _split, _combined_counts(_p_sig, _p_dir))
else:
    # --- Optimise the thresholds (signal + direction) jointly ---
    # Important: separate optimisation can become inconsistent (e.g. direction thresholds
    # are optimised for SIGNAL_THRESHOLD=0.5 but the trade threshold is changed later).
    #
    # Tuning split:
    # - 'val'  (default): clean, no leakage
    # - 'train': allowed, but optimistic (thresholds are optimised on training data)
    # - 'test' : NOT recommended → leakage, result no longer comparable
    TUNE_SPLIT = str(TUNE_THRESHOLDS_ON)
    if TUNE_SPLIT not in {'train', 'val', 'test'}:
        raise ValueError("TUNE_THRESHOLDS_ON muss 'train', 'val' oder 'test' sein.")
    if TUNE_SPLIT == 'test':
        print('[warn] TUNE_THRESHOLDS_ON="test" → Daten-Leakage. Test-Metriken sind dann optimistisch und nicht mehr fair.')
    if len(splits.get(TUNE_SPLIT, [])) == 0:
        print(f"[warn] Split '{TUNE_SPLIT}' ist leer → fallback auf 'train' für Threshold-Tuning.")
        TUNE_SPLIT = 'train'

    labels_tune = splits[TUNE_SPLIT]['label'].to_numpy()

    p_signal_map = {'train': p_train_signal, 'val': p_val_signal, 'test': p_test_signal}
    p_tune_signal = p_signal_map[TUNE_SPLIT]

    # NOTE(review): the candidate range below is derived with the fixed
    # SIGNAL_THRESHOLD, not with the thr_sig values searched further down.
    signal_pred_tune = (p_tune_signal >= SIGNAL_THRESHOLD).astype(int)
    p_tune_dir_all = proba_pos(model_dir, splits[TUNE_SPLIT][feature_cols])

    # Adapt the candidates dynamically to the probabilities that are actually predicted,
    # otherwise the optimisation may pick a threshold that is never reached (-> everything neutral).
    p_tune_trade = p_tune_dir_all[signal_pred_tune == 1] if len(p_tune_dir_all) else np.array([])
    if len(p_tune_trade):
        lo = float(np.nanmin(p_tune_trade))
        hi = float(np.nanmax(p_tune_trade))
        if not np.isfinite(lo) or not np.isfinite(hi) or lo == hi:
            thr_candidates = np.linspace(0.3, 0.7, 17)
        else:
            # a little padding so that the bounds themselves are reachable
            pad = 0.01
            thr_candidates = np.linspace(max(0.0, lo - pad), min(1.0, hi + pad), 17)
    else:
        thr_candidates = np.linspace(0.3, 0.7, 17)

    from sklearn.metrics import f1_score

    thr_sig_candidates = np.linspace(0.3, 0.7, 17)
    # Running best over the grid; the defaults are returned if no candidate beats best_score.
    best_score = -1e18
    best_pnl = -1e18
    best_macro_f1 = -1.0
    best_sig_thr = SIGNAL_THRESHOLD
    best_thr_down = float(np.nanmin(thr_candidates)) if len(thr_candidates) else 0.4
    best_thr_up = float(np.nanmax(thr_candidates)) if len(thr_candidates) else 0.6
    best_thr_single = 0.5
    best_trade_rate = None
    best_counts = None

    def _apply_trade_rate_penalty(pnl: float, trade_rate: float | None) -> float:
        """Return ``pnl`` minus TRADE_RATE_PENALTY * |trade_rate - TARGET_TRADE_RATE|.

        ``pnl`` is returned unchanged when no target rate is set, the penalty is
        not positive, or ``trade_rate`` is ``None``.
        """
        if TARGET_TRADE_RATE is None or TRADE_RATE_PENALTY <= 0 or trade_rate is None:
            return float(pnl)
        return float(pnl) - float(TRADE_RATE_PENALTY) * abs(float(trade_rate) - float(TARGET_TRADE_RATE))

    for thr_sig in thr_sig_candidates:
        sig_trade = (p_tune_signal >= thr_sig)
        n_signal = int(sig_trade.sum())

        if not ALLOW_DIRECTION_NEUTRAL:
            # No neutral zone: if signal==1, always up/down.
            for thr in thr_candidates:
                pred = np.full(len(labels_tune), 'neutral', dtype=object)
                pred[sig_trade & (p_tune_dir_all >= thr)] = 'up'
                pred[sig_trade & (p_tune_dir_all < thr)] = 'down'

                n_trades = n_signal
                n_up = int((pred == 'up').sum())
                n_down = int((pred == 'down').sum())
                trade_rate = (n_trades / max(n_signal, 1)) if n_signal > 0 else None

                pnl = float(sum(cost_per_trade(t, p) for t, p in zip(labels_tune, pred)))
                macro_f1 = float(f1_score(labels_tune, pred, labels=['neutral', 'up', 'down'], average='macro', zero_division=0))

                if THRESH_OPT_OBJECTIVE == 'macro_f1':
                    score = macro_f1
                else:
                    score = _apply_trade_rate_penalty(pnl, trade_rate)

                if score > best_score:
                    best_score = score
                    best_pnl = pnl
                    best_macro_f1 = macro_f1
                    best_sig_thr = float(thr_sig)
                    best_thr_single = float(thr)
                    best_thr_down = float(thr)
                    best_thr_up = float(thr)
                    best_trade_rate = trade_rate
                    best_counts = {'signal': n_signal, 'trades': n_trades, 'up': n_up, 'down': n_down}
        else:
            # Neutral zone: up only if prob>=UP, down only if prob<=DOWN.
            for thr_down in thr_candidates:
                for thr_up in thr_candidates:
                    # Only pairs with a non-empty band between DOWN and UP are valid.
                    if thr_down >= thr_up:
                        continue
                    pred = np.full(len(labels_tune), 'neutral', dtype=object)
                    pred[sig_trade & (p_tune_dir_all >= thr_up)] = 'up'
                    pred[sig_trade & (p_tune_dir_all <= thr_down)] = 'down'

                    n_trades = int((pred != 'neutral').sum())
                    n_up = int((pred == 'up').sum())
                    n_down = int((pred == 'down').sum())
                    trade_rate = (n_trades / max(n_signal, 1)) if n_signal > 0 else None

                    pnl = float(sum(cost_per_trade(t, p) for t, p in zip(labels_tune, pred)))
                    macro_f1 = float(f1_score(labels_tune, pred, labels=['neutral', 'up', 'down'], average='macro', zero_division=0))

                    if THRESH_OPT_OBJECTIVE == 'macro_f1':
                        score = macro_f1
                    else:
                        score = _apply_trade_rate_penalty(pnl, trade_rate)

                    if score > best_score:
                        best_score = score
                        best_pnl = pnl
                        best_macro_f1 = macro_f1
                        best_sig_thr = float(thr_sig)
                        best_thr_down = float(thr_down)
                        best_thr_up = float(thr_up)
                        best_trade_rate = trade_rate
                        best_counts = {'signal': n_signal, 'trades': n_trades, 'up': n_up, 'down': n_down}

    SIG_THR_TRADE = float(best_sig_thr)
    DIR_THR_DOWN = float(best_thr_down)
    DIR_THR_UP = float(best_thr_up)
    print('[opt] THRESH_OPT_OBJECTIVE:', THRESH_OPT_OBJECTIVE)
    print('[opt] TUNE_SPLIT:', TUNE_SPLIT)
    print('[opt] SIG_THR_TRADE:', SIG_THR_TRADE, 'DIR_THR_DOWN/UP:', DIR_THR_DOWN, DIR_THR_UP)
    print('[opt] score:', best_score, f"pnl({TUNE_SPLIT}):", best_pnl, f"macro_f1({TUNE_SPLIT}):", best_macro_f1)
    print('[opt] trade_rate:', best_trade_rate, 'counts:', best_counts)
# Combined 3-class evaluation on the test split
X_test_all = splits['test'][feature_cols]
signal_prob_test = proba_pos(model_signal, X_test_all)
# NOTE(review): signal_pred_test is not used in the visible part of this cell;
# the combined decision below uses SIG_THR_TRADE, not SIGNAL_THRESHOLD.
signal_pred_test = (signal_prob_test >= SIGNAL_THRESHOLD).astype(int)
dir_prob_test = proba_pos(model_dir, X_test_all)

# Default is 'neutral'; rows that pass the signal trade threshold get a direction.
combined_pred = np.full(len(signal_prob_test), 'neutral', dtype=object)
mask_signal_trade = signal_prob_test >= SIG_THR_TRADE
if not ALLOW_DIRECTION_NEUTRAL:
    # No neutral zone: >= thr => up, < thr => down (ties go to up)
    combined_pred[mask_signal_trade & (dir_prob_test >= DIR_THR_UP)] = 'up'
    combined_pred[mask_signal_trade & (dir_prob_test <  DIR_THR_UP)] = 'down'
else:
    # Neutral zone: probabilities strictly between DOWN and UP stay 'neutral'.
    combined_pred[mask_signal_trade & (dir_prob_test >= DIR_THR_UP)] = 'up'
    combined_pred[mask_signal_trade & (dir_prob_test <= DIR_THR_DOWN)] = 'down'

combined_true = splits['test']['label'].to_numpy()

# NOTE(review): unlike the other reports in this cell, no zero_division is passed
# here, so sklearn will warn when a class is never predicted.
combined_report = classification_report(
    combined_true,
    combined_pred,
    labels=['neutral', 'up', 'down'],
    output_dict=True,
    digits=3,
)
combined_cm = confusion_matrix(
    combined_true,
    combined_pred,
    labels=['neutral', 'up', 'down'],
).tolist()

# Optional: 3-class baseline (one model instead of the two-stage setup)
multiclass_metrics = None
multiclass_params = None
if TRAIN_MULTICLASS_BASELINE:
    import xgboost as xgb
    label_map_mc = {'neutral': 0, 'up': 1, 'down': 2}

    def _y_mc(split_name: str):
        """Return the labels of one split encoded via ``label_map_mc`` as an int array."""
        return splits[split_name]['label'].map(label_map_mc).astype(int).to_numpy()

    y_train_mc = _y_mc('train')
    y_val_mc = _y_mc('val')
    y_test_mc = _y_mc('test')

    # Inverse class frequency as sample weights
    counts = np.bincount(y_train_mc, minlength=3)
    total = float(len(y_train_mc))
    cls_w = {i: (total / (3.0 * max(float(c), 1.0))) for i, c in enumerate(counts)}
    w_train_mc = np.array([cls_w[int(y)] for y in y_train_mc], dtype=float)
    print('[mc] class counts train:', {i: int(c) for i, c in enumerate(counts)}, 'weights:', cls_w)

    # Features: same as the signal model (all days)
    use_eval_mc = X_val_signal is not None and len(X_val_signal) > 0 and y_val_mc is not None and len(y_val_mc) > 0

    mc_model_kwargs = dict(
        objective='multi:softprob',
        num_class=3,
        eval_metric='mlogloss',
        max_depth=3,
        learning_rate=0.05,
        n_estimators=600,
        subsample=0.9,
        colsample_bytree=0.9,
        random_state=42,
    )
    if use_eval_mc:
        # Early stopping is configured on the estimator: passing
        # `early_stopping_rounds` to fit() is deprecated since XGBoost 1.6 and
        # no longer accepted by later releases. It is only set when a val split
        # exists, because early stopping requires an eval_set.
        mc_model_kwargs['early_stopping_rounds'] = 50
    model_mc = xgb.XGBClassifier(**mc_model_kwargs)

    if use_eval_mc:
        model_mc.fit(
            X_train_signal,
            y_train_mc,
            sample_weight=w_train_mc,
            eval_set=[(X_val_signal, y_val_mc)],
            verbose=False,
        )
    else:
        # No val split available -> train without early stopping
        model_mc.fit(
            X_train_signal,
            y_train_mc,
            sample_weight=w_train_mc,
            verbose=False,
        )

    def _mc_metrics(y_true, y_pred):
        """Return ``{'report', 'confusion_matrix'}`` for the 3-class baseline on one split.

        Both entries are empty when ``y_true`` is ``None`` or empty.
        """
        if y_true is None or len(y_true) == 0:
            return {'report': {}, 'confusion_matrix': []}
        # Pin all three class ids: without `labels`, a split in which one class
        # never occurs (neither true nor predicted) no longer matches the three
        # target_names and classification_report raises.
        class_ids = [0, 1, 2]
        rep = classification_report(
            y_true,
            y_pred,
            labels=class_ids,
            target_names=['neutral', 'up', 'down'],
            output_dict=True,
            digits=3,
            zero_division=0,
        )
        cm = confusion_matrix(y_true, y_pred, labels=class_ids).tolist()
        return {'report': rep, 'confusion_matrix': cm}

    pred_train_mc = model_mc.predict(X_train_signal)
    pred_val_mc = model_mc.predict(X_val_signal) if X_val_signal is not None and len(X_val_signal) > 0 else np.array([], dtype=int)
    pred_test_mc = model_mc.predict(X_test_signal) if X_test_signal is not None and len(X_test_signal) > 0 else np.array([], dtype=int)

    multiclass_metrics = {
        'train': _mc_metrics(y_train_mc, pred_train_mc),
        'val': _mc_metrics(y_val_mc, pred_val_mc),
        'test': _mc_metrics(y_test_mc, pred_test_mc),
        'labels': ['neutral', 'up', 'down'],
    }
    multiclass_params = model_mc.get_xgb_params()
    multiclass_params['feature_importances_'] = model_mc.feature_importances_.tolist()
    print('[ok] 3-Klassen-Baseline trainiert.')

# Modell-Parameter + Feature-Importances
signal_params = model_signal.get_xgb_params()
direction_params = model_dir.get_xgb_params()
signal_params['feature_importances_'] = model_signal.feature_importances_.tolist()
direction_params['feature_importances_'] = model_dir.feature_importances_.tolist()

# Load the experiment config for the current EXP_ID
from src.utils.io import DATA_PROCESSED
exp_meta_dir = DATA_PROCESSED / 'experiments'
exp_config_path = exp_meta_dir / f'{EXP_ID}_config.json'
# Reuse an already loaded config only when it belongs to the current EXP_ID.
# A bare "is it in globals()" check would silently keep the config of a
# previously run experiment after EXP_ID was changed and the cell re-run,
# and the stale label parameters would end up in the results JSON.
if 'exp_config' not in globals() or exp_config.get('exp_id', EXP_ID) != EXP_ID:
    with exp_config_path.open('r', encoding='utf-8') as f:
        exp_config = json.load(f)

feature_mode = FEATURE_MODE

# Label parameters are copied one-to-one from the experiment config;
# keys missing there are stored as None.
_label_cfg = exp_config.get('label_params', {})
_label_param_keys = (
    'price_source',
    'drop_weekends',
    'horizon_days',
    'up_threshold',
    'down_threshold',
    'strict_monotonic',
    'max_adverse_move_pct',
    'hit_within_horizon',
    'first_hit_wins',
)

# Snapshot of every setting used in this run, stored alongside the metrics.
config_block = {
    'exp_id': exp_config.get('exp_id', EXP_ID),
    **{param_key: _label_cfg.get(param_key) for param_key in _label_param_keys},
    # data / split settings
    'dataset_path': str(ds_path),
    'feature_cols': feature_cols,
    'test_start': test_start,
    'train_frac_within_pretest': train_frac_pretest,
    'use_validation': bool(USE_VALIDATION),
    'tune_thresholds_on': str(TUNE_THRESHOLDS_ON),
    # thresholds in effect
    'signal_threshold': SIGNAL_THRESHOLD,
    'signal_threshold_trade': SIG_THR_TRADE,
    'direction_threshold': DIR_THRESHOLD,
    'direction_threshold_down': DIR_THR_DOWN,
    'direction_threshold_up': DIR_THR_UP,
    'allow_direction_neutral': bool(ALLOW_DIRECTION_NEUTRAL),
    'threshold_opt_objective': THRESH_OPT_OBJECTIVE,
    'threshold_tune_split': str(TUNE_SPLIT),
    # fixed-threshold settings
    'use_fixed_thresholds': bool(USE_FIXED_THRESHOLDS),
    'fixed_signal_trade_threshold': float(FIXED_SIGNAL_TRADE_THRESHOLD),
    'fixed_dir_threshold': float(FIXED_DIR_THRESHOLD),
    'fixed_dir_threshold_down': float(FIXED_DIR_THRESHOLD_DOWN),
    'fixed_dir_threshold_up': float(FIXED_DIR_THRESHOLD_UP),
    'trade_profile': str(TRADE_PROFILE),
    'auto_fixed_dir_thresholds': bool(AUTO_FIXED_DIR_THRESHOLDS),
    'fixed_dir_q_down': float(FIXED_DIR_Q_DOWN),
    'fixed_dir_q_up': float(FIXED_DIR_Q_UP),
    'min_dir_gap': float(MIN_DIR_GAP),
    'target_trade_rate': TARGET_TRADE_RATE,
    'trade_rate_penalty': float(TRADE_RATE_PENALTY),
    # model settings
    'train_multiclass_baseline': bool(TRAIN_MULTICLASS_BASELINE),
    'signal_xgb_params': SIGNAL_XGB_PARAMS,
    'direction_xgb_params': DIRECTION_XGB_PARAMS,
    'feature_mode': feature_mode,
}

# Assemble the results document: config, model parameters and metrics.
_model_params_block = {
    'signal': signal_params,
    'direction': direction_params,
}
_combined_test_block = {
    'report': combined_report,
    'confusion_matrix': combined_cm,
    'labels': ['neutral', 'up', 'down'],
}
results = {
    'config': config_block,
    'model_params': _model_params_block,
    'signal': signal_metrics,
    'direction': direction_metrics,
    'combined_test': _combined_test_block,
}
# The 3-class baseline is optional; add its metrics and parameters only if present.
if multiclass_metrics is not None:
    results['multiclass'] = multiclass_metrics
    _model_params_block['multiclass'] = multiclass_params

# Write the results to the standard and the final results folder
base_results_dir = Path('notebooks') / 'results'
final_results_dir = base_results_dir / 'final_two_stage'
base_results_dir.mkdir(parents=True, exist_ok=True)
final_results_dir.mkdir(parents=True, exist_ok=True)

json_base = base_results_dir / f'two_stage__{EXP_ID}.json'
json_final = final_results_dir / f'two_stage_final__{EXP_ID}.json'

# Serialize once, before any file is opened: if `results` contains something
# that is not JSON-serializable, the error is raised here and no truncated
# file is left behind (json.dump would stream into an already opened file).
results_json = json.dumps(results, indent=2)
for json_path in (json_base, json_final):
    # Explicit encoding, consistent with how the experiment config is read.
    json_path.write_text(results_json, encoding='utf-8')

# Simple metrics table (precision / recall / F1 of the positive class)
def _positive_class_row(model_name, split_name, split_metrics, pos_label):
    """Return one table row with the positive-class scores of a single split."""
    pos_scores = split_metrics['report'].get(pos_label, {})
    return {
        'model': model_name,
        'split': split_name,
        'precision_pos': pos_scores.get('precision'),
        'recall_pos': pos_scores.get('recall'),
        'f1_pos': pos_scores.get('f1-score'),
    }


# Positive class per model: 'move' for the signal stage, 'up' for the direction stage.
_positive_label_by_model = {'signal': 'move', 'direction': 'up'}

rows = [
    _positive_class_row(model_name, split_name, split_metrics, pos_label)
    for model_name, pos_label in _positive_label_by_model.items()
    for split_name, split_metrics in results[model_name].items()
]

metrics_df = pd.DataFrame(rows)
csv_final = final_results_dir / f'two_stage_final__{EXP_ID}_metrics.csv'
metrics_df.to_csv(csv_final, index=False)

# Store the test predictions as CSV for misclassification analyses
_test_split = splits['test']
test_dates = _test_split['date'].to_numpy()
test_labels = _test_split['label'].to_numpy()

# Optional: 3-class baseline predictions (only if TRAIN_MULTICLASS_BASELINE=True)
mc_pred_test = None
mc_proba_test = None
if TRAIN_MULTICLASS_BASELINE:
    # Map the baseline's integer class ids back to label names.
    _mc_name_by_id = {0: 'neutral', 1: 'up', 2: 'down'}
    mc_pred_test = np.array(
        [_mc_name_by_id[int(class_id)] for class_id in pred_test_mc],
        dtype=object,
    )
    mc_proba_test = model_mc.predict_proba(X_test_signal)

# Encode the combined prediction as 1 (up), 0 (down) or -1 (anything else).
_is_up = combined_pred == 'up'
_is_down = combined_pred == 'down'
_direction_pred_up = np.where(_is_up, 1, np.where(_is_down, 0, -1)).astype(int)

pred_df = pd.DataFrame({
    'date': test_dates,
    'label_true': test_labels,
    'signal_prob': signal_prob_test.astype(float),
    'signal_pred': signal_pred_test.astype(int),
    'direction_prob_up': dir_prob_test.astype(float),
    'direction_pred_up': _direction_pred_up,
    'combined_pred': combined_pred,
})

# Append the baseline columns only when they exist and line up with the test rows.
_has_mc_output = mc_pred_test is not None and mc_proba_test is not None
if _has_mc_output and len(mc_proba_test) == len(pred_df):
    pred_df['multiclass_pred'] = mc_pred_test
    for _proba_col, _class_idx in (
        ('multiclass_prob_neutral', 0),
        ('multiclass_prob_up', 1),
        ('multiclass_prob_down', 2),
    ):
        pred_df[_proba_col] = mc_proba_test[:, _class_idx].astype(float)

pred_path = final_results_dir / f'two_stage_final__{EXP_ID}_predictions.csv'
pred_df.to_csv(pred_path, index=False)

# Summary of all artifacts written by this cell.
_saved_artifacts = (
    ('   JSON base :', json_base),
    ('   JSON final:', json_final),
    ('   CSV final :', csv_final),
    ('   Predictions:', pred_path),
)
print('[ok] Ergebnisse gespeichert unter:')
for _caption, _artifact_path in _saved_artifacts:
    print(_caption, _artifact_path)


Signal scale_pos_weight: 3.0591397849462365
[ok] Signal-Modell trainiert.
[debug] Signal boosted rounds: 109
[debug] Signal best_iteration: 58
[debug] Signal best_score: 0.6430864583247553
[debug] Signal scale_pos_weight (used): 3.0591397849462365
[debug] Signal train counts: {0: 569, 1: 186}
[debug] Signal val counts  : {0: 151, 1: 38}
[debug] Signal test counts : {0: 166, 1: 59}
[debug] Direction train X/y: (186, 44) {0: 108, 1: 78}
[debug] Direction val   X/y: (38, 44) {0: 23, 1: 15}
[debug] Direction test  X/y: (59, 44) {0: 19, 1: 40}
Direction scale_pos_weight: 1.3846153846153846
[ok] Richtungs-Modell trainiert.
[debug] Direction boosted rounds: 51
[debug] Direction best_iteration: 0
[debug] Direction best_score: 0.7020537492476011
[debug] Direction scale_pos_weight (used): 1.3846153846153846
Richtungs-Schwelle (val-basiert): 0.5 macro_f1(val): 0.3909407665505227
[fixed] TRADE_PROFILE: more_trades
[fixed] SIG_THR_TRADE: 0.45 DIR_THR_DOWN/UP: 0.5065287351608276 0.5065287351608276 s



[ok] 3-Klassen-Baseline trainiert.
[ok] Ergebnisse gespeichert unter:
   JSON base : notebooks/results/two_stage__flex_5.json
   JSON final: notebooks/results/final_two_stage/two_stage_final__flex_5.json
   CSV final : notebooks/results/final_two_stage/two_stage_final__flex_5_metrics.csv
   Predictions: notebooks/results/final_two_stage/two_stage_final__flex_5_predictions.csv
