# BA Trading System - Schritt-fuer-Schritt Notebook

Dieses Notebook fuehrt dich durch den kompletten Projektablauf:
- Umgebung vorbereiten
- Config sichten (Notebook-Kopie) und optional anpassen
- Datenquelle waehlen (LSEG API oder lokale Excel aus DataStorage)
- Features pruefen
- Kompletten Modellvergleich starten und Ergebnisse ansehen


## 0. Voraussetzungen (einmalig)
- Python >= 3.10
- Virtuelle Umgebung empfohlen
- Abhaengigkeiten installieren: `pip install -r requirements.txt`
- Falls LSEG API genutzt wird: Zugangsdaten fuer `lseg.data` bereitstellen


In [None]:
from pathlib import Path
import os, sys, platform

def find_project_root() -> Path:
    """Locate the project directory, starting at the current working directory.

    The resolved working directory and each of its parents are inspected in
    order (nearest first). A directory qualifies when it contains both
    ``config.yaml`` and ``main.py``; if it does not, its ``Combination``
    sub-directory is checked for the same two files.

    Returns:
        The first qualifying directory, or the resolved working directory
        when nothing in the hierarchy qualifies.
    """
    marker_files = ('config.yaml', 'main.py')

    def is_project_dir(folder: Path) -> bool:
        # Both marker files must be present for a directory to count.
        return all((folder / marker).exists() for marker in marker_files)

    cwd = Path.cwd().resolve()
    for directory in (cwd, *cwd.parents):
        if is_project_dir(directory):
            return directory
        nested = directory / 'Combination'
        if is_project_dir(nested):
            return nested

    # Fallback: nothing matched, so stay where we are.
    return cwd

# Resolve the project directory once and switch the process working directory to it.
# NOTE(review): presumably this is what lets later cells import project modules and
# use project-relative paths - confirm against how the kernel sets up sys.path.
PROJECT_ROOT = find_project_root()
os.chdir(PROJECT_ROOT)

# Report the environment: chosen directory, Python version number and platform string.
print(f'Arbeitsverzeichnis: {PROJECT_ROOT}')
print(f'Python-Version: {sys.version.split()[0]}')
print(f'Plattform: {platform.platform()}')


In [8]:
# Optional: Dependencies installieren (nur beim ersten Start noetig)
# !pip install -r requirements.txt


## 1. Config laden (Notebook-Kopie)
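Wir nutzen eine Kopie der `config.yaml`, damit du hier experimentieren kannst, ohne die Originaldatei zu veraendern. Die Kopie (`config_notebook.yaml`) wird nur angelegt, wenn sie noch nicht existiert; loesche sie, wenn du wieder mit dem aktuellen Stand der `config.yaml` starten moechtest.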


In [9]:
import shutil
import yaml

# Paths of the original project config and of the notebook's private working copy.
CONFIG_PATH = PROJECT_ROOT / 'config.yaml'
CONFIG_NOTEBOOK_PATH = PROJECT_ROOT / 'config_notebook.yaml'

if not CONFIG_PATH.exists():
    raise FileNotFoundError(f'config.yaml nicht gefunden. Gefundener PROJECT_ROOT: {PROJECT_ROOT}')

# Create the working copy only when it is missing, so edits made to it survive
# re-running this cell. An existing copy is never refreshed from config.yaml.
if not CONFIG_NOTEBOOK_PATH.exists():
    shutil.copy(CONFIG_PATH, CONFIG_NOTEBOOK_PATH)
    print('Notebook-Config angelegt: config_notebook.yaml')
else:
    print('Notebook-Config vorhanden: config_notebook.yaml')

# From here on CONFIG_PATH refers to the notebook copy, not the original file.
CONFIG_PATH = CONFIG_NOTEBOOK_PATH

# Decode explicitly as UTF-8 instead of the platform's locale encoding.
# safe_load returns None for an empty document, so fall back to an empty mapping.
with open(CONFIG_PATH, 'r', encoding='utf-8') as f:
    cfg = yaml.safe_load(f) or {}

# A section that is present but empty (e.g. a bare "data:") parses to None, which
# the default argument of dict.get does not cover; "or" handles both cases.
portfolios = list((cfg.get('data') or {}).get('portfolios') or {})
features = (cfg.get('features') or {}).get('input_features') or []
active_models = (cfg.get('models') or {}).get('active_models') or []

print('Portfolios:', portfolios)
print('Features:', features)
print('Aktive Modelle (laut config):', active_models if active_models else 'alle mit enabled=true')


FileNotFoundError: config.yaml nicht gefunden. Gefundener PROJECT_ROOT: /Users/dennissydow/PycharmProjects/BA_combination/Combination/notebooks

## 2. Datenquelle waehlen
- `USE_LSEG_API = True`: holt frische Daten ueber die LSEG/Refinitiv API (Zugangsdaten noetig).
- `USE_LSEG_API = False`: nutzt die vorhandenen Excel-Dateien in `DataStorage/` (offline Demo).


In [10]:
# Switch that later cells read to decide between the API and the local Excel files.
USE_LSEG_API = False  # True = LSEG API, False = local Excel files
# Directory that the offline loader is pointed at.
DATA_DIR = PROJECT_ROOT / 'DataStorage'

# Echo the chosen mode and whether the local data directory exists at all.
print(f'Nutze LSEG API: {USE_LSEG_API}')
print(f'DataStorage vorhanden: {DATA_DIR.exists()}')


Nutze LSEG API: False
DataStorage vorhanden: False


In [12]:
import pandas as pd
from contextlib import contextmanager
from typing import Dict, Tuple
from ConfigManager import ConfigManager

def clean_column_name(raw_name: object, index_prefix: str | None = None) -> str:
    # Bereinigt Spaltennamen aus den Excel-Sheets.
    col = str(raw_name)
    if col == 'TRDPRC_1' and index_prefix:
        return f'{index_prefix}_TRDPRC_1'
    col = col.replace('(', '').replace(')', '').replace("'", '')
    col = col.replace(' ', '').replace(',', '_')
    col = col.replace('__', '_').strip('_')
    return col

def load_price_excel(excel_path: Path, index_prefix: str | None = None) -> pd.DataFrame:
    if not excel_path.exists():
        raise FileNotFoundError(f'Datei fehlt: {excel_path}')

    sheets = pd.read_excel(excel_path, sheet_name=None)
    merged = None

    for _, df in sheets.items():
        if df is None or df.empty:
            continue

        if 'Date' not in df.columns:
            df = df.rename(columns={df.columns[0]: 'Date'})

        df['Date'] = pd.to_datetime(df['Date'], errors='coerce')
        df = df.dropna(subset=['Date'])

        value_cols = [c for c in df.columns if c != 'Date']
        if not value_cols:
            continue

        for col in value_cols:
            cleaned = clean_column_name(col, index_prefix=index_prefix)
            series = pd.to_numeric(df[col], errors='coerce')
            tmp = pd.DataFrame({cleaned: series.values}, index=df['Date'])
            merged = tmp if merged is None else merged.join(tmp, how='outer')

    if merged is None:
        raise ValueError(f'Keine gueltigen Daten in {excel_path}')

    merged.index = pd.to_datetime(merged.index)
    return merged.sort_index()

def load_company_excel(excel_path: Path) -> pd.DataFrame:
    """Read every sheet of a company-data workbook into one date-indexed frame.

    For each non-empty sheet the ``Date`` column (or, if there is none, the
    first column, renamed to ``Date``) is parsed as datetime and rows with an
    unparseable date are dropped. The remaining columns are renamed to the
    cleaned form of ``'<sheet name>_<column>'`` and outer-joined onto the
    result by date. Values are kept as read; no numeric coercion happens here.

    Args:
        excel_path: Workbook to read.

    Returns:
        The joined frame, sorted by its datetime index.

    Raises:
        FileNotFoundError: ``excel_path`` does not exist.
        ValueError: No sheet contributed a single value column.
    """
    if not excel_path.exists():
        raise FileNotFoundError(f'Datei fehlt: {excel_path}')

    combined = None
    workbook = pd.read_excel(excel_path, sheet_name=None)  # sheet_name=None -> all sheets

    for sheet_name, sheet in workbook.items():
        if sheet is None or sheet.empty:
            continue

        if 'Date' not in sheet.columns:
            # No explicit date column: treat the first column as the date.
            sheet = sheet.rename(columns={sheet.columns[0]: 'Date'})

        sheet['Date'] = pd.to_datetime(sheet['Date'], errors='coerce')
        sheet = sheet.dropna(subset=['Date'])

        data_columns = [label for label in sheet.columns if label != 'Date']
        if not data_columns:
            continue

        # Prefix with the sheet name so equal column names from different
        # sheets end up as different columns.
        renames = {label: clean_column_name(f'{sheet_name}_{label}') for label in data_columns}
        indexed = sheet[['Date', *data_columns]].rename(columns=renames).set_index('Date')
        if combined is None:
            combined = indexed
        else:
            combined = combined.join(indexed, how='outer')

    if combined is None:
        raise ValueError(f'Keine gueltigen Company-Daten in {excel_path}')

    combined.index = pd.to_datetime(combined.index)
    return combined.sort_index()

def load_cached_data(config_path: Path, data_dir: Path) -> Tuple[Dict[str, Dict[str, pd.DataFrame]], Dict[str, pd.DataFrame]]:
    """Load the locally stored Excel files for every configured portfolio.

    The portfolio mapping is read through ``ConfigManager`` under the key
    ``'data.portfolios'``. For each portfolio the files
    ``<name>_daily.xlsx`` and ``<name>_intraday.xlsx`` are loaded with
    ``load_price_excel`` and ``<name>_company_data.xlsx`` with
    ``load_company_excel``. A missing file only produces a printed message.

    Args:
        config_path: Config file handed to ``ConfigManager``.
        data_dir: Directory containing the Excel files.

    Returns:
        ``(price_data, company_data)``: prices as portfolio -> period -> frame
        (a portfolio with no files maps to an empty dict), company data as
        portfolio -> frame (portfolios without a company file are absent).
    """
    settings = ConfigManager(str(config_path))
    prices: Dict[str, Dict[str, pd.DataFrame]] = {}
    companies: Dict[str, pd.DataFrame] = {}

    for portfolio, portfolio_cfg in settings.get('data.portfolios', {}).items():
        # The index identifier without dots is used as the column prefix.
        prefix = portfolio_cfg.get('index', '').replace('.', '')
        per_period = prices[portfolio] = {}

        for period in ('daily', 'intraday'):
            workbook = data_dir / f'{portfolio}_{period}.xlsx'
            if not workbook.exists():
                print(f'WARNUNG: {workbook} fehlt.')
                continue
            per_period[period] = load_price_excel(workbook, index_prefix=prefix)

        company_workbook = data_dir / f'{portfolio}_company_data.xlsx'
        if not company_workbook.exists():
            print(f'Hinweis: {company_workbook} nicht gefunden (FFC optional).')
            continue
        companies[portfolio] = load_company_excel(company_workbook)

    return prices, companies

@contextmanager
def patch_datagrabber_with_cache(cached_prices, cached_companies):
    """Temporarily make ``DataGrabber`` return pre-loaded data.

    While the ``with`` body runs, ``DataGrabber.fetch_all_data`` returns
    ``cached_prices`` and ``DataGrabber.fetch_company_data`` returns
    ``cached_companies`` for every instance, because the class attributes
    themselves are replaced. The original attributes are put back on exit,
    including when the body raises.

    Args:
        cached_prices: Object to return from ``fetch_all_data``.
        cached_companies: Object to return from ``fetch_company_data``.
    """
    # Imported here so that defining this helper does not need the module yet.
    from Datagrabber import DataGrabber

    replacements = {
        'fetch_all_data': lambda self: cached_prices,
        'fetch_company_data': lambda self: cached_companies,
    }
    originals = {attr: getattr(DataGrabber, attr) for attr in replacements}

    for attr, replacement in replacements.items():
        setattr(DataGrabber, attr, replacement)

    try:
        yield
    finally:
        for attr, original in originals.items():
            setattr(DataGrabber, attr, original)


ModuleNotFoundError: No module named 'ConfigManager'

In [None]:
from Datagrabber import DataGrabber

# Offline mode reads the Excel files from DATA_DIR; otherwise the data is
# fetched through a DataGrabber built from the notebook config.
if not USE_LSEG_API:
    all_data, company_data = load_cached_data(CONFIG_PATH, DATA_DIR)
else:
    grabber = DataGrabber(str(CONFIG_PATH))
    all_data = grabber.fetch_all_data()
    company_data = grabber.fetch_company_data()

# Summary: one line per portfolio and period with the shape of the loaded frame.
print('Daten geladen:')
for p_name, period_dict in all_data.items():
    for period, df in period_dict.items():
        print(f'- {p_name} | {period}: {df.shape}')


## 3. Feature Engineering pruefen (Beispiel)
Wir erzeugen fuer ein Portfolio/Periode die Features, um sicherzustellen, dass die Daten passen.


In [None]:
from IPython.display import display
from Dataprep import DataPrep

# Abort early when the loading step produced nothing at all.
if not all_data:
    raise ValueError('Keine Daten geladen. Bitte vorherigen Schritt pruefen.')

# Take the first portfolio that has at least one loaded period. A portfolio can
# map to an empty dict (load_cached_data creates the entry before it checks which
# files exist), and indexing into that would fail with a bare IndexError.
portfolio_name = next((name for name, periods in all_data.items() if periods), None)
if portfolio_name is None:
    raise ValueError('Kein Portfolio mit geladenen Kursdaten gefunden. Bitte vorherigen Schritt pruefen.')

# Prefer daily data; otherwise fall back to whichever period was loaded first.
period_type = 'daily' if 'daily' in all_data[portfolio_name] else next(iter(all_data[portfolio_name]))

prep = DataPrep(str(CONFIG_PATH))
sample_df = all_data[portfolio_name][period_type]

# prepare_data is unpacked into two results, presumably feature matrix and target.
# NOTE(review): confirm the return contract against DataPrep.
X_sample, y_sample = prep.prepare_data(
    sample_df,
    portfolio_name=portfolio_name,
    period_type=period_type
)

print(f'Portfolio: {portfolio_name} | Period: {period_type}')
print(f'X Shape: {X_sample.shape}, y Shape: {y_sample.shape}')
display(X_sample.head())
display(y_sample.head())


## 4. Kompletten Modellvergleich starten
- Mit LSEG API: nutzt den regulaeren Datenabruf.
- Ohne API: patcht `DataGrabber`, damit der Vergleich die geladenen Cache-Daten verwendet.

Je nach Hardware und Config kann das Training einige Minuten dauern.


In [None]:
from ModelComparison import ModelComparison

def run_full_comparison(use_api: bool = USE_LSEG_API):
    """Run the complete model comparison against the notebook config.

    Args:
        use_api: When true, ``ModelComparison`` runs unmodified. When false,
            it runs while ``DataGrabber`` is patched to return the already
            loaded ``all_data`` / ``company_data``. The default is the value
            ``USE_LSEG_API`` had when this definition was executed.
    """
    def _execute():
        # A fresh ModelComparison is built for every run.
        ModelComparison(str(CONFIG_PATH)).run_full_comparison()

    if use_api:
        _execute()
        return

    # The comparison is constructed and run while the patch is active.
    with patch_datagrabber_with_cache(all_data, company_data):
        _execute()

run_full_comparison()


## 5. Ergebnisse ansehen
Die Resultate stehen in `Results/model_comparison.xlsx`. Hier ein kurzer Blick auf das Tabellenblatt `Full_Comparison`.


In [None]:
# Location of the comparison workbook, relative to the project root.
results_path = PROJECT_ROOT / 'Results' / 'model_comparison.xlsx'

if not results_path.exists():
    print('Noch keine Ergebnisse geschrieben. Bitte erst den Modellvergleich laufen lassen.')
else:
    # Only the sheet named 'Full_Comparison' is read; its first rows are shown.
    full_df = pd.read_excel(results_path, sheet_name='Full_Comparison')
    print(f'Gefundene Datei: {results_path}')
    display(full_df.head())
