In [14]:
import pandas as pd
import numpy as np
import os

# ----------------------------------------------------------------------------
# 0. ZIEL-PFAD (Wie von dir angegeben)
# ----------------------------------------------------------------------------
# Make sure your Google Drive is mounted in Colab!
# (Run the cell that contains 'from google.colab import drive')
# Every CSV path in the scenario registry is built from this directory.
TARGET_DIR = "/content/drive/MyDrive/Data/"
print(f"Ziel-Verzeichnis: {TARGET_DIR}")

# Make sure the directory exists (no error if it is already there)
os.makedirs(TARGET_DIR, exist_ok=True)


# ----------------------------------------------------------------------------
# 1. DATASET-GENERATOREN (Die "Rezepte")
# ----------------------------------------------------------------------------

def generate_ds1(config: dict, assets: list, filename: str) -> None:
    """Write DS1: a deterministic winner (buy & hold).

    The first asset column holds the constant ``config["winner_mean"]`` on
    every day and the second holds the constant ``config["loser_mean"]``.

    Args:
        config: Needs ``num_days``, ``start_date``, ``winner_mean`` and
            ``loser_mean``.
        assets: Column names; index 0 is the winner, index 1 the loser.
        filename: Path of the CSV file to write (no index column).
    """
    print(f"Generiere {filename}...")
    n_rows = config["num_days"]
    calendar = pd.date_range(start=config["start_date"], periods=n_rows)

    # Start from an empty frame so the column order is "Date" + assets.
    frame = pd.DataFrame(index=calendar, columns=["Date"] + assets)
    frame["Date"] = calendar

    constant_by_column = (
        (assets[0], config["winner_mean"]),
        (assets[1], config["loser_mean"]),
    )
    for column, level in constant_by_column:
        frame[column] = np.full(n_rows, level)

    frame.to_csv(filename, index=False)

def generate_ds2(config: dict, assets: list, filename: str) -> None:
    """Write DS2: deterministic timing (profitable after costs).

    The first asset alternates between ``+signal`` on even rows and
    ``-signal`` on odd rows; the second asset is its exact mirror image.
    ``signal`` is ``net_target_return + transaction_cost``.

    Args:
        config: Needs ``num_days``, ``start_date``, ``net_target_return``
            and ``transaction_cost``.
        assets: Column names; index 0 is the alternating asset, index 1 its
            negation.
        filename: Path of the CSV file to write (no index column).
    """
    print(f"Generiere {filename}...")
    n_rows = config["num_days"]
    calendar = pd.date_range(start=config["start_date"], periods=n_rows)
    frame = pd.DataFrame(index=calendar, columns=["Date"] + assets)
    frame["Date"] = calendar

    # Gross return = target net return + transaction cost.
    gross = config["net_target_return"] + config["transaction_cost"]

    # Fill the odd rows first, then the even rows; every slot gets written.
    zigzag = np.zeros(n_rows)
    zigzag[1::2] = -gross
    zigzag[::2] = gross

    frame[assets[0]] = zigzag
    frame[assets[1]] = -zigzag  # exact mirror image
    frame.to_csv(filename, index=False)

def generate_ds3(config: dict, assets: list, filename: str) -> None:
    """Write DS3: signal buried in noise (buy & hold).

    Both asset columns are i.i.d. normal draws sharing ``config["std_dev"]``;
    the first is centred on ``winner_mean``, the second on ``loser_mean``.
    Draws come from the global ``np.random`` state, winner first.

    Args:
        config: Needs ``num_days``, ``start_date``, ``winner_mean``,
            ``loser_mean`` and ``std_dev``.
        assets: Column names; index 0 is the winner, index 1 the loser.
        filename: Path of the CSV file to write (no index column).
    """
    print(f"Generiere {filename}...")
    n_rows = config["num_days"]
    calendar = pd.date_range(start=config["start_date"], periods=n_rows)
    frame = pd.DataFrame(index=calendar, columns=["Date"] + assets)
    frame["Date"] = calendar

    noise_scale = config["std_dev"]
    centre_by_column = (
        (assets[0], config["winner_mean"]),
        (assets[1], config["loser_mean"]),
    )
    for column, centre in centre_by_column:
        frame[column] = np.random.normal(loc=centre, scale=noise_scale, size=n_rows)

    frame.to_csv(filename, index=False)

def generate_ds4(config: dict, assets: list, filename: str) -> None:
    """Write DS4: choice between a stable and a wild asset (buy & hold).

    Each asset column is a series of i.i.d. normal draws whose ``mean`` and
    ``std_dev`` come from its own sub-config. Draws use the global
    ``np.random`` state, stable asset first.

    Args:
        config: Needs ``num_days``, ``start_date``, ``stable_config`` and
            ``wild_config`` (each a dict with ``mean`` and ``std_dev``).
        assets: Column names; index 0 is the stable asset, index 1 the wild one.
        filename: Path of the CSV file to write (no index column).
    """
    print(f"Generiere {filename}...")
    n_rows = config["num_days"]
    calendar = pd.date_range(start=config["start_date"], periods=n_rows)
    frame = pd.DataFrame(index=calendar, columns=["Date"] + assets)
    frame["Date"] = calendar

    params_by_column = (
        (assets[0], config["stable_config"]),
        (assets[1], config["wild_config"]),
    )
    for column, params in params_by_column:
        frame[column] = np.random.normal(
            loc=params["mean"], scale=params["std_dev"], size=n_rows
        )

    frame.to_csv(filename, index=False)

def generate_ds5(config: dict, assets: list, filename: str) -> None:
    """Write DS5: alternating regimes (timing).

    The time axis is cut into consecutive regimes of ``regime_length`` days
    (the last one may be shorter). In regimes with an even 0-based index the
    first asset is drawn around ``+signal_mean`` and the second around
    ``-signal_mean``; in the other regimes the signs are swapped. Draws use
    the global ``np.random`` state, first asset before second within a regime.

    Args:
        config: Needs ``num_days``, ``start_date``, ``regime_length``,
            ``signal_mean`` and ``std``.
        assets: Column names for the two assets.
        filename: Path of the CSV file to write (no index column).
    """
    print(f"Generiere {filename}...")
    n_rows = config["num_days"]
    block = config["regime_length"]
    drift = config["signal_mean"]
    sigma = config["std"]

    calendar = pd.date_range(start=config["start_date"], periods=n_rows)
    frame = pd.DataFrame(index=calendar, columns=["Date"] + assets)
    frame["Date"] = calendar

    series_a = np.zeros(n_rows)
    series_b = np.zeros(n_rows)

    for regime_idx, start in enumerate(range(0, n_rows, block)):
        stop = min(start + block, n_rows)
        span = stop - start

        # Decide who wins this regime: A on even indices, B otherwise.
        a_leads = regime_idx % 2 == 0
        mean_a, mean_b = (drift, -drift) if a_leads else (-drift, drift)

        series_a[start:stop] = np.random.normal(mean_a, sigma, span)
        series_b[start:stop] = np.random.normal(mean_b, sigma, span)

    frame[assets[0]] = series_a
    frame[assets[1]] = series_b
    frame.to_csv(filename, index=False)

def generate_ds6(config: dict, assets: list, filename: str) -> None:
    """Write DS6: correlation trap (buy & hold).

    Draws three jointly normal return series (winner, trap, loser) that share
    the same standard deviation. Winner and trap are correlated with
    ``config["correlation"]``; the loser is uncorrelated with both.

    The ``Date`` column is written FIRST, matching the layout produced by the
    other dataset generators in this file (previously it was appended last).

    Args:
        config: Needs ``num_days``, ``start_date``, ``winner_mean``,
            ``trap_mean``, ``loser_mean``, ``std`` and ``correlation``.
        assets: Three column names in the order winner, trap, loser.
        filename: Path of the CSV file to write (no index column).

    Raises:
        ValueError: If ``correlation`` is not within [-1, 1] (the covariance
            matrix would not be positive semi-definite), or if ``assets``
            does not match the three generated series.
    """
    print(f"Generiere {filename}...")
    num_days = config["num_days"]
    std = config["std"]
    correlation = config["correlation"]

    # Written so that NaN is rejected as well.
    if not -1.0 <= correlation <= 1.0:
        raise ValueError(f"correlation must lie in [-1, 1], got {correlation!r}")

    means = [config["winner_mean"], config["trap_mean"], config["loser_mean"]]

    # Cov(A, B) = Corr(A, B) * Std(A) * Std(B)
    cov_winner_trap = correlation * std * std

    # Assumption: the loser is uncorrelated with the other two.
    cov_matrix = [
        [std**2, cov_winner_trap, 0],         # Winner
        [cov_winner_trap, std**2, 0],         # Trap
        [0, 0, std**2]                        # Loser
    ]

    # Correlated draws from the global np.random state.
    data = np.random.multivariate_normal(means, cov_matrix, num_days)

    dates = pd.date_range(start=config["start_date"], periods=num_days)
    df = pd.DataFrame(data, columns=assets, index=dates)
    # Put the date in front so every dataset shares the "Date, assets..." layout.
    df.insert(0, "Date", dates.strftime('%Y-%m-%d'))
    df.to_csv(filename, index=False)

def generate_ds7(config: dict, assets: list, filename: str) -> None:
    """Write DS7: needle in a haystack (buy & hold).

    The first asset is drawn around ``winner_mean``; every remaining asset is
    drawn around ``loser_mean``. All series share ``config["std"]`` and are
    drawn from the global ``np.random`` state in column order.

    Args:
        config: Needs ``num_days``, ``start_date``, ``winner_mean``,
            ``loser_mean`` and ``std``.
        assets: Column names; index 0 is the winner, the rest are losers.
        filename: Path of the CSV file to write (no index column).
    """
    print(f"Generiere {filename}...")
    n_rows = config["num_days"]
    calendar = pd.date_range(start=config["start_date"], periods=n_rows)
    frame = pd.DataFrame(index=calendar, columns=["Date"] + assets)
    frame["Date"] = calendar

    needle, haystack = assets[0], assets[1:]
    sigma_key = "std"

    # The single winner comes first ...
    frame[needle] = np.random.normal(config["winner_mean"], config[sigma_key], n_rows)

    # ... followed by all the losers.
    for straw in haystack:
        frame[straw] = np.random.normal(config["loser_mean"], config[sigma_key], n_rows)

    frame.to_csv(filename, index=False)

def generate_ds8(config: dict, assets: list, filename: str) -> None:
    """Write DS8: sector diversification (buy & hold).

    Assets are grouped into consecutive sectors of equal size. Within a
    sector every pair of assets has correlation
    ``config["intra_sector_correlation"]``; assets of different sectors are
    uncorrelated. All assets share the same mean and standard deviation.

    The sector size defaults to 3 (the original 9-asset / 3-sector layout)
    and can be overridden with the optional ``config["assets_per_sector"]``;
    the number of sectors is derived from ``len(assets)``.

    The ``Date`` column is written FIRST, matching the layout produced by the
    other dataset generators in this file (previously it was appended last).

    Args:
        config: Needs ``num_days``, ``start_date``, ``mean``, ``std`` and
            ``intra_sector_correlation``; ``assets_per_sector`` is optional.
        assets: Column names, ordered sector by sector.
        filename: Path of the CSV file to write (no index column).

    Raises:
        ValueError: If ``assets`` is empty or its length is not a multiple of
            the sector size.
    """
    print(f"Generiere {filename}...")
    num_days = config["num_days"]

    assets_per_sector = config.get("assets_per_sector", 3)
    if assets_per_sector < 1:
        raise ValueError(f"assets_per_sector must be >= 1, got {assets_per_sector!r}")
    num_sectors, leftover = divmod(len(assets), assets_per_sector)
    if num_sectors == 0 or leftover:
        raise ValueError(
            f"len(assets)={len(assets)} must be a positive multiple of "
            f"assets_per_sector={assets_per_sector}"
        )

    mean_vec = [config["mean"]] * len(assets)
    std = config["std"]
    variance = std**2
    intra_corr = config["intra_sector_correlation"]
    intra_cov = intra_corr * variance

    # One sector: variance on the diagonal, intra-sector covariance elsewhere.
    sector_block = np.full((assets_per_sector, assets_per_sector), intra_cov)
    np.fill_diagonal(sector_block, variance)

    # Full covariance: one identical block per sector, zeros between sectors.
    from scipy.linalg import block_diag
    cov_matrix = block_diag(*([sector_block] * num_sectors))

    data = np.random.multivariate_normal(mean_vec, cov_matrix, num_days)

    dates = pd.date_range(start=config["start_date"], periods=num_days)
    df = pd.DataFrame(data, columns=assets, index=dates)
    # Put the date in front so every dataset shares the "Date, assets..." layout.
    df.insert(0, "Date", dates.strftime('%Y-%m-%d'))
    df.to_csv(filename, index=False)

def generate_ds9(config: dict, assets: list, filename: str) -> None:
    """Write DS9: market factor plus alpha hunt (buy & hold).

    Every asset is ``beta * market + noise`` with its own beta drawn
    uniformly from [0.5, 1.5). The first ``num_alpha_stocks`` assets
    additionally receive the constant ``alpha_mean`` on every day.
    Random draws use the global ``np.random`` state in the order
    market, betas, then per-asset noise in column order.

    Args:
        config: Needs ``num_days``, ``start_date``, ``market_mean``,
            ``market_std``, ``noise_std``, ``num_alpha_stocks`` and
            ``alpha_mean``.
        assets: Column names; the leading ones are the alpha stocks.
        filename: Path of the CSV file to write (no index column).
    """
    print(f"Generiere {filename}...")
    n_rows = config["num_days"]
    n_assets = len(assets)
    n_alpha = config["num_alpha_stocks"]

    calendar = pd.date_range(start=config["start_date"], periods=n_rows)
    frame = pd.DataFrame(index=calendar, columns=["Date"] + assets)
    frame["Date"] = calendar

    # 1. The common market factor.
    market = np.random.normal(config["market_mean"], config["market_std"], n_rows)

    # 2. One beta per asset; idiosyncratic noise is drawn inside the loop.
    loadings = np.random.uniform(0.5, 1.5, n_assets)
    noise_scale = config["noise_std"]

    for position, (column, beta) in enumerate(zip(assets, loadings)):
        idiosyncratic = np.random.normal(0, noise_scale, n_rows)
        series = beta * market + idiosyncratic

        # 3. Deterministic alpha for the first `n_alpha` assets only.
        if position < n_alpha:
            series = series + config["alpha_mean"]

        frame[column] = series

    frame.to_csv(filename, index=False)

def generate_ds10(config: dict, assets: list, filename: str) -> None:
    """Write DS10: S&P-100-style simulation (timing + selection).

    Every asset is ``beta * market + noise`` with beta drawn uniformly from
    [0.8, 1.2). On top of that a deterministic regime signal alternates
    between ``+regime_mean`` and ``-regime_mean`` every ``regime_length``
    days. Assets named in the first half of ``assets`` ("value") get the
    signal added; assets named in the second half ("growth") get it
    subtracted. Random draws use the global ``np.random`` state in the order
    market, betas, then per-asset noise in column order.

    Args:
        config: Needs ``num_days``, ``start_date``, ``market_mean``,
            ``market_std``, ``regime_length``, ``regime_mean`` and
            ``noise_std``.
        assets: Column names; first half value, second half growth.
        filename: Path of the CSV file to write (no index column).
    """
    print(f"Generiere {filename}...")
    n_rows = config["num_days"]
    n_assets = len(assets)
    block = config["regime_length"]

    calendar = pd.date_range(start=config["start_date"], periods=n_rows)
    frame = pd.DataFrame(index=calendar, columns=["Date"] + assets)
    frame["Date"] = calendar

    # 1. The stable market factor.
    market = np.random.normal(config["market_mean"], config["market_std"], n_rows)

    # 2. The alternating regime factor: positive first, then negative, ...
    regime = np.zeros(n_rows)
    for regime_idx, start in enumerate(range(0, n_rows, block)):
        level = config["regime_mean"] if regime_idx % 2 == 0 else -config["regime_mean"]
        regime[start:min(start + block, n_rows)] = level

    # 3. Split the universe by name: first half value, second half growth.
    half = n_assets // 2
    value_names = set(assets[:half])
    growth_names = set(assets[half:])

    loadings = np.random.uniform(0.8, 1.2, n_assets)
    noise_scale = config["noise_std"]

    for column, beta in zip(assets, loadings):
        idiosyncratic = np.random.normal(0, noise_scale, n_rows)
        series = beta * market + idiosyncratic

        if column in value_names:
            # Value benefits in the positive regimes.
            series = series + regime
        elif column in growth_names:
            # Growth benefits in the negative regimes.
            series = series - regime

        frame[column] = series

    frame.to_csv(filename, index=False)

# ----------------------------------------------------------------------------
# 2. DAS "REZEPTBUCH" (KONFIGURATION)
# ----------------------------------------------------------------------------

# Helper constants for the expected-value calculation.
# Approximation used: E[log(1+R)] ≈ R_mean - (R_std^2 / 2)
# The target expected log return is 0.001
# -> R_mean = 0.001 + (R_std^2 / 2)
STD_05 = 0.005 # 0.5%
STD_10 = 0.01  # 1.0%
STD_20 = 0.02  # 2.0%
STD_30 = 0.03  # 3.0%

# Arithmetic mean needed at each volatility level to hit the 0.001 target.
MEAN_FOR_STD_05 = 0.001 + (STD_05**2 / 2) # 0.0010125
MEAN_FOR_STD_10 = 0.001 + (STD_10**2 / 2) # 0.00105
MEAN_FOR_STD_20 = 0.001 + (STD_20**2 / 2) # 0.0012
MEAN_FOR_STD_30 = 0.001 + (STD_30**2 / 2) # 0.00145

# NOTE(review): this is log(1.001), slightly below the 0.001 log-return
# target used for the means above; confirm the small mismatch is intended.
TARGET_REWARD = 0.0009995 # (log(1.001))

# Scenario registry. Each entry bundles:
#   generator_function       - the generate_ds* function that writes the CSV
#   csv_filename             - output path inside TARGET_DIR
#   assets                   - column names passed to the generator
#   config                   - parameters read by the generator
#   expected_reward_per_step - reference value (not read by any code shown here)
all_scenarios = {
    # Constant returns; "std_dev" is present but generate_ds1 does not read it.
    "DS1_Winner_B&H": {
        "generator_function": generate_ds1,
        "csv_filename": os.path.join(TARGET_DIR, "DS1_Winner_B&H.csv"),
        "assets": ["WinnerStock_Return", "LoserStock_Return"],
        "config": {"num_days": 200, "start_date": "2020-01-01", "winner_mean": 0.001, "loser_mean": -0.001, "std_dev": 0.0},
        "expected_reward_per_step": TARGET_REWARD
    },
    # Alternating +/- returns sized as net target return plus transaction cost.
    "DS2_Perfect_Timing": {
        "generator_function": generate_ds2,
        "csv_filename": os.path.join(TARGET_DIR, "DS2_Perfect_Timing.csv"),
        "assets": ["StockA_Return", "StockB_Return"],
        "config": {"num_days": 200, "start_date": "2020-01-01", "net_target_return": 0.001, "transaction_cost": 0.001},
        "expected_reward_per_step": TARGET_REWARD
    },
    # Winner and loser drawn with the same 1% standard deviation.
    "DS3_Signal_in_Noise": {
        "generator_function": generate_ds3,
        "csv_filename": os.path.join(TARGET_DIR, "DS3_Signal_in_Noise.csv"),
        "assets": ["WinnerStock_Return", "LoserStock_Return"],
        "config": {"num_days": 1000, "start_date": "2020-01-01", "winner_mean": MEAN_FOR_STD_10, "loser_mean": -0.001, "std_dev": STD_10},
        "expected_reward_per_step": TARGET_REWARD
    },
    # Low-volatility (0.5%) asset versus high-volatility (3%) asset.
    "DS4_Risk_Choice": {
        "generator_function": generate_ds4,
        "csv_filename": os.path.join(TARGET_DIR, "DS4_Risk_Choice.csv"),
        "assets": ["StableStock_Return", "WildStock_Return"],
        "config": {
            "num_days": 1000, "start_date": "2020-01-01",
            "stable_config": {"mean": MEAN_FOR_STD_05, "std_dev": STD_05},
            "wild_config": {"mean": MEAN_FOR_STD_30, "std_dev": STD_30}
        },
        "expected_reward_per_step": TARGET_REWARD
    },
    # Regimes of 20 days; 0.00105 equals MEAN_FOR_STD_10.
    # NOTE(review): the extra 0.001/20 presumably spreads one transaction cost
    # over a 20-day regime; confirm.
    "DS5_Regime_Switch": {
        "generator_function": generate_ds5,
        "csv_filename": os.path.join(TARGET_DIR, "DS5_Regime_Switch.csv"),
        "assets": ["StockA_Return", "StockB_Return"],
        "config": {"num_days": 1000, "start_date": "2020-01-01", "regime_length": 20, "signal_mean": 0.00105 + (0.001/20), "std": STD_10},
        "expected_reward_per_step": TARGET_REWARD
    },
    # Trap asset is 0.95-correlated with the winner but has zero mean.
    "DS6_Correlation_Trap": {
        "generator_function": generate_ds6,
        "csv_filename": os.path.join(TARGET_DIR, "DS6_Correlation_Trap.csv"),
        "assets": ["Winner_Return", "Trap_Return", "Loser_Return"],
        "config": {"num_days": 1000, "start_date": "2020-01-01", "winner_mean": MEAN_FOR_STD_10, "trap_mean": 0.0, "loser_mean": -0.001, "std": STD_10, "correlation": 0.95},
        "expected_reward_per_step": TARGET_REWARD
    },
    # One winner among nine losers, all with 2% standard deviation.
    "DS7_Needle_in_Haystack": {
        "generator_function": generate_ds7,
        "csv_filename": os.path.join(TARGET_DIR, "DS7_Needle_in_Haystack.csv"),
        "assets": ["Winner_Return"] + [f"Loser_{i}_Return" for i in range(9)],
        "config": {"num_days": 2000, "start_date": "2020-01-01", "winner_mean": MEAN_FOR_STD_20, "loser_mean": -0.0005, "std": STD_20},
        "expected_reward_per_step": TARGET_REWARD
    },
    # Nine assets in three sectors of three; identical mean for all of them.
    "DS8_Sector_Diversification": {
        "generator_function": generate_ds8,
        "csv_filename": os.path.join(TARGET_DIR, "DS8_Sector_Diversification.csv"),
        "assets": [f"Tech_{i}" for i in range(3)] + [f"Health_{i}" for i in range(3)] + [f"Consumer_{i}" for i in range(3)],
        "config": {"num_days": 2000, "start_date": "2020-01-01", "mean": MEAN_FOR_STD_20, "std": STD_20, "intra_sector_correlation": 0.9},
        "expected_reward_per_step": TARGET_REWARD
    },
    # Fifty market-driven stocks; the first five carry an additional alpha.
    "DS9_Factor_Timing": {
        "generator_function": generate_ds9,
        "csv_filename": os.path.join(TARGET_DIR, "DS9_Factor_Timing.csv"),
        "assets": [f"Stock_{i}" for i in range(50)],
        "config": {"num_days": 5000, "start_date": "2017-01-01", "market_mean": 0.0005, "market_std": 0.01, "noise_std": 0.01, "num_alpha_stocks": 5, "alpha_mean": (TARGET_REWARD-0.0005)}, # alpha must make up the gap to the target
        "expected_reward_per_step": TARGET_REWARD
    },
    # Fifty value plus fifty growth names with a 20-day alternating regime.
    "DS10_SP100_Sim": {
        "generator_function": generate_ds10,
        "csv_filename": os.path.join(TARGET_DIR, "DS10_SP100_Sim.csv"),
        "assets": [f"Value_{i}" for i in range(50)] + [f"Growth_{i}" for i in range(50)],
        "config": {"num_days": 5000, "start_date": "2017-01-01", "market_mean": 0.0004, "market_std": 0.01, "regime_length": 20, "regime_mean": (TARGET_REWARD - 0.0004 + 0.001/20), "noise_std": 0.02},
        "expected_reward_per_step": TARGET_REWARD
    }
}

# ----------------------------------------------------------------------------
# 3. HAUPT-AUSFÜHRUNG
# ----------------------------------------------------------------------------

def create_all_datasets(target_dir: str, scenarios: dict):
    """Run every scenario's generator so that all CSV files get written.

    Args:
        target_dir: Output directory; only used in the start-up message here,
            the actual paths come from each scenario's ``csv_filename``.
        scenarios: Mapping of scenario name to a dict with the keys
            ``generator_function``, ``config``, ``assets`` and
            ``csv_filename``. Entries whose ``generator_function`` is missing,
            falsy or not callable are skipped with a warning.
    """
    print(f"Starte Datensatzerstellung im Verzeichnis: {target_dir}")

    for scenario_name, spec in scenarios.items():
        print(f"\n--- Verarbeite Szenario: {scenario_name} ---")
        build = spec.get("generator_function")

        # Guard clause: nothing usable to call for this scenario.
        if not build or not callable(build):
            print(f"WARNUNG: Keine gültige 'generator_function' für {scenario_name} gefunden. Überspringe.")
            continue

        build(
            config=spec["config"],
            assets=spec["assets"],
            filename=spec["csv_filename"],
        )

    print("\nAlle Datasets erfolgreich generiert.")

# Run the main function: generate every scenario's CSV under TARGET_DIR
create_all_datasets(target_dir=TARGET_DIR, scenarios=all_scenarios)

Ziel-Verzeichnis: /content/drive/MyDrive/Data/
Starte Datensatzerstellung im Verzeichnis: /content/drive/MyDrive/Data/

--- Verarbeite Szenario: DS1_Winner_B&H ---
Generiere /content/drive/MyDrive/Data/DS1_Winner_B&H.csv...

--- Verarbeite Szenario: DS2_Perfect_Timing ---
Generiere /content/drive/MyDrive/Data/DS2_Perfect_Timing.csv...

--- Verarbeite Szenario: DS3_Signal_in_Noise ---
Generiere /content/drive/MyDrive/Data/DS3_Signal_in_Noise.csv...

--- Verarbeite Szenario: DS4_Risk_Choice ---
Generiere /content/drive/MyDrive/Data/DS4_Risk_Choice.csv...

--- Verarbeite Szenario: DS5_Regime_Switch ---
Generiere /content/drive/MyDrive/Data/DS5_Regime_Switch.csv...

--- Verarbeite Szenario: DS6_Correlation_Trap ---
Generiere /content/drive/MyDrive/Data/DS6_Correlation_Trap.csv...

--- Verarbeite Szenario: DS7_Needle_in_Haystack ---
Generiere /content/drive/MyDrive/Data/DS7_Needle_in_Haystack.csv...

--- Verarbeite Szenario: DS8_Sector_Diversification ---
Generiere /content/drive/MyDrive/Da

In [15]:
!ls

drive  sample_data
