# Deep Reinforcement Learning für die dynamische Portfolioallokation: Methoden, Chancen und Grenzen


## 1. Installieren und Importieren benötigter Pakete

In [None]:
import numpy as np

# Restore the ``np.float`` alias when this NumPy build does not provide it.
# NOTE(review): presumably required by an older dependency - confirm which one.
try:
    np.float
except AttributeError:
    np.float = float

In [None]:

!pip install swig

# FinRL und Finanz-Daten
!pip install finrl
!pip install yfinance
!pip install wrds

# Reinforcement Learning
!pip install stable-baselines3
!pip install shimmy>=0.2.1 
!pip install gymnasium

# Portfolio-Optimierung (Markowitz Baseline)
!pip install pyportfolioopt

# Hyperparameter Tuning 
!pip install optuna

# Standard Data Science
!pip install pandas numpy matplotlib
!pip install protobuf==3.20.3

# FinRL 
!pip install finrl


In [None]:

%pip install -q "git+https://github.com/AI4Finance-Foundation/FinRL.git@master" ta

In [None]:
# Basisimporte und Verzeichnisse vorbereiten (Kommentare auf Deutsch)
import warnings
warnings.filterwarnings("ignore")

from pathlib import Path
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os

from finrl import config
from finrl.config import INDICATORS


from finrl.meta.preprocessor.yahoodownloader import YahooDownloader
from finrl.meta.preprocessor.preprocessors import data_split

from finrl.agents.stablebaselines3.models import DRLAgent
from finrl.plot import backtest_stats, backtest_plot, get_daily_return, get_baseline
from finrl.meta.preprocessor.preprocessors import FeatureEngineer
from finrl.plot import convert_daily_return_to_pyfolio_ts

from pypfopt.efficient_frontier import EfficientFrontier
from pypfopt import risk_models, expected_returns

from pyfolio import timeseries

import optuna
from optuna.samplers import TPESampler
from optuna.pruners import MedianPruner




import os
from datetime import timedelta

In [None]:
from stable_baselines3.common.utils import set_random_seed

# IMPORTANT: seed once, before any environment or model is created.
MY_SEED = 42
set_random_seed(seed=MY_SEED)

In [None]:


# Create the FinRL working directories relative to the current directory.
# ``exist_ok=True`` replaces the separate existence check, so a directory that
# appears between the check and the creation no longer raises.
for _dir_name in (
    config.DATA_SAVE_DIR,
    config.TRAINED_MODEL_DIR,
    config.TENSORBOARD_LOG_DIR,
    config.RESULTS_DIR,
):
    os.makedirs("./" + _dir_name, exist_ok=True)

## 2. Download DJI, Festlegung von Umgebungsparametern und Zeiträumen

In [None]:
# Ticker symbols of the stock universe (30 entries, alphabetical order).
DOW_30_TICKER = (
    "AAPL AMGN AMZN AXP BA CAT CRM CSCO CVX DIS "
    "GS HD HON IBM JNJ JPM KO MCD MMM MRK "
    "MSFT NKE NVDA PG SHW TRV UNH V VZ WMT"
).split()

In [None]:
# Dow-Jones-Konfiguration und Trainings-/Testzeitr√§ume (Kommentare auf Deutsch)
ticker_list = DOW_30_TICKER


train_start_date = "2010-01-01"
train_end_date = "2020-12-31"

validate_start_date = "2021-01-01"
validate_end_date = "2022-12-31"  # Validierung inkl. COVID + 2022-Crash

trade_start_date = "2023-01-01"  # Test beginnt nach dem Crash
trade_end_date = "2025-11-01"

initial_capital = 1000000
transaction_cost_pct = 0.001  # 10 Basispunkte pro Trade
hmax = 10000  # maximale St√ºckzahl pro Order
reward_scaling = 1e-4  
num_stock_shares = 1000

In [None]:
# Download the data for all tickers from the start of training to the end of
# the trade window, then keep a CSV copy of the raw download.
_downloader = YahooDownloader(
    start_date=train_start_date,
    end_date=trade_end_date,
    ticker_list=DOW_30_TICKER,
)
df_raw = _downloader.fetch_data()
df_raw.to_csv('dow_30_data.csv')

In [None]:
# Bare reference to the raw frame. NOTE(review): in a notebook only the last
# expression of a cell is rendered, so this line presumably shows nothing.
df_raw

# True if at least one value in the downloaded data is missing.
df_raw.isnull().values.any()

## 3. Datenverarbeitung

In [None]:


# Enrich the raw prices with technical indicators, VIX and turbulence.
fe = FeatureEngineer(
    use_technical_indicator=True,
    use_vix=True,
    use_turbulence=True,
    user_defined_feature=False,
)
df = fe.preprocess_data(df_raw)

# Label every row with the ordinal of its date so that a single index value
# selects all tickers of one day.
df = df.sort_values(['date', 'tic'], ignore_index=True)
df.index = df.date.factorize()[0]

lookback = 252
cov_list, return_list = [], []

# For every day from `lookback` onwards: returns and their covariance over the
# label slice [i - lookback, i] (label-based .loc slicing includes both ends).
for i in range(lookback, df.index.nunique()):
    data_lookback = df.loc[i - lookback:i, :]
    price_lookback = data_lookback.pivot_table(
        index='date', columns='tic', values='close'
    )
    return_lookback = price_lookback.pct_change().dropna()
    return_list.append(return_lookback)

    covs = return_lookback.cov().values
    cov_list.append(covs)

# Attach the per-date covariance matrix and return window to every row of that
# date; dates before the first full window are dropped by the inner merge.
df_cov = pd.DataFrame(
    {
        'date': df.date.unique()[lookback:],
        'cov_list': cov_list,
        'return_list': return_list,
    }
)
df = (
    df.merge(df_cov, on='date')
      .sort_values(['date', 'tic'])
      .reset_index(drop=True)
)

df

## 4. Erstellung der Umgebungen für Training, Validierung und Test

In [None]:
# Number of distinct tickers; used as stock, state and action dimension alike.
stock_dim = len(df["tic"].unique())

print(f"State-Dimension: {stock_dim} | Aktienanzahl: {stock_dim}")

# Keyword arguments shared by every environment instance.
env_kwargs = dict(
    hmax=hmax,
    initial_amount=initial_capital,
    transaction_cost_pct=transaction_cost_pct,
    state_space=stock_dim,
    stock_dim=stock_dim,
    tech_indicator_list=INDICATORS,
    action_space=stock_dim,
    reward_scaling=reward_scaling,
)



In [None]:
# Date-based partitions of the feature frame, one per environment.
train = data_split(df,train_start_date,train_end_date) 
validate = data_split(df,validate_start_date, validate_end_date) 
# NOTE(review): `test` starts at validate_start_date, so it spans the
# validation range as well as the trade range - confirm the overlap is intended.
test = data_split(df,validate_start_date, trade_end_date) 
# Trade range only; this is the partition the tuned agents are evaluated on.
test_opt = data_split(df,trade_start_date, trade_end_date)

In [None]:
from __future__ import annotations

import gymnasium as gym
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from gymnasium import spaces
from gymnasium.utils import seeding
from stable_baselines3.common.vec_env import DummyVecEnv

matplotlib.use("Agg")

class StockPortfolioEnv(gym.Env):
    """Portfolio-allocation environment, modified from the FinRL portfolio env.

    Every step receives one raw score per asset, converts the scores into
    portfolio weights with a softmax, moves one trading day forward and updates
    the portfolio value.

    Extensions over the base version:
    1. Transaction costs are deducted from the portfolio value.
    2. The weights of the previous step (w_{t-1}) are part of the state.

    The observation is a 2-D array stacked row-wise as
    ``[covariance matrix, one row per technical indicator, previous weights]``.
    The reward returned by ``step`` is the new portfolio value (unscaled;
    ``reward_scaling`` is stored but not applied).

    ``df`` must be indexed by the day ordinal (all rows of one day share one
    index label) and provide the columns ``date``, ``tic``, ``close``,
    ``cov_list`` and one column per entry of ``tech_indicator_list``.
    """

    # Gymnasium looks up the supported render modes under "render_modes".
    metadata = {"render_modes": ["human"]}

    def __init__(
        self,
        df,
        stock_dim,
        hmax,
        initial_amount,
        transaction_cost_pct,
        reward_scaling,
        state_space,
        action_space,
        tech_indicator_list,
        turbulence_threshold=None,
        lookback=252,
        day=0,
    ):
        """Store the configuration, build the spaces and load the first day.

        Args:
            df: Market data indexed by day ordinal (see class docstring).
            stock_dim: Number of assets.
            hmax: Stored only; not used by this environment.
            initial_amount: Starting portfolio value.
            transaction_cost_pct: Cost rate applied to the weight turnover.
            reward_scaling: Stored only; not applied to the reward.
            state_space: Number of columns of the observation.
            action_space: Length of the action vector.
            tech_indicator_list: Names of the indicator columns in ``df``.
            turbulence_threshold: Stored only; not used by this environment.
            lookback: Stored only; not used by this environment.
            day: Index label of the day the first episode starts on.
        """
        self.lookback = lookback
        self.df = df
        self.stock_dim = stock_dim
        self.hmax = hmax
        self.initial_amount = initial_amount
        self.transaction_cost_pct = transaction_cost_pct
        self.reward_scaling = reward_scaling
        self.state_space = state_space
        self.tech_indicator_list = tech_indicator_list
        self.turbulence_threshold = turbulence_threshold

        # Action space: one score per asset (normalised by a softmax in step).
        self.action_space = spaces.Box(low=0, high=1, shape=(action_space,))

        # Observation rows: covariance matrix (state_space rows), one row per
        # technical indicator, plus one extra row for the previous weights.
        self.observation_space = spaces.Box(
            low=-np.inf,
            high=np.inf,
            shape=(
                self.state_space + len(self.tech_indicator_list) + 1,
                self.state_space,
            ),
        )

        self._start_episode(day)

    def _start_episode(self, day):
        """Load the data of ``day`` and reset all per-episode bookkeeping."""
        self.day = day
        self.data = self.df.loc[self.day, :]
        self.covs = self.data["cov_list"].values[0]

        # The episode starts from an equally weighted portfolio (1/N).
        self.last_weights = np.array([1 / self.stock_dim] * self.stock_dim)
        self.state = self._update_state()

        self.terminal = False
        self.portfolio_value = self.initial_amount
        # Defined up front so the terminal branch of step() can always return
        # it, even when no regular step has been taken yet.
        self.reward = 0

        self.asset_memory = [self.initial_amount]
        self.portfolio_return_memory = [0]
        self.actions_memory = [self.last_weights]  # starts with the 1/N weights
        self.date_memory = [self.data.date.unique()[0]]

    def step(self, actions):
        """Apply ``actions`` for one trading day.

        Returns:
            Tuple ``(state, reward, terminated, truncated, info)``. ``reward``
            is the portfolio value after the step, ``truncated`` is always
            False and ``info`` is always empty. Once the last day is reached
            the episode summary is plotted/printed and the previous state and
            reward are returned unchanged.
        """
        self.terminal = self.day >= len(self.df.index.unique()) - 1

        if self.terminal:
            self._report_episode()
            return self.state, self.reward, self.terminal, False, {}

        # 1. Turn the raw scores into weights that sum to one.
        weights = self.softmax_normalization(actions)

        # 2. Transaction costs: turnover (sum of absolute weight changes)
        #    times the cost rate times the current portfolio value.
        turnover = np.sum(np.abs(weights - self.last_weights))
        transaction_cost = (
            turnover * self.transaction_cost_pct * self.portfolio_value
        )

        self.actions_memory.append(weights)
        self.last_weights = weights  # becomes w_{t-1} of the next state

        # 3. Advance one day.
        last_day_memory = self.data
        self.day += 1
        self.data = self.df.loc[self.day, :]

        # 4. Gross portfolio return: sum((price_new / price_old - 1) * weight).
        raw_portfolio_return = sum(
            ((self.data.close.values / last_day_memory.close.values) - 1)
            * weights
        )

        # 5. New value after costs and the resulting net return.
        new_portfolio_value = (
            self.portfolio_value * (1 + raw_portfolio_return)
        ) - transaction_cost
        portfolio_return = (
            new_portfolio_value - self.portfolio_value
        ) / self.portfolio_value
        self.portfolio_value = new_portfolio_value

        self.portfolio_return_memory.append(portfolio_return)
        self.date_memory.append(self.data.date.unique()[0])
        self.asset_memory.append(new_portfolio_value)

        # The reward is the new portfolio value itself.
        self.reward = new_portfolio_value

        # 6. Rebuild the state with the new day's data and the new weights.
        self.covs = self.data["cov_list"].values[0]
        self.state = self._update_state()

        return self.state, self.reward, self.terminal, False, {}

    def _report_episode(self):
        """Save the return plots and print the end-of-episode summary."""
        df_daily_return = pd.DataFrame(self.portfolio_return_memory)
        df_daily_return.columns = ["daily_return"]

        plt.plot(df_daily_return.daily_return.cumsum(), "r")
        plt.savefig("results/cumulative_reward.png")
        plt.close()

        plt.plot(self.portfolio_return_memory, "r")
        plt.savefig("results/rewards.png")
        plt.close()

        print("=================================")
        print(f"begin_total_asset:{self.asset_memory[0]}")
        print(f"end_total_asset:{self.portfolio_value}")

        return_std = df_daily_return["daily_return"].std()
        if return_std != 0:
            sharpe = (
                (252**0.5) * df_daily_return["daily_return"].mean() / return_std
            )
            print("Sharpe: ", sharpe)
        print("=================================")

    def reset(self, *, seed=None, options=None):
        """Start a new episode at day 0 and return ``(state, info)``.

        ``seed`` is forwarded to ``gym.Env.reset``; ``options`` is ignored.
        """
        super().reset(seed=seed)
        self._start_episode(0)
        return self.state, {}

    def _update_state(self):
        """Assemble the observation from covariances, indicators and weights."""
        tech_data = [
            self.data[tech].values.tolist() for tech in self.tech_indicator_list
        ]

        # Stack row-wise: [covariance, technical indicators, last weights].
        new_state = np.append(np.array(self.covs), tech_data, axis=0)
        new_state = np.append(new_state, [self.last_weights], axis=0)

        return new_state

    def render(self, mode="human"):
        """Return the current state."""
        return self.state

    def softmax_normalization(self, actions):
        """Return the softmax of ``actions``.

        The maximum is subtracted before exponentiating. This leaves the
        result mathematically unchanged but keeps ``np.exp`` from overflowing
        for large inputs.
        """
        scores = np.asarray(actions)
        exponentials = np.exp(scores - np.max(scores))
        return exponentials / np.sum(exponentials)

    def save_asset_memory(self):
        """Return the recorded dates and daily returns as a DataFrame."""
        date_list = self.date_memory
        portfolio_return = self.portfolio_return_memory
        df_account_value = pd.DataFrame(
            {"date": date_list, "daily_return": portfolio_return}
        )
        return df_account_value

    def save_action_memory(self):
        """Return the recorded weights, indexed by date, one column per ticker."""
        date_list = self.date_memory
        df_date = pd.DataFrame(date_list)
        df_date.columns = ["date"]

        action_list = self.actions_memory
        df_actions = pd.DataFrame(action_list)
        df_actions.columns = self.data.tic.values
        df_actions.index = df_date.date
        return df_actions

    def _seed(self, seed=None):
        """Create a seeded random generator and return the seed in a list."""
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def get_sb_env(self):
        """Wrap this instance in a DummyVecEnv and return it with its first observation."""
        e = DummyVecEnv([lambda: self])
        obs = e.reset()
        return e, obs

In [None]:
# One environment per data partition; all share the same env_kwargs.
train_env = StockPortfolioEnv(df=train, **env_kwargs)
validate_env = StockPortfolioEnv(df=validate, **env_kwargs)
test_env = StockPortfolioEnv(df=test, **env_kwargs)
test_opt_env = StockPortfolioEnv(df=test_opt, **env_kwargs)

## 5.Agenten


In [None]:
# Untuned baseline agents: PPO, DDPG and TD3 are created without explicit
# model_kwargs and then trained one after another on train_env.
# NOTE(review): the statements run after the global seed was set, so their
# order presumably influences the results - keep it when editing.
agent_train =DRLAgent(env = train_env)
timesteps = 100000
model_ppo = agent_train.get_model(model_name="ppo")
model_ddpg = agent_train.get_model(model_name="ddpg")
model_td3 = agent_train.get_model(model_name="td3")


# Train each model for `timesteps` steps; tb_log_name labels the run.
trained_ppo = agent_train.train_model(model=model_ppo, tb_log_name='ppo',
                                total_timesteps=timesteps)
trained_ddpg= agent_train.train_model(model=model_ddpg, tb_log_name='ddpg',
                                total_timesteps=timesteps)
trained_td3 = agent_train.train_model(model=model_td3, tb_log_name='td3',
                                total_timesteps=timesteps)



In [None]:
# Evaluate the three untuned agents. All calls reuse the same test_env object;
# each call yields a pair unpacked as (daily returns, actions).
df_daily_return_ppo, df_actions_ppo = DRLAgent.DRL_prediction(model=trained_ppo, environment = test_env)

df_daily_return_ddpg, df_actions_ddpg = DRLAgent.DRL_prediction(model=trained_ddpg, environment = test_env)

df_daily_return_td3, df_actions_td3 = DRLAgent.DRL_prediction(model=trained_td3, environment = test_env)


In [None]:
# Persist the evaluation output of the untuned agents.
# NOTE(review): index=False leaves the frame index out of the CSV; if the
# action frames keep their dates in the index, the dates are not written -
# confirm this is intended.
# NOTE(review): the DDPG performance file name has a double underscore, unlike
# its siblings; kept unchanged because other code may read that path.
for _frame, _csv_path in (
    (df_daily_return_ppo, "results/ppo_performance.csv"),
    (df_daily_return_ddpg, "results/ddpg__performance.csv"),
    (df_daily_return_td3, "results/td3_performance.csv"),
    (df_actions_ppo, "results/ppo_actions.csv"),
    (df_actions_ddpg, "results/ddpg_actions.csv"),
    (df_actions_td3, "results/td3_actions.csv"),
):
    _frame.to_csv(_csv_path, index=False)

In [None]:
# Cumulative return paths of the three untuned agents.
ppo_cumpod = (1 + df_daily_return_ppo.daily_return).cumprod() - 1
ddpg_cumpod = (1 + df_daily_return_ddpg.daily_return).cumprod() - 1
td3_cumpod = (1 + df_daily_return_td3.daily_return).cumprod() - 1

# Daily-return frames converted to the series format expected by pyfolio.
DRL_strat_ppo = convert_daily_return_to_pyfolio_ts(df_daily_return_ppo)
DRL_strat_ddpg = convert_daily_return_to_pyfolio_ts(df_daily_return_ddpg)
DRL_strat_td3 = convert_daily_return_to_pyfolio_ts(df_daily_return_td3)

perf_func = timeseries.perf_stats

# Each strategy is passed as its own factor_returns series.
perf_stats_all_ppo = perf_func(
    returns=DRL_strat_ppo,
    factor_returns=DRL_strat_ppo,
    positions=None,
    transactions=None,
    turnover_denom="AGB",
)

perf_stats_all_ddpg = perf_func(
    returns=DRL_strat_ddpg,
    factor_returns=DRL_strat_ddpg,
    positions=None,
    transactions=None,
    turnover_denom="AGB",
)

perf_stats_all_td3 = perf_func(
    returns=DRL_strat_td3,
    factor_returns=DRL_strat_td3,
    positions=None,
    transactions=None,
    turnover_denom="AGB",
)

def extract_weights(drl_actions_list):
    """Convert a wide weights frame into one weight table per date.

    Args:
        drl_actions_list: DataFrame with one row per date (the date is taken
            from the index) and one column per ticker.

    Returns:
        DataFrame with the columns ``date`` and ``weights``. Every ``weights``
        entry is itself a DataFrame with the columns ``tic`` and ``weight``
        holding that row's values. An input without rows yields an empty
        frame with the same two columns.
    """
    tic_list = list(drl_actions_list.columns)
    collected = {'date': [], 'weights': []}

    # The values are materialised once; the previous version rebuilt the whole
    # frame with reset_index() for every single row.
    for date, row_values in zip(drl_actions_list.index, drl_actions_list.to_numpy()):
        collected['date'].append(date)
        collected['weights'].append(
            pd.DataFrame({'tic': tic_list, 'weight': list(row_values)})
        )

    return pd.DataFrame(collected)


# Per-date weight tables for the three untuned agents.
ppo_weights, ddpg_weights, td3_weights = (
    extract_weights(actions)
    for actions in (df_actions_ppo, df_actions_ddpg, df_actions_td3)
)


## 5.Tuning der Agenten

In [None]:
import numpy as np
from stable_baselines3 import PPO, DDPG, TD3
from stable_baselines3.common.noise import NormalActionNoise

def objective(trial, agent_name, train_env, validation_env):
    """Optuna objective: train one agent and score it on the validation env.

    The agent is trained for 100000 steps on ``train_env`` with sampled
    hyperparameters and a fixed ``net_arch=[64, 64]``. It then plays one
    deterministic episode on ``validation_env``. The rewards of that episode
    are treated as a value series (StockPortfolioEnv in this file returns the
    portfolio value as reward), and the score is the annualised Sharpe ratio
    ``sqrt(252) * mean / std`` of its percentage changes.

    Args:
        trial: Optuna trial used to sample the hyperparameters.
        agent_name: ``"ppo"``, ``"ddpg"`` or ``"td3"``.
        train_env: Environment the model is trained on.
        validation_env: Environment used for scoring. Both the 5-tuple step
            API and the 4-tuple vectorised API are accepted.

    Returns:
        The Sharpe ratio, or ``-9999.0`` when training fails or no finite
        ratio can be computed.

    Raises:
        ValueError: If ``agent_name`` is not one of the supported names.
            Previously such a typo was caught by the bare ``except`` and
            silently scored as ``-9999.0``.
    """
    failure_score = -9999.0

    if agent_name not in ("ppo", "ddpg", "td3"):
        raise ValueError(
            f"Unknown agent_name {agent_name!r}; expected 'ppo', 'ddpg' or 'td3'."
        )

    policy_kwargs = dict(net_arch=[64, 64])

    if agent_name == "ppo":
        params = {
            "n_steps": trial.suggest_categorical("n_steps", [2048, 4096, 8192]),
            "ent_coef": trial.suggest_float("ent_coef", 1e-8, 0.01, log=True),
            "learning_rate": trial.suggest_float("learning_rate", 1e-5, 5e-4, log=True),
            "batch_size": trial.suggest_categorical("batch_size", [64, 128, 256]),
            "gamma": trial.suggest_categorical("gamma", [0.99, 0.995, 0.999]),
            "clip_range": trial.suggest_categorical("clip_range", [0.1, 0.2, 0.3]),
        }
        model = PPO(
            "MlpPolicy", train_env, verbose=0, policy_kwargs=policy_kwargs, **params
        )
    else:
        # DDPG and TD3 share one search space and the same exploration noise.
        params = {
            "buffer_size": trial.suggest_categorical("buffer_size", [50000, 100000, 200000]),
            "learning_rate": trial.suggest_float("learning_rate", 1e-5, 1e-3, log=True),
            "batch_size": trial.suggest_categorical("batch_size", [64, 128, 256]),
            "gamma": trial.suggest_categorical("gamma", [0.99, 0.995, 0.999]),
            "tau": trial.suggest_categorical("tau", [0.001, 0.005, 0.01, 0.02]),
        }
        n_actions = train_env.action_space.shape[-1]
        action_noise = NormalActionNoise(
            mean=np.zeros(n_actions), sigma=0.1 * np.ones(n_actions)
        )
        algorithm = DDPG if agent_name == "ddpg" else TD3
        model = algorithm(
            "MlpPolicy",
            train_env,
            action_noise=action_noise,
            verbose=0,
            policy_kwargs=policy_kwargs,
            **params,
        )

    # A failing trial is scored with the penalty instead of aborting the study.
    # `Exception` (not a bare except) lets KeyboardInterrupt still stop it.
    try:
        model.learn(total_timesteps=100000)
    except Exception as exc:
        print(f"[objective] training failed for {agent_name}: {exc}")
        return failure_score

    obs = validation_env.reset()
    if isinstance(obs, tuple):
        obs = obs[0]

    portfolio_values = []

    done = False
    while not done:
        action, _ = model.predict(obs, deterministic=True)
        step_result = validation_env.step(action)

        if len(step_result) == 5:
            obs, reward, terminal, truncated, info = step_result
            done = bool(np.any(terminal) or np.any(truncated))
        else:
            # Vectorised envs return (obs, rewards, dones, infos), where
            # `dones` is an array; it is reduced to a plain bool explicitly.
            obs, reward, dones, info = step_result
            done = bool(np.any(dones))

        if isinstance(reward, (list, np.ndarray)):
            val = np.ravel(reward)[0]
        else:
            val = reward
        portfolio_values.append(val)

    if len(portfolio_values) < 2:
        return failure_score

    daily_returns = pd.Series(portfolio_values).pct_change().dropna()
    if daily_returns.empty or daily_returns.std() == 0:
        return failure_score

    sharpe = (252**0.5) * daily_returns.mean() / daily_returns.std()
    if np.isnan(sharpe) or np.isinf(sharpe):
        return failure_score

    return sharpe



def tune_agent(agent_name, train_data, val_data, n_trials=20):
    """Run an Optuna study for one agent and return its best parameters.

    A training and a validation StockPortfolioEnv are built from ``train_data``
    and ``val_data`` (both with the module-level ``env_kwargs``), wrapped via
    ``get_sb_env`` and shared by all ``n_trials`` trials of a maximising study.
    """
    vec_train_env, _ = StockPortfolioEnv(df=train_data, **env_kwargs).get_sb_env()
    vec_val_env, _ = StockPortfolioEnv(df=val_data, **env_kwargs).get_sb_env()

    def _run_trial(trial):
        # Bind the fixed arguments so Optuna only has to supply the trial.
        return objective(trial, agent_name, vec_train_env, vec_val_env)

    study = optuna.create_study(direction="maximize")
    study.optimize(_run_trial, n_trials=n_trials)

    return study.best_params


In [None]:
# A) Tune DDPG
print("\n--- Start Tuning DDPG ---")
best_params_ddpg = tune_agent(
    agent_name="ddpg", train_data=train, val_data=validate, n_trials=35
)

# B) Tune PPO
print("\n--- Start Tuning PPO ---")
best_params_ppo = tune_agent(
    agent_name="ppo", train_data=train, val_data=validate, n_trials=35
)

# C) Tune TD3
print("\n--- Start Tuning TD3 ---")
best_params_td3 = tune_agent(
    agent_name="td3", train_data=train, val_data=validate, n_trials=35
)

In [None]:
# Retrain the agents with the tuned hyperparameters and evaluate them on the
# trade window.
#
# The models are built with the same setup objective() used while tuning:
# - net_arch=[64, 64]; a fresh dict per model, because an algorithm may add
#   its own entries to policy_kwargs,
# - Gaussian action noise for DDPG/TD3. NOTE(review): this relies on FinRL's
#   get_model replacing the "normal" entry with NormalActionNoise(sigma=0.1);
#   confirm against the installed FinRL version.
# Previously the tuned parameters were applied to models with the library's
# default network and without exploration noise.
# The best_params_* dicts are copied so get_model cannot modify them.
timesteps = 100000
agent = DRLAgent(env=train_env)


model_ddpg_opt = agent.get_model(
    "ddpg",
    policy_kwargs=dict(net_arch=[64, 64]),
    model_kwargs={**best_params_ddpg, "action_noise": "normal"},
)
trained_ddpg_opt = agent.train_model(model=model_ddpg_opt, tb_log_name='ddpg',
                                total_timesteps=timesteps)


model_ppo_opt = agent.get_model(
    "ppo",
    policy_kwargs=dict(net_arch=[64, 64]),
    model_kwargs=dict(best_params_ppo),
)
trained_ppo_opt = agent.train_model(model=model_ppo_opt, tb_log_name='ppo',
                                total_timesteps=timesteps)


model_td3_opt = agent.get_model(
    "td3",
    policy_kwargs=dict(net_arch=[64, 64]),
    model_kwargs={**best_params_td3, "action_noise": "normal"},
)
trained_td3_opt = agent.train_model(model=model_td3_opt, tb_log_name='td3',
                                total_timesteps=timesteps)


# All three predictions reuse the same test_opt_env object.
df_daily_return_ddpg_opt, df_actions_ddpg_opt = DRLAgent.DRL_prediction(model=trained_ddpg_opt, environment = test_opt_env)
df_daily_return_ppo_opt, df_actions_ppo_opt = DRLAgent.DRL_prediction(model=trained_ppo_opt, environment = test_opt_env)

df_daily_return_td3_opt, df_actions_td3_opt = DRLAgent.DRL_prediction(model=trained_td3_opt, environment = test_opt_env)

In [None]:
# Persist the evaluation output of the tuned agents (frame index not written).
for _frame, _csv_path in (
    (df_daily_return_ddpg_opt, "results/ddpg_Opt_performance.csv"),
    (df_actions_ddpg_opt, "results/ddpg_Opt_actions.csv"),
    (df_daily_return_ppo_opt, "results/ppo_Opt_performance.csv"),
    (df_actions_ppo_opt, "results/ppo_Opt_actions.csv"),
    (df_daily_return_td3_opt, "results/td3_Opt_performance.csv"),
    (df_actions_td3_opt, "results/td3_Opt_actions.csv"),
):
    _frame.to_csv(_csv_path, index=False)

## 6. Baselines (DJI Buy & Hold, 1/N, MVO mit max. SR)

In [None]:
# ^DJI over the trade window serves as the buy-and-hold benchmark.
baseline_df = get_baseline(ticker="^DJI", start=trade_start_date, end=trade_end_date)

# Daily returns derived from the benchmark's "close" column.
baseline_returns = get_daily_return(baseline_df, value_col_name="close")
baseline_returns


In [None]:
# Defaults of the Markowitz baseline: lookback length in rows, cost rate per
# unit of turnover, and rebalancing frequency ("QS" = quarter start).
MARKOWITZ_LOOKBACK = 252
MARKOWITZ_TXN_COST = 0.001
MARKOWITZ_REBAL_FREQ = "QS"

# Wide close-price matrix (dates x tickers), gaps forward-filled.
_close_wide = df.pivot_table(index="date", columns="tic", values="close")
prices_all = _close_wide.sort_index().ffill()
prices_all.index = pd.to_datetime(prices_all.index)

# Trade window only; tickers without any price in that window are dropped.
prices_trade = prices_all.loc[trade_start_date:trade_end_date].dropna(axis=1, how="all")


def run_markowitz_rolling_portfolio(
    prices_trade_period: pd.DataFrame,
    prices_full_history: pd.DataFrame,
    initial_capital: float,
    lookback_days: int = MARKOWITZ_LOOKBACK,
    transaction_cost: float = MARKOWITZ_TXN_COST,
    rebalance_frequency: str = MARKOWITZ_REBAL_FREQ,
):
    """Simulate a periodically rebalanced maximum-Sharpe portfolio.

    On the first trading day of every rebalancing period the weights are
    re-optimised on the last ``lookback_days`` rows of price history that end
    before that day. They are then held until the next rebalancing date.

    Changes compared with the previous version:
    - A failed optimisation no longer removes the whole period from the
      result. The previous weights are held without trading; before the first
      successful optimisation that means an all-zero (cash) position.
    - Transaction costs are part of ``daily_return`` on the rebalancing day,
      so compounding ``daily_return`` reproduces ``portfolio_value``.
      Previously the costs only reduced ``portfolio_value``.

    Args:
        prices_trade_period: Prices (dates x tickers) of the simulated window.
        prices_full_history: Prices including the history before the window.
        initial_capital: Starting portfolio value.
        lookback_days: Number of history rows used per optimisation.
        transaction_cost: Cost rate applied to the weight turnover.
        rebalance_frequency: Pandas resample rule defining the periods.

    Returns:
        Tuple ``(df_portfolio, df_rebal)``. ``df_portfolio`` has one row per
        trading day (``date``, ``daily_return``, ``portfolio_value``).
        ``df_rebal`` has one row per executed rebalance (``date``,
        ``transaction_cost``, ``turnover``, ``portfolio_value_after_costs``
        and one weight column per ticker).

    Raises:
        ValueError: If the trade window yields no returns.
    """
    returns_trade = prices_trade_period.pct_change().dropna(how="all")
    if returns_trade.empty:
        raise ValueError("Keine Handelsdaten für den angegebenen Zeitraum.")

    # First available trading day of every rebalancing period.
    rebal_dates = (
        returns_trade.index.to_series()
        .resample(rebalance_frequency)
        .first()
        .dropna()
        .tolist()
    )

    first_trade_day = returns_trade.index[0]
    if not rebal_dates or first_trade_day < rebal_dates[0]:
        rebal_dates.insert(0, first_trade_day)

    last_trade_day = returns_trade.index[-1]
    rebal_dates = sorted({d for d in rebal_dates if d <= last_trade_day})

    tickers = prices_trade_period.columns
    portfolio_value = initial_capital
    prev_weights = pd.Series(0.0, index=tickers)

    period_frames = []
    rebalancing_records = []

    for idx, rebal_date in enumerate(rebal_dates):
        # Only prices strictly before the rebalancing day enter the estimate.
        hist_end_date = rebal_date - pd.Timedelta(days=1)
        hist_window = prices_full_history.loc[:hist_end_date, tickers].tail(lookback_days)

        cost_rate = 0.0  # share of the portfolio value spent on trading
        try:
            # Mean-variance optimisation with pypfopt (maximum Sharpe ratio).
            mu = expected_returns.mean_historical_return(hist_window, frequency=252)
            S = risk_models.sample_cov(hist_window, frequency=252)
            ef = EfficientFrontier(mu, S)
            ef.max_sharpe()
            optimised = pd.Series(ef.clean_weights())
        except Exception as e:
            print(f"[Fehler] Optimierung fehlgeschlagen für {rebal_date.date()}: {e}")
            # Keep the current allocation; no trade, hence no cost.
            weights = prev_weights
        else:
            weights = optimised.reindex(tickers).fillna(0.0)

            turnover = (weights - prev_weights).abs().sum()
            cost_rate = turnover * transaction_cost
            txn_cost_value = cost_rate * portfolio_value
            portfolio_value -= txn_cost_value

            prev_weights = weights.copy()

            rebalancing_records.append({
                "date": rebal_date,
                "transaction_cost": txn_cost_value,
                "turnover": turnover,
                "portfolio_value_after_costs": portfolio_value,
                **weights.to_dict(),
            })

        if idx < len(rebal_dates) - 1:
            period_mask = (returns_trade.index >= rebal_date) & (
                returns_trade.index < rebal_dates[idx + 1]
            )
        else:
            period_mask = returns_trade.index >= rebal_date

        period_rets = returns_trade.loc[period_mask]
        if period_rets.empty:
            continue

        gross_daily_returns = (period_rets * weights).sum(axis=1)
        portfolio_values = portfolio_value * (1 + gross_daily_returns).cumprod()
        portfolio_value = portfolio_values.iloc[-1]

        # Fold the rebalancing cost into the first day's return.
        net_daily_returns = gross_daily_returns.copy()
        net_daily_returns.iloc[0] = (1 - cost_rate) * (1 + gross_daily_returns.iloc[0]) - 1

        period_frames.append(pd.DataFrame({
            "date": period_rets.index,
            "daily_return": net_daily_returns.to_numpy(),
            "portfolio_value": portfolio_values.to_numpy(),
        }))

    if period_frames:
        df_portfolio = pd.concat(period_frames, ignore_index=True)
    else:
        df_portfolio = pd.DataFrame()
    df_rebal = pd.DataFrame(rebalancing_records)

    if not df_portfolio.empty:
        df_portfolio["date"] = pd.to_datetime(df_portfolio["date"])
        df_portfolio.sort_values("date", inplace=True)

    if not df_rebal.empty:
        df_rebal["date"] = pd.to_datetime(df_rebal["date"])
        df_rebal.sort_values("date", inplace=True)

    return df_portfolio, df_rebal


# Run the Markowitz baseline on the trade window.
df_markowitz_portfolio, df_markowitz_rebalancing = run_markowitz_rolling_portfolio(
    prices_trade,
    prices_all,
    initial_capital,
)

# Daily returns in the export layout (Datum / daily_return / Modell).
df_markowitz_export = df_markowitz_portfolio[["date", "daily_return"]].rename(
    columns={"date": "Datum"}
)
df_markowitz_export["Modell"] = "MARKOWITZ"
df_markowitz_export.to_csv("results/markowitz_daily_returns.csv", index=False)
print(f"Markowitz Daily Returns: results/markowitz_daily_returns.csv ({len(df_markowitz_export)} Tage)")

if not df_markowitz_rebalancing.empty:
    stock_names = prices_trade.columns.tolist()
    rebal_sorted = df_markowitz_rebalancing.sort_values('date').reset_index(drop=True)
    markowitz_allocations = []

    # Every trading day gets the weights of the latest rebalance on or before
    # it; days before the first rebalance are skipped.
    for _, row in df_markowitz_portfolio.iterrows():
        current_date = pd.to_datetime(row['date'])
        rebal_before = rebal_sorted[rebal_sorted['date'] <= current_date]
        if rebal_before.empty:
            continue

        latest_rebal = rebal_before.iloc[-1]
        record = {'Datum': current_date.strftime('%Y-%m-%d'), 'Modell': 'MARKOWITZ'}
        record.update(
            (stock, latest_rebal[stock] if stock in latest_rebal else 0.0)
            for stock in stock_names
        )
        markowitz_allocations.append(record)

    if markowitz_allocations:
        df_markowitz_alloc = pd.DataFrame(markowitz_allocations)
        df_markowitz_alloc.to_csv("results/markowitz_asset_allocation.csv", index=False)
        print(f"Markowitz Asset Allocation: results/markowitz_asset_allocation.csv ({len(df_markowitz_alloc)} Tage)")


In [None]:
# Equal-weight (1/N) baseline: every day's return is the plain average of the
# ticker returns; no transaction costs are applied.
returns_trade = prices_trade.pct_change().dropna(how="all")

stock_names = prices_trade.columns.tolist()
n_stocks = len(stock_names)
equal_weight = 1.0 / n_stocks
equal_weights = pd.Series(equal_weight, index=prices_trade.columns)

equal_weight_returns = (returns_trade * equal_weights).sum(axis=1)

initial_value = initial_capital
equal_weight_values = initial_value * (1 + equal_weight_returns).cumprod()

df_equal_weight = pd.DataFrame({
    "date": equal_weight_returns.index,
    "daily_return": equal_weight_returns.values,
    "portfolio_value": equal_weight_values.values
})

# Daily returns in the export layout (Datum / daily_return / Modell).
df_equal_export = df_equal_weight[["date", "daily_return"]].copy()
df_equal_export.columns = ["Datum", "daily_return"]
df_equal_export["Modell"] = "EQUAL_WEIGHT"
df_equal_export.to_csv("results/equal_weight_daily_returns.csv", index=False)
# The emoji in the two messages below was mis-decoded text and is repaired.
print(f"💾 Equal-Weight Daily Returns: results/equal_weight_daily_returns.csv ({len(df_equal_export)} Tage)")

# One allocation record per trading day. The weights are constant, so the
# records are built directly from the date column instead of row by row.
equal_allocations = [
    {
        'Datum': pd.to_datetime(day).strftime('%Y-%m-%d'),
        'Modell': 'EQUAL_WEIGHT',
        **dict.fromkeys(stock_names, equal_weight),
    }
    for day in df_equal_weight['date']
]

if equal_allocations:
    df_equal_alloc = pd.DataFrame(equal_allocations)
    df_equal_alloc.to_csv("results/equal_weight_asset_allocation.csv", index=False)
    print(f"💾 Equal-Weight Asset Allocation: results/equal_weight_asset_allocation.csv ({len(df_equal_alloc)} Tage)")
