In [None]:
pip install gym

Defaulting to user installation because normal site-packages is not writeable

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49m/apps/conda/envs/ood-jupyterlab-4.2/bin/python -m pip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [None]:
pip install stable_baselines3

Defaulting to user installation because normal site-packages is not writeable

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49m/apps/conda/envs/ood-jupyterlab-4.2/bin/python -m pip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
import gym
from gym import spaces
import torch
import torch.nn as nn
import torch.optim as optim
from stable_baselines3 import PPO, DQN
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.callbacks import BaseCallback

In [None]:
# Make sure every output folder the notebook writes to exists.
# exist_ok=True makes re-running this cell a no-op.
for folder_name in ('data', 'figures', 'models', 'results'):
    os.makedirs(folder_name, exist_ok=True)

In [None]:
def efeatures(df):
    """Add technical, cross-market, sector, breadth and volatility features.

    Per ticker (every ``<TICKER>`` taken from a column name containing
    ``_close``): a 14-row RSI, 20- and 50-row simple moving averages, the
    price relative to each average, and a golden/death-cross flag
    (1 / -1 / 0).

    Market-wide: 30-row rolling correlations between the SPY, BTC and ETH
    ``*_return_30d`` columns, mean return and volatility for the hard-coded
    tech / finance / healthcare ticker lists, market breadth over 5, 10 and
    20 rows, a crypto-to-equity volatility ratio, and a SPY volatility regime
    flag (1 high, -1 low, 0 otherwise).

    Args:
        df: DataFrame with ``<TICKER>_close`` columns. ``SPY``, ``BTC`` and
            ``ETH`` must also provide ``_return_30d`` and ``_vol_10d``
            columns. The new feature columns are written into this frame in
            place before the NaN handling runs.

    Returns:
        The frame produced by the NaN handling (forward fill, backward fill,
        then per-column defaults). ``ffill`` returns a new object, so the
        filled values are only present in the returned frame.

    Raises:
        KeyError: if one of the required SPY/BTC/ETH columns is missing.
    """
    print("Adding enhanced features...")

    # Collect tickers in first-seen column order. A plain ``set`` iterates in
    # a hash-dependent order, which made the order of the generated columns
    # differ from one kernel to the next.
    assets = list(dict.fromkeys(
        col.split('_')[0] for col in df.columns if '_close' in col
    ))

    print(f"Processing features for {len(assets)} tickers")

    # Technical indicators
    print("Calculating technical indicators...")

    # RSI (Relative Strength Index), 14-row simple-average variant
    for ticker in assets:
        if f'{ticker}_close' in df.columns:
            print(f"Calculating RSI for {ticker}")
            delta = df[f'{ticker}_close'].diff()
            gain = delta.where(delta > 0, 0)
            loss = -delta.where(delta < 0, 0)
            avg_gain = gain.rolling(window=14).mean()
            avg_loss = loss.rolling(window=14).mean()
            # 1e-10 keeps the ratio finite when there were no losses
            rs = avg_gain / (avg_loss + 1e-10)
            df[f'{ticker}_rsi'] = 100 - (100 / (1 + rs))

    # Moving averages and trend indicators
    print("Calculating trend features for all tickers...")
    for ticker in assets:
        if f'{ticker}_close' in df.columns:
            print(f"Calculating moving averages for {ticker}")
            close = df[f'{ticker}_close']
            sma_20 = close.rolling(20).mean()
            sma_50 = close.rolling(50).mean()
            df[f'{ticker}_sma_20'] = sma_20
            df[f'{ticker}_sma_50'] = sma_50

            # Price relative to each moving average
            df[f'{ticker}_rel_sma20'] = close / (sma_20 + 1e-10)
            df[f'{ticker}_rel_sma50'] = close / (sma_50 + 1e-10)

            # Cross flag: 1 on the row where the 20-row average moves above
            # the 50-row average, -1 where it moves below, 0 otherwise
            df[f'{ticker}_cross'] = 0
            golden_cross = (sma_20 > sma_50) & (sma_20.shift(1) <= sma_50.shift(1))
            death_cross = (sma_20 < sma_50) & (sma_20.shift(1) >= sma_50.shift(1))
            df.loc[golden_cross, f'{ticker}_cross'] = 1
            df.loc[death_cross, f'{ticker}_cross'] = -1

    # Cross-market correlation features
    print("Calculating cross-market correlations...")

    # SPY correlations with crypto
    df['spy_btc_corr'] = df['SPY_return_30d'].rolling(30).corr(df['BTC_return_30d'])
    df['spy_eth_corr'] = df['SPY_return_30d'].rolling(30).corr(df['ETH_return_30d'])

    # Crypto correlations
    df['btc_eth_corr'] = df['BTC_return_30d'].rolling(30).corr(df['ETH_return_30d'])

    # Sector membership used for the sector-level aggregates
    tech = ['AAPL', 'MSFT', 'GOOGL', 'AMZN', 'META', 'NVDA', 'INTC', 'CSCO', 'ADBE', 'CRM']
    finance = ['JPM', 'BAC', 'GS', 'MS', 'BLK']
    healthcare = ['UNH', 'JNJ', 'PFE', 'MRK', 'ABBV', 'LLY', 'ABT', 'TMO', 'DHR', 'BMY']

    print("Calculating sector-level features...")
    for sectorname, sector in [
        ('tech', tech),
        ('finance', finance),
        ('healthcare', healthcare),
    ]:
        valid_tickers = [t for t in sector if f'{t}_return_30d' in df.columns]

        if valid_tickers:
            # Average sector return across the tickers that are present
            return_cols = [f'{t}_return_30d' for t in valid_tickers]
            df[f'{sectorname}_return_30d'] = df[return_cols].mean(axis=1)

            # Average sector volatility; a ticker may have a return column
            # without a volatility column, so filter before selecting
            vol_cols = [f'{t}_vol_10d' for t in valid_tickers
                        if f'{t}_vol_10d' in df.columns]
            if vol_cols:
                df[f'{sectorname}_vol_10d'] = df[vol_cols].mean(axis=1)

    # Market breadth indicators
    print("Calculating market breadth indicators...")

    # Share of non-crypto, non-sector return columns that are positive
    sector_return_cols = ['tech_return_30d', 'finance_return_30d', 'healthcare_return_30d']
    returncols = [col for col in df.columns if col.endswith('_return_30d')
                  and not col.startswith('BTC_') and not col.startswith('ETH_')
                  and col not in sector_return_cols]

    positive_returns = (df[returncols] > 0).astype(int)

    for window in [5, 10, 20]:
        df[f'market_breadth_{window}d'] = positive_returns.rolling(window).mean().mean(axis=1)

    # Volatility ratio features
    print("Calculating volatility ratios...")

    df['crypto_equity_vol_ratio'] = (df['BTC_vol_10d'] + df['ETH_vol_10d']) / (2 * df['SPY_vol_10d'] + 1e-10)

    # Volatility regime: SPY volatility more than one rolling standard
    # deviation above (1) or below (-1) its 90-row rolling mean
    print("Detecting volatility regimes...")

    df['market_vol_regime'] = 0
    rolling_vol_mean = df['SPY_vol_10d'].rolling(90).mean()
    rolling_vol_std = df['SPY_vol_10d'].rolling(90).std()

    high_vol = df['SPY_vol_10d'] > (rolling_vol_mean + rolling_vol_std)
    low_vol = df['SPY_vol_10d'] < (rolling_vol_mean - rolling_vol_std)

    df.loc[high_vol, 'market_vol_regime'] = 1
    df.loc[low_vol, 'market_vol_regime'] = -1

    # Report NaNs introduced by the rolling windows
    nan_counts = df.isna().sum()
    print(f"NaN values after feature engineering: {nan_counts[nan_counts > 0].sum()}")

    print("Handling NaN values...")

    # Forward fill first, then backward fill the leading gaps.
    # NOTE: the backward fill copies the first valid value into earlier rows,
    # so warm-up rows carry values computed from later rows.
    df = df.ffill()
    df = df.bfill()

    # Anything still NaN is a column with no valid value at all
    for col in df.columns:
        if df[col].isna().any():
            if 'return' in col or 'vol' in col:
                df[col] = df[col].fillna(0)
            elif 'rsi' in col:
                df[col] = df[col].fillna(50)
            else:
                # The median of an all-NaN column is NaN, which would leave
                # the column unfilled; fall back to 0 in that case
                median = df[col].median()
                df[col] = df[col].fillna(0 if pd.isna(median) else median)

    nan_counts_after = df.isna().sum()
    print(f"NaN values after filling: {nan_counts_after[nan_counts_after > 0].sum()}")

    print("Feature engineering complete.")
    return df

In [None]:
def sfeatures(df, correlation_threshold=0.7, importance_threshold=0.01):
    """Pick a list of feature column names from ``df``.

    A column's importance is its mean absolute correlation with whichever of
    the SPY/BTC/ETH ``*_return_30d`` columns are present. Columns at or below
    ``importance_threshold`` are dropped. Within every pair of columns whose
    absolute correlation exceeds ``correlation_threshold``, the less important
    one is dropped. A fixed list of critical columns and every ``*_close``
    column found in ``df`` are then appended regardless of the above.

    Args:
        df: source DataFrame; it is copied, not modified.
        correlation_threshold: absolute pairwise correlation above which two
            columns count as redundant.
        importance_threshold: minimum mean absolute target correlation.

    Returns:
        List of selected column names.
    """
    print("Performing feature selection...")

    feature_df = df.copy()

    # Columns never used as features: two fixed names plus every datetime or
    # object-typed column
    non_feature_cols = ['is_weekend', 'regime_name']
    for name in feature_df.columns:
        series = feature_df[name]
        is_non_numeric = (pd.api.types.is_datetime64_any_dtype(series)
                          or pd.api.types.is_object_dtype(series))
        if is_non_numeric and name not in non_feature_cols:
            non_feature_cols.append(name)

    feature_df = feature_df.drop(columns=non_feature_cols, errors='ignore')

    # Coerce to numbers and discard columns that end up entirely NaN
    numeric_df = (
        feature_df
        .apply(pd.to_numeric, errors='coerce')
        .dropna(axis=1, how='all')
    )

    print(f"Using {len(numeric_df.columns)} numeric features for selection")

    # Importance: mean |correlation| with the available target columns
    present_targets = [
        name for name in ('SPY_return_30d', 'BTC_return_30d', 'ETH_return_30d')
        if name in numeric_df.columns
    ]

    featureimportance = pd.DataFrame(index=numeric_df.columns)
    for target in present_targets:
        featureimportance[f'corr_{target}'] = numeric_df.corrwith(numeric_df[target]).abs()
    featureimportance['avg_correlation'] = featureimportance.mean(axis=1)
    avg_corr = featureimportance['avg_correlation']

    # Strict upper triangle of the |correlation| matrix, so each pair is
    # visited exactly once
    corr_matrix = numeric_df.corr().abs()
    strictly_upper = np.triu(np.ones(corr_matrix.shape), k=1).astype(bool)
    upper_tri = corr_matrix.where(strictly_upper)

    # For each highly correlated pair, mark the less important member as
    # redundant (ties mark the column-side member); pairs that already
    # contain a redundant member are skipped
    redundantfeatures = []
    already_redundant = set()
    for col in upper_tri.columns:
        partners = upper_tri.index[upper_tri[col] > correlation_threshold].tolist()
        for partner in partners:
            if col in already_redundant or partner in already_redundant:
                continue
            loser = partner if avg_corr.loc[col] > avg_corr.loc[partner] else col
            redundantfeatures.append(loser)
            already_redundant.add(loser)

    # Important and not redundant, in original column order
    importantfeatures = avg_corr.index[avg_corr > importance_threshold].tolist()
    selectedfeatures = [name for name in importantfeatures
                        if name not in already_redundant]

    # Columns that are always kept when they exist in ``df``
    criticalfeatures = [
        'market_regime', 'market_vol_regime',
        'SPY_return_30d', 'BTC_return_30d', 'ETH_return_30d',
        'SPY_vol_10d', 'BTC_vol_10d', 'ETH_vol_10d',
        'spy_btc_corr', 'spy_eth_corr', 'btc_eth_corr',
        'market_breadth_20d', 'crypto_equity_vol_ratio'
    ]

    must_keep = [name for name in criticalfeatures if name in df.columns]
    # Price columns are always kept as well
    must_keep += [name for name in df.columns if name.endswith('_close')]

    for name in must_keep:
        if name not in selectedfeatures:
            selectedfeatures.append(name)

    print(f"Selected {len(selectedfeatures)} features out of {len(feature_df.columns)}")
    return selectedfeatures

In [None]:
class MultiMarketPortfolioEnv(gym.Env):
    """Portfolio-rebalancing environment over the ``*_close`` columns of ``data``.

    Uses the classic gym API: ``reset()`` returns an observation and
    ``step()`` returns ``(observation, reward, done, info)``.

    An action is one score per asset; ``step`` clips the scores to ``[0, 1]``
    and normalises them into weights that sum to 1 (equal weights if the
    scores sum to zero).

    The observation depends on ``agent_type``:

    * ``'strategic'``: regime / breadth / correlation scalars, sector and
      asset 30d returns, previous weights and running performance
      (``11 + 2 * n_assets`` values);
    * ``'tactical'``: per-asset volatility, RSI, cross flag, three market
      scalars, previous weights and last price change
      (``3 + 5 * n_assets`` values);
    * anything else (default ``'dual'``): both vectors concatenated.

    ``data`` must contain ``market_regime`` and ``is_weekend`` columns; the
    other feature columns are optional and fall back to neutral defaults.

    Args:
        data: DataFrame indexed by time step, one row per step.
        selected_features: stored on the instance; not otherwise used by
            this class.
        window_size: number of leading rows skipped before an episode may
            start.
        train_period: ``(start, end)`` positional row range for episodes.
        max_steps: maximum episode length; shrunk to fit ``train_period``.
        transaction_cost: cost per unit of absolute weight change.
        mode: ``'train'`` draws a random episode start, any other value
            starts at ``train_start + window_size``.
        agent_type: ``'strategic'``, ``'tactical'`` or ``'dual'``.
    """

    def __init__(self, data, selected_features=None, window_size=30,
                 train_period=(0, 1800), max_steps=252, transaction_cost=0.001,
                 mode='train', agent_type='dual'):
        super(MultiMarketPortfolioEnv, self).__init__()

        self.data = data
        self.selected_features = selected_features
        self.window_size = window_size
        self.train_start, self.train_end = train_period
        self.transaction_cost = transaction_cost
        self.mode = mode
        self.agent_type = agent_type

        period_length = self.train_end - self.train_start

        # An episode can never be longer than the period itself
        if max_steps is None or max_steps > period_length:
            self.max_steps = max(1, period_length // 2)
        else:
            self.max_steps = max_steps

        # Shrink the lookback if it would swallow the whole period
        if period_length - self.window_size <= 0:
            self.window_size = max(1, period_length // 5)
            print(f"Adjusted window_size to {self.window_size} to fit training period")

        # Leave room for the lookback in front of the episode
        if self.max_steps > (period_length - self.window_size):
            self.max_steps = max(1, period_length - self.window_size)

        print(f"Using max_steps={self.max_steps} for training window {self.train_start}:{self.train_end}")

        # One asset per ``<TICKER>_close`` column, in column order
        self.assets = [col.split('_')[0] for col in data.columns
                       if col.endswith('_close')]
        self.n_assets = len(self.assets)
        print(f"Environment created with {self.n_assets} assets")

        self.action_space = spaces.Box(
            low=0, high=1, shape=(self.n_assets,), dtype=np.float32
        )

        # Layout sizes must match get_strategic_state / get_tactical_state
        strategic_dim = 5 + 3 + self.n_assets + self.n_assets + 3
        tactical_dim = self.n_assets * 3 + 3 + self.n_assets + self.n_assets
        if self.agent_type == 'strategic':
            obs_dim = strategic_dim
        elif self.agent_type == 'tactical':
            obs_dim = tactical_dim
        else:
            obs_dim = strategic_dim + tactical_dim

        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf, shape=(obs_dim,), dtype=np.float32
        )

        # reset() initialises every piece of episode state
        self.reset()

    def _value_at(self, column, step=None):
        """Return ``data[column]`` at positional row ``step`` (default: current step).

        Selecting the column first avoids building a Series for the whole row
        on every lookup. Raises ``KeyError`` if the column does not exist.
        """
        if step is None:
            step = self.current_step
        return self.data[column].iloc[step]

    def _value_or(self, column, default):
        """Current-row value of ``column``, or ``default`` if the column is absent."""
        if column in self.data.columns:
            return self._value_at(column)
        return default

    def _asset_returns(self):
        """Per-asset simple return from the previous row to the current row.

        Returns 0 for an asset whose previous price is not strictly positive
        (this also covers NaN) and for every asset when there is no previous
        row.
        """
        returns = np.zeros(self.n_assets)
        if self.current_step <= 0:
            return returns
        for i, asset in enumerate(self.assets):
            prices = self.data[f'{asset}_close']
            prev_price = prices.iloc[self.current_step - 1]
            curr_price = prices.iloc[self.current_step]
            if prev_price > 0:
                returns[i] = curr_price / prev_price - 1
        return returns

    def reset(self):
        """Start a new episode and return its first observation."""
        first_valid_step = self.train_start + self.window_size

        if self.mode == 'train':
            low = first_valid_step
            high = self.train_end - self.max_steps

            if low >= high:
                self.current_step = first_valid_step
                print(f"Warning: Invalid random range ({low} >= {high}). Using fixed start index: {self.current_step}")
            else:
                # Uses numpy's global random state
                self.current_step = np.random.randint(low, high)
        else:
            # Evaluation always starts right after the lookback window
            self.current_step = first_valid_step

        self.start_step = self.current_step
        self.end_step = min(self.current_step + self.max_steps, self.train_end)

        # Portfolio starts at unit value with equal weights
        self.portfolio_value = 1.0
        self.initial_portfolio_value = 1.0
        self.weights = np.ones(self.n_assets) / self.n_assets
        self.prev_weights = self.weights.copy()

        # Performance tracking
        self.portfolio_values = [self.portfolio_value]
        self.returns = []
        self.sharpe_ratio = 0
        self.max_drawdown = 0

        return self.get_state()

    def get_state(self):
        """Build the observation for ``agent_type`` as a float32 vector."""
        if self.agent_type == 'strategic':
            state = self.get_strategic_state()
        elif self.agent_type == 'tactical':
            state = self.get_tactical_state()
        else:
            state = np.concatenate([
                self.get_strategic_state(),
                self.get_tactical_state(),
            ])
        # observation_space declares float32; float64 arrays are rejected by
        # Box.contains because the cast is not a safe one
        return state.astype(np.float32)

    def get_strategic_state(self):
        """Slow-moving market context plus running portfolio performance."""
        # Market regime features ('market_regime' is a required column)
        regime = self._value_at('market_regime')
        vol_regime = self._value_or('market_vol_regime', 0)

        # Market breadth indicator
        market_breadth = self._value_or('market_breadth_20d', 0.5)

        # Cross-market correlations
        spy_btc_corr = self._value_or('spy_btc_corr', 0)
        spy_eth_corr = self._value_or('spy_eth_corr', 0)

        # Sector and asset 30d returns, 0 where the column is missing
        sector_returns = [self._value_or(f'{sector}_return_30d', 0)
                          for sector in ['tech', 'finance', 'healthcare']]
        asset_returns = [self._value_or(f'{asset}_return_30d', 0)
                         for asset in self.assets]

        # Running performance; the Sharpe ratio is only reported once the
        # 30-step window used in step() is full
        if len(self.returns) > 0:
            recent_return = self.returns[-1]
            rolling_sharpe = self.sharpe_ratio if len(self.returns) >= 30 else 0
        else:
            recent_return = 0
            rolling_sharpe = 0

        return np.concatenate([
            [regime, vol_regime, market_breadth, spy_btc_corr, spy_eth_corr],
            sector_returns,
            asset_returns,
            self.prev_weights.copy(),
            [recent_return, rolling_sharpe, self.max_drawdown]
        ])

    def get_tactical_state(self):
        """Short-horizon per-asset indicators plus the last price move."""
        asset_vols = [self._value_or(f'{asset}_vol_10d', 0.01)
                      for asset in self.assets]
        asset_rsis = [self._value_or(f'{asset}_rsi', 50)
                      for asset in self.assets]
        trend_indicators = [self._value_or(f'{asset}_cross', 0)
                            for asset in self.assets]

        # Cross-asset volatility ratio
        crypto_equity_ratio = self._value_or('crypto_equity_vol_ratio', 1)

        # 1 if the regime differs from the one five rows earlier
        recent_regime_change = 0
        if self.current_step > 5:
            regime_now = self._value_at('market_regime')
            regime_before = self._value_at('market_regime', self.current_step - 5)
            if regime_now != regime_before:
                recent_regime_change = 1

        # Weekend flag ('is_weekend' is a required column)
        is_weekend = 1 if self._value_at('is_weekend') else 0

        price_changes = self._asset_returns()

        return np.concatenate([
            asset_vols,
            asset_rsis,
            trend_indicators,
            [crypto_equity_ratio, recent_regime_change, is_weekend],
            self.prev_weights.copy(),
            price_changes
        ])

    def step(self, action):
        """Rebalance to ``action``, advance one row and score the result.

        Returns:
            ``(next_state, reward, done, info)``. On the terminal step no
            return is realised, the portfolio value is unchanged and the
            observation is all zeros.
        """
        # Turn the raw scores into weights that sum to 1
        action = np.clip(action, 0, 1)
        action_sum = np.sum(action)
        if action_sum > 0:
            action = action / action_sum
        else:
            action = np.ones(self.n_assets) / self.n_assets

        # Cost is proportional to the total absolute weight change
        tc = np.sum(np.abs(action - self.prev_weights)) * self.transaction_cost

        self.prev_weights = action.copy()
        # Keep the public ``weights`` attribute in sync with the allocation
        self.weights = action.copy()
        self.current_step += 1
        done = self.current_step >= self.end_step or self.current_step >= len(self.data) - 1

        if done:
            returns = np.zeros(self.n_assets)
            portfolio_return = 0
        else:
            returns = self._asset_returns()
            portfolio_return = np.sum(action * returns) - tc
            self.portfolio_value *= (1 + portfolio_return)

        self.portfolio_values.append(self.portfolio_value)
        self.returns.append(portfolio_return)

        # Rolling performance, available once 30 returns exist
        if len(self.returns) >= 30:
            recent_returns = np.array(self.returns[-30:])
            self.sharpe_ratio = np.mean(recent_returns) / (np.std(recent_returns) + 1e-6) * np.sqrt(252)

            values = np.asarray(self.portfolio_values)
            peak = np.maximum.accumulate(values)
            self.max_drawdown = np.max((peak - values) / peak)

        # Rewards: Sharpe ratio for the strategic agent; portfolio return
        # scaled by the inverse dispersion of this step's asset returns for
        # the tactical agent; the sum of both otherwise
        if self.agent_type == 'strategic':
            reward = self.sharpe_ratio
        else:
            vol_scaling = 1.0 / (np.std(returns) + 1e-6)
            tactical_reward = portfolio_return * vol_scaling
            if self.agent_type == 'tactical':
                reward = tactical_reward
            else:
                reward = self.sharpe_ratio + tactical_reward

        if done:
            next_state = np.zeros(self.observation_space.shape[0], dtype=np.float32)
        else:
            next_state = self.get_state()

        info = {
            'portfolio_value': self.portfolio_value,
            'portfolio_return': portfolio_return,
            'transaction_cost': tc,
            'weights': action,
            'sharpe_ratio': self.sharpe_ratio,
            'max_drawdown': self.max_drawdown
        }

        return next_state, reward, done, info

In [None]:
from gym import Wrapper, spaces

class DiscreteActionWrapper(Wrapper):
    """Expose a continuous-weight portfolio env through a discrete action set.

    Each discrete action shifts weight between the crypto assets (``BTC``,
    ``ETH``) and all other assets, starting from the weights applied on the
    previous step. Action 0 and any index beyond the action table keep the
    current weights.

    Args:
        env: wrapped environment; must expose an ``assets`` list and report
            the applied weights under ``info['weights']``.
        actions: indexable table of action dicts with ``"type"`` and
            ``"value"`` entries for every non-HOLD action.
    """

    # Action types that move weight out of crypto into equities, and back
    _TO_EQUITY = ("equity_increase", "crypto_decrease")
    _TO_CRYPTO = ("equity_decrease", "crypto_increase")

    def __init__(self, env, actions):
        super(DiscreteActionWrapper, self).__init__(env)
        self.actions = actions

        # Replace the continuous action space with one index per action
        self.action_space = spaces.Discrete(len(actions))

        self.assets = env.assets
        self.n_assets = len(env.assets)

        # Weights the next discrete action is applied to
        self.current_weights = np.ones(self.n_assets) / self.n_assets

    def step(self, action):
        """Translate the discrete ``action`` into weights and step the env."""
        continuous_action = self.convertaction(action)

        obs, reward, done, info = self.env.step(continuous_action)

        # Remember what the env actually applied (after its own normalisation)
        if 'weights' in info:
            self.current_weights = info['weights']

        return obs, reward, done, info

    def reset(self):
        """Reset the wrapped env and return to equal weights."""
        obs = self.env.reset()
        self.current_weights = np.ones(self.n_assets) / self.n_assets
        return obs

    @staticmethod
    def _shift(weights, source, target, amount, split_equally):
        """Move up to ``amount`` of total weight from ``source`` to ``target``.

        Weight is removed from the source indices in proportion to their
        current weights. It is added to the target indices either in equal
        parts (``split_equally``) or in proportion to their current weights;
        in the proportional case nothing is added when the targets hold no
        weight. ``weights`` is modified in place.
        """
        source_total = sum(weights[i] for i in source)
        moved = min(amount, source_total)

        if source_total > 0:
            for i in source:
                weights[i] -= moved * (weights[i] / source_total)

        if split_equally:
            for i in target:
                weights[i] += moved / len(target)
        else:
            target_total = sum(weights[i] for i in target)
            if target_total > 0:
                for i in target:
                    weights[i] += moved * (weights[i] / target_total)

    def convertaction(self, action_idx):
        """Return the portfolio weights that discrete ``action_idx`` stands for."""
        base_weights = self.current_weights.copy()

        # Policies may hand over a numpy scalar or a one-element array, which
        # cannot be used as a dict key; reduce it to a plain int first
        action_idx = int(np.asarray(action_idx).reshape(-1)[0])

        # HOLD (or an index outside the table): keep the current weights
        if action_idx == 0 or action_idx >= len(self.actions):
            return base_weights

        action = self.actions[action_idx]
        new_weights = base_weights.copy()

        crypto_indices = [i for i, a in enumerate(self.assets) if a in ['BTC', 'ETH']]
        equity_indices = [i for i, a in enumerate(self.assets) if a not in ['BTC', 'ETH']]

        if action["type"] in self._TO_EQUITY:
            # Crypto is reduced; equities grow in proportion to their weights
            self._shift(new_weights, crypto_indices, equity_indices,
                        action["value"], split_equally=False)
        elif action["type"] in self._TO_CRYPTO:
            # Equities are reduced; the crypto assets share the gain equally
            self._shift(new_weights, equity_indices, crypto_indices,
                        action["value"], split_equally=True)

        # Ensure weights are valid and sum to 1
        new_weights = np.clip(new_weights, 0, 1)
        weight_sum = np.sum(new_weights)
        if weight_sum > 0:
            new_weights = new_weights / weight_sum

        return new_weights

In [None]:
class StrategicAgent:
    """Wrapper around a Stable-Baselines3 PPO model with an MLP policy.

    Args:
        env: environment handed to ``PPO``.
        learning_rate: optimiser learning rate.
        use_tensorboard: when true, PPO logs to
            ``./tensorboard/strategic_agent/``.
    """

    def __init__(self, env, learning_rate=3e-4, use_tensorboard=False):
        if use_tensorboard:
            log_dir = "./tensorboard/strategic_agent/"
        else:
            log_dir = None

        # Policy and value networks use separate stacks of the same shape
        hidden_layers = [128, 128, 64]
        ppo_settings = {
            "device": "cuda",
            "verbose": 1,
            "policy_kwargs": {
                "net_arch": [{"pi": list(hidden_layers), "vf": list(hidden_layers)}],
                "activation_fn": nn.ReLU,
            },
            "learning_rate": learning_rate,
            "n_steps": 2048,
            "batch_size": 64,
            "gamma": 0.99,
            "ent_coef": 0.01,
            "clip_range": 0.2,
            "tensorboard_log": log_dir,
        }

        self.model = PPO("MlpPolicy", env, **ppo_settings)

    def train(self, total_timesteps):
        """Run PPO learning for ``total_timesteps`` environment steps."""
        print("Training strategic agent...")
        self.model.learn(total_timesteps=total_timesteps)

    def predict(self, state, deterministic=False):
        """Return the model's action for ``state``; the second return value of
        ``model.predict`` is discarded."""
        prediction = self.model.predict(state, deterministic=deterministic)
        return prediction[0]

    def save(self, path):
        """Save the underlying model to ``path``."""
        self.model.save(path)

    def load(self, path):
        """Replace the underlying model with the PPO model stored at ``path``."""
        self.model = PPO.load(path)


class TacticalAgent:
    """DQN agent that shifts a base allocation between equities and crypto.

    The model picks one of the discrete actions from ``discreteactions()``;
    ``aptactical_action`` turns that choice into new portfolio weights.
    ``BTC`` and ``ETH`` count as crypto, every other asset as equity.

    Args:
        env: portfolio environment exposing an ``assets`` list.
        n_discrete_actions: accepted for backward compatibility only; the
            number of actions is taken from the action table.
        learning_rate: optimiser learning rate for DQN.
    """

    # Action types that move weight out of crypto into equities, and back
    _TO_EQUITY = ("equity_increase", "crypto_decrease")
    _TO_CRYPTO = ("equity_decrease", "crypto_increase")

    def __init__(self, env, n_discrete_actions=5, learning_rate=1e-4):
        self.env = env
        self.n_assets = len(env.assets)

        # Discrete action table and its size
        self.actions = self.discreteactions()
        self.n_discrete_actions = len(self.actions)

        # DQN needs a discrete action space, so train on the wrapped env
        self.wrapped_env = self.discrete_wrapper(env)

        policy_kwargs = dict(
            net_arch=[128, 128, 64],
            activation_fn=nn.ReLU
        )

        self.model = DQN(
            "MlpPolicy",
            self.wrapped_env,
            device="cuda",
            verbose=1,
            policy_kwargs=policy_kwargs,
            learning_rate=learning_rate,
            buffer_size=100000,
            learning_starts=1000,
            batch_size=32,
            gamma=0.95,
            exploration_fraction=0.2,
            exploration_final_eps=0.05,
            tensorboard_log=None
        )

    def discrete_wrapper(self, env):
        """Wrap ``env`` so that it accepts indices into ``self.actions``."""
        return DiscreteActionWrapper(env, self.actions)

    def discreteactions(self):
        """Return the action table: index -> action description.

        ``value`` is the maximum total weight moved by the action.
        """
        actions = {
            0: {"name": "HOLD", "modify": False},
            1: {"name": "EQUITY+", "type": "equity_increase", "value": 0.1},
            2: {"name": "EQUITY-", "type": "equity_decrease", "value": 0.1},
            3: {"name": "CRYPTO+", "type": "crypto_increase", "value": 0.05},
            4: {"name": "CRYPTO-", "type": "crypto_decrease", "value": 0.05}
        }
        return actions

    def train(self, total_timesteps):
        """Run DQN learning for ``total_timesteps`` environment steps."""
        print("Training tactical agent (DQN)...")
        self.model.learn(total_timesteps=total_timesteps)

    def predict(self, state, base_weights=None, deterministic=False):
        """Return ``base_weights`` adjusted by the action chosen for ``state``.

        ``base_weights`` defaults to an equal-weight allocation.
        """
        if base_weights is None:
            base_weights = np.ones(self.n_assets) / self.n_assets

        # Discrete action from the model, then the weights it stands for
        action, _ = self.model.predict(state, deterministic=deterministic)
        return self.aptactical_action(action, base_weights)

    @staticmethod
    def _shift(weights, source, target, amount, split_equally):
        """Move up to ``amount`` of total weight from ``source`` to ``target``.

        Weight is removed from the source indices in proportion to their
        current weights. It is added to the target indices either in equal
        parts (``split_equally``) or in proportion to their current weights;
        in the proportional case nothing is added when the targets hold no
        weight. ``weights`` is modified in place.
        """
        source_total = sum(weights[i] for i in source)
        moved = min(amount, source_total)

        if source_total > 0:
            for i in source:
                weights[i] -= moved * (weights[i] / source_total)

        if split_equally:
            for i in target:
                weights[i] += moved / len(target)
        else:
            target_total = sum(weights[i] for i in target)
            if target_total > 0:
                for i in target:
                    weights[i] += moved * (weights[i] / target_total)

    def aptactical_action(self, action_idx, base_weights):
        """Apply discrete action ``action_idx`` to ``base_weights``.

        HOLD (index 0) and indices that are not in the action table return
        ``base_weights`` unchanged; every other action returns a new,
        renormalised weight vector.
        """
        # The model returns a numpy scalar or array, which cannot be used as
        # a dict key; reduce it to a plain int before any lookup
        action_idx = int(np.asarray(action_idx).reshape(-1)[0])

        if action_idx == 0 or action_idx not in self.actions:
            return base_weights

        action = self.actions[action_idx]
        new_weights = base_weights.copy()

        assets = self.env.assets
        crypto_indices = [i for i, a in enumerate(assets) if a in ['BTC', 'ETH']]
        equity_indices = [i for i, a in enumerate(assets) if a not in ['BTC', 'ETH']]

        if action["type"] in self._TO_EQUITY:
            # Crypto is reduced; equities grow in proportion to their weights
            self._shift(new_weights, crypto_indices, equity_indices,
                        action["value"], split_equally=False)
        elif action["type"] in self._TO_CRYPTO:
            # Equities are reduced; the crypto assets share the gain equally
            self._shift(new_weights, equity_indices, crypto_indices,
                        action["value"], split_equally=True)

        # Ensure weights are valid and sum to 1
        new_weights = np.clip(new_weights, 0, 1)
        weight_sum = np.sum(new_weights)
        if weight_sum > 0:
            new_weights = new_weights / weight_sum

        return new_weights

    def save(self, path):
        """Save the underlying model to ``path``."""
        self.model.save(path)

    def load(self, path):
        """Replace the underlying model with the DQN model stored at ``path``.

        The agent trains and saves a DQN model, so it has to be restored with
        ``DQN.load``; loading it through another algorithm class does not
        give back a usable tactical model.
        """
        self.model = DQN.load(path)


class DualAgentSystem:
    """Blend a strategic and a tactical agent into a single portfolio policy.

    Each agent is given its own ``MultiMarketPortfolioEnv`` that copies the
    settings of the environment passed to the constructor and differs only in
    ``agent_type``. ``predict`` asks both agents for an action and mixes the
    two with a weight ``alpha`` produced by ``differentiable_auction``.
    """

    def __init__(self, env):
        """Create both agents from the configuration of ``env``.

        Args:
            env: Template environment. The attributes ``assets``, ``data``,
                ``selected_features``, ``window_size``, ``train_start``,
                ``train_end``, ``max_steps``, ``transaction_cost`` and
                ``mode`` are read from it.
        """
        self.env = env
        self.assets = env.assets
        self.n_assets = len(env.assets)

        # The strategic and tactical environments share every setting except
        # ``agent_type``.
        shared_env_kwargs = dict(
            selected_features=env.selected_features,
            window_size=env.window_size,
            train_period=(env.train_start, env.train_end),
            max_steps=env.max_steps,
            transaction_cost=env.transaction_cost,
            mode=env.mode,
        )
        strategic_env = MultiMarketPortfolioEnv(
            env.data, agent_type='strategic', **shared_env_kwargs
        )
        tactical_env = MultiMarketPortfolioEnv(
            env.data, agent_type='tactical', **shared_env_kwargs
        )

        self.strategic_agent = StrategicAgent(strategic_env)
        # DQN-based tactical agent (predict() reads its ``model.q_net``)
        self.tactical_agent = TacticalAgent(tactical_env)

        # Blending weight of the strategic action; overwritten on every predict()
        self.auction_alpha = 0.5

        # For tracking: one entry is appended to each list per predict() call
        self.strategic_actions = []
        self.tactical_actions = []
        self.combined_actions = []
        self.alpha_history = []

    def train(self, total_timesteps, train_strategic=True, train_tactical=True):
        """Train the selected agents, each for ``total_timesteps`` steps.

        Args:
            total_timesteps: Budget forwarded to each agent's ``train``.
            train_strategic: Skip the strategic agent when False.
            train_tactical: Skip the tactical agent when False.
        """
        if train_strategic:
            self.strategic_agent.train(total_timesteps=total_timesteps)

        if train_tactical:
            self.tactical_agent.train(total_timesteps=total_timesteps)

    def differentiable_auction(self, strategic_value, tactical_value, epsilon=1e-6):
        """Turn the two value estimates into the strategic blending weight.

        The weight is the strategic agent's share of the summed absolute
        values, clamped to ``[0.2, 0.8]``.

        Args:
            strategic_value: Value estimate of the strategic agent.
            tactical_value: Value estimate of the tactical agent.
            epsilon: Small constant added to the first normalisation.

        Returns:
            ``alpha`` in ``[0.2, 0.8]``. The neutral ``0.5`` is returned when
            the share is undefined (both estimates zero, or a NaN/inf input)
            instead of dividing zero by zero.
        """
        strategic_magnitude = abs(strategic_value)
        tactical_magnitude = abs(tactical_value)

        # Normalize values
        total_value = strategic_magnitude + tactical_magnitude + epsilon
        if not total_value > 0:
            # Zero total (epsilon == 0 with zero inputs) or NaN: no preference.
            return 0.5
        norm_strategic = strategic_magnitude / total_value
        norm_tactical = tactical_magnitude / total_value

        # Share of the strategic agent; undefined when both shares are zero.
        share_total = norm_strategic + norm_tactical
        if not share_total > 0:
            return 0.5
        alpha = norm_strategic / share_total

        # Bound between 0.2 and 0.8 to prevent extreme allocations
        alpha = max(0.2, min(0.8, alpha))

        return alpha

    def predict(self, state, deterministic=False):
        """Produce the blended action for a concatenated observation.

        Args:
            state: 1-D observation. The leading
                ``strategic_agent.model.observation_space.shape[0]`` entries
                are given to the strategic agent, the remainder to the
                tactical agent.
            deterministic: Forwarded to both agents' ``predict``.

        Returns:
            Tuple ``(combined_action, alpha)``: the blended action clipped to
            ``[0, 1]`` and rescaled to sum to 1 (left as-is when it sums to
            0), and the strategic blending weight that was used.
        """
        # Split state between the agents
        strategic_state_dim = self.strategic_agent.model.observation_space.shape[0]
        strategic_state = state[:strategic_state_dim]
        tactical_state = state[strategic_state_dim:]

        # Actions from each agent; the tactical agent also sees the strategic action
        strategic_action = self.strategic_agent.predict(strategic_state, deterministic)
        tactical_action = self.tactical_agent.predict(tactical_state, strategic_action, deterministic)

        strategic_model = self.strategic_agent.model
        tactical_model = self.tactical_agent.model

        # Value estimates for the auction. Only the scalar values are needed,
        # so autograd bookkeeping is switched off for these forward passes.
        with torch.no_grad():
            # First element of evaluate_actions(...) is used as the state value.
            strategic_value = strategic_model.policy.evaluate_actions(
                torch.tensor(strategic_state, dtype=torch.float32, device=strategic_model.device).reshape(1, -1),
                torch.tensor(strategic_action, dtype=torch.float32, device=strategic_model.device).reshape(1, -1)
            )[0].item()

            # Largest Q-value over the tactical agent's actions
            tactical_value = tactical_model.q_net(
                torch.tensor(tactical_state, dtype=torch.float32, device=tactical_model.device).reshape(1, -1)
            ).max(dim=1)[0].item()

        # risk budget allocation - differentiable auction
        self.auction_alpha = self.differentiable_auction(strategic_value, tactical_value)

        # Combine actions
        combined_action = (
            self.auction_alpha * strategic_action +
            (1 - self.auction_alpha) * tactical_action
        )

        # Normalize
        combined_action = np.clip(combined_action, 0, 1)
        action_sum = np.sum(combined_action)
        if action_sum > 0:
            combined_action = combined_action / action_sum

        # Track actions
        self.strategic_actions.append(strategic_action)
        self.tactical_actions.append(tactical_action)
        self.combined_actions.append(combined_action)
        self.alpha_history.append(self.auction_alpha)

        return combined_action, self.auction_alpha

    def save(self, path_prefix):
        """Save both agents under ``{path_prefix}_strategic`` / ``{path_prefix}_tactical``."""
        self.strategic_agent.save(f"{path_prefix}_strategic")
        self.tactical_agent.save(f"{path_prefix}_tactical")

    def load(self, path_prefix):
        """Load both agents from ``{path_prefix}_strategic`` / ``{path_prefix}_tactical``."""
        self.strategic_agent.load(f"{path_prefix}_strategic")
        self.tactical_agent.load(f"{path_prefix}_tactical")

In [None]:
def strategic_backtest(data, idx, assets):
    """Build the strategic-agent observation vector for row ``idx`` of ``data``.

    Layout of the returned 1-D array, in order:
      * market regime, volatility regime, 20-day market breadth,
        SPY/BTC correlation, SPY/ETH correlation;
      * 30-day returns of the ``tech``, ``finance`` and ``healthcare`` sectors;
      * 30-day return of every asset in ``assets``;
      * equal portfolio weights (``1 / len(assets)`` each);
      * three zeros standing in for recent return, rolling Sharpe and
        max drawdown.

    Every column except ``market_regime`` is optional and replaced by a
    default when ``data`` does not have it.

    Args:
        data: DataFrame holding the feature columns.
        idx: Positional row index.
        assets: Sequence of ticker prefixes.

    Returns:
        ``numpy.ndarray`` of length ``11 + 2 * len(assets)``.
    """
    row = data.iloc[idx]
    columns = data.columns

    def lookup(column, default):
        # Value of ``column`` in the current row, or ``default`` if the frame lacks it.
        return row[column] if column in columns else default

    # ``market_regime`` is the only column read without a fallback.
    market_features = [
        row['market_regime'],
        lookup('market_vol_regime', 0),
        lookup('market_breadth_20d', 0.5),
        lookup('spy_btc_corr', 0),
        lookup('spy_eth_corr', 0),
    ]

    sector_returns = [
        lookup(f'{sector}_return_30d', 0)
        for sector in ('tech', 'finance', 'healthcare')
    ]
    asset_returns = [lookup(f'{asset}_return_30d', 0) for asset in assets]

    # The backtest has no portfolio history here: equal weights, zeroed metrics.
    n_assets = len(assets)
    prev_weights = np.ones(n_assets) / n_assets
    portfolio_metrics = [0, 0, 0]

    return np.concatenate([
        market_features,
        sector_returns,
        asset_returns,
        prev_weights,
        portfolio_metrics,
    ])

def tactical_backtest(data, idx, assets):
    """Build the tactical-agent observation vector for row ``idx`` of ``data``.

    Layout of the returned 1-D array, in order:
      * 10-day volatility per asset (default 0.01);
      * RSI per asset (default 50);
      * trend/cross indicator per asset (default 0);
      * crypto/equity volatility ratio (default 1), a flag that is 1 when
        ``idx > 5`` and ``market_regime`` differs from five rows earlier,
        and the weekend flag;
      * equal portfolio weights (``1 / len(assets)`` each);
      * one-row close-to-close price change per asset (0 when unavailable
        or when the previous close is not positive).

    ``is_weekend`` is read without a fallback; ``market_regime`` is only read
    when ``idx > 5``.

    Args:
        data: DataFrame holding the feature columns.
        idx: Positional row index.
        assets: Sequence of ticker prefixes.

    Returns:
        ``numpy.ndarray`` of length ``3 + 5 * len(assets)``.
    """
    row = data.iloc[idx]
    columns = data.columns

    def lookup(column, default):
        # Value of ``column`` in the current row, or ``default`` if the frame lacks it.
        return row[column] if column in columns else default

    asset_vols = [lookup(f'{asset}_vol_10d', 0.01) for asset in assets]
    asset_rsis = [lookup(f'{asset}_rsi', 50) for asset in assets]
    trend_indicators = [lookup(f'{asset}_cross', 0) for asset in assets]

    crypto_equity_ratio = lookup('crypto_equity_vol_ratio', 1)

    # Regime flip relative to five rows back; only evaluated once idx exceeds 5.
    regime_flipped = idx > 5 and row['market_regime'] != data.iloc[idx - 5]['market_regime']
    recent_regime_change = 1 if regime_flipped else 0

    is_weekend = 1 if row['is_weekend'] else 0

    n_assets = len(assets)
    prev_weights = np.ones(n_assets) / n_assets

    # Close-to-close change versus the previous row (none exists for idx == 0).
    previous_row = data.iloc[idx - 1] if idx > 0 else None
    price_changes = []
    for asset in assets:
        close_column = f'{asset}_close'
        change = 0
        if previous_row is not None and close_column in columns:
            prev_price = previous_row[close_column]
            if prev_price > 0:
                change = row[close_column] / prev_price - 1
        price_changes.append(change)

    market_flags = [crypto_equity_ratio, recent_regime_change, is_weekend]
    return np.concatenate([
        asset_vols,
        asset_rsis,
        trend_indicators,
        market_flags,
        prev_weights,
        price_changes,
    ])

In [None]:
def _backtest_metrics(portfolio_values, returns_array, weights_history, alpha_history):
    """Summarise one finished backtest run as a dict of plain-float metrics.

    Ratios whose denominator is zero or undefined are reported as 0.0
    (Sharpe) or computed against a 1e-6 floor (Sortino) so that a flat or
    empty run does not produce NaN or raise.
    """
    total_return = portfolio_values[-1] / portfolio_values[0] - 1
    days = len(returns_array)

    if days > 0:
        annualized_return = (1 + total_return) ** (252 / days) - 1
        mean_return = np.mean(returns_array)
        return_std = np.std(returns_array)
    else:
        # No step was simulated: nothing to annualise or average.
        annualized_return = 0.0
        mean_return = 0.0
        return_std = 0.0

    annualization = np.sqrt(252)
    volatility = return_std * annualization

    # Sharpe ratio (0.0 when returns have no dispersion)
    sharpe_ratio = mean_return / return_std * annualization if return_std > 0 else 0.0

    # Maximum drawdown
    peak = np.maximum.accumulate(portfolio_values)
    drawdown = (peak - portfolio_values) / peak
    max_drawdown = np.max(drawdown)

    # Sortino ratio; the 1e-6 floor also covers a single negative return (std == 0)
    negative_returns = returns_array[returns_array < 0]
    downside_std = np.std(negative_returns) if len(negative_returns) > 0 else 0.0
    if not downside_std > 0:
        downside_std = 1e-6
    sortino_ratio = mean_return / downside_std * annualization

    # Calmar ratio
    calmar_ratio = annualized_return / max_drawdown if max_drawdown > 0 else np.inf

    # Average turnover: half the L1 distance between consecutive weight vectors
    try:
        if len(weights_history) >= 2:
            turnovers = [
                np.sum(np.abs(current - previous)) / 2
                for previous, current in zip(weights_history, weights_history[1:])
            ]
            avg_turnover = np.mean(turnovers)
        else:
            avg_turnover = 0
    except Exception as e:
        print(f"Error calculating turnover: {e}")
        avg_turnover = 0

    # average strategic allocation
    avg_strategic_allocation = np.mean(alpha_history) if len(alpha_history) > 0 else None

    return {
        "total_return": float(total_return),
        "annualized_return": float(annualized_return),
        "volatility": float(volatility),
        "sharpe_ratio": float(sharpe_ratio),
        "sortino_ratio": float(sortino_ratio),
        "max_drawdown": float(max_drawdown),
        "calmar_ratio": float(calmar_ratio),
        "avg_turnover": float(avg_turnover),
        "avg_strategic_allocation": float(avg_strategic_allocation) if avg_strategic_allocation is not None else None
    }


def backtestportfolio(model, data, start_idx, end_idx, transaction_cost=0.001):
    """Simulate ``model`` on rows ``start_idx .. end_idx - 1`` of ``data``.

    At each step the model chooses target weights, a transaction cost
    proportional to the L1 weight change is charged, and the portfolio earns
    the next row's close-to-close asset returns.

    Args:
        model: Either an object with a callable ``predict(state,
            deterministic=True)`` (dual-agent systems are recognised by their
            ``strategic_agent``/``tactical_agent`` attributes, single agents
            by a ``model`` attribute), or a plain callable
            ``model(data, i, assets, weights)`` returning weights.
        data: DataFrame with one ``<ticker>_close`` column per asset.
        start_idx: First row (positional) at which a decision is taken.
        end_idx: Exclusive end row; the last decision is taken at
            ``end_idx - 2``.
        transaction_cost: Cost per unit of absolute weight change.

    Returns:
        Tuple ``(portfolio_values, weights_history, alpha_history, metrics)``:
        the equity curve as a float64 array starting at 1.0, the list of
        weight vectors, the blending weights (array, or an empty list when
        none were recorded) and the metrics dict.
    """
    print(f"Backtesting from index {start_idx} to {end_idx}")

    # Initialize portfolio
    portfolio_value = 1.0
    portfolio_values = [portfolio_value]

    # Extract assets: ticker = text before the first underscore of each *_close column
    assets = [col.split('_')[0] for col in data.columns if col.endswith('_close')]

    # Pull the close prices out once instead of re-slicing the frame for every
    # asset at every step.
    close_prices = data[[f'{asset}_close' for asset in assets]].to_numpy(dtype=np.float64)

    # Initialize weights
    weights = np.ones(len(assets)) / len(assets)
    weights_history = [weights.copy()]
    returns_history = []
    alpha_history = []

    # Run backtest
    for i in range(start_idx, end_idx - 1):
        if i % 100 == 0:
            print(f"Backtesting step {i}/{end_idx-1}")

        # Prepare state and query the model; on failure hold the current weights
        try:
            if hasattr(model, 'predict') and callable(getattr(model, 'predict')):
                # For RL models
                if hasattr(model, 'strategic_agent') and hasattr(model, 'tactical_agent'):
                    # For dual-agent system: strategic part first, tactical part second
                    state = np.concatenate([
                        strategic_backtest(data, i, assets),
                        tactical_backtest(data, i, assets),
                    ])
                elif hasattr(model, 'model'):
                    # Determine if strategic or tactical
                    if model.__class__.__name__ == 'StrategicAgent':
                        state = strategic_backtest(data, i, assets)
                    else:
                        state = tactical_backtest(data, i, assets)
                else:
                    # Use default state
                    print("Warning: Unknown model type, using generic state")
                    state = np.ones(318)

                # Get action
                action_result = model.predict(state, deterministic=True)
                if isinstance(action_result, tuple) and len(action_result) == 2:
                    action, alpha = action_result
                    alpha_history.append(float(alpha))
                else:
                    action = action_result
                    alpha_history.append(0.5)
            else:
                # For baseline models
                action = model(data, i, assets, weights)
        except Exception as e:
            print(f"Error during prediction: {e}")
            action = weights
            alpha_history.append(0.5)

        # Ensure action is a proper numpy array
        action = np.asarray(action, dtype=np.float32)
        if action.shape != weights.shape:
            print(f"Warning: Action shape {action.shape} doesn't match weights shape {weights.shape}")
            action = weights

        # Calculate transaction costs
        tc = np.sum(np.abs(action - weights)) * transaction_cost

        # Update weights
        weights = action.copy()
        weights_history.append(weights.copy())

        # Asset returns for the next day. A missing or non-positive previous
        # close has no defined return and is treated as flat (0) instead of
        # letting inf/NaN contaminate the whole equity curve.
        prev_prices = close_prices[i]
        next_prices = close_prices[i + 1]
        priced = np.isfinite(prev_prices) & np.isfinite(next_prices) & (prev_prices > 0)
        next_day_returns = np.zeros(len(assets))
        next_day_returns[priced] = next_prices[priced] / prev_prices[priced] - 1

        # Update portfolio value
        portfolio_return = np.sum(weights * next_day_returns) - tc
        returns_history.append(float(portfolio_return))
        portfolio_value *= (1 + portfolio_return)
        portfolio_values.append(float(portfolio_value))

    portfolio_values = np.array(portfolio_values, dtype=np.float64)
    returns_array = np.array(returns_history, dtype=np.float64)

    if alpha_history:
        alpha_history = np.array(alpha_history, dtype=np.float64)

    metrics = _backtest_metrics(portfolio_values, returns_array, weights_history, alpha_history)

    print("Backtest completed. Portfolio metrics:")
    for key, value in metrics.items():
        if value is not None:
            print(f"{key}: {value:.4f}")

    return portfolio_values, weights_history, alpha_history, metrics

In [None]:
def performancecomparison(results, data, train_end):
    """Plot and save the out-of-sample comparison of several models.

    Writes ``figures/performance_comparison.png`` (equity curves with shaded
    market regimes, plus the dual-agent blending weight when available) and
    one ``figures/allocation_<model>.png`` per model.

    Args:
        results: Mapping ``model_name -> dict`` with at least
            ``'portfolio_values'`` and ``'weights'``; the ``'dual_agent'``
            entry may also carry ``'alphas'``.
        data: DataFrame whose index supplies the x-axis; test rows start at
            position ``train_end``.
        train_end: Positional index of the first test row.
    """
    def dates_for(series_length):
        # Index labels for the first ``series_length`` test rows.
        return data.index[train_end:train_end + series_length]

    # Plot portfolio values
    plt.figure(figsize=(14, 10))
    plt.subplot(2, 1, 1)

    for model_name, model_results in results.items():
        # Size the x-axis per model so runs of different length still plot.
        values = model_results['portfolio_values']
        model_dates = dates_for(len(values))
        plt.plot(model_dates, values[:len(model_dates)], label=model_name)

    # Add market regimes: shade each run of consecutive rows sharing a regime
    if 'market_regime' in data.columns:
        test_data = data.iloc[train_end:]
        regime_changes = test_data['market_regime'].ne(
            test_data['market_regime'].shift()
        ).cumsum()
        regime_groups = test_data.groupby(regime_changes)

        colors = ['red', 'yellow', 'green']
        for _, group in regime_groups:
            if len(group) > 0:
                regime = group['market_regime'].iloc[0]
                # The lower bound keeps negative ids from wrapping around the colour list.
                if pd.notna(regime) and 0 <= regime < len(colors):
                    plt.axvspan(group.index[0], group.index[-1], alpha=0.2,
                              color=colors[int(regime)])

    plt.title('Portfolio Value Comparison')
    plt.xlabel('Date')
    plt.ylabel('Portfolio Value')
    plt.grid(True)
    plt.legend()

    # Plot dual-agent risk budget
    dual_results = results.get('dual_agent')
    if dual_results is not None and 'alphas' in dual_results:
        alphas = dual_results['alphas']
        alpha_dates = dates_for(len(alphas))
        plt.subplot(2, 1, 2)
        plt.plot(alpha_dates, alphas[:len(alpha_dates)], 'b-')
        plt.axhline(y=0.5, color='r', linestyle='--', alpha=0.5)
        plt.title('Strategic Agent Risk Budget Allocation (α)')
        plt.xlabel('Date')
        plt.ylabel('Alpha')
        plt.ylim(0, 1)
        plt.grid(True)

    plt.tight_layout()
    plt.savefig('figures/performance_comparison.png')
    plt.close()

    # Plot asset allocations: one figure per model, closed right after saving
    for model_name, model_results in results.items():
        plt.figure(figsize=(14, 6))
        plotasset(model_results['weights'], data.index[train_end:], model_name)
        plt.savefig(f'figures/allocation_{model_name}.png')
        plt.close()


def plotasset(weights_history, dates, title):
    """Draw a stacked-area chart of grouped portfolio weights on the current figure.

    The columns of the weight matrix are grouped purely by position:
    the last two columns are plotted individually and labelled "BTC" and
    "ETH"; of the remaining columns the first ten are summed as
    "Tech Stocks", the next ten as "Finance Stocks" and the rest as
    "Other Stocks".

    NOTE(review): the grouping assumes the asset order of the caller matches
    this positional layout - confirm against how the asset list is built.

    Args:
        weights_history: Sequence of weight vectors, one per time step.
        dates: X-axis values; trimmed together with the weights to their
            common length.
        title: Suffix for the chart title.
    """
    weights_array = np.array(weights_history)

    # Trim both series to the shorter one so stackplot always receives
    # x and y of equal length, whichever side is longer.
    n_points = min(len(dates), len(weights_array))
    dates = dates[:n_points]
    weights_array = weights_array[:n_points]

    n_assets = weights_array.shape[1]

    # grouping similar assets
    crypto_indices = list(range(n_assets-2, n_assets))
    equity_indices = list(range(n_assets-2))

    agg_weights = []
    agg_labels = []

    # Add crypto
    for i in crypto_indices:
        agg_weights.append(weights_array[:, i])
        asset_name = "BTC" if i == n_assets-2 else "ETH"
        agg_labels.append(asset_name)

    # Group equities by position
    n_equities = len(equity_indices)
    sector_groups = [
        ("Tech Stocks", list(range(0, min(10, n_equities)))),
        ("Finance Stocks", list(range(10, min(20, n_equities)))),
        ("Other Stocks", list(range(20, n_equities))),
    ]

    # Add sector groups (empty groups are skipped)
    for label, indices in sector_groups:
        if indices:
            agg_weights.append(np.sum(weights_array[:, indices], axis=1))
            agg_labels.append(label)

    # Create stacked area plot
    plt.stackplot(dates, agg_weights, labels=agg_labels, alpha=0.8)
    plt.title(f'Asset Allocation - {title}')
    plt.xlabel('Date')
    plt.ylabel('Weight')
    plt.ylim(0, 1)
    plt.legend(loc='upper left')
    plt.grid(True)

def plot_windowperformance(x_values, portfolio_values, alpha_history, weights_history, window_idx):
    """Save the performance and allocation figures of one validation window.

    Writes ``figures/window_<window_idx>_performance.png`` (equity curve and,
    when ``alpha_history`` is non-empty, the blending weight) and, when
    ``weights_history`` is non-empty,
    ``figures/window_<window_idx>_allocation.png``.

    Args:
        x_values: X-axis values; each series uses as many leading entries as
            it has points.
        portfolio_values: Equity curve of the window.
        alpha_history: Blending weights of the window (may be empty).
        weights_history: Weight vectors of the window (may be empty).
        window_idx: Window number used in titles and file names.
    """
    # --- Figure 1: equity curve on top, blending weight below -------------
    plt.figure(figsize=(14, 10))

    value_count = len(portfolio_values)
    plt.subplot(2, 1, 1)
    plt.plot(x_values[:value_count], portfolio_values, 'b-')
    plt.title(f'Portfolio Performance - Window {window_idx}')
    plt.xlabel('Time Step')
    plt.ylabel('Portfolio Value')
    plt.grid(True)

    alpha_count = len(alpha_history)
    if alpha_count > 0:
        plt.subplot(2, 1, 2)
        plt.plot(x_values[:alpha_count], alpha_history, 'r-')
        # Dashed reference line at the neutral 50/50 split
        plt.axhline(y=0.5, color='black', linestyle='--', alpha=0.5)
        plt.title(f'Strategic Agent Risk Budget Allocation (α) - Window {window_idx}')
        plt.xlabel('Time Step')
        plt.ylabel('Alpha')
        plt.ylim(0, 1)
        plt.grid(True)

    plt.tight_layout()
    plt.savefig(f'figures/window_{window_idx}_performance.png')
    plt.close()

    # --- Figure 2: asset allocation, only when weights were recorded ------
    weight_count = len(weights_history)
    if not weight_count > 0:
        return

    plt.figure(figsize=(14, 6))
    plotasset(weights_history, x_values[:weight_count], f'Window {window_idx}')
    plt.savefig(f'figures/window_{window_idx}_allocation.png')
    plt.close()


In [None]:
def forward_validation(data, window_size=252, step_size=126, validation_size=126):
    """Generate walk-forward (train, validation) index windows over ``data``.

    Windows start at 0 and advance by ``step_size``; each consists of
    ``window_size`` training rows immediately followed by
    ``validation_size`` validation rows. Only windows that fit entirely
    inside ``data`` are produced.

    Args:
        data: Any sized object; only ``len(data)`` is used.
        window_size: Number of training rows per window.
        step_size: Offset between consecutive window starts; must be positive.
        validation_size: Number of validation rows per window.

    Returns:
        List of ``(train_start, train_end, val_start, val_end)`` tuples with
        exclusive end indices.

    Raises:
        ValueError: If ``step_size`` is not positive (the window start would
            never advance), or if a single window does not fit in ``data``.
    """
    if step_size <= 0:
        raise ValueError("step_size must be a positive integer")

    data_length = len(data)
    if window_size + validation_size > data_length:
        raise ValueError("Not enough data for specified window and validation sizes")

    windows = []

    # Generate windows
    start = 0
    while start + window_size + validation_size <= data_length:
        train_start = start
        train_end = start + window_size
        val_start = train_end
        val_end = min(val_start + validation_size, data_length)

        # Keep only windows with non-empty training and validation parts
        if val_end > val_start and train_end > train_start:
            windows.append((train_start, train_end, val_start, val_end))

        start += step_size

    return windows

def aggregate_window_results(results_dict):
    """Aggregate per-window backtest metrics into summary statistics.

    Metrics are read under the key names written by ``backtestportfolio``
    (``'sharpe_ratio'``, ``'annualized_return'``, ``'max_drawdown'``,
    ``'avg_turnover'``, ``'calmar_ratio'``); the shorter names ``'sharpe'``
    and ``'return'`` are still accepted as fallbacks. Missing, ``None`` and
    non-finite values (for example an infinite Calmar ratio from a window
    without drawdown) are ignored; a statistic with no usable value is NaN.

    Args:
        results_dict: Mapping ``window_name -> dict``; entries without a
            ``'metrics'`` key are skipped.

    Returns:
        Dict of summary statistics, or ``{}`` when no entry has metrics.
    """
    all_metrics = [res['metrics'] for res in results_dict.values() if 'metrics' in res]
    if not all_metrics:
        return {}

    def collect(*keys):
        # One value per window: the first of ``keys`` that is present and not
        # None, else NaN. Non-finite values are mapped to NaN as well.
        column = []
        for window_metrics in all_metrics:
            value = np.nan
            for key in keys:
                candidate = window_metrics.get(key)
                if candidate is not None:
                    value = candidate
                    break
            column.append(value)
        values = np.asarray(column, dtype=float)
        values[~np.isfinite(values)] = np.nan
        return values

    def summarise(reducer, values):
        # Apply a nan-aware reducer, returning NaN quietly when nothing is usable.
        if not np.any(np.isfinite(values)):
            return float('nan')
        return float(reducer(values))

    sharpe_ratios = collect('sharpe_ratio', 'sharpe')
    returns = collect('annualized_return', 'return')
    volatilities = collect('volatility')
    drawdowns = collect('max_drawdown')
    turnovers = collect('avg_turnover')
    calmars = collect('calmar_ratio')

    return {
        "windows": len(all_metrics),
        "mean_sharpe": summarise(np.nanmean, sharpe_ratios),
        "std_sharpe": summarise(np.nanstd, sharpe_ratios),
        "mean_return": summarise(np.nanmean, returns),
        "std_return": summarise(np.nanstd, returns),
        "mean_volatility": summarise(np.nanmean, volatilities),
        "mean_drawdown": summarise(np.nanmean, drawdowns),
        "worst_drawdown": summarise(np.nanmax, drawdowns),
        "mean_turnover": summarise(np.nanmean, turnovers),
        "mean_calmar": summarise(np.nanmean, calmars)
    }

def rollingwindow(data, selected_features, window_size=252, step_size=63, validation_size=63):
    """Train and backtest a fresh dual-agent system on every walk-forward window.

    For each window produced by ``forward_validation`` an environment is
    built over the training rows, a new ``DualAgentSystem`` is trained on it
    and then backtested on the validation rows; the per-window results are
    plotted and finally aggregated.

    Args:
        data: Feature DataFrame shared by all windows.
        selected_features: Feature selection forwarded to the environment.
        window_size: Training rows per window.
        step_size: Offset between consecutive windows.
        validation_size: Validation rows per window.

    Returns:
        Tuple ``(results, aggregate_results)``: ``results`` maps
        ``'window_<n>'`` to that window's periods, equity curve, weights,
        alphas and metrics; ``aggregate_results`` is the output of
        ``aggregate_window_results`` or ``{"error": ...}`` if it failed.
    """
    print("Implementing walk-forward validation...")

    # validation windows
    windows = forward_validation(data, window_size, step_size, validation_size)
    n_windows = len(windows)
    print(f"Created {n_windows} validation windows")

    results = {}

    for window_number, (train_start, train_end, val_start, val_end) in enumerate(windows, start=1):
        print(f"\nProcessing window {window_number}/{n_windows}")
        print(f"Training period: {train_start} to {train_end-1}")
        print(f"Validation period: {val_start} to {val_end-1}")

        # Look back at most 30 rows (or a third of the training span) and let
        # an episode cover the rest of the training span.
        training_duration = train_end - train_start
        lookback_window = min(30, training_duration // 3)

        env = MultiMarketPortfolioEnv(
            data=data,
            selected_features=selected_features,
            window_size=lookback_window,
            train_period=(train_start, train_end),
            max_steps=max(1, training_duration - lookback_window),
            transaction_cost=0.001,
            mode='train',
        )

        # dual-agent system, trained for a budget capped at 2000 steps
        print(f"Training dual-agent system for window {window_number}...")
        dual_agent = DualAgentSystem(env)
        dual_agent.train(total_timesteps=min(2000, window_size * 4))

        # Backtest on the validation rows
        print(f"Backtesting dual-agent system for window {window_number}...")
        backtest_output = backtestportfolio(dual_agent, data, val_start, val_end)
        portfolio_values, weights_history, alpha_history, metrics = backtest_output

        # Store and visualise; a failure here is reported and the loop continues
        try:
            window_record = {
                'train_period': (train_start, train_end-1),
                'val_period': (val_start, val_end-1),
                'portfolio_values': portfolio_values,
                'weights': weights_history,
                'alphas': alpha_history,
                'metrics': metrics
            }
            results[f'window_{window_number}'] = window_record

            time_steps = list(range(len(portfolio_values)))
            plot_windowperformance(
                time_steps,
                portfolio_values,
                alpha_history,
                weights_history,
                window_number
            )
        except Exception as e:
            print(f"Error storing results for window {window_number}: {e}")

    # Aggregate results across all windows
    try:
        aggregate_results = aggregate_window_results(results)
    except Exception as e:
        print(f"Error aggregating results: {e}")
        aggregate_results = {"error": str(e)}

    return results, aggregate_results

In [None]:
pip install shimmy

Defaulting to user installation because normal site-packages is not writeable

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49m/apps/conda/envs/ood-jupyterlab-4.2/bin/python -m pip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [None]:
pip install tensorboard

Defaulting to user installation because normal site-packages is not writeable

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49m/apps/conda/envs/ood-jupyterlab-4.2/bin/python -m pip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [None]:
import torch

# Use the GPU when PyTorch reports CUDA as available, otherwise the CPU.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)
print("Using device:", device)


Using device: cuda


In [None]:
# the preprocessed data
import pickle

print("Loading preprocessed data...")
try:
    aligned_data = pd.read_csv('/scratch1/srajasek/DL_f/clean_full_data.csv')
except Exception as e:
    # Stop here: carrying on would either fail with a NameError below or,
    # worse, silently reuse a stale `aligned_data` left over in the kernel.
    print(f"Error loading data: {e}")
    raise
print(f"Data loaded with shape: {aligned_data.shape}")


# Enhance features
print("Enhancing features...")
enhanced_data = efeatures(aligned_data)
enhanced_data.to_csv('/scratch1/srajasek/DL_f/data/enhanced_multimarket_data.csv')
print(f"Enhanced data saved with shape: {enhanced_data.shape}")

# Feature selection
print("Selecting features...")
selected_features = sfeatures(enhanced_data)
print(f"Selected {len(selected_features)} features")

# Implement walk-forward validation
window_size = 252
step_size = 63
validation_size = 63

window_results, aggregate_results = rollingwindow(
    enhanced_data,
    selected_features,
    window_size,
    step_size,
    validation_size
)

# Print aggregate. rollingwindow returns {} (no windows) or {"error": ...}
# when aggregation was not possible, so the summary keys may be absent.
print("\nAggregate Performance Metrics:")
if 'windows' in aggregate_results:
    print(f"Number of windows: {aggregate_results['windows']}")
    print(f"Mean Sharpe ratio: {aggregate_results['mean_sharpe']:.4f} ± {aggregate_results['std_sharpe']:.4f}")
    print(f"Mean return: {aggregate_results['mean_return']:.4f} ± {aggregate_results['std_return']:.4f}")
    print(f"Mean max drawdown: {aggregate_results['mean_drawdown']:.4f}")
    print(f"Worst drawdown: {aggregate_results['worst_drawdown']:.4f}")
    print(f"Mean Calmar ratio: {aggregate_results['mean_calmar']:.4f}")
else:
    print(f"No aggregate metrics available: {aggregate_results}")

# Save results (the per-window results are saved even without aggregates)
print("Saving results...")
with open('results/rolling_window_results.pkl', 'wb') as f:
    pickle.dump({
        'window_results': window_results,
        'aggregate_results': aggregate_results
    }, f)

print("Implementation complete!")

Loading preprocessed data...
Data loaded with shape: (2580, 193)
Enhancing features...
Adding enhanced features...
Processing features for 63 tickers
Calculating technical indicators...
Calculating RSI for KO
Calculating RSI for AAPL
Calculating RSI for ORCL
Calculating RSI for VZ
Calculating RSI for INTU
Calculating RSI for NKE
Calculating RSI for SBUX
Calculating RSI for TSLA
Calculating RSI for ABT
Calculating RSI for RTX
Calculating RSI for ACN
Calculating RSI for DHR
Calculating RSI for PM
Calculating RSI for DIS
Calculating RSI for HON
Calculating RSI for MDT
Calculating RSI for JPM
Calculating RSI for ETH
Calculating RSI for CRM
Calculating RSI for BTC
Calculating RSI for AMZN
Calculating RSI for HD
Calculating RSI for LOW
Calculating RSI for MSFT
Calculating RSI for TMO
Calculating RSI for MRK
Calculating RSI for META
Calculating RSI for BRK-B
Calculating RSI for PEP
Calculating RSI for NEE
Calculating RSI for NFLX
Calculating RSI for CMCSA
Calculating RSI for COST
Calculating 

In [None]:
import os
import pickle
import numpy as np

# Load saved results. Prefer the file this notebook writes itself
# ('results/rolling_window_results.pkl'); the '/content/...' location is kept
# as a fallback for runs where the pickle was uploaded there instead.
RESULT_PATH_CANDIDATES = [
    'results/rolling_window_results.pkl',
    '/content/rolling_window_results.pkl',
]
results_path = next(
    (candidate for candidate in RESULT_PATH_CANDIDATES if os.path.exists(candidate)),
    RESULT_PATH_CANDIDATES[0],
)

# NOTE: pickle.load executes code embedded in the file - only load pickles
# produced by this notebook or another trusted source.
with open(results_path, 'rb') as f:
    saved_data = pickle.load(f)

window_results = saved_data['window_results']

# Initialize metric lists
sharpe_ratios = []
returns = []
drawdowns = []
calmars = []

# Extract metrics, skipping values that are missing (NaN) for a window
for window_id, result in window_results.items():
    metrics = result.get("metrics", {})
    sharpe = metrics.get("sharpe_ratio", np.nan)
    ret = metrics.get("annualized_return", np.nan)
    drawdown = metrics.get("max_drawdown", np.nan)
    calmar = metrics.get("calmar_ratio", np.nan)

    if not np.isnan(sharpe): sharpe_ratios.append(sharpe)
    if not np.isnan(ret): returns.append(ret)
    if not np.isnan(drawdown): drawdowns.append(drawdown)
    if not np.isnan(calmar): calmars.append(calmar)

# Compute aggregate; "windows" counts the windows with a usable Sharpe ratio
aggregate_metrics = {
    "windows": len(sharpe_ratios),
    "mean_sharpe": np.mean(sharpe_ratios) if sharpe_ratios else np.nan,
    "std_sharpe": np.std(sharpe_ratios) if sharpe_ratios else np.nan,
    "mean_return": np.mean(returns) if returns else np.nan,
    "std_return": np.std(returns) if returns else np.nan,
    "mean_drawdown": np.mean(drawdowns) if drawdowns else np.nan,
    "worst_drawdown": np.max(drawdowns) if drawdowns else np.nan,
    "mean_calmar": np.mean(calmars) if calmars else np.nan,
}

print("\n Aggregate Performance Metrics:")
print(f"Number of valid windows: {aggregate_metrics['windows']}")
print(f"Mean Sharpe ratio: {aggregate_metrics['mean_sharpe']:.4f} ± {aggregate_metrics['std_sharpe']:.4f}")
print(f"Mean return: {aggregate_metrics['mean_return']:.4f} ± {aggregate_metrics['std_return']:.4f}")
print(f"Mean max drawdown: {aggregate_metrics['mean_drawdown']:.4f}")
print(f"Worst drawdown: {aggregate_metrics['worst_drawdown']:.4f}")
print(f"Mean Calmar ratio: {aggregate_metrics['mean_calmar']:.4f}")



 Aggregate Performance Metrics:
Number of valid windows: 36
Mean Sharpe ratio: 1.0809 ± 1.8955
Mean return: 0.1676 ± 0.3119
Mean max drawdown: 0.0738
Worst drawdown: 0.2380
Mean Calmar ratio: 6.0006
