In [6]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from tqdm import tqdm
import warnings
import itertools

warnings.filterwarnings('ignore')

# ==============================================================================
# CONTROL PANEL & CONFIGURATION
# ==============================================================================
CONFIG = {
    # When False, the __main__ block builds the data and then does nothing else.
    "USE_WALK_FORWARD_VALIDATION": True,
    # Enables the drawdown / volatility overrides inside backtest_dt.
    "USE_RISK_MANAGEMENT_OVERLAY": True,
    # When True, backtest_dt multiplies its target return by each row's Norm_ATR.
    "USE_DYNAMIC_TARGET_RETURN": True,
    # Number of rows per training window and per backtest context.
    "WINDOW_SIZE": 30,
    # backtest_dt forces action 0 once drawdown from the high-water mark exceeds this fraction.
    "MAX_DRAWDOWN_LIMIT": 0.20,
    # backtest_dt forces action 0 when a row's Norm_ATR exceeds this value.
    "VOLATILITY_LIMIT_ATR": 1.5,
    # Total starting capital; the __main__ block splits it equally across test tickers.
    "INITIAL_CASH": 210000,
}

# ==============================================================================
# 1. ADVANCED SIMULATION ENGINE (Unchanged)
# ==============================================================================
class MarketEnvironment:
    """Simulated macro backdrop: an interest-rate path and an economic-growth path.

    Both paths are drawn from NumPy's global random state when the object is
    constructed (rates first, then growth) and stored as 1-D arrays of
    length ``num_steps`` in ``interest_rates`` and ``econ_growth``.
    """

    def __init__(self, num_steps, num_days_per_year=252.):
        self.num_steps = num_steps
        self.dt = 1. / num_days_per_year
        # Draw order matters for reproducibility under a fixed seed.
        self.interest_rates = self._simulate_ornstein_uhlenbeck()
        self.econ_growth = self._simulate_gbm()

    def _simulate_ornstein_uhlenbeck(self):
        """Return a mean-reverting rate path that starts at its long-run level."""
        speed, long_run, vol = 0.5, 0.02, 0.03
        path = np.zeros(self.num_steps)
        path[0] = long_run
        for step in range(1, self.num_steps):
            prev = path[step - 1]
            path[step] = prev + speed * (long_run - prev) * self.dt + vol * np.sqrt(self.dt) * np.random.randn()
        return path

    def _simulate_gbm(self):
        """Return the exponential of a scaled cumulative sum of normal shocks.

        NOTE(review): the ``(mu - sigma^2 / 2) * dt`` term is added once as a
        constant offset, not accumulated per step, so the path has no
        compounding drift. Confirm whether that is intended before changing
        it: the result is used directly as a drift input elsewhere.
        """
        mu, sigma, s0 = 0.05, 0.15, 1.0
        cumulative_shocks = np.random.randn(self.num_steps).cumsum()
        offset = (mu - 0.5 * sigma**2) * self.dt
        return s0 * np.exp(offset + sigma * np.sqrt(self.dt) * cumulative_shocks)

def simulate_heston_path(s0, drift_path, v0, kappa, theta, sigma, rho, num_steps, dt):
    """Simulate one price path with mean-reverting stochastic variance.

    Args:
        s0: Starting price (stored at index 0).
        drift_path: Indexable per-step drift; element ``t-1`` drives step ``t``.
        v0: Starting variance.
        kappa: Mean-reversion speed of the variance.
        theta: Long-run variance level.
        sigma: Volatility of the variance process.
        rho: Correlation between the price shock and the variance shock.
        num_steps: Length of the returned path.
        dt: Time increment per step.

    Returns:
        1-D array of ``num_steps`` prices. The variance path is internal and
        is floored at zero after every update.
    """
    price_path = np.zeros(num_steps)
    var_path = np.zeros(num_steps)
    price_path[0] = s0
    var_path[0] = v0
    for step in range(1, num_steps):
        prev_var = var_path[step - 1]
        # Two draws per step: the price shock first, then the independent
        # component of the variance shock.
        z_price = np.random.randn()
        z_var = rho * z_price + np.sqrt(1 - rho**2) * np.random.randn()
        var_path[step] = np.maximum(
            0,
            prev_var + kappa * (theta - prev_var) * dt + sigma * np.sqrt(prev_var * dt) * z_var,
        )
        price_path[step] = price_path[step - 1] * np.exp(
            (drift_path[step - 1] - 0.5 * prev_var) * dt + np.sqrt(prev_var * dt) * z_price
        )
    return price_path

def create_multi_stock_dataframe(num_rows=7000):
    """Build a synthetic multi-sector OHLC dataset with technical indicators.

    One shared MarketEnvironment supplies the macro paths. Each ticker gets
    its own price path from simulate_heston_path, synthetic Open/High/Low
    columns derived from Close, and the columns added by
    calculate_indicators. Rows containing any NaN are dropped from the result.

    Args:
        num_rows: Number of daily rows simulated per ticker.

    Returns:
        Long-format DataFrame with one row per (Date, Ticker).
    """
    dt = 1./252
    sectors = {
        'TECH': {'n_stocks': 30, 'beta_growth': 1.5, 'beta_rates': -0.2, 'add_vol': 0.15},
        'BANK': {'n_stocks': 30, 'beta_growth': 0.8, 'beta_rates': 0.9, 'add_vol': 0.20},
        'MFG': {'n_stocks': 30, 'beta_growth': 1.1, 'beta_rates': 0.3, 'add_vol': 0.10},
        'UTIL': {'n_stocks': 30, 'beta_growth': 0.3, 'beta_rates': -1.2, 'add_vol': 0.05},
        'HEALTH': {'n_stocks': 30, 'beta_growth': 0.5, 'beta_rates': -0.5, 'add_vol': 0.08},
        'ENERGY': {'n_stocks': 30, 'beta_growth': 0.9, 'beta_rates': 0.1, 'add_vol': 0.25},
        'CONSUMER': {'n_stocks': 30, 'beta_growth': 1.3, 'beta_rates': 0.5, 'add_vol': 0.12},
    }
    env = MarketEnvironment(num_rows)
    dates = pd.to_datetime(pd.date_range(start='2000-01-01', periods=num_rows))
    per_stock_frames = []
    for sector, params in sectors.items():
        extra_vol = params['add_vol']
        for stock_number in range(1, params['n_stocks'] + 1):
            ticker = f"{sector}_{stock_number}"
            base_drift = 0.02
            dynamic_drift = base_drift + (params['beta_growth'] * env.econ_growth) + (params['beta_rates'] * env.interest_rates)

            # The random start price is drawn before the path itself; keep
            # this order so seeded runs stay reproducible.
            s0 = 100 + np.random.uniform(-20, 20)
            v0 = 0.04 + extra_vol * 0.1
            theta = 0.05 + extra_vol * 0.2
            kappa, sigma, rho = 3.0, 0.4, -0.7
            prices = simulate_heston_path(s0, dynamic_drift, v0, kappa, theta, sigma, rho, num_rows, dt)

            stock_df = pd.DataFrame({'Date': dates, 'Ticker': ticker, 'Sector': sector, 'Close': prices})
            returns = stock_df['Close'].pct_change().fillna(0)
            # Scale for the synthetic Open/High/Low noise: 5-row rolling std
            # of returns, back-filled over the warm-up rows.
            volatility = returns.rolling(window=5).std().bfill() * 0.75
            prev_close = stock_df['Close'].shift(1).fillna(stock_df['Close'])
            stock_df['Open'] = prev_close + np.random.randn(num_rows) * volatility
            stock_df['High'] = stock_df[['Open', 'Close']].max(axis=1) + np.random.uniform(0, 2, num_rows) * volatility
            stock_df['Low'] = stock_df[['Open', 'Close']].min(axis=1) - np.random.uniform(0, 2, num_rows) * volatility
            per_stock_frames.append(stock_df)

    full_df = pd.concat(per_stock_frames).reset_index(drop=True)
    full_df = full_df.groupby('Ticker', group_keys=False).apply(calculate_indicators).reset_index(drop=True)
    return full_df.dropna()

# ==============================================================================
# 2. INDICATOR CALCULATION & FEATURE ENGINEERING (Unchanged)
# ==============================================================================
def calculate_indicators(df):
    """Return a date-sorted copy of ``df`` with technical-indicator columns added.

    Expects ``Date``, ``Close``, ``High`` and ``Low`` columns for a single
    instrument. Adds, in order: SMA_20, Std_Dev, BB_Upper, BB_Lower, RSI_14,
    ATR_14, SMA_50, SMA_200, MACD and MACD_Signal. Rolling columns are NaN
    until their window is full. The input frame is not modified.
    """
    frame = df.sort_values('Date')
    close = frame['Close']

    # 20-row mean and standard deviation, and the bands two deviations away.
    window20 = close.rolling(window=20)
    frame['SMA_20'] = window20.mean()
    frame['Std_Dev'] = window20.std()
    band_width = frame['Std_Dev'] * 2
    frame['BB_Upper'] = frame['SMA_20'] + band_width
    frame['BB_Lower'] = frame['SMA_20'] - band_width

    # RSI from simple 14-row averages of the up moves and the down moves.
    delta = close.diff(1)
    gain = (delta.where(delta > 0, 0)).rolling(window=14).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(window=14).mean()
    rs = gain / loss
    frame['RSI_14'] = 100 - (100 / (1 + rs))

    # True range is the row-wise maximum of three spreads; ATR is its 14-row mean.
    prev_close = close.shift()
    spreads = pd.concat(
        [frame['High'] - frame['Low'], np.abs(frame['High'] - prev_close), np.abs(frame['Low'] - prev_close)],
        axis=1,
    )
    frame['ATR_14'] = spreads.max(axis=1).rolling(window=14).mean()

    frame['SMA_50'] = close.rolling(window=50).mean()
    frame['SMA_200'] = close.rolling(window=200).mean()

    # MACD: difference of the 12- and 26-span EMAs, plus its 9-span EMA.
    ema_fast = close.ewm(span=12, adjust=False).mean()
    ema_slow = close.ewm(span=26, adjust=False).mean()
    frame['MACD'] = ema_fast - ema_slow
    frame['MACD_Signal'] = frame['MACD'].ewm(span=9, adjust=False).mean()
    return frame

class FeatureEngineer:
    """Scale indicator columns and one-hot encode the sector for model input.

    ``fit_transform`` learns the scaler and encoder from the given frame;
    ``transform`` reuses them. Both return a new frame with Date, Ticker,
    Close, ATR_14, the ``*_scaled`` features, the ``is_<sector>`` columns and
    a ``Norm_ATR`` column, with rows containing NaN dropped.
    """

    def __init__(self):
        self.scaler = StandardScaler()
        self.encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
        self.feature_cols = ['SMA_20', 'RSI_14', 'ATR_14', 'MACD']
        self.scaled_feature_cols = [f"{c}_scaled" for c in self.feature_cols]
        # Filled in by fit_transform from the encoder's learned categories.
        self.sector_cols = []

    def _assemble(self, data, sector_encoded, scaled_features):
        """Combine raw columns, scaled features and sector dummies into one frame."""
        sector_df = pd.DataFrame(sector_encoded, index=data.index, columns=self.sector_cols)
        scaled_df = pd.DataFrame(scaled_features, index=data.index, columns=self.scaled_feature_cols)
        processed_df = pd.concat([data[['Date', 'Ticker', 'Close', 'ATR_14']], scaled_df, sector_df], axis=1)
        # ATR relative to its own per-ticker rolling mean (up to 252 rows,
        # computed only over the rows present in ``data``).
        rolling_atr = data.groupby('Ticker')['ATR_14'].transform(lambda x: x.rolling(window=252, min_periods=1).mean())
        processed_df['Norm_ATR'] = data['ATR_14'] / rolling_atr
        return processed_df.dropna()

    def fit_transform(self, data):
        """Fit the encoder and scaler on ``data`` and return the processed frame."""
        data = data.copy()
        sector_encoded = self.encoder.fit_transform(data[['Sector']])
        self.sector_cols = [f"is_{cat}" for cat in self.encoder.categories_[0]]
        scaled_features = self.scaler.fit_transform(data[self.feature_cols])
        return self._assemble(data, sector_encoded, scaled_features)

    def transform(self, data):
        """Apply the already-fitted encoder and scaler to ``data``."""
        data = data.copy()
        sector_encoded = self.encoder.transform(data[['Sector']])
        scaled_features = self.scaler.transform(data[self.feature_cols])
        return self._assemble(data, sector_encoded, scaled_features)

# ==============================================================================
# 3. CORE LOGIC (All Unchanged)
# ==============================================================================
def get_trajectories(data, window_size):
    """Build fixed-length training windows from a moving-average behaviour policy.

    For each ticker, actions come from a 10-row vs 30-row moving-average
    comparison on Close (1 when the short average is above the long one,
    otherwise 0). The resulting trajectory is cut into every contiguous
    window of ``window_size`` steps.

    Args:
        data: Processed frame with ``Ticker``, ``Close`` and feature columns
            named ``*_scaled`` or ``is_*``.
        window_size: Number of steps per window.

    Returns:
        Tuple ``(states, actions, returns_to_go, timesteps)`` of arrays whose
        first axis indexes windows. All four are empty arrays when there are
        no feature columns or no ticker is long enough.
    """
    feature_cols = [c for c in data.columns if c.endswith('_scaled') or c.startswith('is_')]
    if not feature_cols:
        return np.array([]), np.array([]), np.array([]), np.array([])

    state_windows, action_windows, rtg_windows, time_windows = [], [], [], []
    for ticker in data['Ticker'].unique():
        ticker_df = data[data['Ticker'] == ticker].copy()
        if len(ticker_df) <= window_size:
            continue

        close = ticker_df['Close']
        short_ma = close.rolling(window=10).mean()
        long_ma = close.rolling(window=30).mean()
        # Comparisons against NaN warm-up rows are False, so those rows get action 0.
        ma_policy_actions = np.where(short_ma > long_ma, 1, 0)

        s, a, r, t = _generate_trajectory_from_actions(ticker_df, ma_policy_actions, feature_cols)
        if len(s) <= window_size:
            continue

        # Return-to-go at step k is the sum of rewards from k to the end.
        rewards_to_go = np.cumsum(r[::-1])[::-1]
        for start in range(len(s) - window_size):
            stop = start + window_size
            state_windows.append(s[start:stop])
            action_windows.append(a[start:stop])
            rtg_windows.append(rewards_to_go[start:stop])
            time_windows.append(t[start:stop])

    return np.array(state_windows), np.array(action_windows), np.array(rtg_windows), np.array(time_windows)

def _generate_trajectory_from_actions(data, actions, feature_cols):
    """Replay ``actions`` on one ticker and record per-step portfolio rewards.

    Starts with 10000 in cash and no holdings. From row 1 onwards, action 1
    moves all cash into the asset at that row's Close and action 0 sells all
    holdings at that row's Close. The reward for a row is the change in
    portfolio value relative to the previous row's Close.

    Args:
        data: Frame for a single ticker with ``Close`` and ``feature_cols``.
        actions: Array of 0/1 actions aligned with the rows of ``data``.
        feature_cols: Names of the state-feature columns.

    Returns:
        Tuple ``(states, actions, rewards, timesteps)``; row 0 is dropped
        from states and actions, and timesteps count up from 0.
    """
    # Pull Close out of pandas once; scalar .iloc lookups inside the loop
    # are far slower than indexing a NumPy array.
    closes = data['Close'].to_numpy()
    rewards = []
    cash, holdings = 10000, 0
    for i in range(1, len(closes)):
        prev_portfolio_val = cash + holdings * closes[i - 1]
        action = actions[i]
        current_price = closes[i]
        if action == 1 and cash > 0 and current_price > 0:
            holdings += cash / current_price
            cash = 0
        elif action == 0 and holdings > 0 and current_price > 0:
            cash += holdings * current_price
            holdings = 0
        rewards.append((cash + holdings * current_price) - prev_portfolio_val)
    return data[feature_cols].values[1:], actions[1:], np.array(rewards), np.arange(len(actions) - 1)

class DecisionTransformer(nn.Module):
    """Causal transformer over interleaved (return-to-go, state, action) tokens.

    ``forward`` returns, for every timestep, a probability distribution over
    ``act_dim`` discrete actions read from the state-token positions. The
    prediction head ends in a softmax, so outputs are probabilities, not
    logits.
    """

    def __init__(self, state_dim, act_dim, d_model, n_head, n_layer, max_ep_len):
        super().__init__()
        self.state_dim = state_dim
        self.act_dim = act_dim
        self.d_model = d_model
        # Submodules are created in a fixed order so seeded initialisation
        # is reproducible.
        encoder_layer = nn.TransformerEncoderLayer(d_model, n_head, d_model * 4, 0.1, batch_first=True)
        self.transformer = nn.TransformerEncoder(encoder_layer, n_layer)
        self.embed_timestep = nn.Embedding(max_ep_len, d_model)
        self.embed_return = nn.Linear(1, d_model)
        self.embed_state = nn.Linear(state_dim, d_model)
        self.embed_action = nn.Embedding(act_dim, d_model)
        self.embed_ln = nn.LayerNorm(d_model)
        self.predict_action = nn.Sequential(nn.Linear(d_model, act_dim), nn.Softmax(dim=-1))

    def forward(self, states, actions, returns_to_go, timesteps):
        """Return action probabilities of shape (batch, seq_len, act_dim).

        Args:
            states: Float tensor (batch, seq_len, state_dim).
            actions: Long tensor (batch, seq_len) of action indices.
            returns_to_go: Float tensor (batch, seq_len, 1).
            timesteps: Long tensor (batch, seq_len) of indices below max_ep_len.
        """
        batch_size, seq_len = states.shape[:2]
        time_embeds = self.embed_timestep(timesteps)
        state_embeds = self.embed_state(states) + time_embeds
        action_embeds = self.embed_action(actions) + time_embeds
        rtg_embeds = self.embed_return(returns_to_go) + time_embeds

        # Stack to (batch, seq_len, 3, d_model) and flatten so each timestep
        # contributes the token triple (rtg, state, action) in that order.
        tokens = torch.stack((rtg_embeds, state_embeds, action_embeds), dim=2)
        tokens = tokens.reshape(batch_size, 3 * seq_len, self.d_model)
        tokens = self.embed_ln(tokens)

        causal_mask = nn.Transformer.generate_square_subsequent_mask(3 * seq_len).to(states.device)
        encoded = self.transformer(tokens, mask=causal_mask)
        # Every third token starting at index 1 is a state token.
        state_tokens = encoded[:, 1::3]
        return self.predict_action(state_tokens)

def train(model, states, actions, rtg, timesteps, epochs=5, batch_size=256):
    """Fit ``model`` to imitate the recorded actions, in place.

    Batches are taken sequentially (no shuffling) from the given arrays.
    Gradients are clipped to a norm of 1.0 and optimised with AdamW at a
    learning rate of 1e-4.

    Args:
        model: Module with an ``act_dim`` attribute whose forward pass takes
            ``(states, actions, returns_to_go, timesteps)`` and returns
            action probabilities of shape (batch, seq_len, act_dim).
        states: Float array (n_windows, seq_len, state_dim).
        actions: Integer array (n_windows, seq_len) of target action indices.
        rtg: Float array (n_windows, seq_len) of returns-to-go.
        timesteps: Integer array (n_windows, seq_len).
        epochs: Number of full passes over the data.
        batch_size: Number of windows per optimisation step.

    Returns:
        The same ``model`` instance, left in training mode on the selected device.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device).train()
    optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)

    s = torch.from_numpy(states).float()
    a = torch.from_numpy(actions).long()
    r = torch.from_numpy(rtg).float()
    t = torch.from_numpy(timesteps).long()

    for _ in range(epochs):
        for start in range(0, len(states), batch_size):
            stop = start + batch_size
            s_b = s[start:stop].to(device)
            a_b = a[start:stop].to(device)
            r_b = r[start:stop].to(device)
            t_b = t[start:stop].to(device)

            action_probs = model(s_b, a_b, r_b.unsqueeze(-1), t_b)
            # The model already applies a softmax, so its output is a
            # probability distribution. F.cross_entropy expects raw logits
            # and would apply softmax a second time, flattening the
            # gradients. Use the negative log-likelihood of the
            # probabilities instead; the clamp guards against log(0).
            log_probs = torch.log(action_probs.clamp_min(1e-8))
            loss = F.nll_loss(log_probs.reshape(-1, model.act_dim), a_b.reshape(-1))

            optimizer.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()
    return model

def backtest_dt(model, data, window_size, initial_cash):
    """Replay ``data`` row by row, trading all-in/all-out on the model's action.

    A rolling context of the last ``window_size`` states, actions and target
    returns is fed to the model at each row; the action predicted for the
    final position is executed at that row's Close. Context timesteps are
    always ``0..window_size-1`` and the context starts zero-filled.

    Behaviour controlled by the module-level ``CONFIG``:
      * USE_DYNAMIC_TARGET_RETURN: scale the target return (10% of
        ``initial_cash``) by the row's positive, non-NaN ``Norm_ATR``.
      * USE_RISK_MANAGEMENT_OVERLAY: force action 0 when drawdown from the
        high-water mark exceeds MAX_DRAWDOWN_LIMIT or when ``Norm_ATR``
        exceeds VOLATILITY_LIMIT_ATR.

    Args:
        model: Trained model exposing ``state_dim``; called as
            ``model(states, actions, returns_to_go, timesteps)``.
        data: Processed frame for one ticker with ``Date``, ``Close``,
            feature columns (``*_scaled`` / ``is_*``) and optionally ``Norm_ATR``.
        window_size: Context length in rows.
        initial_cash: Starting capital.

    Returns:
        Series of portfolio values, one per row, indexed by ``data['Date']``.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.eval().to(device)
    cash, holdings = initial_cash, 0
    portfolio_values = [initial_cash]
    high_water_mark = initial_cash
    feature_cols = [c for c in data.columns if c.endswith('_scaled') or c.startswith('is_')]

    # Pull everything the loop needs out of pandas once: per-row .iloc
    # lookups on a DataFrame are very slow compared with array indexing.
    feature_matrix = torch.tensor(
        data[feature_cols].to_numpy(dtype=np.float64), dtype=torch.float32, device=device
    )
    closes = data['Close'].to_numpy()
    norm_atr = data['Norm_ATR'].to_numpy() if 'Norm_ATR' in data.columns else None

    context_states = torch.zeros(1, window_size, model.state_dim, device=device, dtype=torch.float32)
    context_actions = torch.zeros(1, window_size, dtype=torch.long, device=device)
    context_rtg = torch.zeros(1, window_size, 1, device=device, dtype=torch.float32)
    context_timesteps = torch.arange(window_size, device=device).reshape(1, window_size)

    for i in range(len(data)):
        current_state = feature_matrix[i].reshape(1, 1, model.state_dim)
        context_states = torch.cat([context_states[:, 1:], current_state], dim=1)

        atr = norm_atr[i] if norm_atr is not None else None
        atr_valid = atr is not None and not pd.isna(atr)

        target_return = initial_cash * 0.1
        if CONFIG["USE_DYNAMIC_TARGET_RETURN"] and atr_valid and atr > 0:
            target_return *= atr
        rtg_update = torch.tensor([[[target_return]]], device=device, dtype=torch.float32)
        context_rtg = torch.cat([context_rtg[:, 1:], rtg_update], dim=1)

        with torch.no_grad():
            action_preds = model(context_states, context_actions, context_rtg, context_timesteps)
        final_action = torch.argmax(action_preds[0, -1, :]).item()

        if CONFIG["USE_RISK_MANAGEMENT_OVERLAY"]:
            current_drawdown = (high_water_mark - portfolio_values[-1]) / high_water_mark if high_water_mark > 0 else 0
            if current_drawdown > CONFIG["MAX_DRAWDOWN_LIMIT"]:
                final_action = 0
            if atr_valid and atr > CONFIG["VOLATILITY_LIMIT_ATR"]:
                final_action = 0

        current_price = closes[i]
        if final_action == 1 and cash > 0 and current_price > 0:
            holdings += cash / current_price
            cash = 0
        elif final_action == 0 and holdings > 0 and current_price > 0:
            cash += holdings * current_price
            holdings = 0

        new_value = cash + holdings * current_price
        portfolio_values.append(new_value)
        high_water_mark = max(high_water_mark, new_value)
        # The executed action (after any risk override) is what enters the context.
        context_actions = torch.cat([context_actions[:, 1:], torch.tensor([[final_action]], device=device)], dim=1)

    # Drop the seed value so the series has exactly one entry per row.
    return pd.Series(portfolio_values[1:], index=data['Date'])

def backtest_traditional(data, signals, initial_cash):
    """Run an all-in/all-out backtest driven by +1 / -1 signals.

    A signal of 1 while flat buys with all cash at the row's Close; a signal
    of -1 while holding sells everything at the row's Close. Any other
    signal, or a non-positive price, leaves the position unchanged.

    Args:
        data: Frame with a ``Close`` column.
        signals: Series aligned positionally with ``data``.
        initial_cash: Starting capital.

    Returns:
        Series of portfolio values, one per row, indexed like ``data``.
    """
    closes = data['Close']
    cash = initial_cash
    holdings = 0
    equity_curve = []
    for row in range(len(data)):
        signal = signals.iloc[row]
        price = closes.iloc[row]
        if price > 0:
            if signal == 1 and holdings == 0:
                holdings = cash / price
                cash = 0
            elif signal == -1 and holdings > 0:
                cash = holdings * price
                holdings = 0
        equity_curve.append(cash + holdings * price)
    return pd.Series(equity_curve, index=data.index)

# ==============================================================================
# 4. TRADITIONAL STRATEGIES & ADAPTIVE OPTIMIZERS (Unchanged)
# ==============================================================================
def generate_ma_cross_signals(data, short_window=50, long_window=200):
    """Return +1 / -1 / 0 signals from a moving-average crossover on Close.

    The signal is 1 where the short average is above the long one and -1
    where it is below. Rows where neither holds carry the previous signal
    forward, and rows before the first signal are 0.
    """
    close = data['Close']
    fast_ma = close.rolling(window=short_window).mean()
    slow_ma = close.rolling(window=long_window).mean()
    signals = pd.Series(index=data.index, dtype=int)
    signals.loc[fast_ma > slow_ma] = 1
    signals.loc[fast_ma < slow_ma] = -1
    return signals.ffill().fillna(0)
def generate_rsi_signals(data, buy_threshold=30, sell_threshold=70):
    """Return +1 / -1 / 0 signals from the RSI_14 column.

    The signal is 1 where RSI_14 is below ``buy_threshold`` and -1 where it
    is above ``sell_threshold``. Rows in between carry the previous signal
    forward, and rows before the first signal are 0.
    """
    rsi = data['RSI_14']
    signals = pd.Series(index=data.index, dtype=int)
    signals.loc[rsi < buy_threshold] = 1
    signals.loc[rsi > sell_threshold] = -1
    return signals.ffill().fillna(0)
def generate_macd_signals(data):
    """Return +1 / -1 / 0 signals from MACD versus its signal line.

    The signal is 1 where MACD is above MACD_Signal and -1 where it is
    below. Rows where they are equal (or NaN) carry the previous signal
    forward, and rows before the first signal are 0.
    """
    macd_line = data['MACD']
    signal_line = data['MACD_Signal']
    signals = pd.Series(index=data.index, dtype=int)
    signals.loc[macd_line > signal_line] = 1
    signals.loc[macd_line < signal_line] = -1
    return signals.ffill().fillna(0)
class ParameterOptimizer:
    """Grid-search a signal function's parameters separately for each ticker."""

    def __init__(self, strategy_func, param_grid):
        # strategy_func(data, **params) must return a signal Series;
        # param_grid maps each parameter name to its candidate values.
        self.strategy_func = strategy_func
        self.param_grid = param_grid

    def find_best_params_per_stock(self, train_data, tickers):
        """Return ``{ticker: best_params}`` ranked by per-period mean/std of returns.

        Every parameter combination is backtested with 10000 starting cash
        on the ticker's rows of ``train_data``. Combinations whose returns
        have zero spread score 0. When the grid has both ``short_window``
        and ``long_window``, only combinations with short < long are tried.
        Tickers with no rows in ``train_data`` are left out of the result.
        """
        best_by_ticker = {}
        names, value_lists = zip(*self.param_grid.items())
        candidates = [dict(zip(names, combo)) for combo in itertools.product(*value_lists)]
        if 'short_window' in self.param_grid and 'long_window' in self.param_grid:
            candidates = [c for c in candidates if c['short_window'] < c['long_window']]

        for ticker in tickers:
            ticker_data = train_data[train_data['Ticker'] == ticker]
            if ticker_data.empty:
                continue
            best_params, best_sharpe = {}, -np.inf
            for params in candidates:
                signals = self.strategy_func(ticker_data, **params)
                portfolio = backtest_traditional(ticker_data, signals, 10000)
                returns = portfolio.pct_change().dropna()
                sharpe_ratio = 0
                if not returns.empty and returns.std() > 0:
                    sharpe_ratio = returns.mean() / returns.std()
                # Strict comparison: the first of several tied combinations wins.
                if sharpe_ratio > best_sharpe:
                    best_sharpe, best_params = sharpe_ratio, params
            best_by_ticker[ticker] = best_params

        print(f"Completed per-stock optimization for {self.strategy_func.__name__}")
        return best_by_ticker

# ==============================================================================
# 5. --- NEW --- DETAILED PERFORMANCE REPORTING ENGINE
# ==============================================================================
def calculate_performance_metrics(portfolio_values):
    """Summarise an equity curve as a dictionary of performance metrics.

    Args:
        portfolio_values: Series of portfolio values with a datetime-like
            index (the elapsed time is read from the first and last labels).

    Returns:
        Dict with Total Return, Annualized Return, Annualized Volatility,
        Max Drawdown, Sharpe Ratio and Calmar Ratio, or ``None`` when the
        series is empty, starts at 0, or has no return variation.
    """
    if portfolio_values.empty:
        return None
    start_value = portfolio_values.iloc[0]
    if start_value == 0:
        return None

    returns = portfolio_values.pct_change().dropna()
    if returns.empty or returns.std() == 0:
        return None

    end_value = portfolio_values.iloc[-1]
    total_return = (end_value - start_value) / start_value

    # Elapsed calendar time between the first and last index labels.
    elapsed = portfolio_values.index[-1] - portfolio_values.index[0]
    num_years = elapsed.days / 365.25
    if num_years > 0:
        annualized_return = (1 + total_return) ** (1/num_years) - 1
    else:
        annualized_return = 0

    annualized_volatility = returns.std() * np.sqrt(252)
    if annualized_volatility > 0:
        sharpe_ratio = annualized_return / annualized_volatility
    else:
        sharpe_ratio = 0

    # Drawdown is measured against the running peak of the curve.
    running_peak = portfolio_values.cummax()
    max_drawdown = ((portfolio_values - running_peak) / running_peak).min()

    if max_drawdown < 0:
        calmar_ratio = annualized_return / abs(max_drawdown)
    else:
        calmar_ratio = 0

    return {
        "Total Return": total_return,
        "Annualized Return": annualized_return,
        "Annualized Volatility": annualized_volatility,
        "Max Drawdown": max_drawdown,
        "Sharpe Ratio": sharpe_ratio,
        "Calmar Ratio": calmar_ratio
    }

def print_performance_summary(name, metrics):
    """Print the six metrics in ``metrics`` under a header naming ``name``.

    Return, volatility and drawdown values are shown as percentages with two
    decimals; the Sharpe and Calmar ratios as plain two-decimal numbers.
    """
    layout = (
        ("Total Return", ".2%"),
        ("Annualized Return", ".2%"),
        ("Annualized Volatility", ".2%"),
        ("Max Drawdown", ".2%"),
        ("Sharpe Ratio", ".2f"),
        ("Calmar Ratio", ".2f"),
    )
    print(f"\n--- Performance: {name} ---")
    for label, spec in layout:
        print(f"{label}: {metrics[label]:{spec}}")

# ==============================================================================
# 6. MAIN EXECUTION LOGIC (Updated for detailed, per-sector reporting)
# ==============================================================================
# Script entry point: simulate the dataset, then run a walk-forward comparison of
# the Decision Transformer against per-stock-optimized traditional strategies and
# print overall and per-sector performance tables.
if __name__ == "__main__":
    df = create_multi_stock_dataframe()
    # With walk-forward validation disabled there is currently nothing else to run.
    if not CONFIG["USE_WALK_FORWARD_VALIDATION"]: pass
    else:
        print("Running Walk-Forward Validation with Per-Stock Adaptive Optimizers.")
        df['Date'] = pd.to_datetime(df['Date']); df = df.set_index('Date'); df.sort_index(inplace=True)
        # Create a Ticker -> Sector mapping for later use
        ticker_to_sector = df[['Ticker', 'Sector']].drop_duplicates().set_index('Ticker')['Sector'].to_dict()
        
        # Each fold trains on 8 calendar years and tests on the following 2.
        unique_years = df.index.year.unique(); train_window_yrs, test_window_yrs = 8, 2
        # --- MODIFIED --- Store individual stock results per fold
        all_folds_stock_results = []
        
        rsi_optimizer = ParameterOptimizer(generate_rsi_signals, {'buy_threshold': [25, 30, 35], 'sell_threshold': [70, 75, 80]})
        ma_optimizer = ParameterOptimizer(generate_ma_cross_signals, {'short_window': [20, 30, 50], 'long_window': [100, 150, 200]})

        # i indexes the first test year; the window slides forward by the test length.
        for i in range(train_window_yrs, len(unique_years), test_window_yrs):
            train_start_year, train_end_year = unique_years[i - train_window_yrs], unique_years[i - 1]
            # The final fold's test window is clipped to the last available year.
            test_start_year = unique_years[i]; test_end_year = unique_years[min(i + test_window_yrs - 1, len(unique_years)-1)]
            print(f"\n===== FOLD: Training on {train_start_year}-{train_end_year}, Testing on {test_start_year}-{test_end_year} =====")
            train_df, test_df = df.loc[str(train_start_year):str(train_end_year)], df.loc[str(test_start_year):str(test_end_year)]
            
            # The scaler and encoder are fit on the training slice only and reused for the test slice.
            engineer = FeatureEngineer(); processed_train = engineer.fit_transform(train_df.reset_index()); 
            processed_test = engineer.transform(test_df.reset_index())
            
            states, actions, rtg, timesteps = get_trajectories(processed_train, CONFIG["WINDOW_SIZE"])
            if states.shape[0] < 1: print("Not enough data for DT. Skipping fold."); continue
            
            # A fresh model is created and trained for every fold.
            state_dim = states.shape[2]; model = DecisionTransformer(state_dim, 2, d_model=128, n_head=4, n_layer=3, max_ep_len=10000)
            print("Training Decision Transformer on all stocks..."); model = train(model, states, actions, rtg, timesteps, epochs=5)
            
            tickers = test_df['Ticker'].unique()
            if len(tickers) == 0: continue
            # Equal capital allocation across the tickers present in the test slice.
            cash_per_stock = CONFIG["INITIAL_CASH"] / len(tickers)
            
            print("Optimizing traditional strategies for each stock...")
            best_rsi_params_per_stock = rsi_optimizer.find_best_params_per_stock(train_df, train_df['Ticker'].unique())
            best_ma_params_per_stock = ma_optimizer.find_best_params_per_stock(train_df, train_df['Ticker'].unique())

            # --- MODIFIED --- Store results for each stock in a dictionary
            fold_stock_results = {ticker: {} for ticker in tickers}
            print("Backtesting all strategies...")
            for t in tqdm(tickers, desc="Backtesting Tickers"):
                ticker_data = test_df[test_df['Ticker'] == t]; proc_data = processed_test[processed_test['Ticker'] == t]
                if ticker_data.empty: continue
                # MA Cross
                params = best_ma_params_per_stock.get(t, {})
                if params: fold_stock_results[t]['MA Cross (Optimized)'] = backtest_traditional(ticker_data, generate_ma_cross_signals(ticker_data, **params), cash_per_stock)
                # RSI
                params = best_rsi_params_per_stock.get(t, {})
                if params: fold_stock_results[t]['RSI (Optimized)'] = backtest_traditional(ticker_data, generate_rsi_signals(ticker_data, **params), cash_per_stock)
                # MACD
                fold_stock_results[t]['MACD'] = backtest_traditional(ticker_data, generate_macd_signals(ticker_data), cash_per_stock)
                # Benchmark
                # Buy-and-hold: Close rescaled so the first test row is worth cash_per_stock.
                fold_stock_results[t]['Benchmark'] = ticker_data['Close'] * (cash_per_stock / ticker_data['Close'].iloc[0])
                # DT
                if not proc_data.empty: fold_stock_results[t]['DT'] = backtest_dt(model, proc_data, CONFIG["WINDOW_SIZE"], cash_per_stock)
            
            all_folds_stock_results.append(fold_stock_results)

        # --- NEW --- FINAL AGGREGATION & DETAILED REPORTING
        print("\n\n" + "="*60); print("===== FINAL WALK-FORWARD PERFORMANCE SUMMARY ====="); print("="*60)
        
        # 1. Combine results from all folds
        # NOTE(review): every fold's backtest restarts from cash_per_stock, so the
        # concatenated curve jumps back to the starting capital at each fold
        # boundary. Total return and drawdown computed on it include those resets.
        final_stock_results = {}
        for fold_res in all_folds_stock_results:
            for ticker, strategies in fold_res.items():
                if ticker not in final_stock_results: final_stock_results[ticker] = {}
                for strat, series in strategies.items():
                    if strat not in final_stock_results[ticker]: final_stock_results[ticker][strat] = []
                    final_stock_results[ticker][strat].append(series)
        
        for ticker, strats in final_stock_results.items():
            for strat, series_list in strats.items():
                final_stock_results[ticker][strat] = pd.concat(series_list)

        # 2. Structure results by strategy and sector
        strategy_keys = ["DT", "MA Cross (Optimized)", "RSI (Optimized)", "MACD", "Benchmark"]
        results_by_strategy = {strat: {} for strat in strategy_keys}
        for ticker, strats in final_stock_results.items():
            sector = ticker_to_sector.get(ticker)
            if not sector: continue
            for strat, series in strats.items():
                if sector not in results_by_strategy[strat]: results_by_strategy[strat][sector] = []
                results_by_strategy[strat][sector].append(series)
        
        # 3. Calculate and print all stats
        for strat, sectors in results_by_strategy.items():
            strat_name = strat.replace('Benchmark', 'Buy & Hold')
            print("\n" + "#"*60); print(f"## Strategy: {strat_name}"); print("#"*60)

            # --- Overall Performance ---
            all_stock_series = [s for sector_stocks in sectors.values() for s in sector_stocks]
            if not all_stock_series: continue
            # The overall portfolio is the date-aligned sum of every stock's equity curve.
            overall_portfolio = pd.concat(all_stock_series, axis=1).sum(axis=1)
            overall_metrics = calculate_performance_metrics(overall_portfolio)
            if overall_metrics:
                print_performance_summary("Overall Portfolio", overall_metrics)

            # --- Per-Sector Performance ---
            print("\n--- Per-Sector Performance Breakdown ---")
            for sector, stock_series_list in sorted(sectors.items()):
                sector_metrics = []
                for series in stock_series_list:
                    metrics = calculate_performance_metrics(series)
                    if metrics: sector_metrics.append(metrics)
                
                if not sector_metrics: continue
                
                # Aggregate metrics for the sector
                # These are averages / extremes of per-stock metrics, not metrics of a pooled sector portfolio.
                avg_metrics = {k: np.mean([m[k] for m in sector_metrics]) for k in sector_metrics[0]}
                min_metrics = {k: np.min([m[k] for m in sector_metrics]) for k in sector_metrics[0]}
                max_metrics = {k: np.max([m[k] for m in sector_metrics]) for k in sector_metrics[0]}

                print(f"\n Sector: {sector} ({len(stock_series_list)} stocks)")
                print(f" {'Metric':<25} | {'Average':>12} | {'Max':>12} | {'Min':>12}")
                print("-" * 65)
                for k in avg_metrics:
                    is_percent = "Return" in k or "Volatility" in k or "Drawdown" in k
                    avg_str = f"{avg_metrics[k]:.2%}" if is_percent else f"{avg_metrics[k]:.2f}"
                    max_str = f"{max_metrics[k]:.2%}" if is_percent else f"{max_metrics[k]:.2f}"
                    min_str = f"{min_metrics[k]:.2%}" if is_percent else f"{min_metrics[k]:.2f}"
                    print(f" {k:<25} | {avg_str:>12} | {max_str:>12} | {min_str:>12}")

Running Walk-Forward Validation with Per-Stock Adaptive Optimizers.

===== FOLD: Training on 2000-2007, Testing on 2008-2009 =====
Training Decision Transformer on all stocks...
Optimizing traditional strategies for each stock...
Completed per-stock optimization for generate_rsi_signals
Completed per-stock optimization for generate_ma_cross_signals
Backtesting all strategies...


Backtesting Tickers: 100%|██████████| 210/210 [02:52<00:00,  1.22it/s]



===== FOLD: Training on 2002-2009, Testing on 2010-2011 =====
Training Decision Transformer on all stocks...
Optimizing traditional strategies for each stock...
Completed per-stock optimization for generate_rsi_signals
Completed per-stock optimization for generate_ma_cross_signals
Backtesting all strategies...


Backtesting Tickers: 100%|██████████| 210/210 [03:01<00:00,  1.16it/s]



===== FOLD: Training on 2004-2011, Testing on 2012-2013 =====
Training Decision Transformer on all stocks...


KeyboardInterrupt: 

In [9]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.distributions import Categorical
import torch.nn.functional as F
from torch.utils.tensorboard import SummaryWriter
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from tqdm import tqdm
import warnings
import itertools
import os
import webbrowser
import subprocess
import time

warnings.filterwarnings('ignore')

# ==============================================================================
# CONTROL PANEL & CONFIGURATION
# ==============================================================================
# Run-wide switches and limits for this cell.
# NOTE(review): the code that reads these keys is outside the visible part of
# this cell; the comments below assume the same meaning as in the earlier
# cell. Confirm against the consumers.
CONFIG = {
    # Presumably toggles the walk-forward evaluation loop.
    "USE_WALK_FORWARD_VALIDATION": True,
    # Presumably toggles the drawdown / volatility overrides during backtests.
    "USE_RISK_MANAGEMENT_OVERLAY": True,
    # Presumably toggles scaling of the target return by normalized ATR.
    "USE_DYNAMIC_TARGET_RETURN": True,
    # Presumably the context / window length in rows.
    "WINDOW_SIZE": 30,
    # Presumably a drawdown fraction (0.20 = 20%).
    "MAX_DRAWDOWN_LIMIT": 0.20,
    # Presumably a ceiling on normalized ATR.
    "VOLATILITY_LIMIT_ATR": 1.5,
    # Presumably the total starting capital.
    "INITIAL_CASH": 210000,
}
# Define device globally for all models
# Uses CUDA when torch reports it as available, otherwise the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ==============================================================================
# 1. & 2. SIMULATION & FEATURE ENGINEERING (Unchanged)
# ==============================================================================
class MarketEnvironment:
    """Simulated macro backdrop: an interest-rate path and an economic-growth path.

    Both paths are drawn from NumPy's global random state when the object is
    constructed (rates first, then growth) and stored as 1-D arrays of
    length ``num_steps`` in ``interest_rates`` and ``econ_growth``.
    """

    def __init__(self, num_steps, num_days_per_year=252.):
        self.num_steps = num_steps
        self.dt = 1. / num_days_per_year
        # Draw order matters for reproducibility under a fixed seed.
        self.interest_rates = self._simulate_ornstein_uhlenbeck()
        self.econ_growth = self._simulate_gbm()

    def _simulate_ornstein_uhlenbeck(self):
        """Return a mean-reverting rate path that starts at its long-run level."""
        speed, long_run, vol = 0.5, 0.02, 0.03
        path = np.zeros(self.num_steps)
        path[0] = long_run
        for step in range(1, self.num_steps):
            prev = path[step - 1]
            path[step] = prev + speed * (long_run - prev) * self.dt + vol * np.sqrt(self.dt) * np.random.randn()
        return path

    def _simulate_gbm(self):
        """Return the exponential of a scaled cumulative sum of normal shocks.

        NOTE(review): the ``(mu - sigma^2 / 2) * dt`` term is added once as a
        constant offset, not accumulated per step, so the path has no
        compounding drift. Confirm whether that is intended before changing
        it: the result is used directly as a drift input elsewhere.
        """
        mu, sigma, s0 = 0.05, 0.15, 1.0
        cumulative_shocks = np.random.randn(self.num_steps).cumsum()
        offset = (mu - 0.5 * sigma**2) * self.dt
        return s0 * np.exp(offset + sigma * np.sqrt(self.dt) * cumulative_shocks)

def simulate_heston_path(s0, drift_path, v0, kappa, theta, sigma, rho, num_steps, dt):
    """Simulate one price path with mean-reverting stochastic variance.

    Args:
        s0: Starting price (stored at index 0).
        drift_path: Indexable per-step drift; element ``t-1`` drives step ``t``.
        v0: Starting variance.
        kappa: Mean-reversion speed of the variance.
        theta: Long-run variance level.
        sigma: Volatility of the variance process.
        rho: Correlation between the price shock and the variance shock.
        num_steps: Length of the returned path.
        dt: Time increment per step.

    Returns:
        1-D array of ``num_steps`` prices. The variance path is internal and
        is floored at zero after every update.
    """
    price_path = np.zeros(num_steps)
    var_path = np.zeros(num_steps)
    price_path[0] = s0
    var_path[0] = v0
    for step in range(1, num_steps):
        prev_var = var_path[step - 1]
        # Two draws per step: the price shock first, then the independent
        # component of the variance shock.
        z_price = np.random.randn()
        z_var = rho * z_price + np.sqrt(1 - rho**2) * np.random.randn()
        var_path[step] = np.maximum(
            0,
            prev_var + kappa * (theta - prev_var) * dt + sigma * np.sqrt(prev_var * dt) * z_var,
        )
        price_path[step] = price_path[step - 1] * np.exp(
            (drift_path[step - 1] - 0.5 * prev_var) * dt + np.sqrt(prev_var * dt) * z_price
        )
    return price_path

def create_multi_stock_dataframe(num_rows=7000):
    """Build a synthetic multi-stock OHLC dataset with technical indicators.

    Seven sectors with three stocks each are simulated against one shared
    ``MarketEnvironment``. Each stock gets a Heston price path whose drift is
    a base rate plus sector betas on the growth and interest-rate paths.

    Args:
        num_rows: Number of daily rows simulated per stock (before NaN rows
            from the indicator warm-up are dropped).

    Returns:
        DataFrame of all stocks with Date, Ticker, Sector, OHLC and indicator
        columns; rows containing any NaN are removed.
    """
    dt = 1./252
    sectors = {
        'TECH': {'n_stocks': 3, 'beta_growth': 1.5, 'beta_rates': -0.2, 'add_vol': 0.15},
        'BANK': {'n_stocks': 3, 'beta_growth': 0.8, 'beta_rates': 0.9, 'add_vol': 0.20},
        'MFG': {'n_stocks': 3, 'beta_growth': 1.1, 'beta_rates': 0.3, 'add_vol': 0.10},
        'UTIL': {'n_stocks': 3, 'beta_growth': 0.3, 'beta_rates': -1.2, 'add_vol': 0.05},
        'HEALTH': {'n_stocks': 3, 'beta_growth': 0.5, 'beta_rates': -0.5, 'add_vol': 0.08},
        'ENERGY': {'n_stocks': 3, 'beta_growth': 0.9, 'beta_rates': 0.1, 'add_vol': 0.25},
        'CONSUMER': {'n_stocks': 3, 'beta_growth': 1.3, 'beta_rates': 0.5, 'add_vol': 0.12},
    }
    env = MarketEnvironment(num_rows)
    all_stocks_df = []
    dates = pd.to_datetime(pd.date_range(start='2000-01-01', periods=num_rows))
    for sector, params in sectors.items():
        for stock_number in range(1, params['n_stocks'] + 1):
            ticker = f"{sector}_{stock_number}"
            base_drift = 0.02
            dynamic_drift = base_drift + (params['beta_growth'] * env.econ_growth) + (params['beta_rates'] * env.interest_rates)

            # Heston parameters; only the starting price is random.
            s0 = 100 + np.random.uniform(-20, 20)
            v0 = 0.04 + params['add_vol'] * 0.1
            theta = 0.05 + params['add_vol'] * 0.2
            kappa, sigma, rho = 3.0, 0.4, -0.7
            prices = simulate_heston_path(s0, dynamic_drift, v0, kappa, theta, sigma, rho, num_rows, dt)

            stock_df = pd.DataFrame({'Date': dates, 'Ticker': ticker, 'Sector': sector, 'Close': prices})
            returns = stock_df['Close'].pct_change().fillna(0)
            volatility = returns.rolling(window=5).std().bfill() * 0.75

            # Open is the previous close plus noise; High/Low widen the
            # Open/Close envelope by a non-negative random amount.
            previous_close = stock_df['Close'].shift(1).fillna(stock_df['Close'])
            stock_df['Open'] = previous_close + np.random.randn(num_rows) * volatility
            envelope = stock_df[['Open', 'Close']]
            stock_df['High'] = envelope.max(axis=1) + np.random.uniform(0, 2, num_rows) * volatility
            stock_df['Low'] = stock_df[['Open', 'Close']].min(axis=1) - np.random.uniform(0, 2, num_rows) * volatility
            all_stocks_df.append(stock_df)

    full_df = pd.concat(all_stocks_df).reset_index(drop=True)
    full_df = full_df.groupby('Ticker', group_keys=False).apply(calculate_indicators).reset_index(drop=True)
    return full_df.dropna()

def calculate_indicators(df):
    """Add technical-indicator columns to a single stock's OHLC frame.

    The frame is sorted by ``Date`` first. Columns are added in this order:
    SMA_20, Std_Dev, BB_Upper, BB_Lower, RSI_14, ATR_14, SMA_50, SMA_200,
    MACD, MACD_Signal. Warm-up rows of the rolling windows are left as NaN.

    Args:
        df: Frame with at least Date, Close, High and Low columns.

    Returns:
        A date-sorted copy of ``df`` with the indicator columns appended.
    """
    df = df.sort_values('Date')
    close = df['Close']

    # 20-day moving average and Bollinger bands at two standard deviations.
    df['SMA_20'] = close.rolling(window=20).mean()
    df['Std_Dev'] = close.rolling(window=20).std()
    band_width = df['Std_Dev'] * 2
    df['BB_Upper'] = df['SMA_20'] + band_width
    df['BB_Lower'] = df['SMA_20'] - band_width

    # RSI from simple 14-day averages of up-moves and down-moves.
    delta = close.diff(1)
    gain = (delta.where(delta > 0, 0)).rolling(window=14).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(window=14).mean()
    rs = gain / loss
    df['RSI_14'] = 100 - (100 / (1 + rs))

    # True range is the largest of the three classic ranges; ATR is its 14-day mean.
    previous_close = close.shift()
    high_low = df['High'] - df['Low']
    high_close = np.abs(df['High'] - previous_close)
    low_close = np.abs(df['Low'] - previous_close)
    tr = pd.concat([high_low, high_close, low_close], axis=1).max(axis=1)
    df['ATR_14'] = tr.rolling(window=14).mean()

    df['SMA_50'] = close.rolling(window=50).mean()
    df['SMA_200'] = close.rolling(window=200).mean()

    # MACD (12/26 EMA difference) and its 9-period signal line.
    ema_12 = close.ewm(span=12, adjust=False).mean()
    ema_26 = close.ewm(span=26, adjust=False).mean()
    df['MACD'] = ema_12 - ema_26
    df['MACD_Signal'] = df['MACD'].ewm(span=9, adjust=False).mean()
    return df

class FeatureEngineer:
    """Scale the numeric indicators and one-hot encode the sector column.

    ``fit_transform`` learns the scaler and encoder on the data it is given;
    ``transform`` reuses those fitted objects on new data.
    """

    def __init__(self):
        self.scaler = StandardScaler()
        self.encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
        self.feature_cols = ['SMA_20', 'RSI_14', 'ATR_14', 'MACD']
        self.scaled_feature_cols = [f"{c}_scaled" for c in self.feature_cols]
        # Filled in by fit_transform from the encoder's learned categories.
        self.sector_cols = []

    def _assemble(self, data, sector_encoded, scaled_features):
        """Combine pass-through, scaled and sector columns, then add Norm_ATR."""
        sector_df = pd.DataFrame(sector_encoded, index=data.index, columns=self.sector_cols)
        scaled_df = pd.DataFrame(scaled_features, index=data.index, columns=self.scaled_feature_cols)
        passthrough = data[['Date', 'Ticker', 'Close', 'ATR_14']]
        processed_df = pd.concat([passthrough, scaled_df, sector_df], axis=1)
        # ATR relative to its own per-ticker rolling mean (up to 252 rows).
        rolling_atr = data.groupby('Ticker')['ATR_14'].transform(lambda x: x.rolling(window=252, min_periods=1).mean())
        processed_df['Norm_ATR'] = data['ATR_14'] / rolling_atr
        return processed_df.dropna()

    def fit_transform(self, data):
        """Fit the encoder and scaler on ``data`` and return the processed frame."""
        data = data.copy()
        sector_encoded = self.encoder.fit_transform(data[['Sector']])
        self.sector_cols = [f"is_{cat}" for cat in self.encoder.categories_[0]]
        scaled_features = self.scaler.fit_transform(data[self.feature_cols])
        return self._assemble(data, sector_encoded, scaled_features)

    def transform(self, data):
        """Apply the already-fitted encoder and scaler to ``data``."""
        data = data.copy()
        sector_encoded = self.encoder.transform(data[['Sector']])
        scaled_features = self.scaler.transform(data[self.feature_cols])
        return self._assemble(data, sector_encoded, scaled_features)

# ==============================================================================
# 3. SOPHISTICATED LSTM-PPO REINFORCEMENT LEARNING IMPLEMENTATION (Unchanged)
# ==============================================================================
class StockEnvLSTM:
    """Single-stock, all-in/all-out trading environment with windowed states.

    A state is the last ``window_size`` rows of the feature matrix. Action 1
    spends all cash on the stock, action 0 sells the whole position; any other
    situation leaves the portfolio untouched. The reward is the change in
    portfolio value from the current price to the next one.
    """

    def __init__(self, data, feature_cols, window_size):
        self.data = data.reset_index(drop=True)
        self.features = self.data[feature_cols].values
        self.prices = self.data['Close'].values
        self.window_size = window_size
        self.initial_cash = 10000
        # Same starting point that reset() restores.
        self.current_step = self.window_size
        self.done = False
        self.holdings = 0
        self.cash = self.initial_cash

    def _get_state(self):
        """Return the feature rows of the window ending just before ``current_step``."""
        window_start = self.current_step - self.window_size
        return self.features[window_start:self.current_step]

    def reset(self):
        """Restore the initial portfolio and position; return the first state."""
        self.current_step = self.window_size
        self.done = False
        self.holdings = 0
        self.cash = self.initial_cash
        return self._get_state()

    def step(self, action):
        """Trade at the current price, advance one row, and report the outcome.

        Returns:
            ``(next_state, reward, done, info)``; ``next_state`` is an all-zero
            window once the episode is done and ``info`` is always empty.
        """
        price_now = self.prices[self.current_step]
        value_before = self.cash + self.holdings * price_now

        if action == 1 and self.cash > 0 and price_now > 0:
            self.holdings += self.cash / price_now
            self.cash = 0
        elif action == 0 and self.holdings > 0:
            self.cash += self.holdings * price_now
            self.holdings = 0

        self.current_step += 1
        self.done = self.current_step >= len(self.data) - 1
        value_after = self.cash + self.holdings * self.prices[self.current_step]
        reward = value_after - value_before

        if self.done:
            next_state = np.zeros_like(self.features[:self.window_size])
        else:
            next_state = self._get_state()
        return next_state, reward, self.done, {}

class ActorCriticLSTM(nn.Module):
    """LSTM encoder with a shared hidden layer feeding an actor and a critic head."""

    def __init__(self, state_dim, action_dim, lstm_hidden_dim=128):
        super().__init__()
        self.lstm = nn.LSTM(state_dim, lstm_hidden_dim, batch_first=True)
        self.layer1 = nn.Linear(lstm_hidden_dim, 128)
        self.actor = nn.Linear(128, action_dim)
        self.critic = nn.Linear(128, 1)

    def forward(self, state):
        """Return ``(action_logits, state_value)`` for a batch of state windows.

        Only the LSTM output at the last time step is used.
        """
        sequence_out, _ = self.lstm(state)
        last_step = sequence_out[:, -1, :]
        hidden = F.relu(self.layer1(last_step))
        return self.actor(hidden), self.critic(hidden)

def train_ppo_and_generate_trajectories(data, fold_num, writer):
    """Train one LSTM actor-critic per ticker with a clipped-surrogate update, then roll each out once.

    Args:
        data: Processed frame with a 'Ticker' column, a 'Close' column and
            feature columns ending in '_scaled' or starting with 'is_'.
        fold_num: Fold identifier, used only in the logging tag.
        writer: Object with an ``add_scalar(tag, value, step)`` method.

    Returns:
        ``(trajectories, trained_models)``: a DataFrame with one row per
        rollout step (last feature row of the state window, action, reward,
        ticker) and a dict mapping ticker to its trained model.

    Note:
        ``optim``, ``Categorical`` and ``tqdm`` are not defined in this block
        and must already be in scope at module level.
    """
    print("Starting LSTM-PPO training to generate expert trajectories..."); all_trajectories = []; trained_models = {}
    feature_cols = [c for c in data.columns if c.endswith('_scaled') or c.startswith('is_')]
    state_dim = len(feature_cols); action_dim = 2;
    for ticker in tqdm(data['Ticker'].unique(), desc="Training LSTM-PPO per stock"):
        ticker_data = data[data['Ticker'] == ticker]; env = StockEnvLSTM(ticker_data, feature_cols, CONFIG['WINDOW_SIZE'])
        model = ActorCriticLSTM(state_dim, action_dim).to(DEVICE); optimizer = optim.Adam(model.parameters(), lr=3e-4)
        # NOTE(review): gae_lambda is assigned but never read below; returns are plain discounted sums.
        scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.9); gamma = 0.99; gae_lambda = 0.95; policy_clip = 0.2; n_epochs = 4;
        for episode in range(25):
            state = env.reset(); episode_done = False; episode_rewards = 0
            log_probs, values, rewards, states, actions = [], [], [], [], []
            # Collect one full episode with the current policy (no gradients).
            while not episode_done:
                state_tensor = torch.FloatTensor(state).unsqueeze(0).to(DEVICE); states.append(state)
                with torch.no_grad(): action_logits, value = model(state_tensor)
                dist = Categorical(logits=action_logits); action = dist.sample(); actions.append(action.item())
                log_prob = dist.log_prob(action); log_probs.append(log_prob); values.append(value)
                next_state, reward, episode_done, _ = env.step(action.item())
                rewards.append(reward); episode_rewards += reward; state = next_state
            writer.add_scalar(f'PPO_Fold_{fold_num}/{ticker}/Episode Reward', episode_rewards, episode)
            # Bootstrap from the critic's value of the final next_state, which the
            # environment returns as an all-zero window once the episode is done.
            with torch.no_grad(): _, last_value = model(torch.FloatTensor(next_state).unsqueeze(0).to(DEVICE))
            returns = torch.zeros(len(rewards) + 1).to(DEVICE); returns[-1] = last_value.squeeze()
            # Discounted returns, filled backwards from the bootstrap value.
            for i in reversed(range(len(rewards))): returns[i] = rewards[i] + gamma * returns[i+1]
            returns = returns[:-1]
            # Advantage = return minus value estimate, standardised over the episode.
            advantages = (returns - torch.cat(values).squeeze()).detach(); advantages = (advantages - advantages.mean()) / (advantages.std() + 1e-8)
            states_tensor = torch.FloatTensor(np.array(states)).to(DEVICE); actions_tensor = torch.LongTensor(actions).to(DEVICE); log_probs_tensor = torch.cat(log_probs).detach()
            # Several passes over the episode in shuffled mini-batches of 512.
            for _ in range(n_epochs):
                idxs = np.arange(len(states)); np.random.shuffle(idxs)
                for i in range(0, len(states), 512):
                    batch_idxs = idxs[i:i+512]; s_b, a_b, lp_b, adv_b, ret_b = states_tensor[batch_idxs], actions_tensor[batch_idxs], log_probs_tensor[batch_idxs], advantages[batch_idxs], returns[batch_idxs]
                    new_action_logits, new_values = model(s_b)
                    new_dist = Categorical(logits=new_action_logits); new_log_probs = new_dist.log_prob(a_b)
                    # Clipped surrogate: probability ratio limited to [1 - clip, 1 + clip].
                    ratio = torch.exp(new_log_probs - lp_b); surr1 = ratio * adv_b; surr2 = torch.clamp(ratio, 1 - policy_clip, 1 + policy_clip) * adv_b
                    actor_loss = -torch.min(surr1, surr2).mean(); critic_loss = F.mse_loss(new_values.squeeze(), ret_b); loss = actor_loss + 0.5 * critic_loss
                    optimizer.zero_grad(); loss.backward(); optimizer.step()
            # One scheduler step per episode.
            scheduler.step()
        trained_models[ticker] = model
        # Final rollout with the trained policy; actions are sampled, not argmax.
        state = env.reset(); episode_done = False
        while not episode_done:
            with torch.no_grad(): action_logits, _ = model(torch.FloatTensor(state).unsqueeze(0).to(DEVICE))
            action = Categorical(logits=action_logits).sample().item()
            next_state, reward, episode_done, _ = env.step(action)
            # Only the most recent feature row of the window is stored as the state.
            all_trajectories.append({'state': state[-1], 'action': action, 'reward': reward, 'ticker': ticker})
            state = next_state
    return pd.DataFrame(all_trajectories), trained_models

# ==============================================================================
# 4. --- NEW --- GAN, WORLD MODEL, AND ADVANCED DT TRAINING
# ==============================================================================
class Generator(nn.Module):
    """MLP mapping a latent vector to a trajectory of shape (window_size, state_dim + 2).

    The two extra channels per time step follow the state features.
    """

    def __init__(self, latent_dim, state_dim, window_size):
        super().__init__()
        self.window_size = window_size
        self.state_dim = state_dim
        flat_size = window_size * (state_dim + 2)
        layers = [
            nn.Linear(latent_dim, 256),
            nn.ReLU(),
            nn.Linear(256, 512),
            nn.ReLU(),
            nn.Linear(512, flat_size),
        ]
        self.model = nn.Sequential(*layers)

    def forward(self, z):
        """Generate a batch of trajectories from latent vectors ``z``."""
        flat = self.model(z)
        return flat.view(-1, self.window_size, self.state_dim + 2)

class Discriminator(nn.Module):
    """MLP that scores a flattened trajectory with a sigmoid output in (0, 1)."""

    def __init__(self, state_dim, window_size):
        super().__init__()
        flat_size = window_size * (state_dim + 2)
        layers = [
            nn.Linear(flat_size, 512),
            nn.LeakyReLU(0.2),
            nn.Linear(512, 256),
            nn.LeakyReLU(0.2),
            nn.Linear(256, 1),
            nn.Sigmoid(),
        ]
        self.model = nn.Sequential(*layers)

    def forward(self, traj):
        """Flatten each trajectory in the batch and return one score per item."""
        batch = traj.size(0)
        return self.model(traj.view(batch, -1))

def train_gan_and_synthesize_trajectories(states, actions, rtg, fold_num, writer):
    """Train a trajectory GAN and return the real data augmented with synthetic samples.

    Each real trajectory is the per-step concatenation of state features, the
    action, and the return-to-go. After training, one synthetic trajectory is
    generated per real one and appended.

    Args:
        states: Array of shape (n, window, state_dim).
        actions: Array of shape (n, window) with actions in {0, 1}.
        rtg: Array of shape (n, window) of returns-to-go.
        fold_num: Fold identifier, used only in logging tags.
        writer: Object with an ``add_scalar(tag, value, step)`` method.

    Returns:
        ``(states, actions, rtg, timesteps)`` for the combined dataset, real
        rows first. ``timesteps`` is ``0..window-1`` repeated per row.

    Note:
        ``optim`` is not defined in this block and must already be in scope.
    """
    print("Training TrajectoryGAN to augment dataset...")
    latent_dim, state_dim, window_size = 100, states.shape[2], states.shape[1]

    # Nothing to learn from: return the (empty) inputs instead of failing on
    # the loss variables that are only bound inside the batch loop.
    if len(states) == 0:
        return states, actions, rtg, np.tile(np.arange(window_size), (0, 1))

    # Stack state, action and RTG along the feature axis: (n, window, state_dim + 2).
    real_trajs = torch.cat([
        torch.from_numpy(states),
        torch.from_numpy(actions).unsqueeze(-1),
        torch.from_numpy(rtg).unsqueeze(-1)  # Use RTG as the reward signal
    ], dim=-1).float()

    generator = Generator(latent_dim, state_dim, window_size).to(DEVICE)
    discriminator = Discriminator(state_dim, window_size).to(DEVICE)
    adversarial_loss = nn.BCELoss()
    d_optimizer = optim.Adam(discriminator.parameters(), lr=2e-4, betas=(0.5, 0.999))
    g_optimizer = optim.Adam(generator.parameters(), lr=2e-4, betas=(0.5, 0.999))

    for epoch in range(50):
        for i in range(0, len(real_trajs), 512):
            real_batch = real_trajs[i:i+512].to(DEVICE)
            z = torch.randn(real_batch.size(0), latent_dim).to(DEVICE)
            fake_batch = generator(z)

            # Discriminator step: real -> 1, fake -> 0 (fake detached from the generator).
            d_optimizer.zero_grad()
            d_real = discriminator(real_batch)
            d_fake = discriminator(fake_batch.detach())
            d_loss = adversarial_loss(d_real, torch.ones_like(d_real)) + adversarial_loss(d_fake, torch.zeros_like(d_fake))
            d_loss.backward()
            d_optimizer.step()

            # Generator step: try to make the discriminator output 1 on fakes.
            g_optimizer.zero_grad()
            d_fake_for_g = discriminator(fake_batch)
            g_loss = adversarial_loss(d_fake_for_g, torch.ones_like(d_fake_for_g))
            g_loss.backward()
            g_optimizer.step()
        # Logged values are those of the last batch of the epoch.
        writer.add_scalar(f'GAN/Fold_{fold_num}/D_Loss', d_loss.item(), epoch)
        writer.add_scalar(f'GAN/Fold_{fold_num}/G_Loss', g_loss.item(), epoch)

    with torch.no_grad():
        z = torch.randn(len(real_trajs), latent_dim).to(DEVICE)
        synthetic_trajs = generator(z).cpu().numpy()

    s_synthetic = synthetic_trajs[:, :, :state_dim]
    # The generator output is unbounded, so rounding alone can yield actions
    # such as -1 or 2. Clip to the valid binary range so downstream one-hot
    # encodings and embeddings never see an out-of-range index.
    a_synthetic = np.clip(np.round(synthetic_trajs[:, :, state_dim]), 0, 1).astype(int)
    r_synthetic = synthetic_trajs[:, :, state_dim + 1]
    # Reverse cumulative sum along time.
    # NOTE(review): the generator was trained with RTG in this channel, yet the
    # channel is accumulated again here as if it held per-step rewards — confirm
    # which is intended.
    rtg_synthetic = np.cumsum(r_synthetic[:, ::-1], axis=1)[:, ::-1]

    s_combined = np.concatenate([states, s_synthetic])
    a_combined = np.concatenate([actions, a_synthetic])
    rtg_combined = np.concatenate([rtg, rtg_synthetic])
    # Every trajectory, real or synthetic, uses timesteps 0..window-1.
    t_combined = np.tile(np.arange(window_size), (len(s_combined), 1))
    return s_combined, a_combined, rtg_combined, t_combined

class WorldModelLSTM(nn.Module):
    """Two-layer LSTM that predicts the next state at every step of a sequence.

    The input at each step is the state concatenated with a one-hot encoding
    of the action.
    """

    def __init__(self, state_dim, action_dim):
        """
        Args:
            state_dim: Number of state features per step.
            action_dim: Number of discrete actions (width of the one-hot code).
        """
        super().__init__()
        # Kept so forward() encodes actions with the same width the LSTM was
        # built for, instead of a hard-coded 2.
        self.action_dim = action_dim
        self.lstm = nn.LSTM(state_dim + action_dim, 128, batch_first=True, num_layers=2)
        self.fc = nn.Linear(128, state_dim)

    def forward(self, state_seq, action_seq):
        """Return predicted next states, one per input step.

        Args:
            state_seq: Tensor of shape (batch, seq, state_dim).
            action_seq: Tensor of shape (batch, seq) holding action indices
                in ``[0, action_dim)``; it is cast to long internally.
        """
        action_one_hot = F.one_hot(action_seq.long(), num_classes=self.action_dim).float()
        x = torch.cat([state_seq, action_one_hot], dim=-1)
        lstm_out, _ = self.lstm(x)
        return self.fc(lstm_out)

def train_world_model(states, actions, fold_num, writer):
    """Fit a WorldModelLSTM to predict each next state from states and actions.

    Inputs are every step but the last of each trajectory; targets are the
    same trajectories shifted one step forward. Training runs for 30 epochs
    in order, in slices of 1024 trajectories.

    Args:
        states: Array of shape (n, window, state_dim).
        actions: Array of shape (n, window).
        fold_num: Fold identifier, used only in the logging tag.
        writer: Object with an ``add_scalar(tag, value, step)`` method.

    Returns:
        The trained world model (on ``DEVICE``).
    """
    print("Training World Model on PPO trajectories...")
    state_dim = states.shape[2]
    action_dim = 2
    world_model = WorldModelLSTM(state_dim, action_dim).to(DEVICE)
    optimizer = optim.Adam(world_model.parameters(), lr=1e-3)
    loss_fn = nn.MSELoss()

    # One-step-ahead supervision: step t of the input predicts step t + 1.
    input_states = torch.from_numpy(states[:, :-1, :]).float().to(DEVICE)
    input_actions = torch.from_numpy(actions[:, :-1]).long().to(DEVICE)
    target_states = torch.from_numpy(states[:, 1:, :]).float().to(DEVICE)

    chunk = 1024
    for epoch in range(30):
        for start in range(0, len(input_states), chunk):
            rows = slice(start, start + chunk)
            optimizer.zero_grad()
            predicted = world_model(input_states[rows], input_actions[rows])
            loss = loss_fn(predicted, target_states[rows])
            loss.backward()
            optimizer.step()
        # The logged value is the loss of the epoch's last slice.
        writer.add_scalar(f'WorldModel/Fold_{fold_num}/Training Loss', loss.item(), epoch)
    return world_model

def online_finetune_dt(model_dt, train_data, feature_cols, fold_num, writer):
    """Fine-tune the Decision Transformer with one policy-gradient update per ticker.

    For each ticker a full episode is played in a ``StockEnvLSTM``; actions
    are sampled from the model's prediction at the last context position.
    The loss is the sum of ``-log_prob * standardised discounted return``.

    Args:
        model_dt: Model called as ``model(states, actions, rtg, timesteps)``.
        train_data: Processed frame with a 'Ticker' column.
        feature_cols: Feature columns handed to the environment.
        fold_num: Fold identifier, used only in the logging tag.
        writer: Object with an ``add_scalar`` method.

    Returns:
        The same ``model_dt`` after the updates.
    """
    print("Starting Online Fine-Tuning for Decision Transformer...")
    optimizer = optim.Adam(model_dt.parameters(), lr=1e-5)
    for ticker in tqdm(train_data['Ticker'].unique(), desc="Online fine-tuning per stock"):
        ticker_data = train_data[train_data['Ticker'] == ticker]
        env = StockEnvLSTM(ticker_data, feature_cols, CONFIG['WINDOW_SIZE'])
        state = env.reset()
        episode_done = False
        rewards = []
        log_probs = []
        while not episode_done:
            # The context repeats the latest feature row across the whole
            # window; actions and returns-to-go in the context are all zero.
            context_states_np = np.tile(state[-1], (CONFIG['WINDOW_SIZE'], 1))
            context_states = torch.from_numpy(context_states_np).float().unsqueeze(0).to(DEVICE)
            context_actions = torch.zeros(1, CONFIG['WINDOW_SIZE'], dtype=torch.long).to(DEVICE)
            context_rtg = torch.zeros(1, CONFIG['WINDOW_SIZE'], 1, dtype=torch.float32).to(DEVICE)
            context_timesteps = torch.arange(CONFIG['WINDOW_SIZE']).reshape(1, CONFIG['WINDOW_SIZE']).to(DEVICE)

            action_preds = model_dt(context_states, context_actions, context_rtg, context_timesteps)
            dist = Categorical(logits=action_preds[0, -1, :])
            action = dist.sample()
            next_state, reward, episode_done, _ = env.step(action.item())

            log_probs.append(dist.log_prob(action))
            rewards.append(reward)
            state = next_state

        # Discounted returns (factor 0.99), accumulated from the end of the episode.
        running = 0
        discounted = []
        for r in reversed(rewards):
            running = r + 0.99 * running
            discounted.append(running)
        discounted.reverse()
        returns = torch.tensor(discounted).to(DEVICE)
        returns = (returns - returns.mean()) / (returns.std() + 1e-8)

        policy_loss = [-step_log_prob * step_return for step_log_prob, step_return in zip(log_probs, returns)]
        optimizer.zero_grad()
        loss = torch.stack(policy_loss).sum()
        loss.backward()
        optimizer.step()
        writer.add_scalar(f'DT_Online/Fold_{fold_num}/{ticker}/Policy Loss', loss.item())
    return model_dt

class DecisionTransformer(nn.Module):
    """Causal transformer over interleaved (return-to-go, state, action) tokens.

    ``forward`` returns unnormalised action logits. Consumers in this file
    pass the output to ``F.cross_entropy``, ``Categorical(logits=...)`` or
    ``argmax``, all of which expect (or are invariant to) logits; the previous
    trailing ``Softmax`` caused the softmax to be applied twice in training.
    """

    def __init__(self, state_dim, act_dim, d_model, n_head, n_layer, max_ep_len):
        """
        Args:
            state_dim: Number of state features per step.
            act_dim: Number of discrete actions.
            d_model: Embedding width.
            n_head: Attention heads per layer.
            n_layer: Number of encoder layers.
            max_ep_len: Size of the timestep embedding table.
        """
        super().__init__()
        self.state_dim, self.act_dim, self.d_model = state_dim, act_dim, d_model
        self.transformer = nn.TransformerEncoder(nn.TransformerEncoderLayer(d_model, n_head, d_model * 4, 0.1, batch_first=True), n_layer)
        self.embed_timestep = nn.Embedding(max_ep_len, d_model)
        self.embed_return = nn.Linear(1, d_model)
        self.embed_state = nn.Linear(state_dim, d_model)
        self.embed_action = nn.Embedding(act_dim, d_model)
        self.embed_ln = nn.LayerNorm(d_model)
        # Still an nn.Sequential so parameter names stay `predict_action.0.*`.
        self.predict_action = nn.Sequential(nn.Linear(d_model, act_dim))

    def forward(self, states, actions, returns_to_go, timesteps):
        """Return action logits of shape (batch, seq_len, act_dim).

        Args:
            states: (batch, seq_len, state_dim) float tensor.
            actions: (batch, seq_len) long tensor of action indices.
            returns_to_go: (batch, seq_len, 1) float tensor.
            timesteps: (batch, seq_len) long tensor of timestep indices.
        """
        batch_size, seq_len = states.shape[0], states.shape[1]
        time_embeds = self.embed_timestep(timesteps)
        state_embeds = self.embed_state(states) + time_embeds
        action_embeds = self.embed_action(actions) + time_embeds
        rtg_embeds = self.embed_return(returns_to_go) + time_embeds

        # Interleave per step as (rtg_t, state_t, action_t) -> 3 * seq_len tokens.
        stacked_inputs = torch.stack((rtg_embeds, state_embeds, action_embeds), dim=1).permute(0, 2, 1, 3).reshape(batch_size, 3 * seq_len, self.d_model)
        stacked_inputs = self.embed_ln(stacked_inputs)

        # Build the causal mask on the inputs' own device rather than a global one.
        mask = nn.Transformer.generate_square_subsequent_mask(3 * seq_len).to(stacked_inputs.device)
        transformer_out = self.transformer(stacked_inputs, mask=mask)

        # Predict the action from each state token (index 1 within every triple).
        state_out = transformer_out[:, 1::3]
        return self.predict_action(state_out)

def train_dt(model, states, actions, rtg, timesteps, fold_num, writer, epochs=5, batch_size=1024):
    """Supervised training of the Decision Transformer on recorded trajectories.

    Batches are taken in order (no shuffling). The loss is the cross-entropy
    between the model's per-step action predictions and the recorded actions,
    with gradient-norm clipping at 1.0.

    Args:
        model: Model with an ``act_dim`` attribute, called as
            ``model(states, actions, rtg, timesteps)``.
        states: Array (n, window, state_dim).
        actions: Array (n, window) of action indices.
        rtg: Array (n, window) of returns-to-go.
        timesteps: Array (n, window) of timestep indices.
        fold_num: Fold identifier, used only in the logging tag.
        writer: Object with an ``add_scalar(tag, value, step)`` method.
        epochs: Number of passes over the data.
        batch_size: Trajectories per optimisation step.

    Returns:
        The trained model.
    """
    model.to(DEVICE).train()
    optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)
    s = torch.from_numpy(states).float()
    a = torch.from_numpy(actions).long()
    r = torch.from_numpy(rtg).float()
    t = torch.from_numpy(timesteps).long()

    # A running counter gives every batch a unique, increasing logging step.
    # The old `epoch * (n // batch_size) + i // batch_size` formula reused step
    # numbers across epochs whenever n was not a multiple of batch_size.
    global_step = 0
    for _ in range(epochs):
        for i in range(0, len(states), batch_size):
            rows = slice(i, i + batch_size)
            s_b, a_b, r_b, t_b = s[rows].to(DEVICE), a[rows].to(DEVICE), r[rows].to(DEVICE), t[rows].to(DEVICE)
            action_preds = model(s_b, a_b, r_b.unsqueeze(-1), t_b)
            loss = F.cross_entropy(action_preds.reshape(-1, model.act_dim), a_b.reshape(-1))
            writer.add_scalar(f'DT/Fold_{fold_num}/Initial Training Loss', loss.item(), global_step)
            global_step += 1
            optimizer.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()
    return model

# ==============================================================================
# 5. BACKTESTING (With new MPC backtester)
# ==============================================================================
def backtest_mpc(world_model, actor_critic_model, data, feature_cols, initial_cash, plan_horizon=10, num_sequences=500):
    """Backtest a sampling-based planner that scores action sequences with the world model.

    At every step ``num_sequences`` action sequences of length ``plan_horizon``
    are sampled from the actor's current policy and rolled forward through the
    world model. A sequence is scored by the summed predicted change in
    feature index 3 (scaled MACD under the feature order used in this file);
    the first action of the best sequence is executed.

    Args:
        world_model: Model called as ``world_model(state_seq, action_seq)``.
        actor_critic_model: Model returning ``(action_logits, value)``.
        data: Processed single-stock frame with 'Close' and feature columns.
        feature_cols: Feature columns handed to the environment.
        initial_cash: Starting cash of the simulated portfolio.
        plan_horizon: Number of imagined steps per sequence.
        num_sequences: Number of candidate sequences per decision.

    Returns:
        Series of portfolio values, one per executed step, indexed by the
        ``data`` row whose price the trade used. Empty if ``data`` is too
        short for a single environment step.
    """
    world_model.eval()
    actor_critic_model.eval()
    window_size = CONFIG['WINDOW_SIZE']
    # The environment needs at least one row after the first tradable row.
    if len(data) < window_size + 2:
        return pd.Series(dtype=float)

    env = StockEnvLSTM(data, feature_cols, window_size)
    # Make the simulated portfolio start from the requested cash amount.
    env.initial_cash = initial_cash
    state = env.reset()
    done = False
    portfolio_values = []
    # Actions aligned with the rows of the state window. No history exists at
    # the start, so it begins as all zeros — NOTE(review): confirm this
    # padding choice matches how the world model was trained.
    executed_actions = torch.zeros(1, window_size, dtype=torch.long, device=DEVICE)

    while not done:
        state_tensor = torch.FloatTensor(state).unsqueeze(0).to(DEVICE)
        with torch.no_grad():
            action_logits, _ = actor_critic_model(state_tensor)
            dist = Categorical(logits=action_logits)
            # sample() yields (num_sequences, plan_horizon, 1) because the
            # distribution has batch shape (1,); drop that trailing axis.
            candidate_actions = dist.sample((num_sequences, plan_horizon)).squeeze(-1)

            state_batch = state_tensor.repeat(num_sequences, 1, 1)
            action_batch = executed_actions.repeat(num_sequences, 1)
            cumulative_rewards = torch.zeros(num_sequences, device=DEVICE)
            for t in range(plan_horizon):
                # Slide the action window so the candidate action lines up
                # with the most recent state row.
                action_batch = torch.cat([action_batch[:, 1:], candidate_actions[:, t:t + 1]], dim=1)
                next_state_pred = world_model(state_batch, action_batch)[:, -1, :]
                cumulative_rewards += next_state_pred[:, 3] - state_batch[:, -1, 3]  # Change in scaled MACD
                state_batch = torch.cat([state_batch[:, 1:, :], next_state_pred.unsqueeze(1)], dim=1)

        best_sequence_idx = torch.argmax(cumulative_rewards)
        best_action = candidate_actions[best_sequence_idx, 0].item()
        state, _, done, _ = env.step(best_action)
        executed_actions = torch.cat(
            [executed_actions[:, 1:], torch.tensor([[best_action]], dtype=torch.long, device=DEVICE)], dim=1)
        # Value at the price the trade was executed at.
        portfolio_values.append(env.cash + env.holdings * env.prices[env.current_step - 1])

    # One value per executed step. The old `data.index[window_size:]` was one
    # entry longer than the value list and made pd.Series raise.
    index = data.index[window_size:window_size + len(portfolio_values)]
    return pd.Series(portfolio_values, index=index)

def backtest_dt(model, data, window_size, initial_cash):
    """Backtest the Decision Transformer on one stock, with optional risk overlay.

    A rolling context of the last ``window_size`` states, actions and target
    returns is fed to the model at every row; the arg-max action at the last
    position is traded (1 = buy with all cash, 0 = sell everything).

    Args:
        model: Model called as ``model(states, actions, rtg, timesteps)``.
        data: Processed single-stock frame with 'Close', optional 'Norm_ATR',
            and feature columns ending in '_scaled' or starting with 'is_'.
        window_size: Context length.
        initial_cash: Starting cash.

    Returns:
        Series with one portfolio value per row of ``data``, sharing its index.
    """
    model.eval().to(DEVICE)
    cash, holdings, high_water_mark = initial_cash, 0, initial_cash
    # Seeded with the starting cash so the first drawdown check has a value.
    portfolio_values = [initial_cash]
    feature_cols = [c for c in data.columns if c.endswith('_scaled') or c.startswith('is_')]
    state_dim = len(feature_cols)

    context_states = torch.zeros(1, window_size, state_dim, device=DEVICE, dtype=torch.float32)
    context_actions = torch.zeros(1, window_size, dtype=torch.long, device=DEVICE)
    context_rtg = torch.zeros(1, window_size, 1, device=DEVICE, dtype=torch.float32)
    context_timesteps = torch.arange(window_size, device=DEVICE).reshape(1, window_size)
    has_norm_atr = 'Norm_ATR' in data.columns

    for i in range(len(data)):
        current_state_np = data[feature_cols].iloc[i].values
        current_state = torch.from_numpy(current_state_np).float().reshape(1, 1, state_dim).to(DEVICE)
        context_states = torch.cat([context_states[:, 1:], current_state], dim=1)

        norm_atr = data['Norm_ATR'].iloc[i] if has_norm_atr else None
        norm_atr_known = has_norm_atr and not pd.isna(norm_atr)

        # Target return: 15% of starting cash, optionally scaled by Norm_ATR.
        target_return = initial_cash * 0.15
        if CONFIG["USE_DYNAMIC_TARGET_RETURN"] and norm_atr_known and norm_atr > 0:
            target_return *= norm_atr
        rtg_update = torch.tensor([[[target_return]]], device=DEVICE, dtype=torch.float32)
        context_rtg = torch.cat([context_rtg[:, 1:], rtg_update], dim=1)

        with torch.no_grad():
            action_preds = model(context_states, context_actions, context_rtg, context_timesteps)
        final_action = torch.argmax(action_preds[0, -1, :]).item()

        if CONFIG["USE_RISK_MANAGEMENT_OVERLAY"]:
            # Force a sell when drawdown or normalised ATR breaches its limit.
            current_drawdown = (high_water_mark - portfolio_values[-1]) / high_water_mark if high_water_mark > 0 else 0
            if current_drawdown > CONFIG["MAX_DRAWDOWN_LIMIT"]:
                final_action = 0
            if norm_atr_known and norm_atr > CONFIG["VOLATILITY_LIMIT_ATR"]:
                final_action = 0

        current_price = data['Close'].iloc[i]
        if final_action == 1 and cash > 0 and current_price > 0:
            holdings += cash / current_price
            cash = 0
        elif final_action == 0 and holdings > 0 and current_price > 0:
            cash += holdings * current_price
            holdings = 0

        new_value = cash + holdings * current_price
        portfolio_values.append(new_value)
        high_water_mark = max(high_water_mark, new_value)
        context_actions = torch.cat([context_actions[:, 1:], torch.tensor([[final_action]], device=DEVICE)], dim=1)

    # One value per row. The old `data.index[:-1]` was one label short of the
    # value list and made pd.Series raise a length-mismatch ValueError.
    return pd.Series(portfolio_values[1:], index=data.index)

def backtest_ppo(model, data, feature_cols, initial_cash):
    """Backtest a trained actor-critic policy on one stock.

    Actions are sampled from the policy (not arg-max), so results vary with
    the torch random state.

    Args:
        model: Model returning ``(action_logits, value)`` for a state window.
        data: Processed single-stock frame with 'Close' and feature columns.
        feature_cols: Feature columns handed to the environment.
        initial_cash: Starting cash of the simulated portfolio.

    Returns:
        Series of portfolio values, one per executed step, indexed by the
        ``data`` row whose price the trade used. Empty if ``data`` is too
        short for a single environment step.
    """
    model.to(DEVICE).eval()
    window_size = CONFIG['WINDOW_SIZE']
    # The environment needs at least one row after the first tradable row.
    if len(data) < window_size + 2:
        return pd.Series(dtype=float)

    env = StockEnvLSTM(data, feature_cols, window_size)
    # Make the simulated portfolio start from the requested cash amount; the
    # environment otherwise always starts from its own built-in default.
    env.initial_cash = initial_cash
    state = env.reset()
    done = False
    portfolio_values = []
    while not done:
        with torch.no_grad():
            action_logits, _ = model(torch.FloatTensor(state).unsqueeze(0).to(DEVICE))
            action = Categorical(logits=action_logits).sample().item()
        state, _, done, _ = env.step(action)
        # Value at the price the trade was executed at.
        portfolio_values.append(env.cash + env.holdings * env.prices[env.current_step - 1])

    # One value per executed step. The old `data.index[window_size:]` was one
    # entry longer than the value list and made pd.Series raise.
    index = data.index[window_size:window_size + len(portfolio_values)]
    return pd.Series(portfolio_values, index=index)

# ==============================================================================
# 6. TRADITIONAL STRATEGIES & REPORTING (Unchanged)
# ==============================================================================
def backtest_traditional(data, signals, initial_cash):
    """Run an all-in/all-out backtest driven by a +1 / -1 signal series.

    A signal of 1 while flat buys with all cash; a signal of -1 while invested
    sells everything. Trades only happen at strictly positive prices.

    Args:
        data: Frame with a 'Close' column.
        signals: Series aligned by position with ``data``.
        initial_cash: Starting cash.

    Returns:
        Series of portfolio values, one per row, sharing ``data``'s index.
    """
    cash = initial_cash
    holdings = 0
    portfolio_values = []
    closes = data['Close']
    for row in range(len(data)):
        signal = signals.iloc[row]
        current_price = closes.iloc[row]
        tradable = current_price > 0
        if signal == 1 and holdings == 0 and tradable:
            holdings = cash / current_price
            cash = 0
        elif signal == -1 and holdings > 0 and tradable:
            cash = holdings * current_price
            holdings = 0
        portfolio_values.append(cash + holdings * current_price)
    return pd.Series(portfolio_values, index=data.index)
def generate_ma_cross_signals(data, short_window=50, long_window=200):
    """Moving-average crossover signals: 1 while short MA > long MA, -1 while below.

    Rows where neither holds (equal, or either average still NaN) carry the
    previous signal forward; leading gaps become 0.
    """
    close = data['Close']
    short_ma = close.rolling(window=short_window).mean()
    long_ma = close.rolling(window=long_window).mean()
    signals = pd.Series(index=data.index, dtype=int)
    signals[short_ma > long_ma] = 1
    signals[short_ma < long_ma] = -1
    filled = signals.ffill()
    return filled.fillna(0)
def generate_rsi_signals(data, buy_threshold=30, sell_threshold=70):
    """RSI signals: 1 below ``buy_threshold``, -1 above ``sell_threshold``.

    Rows in between carry the previous signal forward; leading gaps become 0.
    """
    rsi = data['RSI_14']
    signals = pd.Series(index=data.index, dtype=int)
    signals[rsi < buy_threshold] = 1
    signals[rsi > sell_threshold] = -1
    filled = signals.ffill()
    return filled.fillna(0)
def generate_macd_signals(data):
    """MACD signals: 1 while MACD is above its signal line, -1 while below.

    Rows where the two are equal (or NaN) carry the previous signal forward;
    leading gaps become 0.
    """
    macd = data['MACD']
    signal_line = data['MACD_Signal']
    signals = pd.Series(index=data.index, dtype=int)
    signals[macd > signal_line] = 1
    signals[macd < signal_line] = -1
    filled = signals.ffill()
    return filled.fillna(0)
class ParameterOptimizer:
    """Grid search over strategy parameters, scored per ticker by Sharpe ratio."""

    def __init__(self, strategy_func, param_grid, backtest_func):
        """
        Args:
            strategy_func: Called as ``strategy_func(ticker_data, **params)``;
                returns a signal series.
            param_grid: Dict mapping parameter name to a list of candidates.
            backtest_func: Called as ``backtest_func(ticker_data, signals, cash)``;
                returns a portfolio-value series.
        """
        self.strategy_func = strategy_func
        self.param_grid = param_grid
        self.backtest_func = backtest_func

    def find_best_params_per_stock(self, train_data, tickers):
        """Return ``{ticker: best_params}`` for every ticker that has data.

        The score is the un-annualised mean/std of the portfolio's period
        returns (0 when it cannot be computed). Tickers with no rows in
        ``train_data`` are skipped. When the grid has both 'short_window' and
        'long_window', combinations with short >= long are discarded.
        """
        optimal_params_for_all_stocks = {}
        keys, values = zip(*self.param_grid.items())
        param_combinations = [dict(zip(keys, v)) for v in itertools.product(*values)]
        if 'short_window' in self.param_grid and 'long_window' in self.param_grid:
            param_combinations = [p for p in param_combinations if p['short_window'] < p['long_window']]

        for ticker in tickers:
            ticker_data = train_data[train_data['Ticker'] == ticker]
            if ticker_data.empty:
                continue
            # This initialisation used to share a line with the `continue`
            # above (joined by `;`), which made it part of the `if` body and
            # therefore unreachable; the first comparison below then raised.
            best_ticker_params, best_ticker_sharpe = {}, -np.inf
            for params in param_combinations:
                signals = self.strategy_func(ticker_data, **params)
                portfolio = self.backtest_func(ticker_data, signals, 10000)
                returns = portfolio.pct_change().dropna()
                sharpe_ratio = 0
                if not returns.empty and returns.std() > 0:
                    sharpe_ratio = returns.mean() / returns.std()
                if sharpe_ratio > best_ticker_sharpe:
                    best_ticker_sharpe, best_ticker_params = sharpe_ratio, params
            optimal_params_for_all_stocks[ticker] = best_ticker_params

        print(f"Completed per-stock optimization for {self.strategy_func.__name__}")
        return optimal_params_for_all_stocks
def calculate_performance_metrics(portfolio_values):
    """Summarise a portfolio-value series into return and risk metrics.

    Args:
        portfolio_values: Series of portfolio values with a datetime-like
            index (the year count comes from ``(last - first).days``).

    Returns:
        Dict with Total Return, Annualized Return, Annualized Volatility,
        Max Drawdown, Sharpe Ratio and Calmar Ratio, or ``None`` when the
        series is empty, starts at zero, or has no return variation.
    """
    if portfolio_values.empty or portfolio_values.iloc[0] == 0:
        return None
    # Keep only the first value for any repeated index label.
    is_repeat = portfolio_values.index.duplicated(keep='first')
    portfolio_values = portfolio_values[~is_repeat]
    returns = portfolio_values.pct_change().dropna()
    if returns.empty or returns.std() == 0:
        return None

    first_value = portfolio_values.iloc[0]
    last_value = portfolio_values.iloc[-1]
    total_return = (last_value - first_value) / first_value

    if len(portfolio_values.index) > 1:
        num_years = (portfolio_values.index[-1] - portfolio_values.index[0]).days / 365.25
    else:
        num_years = 0
    if num_years > 0:
        annualized_return = (1 + total_return) ** (1/num_years) - 1
    else:
        annualized_return = total_return

    annualized_volatility = returns.std() * np.sqrt(252)
    sharpe_ratio = annualized_return / annualized_volatility if annualized_volatility > 0 else 0

    # Drawdown is measured against the running maximum of the series.
    high_water_mark = portfolio_values.cummax()
    drawdown = (portfolio_values - high_water_mark) / high_water_mark
    max_drawdown = drawdown.min()
    calmar_ratio = annualized_return / abs(max_drawdown) if max_drawdown < 0 else 0

    return {
        "Total Return": total_return,
        "Annualized Return": annualized_return,
        "Annualized Volatility": annualized_volatility,
        "Max Drawdown": max_drawdown,
        "Sharpe Ratio": sharpe_ratio,
        "Calmar Ratio": calmar_ratio,
    }
def print_performance_summary(name, metrics):
    """Print the six performance metrics for strategy ``name``.

    Returns and drawdown are shown as percentages, ratios with two decimals.
    ``metrics`` must contain every key produced by the metrics calculation.
    """
    print(f"\n--- Performance: {name} ---")
    print(f"Total Return: {metrics['Total Return']:.2%}")
    print(f"Annualized Return: {metrics['Annualized Return']:.2%}")
    print(f"Annualized Volatility: {metrics['Annualized Volatility']:.2%}")
    print(f"Max Drawdown: {metrics['Max Drawdown']:.2%}")
    print(f"Sharpe Ratio: {metrics['Sharpe Ratio']:.2f}")
    print(f"Calmar Ratio: {metrics['Calmar Ratio']:.2f}")

# ==============================================================================
# 7. MAIN EXECUTION LOGIC
# ==============================================================================
def launch_tensorboard(log_dir='logs', port=6006, startup_wait=5, executable='tensorboard'):
    """Start a TensorBoard server as a child process and open it in a browser.

    Args:
        log_dir: Directory passed to TensorBoard via ``--logdir``.
        port: TCP port passed via ``--port`` and used to build the URL.
        startup_wait: Seconds to sleep before opening the browser, giving the
            server time to come up.
        executable: Name or path of the TensorBoard command to run.

    Returns:
        The ``subprocess.Popen`` handle of the server process, or ``None`` if
        the executable could not be started at all.
    """
    # Imported locally to keep this optional helper self-contained.
    import subprocess
    import time
    import webbrowser

    try:
        proc = subprocess.Popen([executable, '--logdir', str(log_dir), '--port', str(port)])
    except OSError as exc:
        # Viewing the dashboard is a convenience; a missing or non-executable
        # TensorBoard binary should not crash the script after a long run.
        print(f"Could not start TensorBoard ({exc}); skipping dashboard launch.")
        return None

    print(f"Waiting {startup_wait}s for TensorBoard to start...")
    time.sleep(startup_wait)

    # If the server already exited (e.g. the port is in use) there is nothing to open.
    if proc.poll() is not None:
        print(f"TensorBoard exited early with code {proc.returncode}; not opening the browser.")
        return proc

    url = f"http://localhost:{port}/"
    print(f"Opening TensorBoard at {url}")
    webbrowser.open(url)
    return proc

def merge_fold_results(all_folds_stock_results):
    """Stitch per-fold backtest series into one series per ticker and strategy.

    Args:
        all_folds_stock_results: List with one entry per fold, each a
            ``{ticker: {strategy: pandas Series}}`` mapping.

    Returns:
        ``{ticker: {strategy: Series}}`` where each Series is the
        concatenation of that ticker/strategy across all folds, in fold
        order, keeping the first value for any repeated index label.
    """
    collected = {}
    for fold_res in all_folds_stock_results:
        for ticker, strategies in fold_res.items():
            per_ticker = collected.setdefault(ticker, {})
            for strat, series in strategies.items():
                # Append for EVERY fold. Writing the append on the same line as
                # `if strat not in ...: ... = [];` makes it part of the `if`
                # suite, which silently keeps only the first fold.
                per_ticker.setdefault(strat, []).append(series)

    merged = {}
    for ticker, strats in collected.items():
        merged[ticker] = {}
        for strat, series_list in strats.items():
            full_series = pd.concat(series_list)
            merged[ticker][strat] = full_series[~full_series.index.duplicated(keep='first')]
    return merged


def group_series_by_strategy_and_sector(final_stock_results, ticker_to_sector, strategy_keys):
    """Regroup per-ticker series as ``{strategy: {sector: [Series, ...]}}``.

    Args:
        final_stock_results: ``{ticker: {strategy: Series}}`` mapping.
        ticker_to_sector: ``{ticker: sector}`` lookup; tickers without a
            (truthy) sector are skipped.
        strategy_keys: Strategies to report; every key appears in the result
            even when no series was recorded for it, and strategies not
            listed here are ignored.

    Returns:
        ``{strategy: {sector: list of Series}}`` with one Series per stock.
    """
    results_by_strategy = {strat: {} for strat in strategy_keys}
    for ticker, strats in final_stock_results.items():
        sector = ticker_to_sector.get(ticker)
        if not sector:
            continue
        for strat, series in strats.items():
            if strat not in results_by_strategy:
                continue
            # Append every stock of the sector, not only the first one seen.
            results_by_strategy[strat].setdefault(sector, []).append(series)
    return results_by_strategy


if __name__ == "__main__":
    df = create_multi_stock_dataframe()
    if CONFIG["USE_WALK_FORWARD_VALIDATION"]:
        print("Starting Main Walk-Forward Validation Process...")
        df['Date'] = pd.to_datetime(df['Date'])
        df = df.set_index('Date')
        df.sort_index(inplace=True)
        ticker_to_sector = df[['Ticker', 'Sector']].drop_duplicates().set_index('Ticker')['Sector'].to_dict()

        os.makedirs('logs', exist_ok=True)
        writer = SummaryWriter('logs')
        unique_years = df.index.year.unique()
        train_window_yrs, test_window_yrs = 8, 2
        all_folds_stock_results = []
        rsi_optimizer = ParameterOptimizer(generate_rsi_signals, {'buy_threshold': [25, 30, 35], 'sell_threshold': [70, 75, 80]}, backtest_traditional)
        ma_optimizer = ParameterOptimizer(generate_ma_cross_signals, {'short_window': [20, 30, 50], 'long_window': [100, 150, 200]}, backtest_traditional)

        # Close the writer even if training is interrupted or fails part-way.
        try:
            # Each fold trains on `train_window_yrs` calendar years and tests on
            # the following `test_window_yrs` years (fewer for the last fold).
            for i in range(train_window_yrs, len(unique_years), test_window_yrs):
                fold_num = i // test_window_yrs
                train_start_year, train_end_year = unique_years[i - train_window_yrs], unique_years[i - 1]
                test_start_year = unique_years[i]
                test_end_year = unique_years[min(i + test_window_yrs - 1, len(unique_years) - 1)]
                print(f"\n===== FOLD {fold_num}: Training on {train_start_year}-{train_end_year}, Testing on {test_start_year}-{test_end_year} =====")
                train_df = df.loc[str(train_start_year):str(train_end_year)]
                test_df = df.loc[str(test_start_year):str(test_end_year)]

                # The feature pipeline is fitted on the training window only.
                engineer = FeatureEngineer()
                processed_train = engineer.fit_transform(train_df.reset_index())
                processed_test = engineer.transform(test_df.reset_index())

                ppo_trajectories, trained_ppo_models = train_ppo_and_generate_trajectories(processed_train, fold_num, writer)
                s_ppo, a_ppo, rtg_ppo, t_ppo = get_trajectories(ppo_trajectories, CONFIG["WINDOW_SIZE"])
                if s_ppo.shape[0] < 1:
                    print("Not enough PPO data. Skipping fold.")
                    continue

                s_aug, a_aug, rtg_aug, t_aug = train_gan_and_synthesize_trajectories(s_ppo, a_ppo, rtg_ppo, fold_num, writer)
                world_model = train_world_model(s_ppo, a_ppo, fold_num, writer)

                state_dim = s_aug.shape[2]
                model_dt = DecisionTransformer(state_dim, 2, d_model=128, n_head=4, n_layer=3, max_ep_len=10000)
                print("Training Decision Transformer on Augmented PPO+GAN trajectories...")
                model_dt = train_dt(model_dt, s_aug, a_aug, rtg_aug, t_aug, fold_num=fold_num, writer=writer, epochs=5)
                model_dt = online_finetune_dt(model_dt, processed_train, engineer.scaled_feature_cols + engineer.sector_cols, fold_num, writer)

                tickers = test_df['Ticker'].unique()
                if len(tickers) == 0:
                    continue
                # Starting cash is split equally across the tickers of this fold.
                cash_per_stock = CONFIG["INITIAL_CASH"] / len(tickers)

                print("Optimizing traditional strategies for each stock...")
                best_rsi_params_per_stock = rsi_optimizer.find_best_params_per_stock(train_df, train_df['Ticker'].unique())
                best_ma_params_per_stock = ma_optimizer.find_best_params_per_stock(train_df, train_df['Ticker'].unique())

                fold_stock_results = {ticker: {} for ticker in tickers}
                print("Backtesting all strategies...")
                for ticker in tqdm(tickers, desc="Backtesting Tickers"):
                    ticker_data = test_df[test_df['Ticker'] == ticker]
                    proc_data = processed_test[processed_test['Ticker'] == ticker]
                    if ticker_data.empty or proc_data.empty:
                        continue

                    # Optimised rule-based strategies run only when parameters exist for this ticker.
                    params = best_ma_params_per_stock.get(ticker, {})
                    if params:
                        fold_stock_results[ticker]['MA Cross (Optimized)'] = backtest_traditional(ticker_data, generate_ma_cross_signals(ticker_data, **params), cash_per_stock)
                    params = best_rsi_params_per_stock.get(ticker, {})
                    if params:
                        fold_stock_results[ticker]['RSI (Optimized)'] = backtest_traditional(ticker_data, generate_rsi_signals(ticker_data, **params), cash_per_stock)
                    fold_stock_results[ticker]['MACD'] = backtest_traditional(ticker_data, generate_macd_signals(ticker_data), cash_per_stock)

                    # Buy & hold: the close price rescaled to start at the allotted cash.
                    fold_stock_results[ticker]['Benchmark'] = ticker_data['Close'] * (cash_per_stock / ticker_data['Close'].iloc[0])

                    ppo_model = trained_ppo_models.get(ticker)
                    if ppo_model:
                        fold_stock_results[ticker]['PPO'] = backtest_ppo(ppo_model, proc_data, engineer.scaled_feature_cols + engineer.sector_cols, cash_per_stock)
                        fold_stock_results[ticker]['Model-Based (MPC)'] = backtest_mpc(world_model, ppo_model, proc_data, engineer.scaled_feature_cols + engineer.sector_cols, cash_per_stock)
                    fold_stock_results[ticker]['DT (GAN+Online)'] = backtest_dt(model_dt, proc_data, CONFIG["WINDOW_SIZE"], cash_per_stock)
                all_folds_stock_results.append(fold_stock_results)
        finally:
            writer.close()

        # FINAL AGGREGATION & REPORTING
        print("\n\n" + "="*60)
        print("===== FINAL WALK-FORWARD PERFORMANCE SUMMARY =====")
        print("="*60)

        # NOTE(review): every fold is backtested from a fresh `cash_per_stock`,
        # so the stitched curves presumably jump at fold boundaries. Confirm
        # whether they should be chain-linked before computing metrics.
        final_stock_results = merge_fold_results(all_folds_stock_results)

        strategy_keys = ["DT (GAN+Online)", "PPO", "Model-Based (MPC)", "MA Cross (Optimized)", "RSI (Optimized)", "MACD", "Benchmark"]
        results_by_strategy = group_series_by_strategy_and_sector(final_stock_results, ticker_to_sector, strategy_keys)

        for strat, sectors in results_by_strategy.items():
            strat_name = strat.replace('Benchmark', 'Buy & Hold')
            print("\n" + "#"*60)
            print(f"## Strategy: {strat_name}")
            print("#"*60)

            all_stock_series = [s for sector_stocks in sectors.values() for s in sector_stocks]
            if not all_stock_series:
                print("\nNo trades were made by this strategy.")
                continue

            # Overall portfolio: sum of all per-stock curves aligned on the index.
            overall_portfolio = pd.concat(all_stock_series, axis=1).sum(axis=1)
            overall_portfolio = overall_portfolio[~overall_portfolio.index.duplicated(keep='first')]
            overall_metrics = calculate_performance_metrics(overall_portfolio)
            if overall_metrics:
                print_performance_summary("Overall Portfolio", overall_metrics)

            print("\n--- Per-Sector Performance Breakdown ---")
            for sector, stock_series_list in sorted(sectors.items()):
                # Stocks whose metrics cannot be computed (None) are left out of the statistics.
                sector_metrics = [m for s in stock_series_list if (m := calculate_performance_metrics(s)) is not None]
                if not sector_metrics:
                    continue
                avg_metrics = {k: np.mean([m[k] for m in sector_metrics]) for k in sector_metrics[0]}
                min_metrics = {k: np.min([m[k] for m in sector_metrics]) for k in sector_metrics[0]}
                max_metrics = {k: np.max([m[k] for m in sector_metrics]) for k in sector_metrics[0]}

                print(f"\n Sector: {sector} ({len(stock_series_list)} stocks)")
                print(f" {'Metric':<25} | {'Average':>12} | {'Max':>12} | {'Min':>12}")
                print("-" * 65)
                for k in avg_metrics:
                    # Returns, volatility and drawdown are shown as percentages; ratios as plain numbers.
                    is_percent = "Return" in k or "Volatility" in k or "Drawdown" in k
                    avg_str = f"{avg_metrics[k]:.2%}" if is_percent else f"{avg_metrics[k]:.2f}"
                    max_str = f"{max_metrics[k]:.2%}" if is_percent else f"{max_metrics[k]:.2f}"
                    min_str = f"{min_metrics[k]:.2%}" if is_percent else f"{min_metrics[k]:.2f}"
                    print(f" {k:<25} | {avg_str:>12} | {max_str:>12} | {min_str:>12}")

        launch_tensorboard()

Starting Main Walk-Forward Validation Process...

===== FOLD 4: Training on 2000-2007, Testing on 2008-2009 =====
Starting LSTM-PPO training to generate expert trajectories...


Training LSTM-PPO per stock:  14%|█▍        | 3/21 [01:37<09:45, 32.53s/it]


KeyboardInterrupt: 