In [9]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from tqdm import tqdm
import warnings

warnings.filterwarnings('ignore')

# ==============================================================================
# CONTROL PANEL & CONFIGURATION
# ==============================================================================
CONFIG = {
    "USE_ENSEMBLE_TRAJECTORIES": True,
    "USE_WALK_FORWARD_VALIDATION": True,
    "USE_RISK_MANAGEMENT_OVERLAY": True,
    "USE_DYNAMIC_TARGET_RETURN": True,
    "N_COMPONENTS_PCA": 10,
    "WINDOW_SIZE": 30,
    "MAX_DRAWDOWN_LIMIT": 0.20,
    "VOLATILITY_LIMIT_ATR": 1.5,
    "INITIAL_CASH": 10000,
}

# ==============================================================================
# DUMMY DATA & INDICATOR CALCULATION
# ==============================================================================
def calculate_indicators(df):
    """Calculates all necessary technical indicators for strategies."""
    df['SMA_20'] = df['Close'].rolling(window=20).mean()
    df['Std_Dev'] = df['Close'].rolling(window=20).std()
    df['BB_Upper'] = df['SMA_20'] + (df['Std_Dev'] * 2)
    df['BB_Lower'] = df['SMA_20'] - (df['Std_Dev'] * 2)
    
    delta = df['Close'].diff(1)
    gain = (delta.where(delta > 0, 0)).rolling(window=14).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(window=14).mean()
    rs = gain / loss
    df['RSI_14'] = 100 - (100 / (1 + rs))
    
    df['ATR_14'] = (df['High'] - df['Low']).rolling(window=14).mean()
    return df.dropna()

def create_dummy_dataframe(num_rows=5000):
    dates = pd.to_datetime(pd.date_range(start='2005-01-01', periods=num_rows))
    base_price = 100 + np.linspace(0, 200, num_rows) + np.sin(np.arange(num_rows) / 50) * 20 + np.random.randn(num_rows) * 10
    data = {'Date': dates, 'Ticker': 'DUMMY', 'Open': base_price + np.random.randn(num_rows) * 2}
    data['High'] = data['Open'] + np.random.uniform(0, 5, num_rows)
    data['Low'] = data['Open'] - np.random.uniform(0, 5, num_rows)
    data['Close'] = (data['Open'] + data['High'] + data['Low']) / 3 + np.random.randn(num_rows)
    df = pd.DataFrame(data)
    df = calculate_indicators(df)
    return df

df = create_dummy_dataframe()

# ==============================================================================
# 1. FEATURE ENGINEERING (Robust PCA)
# ==============================================================================
class FeatureEngineer:
    def __init__(self, n_components=10):
        self.scaler = StandardScaler()
        self.n_components_config = n_components
        self.pca = None
        self.feature_cols = [col for col in ['SMA_20', 'Std_Dev', 'RSI_14', 'ATR_14'] if col in df.columns]

    def fit_transform(self, data):
        data = data.copy()
        scaled_features = self.scaler.fit_transform(data[self.feature_cols])
        n_samples, n_features = scaled_features.shape
        actual_n_components = min(self.n_components_config, n_samples, n_features)
        
        if actual_n_components < self.n_components_config:
            print(f"PCA Warning: Using {actual_n_components} components instead of desired {self.n_components_config}.")
            
        self.pca = PCA(n_components=actual_n_components)
        pca_features = self.pca.fit_transform(scaled_features)
        
        feature_df = pd.DataFrame(pca_features, index=data.index, columns=[f'PC_{i+1}' for i in range(pca_features.shape[1])])
        processed_df = pd.concat([data[['Date', 'Close', 'ATR_14']], feature_df], axis=1)
        processed_df['Norm_ATR'] = data['ATR_14'] / data['ATR_14'].rolling(window=252, min_periods=1).mean()
        return processed_df.dropna()

    def transform(self, data):
        data = data.copy()
        scaled_features = self.scaler.transform(data[self.feature_cols])
        pca_features = self.pca.transform(scaled_features)
        feature_df = pd.DataFrame(pca_features, index=data.index, columns=[f'PC_{i+1}' for i in range(pca_features.shape[1])])
        processed_df = pd.concat([data[['Date', 'Close', 'ATR_14']], feature_df], axis=1)
        processed_df['Norm_ATR'] = data['ATR_14'] / data['ATR_14'].rolling(window=252, min_periods=1).mean()
        return processed_df.dropna()

# ==============================================================================
# 2. TRAJECTORY GENERATION
# ==============================================================================
def get_trajectories(data, window_size):
    short_ma = data['Close'].rolling(window=10).mean(); long_ma = data['Close'].rolling(window=30).mean()
    ma_policy_actions = np.where(short_ma > long_ma, 1, 0)
    
    s, a, r, t = _generate_trajectory_from_actions(data, ma_policy_actions)
    if len(s) <= window_size: return np.array([]), np.array([]), np.array([]), np.array([])

    rewards_to_go = np.cumsum(r[::-1])[::-1]
    
    all_states, all_actions, all_rtg, all_timesteps = [], [], [], []
    for i in range(len(s) - window_size):
        all_states.append(s[i:i+window_size])
        all_actions.append(a[i:i+window_size])
        all_rtg.append(rewards_to_go[i:i+window_size])
        all_timesteps.append(t[i:i+window_size])
            
    return np.array(all_states), np.array(all_actions), np.array(all_rtg), np.array(all_timesteps)

def _generate_trajectory_from_actions(data, actions):
    rewards = []
    cash, holdings = CONFIG["INITIAL_CASH"], 0
    feature_cols = [c for c in data.columns if c.startswith('PC_')]
    
    for i in range(1, len(data)):
        prev_portfolio_val = cash + holdings * data['Close'].iloc[i-1]
        action = actions[i]; current_price = data['Close'].iloc[i]

        if action == 1 and cash > current_price: holdings += cash / current_price; cash = 0
        elif action == 0 and holdings > 0: cash += holdings * current_price; holdings = 0
        
        current_portfolio_val = cash + holdings * current_price
        rewards.append(current_portfolio_val - prev_portfolio_val)
    
    states = data[feature_cols].values[1:]
    return states, actions[1:], np.array(rewards), np.arange(len(states))

# ==============================================================================
# 3. DECISION TRANSFORMER (PAPER-ACCURATE)
# ==============================================================================
class DecisionTransformer(nn.Module):
    def __init__(self, state_dim, act_dim, d_model, n_head, n_layer, max_ep_len):
        super().__init__()
        self.state_dim, self.act_dim, self.d_model = state_dim, act_dim, d_model
        self.transformer = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(d_model, n_head, d_model * 4, 0.1, batch_first=True), n_layer)
        self.embed_timestep = nn.Embedding(max_ep_len, d_model)
        self.embed_return = nn.Linear(1, d_model)
        self.embed_state = nn.Linear(state_dim, d_model)
        self.embed_action = nn.Embedding(act_dim, d_model)
        self.embed_ln = nn.LayerNorm(d_model)
        self.predict_action = nn.Sequential(nn.Linear(d_model, act_dim), nn.Softmax(dim=-1))

    def forward(self, states, actions, returns_to_go, timesteps):
        batch_size, seq_len = states.shape[0], states.shape[1]
        state_embeds = self.embed_state(states)
        action_embeds = self.embed_action(actions)
        rtg_embeds = self.embed_return(returns_to_go)
        time_embeds = self.embed_timestep(timesteps)
        state_embeds += time_embeds; action_embeds += time_embeds; rtg_embeds += time_embeds
        
        stacked_inputs = torch.stack((rtg_embeds, state_embeds, action_embeds), dim=1
        ).permute(0, 2, 1, 3).reshape(batch_size, 3 * seq_len, self.d_model)
        stacked_inputs = self.embed_ln(stacked_inputs)
        
        mask = nn.Transformer.generate_square_subsequent_mask(3 * seq_len).to(states.device)
        transformer_out = self.transformer(stacked_inputs, mask=mask)
        
        state_out = transformer_out[:, 1::3]
        action_preds = self.predict_action(state_out)
        return action_preds

# ==============================================================================
# 4. TRAINING & BACKTESTING
# ==============================================================================
def train(model, states, actions, rtg, timesteps, epochs=5, batch_size=64):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device).train()
    optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)
    s = torch.from_numpy(states).float()
    a = torch.from_numpy(actions).long()
    r = torch.from_numpy(rtg).float()
    t = torch.from_numpy(timesteps).long()

    for epoch in range(epochs):
        for i in tqdm(range(0, len(states), batch_size), desc=f"Epoch {epoch+1}"):
            s_b, a_b, r_b, t_b = s[i:i+batch_size].to(device), a[i:i+batch_size].to(device), r[i:i+batch_size].to(device), t[i:i+batch_size].to(device)
            if s_b.shape[0] == 0: continue
            action_preds = model(s_b, a_b, r_b.unsqueeze(-1), t_b)
            loss = F.cross_entropy(action_preds.reshape(-1, model.act_dim), a_b.reshape(-1))
            optimizer.zero_grad(); loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()
    return model

def backtest_dt(model, data, window_size, initial_cash):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.eval().to(device)
    cash, holdings = initial_cash, 0
    portfolio_values, high_water_mark = [initial_cash], initial_cash
    feature_cols = [c for c in data.columns if c.startswith('PC_')]
    
    context_states = torch.zeros(1, window_size, model.state_dim, device=device, dtype=torch.float32)
    context_actions = torch.zeros(1, window_size, dtype=torch.long, device=device)
    context_rtg = torch.zeros(1, window_size, 1, device=device, dtype=torch.float32)
    context_timesteps = torch.arange(window_size, device=device).reshape(1, window_size)

    print("Running Decision Transformer Backtest...")
    for i in tqdm(range(len(data))):
        current_state = torch.from_numpy(data[feature_cols].iloc[i].values).float().reshape(1, 1, model.state_dim).to(device)
        context_states = torch.cat([context_states[:, 1:], current_state], dim=1)
        
        target_return = initial_cash * 0.1
        if CONFIG["USE_DYNAMIC_TARGET_RETURN"] and 'Norm_ATR' in data.columns and data['Norm_ATR'].iloc[i] > 0:
            target_return *= data['Norm_ATR'].iloc[i]
        
        rtg_update = torch.tensor([[[target_return]]], device=device, dtype=torch.float32)
        context_rtg = torch.cat([context_rtg[:, 1:], rtg_update], dim=1)

        with torch.no_grad(): action_preds = model(context_states, context_actions, context_rtg, context_timesteps)
        proposed_action = torch.argmax(action_preds[0, -1, :]).item()
        
        final_action = proposed_action
        if CONFIG["USE_RISK_MANAGEMENT_OVERLAY"]:
             current_drawdown = (high_water_mark - portfolio_values[-1]) / high_water_mark if high_water_mark > 0 else 0
             if current_drawdown > CONFIG["MAX_DRAWDOWN_LIMIT"]: final_action = 0
             if 'Norm_ATR' in data.columns and data['Norm_ATR'].iloc[i] > CONFIG["VOLATILITY_LIMIT_ATR"]: final_action = 0
        
        current_price = data['Close'].iloc[i]
        if final_action == 1 and cash > current_price: holdings += cash / current_price; cash = 0
        elif final_action == 0 and holdings > 0: cash += holdings * current_price; holdings = 0
        
        new_value = cash + holdings * current_price
        portfolio_values.append(new_value); high_water_mark = max(high_water_mark, new_value)
        context_actions = torch.cat([context_actions[:, 1:], torch.tensor([[final_action]], device=device)], dim=1)

    return pd.Series(portfolio_values[1:], index=data['Date'])

# ==============================================================================
# 5. TRADITIONAL STRATEGIES & BENCHMARKING
# ==============================================================================
def generate_rsi_signals(data, buy_threshold=30, sell_threshold=70):
    signals = pd.Series(index=data.index, dtype=int)
    signals[data['RSI_14'] < buy_threshold] = 1
    signals[data['RSI_14'] > sell_threshold] = -1
    signals = signals.ffill().fillna(0)
    return signals

def generate_bb_signals(data):
    signals = pd.Series(index=data.index, dtype=int)
    signals[data['Close'] > data['BB_Upper']] = 1
    signals[data['Close'] < data['BB_Lower']] = -1
    signals = signals.ffill().fillna(0)
    return signals

def backtest_traditional(data, signals, initial_cash):
    cash = initial_cash
    holdings = 0
    portfolio_values = []

    for i in range(len(data)):
        signal = signals.iloc[i]
        current_price = data['Close'].iloc[i]
        
        if signal == 1 and holdings == 0:
            holdings = cash / current_price; cash = 0
        elif signal == -1 and holdings > 0:
            cash = holdings * current_price; holdings = 0
        
        portfolio_values.append(cash + holdings * current_price)
        
    return pd.Series(portfolio_values, index=data.index)

def display_performance(name, portfolio_values, benchmark_series):
    returns = portfolio_values.pct_change().dropna()
    if returns.empty or returns.std() == 0:
        print(f"\n--- Performance: {name} ---\nStrategy made no trades or had no volatility."); return {}
    
    final_val = portfolio_values.iloc[-1]
    total_return = (final_val - portfolio_values.iloc[0]) / portfolio_values.iloc[0]
    high_water_mark = portfolio_values.cummax()
    drawdown = (portfolio_values - high_water_mark) / high_water_mark
    max_drawdown = drawdown.min()
    sharpe_ratio = returns.mean() / returns.std() * np.sqrt(252) if returns.std() > 0 else 0
    
    print(f"\n--- Performance: {name} ---")
    print(f"Total Return: {total_return:.2%}")
    print(f"Max Drawdown: {max_drawdown:.2%}")
    print(f"Sharpe Ratio: {sharpe_ratio:.2f}")
    
    return {'Return': total_return, 'Max Drawdown': max_drawdown, 'Sharpe': sharpe_ratio}

# ==============================================================================
# 6. MAIN EXECUTION LOGIC
# ==============================================================================
if __name__ == "__main__":
    engineer = FeatureEngineer(n_components=CONFIG["N_COMPONENTS_PCA"])
    
    if not CONFIG["USE_WALK_FORWARD_VALIDATION"]: pass
    else:
        print("Running Walk-Forward Validation.")
        df['Date'] = pd.to_datetime(df['Date']); df = df.set_index('Date')
        unique_years = df.index.year.unique(); train_window_yrs, test_window_yrs = 8, 2
        
        all_results = []

        for i in range(train_window_yrs, len(unique_years), test_window_yrs):
            train_start_year, train_end_year = unique_years[i - train_window_yrs], unique_years[i - 1]
            test_start_year = unique_years[i]
            test_end_year = unique_years[min(i + test_window_yrs - 1, len(unique_years)-1)]
            print(f"\n===== FOLD: Training on {train_start_year}-{train_end_year}, Testing on {test_start_year}-{test_end_year} =====")
            
            train_df, test_df = df[str(train_start_year):str(train_end_year)], df[str(test_start_year):str(test_end_year)]
            processed_train = engineer.fit_transform(train_df.reset_index()); processed_test = engineer.transform(test_df.reset_index())
            
            states, actions, rtg, timesteps = get_trajectories(processed_train, CONFIG["WINDOW_SIZE"])
            if states.shape[0] < 1: print("Not enough data for DT. Skipping fold."); continue
            
            state_dim = states.shape[2]
            model = DecisionTransformer(state_dim, 2, d_model=128, n_head=4, n_layer=3, max_ep_len=10000)
            model = train(model, states, actions, rtg, timesteps, epochs=5)
            dt_portfolio = backtest_dt(model, processed_test, CONFIG["WINDOW_SIZE"], CONFIG["INITIAL_CASH"])
            
            rsi_signals = generate_rsi_signals(test_df)
            rsi_portfolio = backtest_traditional(test_df, rsi_signals, CONFIG["INITIAL_CASH"])
            
            bb_signals = generate_bb_signals(test_df)
            bb_portfolio = backtest_traditional(test_df, bb_signals, CONFIG["INITIAL_CASH"])

            all_results.append({'DT': dt_portfolio, 'RSI': rsi_portfolio, 'BBands': bb_portfolio, 'Benchmark': test_df['Close']})

        # --- Final Performance Aggregation and Comparison ---
        print("\n\n" + "="*50); print("===== FINAL WALK-FORWARD PERFORMANCE SUMMARY ====="); print("="*50)

        final_portfolios = {}
        for key in ['DT', 'RSI', 'BBands', 'Benchmark']:
            full_series = pd.concat([res[key] for res in all_results])
            if key == 'Benchmark':
                final_portfolios['Buy & Hold'] = full_series * (CONFIG["INITIAL_CASH"] / full_series.iloc[0])
            else:
                returns = full_series.pct_change().fillna(0)
                equity_curve = [CONFIG["INITIAL_CASH"]]
                for r in returns.iloc[1:]: equity_curve.append(equity_curve[-1] * (1 + r))
                
                # --- FIX: Use the full index to match the length of the equity curve ---
                final_portfolios[key] = pd.Series(equity_curve, index=full_series.index)

        benchmark_series = final_portfolios['Buy & Hold']
        for name, portfolio in final_portfolios.items():
            if not portfolio.empty:
                display_performance(name, portfolio, benchmark_series)

Running Walk-Forward Validation.

===== FOLD: Training on 2005-2012, Testing on 2013-2014 =====


Epoch 1: 100%|██████████| 45/45 [00:00<00:00, 135.48it/s]
Epoch 2: 100%|██████████| 45/45 [00:00<00:00, 154.35it/s]
Epoch 3: 100%|██████████| 45/45 [00:00<00:00, 148.90it/s]
Epoch 4: 100%|██████████| 45/45 [00:00<00:00, 151.89it/s]
Epoch 5: 100%|██████████| 45/45 [00:00<00:00, 146.20it/s]


Running Decision Transformer Backtest...


100%|██████████| 730/730 [00:01<00:00, 550.38it/s]



===== FOLD: Training on 2007-2014, Testing on 2015-2016 =====


Epoch 1: 100%|██████████| 46/46 [00:00<00:00, 143.51it/s]
Epoch 2: 100%|██████████| 46/46 [00:00<00:00, 149.65it/s]
Epoch 3: 100%|██████████| 46/46 [00:00<00:00, 155.11it/s]
Epoch 4: 100%|██████████| 46/46 [00:00<00:00, 148.43it/s]
Epoch 5: 100%|██████████| 46/46 [00:00<00:00, 149.95it/s]


Running Decision Transformer Backtest...


100%|██████████| 731/731 [00:01<00:00, 543.16it/s]



===== FOLD: Training on 2009-2016, Testing on 2017-2018 =====


Epoch 1: 100%|██████████| 46/46 [00:00<00:00, 146.55it/s]
Epoch 2: 100%|██████████| 46/46 [00:00<00:00, 157.75it/s]
Epoch 3: 100%|██████████| 46/46 [00:00<00:00, 163.99it/s]
Epoch 4: 100%|██████████| 46/46 [00:00<00:00, 157.62it/s]
Epoch 5: 100%|██████████| 46/46 [00:00<00:00, 150.91it/s]


Running Decision Transformer Backtest...


100%|██████████| 617/617 [00:01<00:00, 543.69it/s]



===== FINAL WALK-FORWARD PERFORMANCE SUMMARY =====

--- Performance: DT ---
Total Return: -0.00%
Max Drawdown: -24.70%
Sharpe Ratio: 0.06

--- Performance: RSI ---
Strategy made no trades or had no volatility.

--- Performance: BBands ---
Total Return: -64.93%
Max Drawdown: -73.08%
Sharpe Ratio: 0.19

--- Performance: Buy & Hold ---
Total Return: 22.52%
Max Drawdown: -35.44%
Sharpe Ratio: 0.47



