In [3]:
"""
STEP 4: Test Trained RL Agent on Days 14-15
Evaluates the trained Double DQN agent on test data
Run this after Step 3 (Train_RL_Agent.py)
"""

import os
os.environ['KMP_DUPLICATE_LIB_OK'] = 'TRUE'

import numpy as np
import torch
import torch.nn as nn
import pickle
from pathlib import Path
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from datetime import datetime

# Announce which pipeline stage this script implements.
print("="*60, "STEP 4: TEST TRAINED RL AGENT", "="*60, sep="\n")

# ============================================================
# PATHS
# ============================================================
# Inputs (pickled test split), trained checkpoints, and the directory that
# receives this script's artifacts.
RL_INPUT_PATH = Path("C:/Users/wdkal/Downloads/RL_INPUTS")
RL_OUTPUT_PATH = Path("C:/Users/wdkal/Downloads/RL_OUTPUTS")
TEST_OUTPUT_PATH = Path("C:/Users/wdkal/Downloads/TEST_RESULTS")

# Only the output directory is created here; the other two must already exist.
TEST_OUTPUT_PATH.mkdir(parents=True, exist_ok=True)

print(
    f"\nInput path: {RL_INPUT_PATH}",
    f"Model path: {RL_OUTPUT_PATH}",
    f"Output path: {TEST_OUTPUT_PATH}",
    sep="\n",
)

# ============================================================
# LOAD TRADING ENVIRONMENT (Same as training)
# ============================================================

class TradingEnvironment:
    """Replay environment that scores a position-sizing policy on stored predictions.

    The environment walks once through a pickled sequence of events. At each
    event the caller receives a state vector (see ``get_state``) and answers
    with one of seven discrete actions that shift the current position; the
    position is clamped to ``[-1.0, 1.0]``.

    Args:
        rl_input_path: Path of a pickle file containing a dict with
            ``['predictions'][name]`` for ``name`` in ``'xgb'``, ``'lstm'``,
            ``'tcn'``, ``'transformer'`` and ``['actual_labels']``.
        transaction_cost: Factor applied to the absolute position change
            (scaled by 100) and charged against the step reward.
    """

    # Position delta applied for each discrete action index.
    _ACTION_DELTAS = {0: -0.5, 1: -0.3, 2: -0.1, 3: 0.0, 4: 0.1, 5: 0.3, 6: 0.5}

    # A trade counts as "recent" when its step is within this many steps.
    _RECENT_TRADE_WINDOW = 100

    def __init__(self, rl_input_path, transaction_cost=0.001):
        # NOTE(review): pickle.load can execute arbitrary code embedded in the
        # file; only point this at files produced by your own pipeline.
        with open(rl_input_path, 'rb') as f:
            data = pickle.load(f)

        predictions = data['predictions']
        self.xgb_preds = predictions['xgb']
        self.lstm_preds = predictions['lstm']
        self.tcn_preds = predictions['tcn']
        self.transformer_preds = predictions['transformer']
        self.actual_labels = data['actual_labels']

        self.num_events = len(self.actual_labels)
        self.transaction_cost = transaction_cost

        print(f"  Loaded {self.num_events:,} test events")

        self.reset()

    def reset(self):
        """Clear all episode bookkeeping and return the first state.

        Returns:
            The state for step 0, or ``None`` when there are no events.
        """
        self.current_step = 0
        self.position = 0.0
        self.pnl = 0.0
        # Both histories start with the initial value so that index k
        # describes the situation after k steps.
        self.pnl_history = [0.0]
        self.trades = []
        self.position_history = [0.0]
        self.action_history = []
        return self.get_state()

    def _count_recent_trades(self):
        """Count logged trades whose step is newer than ``current_step - window``.

        ``step`` appends trades with strictly increasing step numbers, so the
        scan runs from the newest entry and stops at the first one outside
        the window instead of walking the whole list on every call.
        """
        threshold = self.current_step - self._RECENT_TRADE_WINDOW
        count = 0
        for trade in reversed(self.trades):
            if trade['step'] <= threshold:
                break
            count += 1
        return count

    def get_state(self):
        """Build the observation for the current step.

        The vector concatenates the four per-model prediction rows, four
        ensemble statistics, five derived market features and seven
        trading-state features. That totals 28 values when every prediction
        row has three entries (the only columns read are 0, 1 and 2, treated
        as down / neutral / up).

        Returns:
            A ``float32`` numpy array, or ``None`` once ``current_step`` has
            reached ``num_events``.
        """
        step = self.current_step
        if step >= self.num_events:
            return None

        # Per-model prediction rows for this event.
        xgb = self.xgb_preds[step]
        lstm = self.lstm_preds[step]
        tcn = self.tcn_preds[step]
        transformer = self.transformer_preds[step]

        # Ensemble statistics across the four models.
        all_probs = np.array([xgb, lstm, tcn, transformer])
        mean_up = np.mean(all_probs[:, 2])
        mean_neutral = np.mean(all_probs[:, 1])
        mean_down = np.mean(all_probs[:, 0])
        std_up = np.std(all_probs[:, 2])

        # Derived market features.
        price_trend = mean_up - mean_down
        confidence = max(mean_up, mean_neutral, mean_down)
        uncertainty = std_up
        non_neutral = mean_up + mean_down
        progress = float(step) / self.num_events

        # Trading-state features.
        num_recent_trades = self._count_recent_trades()
        recent_pnl = self.pnl_history[-1]

        # The layout below (including the repeated progress and uncertainty
        # values) is kept exactly as-is: the feature order is part of the
        # interface with the network that consumes this vector.
        state = np.concatenate([
            xgb, lstm, tcn, transformer,
            [mean_up, mean_neutral, mean_down, std_up],
            [price_trend, confidence, uncertainty, non_neutral, progress],
            [self.position, recent_pnl / 100, num_recent_trades / 10,
             progress, abs(self.position),
             1.0 if self.position > 0 else 0.0,
             1.0 if self.position < 0 else 0.0],
        ])

        return state.astype(np.float32)

    def step(self, action):
        """Apply one action and advance to the next event.

        Args:
            action: Integer key of ``_ACTION_DELTAS`` (0-6).

        Returns:
            Tuple ``(next_state, reward, done, info)``; ``next_state`` is
            ``None`` and ``reward`` is 0 on the step that runs past the last
            event, and ``info`` is always an empty dict.

        Raises:
            KeyError: If ``action`` is not one of the seven known actions.
        """
        position_change = self._ACTION_DELTAS[action]

        old_position = self.position
        new_position = np.clip(self.position + position_change, -1.0, 1.0)
        # Clamping can shrink the requested change; cost is charged on the
        # change that actually happened.
        actual_change = new_position - old_position

        cost = abs(actual_change) * self.transaction_cost * 100

        self.position = new_position
        self.position_history.append(self.position)
        self.action_history.append(action)
        self.current_step += 1

        done = False
        reward = 0

        if self.current_step < self.num_events:
            # The label of the event just entered drives the price move:
            # (label - 1) * 0.5 gives -0.5 / 0 / +0.5 for labels 0 / 1 / 2.
            actual_label = self.actual_labels[self.current_step]
            price_movement = (actual_label - 1) * 0.5

            # Reward = position x price move x 100, minus transaction cost
            # and a quadratic penalty on position size.
            pnl = self.position * price_movement * 100
            reward = pnl - cost - 0.1 * (self.position ** 2)

            self.pnl += reward
            self.pnl_history.append(self.pnl)

            # Log a trade whenever the position actually moved. The stored
            # 'pnl' is this step's reward, not a realized round-trip PnL.
            if actual_change != 0:
                self.trades.append({
                    'step': self.current_step,
                    'action': action,
                    'old_position': old_position,
                    'new_position': self.position,
                    'change': actual_change,
                    'pnl': reward,
                    'cumulative_pnl': self.pnl
                })
        else:
            # Past the last event: nothing is booked for this final action
            # (its transaction cost is not charged).
            done = True

        next_state = self.get_state()
        return next_state, reward, done, {}

    def get_metrics(self):
        """Summarize the episode recorded so far.

        Returns:
            Dict with ``total_pnl``, ``num_trades``, ``sharpe`` (mean / std of
            per-step PnL changes times sqrt of their count), ``max_drawdown``
            (most negative gap between cumulative PnL and its running
            maximum, so it is <= 0), ``win_rate``, ``avg_win``, ``avg_loss``
            (all three computed from the logged trades' step rewards) and
            ``final_position``. The same keys are returned when fewer than
            two PnL points exist.
        """
        if len(self.pnl_history) < 2:
            # Keep the key set identical to the normal return so callers can
            # index every metric without special-casing an empty run.
            return {
                'total_pnl': 0, 'sharpe': 0, 'num_trades': 0,
                'max_drawdown': 0, 'win_rate': 0, 'avg_win': 0, 'avg_loss': 0,
                'final_position': self.position
            }

        total_pnl = self.pnl_history[-1]
        returns = np.diff(self.pnl_history)
        mean_return = np.mean(returns)
        std_return = np.std(returns) if len(returns) > 1 else 1

        # Guard against a flat PnL curve (zero variance).
        sharpe = (mean_return / std_return) * np.sqrt(len(returns)) if std_return > 0 else 0

        cummax = np.maximum.accumulate(self.pnl_history)
        drawdown = np.array(self.pnl_history) - cummax
        max_drawdown = np.min(drawdown)

        trade_pnls = [t['pnl'] for t in self.trades]
        wins = [p for p in trade_pnls if p > 0]
        losses = [p for p in trade_pnls if p < 0]
        win_rate = len(wins) / len(trade_pnls) if trade_pnls else 0
        avg_win = np.mean(wins) if wins else 0
        avg_loss = np.mean(losses) if losses else 0

        return {
            'total_pnl': total_pnl,
            'num_trades': len(self.trades),
            'sharpe': sharpe,
            'max_drawdown': max_drawdown,
            'win_rate': win_rate,
            'avg_win': avg_win,
            'avg_loss': avg_loss,
            'final_position': self.position
        }

# ============================================================
# LOAD AGENT
# ============================================================

class QNetwork(nn.Module):
    """Three-layer MLP that maps a state vector to one Q-value per action.

    Layout: Linear -> ReLU -> Dropout(0.2) -> Linear -> ReLU -> Dropout(0.2)
    -> Linear. The layers live in ``self.network`` in that order, so
    state-dict keys are ``network.0.*``, ``network.3.*`` and ``network.6.*``.

    Args:
        state_dim: Size of the input state vector.
        action_dim: Number of discrete actions (output width).
        hidden_dim: Width of both hidden layers.
    """

    def __init__(self, state_dim, action_dim, hidden_dim=128):
        super().__init__()
        # Layers are created in forward order so parameter initialization
        # consumes the RNG in the same sequence as a direct Sequential(...).
        layers = [
            nn.Linear(state_dim, hidden_dim),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(hidden_dim, action_dim),
        ]
        self.network = nn.Sequential(*layers)

    def forward(self, state):
        """Return the Q-values produced by the stacked layers for ``state``."""
        q_values = self.network(state)
        return q_values

class Agent:
    """Inference-only wrapper around a trained Q-network checkpoint.

    Args:
        model_path: Path of a checkpoint readable by ``torch.load`` that
            holds the network weights under ``'policy_net'``.
        state_dim: Input width of the network; must match the checkpoint.
        action_dim: Number of actions; must match the checkpoint.
        hidden_dim: Hidden-layer width; must match the checkpoint.

    Raises:
        KeyError: If the checkpoint has no ``'policy_net'`` entry.
        RuntimeError: If the stored weights do not fit the requested
            dimensions (raised by ``load_state_dict``).
    """

    def __init__(self, model_path, state_dim=28, action_dim=7, hidden_dim=128):
        self.policy_net = QNetwork(
            state_dim=state_dim, action_dim=action_dim, hidden_dim=hidden_dim
        )

        # NOTE(review): torch.load unpickles the file, which can execute
        # arbitrary code; load only checkpoints you produced yourself.
        # Recent PyTorch releases warn about this default -- consider
        # weights_only=True once the checkpoint contents are confirmed to be
        # plain tensors and primitives.
        checkpoint = torch.load(model_path, map_location='cpu')
        self.policy_net.load_state_dict(checkpoint['policy_net'])
        # Evaluation mode turns the Dropout layers into no-ops so that
        # action selection is deterministic.
        self.policy_net.eval()

        print(f"  âœ“ Loaded trained model from {model_path}")
        # Epsilon is reported for information only; a checkpoint without it
        # is still usable for greedy inference.
        if 'epsilon' in checkpoint:
            print(f"  âœ“ Model epsilon: {checkpoint['epsilon']:.3f}")

    def select_action(self, state):
        """Return the index of the highest Q-value for ``state`` (no exploration).

        Args:
            state: One state vector (array-like of floats), without a batch
                dimension.

        Returns:
            The greedy action index as a Python ``int``.
        """
        with torch.no_grad():
            # as_tensor replaces the legacy FloatTensor constructor; the
            # added leading axis forms a batch of one.
            state_tensor = torch.as_tensor(state, dtype=torch.float32).unsqueeze(0)
            q_values = self.policy_net(state_tensor)
            return q_values.argmax().item()

# ============================================================
# TEST AGENT
# ============================================================

print("\n" + "="*60, "LOADING TEST DATA AND MODEL", "="*60, sep="\n")

# Build the replay environment from the pickled test split.
print("\nLoading test environment...")
test_env = TradingEnvironment(RL_INPUT_PATH / "rl_input_test.pkl")

# Restore the trained policy network for greedy inference.
print("\nLoading trained agent...")
agent = Agent(RL_OUTPUT_PATH / "best_agent.pt")

print("\n" + "="*60, "RUNNING TEST", "="*60, sep="\n")

start_time = datetime.now()
step = 0
state = test_env.reset()

print("\nTesting agent on Days 14-15...")
print("Progress: ", end='', flush=True)

# One greedy pass over the test events. The loop ends when the environment
# reports done or stops handing back a state.
while state is not None:
    action = agent.select_action(state)
    state, reward, done, _ = test_env.step(action)
    step += 1

    # Emit a progress marker every 10,000 steps.
    if not step % 10000:
        print(f"{step:,}...", end='', flush=True)

    if done:
        break

test_time = (datetime.now() - start_time).total_seconds()
print(f" Done! ({step:,} steps in {test_time:.1f}s)")

# Aggregate the episode into summary statistics.
metrics = test_env.get_metrics()

# ============================================================
# RESULTS
# ============================================================

print("\n" + "="*60)
print("TEST RESULTS - DAYS 14-15")
print("="*60)
print(f"\nðŸ“Š Performance Metrics:")
print(f"  Total PnL:        ${metrics['total_pnl']:,.2f}")
print(f"  Sharpe Ratio:     {metrics['sharpe']:.2f}")
print(f"  Max Drawdown:     ${metrics['max_drawdown']:,.2f}")
print(f"  Number of Trades: {metrics['num_trades']:,}")
print(f"  Win Rate:         {metrics['win_rate']*100:.1f}%")
print(f"  Avg Win:          ${metrics['avg_win']:.2f}")
print(f"  Avg Loss:         ${metrics['avg_loss']:.2f}")
print(f"  Final Position:   {metrics['final_position']:.2f}")

# Action distribution: how often each of the seven actions was chosen.
# The explicit integer dtype keeps bincount well-defined even when the
# history is empty (an empty list would otherwise become a float array).
action_counts = np.bincount(
    np.asarray(test_env.action_history, dtype=np.intp), minlength=7
)
action_names = ['Sell L', 'Sell M', 'Sell S', 'HOLD', 'Buy S', 'Buy M', 'Buy L']
total_actions = len(test_env.action_history)
print(f"\nðŸ“ˆ Action Distribution:")
for name, count in zip(action_names, action_counts):
    # Report 0% instead of dividing by zero when no action was taken.
    pct = count / total_actions * 100 if total_actions else 0.0
    print(f"  {name:8s}: {count:6,} ({pct:5.1f}%)")

# ============================================================
# SAVE RESULTS
# ============================================================

print("\n" + "="*60, "SAVING RESULTS", "="*60, sep="\n")

# Bundle the metrics with the full per-step histories so the run can be
# re-analysed later without replaying the environment.
results = {
    'metrics': metrics,
    'pnl_history': test_env.pnl_history,
    'position_history': test_env.position_history,
    'action_history': test_env.action_history,
    'trades': test_env.trades,
    # tolist() turns the numpy counts into plain Python ints.
    'action_distribution': {
        name: count
        for name, count in zip(action_names, action_counts.tolist())
    },
}

with (TEST_OUTPUT_PATH / 'test_results.pkl').open('wb') as f:
    pickle.dump(results, f)
print(f"\nâœ“ Saved detailed results to {TEST_OUTPUT_PATH / 'test_results.pkl'}")

# ============================================================
# VISUALIZATIONS
# ============================================================

print("\n" + "="*60)
print("CREATING VISUALIZATIONS")
print("="*60)

# One figure, four rows: PnL, position, (action counts | trade PnL), drawdown.
fig = plt.figure(figsize=(16, 12))
gs = fig.add_gridspec(4, 2, hspace=0.3, wspace=0.3)

# 1. Cumulative PnL
ax1 = fig.add_subplot(gs[0, :])
ax1.plot(test_env.pnl_history, linewidth=1.5, color='darkblue')
ax1.set_title('Cumulative PnL Over Time', fontsize=14, fontweight='bold')
ax1.set_xlabel('Step')
ax1.set_ylabel('Cumulative PnL ($)')
ax1.grid(True, alpha=0.3)
ax1.axhline(y=0, color='red', linestyle='--', alpha=0.5)
# Shade the area under the curve green or red depending on the final PnL.
ax1.fill_between(range(len(test_env.pnl_history)), test_env.pnl_history, 0,
                  alpha=0.3, color='green' if test_env.pnl_history[-1] > 0 else 'red')

# 2. Position Over Time
ax2 = fig.add_subplot(gs[1, :])
ax2.plot(test_env.position_history, linewidth=1, color='purple')
ax2.set_title('Position Over Time', fontsize=14, fontweight='bold')
ax2.set_xlabel('Step')
ax2.set_ylabel('Position')
ax2.set_ylim(-1.1, 1.1)
ax2.grid(True, alpha=0.3)
ax2.axhline(y=0, color='black', linestyle='-', alpha=0.3)
ax2.axhline(y=1, color='green', linestyle='--', alpha=0.3, label='Long')
ax2.axhline(y=-1, color='red', linestyle='--', alpha=0.3, label='Short')
# A fixed location avoids matplotlib's 'best' placement search, which is
# slow (and warns) when the axes hold millions of points.
ax2.legend(loc='upper right')

# 3. Action Distribution
ax3 = fig.add_subplot(gs[2, 0])
colors = ['darkred', 'red', 'lightcoral', 'gray', 'lightgreen', 'green', 'darkgreen']
bars = ax3.bar(action_names, action_counts, color=colors, alpha=0.7)
ax3.set_title('Action Distribution', fontsize=12, fontweight='bold')
ax3.set_ylabel('Count')
ax3.tick_params(axis='x', rotation=45)
# Print the exact count above each bar.
for bar in bars:
    height = bar.get_height()
    ax3.text(bar.get_x() + bar.get_width()/2., height,
             f'{int(height):,}', ha='center', va='bottom', fontsize=9)

# 4. Trade PnL Distribution
ax4 = fig.add_subplot(gs[2, 1])
# Label the panel unconditionally so it is never an anonymous empty box.
ax4.set_title('Trade PnL Distribution', fontsize=12, fontweight='bold')
ax4.set_xlabel('PnL per Trade ($)')
ax4.set_ylabel('Frequency')
if test_env.trades:
    trade_pnls = [t['pnl'] for t in test_env.trades]
    ax4.hist(trade_pnls, bins=50, alpha=0.7, color='steelblue', edgecolor='black')
    ax4.axvline(x=0, color='red', linestyle='--', alpha=0.7)
    ax4.grid(True, alpha=0.3, axis='y')
else:
    ax4.text(0.5, 0.5, 'No trades executed', ha='center', va='center',
             transform=ax4.transAxes)

# 5. Drawdown (cumulative PnL minus its running maximum, always <= 0)
ax5 = fig.add_subplot(gs[3, :])
cummax = np.maximum.accumulate(test_env.pnl_history)
drawdown = np.array(test_env.pnl_history) - cummax
ax5.fill_between(range(len(drawdown)), drawdown, 0, alpha=0.5, color='red')
ax5.plot(drawdown, color='darkred', linewidth=1)
ax5.set_title('Drawdown Over Time', fontsize=14, fontweight='bold')
ax5.set_xlabel('Step')
ax5.set_ylabel('Drawdown ($)')
ax5.grid(True, alpha=0.3)
ax5.axhline(y=metrics['max_drawdown'], color='red', linestyle='--',
            label=f"Max DD: ${metrics['max_drawdown']:.2f}", alpha=0.7)
# Fixed location for the same reason as the position panel.
ax5.legend(loc='lower left')

plt.savefig(TEST_OUTPUT_PATH / 'test_results_visualization.png', dpi=150, bbox_inches='tight')
# Release the figure; pyplot otherwise keeps it (and its multi-million-point
# line data) alive for the rest of the session.
plt.close(fig)
print(f"\nâœ“ Saved visualization to {TEST_OUTPUT_PATH / 'test_results_visualization.png'}")

# ============================================================
# TRADE ANALYSIS
# ============================================================

# Show the first few logged trades as a fixed-width table. Each row's PnL is
# the reward of the step on which the position changed.
if test_env.trades:
    print("\n" + "="*60)
    print("SAMPLE TRADES (First 10)")
    print("="*60)
    print(f"\n{'Step':>8} {'Action':>8} {'Old Pos':>8} {'New Pos':>8} {'Change':>8} {'PnL':>10} {'Cum PnL':>10}")
    print("-" * 74)

    for trade in test_env.trades[:10]:
        action_name = action_names[trade['action']]
        print(f"{trade['step']:8,} {action_name:>8} {trade['old_position']:8.2f} "
              f"{trade['new_position']:8.2f} {trade['change']:8.2f} "
              f"${trade['pnl']:9.2f} ${trade['cumulative_pnl']:9.2f}")

    # Mention how many rows were left out of the sample.
    if len(test_env.trades) > 10:
        print(f"... ({len(test_env.trades) - 10:,} more trades)")

# ============================================================
# SUMMARY
# ============================================================

print("\n" + "="*60, "âœ… TESTING COMPLETE!", "="*60, sep="\n")

# Headline numbers, repeated from the detailed results for quick reading.
print(
    f"\nKey Results:",
    f"  â€¢ Final PnL: ${metrics['total_pnl']:,.2f}",
    f"  â€¢ Sharpe Ratio: {metrics['sharpe']:.2f}",
    f"  â€¢ Total Trades: {metrics['num_trades']:,}",
    f"  â€¢ Test Duration: {test_time:.1f} seconds",
    sep="\n",
)

# Where the artifacts written by this script can be found.
print(
    f"\nOutput Files:",
    f"  â€¢ Results: {TEST_OUTPUT_PATH / 'test_results.pkl'}",
    f"  â€¢ Visualization: {TEST_OUTPUT_PATH / 'test_results_visualization.png'}",
    sep="\n",
)

print("\nðŸŽ‰ RL Agent Testing Complete!", "="*60, sep="\n")

STEP 4: TEST TRAINED RL AGENT

Input path: C:\Users\wdkal\Downloads\RL_INPUTS
Model path: C:\Users\wdkal\Downloads\RL_OUTPUTS
Output path: C:\Users\wdkal\Downloads\TEST_RESULTS

LOADING TEST DATA AND MODEL

Loading test environment...
  Loaded 3,496,512 test events

Loading trained agent...
  âœ“ Loaded trained model from C:\Users\wdkal\Downloads\RL_OUTPUTS\best_agent.pt
  âœ“ Model epsilon: 0.493

RUNNING TEST

Testing agent on Days 14-15...
Progress: 

  checkpoint = torch.load(model_path, map_location='cpu')


10,000...20,000...30,000...40,000...50,000...60,000...70,000...80,000...90,000...100,000...110,000...120,000...130,000...140,000...150,000...160,000...170,000...180,000...190,000...200,000...210,000...220,000...230,000...240,000...250,000...260,000...270,000...280,000...290,000...300,000...310,000...320,000...330,000...340,000...350,000...360,000...370,000...380,000...390,000...400,000...410,000...420,000...430,000...440,000...450,000...460,000...470,000...480,000...490,000...500,000...510,000...520,000...530,000...540,000...550,000...560,000...570,000...580,000...590,000...600,000...610,000...620,000...630,000...640,000...650,000...660,000...670,000...680,000...690,000...700,000...710,000...720,000...730,000...740,000...750,000...760,000...770,000...780,000...790,000...800,000...810,000...820,000...830,000...840,000...850,000...860,000...870,000...880,000...890,000...900,000...910,000...920,000...930,000...940,000...950,000...960,000...970,000...980,000...990,000...1,000,000...1,010,0

  plt.savefig(TEST_OUTPUT_PATH / 'test_results_visualization.png', dpi=150, bbox_inches='tight')



âœ“ Saved visualization to C:\Users\wdkal\Downloads\TEST_RESULTS\test_results_visualization.png

SAMPLE TRADES (First 10)

    Step   Action  Old Pos  New Pos   Change        PnL    Cum PnL
--------------------------------------------------------------------------
       1   Sell M     0.00    -0.30    -0.30 $    14.96 $    14.96
       2    Buy M    -0.30     0.00     0.30 $    -0.03 $    14.93
       3   Sell M     0.00    -0.30    -0.30 $    14.96 $    29.89
       4    Buy M    -0.30     0.00     0.30 $    -0.03 $    29.86
       5   Sell M     0.00    -0.30    -0.30 $    -0.04 $    29.82
       7    Buy M    -0.30     0.00     0.30 $    -0.03 $    14.78
       8   Sell M     0.00    -0.30    -0.30 $   -15.04 $    -0.25
       9    Buy M    -0.30     0.00     0.30 $    -0.03 $    -0.28
      10   Sell M     0.00    -0.30    -0.30 $   -15.04 $   -15.32
      11    Buy M    -0.30     0.00     0.30 $    -0.03 $   -15.35
... (3,269 more trades)

âœ… TESTING COMPLETE!

Key Results:
  â