# PPO Agent Training and Testing for Stock Portfolio Management

This notebook implements a Proximal Policy Optimization (PPO) agent for automated stock trading with rolling window backtesting strategy. The agent is trained on DOW-30 stocks with technical indicators and tested against the Dow Jones Index benchmark.

## Overview
- **Data**: DOW-30 stocks from Yahoo Finance
- **Agent**: Proximal Policy Optimization (PPO) via ElegantRL
- **Environment**: StockTradingEnv with transaction costs
- **Validation**: Rolling window approach (252-day training, 20-day validation/test periods)
- **Benchmark**: Dow Jones Index


In [1]:
import os
import pandas as pd
import numpy as np
import torch
import yfinance as yf
import gymnasium as gym

original_torch_load = torch.load


def patched_torch_load(*args, **kwargs):
    kwargs.setdefault('weights_only', False)
    return original_torch_load(*args, **kwargs)


torch.load = patched_torch_load

original_download = yf.download


def patched_download(*args, **kwargs):
    kwargs.pop('proxy', None)
    return original_download(*args, **kwargs)


yf.download = patched_download

from finrl.meta.env_stock_trading.env_stocktrading import StockTradingEnv
from elegantrl.agents import AgentPPO
from elegantrl.train.run import train_agent
from ppo_env_wrapper import ElegantFinRLWrapper

try:
    from elegantrl.train.config import Config as Arguments
except ImportError:
    from elegantrl.train.config import Arguments

print("[âœ“] All imports successful!")

[âœ“] ElegantFinRLWrapper class defined!
[âœ“] All imports successful!


In [9]:
from finrl.meta.preprocessor.yahoodownloader import YahooDownloader
from finrl.config_tickers import DOW_30_TICKER
from finrl.meta.preprocessor.preprocessors import FeatureEngineer

def prepare_data(cache_file = "../data/finrl_dow30_cache.csv",
        TRAIN_START_DATE = '2010-01-01',
        TEST_END_DATE = '2023-12-30'):
    """
    Load stock data from cache or download from Yahoo Finance.
    Returns preprocessed dataframe with technical indicators.
    """
    
    # Create data directory if it doesn't exist
    os.makedirs(os.path.dirname(cache_file), exist_ok=True)
    
    if os.path.exists(cache_file):
        print(f"[INFO] Loading validated cache '{cache_file}'...")
        try:
            df = pd.read_csv(cache_file)
            df = df.drop_duplicates(subset=['date', 'tic'])
            if len(df) > 0:
                print(f"[âœ“] Cache loaded: {len(df)} records, Date range: {df['date'].min()} to {df['date'].max()}")
                return df
            else:
                print("[!] Cache file is empty, will re-download...")
        except Exception as e:
            print(f"[!] Error loading cache: {e}, will re-download...")

    

    df = YahooDownloader(start_date=TRAIN_START_DATE,
                        end_date=TEST_END_DATE,
                        ticker_list=DOW_30_TICKER).fetch_data()


    print(f"[âœ“] Data preparation complete!")
    print(f"    - Total records: {len(df)}")
    print(f"    - Date range: {df['date'].min()} to {df['date'].max()}")
    print(f"    - Unique tickers: {df['tic'].nunique()}")
    # print(f"    - Complete dates: {len(valid_dates)}")

    # Add technical indicators
    print("\n[STEP 2.5] Calculating technical indicators...")

    # Define indicators to calculate
    TECHNICAL_INDICATORS = ["macd", "boll_ub", "boll_lb", "rsi_30", "cci_30", "dx_30", "close_30_sma", "close_60_sma"]

    try:
        fe = FeatureEngineer(
            use_technical_indicator=True,
            tech_indicator_list=TECHNICAL_INDICATORS,
            use_vix=False,
            use_turbulence=False,
            user_defined_feature=False
        )
        
        df = fe.preprocess_data(df)
        
        # Rename close_30_sma and close_60_sma to match expected names
        if 'close_30_sma' in df.columns:
            df = df.rename(columns={'close_30_sma': 'close_30'})
        if 'close_60_sma' in df.columns:
            df = df.rename(columns={'close_60_sma': 'close_60'})
        
        print(f"[âœ“] Technical indicators calculated!")
        print(f"    Available columns: {df.columns.tolist()}")
        
    except Exception as e:
        print(f"[!] Error calculating technical indicators: {e}")
        print(f"[!] Attempting alternative method...")
        
        # Alternative: Calculate indicators manually using ta library
        import ta
        
        df_list = []
        for ticker in df['tic'].unique():
            ticker_df = df[df['tic'] == ticker].copy()
            ticker_df = ticker_df.sort_values('date')
            
            # Calculate indicators
            ticker_df['macd'] = ta.trend.macd_diff(ticker_df['close'])
            
            bb = ta.volatility.BollingerBands(ticker_df['close'])
            ticker_df['boll_ub'] = bb.bollinger_hband()
            ticker_df['boll_lb'] = bb.bollinger_lband()
            
            ticker_df['rsi_30'] = ta.momentum.rsi(ticker_df['close'], window=30)
            ticker_df['cci_30'] = ta.trend.cci(ticker_df['high'], ticker_df['low'], ticker_df['close'], window=30)
            ticker_df['dx_30'] = ta.trend.adx(ticker_df['high'], ticker_df['low'], ticker_df['close'], window=30)
            ticker_df['close_30'] = ticker_df['close'].rolling(window=30).mean()
            ticker_df['close_60'] = ticker_df['close'].rolling(window=60).mean()
            
            df_list.append(ticker_df)
        
        df = pd.concat(df_list, ignore_index=True)
        df = df.dropna()  # Remove rows with NaN from indicator calculation
        
        print(f"[âœ“] Technical indicators calculated using ta library!")

    if df is not None and not df.empty:
        df.to_csv(cache_file, index=False)
        print(f"[âœ“] Data saved to cache: {cache_file}")
    else:
        print("[ERROR] Failed to download data or received empty dataframe")
        return None
    
    return df

def split_train_test(df,
            TRAIN_START_DATE = '2010-01-07',
            TRAIN_END_DATE = '2023-10-24',
            TEST_START_DATE = '2023-10-25',
            TEST_END_DATE = '2023-11-21'):

    df['date'] = pd.to_datetime(df['date'])

    df_train = df[
        (df['date'] >= TRAIN_START_DATE) &
        (df['date'] <= TRAIN_END_DATE)
    ].copy()

    df_test = df[
        (df['date'] >= TEST_START_DATE) &
        (df['date'] <= TEST_END_DATE)
    ].copy()

    print(f"[INFO] Train: {df_train['date'].min()} â†’ {df_train['date'].max()}, "
          f"{len(df_train)} rows")

    print(f"[INFO] Test:  {df_test['date'].min()} â†’ {df_test['date'].max()}, "
          f"{len(df_test)} rows")

    return df_train, df_test

# Load data
print("\n[STEP 1] Loading stock data...")
df = prepare_data()
df_train, df_test = split_train_test(df)


[STEP 1] Loading stock data...
[INFO] Loading validated cache '../data/finrl_dow30_cache.csv'...
[âœ“] Cache loaded: 98616 records, Date range: 2010-01-04 to 2023-12-29
[INFO] Train: 2010-01-07 00:00:00 â†’ 2023-10-24 00:00:00, 97244 rows
[INFO] Test:  2023-10-25 00:00:00 â†’ 2023-11-21 00:00:00, 560 rows


In [10]:
print(df_train.head())
print(df_test.head())
print(df_train.columns.tolist())

         date      close       high        low       open     volume   tic  \
84 2010-01-07   6.309610   6.352158   6.263767   6.344667  477131200  AAPL   
85 2010-01-07  38.060406  38.236266  36.964656  38.155100   10371600  AMGN   
86 2010-01-07  33.469982  33.677278  32.776346  32.895937    8981700   AXP   
87 2010-01-07  48.468548  48.554264  45.990574  46.372398   14379100    BA   
88 2010-01-07  39.948696  40.102682  39.265815  39.700984    5432900   CAT   

    day      macd    boll_ub    boll_lb      rsi_30      cci_30       dx_30  \
84    3 -0.004614   6.485744   6.247635    8.575576 -112.342924   61.786706   
85    3 -0.035976  39.389440  37.718878    0.000000 -133.333333  100.000000   
86    3  0.040333  33.802113  31.848364   93.971096  105.083070  100.000000   
87    3  0.172910  50.009103  42.011025  100.000000  103.114463  100.000000   
88    3  0.025527  40.295855  39.005691  100.000000   60.985859   25.731214   

     close_30   close_60  
84   6.366690   6.366690  
85

In [11]:
def setup_ppo_args(env_args, cwd_path):
    args = Arguments(agent_class=AgentPPO, env_class=ElegantFinRLWrapper)
    args.env_args = env_args
    args.env_name = env_args['env_name']

    args.net_dims = (128, 64)
    args.state_dim = env_args['state_dim']
    args.action_dim = env_args['action_dim']
    args.if_discrete = env_args['if_discrete']

    args.learning_rate = 1e-4
    args.batch_size = 128

    args.target_step = 2000
    args.break_step = 40000

    # Training progress and logging
    args.worker_num = 1
    args.eval_proc_num = 0
    args.if_use_multi_processing = False
    args.eval_gap = 500  # Evaluate every 500 steps to see progress
    args.save_gap = 500  # Save checkpoint every 500 steps
    
    # Enable verbose output for monitoring
    args.if_save = True
    args.if_overwrite_save = True

    args.cwd = cwd_path
    args.if_remove = True
    return args

print("[âœ“] setup_ppo_args function defined!")

[âœ“] setup_ppo_args function defined!


In [12]:
def real_test_inference(test_df, stock_dim, indicators, args):
    params = {
        "df": test_df, "stock_dim": stock_dim, "hmax": 100,
        "initial_amount": 1000000, "num_stock_shares": [0] * stock_dim,
        "buy_cost_pct": [0.001] * stock_dim,
        "sell_cost_pct": [0.001] * stock_dim,
        "reward_scaling": 1e-4,
        "state_space": 1 + stock_dim * (len(indicators) + 2),
        "action_space": stock_dim, "tech_indicator_list": indicators,
        "env_name": "test_inference", "state_dim": 1 + stock_dim * (len(indicators) + 2),
        "action_dim": stock_dim, "if_discrete": False, "target_return": 10.0
    }
    env = ElegantFinRLWrapper(**params)

    agent = AgentPPO(args.net_dims, args.state_dim, args.action_dim)
    # agent.save_or_load_agent(args.cwd, if_save=False)
    agent.act.eval()

    res = env.reset()
    state = res[0] if isinstance(res, tuple) else res

    done = False
    while not done:
        s_tensor = torch.as_tensor((state,), dtype=torch.float32, device=agent.device)
        with torch.no_grad():
            action = agent.act(s_tensor).detach().cpu().numpy()[0]

        step_res = env.step(action)
        if len(step_res) == 5:
            state, reward, term, trunc, _ = step_res
            done = term or trunc
        else:
            state, reward, done, _, _ = step_res

    return env.env.save_asset_memory()

print("[âœ“] real_test_inference function defined!")

[âœ“] real_test_inference function defined!


In [None]:
# Training and Testing with Full Dataset
import warnings
warnings.filterwarnings('ignore')

ALL_INDICATORS = ["macd", "boll_ub", "boll_lb", "rsi_30", "cci_30", "dx_30", "close_30", "close_60"]

# Prepare training data
stock_dimension = len(df_train['tic'].unique())
state_dim = 1 + stock_dimension * (len(ALL_INDICATORS) + 2)

# Create day indices for training data
train_dates = sorted(df_train['date'].unique())
date_map_train = {date: idx for idx, date in enumerate(train_dates)}
df_train['day'] = df_train['date'].map(date_map_train)
df_train.set_index('day', inplace=True, drop=False)

# Create day indices for test data
test_dates = sorted(df_test['date'].unique())
date_map_test = {date: idx for idx, date in enumerate(test_dates)}
df_test['day'] = df_test['date'].map(date_map_test)
df_test.set_index('day', inplace=True, drop=False)

print(f"[INFO] Stock dimension: {stock_dimension}")
print(f"[INFO] State dimension: {state_dim}")
print(f"[INFO] Training data shape: {df_train.shape}")
print(f"[INFO] Test data shape: {df_test.shape}")

# Setup environment parameters for training
env_params = {
    "env_name": "FinRL_PPO_Train",
    "df": df_train,
    "stock_dim": stock_dimension,
    "hmax": 100,
    "initial_amount": 1000000,
    "num_stock_shares": [0] * stock_dimension,
    "buy_cost_pct": [0.001] * stock_dimension,
    "sell_cost_pct": [0.001] * stock_dimension,
    "reward_scaling": 1e-4,
    "state_space": state_dim,
    "action_space": stock_dimension,
    "tech_indicator_list": ALL_INDICATORS,
    "state_dim": state_dim,
    "action_dim": stock_dimension,
    "if_discrete": False,
    "target_return": 10.0
}

print(f"\n[STEP 6] Starting PPO Agent Training on Full Dataset...")
print(f"Training period: {df_train['date'].min()} to {df_train['date'].max()}")
print(f"Test period: {df_test['date'].min()} to {df_test['date'].max()}")
print("\n" + "="*80)

try:
    cwd_path = "../checkpoints/ppo_full_train"
    os.makedirs(cwd_path, exist_ok=True)

    args = setup_ppo_args(env_params, cwd_path)

    print(f"\n[Training Configuration]")
    print(f"  Network Dims: {args.net_dims}")
    print(f"  Learning Rate: {args.learning_rate}")
    print(f"  Batch Size: {args.batch_size}")
    print(f"  Target Steps per Update: {args.target_step}")
    print(f"  Total Training Steps: {args.break_step}")
    print(f"  Evaluation Gap: {args.eval_gap} steps")
    print(f"  Checkpoint Path: {cwd_path}")
    print("="*80 + "\n")

    print("[-->] Starting PPO Agent Training...")
    
    train_agent(args)
    
    print("\n" + "="*80)
    print("[âœ“] Training completed successfully!")
    print("="*80)

    # Run inference on test data
    print(f"\n[STEP 6.5] Running inference on test data...")
    test_results = real_test_inference(df_test, stock_dimension, ALL_INDICATORS, args)
    
    # Save test results
    test_results.to_csv("ppo_test_results.csv", index=False)
    print(f"[âœ“] Test results saved to 'ppo_test_results.csv'")
    print(f"    Records: {len(test_results)}")
    print(f"    Date range: {test_results['date'].min()} to {test_results['date'].max()}")
    
    # Calculate and display initial performance metrics
    initial_value = test_results['account_value'].iloc[0]
    final_value = test_results['account_value'].iloc[-1]
    total_return = (final_value / initial_value - 1) * 100
    
    print(f"    Initial Portfolio Value: ${initial_value:,.2f}")
    print(f"    Final Portfolio Value: ${final_value:,.2f}")
    print(f"    Total Return: {total_return:.2f}%")

except Exception as e:
    print(f"[ERROR] Training failed: {e}")
    import traceback
    traceback.print_exc()

[INFO] Stock dimension: 28
[INFO] State dimension: 281
[INFO] Training data shape: (97244, 16)
[INFO] Test data shape: (560, 16)

[STEP 6] Starting PPO Agent Training on Full Dataset...
Training period: 2010-01-07 00:00:00 to 2023-10-24 00:00:00
Test period: 2023-10-25 00:00:00 to 2023-11-21 00:00:00

[ERROR] Training failed: name 'setup_ppo_args' is not defined


Traceback (most recent call last):
  File "/var/folders/2t/p39lh5hx71794_67j85_rwlr0000gn/T/ipykernel_77217/3569747678.py", line 57, in <module>
    args = setup_ppo_args(env_params, cwd_path)
NameError: name 'setup_ppo_args' is not defined


In [11]:
import matplotlib.pyplot as plt

def calculate_metrics(df, column_name='account_value'):
    """Calculate key performance metrics"""
    daily_return = df[column_name].pct_change().dropna()

    cum_return = (df[column_name].iloc[-1] / df[column_name].iloc[0]) - 1

    if daily_return.std() != 0:
        sharpe_ratio = (252 ** 0.5) * (daily_return.mean() / daily_return.std())
    else:
        sharpe_ratio = 0

    rolling_max = df[column_name].cummax()
    drawdown = (df[column_name] - rolling_max) / rolling_max
    max_drawdown = drawdown.min()

    return cum_return, sharpe_ratio, max_drawdown


def ppo_test():
    """Test PPO strategy performance and compare with benchmark"""
    result_file = "ppo_test_results.csv"

    try:
        df = pd.read_csv(result_file)
        df['date'] = pd.to_datetime(df['date'])
        df = df.drop_duplicates(subset=['date']).sort_values('date').reset_index(drop=True)

        start_date = df['date'].iloc[0].strftime('%Y-%m-%d')
        end_date = df['date'].iloc[-1].strftime('%Y-%m-%d')
        initial_capital = df['account_value'].iloc[0]

        print(f"\n[STEP 7] Performance Analysis")
        print(f"[*] Successfully loaded PPO trading records!")
        print(f"    Testing period: {start_date} to {end_date}")
        print(f"    Test duration: {len(df)} trading days")
        print(f"[*] Downloading Dow Jones Index as benchmark...")

        # Download benchmark data
        benchmark = yf.download("^DJI", start=start_date, end=end_date, progress=False)

        if isinstance(benchmark.columns, pd.MultiIndex):
            benchmark.columns = benchmark.columns.get_level_values(0)

        benchmark = benchmark.reset_index()

        # Unify date column name
        if 'Date' not in benchmark.columns and 'index' in benchmark.columns:
            benchmark = benchmark.rename(columns={'index': 'Date'})

        benchmark['Date'] = pd.to_datetime(benchmark['Date'])

        # Merge tables
        df = pd.merge(df, benchmark[['Date', 'Close']], left_on='date', right_on='Date', how='left')
        df['Close'] = df['Close'].ffill()  # Fill missing weekend/holiday data

        df['benchmark_value'] = (df['Close'] / df['Close'].iloc[0]) * initial_capital

        # Calculate metrics
        ppo_ret, ppo_sharpe, ppo_mdd = calculate_metrics(df, 'account_value')
        bm_ret, bm_sharpe, bm_mdd = calculate_metrics(df, 'benchmark_value')

        print("\n" + "=" * 80)
        print(f"{'ðŸš€ PPO Agent Stock Trading Performance Report ðŸš€':^80}")
        print("=" * 80)
        print(f"{'Metric':<34} {'PPO Strategy':>22} {'Benchmark':>22}")
        print("-" * 80)
        print(f"{'Cumulative Return':<34} {ppo_ret*100:>21.2f}% {bm_ret*100:>21.2f}%")
        print(f"{'Annualized Sharpe Ratio':<34} {ppo_sharpe:>22.4f} {bm_sharpe:>22.4f}")
        print(f"{'Maximum Drawdown':<34} {ppo_mdd*100:>21.2f}% {bm_mdd*100:>21.2f}%")
        print("=" * 80)

        # Calculate outperformance
        excess_return = (ppo_ret - bm_ret) * 100
        sharpe_diff = ppo_sharpe - bm_sharpe
        dd_improvement = (bm_mdd - ppo_mdd) * 100

        print(f"{'Excess Return':<34} {excess_return:>21.2f}%")
        print(f"{'Sharpe Ratio Difference':<34} {sharpe_diff:>22.4f}")
        print(f"{'Max Drawdown Improvement':<34} {dd_improvement:>21.2f}%")
        print("=" * 80 + "\n")

        # Create visualization
        plt.figure(figsize=(15, 8))
        plt.style.use('seaborn-v0_8-darkgrid')

        plt.plot(df['date'], df['account_value'], label='PPO Agent Portfolio', 
                color='#2E86AB', linewidth=2.5)
        plt.plot(df['date'], df['benchmark_value'], label='Dow Jones Index (Benchmark)', 
                color='#A23B72', linewidth=1.5, linestyle='--', alpha=0.8)

        plt.title('Proximal Policy Optimization (PPO) Stock Trading Performance', 
                 fontsize=16, fontweight='bold', pad=20)
        plt.xlabel('Date', fontsize=12, fontweight='bold')
        plt.ylabel('Portfolio Value ($)', fontsize=12, fontweight='bold')
        plt.grid(True, alpha=0.3)

        textstr = '\n'.join((
            r'$\bf{PPO\ Strategy}$',
            f'Return: {ppo_ret * 100:.2f}%',
            f'Sharpe: {ppo_sharpe:.3f}',
            f'Max DD: {ppo_mdd * 100:.2f}%',
            '',
            r'$\bf{Benchmark}$',
            f'Return: {bm_ret * 100:.2f}%',
            f'Sharpe: {bm_sharpe:.3f}',
            f'Max DD: {bm_mdd * 100:.2f}%',
            '',
            r'$\bf{Outperformance}$',
            f'Excess Return: {excess_return:.2f}%',
            f'Sharpe Diff: {sharpe_diff:.3f}'
        ))
        props = dict(boxstyle='round', facecolor='white', alpha=0.9, edgecolor='gray', linewidth=1.5)
        plt.gca().text(0.02, 0.98, textstr, transform=plt.gca().transAxes, fontsize=9.5,
                       verticalalignment='top', bbox=props, family='monospace')

        plt.legend(loc='lower right', fontsize=11, framealpha=0.95)
        plt.tight_layout()

        # Save figure
        fig_path = 'ppo_backtest_performance.png'
        plt.savefig(fig_path, dpi=300, bbox_inches='tight')
        print(f"[âœ“] Equity curve visualization saved to '{fig_path}'!")
        
        plt.show()

    except Exception as e:
        print(f"[ERROR] An error occurred during testing: {e}")
        import traceback
        traceback.print_exc()


# Run PPO test
ppo_test()
print("[âœ“] PPO Test Complete!")


[STEP 7] Performance Analysis
[*] Successfully loaded PPO trading records!
    Testing period: 2023-10-25 to 2023-11-21
    Test duration: 20 trading days
[*] Downloading Dow Jones Index as benchmark...
YF.download() has changed argument auto_adjust default to True

                 ðŸš€ PPO Agent Stock Trading Performance Report ðŸš€                 
Metric                                       PPO Strategy              Benchmark
--------------------------------------------------------------------------------
Cumulative Return                                   8.39%                  6.40%
Annualized Sharpe Ratio                            7.6013                 6.8279
Maximum Drawdown                                   -1.09%                 -1.87%
Excess Return                                       1.99%
Sharpe Ratio Difference                            0.7734
Max Drawdown Improvement                           -0.78%

[âœ“] Equity curve visualization saved to 'ppo_backtest_performan

In [28]:
# Visualization: Plot Test Results with Benchmark
import matplotlib.pyplot as plt

def plot_test_results():
    """Plot PPO test results against benchmark"""
    try:
        # Load test results
        result_file = "ppo_test_results.csv"
        
        if not os.path.exists(result_file):
            print(f"[!] Test results file not found: {result_file}")
            print("    Please run the training cell (cell 7) first")
            return
        
        df = pd.read_csv(result_file)
        df['date'] = pd.to_datetime(df['date'])
        df = df.drop_duplicates(subset=['date']).sort_values('date').reset_index(drop=True)
        
        start_date = df['date'].iloc[0].strftime('%Y-%m-%d')
        end_date = df['date'].iloc[-1].strftime('%Y-%m-%d')
        initial_capital = df['account_value'].iloc[0]
        
        print(f"[*] Loading PPO test results...")
        print(f"    Test period: {start_date} to {end_date}")
        print(f"    Trading days: {len(df)}")
        print(f"[*] Downloading Dow Jones benchmark data...")
        
        # Download benchmark
        benchmark = yf.download("^DJI", start=start_date, end=end_date, progress=False)
        
        if isinstance(benchmark.columns, pd.MultiIndex):
            benchmark.columns = benchmark.columns.get_level_values(0)
        
        benchmark = benchmark.reset_index()
        
        if 'Date' not in benchmark.columns and 'index' in benchmark.columns:
            benchmark = benchmark.rename(columns={'index': 'Date'})
        
        benchmark['Date'] = pd.to_datetime(benchmark['Date'])
        
        # Merge with benchmark
        df = pd.merge(df, benchmark[['Date', 'Close']], left_on='date', right_on='Date', how='left')
        df['Close'] = df['Close'].ffill()
        df['benchmark_value'] = (df['Close'] / df['Close'].iloc[0]) * initial_capital
        
        # Calculate metrics
        ppo_ret = (df['account_value'].iloc[-1] / df['account_value'].iloc[0] - 1) * 100
        bm_ret = (df['benchmark_value'].iloc[-1] / df['benchmark_value'].iloc[0] - 1) * 100
        
        ppo_daily = df['account_value'].pct_change().dropna()
        bm_daily = df['benchmark_value'].pct_change().dropna()
        
        ppo_sharpe = (252 ** 0.5) * (ppo_daily.mean() / ppo_daily.std()) if ppo_daily.std() != 0 else 0
        bm_sharpe = (252 ** 0.5) * (bm_daily.mean() / bm_daily.std()) if bm_daily.std() != 0 else 0
        
        ppo_max = df['account_value'].cummax()
        ppo_dd = ((df['account_value'] - ppo_max) / ppo_max).min() * 100
        
        bm_max = df['benchmark_value'].cummax()
        bm_dd = ((df['benchmark_value'] - bm_max) / bm_max).min() * 100
        
        # Create visualization
        fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(18, 8))
        
        # Plot 1: Portfolio Value Comparison
        ax1.plot(df['date'], df['account_value'], label='PPO Agent Portfolio', 
                color='#2E86AB', linewidth=2.5)
        ax1.plot(df['date'], df['benchmark_value'], label='Dow Jones Index (Benchmark)', 
                color='#A23B72', linewidth=2, linestyle='--', alpha=0.8)
        
        ax1.set_title('PPO Agent Test Period Performance vs Benchmark', 
                     fontsize=16, fontweight='bold', pad=15)
        ax1.set_xlabel('Date', fontsize=12, fontweight='bold')
        ax1.set_ylabel('Portfolio Value ($)', fontsize=12, fontweight='bold')
        ax1.grid(True, alpha=0.3)
        ax1.legend(loc='lower right', fontsize=11, framealpha=0.95)
        
        # Add metrics text box
        textstr = '\n'.join((
            r'$\bf{PPO\ Strategy}$',
            f'Return: {ppo_ret:.2f}%',
            f'Sharpe: {ppo_sharpe:.3f}',
            f'Max DD: {ppo_dd:.2f}%',
            '',
            r'$\bf{Benchmark}$',
            f'Return: {bm_ret:.2f}%',
            f'Sharpe: {bm_sharpe:.3f}',
            f'Max DD: {bm_dd:.2f}%'
        ))
        props = dict(boxstyle='round', facecolor='white', alpha=0.9, edgecolor='gray', linewidth=1.5)
        ax1.text(0.02, 0.98, textstr, transform=ax1.transAxes, fontsize=10,
                verticalalignment='top', bbox=props, family='monospace')
        
        # Plot 2: Daily Returns Comparison
        ppo_returns = df['account_value'].pct_change() * 100
        bm_returns = df['benchmark_value'].pct_change() * 100
        
        ax2.plot(df['date'], ppo_returns, label='PPO Daily Return', 
                color='#2E86AB', linewidth=1.5, alpha=0.7)
        ax2.plot(df['date'], bm_returns, label='Benchmark Daily Return', 
                color='#A23B72', linewidth=1.5, alpha=0.7, linestyle='--')
        ax2.axhline(y=0, color='black', linestyle='-', linewidth=0.5, alpha=0.5)
        
        ax2.set_title('Daily Returns Comparison', fontsize=14, fontweight='bold', pad=10)
        ax2.set_xlabel('Date', fontsize=12, fontweight='bold')
        ax2.set_ylabel('Daily Return (%)', fontsize=12, fontweight='bold')
        ax2.grid(True, alpha=0.3)
        ax2.legend(loc='lower right', fontsize=10, framealpha=0.95)
        
        plt.tight_layout()
        
        # Save figure
        fig_path = 'ppo_test_analysis.png'
        plt.savefig(fig_path, dpi=300, bbox_inches='tight')
        print(f"\n[âœ“] Test results visualization saved to '{fig_path}'!")
        
        plt.show()
        
        # Print summary
        print("\n" + "="*80)
        print(f"{'Test Period Performance Summary':^80}")
        print("="*80)
        print(f"{'Metric':<30} {'PPO Agent':>20} {'Benchmark':>20}")
        print("-"*80)
        print(f"{'Cumulative Return':<30} {ppo_ret:>19.2f}% {bm_ret:>19.2f}%")
        print(f"{'Annualized Sharpe Ratio':<30} {ppo_sharpe:>20.3f} {bm_sharpe:>20.3f}")
        print(f"{'Maximum Drawdown':<30} {ppo_dd:>19.2f}% {bm_dd:>19.2f}%")
        print(f"{'Excess Return':<30} {(ppo_ret - bm_ret):>19.2f}%")
        print("="*80)
        
    except Exception as e:
        print(f"[ERROR] Failed to plot test results: {e}")
        import traceback
        traceback.print_exc()

# Run visualization
print("\n[STEP 9] Test Results Visualization")
print("="*80)
plot_test_results()


[STEP 9] Test Results Visualization
[*] Loading PPO test results...
    Test period: 2023-10-25 to 2023-11-21
    Trading days: 20
[*] Downloading Dow Jones benchmark data...

[âœ“] Test results visualization saved to 'ppo_test_analysis.png'!

                        Test Period Performance Summary                         
Metric                                    PPO Agent            Benchmark
--------------------------------------------------------------------------------
Cumulative Return                             8.39%                6.40%
Annualized Sharpe Ratio                       7.601                6.828
Maximum Drawdown                             -1.09%               -1.87%
Excess Return                                 1.99%
