In [1]:
# Deep Reinforcement Learning for Automated Stock Trading – Dow 30 Ensemble Strategy

# This notebook replicates and extends the ICAIF 2020 ensemble DRL trading strategy using Dow 30 stocks.  
# To allow a comparison between stock and cryptocurrency performance,
# we use PPO, A2C, and DDPG agents in a rolling ensemble approach with turbulence-aware validation.

In [1]:
import sys
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime
import warnings

warnings.filterwarnings("ignore")

sys.path.append("..")

# FinRL modules
from finrl.config_tickers import DOW_30_TICKER
from finrl.meta.preprocessor.yahoodownloader import YahooDownloader
from finrl.meta.preprocessor.preprocessors import FeatureEngineer, data_split
from finrl.meta.env_stock_trading.env_stocktrading import StockTradingEnv
from finrl.agents.stablebaselines3.models import DRLAgent
# from finrl.agents.stablebaselines3.models import DRLEnsembleAgent
from finrl.plot.plot import backtest_plot, get_daily_return, get_baseline
from finrl.plot.plot import backtest_stats_qs as backtest_stats  # using QuantStats


In [2]:
# Overall date range of the experiment: the first rolling window starts at
# START_DATE and no window may end after END_DATE.
START_DATE = "2020-05-04"
END_DATE = "2024-12-31"

# Length, in months, of each stage inside one rolling window.
TRAIN_WINDOW_MONTHS = 6
VALIDATION_WINDOW_MONTHS = 3
TRADE_WINDOW_MONTHS = 3

# Tickers to download and trade. This is currently a single-stock run;
# assign DOW_30_TICKER here instead to use the full Dow 30 list.
ticker_list = ["AAPL"]

In [3]:
from finrl.config import (TRAIN_START_DATE, TRAIN_END_DATE, TEST_START_DATE, TEST_END_DATE, TRADE_START_DATE, TRADE_END_DATE)

# Download price data for the same date range the rolling windows are built
# from. START_DATE / END_DATE are defined in this notebook's configuration
# cell, so the downloaded range no longer depends on whatever dates the
# installed finrl.config module happens to define.
df_raw = YahooDownloader(start_date=START_DATE,
                         end_date=END_DATE,
                         ticker_list=ticker_list).fetch_data()

# Add the technical indicators listed below plus the VIX and turbulence columns.
fe = FeatureEngineer(use_technical_indicator=True,
                     tech_indicator_list=["macd", "rsi_30", "cci_30"],
                     use_vix=True,
                     use_turbulence=True,
                     user_defined_feature=False)

df = fe.preprocess_data(df_raw)  # D: removed processing from original
# Convert the date column to datetimes; the rolling-window boundaries used to
# slice this frame later are pandas Timestamps.
df['date'] = pd.to_datetime(df['date'])


[*********************100%***********************]  1 of 1 completed


Shape of DataFrame:  (1173, 8)
Successfully added technical indicators


[*********************100%***********************]  1 of 1 completed


Shape of DataFrame:  (1172, 8)
Successfully added vix
Successfully added turbulence index


In [4]:
from dateutil.relativedelta import relativedelta

# Hyperparameters handed to the A2C trainer through `model_kwargs`.
A2C_model_kwargs = {
    "n_steps": 5,
    "ent_coef": 5e-3,
    "learning_rate": 7e-4,
}

# Hyperparameters handed to the PPO trainer through `model_kwargs`.
PPO_model_kwargs = {
    "ent_coef": 1e-2,
    "n_steps": 2048,
    "learning_rate": 2.5e-4,
    "batch_size": 128,
}

# Hyperparameters handed to the DDPG trainer through `model_kwargs`.
DDPG_model_kwargs = {
    "buffer_size": 10_000,
    "learning_rate": 5e-4,
    "batch_size": 64,
}

def get_rolling_windows(start_date, end_date, train_months, val_months, trade_months):
    """Build the schedule of rolling train / validation / trade windows.

    Each window starts `trade_months` after the previous one and consists of
    three back-to-back stages: training, validation and trading.

    Parameters
    ----------
    start_date, end_date : str or datetime-like
        Bounds of the experiment; both are parsed with ``pd.to_datetime``.
        A window is only emitted if its full span fits on or before `end_date`.
    train_months, val_months, trade_months : int
        Length of each stage in calendar months. `trade_months` is also the
        step between consecutive windows and must be positive.

    Returns
    -------
    list of tuple
        ``(train_start, train_end, val_end, trade_end)`` Timestamps, one tuple
        per window, in chronological order. Empty if no window fits.

    Raises
    ------
    ValueError
        If `trade_months` is not positive (the window start would never
        advance, so the loop would not terminate).
    """
    if trade_months <= 0:
        raise ValueError(
            f"trade_months must be a positive number of months, got {trade_months!r}"
        )

    windows = []
    current = pd.to_datetime(start_date)
    end = pd.to_datetime(end_date)

    # Month-based calendar offsets (pandas is already imported as `pd`).
    full_span = pd.DateOffset(months=train_months + val_months + trade_months)
    train_span = pd.DateOffset(months=train_months)
    val_span = pd.DateOffset(months=val_months)
    trade_span = pd.DateOffset(months=trade_months)

    while current + full_span <= end:
        train_start = current
        train_end = train_start + train_span
        val_end = train_end + val_span
        trade_end = val_end + trade_span

        windows.append((train_start, train_end, val_end, trade_end))
        # Slide forward by one trade period so trade stages follow each other.
        current = current + trade_span

    return windows

# Build the window schedule from the notebook-level configuration constants.
windows = get_rolling_windows(
    start_date=START_DATE,
    end_date=END_DATE,
    train_months=TRAIN_WINDOW_MONTHS,
    val_months=VALIDATION_WINDOW_MONTHS,
    trade_months=TRADE_WINDOW_MONTHS,
)
print(f"Generated {len(windows)} rolling windows.")


Generated 15 rolling windows.


In [5]:
from stable_baselines3 import PPO, A2C, DDPG
from stable_baselines3.common.vec_env import DummyVecEnv
# Rolling-window backtest using three reinforcement learning agents (PPO, A2C, DDPG).
# For every window: train all three agents on the training slice, score each one
# on the validation slice, then trade the following slice with the best scorer.
results = []
EPISODES = 10  # kept for reference; the training calls below use TIMESTEPS_PER_ROW
TIMESTEPS_PER_ROW = 30  # training timesteps per row of training data

for i, (train_start, train_end, val_end, trade_end) in enumerate(windows):
    print(f" Rolling Window {i+1}: {train_start.date()} to {trade_end.date()}")

    train_data = data_split(df, train_start, train_end)
    val_data = data_split(df, train_end, val_end)
    trade_data = data_split(df, val_end, trade_end)

    # A window that falls outside the downloaded data has nothing to train or
    # trade on; skip it instead of failing inside the environment.
    if len(train_data) == 0 or len(val_data) == 0 or len(trade_data) == 0:
        print(f" Window {i+1} has an empty train/validation/trade slice; skipping.")
        continue

    env_train = DummyVecEnv([lambda: StockTradingEnv(train_data)])
    agent = DRLAgent(env=env_train)

    # DDPG is trained for half as many timesteps as PPO and A2C.
    total_timesteps = len(train_data) * TIMESTEPS_PER_ROW
    models = {
        "ppo": agent.train_PPO(total_timesteps=total_timesteps, model_kwargs=PPO_model_kwargs),
        "a2c": agent.train_A2C(total_timesteps=total_timesteps, model_kwargs=A2C_model_kwargs),
        "ddpg": agent.train_DDPG(total_timesteps=int(total_timesteps * 0.5), model_kwargs=DDPG_model_kwargs),
    }

    # Pick the model with the highest (non-NaN) validation Sharpe ratio.
    best_model = None
    best_sharpe = -np.inf
    for name, model in models.items():
        env_val = DummyVecEnv([lambda: StockTradingEnv(val_data)])
        sharpe = DRLAgent.DRL_prediction(model=model, environment=env_val, evaluate=True)
        print("{} Sharpe: {}".format(name, sharpe))
        # Debug: print account value
        account_vals = env_val.envs[0].asset_memory
        print("{} Account Value Range: {:.2f} to {:.2f}".format(name, min(account_vals), max(account_vals)))
        if not np.isnan(sharpe) and sharpe > best_sharpe:
            best_model = model
            best_sharpe = sharpe

    if best_model is None:
        # Every agent returned a NaN Sharpe ratio, so there is nothing to trade with.
        print(f" No model produced a valid Sharpe ratio for window {i+1}; skipping trade phase.")
        continue

    print(f" Best model: {best_model.__class__.__name__} with Sharpe {best_sharpe:.2f}")

    env_trade = DummyVecEnv([lambda: StockTradingEnv(trade_data)])
    df_result = DRLAgent.DRL_prediction(model=best_model, environment=env_trade)
    results.append(df_result)

# Combine and persist once, after all windows have been processed, rather than
# re-concatenating and rewriting the CSV on every iteration.
if results:
    df_final = pd.concat(results, ignore_index=True)

    # Save for later use; create the output directory if it does not exist yet.
    os.makedirs("results", exist_ok=True)
    df_final.to_csv("results/dow30_account_values.csv", index=False)
    print("Results saved to results/dow30_account_values.csv")
else:
    print("No results to analyze.")



 Rolling Window {i+1}: {train_start.date()} to {trade_end.date()}
Using cpu device
-----------------------------
| time/              |      |
|    fps             | 1458 |
|    iterations      | 1    |
|    time_elapsed    | 1    |
|    total_timesteps | 2048 |
-----------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 1314         |
|    iterations           | 2            |
|    time_elapsed         | 3            |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 0.0019150528 |
|    clip_fraction        | 0.00127      |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.41        |
|    explained_variance   | 0.0          |
|    learning_rate        | 0.00025      |
|    loss                 | -0.0109      |
|    n_updates            | 10           |
|    policy_gradient_loss | -0.000366    |
|    std           

In [None]:
# Combine the per-window trade results and report the backtest.
# Fail with a clear message when no window produced a result; pd.concat on an
# empty list would otherwise raise a less informative ValueError.
if not results:
    raise ValueError("No results to analyze: no rolling window produced trade results.")

# ignore_index gives one continuous index across windows, matching how the
# results are combined before being written to CSV.
df_final = pd.concat(results, ignore_index=True)
backtest_stats(df_final)
backtest_plot(df_final)