In [1]:
# Deep Reinforcement Learning for Automated Stock Trading – for Crypto

### 📊 Summary of Crypto DRL Ensemble Trading

# This notebook implements a deep reinforcement learning ensemble strategy adapted from the ICAIF 2020 paper, applied to cryptocurrency trading.

#- **Assets Used**: BTC, ETH, BNB, XRP
#- **Time Period**: 2018 to 2024, with rolling windows
#- **Agents Used**: PPO, A2C, DDPG (Stable-Baselines3)
#- **Strategy**:
#  - Train each agent on a rolling window (e.g. 6 months)
#   - Validate on 2 months of unseen data
#   - Select the model with the highest Sharpe ratio
#   - Trade using the best model for 2 months
# - **Objective**: Observe the effectiveness of the agents using the ensemble strategy.


In [2]:
import os
import pandas as pd
import sys

sys.path.append("..")

from finrl.utils.rolling_windows import get_rolling_windows
from finrl.pipeline.windows import run_training_windows
from finrl.meta.preprocessor.preprocessors import FeatureEngineer
from finrl.meta.preprocessor.binancedownloader import BinanceDownloader
from finrl.meta.preprocessor.binancedownloader import BinanceDownloader


In [3]:
import os
from pathlib import Path

data_path = "../data/binance_less_raw.csv"

# current_dir = os.path.dirname(__file__)
notebook_path = Path().resolve()
current_dir = notebook_path.parent
data_path = os.path.join(current_dir, "data")
results_path = os.path.join(current_dir, "results")
file_name = "binance_data_raw.csv"
dt_raw_path = os.path.join(data_path, file_name)
# dt_processed_path = os.path.join(data_path, "processed_bnc_data.csv")

# **
dt_sentiment_path = os.path.join(data_path, "sentiment_bnc_data.csv")


tickers = ["BTCUSDT", "ETHUSDT", "BNBUSDT", "SOLUSDT", "XRPUSDT"]

# tickers = ["BTCUSDT", "ETHUSDT"]

# NOTE: change to get and write following updates to ticker list
# bd = BinanceDownloader()
# df_raw = bd.download_multiple(ticker_list=tickers, start_str="1 Jan, 2018")
# df_raw.to_csv(dt_raw_path, index=False)

df_raw = pd.read_csv(dt_raw_path, parse_dates=["date"])
# print("Data saved to:", data_path)

print("Done...Retrieved raw data from file")


Done...Retrieved raw data from file


In [4]:
from finrl.config import LOOKBACK_DAYS


fe = FeatureEngineer(use_technical_indicator=False, use_fear_greed=True)

# ** NOTE Processed data with sentiment  adding 3 columns  fear_greed (as from API)  fear_greed_norm (normalised)  fear_greed_mapped (categorised)
# sen_processed = pd.read_csv(dt_processed_path, parse_dates=["date"])
sen_processed = fe.preprocess_data(df_raw) 
sen_processed.to_csv(dt_sentiment_path, index=False)




  df["timestamp"] = pd.to_datetime(df["timestamp"], unit="s")
  df["fear_greed"] = df["fear_greed"].fillna(method="ffill")


In [5]:
# from finrl.utils.calculate_turbulence_crypto import add_turbulence_and_volatility
# df_featured = add_turbulence_and_volatility(df_processed, turbulence_lookback=LOOKBACK_DAYS, vol_lookback=30) 

In [9]:
from finrl.config import TRAIN_LEN, VAL_LEN, TRADE_LEN, TRAIN_START_DATE, TRADE_END_DATE

start_date = pd.Timestamp(TRAIN_START_DATE)
end_date = pd.Timestamp(TRADE_END_DATE)

windows = get_rolling_windows(
    train_months=12,
    val_months=1,
    trade_months=1,
    start_date_str=start_date,
    end_date_str=end_date,
    lookback_days= 60,
)

print(f"Generated {len(windows)} rolling windows from {TRAIN_START_DATE} to {TRADE_END_DATE}.")



Created 22 rolling windows.
Generated 22 rolling windows from 2020-05-04 to 2025-05-31.


In [10]:
from stable_baselines3.common.vec_env import DummyVecEnv
from finrl.pipeline.plot_values import plot_dashboard #plot_account_values, plot_counts_rewards, plot_volatility_return, plot_sharp_window
from finrl.meta.preprocessor.preprocessors import data_split
from finrl.meta.env_crypto_trading.env_cryptotrading import CryptoTradingEnv
from finrl.agents.stablebaselines3.models import DRLAgent
from finrl.plot.plot import get_daily_return  

import numpy as np

## from finrl.plot import get_daily_return

account_values = df_result["account_value"]
daily_returns = get_daily_return(account_values)

sharpe = (252**0.5) * daily_returns.mean() / daily_returns.std()
total_return = account_values.iloc[-1] / account_values.iloc[0] - 1
volatility = daily_returns.std()
max_drawdown = (account_values.cummax() - account_values).max() / account_values.cummax().max()


df_final = pd.concat(results)
backtest_stats(df_final)
backtest_plot(df_final)

In [11]:
from finrl.config import model_configs, LOOKBACK_DAYS
# files for reporting featured / ie turbulence and volatility trading results

sen_trade_results_path = os.path.join(results_path, "sen_trading_metrix.csv")
sen_daily_accounts_path = os.path.join(results_path, "sen_account_values.csv")

In [None]:
from finrl.utils.compute_sharpe_metrics import compute_sharpe_metrics
from finrl.utils.compute_sortino_ratio import compute_sortino_ratio

performance_log = []

PPO_model_kwargs = model_configs["PPO"]
A2C_model_kwargs = model_configs["A2C"]
DDPG_model_kwargs = model_configs["DDPG"]

for i, (train_start, train_end, val_start, val_end, trade_start, trade_end) in enumerate(windows):
      
        train_data = data_split(sen_processed, train_start, train_end)
        val_data = data_split(sen_processed, val_start, val_end)
        trade_data = data_split(sen_processed, trade_start, trade_end)
        
        window_name = f"window_{i+1}"
        print(f" Rolling {window_name}: {train_start.date()} to {trade_end.date()}")

        #** NOTE: use featured flag when combined with sentiment
        # env_train = DummyVecEnv([lambda: CryptoTradingEnv(train_data, featured=True)])

        env_train = DummyVecEnv([lambda: CryptoTradingEnv(train_data, sentiment=True)])
        agent = DRLAgent(env=env_train)

        models = {
            "ppo": agent.train_PPO(total_timesteps=len(train_data)*30, model_kwargs=PPO_model_kwargs),
            "a2c": agent.train_A2C(total_timesteps=len(train_data)*30, model_kwargs=A2C_model_kwargs),
            "ddpg": agent.train_DDPG(total_timesteps=int(len(train_data)*30*0.5), model_kwargs=DDPG_model_kwargs)
        }

        best_model = None
        best_sharpe = -np.inf
        val_sharpes = []
        val_report = []

        for name, model in models.items():
            env_val = DummyVecEnv([lambda: CryptoTradingEnv(val_data, sentiment=True)])
            sharpe_metrics = DRLAgent.DRL_evaluation(model=model, environment=env_val, lookback_days=0)
            sharpe = sharpe_metrics["sharpe"]
            val_sharpes.append({"name": name, "sharpe": sharpe})

            if not np.isnan(sharpe) and sharpe > best_sharpe:
                best_model = model
                best_sharpe = sharpe     

        #  --- Trade start ---
        env_trade = DummyVecEnv([lambda: CryptoTradingEnv(trade_data, sentiment=True)])
        
        if best_model is not None:
                startdt = pd.to_datetime(trade_start)
                env_trade.envs[0].trading_mode = True
                df_result = DRLAgent.DRL_prediction(model=best_model, environment=env_trade, start_date=startdt, lookback_days=0)

                df_result["window"] = i + 1
                
                # concat for reporting
                if "account_values_df" not in locals():
                    account_values_df = df_result.copy()
                else:
                    account_values_df = pd.concat([account_values_df, df_result], ignore_index=True)

                account_values = df_result["account_value"].copy()         
                #  get sharpe on an annualized basis,       
                metrics = compute_sharpe_metrics(account_values, TRADING_DAYS_PER_YEAR=365)
                sortino = compute_sortino_ratio(account_values)
                # raw cumulative return
                total_return = (account_values.iloc[-1] / account_values.iloc[0]) - 1

                # volatility = daily_returns.std()
                max_drawdown = (account_values.cummax() - account_values).max() / account_values.cummax().max()

         
                performance_log.append({
                        "agent": best_model.__class__.__name__,
                        "window": i + 1,
                        "train_start": train_start.date(),
                        "train_end": train_end.date(),
                        "val_start": val_start.date(),
                        "val_end": val_end.date(),
                        "trade_start": trade_start.date(),
                        "trade_end": trade_end.date(),
                        "sharpe (ann - trade)": metrics["sharpe"],
                        "sharpe (best - val)" : best_sharpe,
                        "sortino": sortino, 
                        "min_account_value": account_values.min(),
                        "max_account_value": account_values.max(),
                        "total_return": total_return,
                        "mean_return": metrics["mean_return"],
                        "volatility (std)": metrics["std_return"],
                        "max_drawdown": max_drawdown,
                        "initial_acc_val": account_values.iloc[0],
                        "final_acc_val": account_values.iloc[-1],
                        })
        else:
            print(f"❌ No valid model selected in window {i+1} — skipping trade step.")


            #  --- Trade End ---

            print(f"✅ Window {i+1} Summary:")
            print(f"  🔹 Train Dates : {train_start.date()} → {train_end.date()}")
            print(f"  🔹 Trade Dates : {trade_start.date()} → {trade_end.date()}")
            print(f"  🔹 Best Model  : {best_model.__class__.__name__ if best_model else 'None'}")
            print(f"  🔹 Sharpe      : {best_sharpe:.2f}")


    




 Rolling window_1: 2020-05-04 to 2021-11-01
 Rolling window_2: 2020-07-04 to 2022-01-01
 Rolling window_3: 2020-09-04 to 2022-03-03
 Rolling window_4: 2020-11-04 to 2022-04-30
 Rolling window_5: 2021-01-04 to 2022-07-03
 Rolling window_6: 2021-03-04 to 2022-08-31
[EVAL] Sharpe Debug: No volatility or insufficient data.
 Rolling window_7: 2021-05-04 to 2022-11-01
 Rolling window_8: 2021-07-04 to 2023-01-01
 Rolling window_9: 2021-09-04 to 2023-03-03
 Rolling window_10: 2021-11-04 to 2023-04-30
[EVAL] Sharpe Debug: No volatility or insufficient data.
 Rolling window_11: 2022-01-04 to 2023-07-03
 Rolling window_12: 2022-03-04 to 2023-08-31
 Rolling window_13: 2022-05-04 to 2023-11-01
 Rolling window_14: 2022-07-04 to 2024-01-01
 Rolling window_15: 2022-09-04 to 2024-03-02
 Rolling window_16: 2022-11-04 to 2024-04-30
 Rolling window_17: 2023-01-04 to 2024-07-02
[EVAL] Sharpe Debug: No volatility or insufficient data.
 Rolling window_18: 2023-03-04 to 2024-08-31
 Rolling window_19: 2023-05-

  if val_report_metrics:
        val_metrics = pd.DataFrame(val_report_metrics)
        val_metrics.to_csv(val_results_path, index=False)
        print(f"Metrics saved to", {val_results_path})
        # display(val_report_metrics) - look for import matplotlib.pyplot as plt

    else:
        print("No validation report to analyze.")

In [14]:
sen_trade_results_path = os.path.join(results_path, "sen_trading_metrix.csv")
sen_daily_accounts_path = os.path.join(results_path, "sen_account_values.csv")

In [15]:
df_metrics = pd.DataFrame(performance_log)
df_metrics.to_csv(sen_trade_results_path, index=False)
print(f"Trade Metrics saved to", {sen_daily_accounts_path})

account_values_df.to_csv(sen_daily_accounts_path, index=False)
print(f"Daily account values saved to", {sen_daily_accounts_path})

Trade Metrics saved to {'/home/derya/finrl_research/results/sen_account_values.csv'}
Daily account values saved to {'/home/derya/finrl_research/results/sen_account_values.csv'}
