In [1]:
# Deep Reinforcement Learning for Automated Stock Trading ‚Äì for Crypto

### üìä Summary of Crypto DRL Ensemble Trading

This notebook implements a deep reinforcement learning ensemble strategy adapted from the ICAIF 2020 paper, applied to cryptocurrency trading.

#- **Assets Used**: BTC, ETH, BNB, XRP
#- **Time Period**: 2018 to 2024, with rolling windows
#- **Agents Used**: PPO, A2C, DDPG (Stable-Baselines3)
#- **Strategy**:
#  - Train each agent on a rolling window (e.g. 6 months)
#   - Validate on 2 months of unseen data
#   - Select the model with the highest Sharpe ratio
#   - Trade using the best model for 2 months
# - **Objective**: Observe the effectiveness of the agents using the ensemble strategy.


In [1]:
import sys
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime
import warnings


warnings.filterwarnings("ignore")

sys.path.append("..")

# FinRL modules
# from finrl.config_tickers import DOW_30_TICKER
# from finrl.meta.preprocessor.yahoodownloader import YahooDownloader
from finrl.meta.preprocessor.binancedownloader import BinanceDownloader
from finrl.meta.preprocessor.preprocessors import FeatureEngineer, data_split
from finrl.meta.env_crypto_trading.env_cryptotrading import CryptoTradingEnv
from finrl.agents.stablebaselines3.models import DRLAgent
# from finrl.agents.stablebaselines3.models import DRLEnsembleAgent
from finrl.plot.plot import backtest_plot, get_daily_return, get_baseline
from finrl.plot.plot import backtest_stats_qs as backtest_stats  # using QuantStats


In [2]:
from finrl.config import (TRAIN_START_DATE, TRAIN_END_DATE, TEST_START_DATE, TEST_END_DATE, TRADE_START_DATE, TRADE_END_DATE)

START_DATE = TRAIN_START_DATE 
END_DATE =  TRADE_END_DATE

TRAIN_WINDOW_MONTHS = 6
VALIDATION_WINDOW_MONTHS = 2
TRADE_WINDOW_MONTHS = 2

# data_path = "../data/binance_raw.csv"
data_path = "../data/binance_less_raw.csv"

if os.path.exists(data_path):
    print(" Loading Binance data from local cache...")
    df_raw = pd.read_csv(data_path, parse_dates=["date"])
else:
    print(" Downloading fresh Binance data...")
    # tickers = ["BTCUSDT", "ETHUSDT", "BNBUSDT", "SOLUSDT", "XRPUSDT"]
    tickers = ["BTCUSDT", "ETHUSDT", "BNBUSDT", "XRPUSDT"]
    bd = BinanceDownloader()
    df_raw = bd.download_multiple(ticker_list=tickers, start_str="1 Jan, 2012")
    df_raw.to_csv(data_path, index=False)
print("Data saved to:", data_path)

print("Done...")


 Loading Binance data from local cache...
Data saved to: ../data/binance_less_raw.csv
Done...


In [3]:
df_raw.groupby("tic")["date"].min()

tic
BNB-USD   2017-11-06
BTC-USD   2017-08-17
ETH-USD   2017-08-17
XRP-USD   2018-05-04
Name: date, dtype: datetime64[ns]

In [4]:

# df_raw = YahooDownloader(start_date=TRAIN_START_DATE,
#                          end_date=TRADE_END_DATE,
#                          ticker_list=ticker_list).fetch_data()

fe = FeatureEngineer(use_technical_indicator=True,
                     tech_indicator_list=["macd", "rsi_30", "cci_30"], 
                     use_vix=False,
                     use_turbulence=True
                     )

# user_defined_feature=False

df_processed = fe.preprocess_data(df_raw)  # D: removed processing from original



Successfully added technical indicators
Successfully added turbulence index


In [5]:
from dateutil.relativedelta import relativedelta
from finrl.utils.rolling_windows import get_rolling_windows

# PPO: Good for stable exploration with large batches
# A2C: Low n_steps means quick updates (crypto works well with this)
# DDPG: Buffer and batch sizes are fine; could experiment with larger buffer_size if needed

A2C_model_kwargs = {
    'n_steps': 5,
    'ent_coef': 0.005,
    'learning_rate': 0.0007
}

PPO_model_kwargs = {
    'ent_coef': 0.01,
    'n_steps': 2048,
    'learning_rate': 0.00025,
    'batch_size': 128
}

DDPG_model_kwargs = {
    'buffer_size': 10000,
    'learning_rate': 0.0005,
    'batch_size': 64
}

TRAIN_WINDOW_MONTHS = 6
VALIDATION_WINDOW_MONTHS = 2
TRADE_WINDOW_MONTHS = 2

windows = get_rolling_windows(
    train_months=TRAIN_WINDOW_MONTHS,
    val_months=VALIDATION_WINDOW_MONTHS,
    trade_months=TRADE_WINDOW_MONTHS
)

print(f"Generated {len(windows)} rolling windows from {TRAIN_START_DATE} to {TRADE_END_DATE}.")



üîÑ Created 16 rolling windows.
Generated 16 rolling windows from 2020-05-04 to 2024-12-31.


In [6]:
from stable_baselines3 import PPO, A2C, DDPG
from stable_baselines3.common.vec_env import DummyVecEnv
from finrl.plot.plot import get_daily_return  

#  rolling window backtest using three reinforcement learning agents (PPO, A2C, DDPG). 
results = []
EPISODES = 10
performance_log = []

for i, (train_start, train_end, val_end, trade_end) in enumerate(windows):
    print(f" Rolling Window {i+1}: {train_start.date()} to {trade_end.date()}")
    
    train_data = data_split(df_processed, train_start, train_end)
    val_data = data_split(df_processed, train_end, val_end)
    trade_data = data_split(df_processed, val_end, trade_end)

    min_days_required = 30

    if len(train_data["date"].unique()) < min_days_required:
        print(f"  Skipping Window {i+1} ‚Äî Train window too short: {len(train_data['date'].unique())} days")
        continue
    
    if len(val_data["date"].unique()) < min_days_required:
        print(f"  Skipping Window {i+1} ‚Äî Validation window too short: {len(val_data['date'].unique())} days")
        continue
        
    val_returns = val_data.groupby("date")["close"].mean().pct_change().dropna()
    if val_returns.std() == 0 or val_returns.empty:
        print(f"  Skipping Window {i+1} ‚Äî No price volatility in validation window.")
        continue

    print(f" Window {i+1}")
    print(f"  Train window: {train_start.date()} to {train_end.date()} ‚Äî {len(train_data['date'].unique())} days")
    print(f"  Val window  : {train_end.date()} to {val_end.date()}   ‚Äî {len(val_data['date'].unique())} days")
    print(f"  Trade window: {val_end.date()} to {trade_end.date()} ‚Äî {len(trade_data['date'].unique())} days")


    val_returns = val_data.groupby("date")["close"].mean().pct_change().dropna()
    if val_returns.std() == 0 or val_returns.empty:
        print(f"  Validation window {i+1} has no price volatility. Skipping.")
        continue

    env_train = DummyVecEnv([lambda: CryptoTradingEnv(train_data)])
    agent = DRLAgent(env=env_train)

      # replaced  "ppo": agent.train_PPO(total_timesteps=len(train_data)*EPISODES, model_kwargs=PPO_model_kwargs),
    print(val_returns.describe())

    models = {
        "ppo": agent.train_PPO(total_timesteps=len(train_data)*30, model_kwargs=PPO_model_kwargs),
        "a2c": agent.train_A2C(total_timesteps=len(train_data)*30, model_kwargs=A2C_model_kwargs),
        "ddpg": agent.train_DDPG(total_timesteps=int(len(train_data)*30*0.5), model_kwargs=DDPG_model_kwargs)
    }

    best_model = None
    best_sharpe = -np.inf
    
    for name, model in models.items():
        env_val = DummyVecEnv([lambda: CryptoTradingEnv(val_data)])
        
        print(f"{name} Account Memory Sample: {env_val.envs[0].asset_memory[:5]}")
        print(f"Is Model Trained? {'Yes' if model else 'No'}")

        sharpe = DRLAgent.DRL_prediction(model=model, environment=env_val, evaluate=True)
        
        print("{} Sharpe: {}".format(name, sharpe))
        # Debug: print account value
        account_vals = env_val.envs[0].asset_memory
        print("{} Account Value Range: {:.2f} to {:.2f}".format(name, min(account_vals), max(account_vals)))
        if not np.isnan(sharpe) and sharpe > best_sharpe:
            best_model = model
            best_sharpe = sharpe

    # print(" Best model: {best_model.__class__.__name__} with Sharpe {best_sharpe:.2f}")

    env_trade = DummyVecEnv([lambda: CryptoTradingEnv(trade_data)])
    
    if best_model is not None:
        df_result = DRLAgent.DRL_prediction(model=best_model, environment=env_trade)
        account_vals = df_result["account_value"].values.tolist()

        print(f"‚úÖ Best model: {best_model.__class__.__name__} with Sharpe {best_sharpe:.4f}")
        account_values = df_result["account_value"]
        daily_returns = get_daily_return(df_result)  # get_daily_return expects a full df with date
        
        sharpe = (252**0.5) * daily_returns.mean() / daily_returns.std()
        total_return = account_values.iloc[-1] / account_values.iloc[0] - 1
        volatility = daily_returns.std()
        max_drawdown = (account_values.cummax() - account_values).max() / account_values.cummax().max()
        performance_log.append({
                "agent": best_model.__class__.__name__,
                "window": i + 1,
                "train_start": train_start.date(),
                "train_end": train_end.date(),
                "trade_start": val_end.date(),
                "trade_end": trade_end.date(),
                "sharpe_ratio": sharpe,
                "total_return": total_return,
                "volatility": volatility,
                "max_drawdown": max_drawdown,
                "final_account_value": account_values.iloc[-1],
                "asset_class": "Crypto"  # or "Stock" in the other notebook
            })
    else:
        print(f"‚ùå No valid model selected in window {i+1} ‚Äî skipping trade step.")


    if performance_log:
        df_metrics = pd.DataFrame(performance_log)
        df_metrics.to_csv("../results/crypto_metrics.csv", index=False)
        print("Metrics saved to ../results/crypto_metrics.csv")
        display(df_metrics)
    
        # df_final = pd.concat(performance_log)
        # df_final.reset_index(drop=True, inplace=True)
    
        # # Save for later use
        # df_final.to_csv("../results/crypto_account_values.csv", index=False)
        # print("Results saved to results/crypto_account_values.csv")
    else:
        print("No results to analyze.")



 Rolling Window 1: 2020-05-04 to 2021-03-04
 Window 1
  Train window: 2020-05-04 to 2020-11-04 ‚Äî 184 days
  Val window  : 2020-11-04 to 2021-01-04   ‚Äî 61 days
  Trade window: 2021-01-04 to 2021-03-04 ‚Äî 59 days
count    60.000000
mean      0.014905
std       0.037071
min      -0.083952
25%      -0.008187
50%       0.011502
75%       0.038438
max       0.100382
Name: close, dtype: float64
Using cpu device
-----------------------------
| time/              |      |
|    fps             | 1437 |
|    iterations      | 1    |
|    time_elapsed    | 1    |
|    total_timesteps | 2048 |
-----------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 1231          |
|    iterations           | 2             |
|    time_elapsed         | 3             |
|    total_timesteps      | 4096          |
| train/                  |               |
|    approx_kl            | 0.0017405894  |
|    clip_fraction       

Unnamed: 0,agent,window,train_start,train_end,trade_start,trade_end,sharpe_ratio,total_return,volatility,max_drawdown,final_account_value,asset_class
0,PPO,1,2020-05-04,2020-11-04,2021-01-04,2021-03-04,0.230978,-0.004975,0.030097,0.224806,1000000.0,Crypto


 Rolling Window 2: 2020-08-04 to 2021-06-04
 Window 2
  Train window: 2020-08-04 to 2021-02-04 ‚Äî 184 days
  Val window  : 2021-02-04 to 2021-04-04   ‚Äî 59 days
  Trade window: 2021-04-04 to 2021-06-04 ‚Äî 61 days
count    58.000000
mean      0.008346
std       0.044883
min      -0.096596
25%      -0.016296
50%       0.002865
75%       0.028754
max       0.190998
Name: close, dtype: float64
Using cpu device
-----------------------------
| time/              |      |
|    fps             | 1131 |
|    iterations      | 1    |
|    time_elapsed    | 1    |
|    total_timesteps | 2048 |
-----------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 972          |
|    iterations           | 2            |
|    time_elapsed         | 4            |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 0.0066404087 |
|    clip_fraction        | 0.070

Unnamed: 0,agent,window,train_start,train_end,trade_start,trade_end,sharpe_ratio,total_return,volatility,max_drawdown,final_account_value,asset_class
0,PPO,1,2020-05-04,2020-11-04,2021-01-04,2021-03-04,0.230978,-0.004975,0.030097,0.224806,1000000.0,Crypto
1,PPO,2,2020-08-04,2021-02-04,2021-04-04,2021-06-04,0.236813,-0.004975,0.030358,0.230769,1000000.0,Crypto


 Rolling Window 3: 2020-11-04 to 2021-09-04
 Window 3
  Train window: 2020-11-04 to 2021-05-04 ‚Äî 181 days
  Val window  : 2021-05-04 to 2021-07-04   ‚Äî 61 days
  Trade window: 2021-07-04 to 2021-09-04 ‚Äî 62 days
count    60.000000
mean     -0.005489
std       0.056171
min      -0.153507
25%      -0.040133
50%      -0.006630
75%       0.026712
max       0.127697
Name: close, dtype: float64
Using cpu device
-----------------------------
| time/              |      |
|    fps             | 1369 |
|    iterations      | 1    |
|    time_elapsed    | 1    |
|    total_timesteps | 2048 |
-----------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 1139        |
|    iterations           | 2           |
|    time_elapsed         | 3           |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.007431249 |
|    clip_fraction        | 0.0602      |

Unnamed: 0,agent,window,train_start,train_end,trade_start,trade_end,sharpe_ratio,total_return,volatility,max_drawdown,final_account_value,asset_class
0,PPO,1,2020-05-04,2020-11-04,2021-01-04,2021-03-04,0.230978,-0.004975,0.030097,0.224806,1000000.0,Crypto
1,PPO,2,2020-08-04,2021-02-04,2021-04-04,2021-06-04,0.236813,-0.004975,0.030358,0.230769,1000000.0,Crypto
2,PPO,3,2020-11-04,2021-05-04,2021-07-04,2021-09-04,0.239639,-0.004975,0.030485,0.233716,1000000.0,Crypto


 Rolling Window 4: 2021-02-04 to 2021-12-04
 Window 4
  Train window: 2021-02-04 to 2021-08-04 ‚Äî 181 days
  Val window  : 2021-08-04 to 2021-10-04   ‚Äî 61 days
  Trade window: 2021-10-04 to 2021-12-04 ‚Äî 61 days
count    60.000000
mean      0.003979
std       0.038041
min      -0.111191
25%      -0.020156
50%       0.002880
75%       0.027307
max       0.098811
Name: close, dtype: float64
Using cpu device
-----------------------------
| time/              |      |
|    fps             | 1152 |
|    iterations      | 1    |
|    time_elapsed    | 1    |
|    total_timesteps | 2048 |
-----------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 1118         |
|    iterations           | 2            |
|    time_elapsed         | 3            |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 0.0049243486 |
|    clip_fraction        | 0.022

Unnamed: 0,agent,window,train_start,train_end,trade_start,trade_end,sharpe_ratio,total_return,volatility,max_drawdown,final_account_value,asset_class
0,PPO,1,2020-05-04,2020-11-04,2021-01-04,2021-03-04,0.230978,-0.004975,0.030097,0.224806,1000000.0,Crypto
1,PPO,2,2020-08-04,2021-02-04,2021-04-04,2021-06-04,0.236813,-0.004975,0.030358,0.230769,1000000.0,Crypto
2,PPO,3,2020-11-04,2021-05-04,2021-07-04,2021-09-04,0.239639,-0.004975,0.030485,0.233716,1000000.0,Crypto
3,PPO,4,2021-02-04,2021-08-04,2021-10-04,2021-12-04,0.236813,-0.004975,0.030358,0.230769,1000000.0,Crypto


 Rolling Window 5: 2021-05-04 to 2022-03-04
 Window 5
  Train window: 2021-05-04 to 2021-11-04 ‚Äî 184 days
  Val window  : 2021-11-04 to 2022-01-04   ‚Äî 61 days
  Trade window: 2022-01-04 to 2022-03-04 ‚Äî 59 days
count    60.000000
mean     -0.003994
std       0.032670
min      -0.090060
25%      -0.018657
50%      -0.000892
75%       0.015877
max       0.065516
Name: close, dtype: float64
Using cpu device
-----------------------------
| time/              |      |
|    fps             | 1330 |
|    iterations      | 1    |
|    time_elapsed    | 1    |
|    total_timesteps | 2048 |
-----------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 1201         |
|    iterations           | 2            |
|    time_elapsed         | 3            |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 0.0047718547 |
|    clip_fraction        | 0.024

Unnamed: 0,agent,window,train_start,train_end,trade_start,trade_end,sharpe_ratio,total_return,volatility,max_drawdown,final_account_value,asset_class
0,PPO,1,2020-05-04,2020-11-04,2021-01-04,2021-03-04,0.230978,-0.004975,0.030097,0.224806,1000000.0,Crypto
1,PPO,2,2020-08-04,2021-02-04,2021-04-04,2021-06-04,0.236813,-0.004975,0.030358,0.230769,1000000.0,Crypto
2,PPO,3,2020-11-04,2021-05-04,2021-07-04,2021-09-04,0.239639,-0.004975,0.030485,0.233716,1000000.0,Crypto
3,PPO,4,2021-02-04,2021-08-04,2021-10-04,2021-12-04,0.236813,-0.004975,0.030358,0.230769,1000000.0,Crypto
4,PPO,5,2021-05-04,2021-11-04,2022-01-04,2022-03-04,0.230978,-0.004975,0.030097,0.224806,1000000.0,Crypto


 Rolling Window 6: 2021-08-04 to 2022-06-04
 Window 6
  Train window: 2021-08-04 to 2022-02-04 ‚Äî 184 days
  Val window  : 2022-02-04 to 2022-04-04   ‚Äî 59 days
  Trade window: 2022-04-04 to 2022-06-04 ‚Äî 61 days
count    58.000000
mean      0.002574
std       0.035689
min      -0.077664
25%      -0.017083
50%       0.002126
75%       0.021689
max       0.142998
Name: close, dtype: float64
Using cpu device
-----------------------------
| time/              |      |
|    fps             | 1326 |
|    iterations      | 1    |
|    time_elapsed    | 1    |
|    total_timesteps | 2048 |
-----------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 1175         |
|    iterations           | 2            |
|    time_elapsed         | 3            |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 0.0034680017 |
|    clip_fraction        | 0.030

Unnamed: 0,agent,window,train_start,train_end,trade_start,trade_end,sharpe_ratio,total_return,volatility,max_drawdown,final_account_value,asset_class
0,PPO,1,2020-05-04,2020-11-04,2021-01-04,2021-03-04,0.230978,-0.004975,0.030097,0.224806,1000000.0,Crypto
1,PPO,2,2020-08-04,2021-02-04,2021-04-04,2021-06-04,0.236813,-0.004975,0.030358,0.230769,1000000.0,Crypto
2,PPO,3,2020-11-04,2021-05-04,2021-07-04,2021-09-04,0.239639,-0.004975,0.030485,0.233716,1000000.0,Crypto
3,PPO,4,2021-02-04,2021-08-04,2021-10-04,2021-12-04,0.236813,-0.004975,0.030358,0.230769,1000000.0,Crypto
4,PPO,5,2021-05-04,2021-11-04,2022-01-04,2022-03-04,0.230978,-0.004975,0.030097,0.224806,1000000.0,Crypto
5,PPO,6,2021-08-04,2022-02-04,2022-04-04,2022-06-04,0.236813,-0.004975,0.030358,0.230769,1000000.0,Crypto


 Rolling Window 7: 2021-11-04 to 2022-09-04
 Window 7
  Train window: 2021-11-04 to 2022-05-04 ‚Äî 181 days
  Val window  : 2022-05-04 to 2022-07-04   ‚Äî 61 days
  Trade window: 2022-07-04 to 2022-09-04 ‚Äî 62 days
count    60.000000
mean     -0.011257
std       0.043470
min      -0.153953
25%      -0.033011
50%      -0.007617
75%       0.012759
max       0.086979
Name: close, dtype: float64
Using cpu device
-----------------------------
| time/              |      |
|    fps             | 1250 |
|    iterations      | 1    |
|    time_elapsed    | 1    |
|    total_timesteps | 2048 |
-----------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 1079         |
|    iterations           | 2            |
|    time_elapsed         | 3            |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 0.004599882  |
|    clip_fraction        | 0.022

Unnamed: 0,agent,window,train_start,train_end,trade_start,trade_end,sharpe_ratio,total_return,volatility,max_drawdown,final_account_value,asset_class
0,PPO,1,2020-05-04,2020-11-04,2021-01-04,2021-03-04,0.230978,-0.004975,0.030097,0.224806,1000000.0,Crypto
1,PPO,2,2020-08-04,2021-02-04,2021-04-04,2021-06-04,0.236813,-0.004975,0.030358,0.230769,1000000.0,Crypto
2,PPO,3,2020-11-04,2021-05-04,2021-07-04,2021-09-04,0.239639,-0.004975,0.030485,0.233716,1000000.0,Crypto
3,PPO,4,2021-02-04,2021-08-04,2021-10-04,2021-12-04,0.236813,-0.004975,0.030358,0.230769,1000000.0,Crypto
4,PPO,5,2021-05-04,2021-11-04,2022-01-04,2022-03-04,0.230978,-0.004975,0.030097,0.224806,1000000.0,Crypto
5,PPO,6,2021-08-04,2022-02-04,2022-04-04,2022-06-04,0.236813,-0.004975,0.030358,0.230769,1000000.0,Crypto
6,PPO,7,2021-11-04,2022-05-04,2022-07-04,2022-09-04,0.239639,-0.004975,0.030485,0.233716,1000000.0,Crypto


 Rolling Window 8: 2022-02-04 to 2022-12-04
 Window 8
  Train window: 2022-02-04 to 2022-08-04 ‚Äî 181 days
  Val window  : 2022-08-04 to 2022-10-04   ‚Äî 61 days
  Trade window: 2022-10-04 to 2022-12-04 ‚Äî 61 days
count    60.000000
mean     -0.001927
std       0.031454
min      -0.103612
25%      -0.011112
50%      -0.002226
75%       0.013144
max       0.101321
Name: close, dtype: float64
Using cpu device
-----------------------------
| time/              |      |
|    fps             | 1342 |
|    iterations      | 1    |
|    time_elapsed    | 1    |
|    total_timesteps | 2048 |
-----------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 1094         |
|    iterations           | 2            |
|    time_elapsed         | 3            |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 0.0073829247 |
|    clip_fraction        | 0.056

Unnamed: 0,agent,window,train_start,train_end,trade_start,trade_end,sharpe_ratio,total_return,volatility,max_drawdown,final_account_value,asset_class
0,PPO,1,2020-05-04,2020-11-04,2021-01-04,2021-03-04,0.230978,-0.004975,0.030097,0.224806,1000000.0,Crypto
1,PPO,2,2020-08-04,2021-02-04,2021-04-04,2021-06-04,0.236813,-0.004975,0.030358,0.230769,1000000.0,Crypto
2,PPO,3,2020-11-04,2021-05-04,2021-07-04,2021-09-04,0.239639,-0.004975,0.030485,0.233716,1000000.0,Crypto
3,PPO,4,2021-02-04,2021-08-04,2021-10-04,2021-12-04,0.236813,-0.004975,0.030358,0.230769,1000000.0,Crypto
4,PPO,5,2021-05-04,2021-11-04,2022-01-04,2022-03-04,0.230978,-0.004975,0.030097,0.224806,1000000.0,Crypto
5,PPO,6,2021-08-04,2022-02-04,2022-04-04,2022-06-04,0.236813,-0.004975,0.030358,0.230769,1000000.0,Crypto
6,PPO,7,2021-11-04,2022-05-04,2022-07-04,2022-09-04,0.239639,-0.004975,0.030485,0.233716,1000000.0,Crypto
7,PPO,8,2022-02-04,2022-08-04,2022-10-04,2022-12-04,0.236813,-0.004975,0.030358,0.230769,1000000.0,Crypto


 Rolling Window 9: 2022-05-04 to 2023-03-04
 Window 9
  Train window: 2022-05-04 to 2022-11-04 ‚Äî 184 days
  Val window  : 2022-11-04 to 2023-01-04   ‚Äî 61 days
  Trade window: 2023-01-04 to 2023-03-04 ‚Äî 59 days
count    60.000000
mean     -0.003525
std       0.031491
min      -0.143671
25%      -0.013798
50%      -0.000906
75%       0.006221
max       0.110122
Name: close, dtype: float64
Using cpu device
-----------------------------
| time/              |      |
|    fps             | 1376 |
|    iterations      | 1    |
|    time_elapsed    | 1    |
|    total_timesteps | 2048 |
-----------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 1184        |
|    iterations           | 2           |
|    time_elapsed         | 3           |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.005543595 |
|    clip_fraction        | 0.0598      |

Unnamed: 0,agent,window,train_start,train_end,trade_start,trade_end,sharpe_ratio,total_return,volatility,max_drawdown,final_account_value,asset_class
0,PPO,1,2020-05-04,2020-11-04,2021-01-04,2021-03-04,0.230978,-0.004975,0.030097,0.224806,1000000.0,Crypto
1,PPO,2,2020-08-04,2021-02-04,2021-04-04,2021-06-04,0.236813,-0.004975,0.030358,0.230769,1000000.0,Crypto
2,PPO,3,2020-11-04,2021-05-04,2021-07-04,2021-09-04,0.239639,-0.004975,0.030485,0.233716,1000000.0,Crypto
3,PPO,4,2021-02-04,2021-08-04,2021-10-04,2021-12-04,0.236813,-0.004975,0.030358,0.230769,1000000.0,Crypto
4,PPO,5,2021-05-04,2021-11-04,2022-01-04,2022-03-04,0.230978,-0.004975,0.030097,0.224806,1000000.0,Crypto
5,PPO,6,2021-08-04,2022-02-04,2022-04-04,2022-06-04,0.236813,-0.004975,0.030358,0.230769,1000000.0,Crypto
6,PPO,7,2021-11-04,2022-05-04,2022-07-04,2022-09-04,0.239639,-0.004975,0.030485,0.233716,1000000.0,Crypto
7,PPO,8,2022-02-04,2022-08-04,2022-10-04,2022-12-04,0.236813,-0.004975,0.030358,0.230769,1000000.0,Crypto
8,PPO,9,2022-05-04,2022-11-04,2023-01-04,2023-03-04,0.230978,-0.004975,0.030097,0.224806,1000000.0,Crypto


 Rolling Window 10: 2022-08-04 to 2023-06-04
 Window 10
  Train window: 2022-08-04 to 2023-02-04 ‚Äî 184 days
  Val window  : 2023-02-04 to 2023-04-04   ‚Äî 59 days
  Trade window: 2023-04-04 to 2023-06-04 ‚Äî 61 days
count    58.000000
mean      0.003428
std       0.032316
min      -0.061894
25%      -0.013592
50%      -0.002238
75%       0.018283
max       0.094438
Name: close, dtype: float64
Using cpu device
-----------------------------
| time/              |      |
|    fps             | 1375 |
|    iterations      | 1    |
|    time_elapsed    | 1    |
|    total_timesteps | 2048 |
-----------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 1206         |
|    iterations           | 2            |
|    time_elapsed         | 3            |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 0.0047341976 |
|    clip_fraction        | 0.0

Unnamed: 0,agent,window,train_start,train_end,trade_start,trade_end,sharpe_ratio,total_return,volatility,max_drawdown,final_account_value,asset_class
0,PPO,1,2020-05-04,2020-11-04,2021-01-04,2021-03-04,0.230978,-0.004975,0.030097,0.224806,1000000.0,Crypto
1,PPO,2,2020-08-04,2021-02-04,2021-04-04,2021-06-04,0.236813,-0.004975,0.030358,0.230769,1000000.0,Crypto
2,PPO,3,2020-11-04,2021-05-04,2021-07-04,2021-09-04,0.239639,-0.004975,0.030485,0.233716,1000000.0,Crypto
3,PPO,4,2021-02-04,2021-08-04,2021-10-04,2021-12-04,0.236813,-0.004975,0.030358,0.230769,1000000.0,Crypto
4,PPO,5,2021-05-04,2021-11-04,2022-01-04,2022-03-04,0.230978,-0.004975,0.030097,0.224806,1000000.0,Crypto
5,PPO,6,2021-08-04,2022-02-04,2022-04-04,2022-06-04,0.236813,-0.004975,0.030358,0.230769,1000000.0,Crypto
6,PPO,7,2021-11-04,2022-05-04,2022-07-04,2022-09-04,0.239639,-0.004975,0.030485,0.233716,1000000.0,Crypto
7,PPO,8,2022-02-04,2022-08-04,2022-10-04,2022-12-04,0.236813,-0.004975,0.030358,0.230769,1000000.0,Crypto
8,PPO,9,2022-05-04,2022-11-04,2023-01-04,2023-03-04,0.230978,-0.004975,0.030097,0.224806,1000000.0,Crypto
9,PPO,10,2022-08-04,2023-02-04,2023-04-04,2023-06-04,0.236813,-0.004975,0.030358,0.230769,1000000.0,Crypto


 Rolling Window 11: 2022-11-04 to 2023-09-04
 Window 11
  Train window: 2022-11-04 to 2023-05-04 ‚Äî 181 days
  Val window  : 2023-05-04 to 2023-07-04   ‚Äî 61 days
  Trade window: 2023-07-04 to 2023-09-04 ‚Äî 62 days
count    60.000000
mean      0.001470
std       0.021117
min      -0.050561
25%      -0.008309
50%       0.001160
75%       0.009916
max       0.059237
Name: close, dtype: float64
Using cpu device
-----------------------------
| time/              |      |
|    fps             | 1333 |
|    iterations      | 1    |
|    time_elapsed    | 1    |
|    total_timesteps | 2048 |
-----------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 1200        |
|    iterations           | 2           |
|    time_elapsed         | 3           |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.007412816 |
|    clip_fraction        | 0.0599     

Unnamed: 0,agent,window,train_start,train_end,trade_start,trade_end,sharpe_ratio,total_return,volatility,max_drawdown,final_account_value,asset_class
0,PPO,1,2020-05-04,2020-11-04,2021-01-04,2021-03-04,0.230978,-0.004975,0.030097,0.224806,1000000.0,Crypto
1,PPO,2,2020-08-04,2021-02-04,2021-04-04,2021-06-04,0.236813,-0.004975,0.030358,0.230769,1000000.0,Crypto
2,PPO,3,2020-11-04,2021-05-04,2021-07-04,2021-09-04,0.239639,-0.004975,0.030485,0.233716,1000000.0,Crypto
3,PPO,4,2021-02-04,2021-08-04,2021-10-04,2021-12-04,0.236813,-0.004975,0.030358,0.230769,1000000.0,Crypto
4,PPO,5,2021-05-04,2021-11-04,2022-01-04,2022-03-04,0.230978,-0.004975,0.030097,0.224806,1000000.0,Crypto
5,PPO,6,2021-08-04,2022-02-04,2022-04-04,2022-06-04,0.236813,-0.004975,0.030358,0.230769,1000000.0,Crypto
6,PPO,7,2021-11-04,2022-05-04,2022-07-04,2022-09-04,0.239639,-0.004975,0.030485,0.233716,1000000.0,Crypto
7,PPO,8,2022-02-04,2022-08-04,2022-10-04,2022-12-04,0.236813,-0.004975,0.030358,0.230769,1000000.0,Crypto
8,PPO,9,2022-05-04,2022-11-04,2023-01-04,2023-03-04,0.230978,-0.004975,0.030097,0.224806,1000000.0,Crypto
9,PPO,10,2022-08-04,2023-02-04,2023-04-04,2023-06-04,0.236813,-0.004975,0.030358,0.230769,1000000.0,Crypto


 Rolling Window 12: 2023-02-04 to 2023-12-04
 Window 12
  Train window: 2023-02-04 to 2023-08-04 ‚Äî 181 days
  Val window  : 2023-08-04 to 2023-10-04   ‚Äî 61 days
  Trade window: 2023-10-04 to 2023-12-04 ‚Äî 61 days
count    60.000000
mean     -0.000873
std       0.017955
min      -0.073135
25%      -0.004795
50%      -0.001121
75%       0.003691
max       0.060227
Name: close, dtype: float64
Using cpu device
-----------------------------
| time/              |      |
|    fps             | 1327 |
|    iterations      | 1    |
|    time_elapsed    | 1    |
|    total_timesteps | 2048 |
-----------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 1167         |
|    iterations           | 2            |
|    time_elapsed         | 3            |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 0.0058404612 |
|    clip_fraction        | 0.0

Unnamed: 0,agent,window,train_start,train_end,trade_start,trade_end,sharpe_ratio,total_return,volatility,max_drawdown,final_account_value,asset_class
0,PPO,1,2020-05-04,2020-11-04,2021-01-04,2021-03-04,0.230978,-0.004975,0.030097,0.224806,1000000.0,Crypto
1,PPO,2,2020-08-04,2021-02-04,2021-04-04,2021-06-04,0.236813,-0.004975,0.030358,0.230769,1000000.0,Crypto
2,PPO,3,2020-11-04,2021-05-04,2021-07-04,2021-09-04,0.239639,-0.004975,0.030485,0.233716,1000000.0,Crypto
3,PPO,4,2021-02-04,2021-08-04,2021-10-04,2021-12-04,0.236813,-0.004975,0.030358,0.230769,1000000.0,Crypto
4,PPO,5,2021-05-04,2021-11-04,2022-01-04,2022-03-04,0.230978,-0.004975,0.030097,0.224806,1000000.0,Crypto
5,PPO,6,2021-08-04,2022-02-04,2022-04-04,2022-06-04,0.236813,-0.004975,0.030358,0.230769,1000000.0,Crypto
6,PPO,7,2021-11-04,2022-05-04,2022-07-04,2022-09-04,0.239639,-0.004975,0.030485,0.233716,1000000.0,Crypto
7,PPO,8,2022-02-04,2022-08-04,2022-10-04,2022-12-04,0.236813,-0.004975,0.030358,0.230769,1000000.0,Crypto
8,PPO,9,2022-05-04,2022-11-04,2023-01-04,2023-03-04,0.230978,-0.004975,0.030097,0.224806,1000000.0,Crypto
9,PPO,10,2022-08-04,2023-02-04,2023-04-04,2023-06-04,0.236813,-0.004975,0.030358,0.230769,1000000.0,Crypto


 Rolling Window 13: 2023-05-04 to 2024-03-04
 Window 13
  Train window: 2023-05-04 to 2023-11-04 ‚Äî 184 days
  Val window  : 2023-11-04 to 2024-01-04   ‚Äî 61 days
  Trade window: 2024-01-04 to 2024-03-04 ‚Äî 60 days
count    60.000000
mean      0.003609
std       0.024043
min      -0.057722
25%      -0.008225
50%       0.002209
75%       0.015539
max       0.063577
Name: close, dtype: float64
Using cpu device
-----------------------------
| time/              |      |
|    fps             | 1205 |
|    iterations      | 1    |
|    time_elapsed    | 1    |
|    total_timesteps | 2048 |
-----------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 1076         |
|    iterations           | 2            |
|    time_elapsed         | 3            |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 0.003634214  |
|    clip_fraction        | 0.0

Unnamed: 0,agent,window,train_start,train_end,trade_start,trade_end,sharpe_ratio,total_return,volatility,max_drawdown,final_account_value,asset_class
0,PPO,1,2020-05-04,2020-11-04,2021-01-04,2021-03-04,0.230978,-0.004975,0.030097,0.224806,1000000.0,Crypto
1,PPO,2,2020-08-04,2021-02-04,2021-04-04,2021-06-04,0.236813,-0.004975,0.030358,0.230769,1000000.0,Crypto
2,PPO,3,2020-11-04,2021-05-04,2021-07-04,2021-09-04,0.239639,-0.004975,0.030485,0.233716,1000000.0,Crypto
3,PPO,4,2021-02-04,2021-08-04,2021-10-04,2021-12-04,0.236813,-0.004975,0.030358,0.230769,1000000.0,Crypto
4,PPO,5,2021-05-04,2021-11-04,2022-01-04,2022-03-04,0.230978,-0.004975,0.030097,0.224806,1000000.0,Crypto
5,PPO,6,2021-08-04,2022-02-04,2022-04-04,2022-06-04,0.236813,-0.004975,0.030358,0.230769,1000000.0,Crypto
6,PPO,7,2021-11-04,2022-05-04,2022-07-04,2022-09-04,0.239639,-0.004975,0.030485,0.233716,1000000.0,Crypto
7,PPO,8,2022-02-04,2022-08-04,2022-10-04,2022-12-04,0.236813,-0.004975,0.030358,0.230769,1000000.0,Crypto
8,PPO,9,2022-05-04,2022-11-04,2023-01-04,2023-03-04,0.230978,-0.004975,0.030097,0.224806,1000000.0,Crypto
9,PPO,10,2022-08-04,2023-02-04,2023-04-04,2023-06-04,0.236813,-0.004975,0.030358,0.230769,1000000.0,Crypto


 Rolling Window 14: 2023-08-04 to 2024-06-04
 Window 14
  Train window: 2023-08-04 to 2024-02-04 ‚Äî 184 days
  Val window  : 2024-02-04 to 2024-04-04   ‚Äî 60 days
  Trade window: 2024-04-04 to 2024-06-04 ‚Äî 61 days
count    59.000000
mean      0.007960
std       0.034234
min      -0.084836
25%      -0.009196
50%       0.007727
75%       0.023476
max       0.096181
Name: close, dtype: float64
Using cpu device
-----------------------------
| time/              |      |
|    fps             | 1331 |
|    iterations      | 1    |
|    time_elapsed    | 1    |
|    total_timesteps | 2048 |
-----------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 1082         |
|    iterations           | 2            |
|    time_elapsed         | 3            |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 0.0049831737 |
|    clip_fraction        | 0.0

Unnamed: 0,agent,window,train_start,train_end,trade_start,trade_end,sharpe_ratio,total_return,volatility,max_drawdown,final_account_value,asset_class
0,PPO,1,2020-05-04,2020-11-04,2021-01-04,2021-03-04,0.230978,-0.004975,0.030097,0.224806,1000000.0,Crypto
1,PPO,2,2020-08-04,2021-02-04,2021-04-04,2021-06-04,0.236813,-0.004975,0.030358,0.230769,1000000.0,Crypto
2,PPO,3,2020-11-04,2021-05-04,2021-07-04,2021-09-04,0.239639,-0.004975,0.030485,0.233716,1000000.0,Crypto
3,PPO,4,2021-02-04,2021-08-04,2021-10-04,2021-12-04,0.236813,-0.004975,0.030358,0.230769,1000000.0,Crypto
4,PPO,5,2021-05-04,2021-11-04,2022-01-04,2022-03-04,0.230978,-0.004975,0.030097,0.224806,1000000.0,Crypto
5,PPO,6,2021-08-04,2022-02-04,2022-04-04,2022-06-04,0.236813,-0.004975,0.030358,0.230769,1000000.0,Crypto
6,PPO,7,2021-11-04,2022-05-04,2022-07-04,2022-09-04,0.239639,-0.004975,0.030485,0.233716,1000000.0,Crypto
7,PPO,8,2022-02-04,2022-08-04,2022-10-04,2022-12-04,0.236813,-0.004975,0.030358,0.230769,1000000.0,Crypto
8,PPO,9,2022-05-04,2022-11-04,2023-01-04,2023-03-04,0.230978,-0.004975,0.030097,0.224806,1000000.0,Crypto
9,PPO,10,2022-08-04,2023-02-04,2023-04-04,2023-06-04,0.236813,-0.004975,0.030358,0.230769,1000000.0,Crypto


 Rolling Window 15: 2023-11-04 to 2024-09-04
 Window 15
  Train window: 2023-11-04 to 2024-05-04 ‚Äî 182 days
  Val window  : 2024-05-04 to 2024-07-04   ‚Äî 61 days
  Trade window: 2024-07-04 to 2024-09-04 ‚Äî 62 days
count    60.000000
mean     -0.000659
std       0.022111
min      -0.044797
25%      -0.014802
50%      -0.001902
75%       0.008934
max       0.083109
Name: close, dtype: float64
Using cpu device
-----------------------------
| time/              |      |
|    fps             | 1425 |
|    iterations      | 1    |
|    time_elapsed    | 1    |
|    total_timesteps | 2048 |
-----------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 1272         |
|    iterations           | 2            |
|    time_elapsed         | 3            |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 0.010383649  |
|    clip_fraction        | 0.1

Unnamed: 0,agent,window,train_start,train_end,trade_start,trade_end,sharpe_ratio,total_return,volatility,max_drawdown,final_account_value,asset_class
0,PPO,1,2020-05-04,2020-11-04,2021-01-04,2021-03-04,0.230978,-0.004975,0.030097,0.224806,1000000.0,Crypto
1,PPO,2,2020-08-04,2021-02-04,2021-04-04,2021-06-04,0.236813,-0.004975,0.030358,0.230769,1000000.0,Crypto
2,PPO,3,2020-11-04,2021-05-04,2021-07-04,2021-09-04,0.239639,-0.004975,0.030485,0.233716,1000000.0,Crypto
3,PPO,4,2021-02-04,2021-08-04,2021-10-04,2021-12-04,0.236813,-0.004975,0.030358,0.230769,1000000.0,Crypto
4,PPO,5,2021-05-04,2021-11-04,2022-01-04,2022-03-04,0.230978,-0.004975,0.030097,0.224806,1000000.0,Crypto
5,PPO,6,2021-08-04,2022-02-04,2022-04-04,2022-06-04,0.236813,-0.004975,0.030358,0.230769,1000000.0,Crypto
6,PPO,7,2021-11-04,2022-05-04,2022-07-04,2022-09-04,0.239639,-0.004975,0.030485,0.233716,1000000.0,Crypto
7,PPO,8,2022-02-04,2022-08-04,2022-10-04,2022-12-04,0.236813,-0.004975,0.030358,0.230769,1000000.0,Crypto
8,PPO,9,2022-05-04,2022-11-04,2023-01-04,2023-03-04,0.230978,-0.004975,0.030097,0.224806,1000000.0,Crypto
9,PPO,10,2022-08-04,2023-02-04,2023-04-04,2023-06-04,0.236813,-0.004975,0.030358,0.230769,1000000.0,Crypto


 Rolling Window 16: 2024-02-04 to 2024-12-04
 Window 16
  Train window: 2024-02-04 to 2024-08-04 ‚Äî 182 days
  Val window  : 2024-08-04 to 2024-10-04   ‚Äî 61 days
  Trade window: 2024-10-04 to 2024-12-04 ‚Äî 61 days
count    60.000000
mean      0.001023
std       0.029347
min      -0.072498
25%      -0.015864
50%       0.001210
75%       0.013768
max       0.119901
Name: close, dtype: float64
Using cpu device
-----------------------------
| time/              |      |
|    fps             | 1442 |
|    iterations      | 1    |
|    time_elapsed    | 1    |
|    total_timesteps | 2048 |
-----------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 1215         |
|    iterations           | 2            |
|    time_elapsed         | 3            |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 0.0018990808 |
|    clip_fraction        | 0.0

Unnamed: 0,agent,window,train_start,train_end,trade_start,trade_end,sharpe_ratio,total_return,volatility,max_drawdown,final_account_value,asset_class
0,PPO,1,2020-05-04,2020-11-04,2021-01-04,2021-03-04,0.230978,-0.004975,0.030097,0.224806,1000000.0,Crypto
1,PPO,2,2020-08-04,2021-02-04,2021-04-04,2021-06-04,0.236813,-0.004975,0.030358,0.230769,1000000.0,Crypto
2,PPO,3,2020-11-04,2021-05-04,2021-07-04,2021-09-04,0.239639,-0.004975,0.030485,0.233716,1000000.0,Crypto
3,PPO,4,2021-02-04,2021-08-04,2021-10-04,2021-12-04,0.236813,-0.004975,0.030358,0.230769,1000000.0,Crypto
4,PPO,5,2021-05-04,2021-11-04,2022-01-04,2022-03-04,0.230978,-0.004975,0.030097,0.224806,1000000.0,Crypto
5,PPO,6,2021-08-04,2022-02-04,2022-04-04,2022-06-04,0.236813,-0.004975,0.030358,0.230769,1000000.0,Crypto
6,PPO,7,2021-11-04,2022-05-04,2022-07-04,2022-09-04,0.239639,-0.004975,0.030485,0.233716,1000000.0,Crypto
7,PPO,8,2022-02-04,2022-08-04,2022-10-04,2022-12-04,0.236813,-0.004975,0.030358,0.230769,1000000.0,Crypto
8,PPO,9,2022-05-04,2022-11-04,2023-01-04,2023-03-04,0.230978,-0.004975,0.030097,0.224806,1000000.0,Crypto
9,PPO,10,2022-08-04,2023-02-04,2023-04-04,2023-06-04,0.236813,-0.004975,0.030358,0.230769,1000000.0,Crypto


## from finrl.plot import get_daily_return

account_values = df_result["account_value"]
daily_returns = get_daily_return(account_values)

sharpe = (252**0.5) * daily_returns.mean() / daily_returns.std()
total_return = account_values.iloc[-1] / account_values.iloc[0] - 1
volatility = daily_returns.std()
max_drawdown = (account_values.cummax() - account_values).max() / account_values.cummax().max()


df_final = pd.concat(results)
backtest_stats(df_final)
backtest_plot(df_final)

## 

## 