In [18]:
# 1. Getting Started - Load Python Packages
# 1.1. Import Packages
import warnings

warnings.simplefilter(action="ignore", category=FutureWarning)
import os
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt

matplotlib.use("Agg")
import sys

#os.chdir("../../../DRLTrading")

import warnings
warnings.filterwarnings('ignore')
import pyfolio as pf

from meta import config
from meta import config_tickers
from meta.data_processor import DataProcessor
sys.path.append("C:\python310\lib\site-packages")
from meta.env_portfolio_allocation.env_portfolio_yahoofinance import (
    StockPortfolioEnv,
)
from agents.stablebaselines3_models import DRLAgent
from plot import (
    backtest_stats,
    backtest_plot,
    get_daily_return,
    get_baseline,
    convert_daily_return_to_pyfolio_ts,
)

In [19]:
# 1.2. Create Folders
import main

main.check_and_make_directories(
    [
        config.DATA_SAVE_DIR,
        config.TRAINED_MODEL_DIR,
        config.TENSORBOARD_LOG_DIR,
        config.RESULTS_DIR,
    ]
)

In [70]:
# 2. Download and Preprocess Data
print(f"DOW_30_TICKER: {config_tickers.DOW_30_TICKER}")

dp = DataProcessor(
    data_source="yahoofinance",
    start_date="2009-01-01",
    end_date="2022-12-07",
    time_interval="1D",
)

dp.run(
    ticker_list=config_tickers.DOW_30_TICKER,
    technical_indicator_list=config.INDICATORS,
    if_vix=False,
)
df = dp.dataframe

df.head()

print("Shape of DataFrame: ", df.shape)

# Add covariance matrix as states
df.rename(columns={"time": "date"}, inplace=True)
df = df.sort_values(["date", "tic"], ignore_index=True)
df.index = df.date.factorize()[0]
df.drop(columns=["index"], inplace=True)

cov_list = []
return_list = []

# look back is one year
lookback = 252
for i in range(lookback, len(df.index.unique())):
    data_lookback = df.loc[i - lookback : i, :]
    price_lookback = data_lookback.pivot_table(
        index="date", columns="tic", values="close"
    ).dropna(axis=1)
    return_lookback = price_lookback.pct_change().dropna()
    return_list.append(return_lookback)

    covs = return_lookback.cov().values
    cov_list.append(covs)

df_cov = pd.DataFrame(
    {
        "date": df.date.unique()[lookback:],
        "cov_list": cov_list,
        "return_list": return_list,
    }
)
df = df.merge(df_cov, on="date")
df = df.sort_values(["date", "tic"]).reset_index(drop=True)
print("Shape of DataFrame: ", df.shape)

df.head()

DOW_30_TICKER: ['AXP', 'AMGN', 'AAPL', 'BA', 'CAT', 'CSCO', 'CVX', 'GS', 'HD', 'HON', 'IBM', 'INTC', 'JNJ', 'KO', 'JPM', 'MCD', 'MMM', 'MRK', 'MSFT', 'NKE', 'PG', 'TRV', 'UNH', 'CRM', 'VZ', 'V', 'WBA', 'WMT', 'DIS', 'DOW']
yahoofinance successfully connected
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************] 

Unnamed: 0,date,open,high,low,close,adjusted_close,volume,tic,macd,boll_ub,boll_lb,rsi_30,cci_30,dx_30,close_30_sma,close_60_sma,cov_list,return_list
0,2020-03-19,61.8475,63.209999,60.6525,61.195,60.093651,271857200.0,AAPL,-3.875027,80.024178,58.758821,41.804289,-136.18294,38.128698,73.147166,75.041708,"[[0.0005266658212213806, 0.0003009832892167366...",tic AAPL AMGN AXP ...
1,2020-03-19,201.0,208.75,195.270004,198.759995,183.156555,4796500.0,AMGN,-6.221963,224.990881,186.169117,44.570447,-72.734902,29.627909,212.302333,223.581333,"[[0.0005266658212213806, 0.0003009832892167366...",tic AAPL AMGN AXP ...
2,2020-03-19,72.540001,77.889999,69.790001,77.07,74.143517,12330400.0,AXP,-12.332206,137.614407,70.921593,33.608365,-156.597187,54.126595,114.266001,121.453167,"[[0.0005266658212213806, 0.0003009832892167366...",tic AAPL AMGN AXP ...
3,2020-03-19,98.800003,103.57,90.349998,97.709999,97.709999,40210600.0,BA,-54.839629,384.2064,78.017595,19.978561,-174.382275,83.933098,267.790999,297.053332,"[[0.0005266658212213806, 0.0003009832892167366...",tic AAPL AMGN AXP ...
4,2020-03-19,99.230003,104.400002,97.25,103.010002,96.373062,6870800.0,CAT,-9.772667,143.869884,85.809117,36.76229,-100.438578,29.02502,122.186334,132.835,"[[0.0005266658212213806, 0.0003009832892167366...",tic AAPL AMGN AXP ...


In [89]:
# 4. Design Environment

# Training data split: 2009-01-01 to 2018-01-01
train = dp.data_split(df, "2009-01-01", "2021-01-01")

print(train.head())

# Environment for Portfolio Allocation
stock_dimension = len(train.tic.unique())
state_space = stock_dimension
print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")

env_kwargs = {
    "hmax": 100,
    "initial_amount": 1000000,
    "transaction_cost_pct": 0.001,
    "state_space": state_space,
    "stock_dim": stock_dimension,
    "tech_indicator_list": config.INDICATORS,
    "action_space": stock_dimension,
    "reward_scaling": 1e-4,
}

e_train_gym = StockPortfolioEnv(df=train, **env_kwargs)

env_train, _ = e_train_gym.get_sb_env()
# print(type(env_train))

         date        open        high         low       close  adjusted_close  \
0  2020-03-19   61.847500   63.209999   60.652500   61.195000       60.093651   
0  2020-03-19  201.000000  208.750000  195.270004  198.759995      183.156555   
0  2020-03-19   72.540001   77.889999   69.790001   77.070000       74.143517   
0  2020-03-19   98.800003  103.570000   90.349998   97.709999       97.709999   
0  2020-03-19   99.230003  104.400002   97.250000  103.010002       96.373062   

        volume   tic       macd     boll_ub     boll_lb     rsi_30  \
0  271857200.0  AAPL  -3.875027   80.024178   58.758821  41.804289   
0    4796500.0  AMGN  -6.221963  224.990881  186.169117  44.570447   
0   12330400.0   AXP -12.332206  137.614407   70.921593  33.608365   
0   40210600.0    BA -54.839629  384.206400   78.017595  19.978561   
0    6870800.0   CAT  -9.772667  143.869884   85.809117  36.762290   

       cci_30      dx_30  close_30_sma  close_60_sma  \
0 -136.182940  38.128698     73.1471

In [90]:
# 5. Implement DRL Algorithms
from tqdm import tqdm

def update_progress(timestep):
    pbar.update(timestep)

# initialize
agent = DRLAgent(env=env_train)

# Model 1: A2C
agent = DRLAgent(env=env_train)
A2C_PARAMS = {"n_steps": 5, "ent_coef": 0.005, "learning_rate": 0.0002}
model_a2c = agent.get_model(model_name="a2c", model_kwargs=A2C_PARAMS)
pbar = tqdm(total=50000)
trained_a2c = agent.train_model(
    model=model_a2c, tb_log_name="a2c", total_timesteps=50000
)
update_progress(50000)
trained_a2c.save("/DRLTrading/trained_models/trained_a2c.zip")

{'n_steps': 5, 'ent_coef': 0.005, 'learning_rate': 0.0002}
Using cpu device


100%|████████████████████████████████████████████████████████████████████████████| 30000/30000 [27:05<00:00, 18.46it/s]

Logging to tensorboard_log/a2c\a2c_2





begin_total_asset:1000000
end_total_asset:1467474.5912834618
Sharpe:  1.8490421658245235
begin_total_asset:1000000
end_total_asset:1509123.0431790387
Sharpe:  1.9404600724335321
-------------------------------------
| time/                 |           |
|    fps                | 176       |
|    iterations         | 100       |
|    time_elapsed       | 2         |
|    total_timesteps    | 500       |
| train/                |           |
|    entropy_loss       | -42.5     |
|    explained_variance | 1.79e-07  |
|    learning_rate      | 0.0002    |
|    n_updates          | 99        |
|    policy_loss        | 1.63e+08  |
|    reward             | 1349549.4 |
|    std                | 0.997     |
|    value_loss         | 1.8e+13   |
-------------------------------------
begin_total_asset:1000000
end_total_asset:1540854.5385854763
Sharpe:  2.043169484449548
begin_total_asset:1000000
end_total_asset:1455740.004401646
Sharpe:  1.774196126850686
begin_total_asset:1000000
end_total_ass

-------------------------------------
| time/                 |           |
|    fps                | 255       |
|    iterations         | 900       |
|    time_elapsed       | 17        |
|    total_timesteps    | 4500      |
| train/                |           |
|    entropy_loss       | -42.4     |
|    explained_variance | 0         |
|    learning_rate      | 0.0002    |
|    n_updates          | 899       |
|    policy_loss        | 1.63e+08  |
|    reward             | 1361520.8 |
|    std                | 0.995     |
|    value_loss         | 1.84e+13  |
-------------------------------------
begin_total_asset:1000000
end_total_asset:1509789.5623426463
Sharpe:  1.9354582002553273
begin_total_asset:1000000
end_total_asset:1496394.2208443733
Sharpe:  1.9349192009205582
begin_total_asset:1000000
end_total_asset:1505024.8607165175
Sharpe:  1.9629908115225227
-------------------------------------
| time/                 |           |
|    fps                | 256       |
|    iterat

begin_total_asset:1000000
end_total_asset:1465518.8426426384
Sharpe:  1.8092064661904717
begin_total_asset:1000000
end_total_asset:1457785.589847635
Sharpe:  1.8182297219436365
begin_total_asset:1000000
end_total_asset:1475974.5134584303
Sharpe:  1.883158447531013
-------------------------------------
| time/                 |           |
|    fps                | 264       |
|    iterations         | 1800      |
|    time_elapsed       | 34        |
|    total_timesteps    | 9000      |
| train/                |           |
|    entropy_loss       | -42.3     |
|    explained_variance | -1.19e-07 |
|    learning_rate      | 0.0002    |
|    n_updates          | 1799      |
|    policy_loss        | 1.81e+08  |
|    reward             | 1475974.5 |
|    std                | 0.992     |
|    value_loss         | 2.25e+13  |
-------------------------------------
begin_total_asset:1000000
end_total_asset:1496649.7982288531
Sharpe:  1.9384479840560243
begin_total_asset:1000000
end_total_as

begin_total_asset:1000000
end_total_asset:1481322.9695087082
Sharpe:  1.8869522551844398
-------------------------------------
| time/                 |           |
|    fps                | 259       |
|    iterations         | 2600      |
|    time_elapsed       | 50        |
|    total_timesteps    | 13000     |
| train/                |           |
|    entropy_loss       | -42.2     |
|    explained_variance | -1.19e-07 |
|    learning_rate      | 0.0002    |
|    n_updates          | 2599      |
|    policy_loss        | 1.84e+08  |
|    reward             | 1481323.0 |
|    std                | 0.988     |
|    value_loss         | 2.27e+13  |
-------------------------------------
begin_total_asset:1000000
end_total_asset:1501291.8307501164
Sharpe:  1.9064693956797096
begin_total_asset:1000000
end_total_asset:1488593.0216703247
Sharpe:  1.872928570842232
-------------------------------------
| time/                 |           |
|    fps                | 259       |
|    iterati

begin_total_asset:1000000
end_total_asset:1556547.0753094743
Sharpe:  2.084267652411274
begin_total_asset:1000000
end_total_asset:1508996.8425770418
Sharpe:  1.952544760530555
-------------------------------------
| time/                 |           |
|    fps                | 260       |
|    iterations         | 3500      |
|    time_elapsed       | 67        |
|    total_timesteps    | 17500     |
| train/                |           |
|    entropy_loss       | -42.2     |
|    explained_variance | -1.19e-07 |
|    learning_rate      | 0.0002    |
|    n_updates          | 3499      |
|    policy_loss        | 1.64e+08  |
|    reward             | 1384187.5 |
|    std                | 0.986     |
|    value_loss         | 1.9e+13   |
-------------------------------------
begin_total_asset:1000000
end_total_asset:1572833.3937465702
Sharpe:  2.116196978128953
begin_total_asset:1000000
end_total_asset:1463911.0788605297
Sharpe:  1.791741595186627
begin_total_asset:1000000
end_total_asse

-------------------------------------
| time/                 |           |
|    fps                | 264       |
|    iterations         | 4300      |
|    time_elapsed       | 81        |
|    total_timesteps    | 21500     |
| train/                |           |
|    entropy_loss       | -42.1     |
|    explained_variance | 0         |
|    learning_rate      | 0.0002    |
|    n_updates          | 4299      |
|    policy_loss        | 1.66e+08  |
|    reward             | 1366046.9 |
|    std                | 0.983     |
|    value_loss         | 1.85e+13  |
-------------------------------------
begin_total_asset:1000000
end_total_asset:1518469.2817434084
Sharpe:  1.9689384199204134
begin_total_asset:1000000
end_total_asset:1547951.0379607123
Sharpe:  2.0525351719152396
begin_total_asset:1000000
end_total_asset:1484623.668975696
Sharpe:  1.8959128262969465
-------------------------------------
| time/                 |           |
|    fps                | 264       |
|    iterati

begin_total_asset:1000000
end_total_asset:1528457.7422480953
Sharpe:  1.9829761947146345
begin_total_asset:1000000
end_total_asset:1491456.2612337077
Sharpe:  1.912129612471678
begin_total_asset:1000000
end_total_asset:1478091.56864368
Sharpe:  1.8660919343963303
-------------------------------------
| time/                 |           |
|    fps                | 265       |
|    iterations         | 5200      |
|    time_elapsed       | 98        |
|    total_timesteps    | 26000     |
| train/                |           |
|    entropy_loss       | -42       |
|    explained_variance | -1.19e-07 |
|    learning_rate      | 0.0002    |
|    n_updates          | 5199      |
|    policy_loss        | 1.82e+08  |
|    reward             | 1478091.6 |
|    std                | 0.982     |
|    value_loss         | 2.26e+13  |
-------------------------------------
begin_total_asset:1000000
end_total_asset:1511686.4713328895
Sharpe:  1.947847647943097
begin_total_asset:1000000
end_total_asse

begin_total_asset:1000000
end_total_asset:1474429.784857085
Sharpe:  1.8195809158255263
-------------------------------------
| time/                 |           |
|    fps                | 263       |
|    iterations         | 6000      |
|    time_elapsed       | 113       |
|    total_timesteps    | 30000     |
| train/                |           |
|    entropy_loss       | -41.9     |
|    explained_variance | 0         |
|    learning_rate      | 0.0002    |
|    n_updates          | 5999      |
|    policy_loss        | 1.85e+08  |
|    reward             | 1474429.8 |
|    std                | 0.979     |
|    value_loss         | 2.25e+13  |
-------------------------------------
begin_total_asset:1000000
end_total_asset:1483309.476403127
Sharpe:  1.8587848610289297
begin_total_asset:1000000
end_total_asset:1437457.189188834
Sharpe:  1.7305865088259935
-------------------------------------
| time/                 |           |
|    fps                | 262       |
|    iteration

begin_total_asset:1000000
end_total_asset:1480727.184934893
Sharpe:  1.8572026294459363
begin_total_asset:1000000
end_total_asset:1458380.465304547
Sharpe:  1.8106857374267324
-------------------------------------
| time/                 |           |
|    fps                | 255       |
|    iterations         | 6900      |
|    time_elapsed       | 134       |
|    total_timesteps    | 34500     |
| train/                |           |
|    entropy_loss       | -41.9     |
|    explained_variance | 0         |
|    learning_rate      | 0.0002    |
|    n_updates          | 6899      |
|    policy_loss        | 1.58e+08  |
|    reward             | 1348395.9 |
|    std                | 0.977     |
|    value_loss         | 1.81e+13  |
-------------------------------------
begin_total_asset:1000000
end_total_asset:1529107.4494041074
Sharpe:  2.0447153564892093
begin_total_asset:1000000
end_total_asset:1499016.809251615
Sharpe:  1.9262815793799513
begin_total_asset:1000000
end_total_ass

-------------------------------------
| time/                 |           |
|    fps                | 248       |
|    iterations         | 7700      |
|    time_elapsed       | 154       |
|    total_timesteps    | 38500     |
| train/                |           |
|    entropy_loss       | -41.7     |
|    explained_variance | -1.19e-07 |
|    learning_rate      | 0.0002    |
|    n_updates          | 7699      |
|    policy_loss        | 1.59e+08  |
|    reward             | 1318966.5 |
|    std                | 0.971     |
|    value_loss         | 1.72e+13  |
-------------------------------------
begin_total_asset:1000000
end_total_asset:1472322.5350520078
Sharpe:  1.8618842349572458
begin_total_asset:1000000
end_total_asset:1485608.6113734208
Sharpe:  1.8671743739848365
begin_total_asset:1000000
end_total_asset:1507138.4920065682
Sharpe:  1.9602910472826718
-------------------------------------
| time/                 |           |
|    fps                | 247       |
|    iterat

begin_total_asset:1000000
end_total_asset:1523240.0312666753
Sharpe:  1.9910959401789954
begin_total_asset:1000000
end_total_asset:1521515.8694587767
Sharpe:  2.0180207583326797
begin_total_asset:1000000
end_total_asset:1512260.5614903849
Sharpe:  1.9560910238575233
-------------------------------------
| time/                 |           |
|    fps                | 242       |
|    iterations         | 8600      |
|    time_elapsed       | 177       |
|    total_timesteps    | 43000     |
| train/                |           |
|    entropy_loss       | -41.6     |
|    explained_variance | 1.79e-07  |
|    learning_rate      | 0.0002    |
|    n_updates          | 8599      |
|    policy_loss        | 1.9e+08   |
|    reward             | 1512260.5 |
|    std                | 0.969     |
|    value_loss         | 2.37e+13  |
-------------------------------------
begin_total_asset:1000000
end_total_asset:1502667.1658961084
Sharpe:  1.9267338068350381
begin_total_asset:1000000
end_total_

begin_total_asset:1000000
end_total_asset:1540539.6989447614
Sharpe:  2.0446286623310965
-------------------------------------
| time/                 |           |
|    fps                | 238       |
|    iterations         | 9400      |
|    time_elapsed       | 196       |
|    total_timesteps    | 47000     |
| train/                |           |
|    entropy_loss       | -41.5     |
|    explained_variance | 0         |
|    learning_rate      | 0.0002    |
|    n_updates          | 9399      |
|    policy_loss        | 1.9e+08   |
|    reward             | 1540539.8 |
|    std                | 0.965     |
|    value_loss         | 2.46e+13  |
-------------------------------------
begin_total_asset:1000000
end_total_asset:1531660.1147112206
Sharpe:  1.9996859214716503
begin_total_asset:1000000
end_total_asset:1481039.8052125368
Sharpe:  1.8761037518386627
-------------------------------------
| time/                 |           |
|    fps                | 238       |
|    iterat

100%|███████████████████████████████████████████████████████████████████████████| 50000/50000 [03:31<00:00, 236.61it/s]

In [91]:
# Model 2: PPO
agent = DRLAgent(env=env_train)
PPO_PARAMS = {
    "n_steps": 2048,
    "ent_coef": 0.005,
    "learning_rate": 0.0001,
    "batch_size": 128,
}
model_ppo = agent.get_model("ppo", model_kwargs=PPO_PARAMS)
pbar = tqdm(total=80000)
trained_ppo = agent.train_model(
    model=model_ppo, tb_log_name="ppo", total_timesteps=80000
)
update_progress(80000)
trained_ppo.save("/DRLTrading/trained_models/trained_ppo.zip")

{'n_steps': 2048, 'ent_coef': 0.005, 'learning_rate': 0.0001, 'batch_size': 128}
Using cpu device



100%|███████████████████████████████████████████████████████████████████████████| 50000/50000 [03:31<00:00, 236.53it/s][A

Logging to tensorboard_log/ppo\ppo_3





begin_total_asset:1000000
end_total_asset:1569341.0511901404
Sharpe:  2.1184848250321426
begin_total_asset:1000000
end_total_asset:1571450.8261227862
Sharpe:  2.1419873030197136
begin_total_asset:1000000
end_total_asset:1508989.7945926625
Sharpe:  1.9438894366349726
begin_total_asset:1000000
end_total_asset:1457485.0219018469
Sharpe:  1.835243842238925
begin_total_asset:1000000
end_total_asset:1561578.1915745994
Sharpe:  2.1118793200794594
begin_total_asset:1000000
end_total_asset:1485231.183847206
Sharpe:  1.8588021294889845
begin_total_asset:1000000
end_total_asset:1514643.0262319911
Sharpe:  1.9769319939014558
begin_total_asset:1000000
end_total_asset:1515325.2032739983
Sharpe:  1.9658545731002612
begin_total_asset:1000000
end_total_asset:1551886.1746602627
Sharpe:  2.071947116511896
begin_total_asset:1000000
end_total_asset:1496490.7374907422
Sharpe:  1.9295550013312044
----------------------------------
| time/              |           |
|    fps             | 207       |
|    ite

begin_total_asset:1000000
end_total_asset:1552890.9031113717
Sharpe:  2.0678183197055726
begin_total_asset:1000000
end_total_asset:1548748.1009478434
Sharpe:  2.046022386497435
begin_total_asset:1000000
end_total_asset:1512002.8187212413
Sharpe:  1.9297226184854028
begin_total_asset:1000000
end_total_asset:1495893.0549566045
Sharpe:  1.9414931135722864
begin_total_asset:1000000
end_total_asset:1526805.300562607
Sharpe:  1.9612336045656262
begin_total_asset:1000000
end_total_asset:1564807.2188595627
Sharpe:  2.1015184866039416
begin_total_asset:1000000
end_total_asset:1522593.8266471163
Sharpe:  1.9981125306113705
begin_total_asset:1000000
end_total_asset:1473854.245077158
Sharpe:  1.8607593631658759
begin_total_asset:1000000
end_total_asset:1480137.5931785358
Sharpe:  1.8783535275310974
begin_total_asset:1000000
end_total_asset:1481273.632315928
Sharpe:  1.8581856338385794
begin_total_asset:1000000
end_total_asset:1482077.9516584598
Sharpe:  1.8863110094760944
-------------------------

begin_total_asset:1000000
end_total_asset:1535949.8467187774
Sharpe:  2.0324258060695177
begin_total_asset:1000000
end_total_asset:1515426.974976458
Sharpe:  1.9544689722424606
begin_total_asset:1000000
end_total_asset:1518418.5523203905
Sharpe:  1.9698237669278693
begin_total_asset:1000000
end_total_asset:1513155.49046513
Sharpe:  1.928555407543389
---------------------------------------
| time/                   |           |
|    fps                  | 290       |
|    iterations           | 8         |
|    time_elapsed         | 56        |
|    total_timesteps      | 16384     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -42.6     |
|    explained_variance   | 0         |
|    learning_rate        | 0.0001    |
|    loss                 | 2.24e+14  |
|    n_updates            | 70        |
|    policy_gradient_loss | -9.49e-07 |
|    rew

begin_total_asset:1000000
end_total_asset:1531249.5865753938
Sharpe:  2.0180758937209915
begin_total_asset:1000000
end_total_asset:1512083.739732756
Sharpe:  1.9469894921973738
begin_total_asset:1000000
end_total_asset:1419828.9536364337
Sharpe:  1.6734538468803648
begin_total_asset:1000000
end_total_asset:1499540.7353291141
Sharpe:  1.9367732766572674
begin_total_asset:1000000
end_total_asset:1563180.791667857
Sharpe:  2.1361811326257727
begin_total_asset:1000000
end_total_asset:1495879.3628717
Sharpe:  1.9152858387350342
begin_total_asset:1000000
end_total_asset:1528369.4316503492
Sharpe:  2.002655819188094
begin_total_asset:1000000
end_total_asset:1493575.3240773838
Sharpe:  1.8802215599437413
begin_total_asset:1000000
end_total_asset:1556377.5076846306
Sharpe:  2.0812125017278627
begin_total_asset:1000000
end_total_asset:1522100.0039319734
Sharpe:  2.009340485907647
---------------------------------------
| time/                   |           |
|    fps                  | 291      

begin_total_asset:1000000
end_total_asset:1523461.543360871
Sharpe:  2.0057000531162594
begin_total_asset:1000000
end_total_asset:1470212.065837588
Sharpe:  1.8352798599151345
begin_total_asset:1000000
end_total_asset:1518189.3207535055
Sharpe:  1.9725590054447841
---------------------------------------
| time/                   |           |
|    fps                  | 292       |
|    iterations           | 15        |
|    time_elapsed         | 104       |
|    total_timesteps      | 30720     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -42.6     |
|    explained_variance   | 2.38e-07  |
|    learning_rate        | 0.0001    |
|    loss                 | 2.11e+14  |
|    n_updates            | 140       |
|    policy_gradient_loss | -9.78e-07 |
|    reward               | 1343499.9 |
|    std                  | 1         |
|    value_loss

begin_total_asset:1000000
end_total_asset:1541455.4638159801
Sharpe:  2.0599177555510786
begin_total_asset:1000000
end_total_asset:1514244.5224878988
Sharpe:  1.9661875142889556
begin_total_asset:1000000
end_total_asset:1513375.972388576
Sharpe:  1.9641889517521132
begin_total_asset:1000000
end_total_asset:1531181.3254835308
Sharpe:  1.99629925872423
begin_total_asset:1000000
end_total_asset:1579481.5570400837
Sharpe:  2.1387186883072484
begin_total_asset:1000000
end_total_asset:1508343.981474108
Sharpe:  1.9530195028581658
begin_total_asset:1000000
end_total_asset:1543351.2079221334
Sharpe:  2.0711461061573733
begin_total_asset:1000000
end_total_asset:1615815.6847296173
Sharpe:  2.268364519132447
begin_total_asset:1000000
end_total_asset:1455820.5320234452
Sharpe:  1.7776445851175924
begin_total_asset:1000000
end_total_asset:1470303.686103804
Sharpe:  1.8437642197875248
---------------------------------------
| time/                   |           |
|    fps                  | 289     

begin_total_asset:1000000
end_total_asset:1504015.6921142163
Sharpe:  1.939316565085938
begin_total_asset:1000000
end_total_asset:1502320.1019042481
Sharpe:  1.9391182637494053
begin_total_asset:1000000
end_total_asset:1611518.6872212265
Sharpe:  2.213310505952196
---------------------------------------
| time/                   |           |
|    fps                  | 288       |
|    iterations           | 22        |
|    time_elapsed         | 155       |
|    total_timesteps      | 45056     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -42.6     |
|    explained_variance   | -1.19e-07 |
|    learning_rate        | 0.0001    |
|    loss                 | 2.2e+14   |
|    n_updates            | 210       |
|    policy_gradient_loss | -1.23e-06 |
|    reward               | 1327964.9 |
|    std                  | 1         |
|    value_loss

begin_total_asset:1000000
end_total_asset:1519943.7022848716
Sharpe:  1.9659130928695208
begin_total_asset:1000000
end_total_asset:1481057.7019017015
Sharpe:  1.8811590184186997
begin_total_asset:1000000
end_total_asset:1519421.367742319
Sharpe:  1.9698660862063722
begin_total_asset:1000000
end_total_asset:1485906.4805250035
Sharpe:  1.8878845775803006
begin_total_asset:1000000
end_total_asset:1526303.1652312449
Sharpe:  2.0122725023848242
begin_total_asset:1000000
end_total_asset:1564090.738708118
Sharpe:  2.097729751733977
begin_total_asset:1000000
end_total_asset:1515288.2446151501
Sharpe:  1.962562212355737
begin_total_asset:1000000
end_total_asset:1486324.3627814476
Sharpe:  1.8684664116692733
begin_total_asset:1000000
end_total_asset:1475530.0519545649
Sharpe:  1.8431632660731567
begin_total_asset:1000000
end_total_asset:1508998.905831588
Sharpe:  1.9380332152920858
---------------------------------------
| time/                   |           |
|    fps                  | 287    

begin_total_asset:1000000
end_total_asset:1577709.8539810681
Sharpe:  2.1620462145524377
begin_total_asset:1000000
end_total_asset:1548260.7130528716
Sharpe:  2.050766803260322
begin_total_asset:1000000
end_total_asset:1548618.7266453742
Sharpe:  2.0516603634324184
---------------------------------------
| time/                   |           |
|    fps                  | 285       |
|    iterations           | 29        |
|    time_elapsed         | 207       |
|    total_timesteps      | 59392     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -42.6     |
|    explained_variance   | 0         |
|    learning_rate        | 0.0001    |
|    loss                 | 2.17e+14  |
|    n_updates            | 280       |
|    policy_gradient_loss | -1.03e-06 |
|    reward               | 1558284.9 |
|    std                  | 1         |
|    value_los

begin_total_asset:1000000
end_total_asset:1548396.0544976199
Sharpe:  2.0795955167769997
begin_total_asset:1000000
end_total_asset:1515903.365893062
Sharpe:  1.965931512318696
begin_total_asset:1000000
end_total_asset:1532223.756240759
Sharpe:  2.0334773275858855
begin_total_asset:1000000
end_total_asset:1537284.3814729718
Sharpe:  2.041300434304716
begin_total_asset:1000000
end_total_asset:1566687.0900668635
Sharpe:  2.093676887975204
begin_total_asset:1000000
end_total_asset:1515318.1684819579
Sharpe:  1.9728480675699682
begin_total_asset:1000000
end_total_asset:1512576.817611193
Sharpe:  1.98565631568019
begin_total_asset:1000000
end_total_asset:1512424.8164936902
Sharpe:  1.9638868183243423
begin_total_asset:1000000
end_total_asset:1500523.2983479
Sharpe:  1.9778988261263946
begin_total_asset:1000000
end_total_asset:1490216.4576999682
Sharpe:  1.9130201682747967
---------------------------------------
| time/                   |           |
|    fps                  | 283       |
|

begin_total_asset:1000000
end_total_asset:1507310.1814723136
Sharpe:  1.9226535648231455
begin_total_asset:1000000
end_total_asset:1522170.0705228902
Sharpe:  1.9860761605575625
begin_total_asset:1000000
end_total_asset:1502654.8596816564
Sharpe:  1.9276389469144548
---------------------------------------
| time/                   |           |
|    fps                  | 283       |
|    iterations           | 36        |
|    time_elapsed         | 259       |
|    total_timesteps      | 73728     |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -42.6     |
|    explained_variance   | 5.96e-08  |
|    learning_rate        | 0.0001    |
|    loss                 | 2.18e+14  |
|    n_updates            | 350       |
|    policy_gradient_loss | -9.64e-07 |
|    reward               | 1353173.0 |
|    std                  | 1         |
|    value_lo

begin_total_asset:1000000
end_total_asset:1562232.6715707153
Sharpe:  2.075451042762198
begin_total_asset:1000000
end_total_asset:1453382.670892242
Sharpe:  1.7903708365568578
begin_total_asset:1000000
end_total_asset:1514617.0391290213
Sharpe:  1.9803220303995188
begin_total_asset:1000000
end_total_asset:1507625.9436483276
Sharpe:  1.9670874904131668
begin_total_asset:1000000
end_total_asset:1495733.8928308075
Sharpe:  1.9325458787361132
begin_total_asset:1000000
end_total_asset:1511626.4204577932
Sharpe:  1.9788816780824718
begin_total_asset:1000000
end_total_asset:1509109.624103776
Sharpe:  1.943915726004615
begin_total_asset:1000000
end_total_asset:1448235.7613673084
Sharpe:  1.789777082676599
begin_total_asset:1000000
end_total_asset:1605713.245995028
Sharpe:  2.2458213530310136
begin_total_asset:1000000
end_total_asset:1487115.2411672887
Sharpe:  1.913418145584799
---------------------------------------
| time/                   |           |
|    fps                  | 284      


100%|███████████████████████████████████████████████████████████████████████████| 80000/80000 [04:48<00:00, 276.97it/s][A

In [92]:
# Model 3: DDPG
agent = DRLAgent(env=env_train)
DDPG_PARAMS = {"batch_size": 128, "buffer_size": 50000, "learning_rate": 0.001}
model_ddpg = agent.get_model("ddpg", model_kwargs=DDPG_PARAMS)
pbar = tqdm(total=50000)
trained_ddpg = agent.train_model(
    model=model_ddpg, tb_log_name="ddpg", total_timesteps=50000
)
update_progress(50000)
trained_ddpg.save("/DRLTrading/trained_models/trained_ddpg.zip")

{'batch_size': 128, 'buffer_size': 50000, 'learning_rate': 0.001}
Using cpu device


100%|███████████████████████████████████████████████████████████████████████████| 80000/80000 [04:48<00:00, 276.91it/s]

Logging to tensorboard_log/ddpg\ddpg_2





begin_total_asset:1000000
end_total_asset:1491446.5668462429
Sharpe:  1.8794928352215607
begin_total_asset:1000000
end_total_asset:1518665.960488369
Sharpe:  1.8749336391557723
begin_total_asset:1000000
end_total_asset:1518665.960488369
Sharpe:  1.8749336391557723
begin_total_asset:1000000
end_total_asset:1518665.960488369
Sharpe:  1.8749336391557723
----------------------------------
| time/              |           |
|    episodes        | 4         |
|    fps             | 32        |
|    time_elapsed    | 24        |
|    total_timesteps | 800       |
| train/             |           |
|    actor_loss      | -2.83e+06 |
|    critic_loss     | 1.94e+10  |
|    learning_rate   | 0.001     |
|    n_updates       | 600       |
|    reward          | 1518666.0 |
----------------------------------
begin_total_asset:1000000
end_total_asset:1518665.960488369
Sharpe:  1.8749336391557723
begin_total_asset:1000000
end_total_asset:1518665.960488369
Sharpe:  1.8749336391557723
begin_total_asse

begin_total_asset:1000000
end_total_asset:1518665.960488369
Sharpe:  1.8749336391557723
begin_total_asset:1000000
end_total_asset:1518665.960488369
Sharpe:  1.8749336391557723
begin_total_asset:1000000
end_total_asset:1518665.960488369
Sharpe:  1.8749336391557723
begin_total_asset:1000000
end_total_asset:1518665.960488369
Sharpe:  1.8749336391557723
----------------------------------
| time/              |           |
|    episodes        | 36        |
|    fps             | 23        |
|    time_elapsed    | 308       |
|    total_timesteps | 7200      |
| train/             |           |
|    actor_loss      | -3.49e+07 |
|    critic_loss     | 1.56e+12  |
|    learning_rate   | 0.001     |
|    n_updates       | 7000      |
|    reward          | 1518666.0 |
----------------------------------
begin_total_asset:1000000
end_total_asset:1518665.960488369
Sharpe:  1.8749336391557723
begin_total_asset:1000000
end_total_asset:1518665.960488369
Sharpe:  1.8749336391557723
begin_total_asset

begin_total_asset:1000000
end_total_asset:1518665.960488369
Sharpe:  1.8749336391557723
begin_total_asset:1000000
end_total_asset:1518665.960488369
Sharpe:  1.8749336391557723
begin_total_asset:1000000
end_total_asset:1518665.960488369
Sharpe:  1.8749336391557723
begin_total_asset:1000000
end_total_asset:1518665.960488369
Sharpe:  1.8749336391557723
----------------------------------
| time/              |           |
|    episodes        | 68        |
|    fps             | 22        |
|    time_elapsed    | 596       |
|    total_timesteps | 13600     |
| train/             |           |
|    actor_loss      | -5.52e+07 |
|    critic_loss     | 2.98e+12  |
|    learning_rate   | 0.001     |
|    n_updates       | 13400     |
|    reward          | 1518666.0 |
----------------------------------
begin_total_asset:1000000
end_total_asset:1518665.960488369
Sharpe:  1.8749336391557723
begin_total_asset:1000000
end_total_asset:1518665.960488369
Sharpe:  1.8749336391557723
begin_total_asset

begin_total_asset:1000000
end_total_asset:1518665.960488369
Sharpe:  1.8749336391557723
begin_total_asset:1000000
end_total_asset:1518665.960488369
Sharpe:  1.8749336391557723
begin_total_asset:1000000
end_total_asset:1518665.960488369
Sharpe:  1.8749336391557723
begin_total_asset:1000000
end_total_asset:1518665.960488369
Sharpe:  1.8749336391557723
----------------------------------
| time/              |           |
|    episodes        | 100       |
|    fps             | 22        |
|    time_elapsed    | 886       |
|    total_timesteps | 20000     |
| train/             |           |
|    actor_loss      | -6.68e+07 |
|    critic_loss     | 2.77e+12  |
|    learning_rate   | 0.001     |
|    n_updates       | 19800     |
|    reward          | 1518666.0 |
----------------------------------
begin_total_asset:1000000
end_total_asset:1518665.960488369
Sharpe:  1.8749336391557723
begin_total_asset:1000000
end_total_asset:1518665.960488369
Sharpe:  1.8749336391557723
begin_total_asset

begin_total_asset:1000000
end_total_asset:1518665.960488369
Sharpe:  1.8749336391557723
begin_total_asset:1000000
end_total_asset:1518665.960488369
Sharpe:  1.8749336391557723
begin_total_asset:1000000
end_total_asset:1518665.960488369
Sharpe:  1.8749336391557723
begin_total_asset:1000000
end_total_asset:1518665.960488369
Sharpe:  1.8749336391557723
----------------------------------
| time/              |           |
|    episodes        | 132       |
|    fps             | 22        |
|    time_elapsed    | 1171      |
|    total_timesteps | 26400     |
| train/             |           |
|    actor_loss      | -7.31e+07 |
|    critic_loss     | 1.87e+12  |
|    learning_rate   | 0.001     |
|    n_updates       | 26200     |
|    reward          | 1518666.0 |
----------------------------------
begin_total_asset:1000000
end_total_asset:1518665.960488369
Sharpe:  1.8749336391557723
begin_total_asset:1000000
end_total_asset:1518665.960488369
Sharpe:  1.8749336391557723
begin_total_asset

begin_total_asset:1000000
end_total_asset:1518665.960488369
Sharpe:  1.8749336391557723
begin_total_asset:1000000
end_total_asset:1518665.960488369
Sharpe:  1.8749336391557723
begin_total_asset:1000000
end_total_asset:1518665.960488369
Sharpe:  1.8749336391557723
begin_total_asset:1000000
end_total_asset:1518665.960488369
Sharpe:  1.8749336391557723
----------------------------------
| time/              |           |
|    episodes        | 164       |
|    fps             | 22        |
|    time_elapsed    | 1476      |
|    total_timesteps | 32800     |
| train/             |           |
|    actor_loss      | -7.6e+07  |
|    critic_loss     | 1.14e+12  |
|    learning_rate   | 0.001     |
|    n_updates       | 32600     |
|    reward          | 1518666.0 |
----------------------------------
begin_total_asset:1000000
end_total_asset:1518665.960488369
Sharpe:  1.8749336391557723
begin_total_asset:1000000
end_total_asset:1518665.960488369
Sharpe:  1.8749336391557723
begin_total_asset

begin_total_asset:1000000
end_total_asset:1518665.960488369
Sharpe:  1.8749336391557723
begin_total_asset:1000000
end_total_asset:1518665.960488369
Sharpe:  1.8749336391557723
begin_total_asset:1000000
end_total_asset:1518665.960488369
Sharpe:  1.8749336391557723
begin_total_asset:1000000
end_total_asset:1518665.960488369
Sharpe:  1.8749336391557723
----------------------------------
| time/              |           |
|    episodes        | 196       |
|    fps             | 21        |
|    time_elapsed    | 1804      |
|    total_timesteps | 39200     |
| train/             |           |
|    actor_loss      | -7.75e+07 |
|    critic_loss     | 7.67e+11  |
|    learning_rate   | 0.001     |
|    n_updates       | 39000     |
|    reward          | 1518666.0 |
----------------------------------
begin_total_asset:1000000
end_total_asset:1518665.960488369
Sharpe:  1.8749336391557723
begin_total_asset:1000000
end_total_asset:1518665.960488369
Sharpe:  1.8749336391557723
begin_total_asset

begin_total_asset:1000000
end_total_asset:1518665.960488369
Sharpe:  1.8749336391557723
begin_total_asset:1000000
end_total_asset:1518665.960488369
Sharpe:  1.8749336391557723
begin_total_asset:1000000
end_total_asset:1518665.960488369
Sharpe:  1.8749336391557723
begin_total_asset:1000000
end_total_asset:1518665.960488369
Sharpe:  1.8749336391557723
----------------------------------
| time/              |           |
|    episodes        | 228       |
|    fps             | 21        |
|    time_elapsed    | 2125      |
|    total_timesteps | 45600     |
| train/             |           |
|    actor_loss      | -7.79e+07 |
|    critic_loss     | 6.03e+11  |
|    learning_rate   | 0.001     |
|    n_updates       | 45400     |
|    reward          | 1518666.0 |
----------------------------------
begin_total_asset:1000000
end_total_asset:1518665.960488369
Sharpe:  1.8749336391557723
begin_total_asset:1000000
end_total_asset:1518665.960488369
Sharpe:  1.8749336391557723
begin_total_asset

100%|██████████████████████████████████████████████████████████████████████████| 50000/50000 [2:35:44<00:00,  5.35it/s]

In [93]:
# Model 4: SAC
agent = DRLAgent(env=env_train)
SAC_PARAMS = {
    "batch_size": 128,
    "buffer_size": 100000,
    "learning_rate": 0.0003,
    "learning_starts": 100,
    "ent_coef": "auto_0.1",
}
model_sac = agent.get_model("sac", model_kwargs=SAC_PARAMS)
pbar = tqdm(total=50000)
trained_sac = agent.train_model(
    model=model_sac, tb_log_name="sac", total_timesteps=50000
)
update_progress(50000)
trained_sac.save("/DRLTrading/trained_models/trained_sac.zip")

{'batch_size': 128, 'buffer_size': 100000, 'learning_rate': 0.0003, 'learning_starts': 100, 'ent_coef': 'auto_0.1'}
Using cpu device



100%|██████████████████████████████████████████████████████████████████████████| 50000/50000 [2:35:44<00:00,  5.35it/s][A

Logging to tensorboard_log/sac\sac_2





begin_total_asset:1000000
end_total_asset:1508383.264216425
Sharpe:  1.933964154858648
begin_total_asset:1000000
end_total_asset:1563280.23175393
Sharpe:  2.0950536613920145
begin_total_asset:1000000
end_total_asset:1569440.0627068745
Sharpe:  2.116688391170457
begin_total_asset:1000000
end_total_asset:1569442.0866337656
Sharpe:  2.116745198760623
----------------------------------
| time/              |           |
|    episodes        | 4         |
|    fps             | 28        |
|    time_elapsed    | 28        |
|    total_timesteps | 800       |
| train/             |           |
|    actor_loss      | -3.07e+06 |
|    critic_loss     | 1.14e+10  |
|    ent_coef        | 0.127     |
|    ent_coef_loss   | 337       |
|    learning_rate   | 0.0003    |
|    n_updates       | 699       |
|    reward          | 1569442.1 |
----------------------------------
begin_total_asset:1000000
end_total_asset:1569443.8629634615
Sharpe:  2.1166912819283525
begin_total_asset:1000000
end_total_

begin_total_asset:1000000
end_total_asset:1569471.7726350874
Sharpe:  2.11679170588648
begin_total_asset:1000000
end_total_asset:1569481.5125234313
Sharpe:  2.116817419521256
begin_total_asset:1000000
end_total_asset:1569477.001088693
Sharpe:  2.1168072371629796
----------------------------------
| time/              |           |
|    episodes        | 32        |
|    fps             | 22        |
|    time_elapsed    | 287       |
|    total_timesteps | 6400      |
| train/             |           |
|    actor_loss      | -3.16e+07 |
|    critic_loss     | 4.17e+11  |
|    ent_coef        | 0.733     |
|    ent_coef_loss   | 58.6      |
|    learning_rate   | 0.0003    |
|    n_updates       | 6299      |
|    reward          | 1569477.0 |
----------------------------------
begin_total_asset:1000000
end_total_asset:1569458.537533314
Sharpe:  2.1167644768950917
begin_total_asset:1000000
end_total_asset:1569471.84941132
Sharpe:  2.116819145809816
begin_total_asset:1000000
end_total_as

begin_total_asset:1000000
end_total_asset:1569446.7023346517
Sharpe:  2.1167417824327206
begin_total_asset:1000000
end_total_asset:1569458.138066558
Sharpe:  2.1167457230037514
----------------------------------
| time/              |           |
|    episodes        | 60        |
|    fps             | 21        |
|    time_elapsed    | 555       |
|    total_timesteps | 12000     |
| train/             |           |
|    actor_loss      | -5.35e+07 |
|    critic_loss     | 4.49e+12  |
|    ent_coef        | 3.82      |
|    ent_coef_loss   | -223      |
|    learning_rate   | 0.0003    |
|    n_updates       | 11899     |
|    reward          | 1569458.1 |
----------------------------------
begin_total_asset:1000000
end_total_asset:1569441.466334605
Sharpe:  2.116712230763121
begin_total_asset:1000000
end_total_asset:1569459.769747675
Sharpe:  2.1167603229518472
begin_total_asset:1000000
end_total_asset:1569416.2155165018
Sharpe:  2.116644521363786
begin_total_asset:1000000
end_total

begin_total_asset:1000000
end_total_asset:1569413.6219546108
Sharpe:  2.1166750523565856
----------------------------------
| time/              |           |
|    episodes        | 88        |
|    fps             | 20        |
|    time_elapsed    | 844       |
|    total_timesteps | 17600     |
| train/             |           |
|    actor_loss      | -6.36e+07 |
|    critic_loss     | 7.32e+12  |
|    ent_coef        | 19.2      |
|    ent_coef_loss   | -387      |
|    learning_rate   | 0.0003    |
|    n_updates       | 17499     |
|    reward          | 1569413.6 |
----------------------------------
begin_total_asset:1000000
end_total_asset:1569336.0141419005
Sharpe:  2.116404805496122
begin_total_asset:1000000
end_total_asset:1569441.4209754185
Sharpe:  2.1167454864792683
begin_total_asset:1000000
end_total_asset:1569206.083384455
Sharpe:  2.1160487694212615
begin_total_asset:1000000
end_total_asset:1569431.113128043
Sharpe:  2.1167537565537087
---------------------------------

begin_total_asset:1000000
end_total_asset:1569291.66108851
Sharpe:  2.116326628525507
begin_total_asset:1000000
end_total_asset:1568995.1100417715
Sharpe:  2.1156791573102556
begin_total_asset:1000000
end_total_asset:1569262.6470515153
Sharpe:  2.116504021391376
begin_total_asset:1000000
end_total_asset:1569351.5115290314
Sharpe:  2.1168579906558
----------------------------------
| time/              |           |
|    episodes        | 120       |
|    fps             | 20        |
|    time_elapsed    | 1175      |
|    total_timesteps | 24000     |
| train/             |           |
|    actor_loss      | -7.52e+07 |
|    critic_loss     | 9.13e+12  |
|    ent_coef        | 117       |
|    ent_coef_loss   | -409      |
|    learning_rate   | 0.0003    |
|    n_updates       | 23899     |
|    reward          | 1569351.5 |
----------------------------------
begin_total_asset:1000000
end_total_asset:1569261.639873881
Sharpe:  2.1165159417825428
begin_total_asset:1000000
end_total_as

begin_total_asset:1000000
end_total_asset:1568522.4549506097
Sharpe:  2.1151147188217077
begin_total_asset:1000000
end_total_asset:1566228.5761093188
Sharpe:  2.1077909291437193
begin_total_asset:1000000
end_total_asset:1568809.0924575971
Sharpe:  2.116704850166555
----------------------------------
| time/              |           |
|    episodes        | 148       |
|    fps             | 20        |
|    time_elapsed    | 1457      |
|    total_timesteps | 29600     |
| train/             |           |
|    actor_loss      | -7.5e+07  |
|    critic_loss     | 1.01e+13  |
|    ent_coef        | 530       |
|    ent_coef_loss   | -317      |
|    learning_rate   | 0.0003    |
|    n_updates       | 29499     |
|    reward          | 1568809.1 |
----------------------------------
begin_total_asset:1000000
end_total_asset:1565665.2013554324
Sharpe:  2.1067757241434046
begin_total_asset:1000000
end_total_asset:1570117.691032498
Sharpe:  2.119041559550384
begin_total_asset:1000000
end_tot

begin_total_asset:1000000
end_total_asset:1569813.8549717122
Sharpe:  2.124265879193027
begin_total_asset:1000000
end_total_asset:1567058.832431535
Sharpe:  2.115278170266672
----------------------------------
| time/              |           |
|    episodes        | 176       |
|    fps             | 20        |
|    time_elapsed    | 1747      |
|    total_timesteps | 35200     |
| train/             |           |
|    actor_loss      | -7.39e+07 |
|    critic_loss     | 8.93e+12  |
|    ent_coef        | 2e+03     |
|    ent_coef_loss   | -98.8     |
|    learning_rate   | 0.0003    |
|    n_updates       | 35099     |
|    reward          | 1567058.9 |
----------------------------------
begin_total_asset:1000000
end_total_asset:1563109.639045174
Sharpe:  2.096810091765087
begin_total_asset:1000000
end_total_asset:1556653.3054876518
Sharpe:  2.0854134506542428
begin_total_asset:1000000
end_total_asset:1566044.9853467362
Sharpe:  2.1105936375455756
begin_total_asset:1000000
end_total

begin_total_asset:1000000
end_total_asset:1564027.7067032193
Sharpe:  2.103668923284939
----------------------------------
| time/              |           |
|    episodes        | 204       |
|    fps             | 20        |
|    time_elapsed    | 2038      |
|    total_timesteps | 40800     |
| train/             |           |
|    actor_loss      | -7.46e+07 |
|    critic_loss     | 6.23e+12  |
|    ent_coef        | 3.36e+03  |
|    ent_coef_loss   | 18.8      |
|    learning_rate   | 0.0003    |
|    n_updates       | 40699     |
|    reward          | 1564027.8 |
----------------------------------
begin_total_asset:1000000
end_total_asset:1572427.416548638
Sharpe:  2.124090961244089
begin_total_asset:1000000
end_total_asset:1558924.1496405762
Sharpe:  2.092722626033413
begin_total_asset:1000000
end_total_asset:1554973.0296759338
Sharpe:  2.0812013511652503
begin_total_asset:1000000
end_total_asset:1558004.77248021
Sharpe:  2.086708395709856
----------------------------------
| 

begin_total_asset:1000000
end_total_asset:1560916.4146555269
Sharpe:  2.0931920265246653
begin_total_asset:1000000
end_total_asset:1562521.6037186433
Sharpe:  2.0988079842148535
begin_total_asset:1000000
end_total_asset:1557577.3474483998
Sharpe:  2.0853994354392165
begin_total_asset:1000000
end_total_asset:1568909.3825605887
Sharpe:  2.1209674423436904
----------------------------------
| time/              |           |
|    episodes        | 236       |
|    fps             | 19        |
|    time_elapsed    | 2397      |
|    total_timesteps | 47200     |
| train/             |           |
|    actor_loss      | -7.64e+07 |
|    critic_loss     | 4.19e+12  |
|    ent_coef        | 3.64e+03  |
|    ent_coef_loss   | 0.387     |
|    learning_rate   | 0.0003    |
|    n_updates       | 47099     |
|    reward          | 1568909.4 |
----------------------------------
begin_total_asset:1000000
end_total_asset:1565170.0282239488
Sharpe:  2.107165423502493
begin_total_asset:1000000
end_t


100%|████████████████████████████████████████████████████████████████████████████| 50000/50000 [42:48<00:00, 19.47it/s][A

begin_total_asset:1000000
end_total_asset:1559018.959708424
Sharpe:  2.0862169030609885


In [94]:
# Model 5: TD3
agent = DRLAgent(env=env_train)
TD3_PARAMS = {"batch_size": 100, "buffer_size": 1000000, "learning_rate": 0.001}
model_td3 = agent.get_model("td3", model_kwargs=TD3_PARAMS)
pbar = tqdm(total=30000)
trained_td3 = agent.train_model(
    model=model_td3, tb_log_name="td3", total_timesteps=30000
)
update_progress(30000)
trained_td3.save("/DRLTrading/trained_models/trained_td3.zip")

{'batch_size': 100, 'buffer_size': 1000000, 'learning_rate': 0.001}
Using cpu device


100%|████████████████████████████████████████████████████████████████████████████| 50000/50000 [42:48<00:00, 19.46it/s]

Logging to tensorboard_log/td3\td3_2





begin_total_asset:1000000
end_total_asset:1526371.8288760663
Sharpe:  1.999018120993065
begin_total_asset:1000000
end_total_asset:1492671.749909291
Sharpe:  1.9092017770596668
begin_total_asset:1000000
end_total_asset:1492671.749909291
Sharpe:  1.9092017770596668
begin_total_asset:1000000
end_total_asset:1492671.749909291
Sharpe:  1.9092017770596668
----------------------------------
| time/              |           |
|    episodes        | 4         |
|    fps             | 29        |
|    time_elapsed    | 26        |
|    total_timesteps | 800       |
| train/             |           |
|    actor_loss      | -1.83e+06 |
|    critic_loss     | 6.27e+09  |
|    learning_rate   | 0.001     |
|    n_updates       | 600       |
|    reward          | 1492671.8 |
----------------------------------
begin_total_asset:1000000
end_total_asset:1492671.749909291
Sharpe:  1.9092017770596668
begin_total_asset:1000000
end_total_asset:1492671.749909291
Sharpe:  1.9092017770596668
begin_total_asset

begin_total_asset:1000000
end_total_asset:1492671.749909291
Sharpe:  1.9092017770596668
begin_total_asset:1000000
end_total_asset:1492671.749909291
Sharpe:  1.9092017770596668
begin_total_asset:1000000
end_total_asset:1492671.749909291
Sharpe:  1.9092017770596668
begin_total_asset:1000000
end_total_asset:1492671.749909291
Sharpe:  1.9092017770596668
----------------------------------
| time/              |           |
|    episodes        | 36        |
|    fps             | 23        |
|    time_elapsed    | 303       |
|    total_timesteps | 7200      |
| train/             |           |
|    actor_loss      | -1.93e+07 |
|    critic_loss     | 8.21e+11  |
|    learning_rate   | 0.001     |
|    n_updates       | 7000      |
|    reward          | 1492671.8 |
----------------------------------
begin_total_asset:1000000
end_total_asset:1492671.749909291
Sharpe:  1.9092017770596668
begin_total_asset:1000000
end_total_asset:1492671.749909291
Sharpe:  1.9092017770596668
begin_total_asset

begin_total_asset:1000000
end_total_asset:1492671.749909291
Sharpe:  1.9092017770596668
begin_total_asset:1000000
end_total_asset:1492671.749909291
Sharpe:  1.9092017770596668
begin_total_asset:1000000
end_total_asset:1492671.749909291
Sharpe:  1.9092017770596668
begin_total_asset:1000000
end_total_asset:1492671.749909291
Sharpe:  1.9092017770596668
----------------------------------
| time/              |           |
|    episodes        | 68        |
|    fps             | 23        |
|    time_elapsed    | 572       |
|    total_timesteps | 13600     |
| train/             |           |
|    actor_loss      | -3.31e+07 |
|    critic_loss     | 1.84e+12  |
|    learning_rate   | 0.001     |
|    n_updates       | 13400     |
|    reward          | 1492671.8 |
----------------------------------
begin_total_asset:1000000
end_total_asset:1492671.749909291
Sharpe:  1.9092017770596668
begin_total_asset:1000000
end_total_asset:1492671.749909291
Sharpe:  1.9092017770596668
begin_total_asset

begin_total_asset:1000000
end_total_asset:1492671.749909291
Sharpe:  1.9092017770596668
begin_total_asset:1000000
end_total_asset:1492671.749909291
Sharpe:  1.9092017770596668
begin_total_asset:1000000
end_total_asset:1492671.749909291
Sharpe:  1.9092017770596668
begin_total_asset:1000000
end_total_asset:1492671.749909291
Sharpe:  1.9092017770596668
----------------------------------
| time/              |           |
|    episodes        | 100       |
|    fps             | 23        |
|    time_elapsed    | 853       |
|    total_timesteps | 20000     |
| train/             |           |
|    actor_loss      | -4.26e+07 |
|    critic_loss     | 2.23e+12  |
|    learning_rate   | 0.001     |
|    n_updates       | 19800     |
|    reward          | 1492671.8 |
----------------------------------
begin_total_asset:1000000
end_total_asset:1492671.749909291
Sharpe:  1.9092017770596668
begin_total_asset:1000000
end_total_asset:1492671.749909291
Sharpe:  1.9092017770596668
begin_total_asset

begin_total_asset:1000000
end_total_asset:1492671.749909291
Sharpe:  1.9092017770596668
begin_total_asset:1000000
end_total_asset:1492671.749909291
Sharpe:  1.9092017770596668
begin_total_asset:1000000
end_total_asset:1492671.749909291
Sharpe:  1.9092017770596668
begin_total_asset:1000000
end_total_asset:1492671.749909291
Sharpe:  1.9092017770596668
----------------------------------
| time/              |           |
|    episodes        | 132       |
|    fps             | 22        |
|    time_elapsed    | 1164      |
|    total_timesteps | 26400     |
| train/             |           |
|    actor_loss      | -4.94e+07 |
|    critic_loss     | 2.17e+12  |
|    learning_rate   | 0.001     |
|    n_updates       | 26200     |
|    reward          | 1492671.8 |
----------------------------------
begin_total_asset:1000000
end_total_asset:1492671.749909291
Sharpe:  1.9092017770596668
begin_total_asset:1000000
end_total_asset:1492671.749909291
Sharpe:  1.9092017770596668
begin_total_asset

100%|████████████████████████████████████████████████████████████████████████████| 30000/30000 [23:21<00:00, 21.41it/s]

In [99]:
# Trading
trade = dp.data_split(df, "2010-01-01", "2022-12-07")
e_trade_gym = StockPortfolioEnv(df=trade, **env_kwargs)

print("Shape of Trade DataFrame: ", trade.shape)

df_daily_return, df_actions = DRLAgent.DRL_prediction(
    model=trained_a2c, environment=e_trade_gym
)

Shape of Trade DataFrame:  (20580, 18)
begin_total_asset:1000000
end_total_asset:1576331.9325847682
Sharpe:  0.9003606333667805
hit end!


In [100]:
print(df_daily_return.head())

df_daily_return.to_csv("results/df_daily_return.csv")

print(df_actions.head())

df_actions.to_csv("results/df_actions.csv")

         date  daily_return
0  2020-03-19      0.000000
1  2020-03-20     -0.040980
2  2020-03-23     -0.032855
3  2020-03-24      0.111204
4  2020-03-25      0.011949
                AAPL      AMGN       AXP        BA       CAT       CRM  \
date                                                                     
2020-03-19  0.033333  0.033333  0.033333  0.033333  0.033333  0.033333   
2020-03-20  0.021785  0.052465  0.042711  0.021785  0.050958  0.059217   
2020-03-23  0.051256  0.051256  0.051256  0.033043  0.051256  0.018856   
2020-03-24  0.053801  0.038817  0.052859  0.023762  0.053801  0.027045   
2020-03-25  0.049580  0.049580  0.049580  0.018239  0.049580  0.049580   

                CSCO       CVX       DIS       DOW  ...       MRK      MSFT  \
date                                                ...                       
2020-03-19  0.033333  0.033333  0.033333  0.033333  ...  0.033333  0.033333   
2020-03-20  0.059217  0.021785  0.021785  0.040545  ...  0.021785  0.039583 

In [97]:
# 6. Backtest Our Strategy

# 6.1. BackTestStats
from pyfolio import timeseries

DRL_strat = convert_daily_return_to_pyfolio_ts(df_daily_return)
perf_func = timeseries.perf_stats
perf_stats_all = perf_func(
    returns=DRL_strat,
    factor_returns=DRL_strat,
    positions=None,
    transactions=None,
    turnover_denom="AGB",
)

print("==============DRL Strategy Stats===========")
print(perf_stats_all)

# baseline stats
print("==============Get Baseline Stats===========")
baseline_df = get_baseline(
    ticker="^DJI",
    start=df_daily_return.loc[0, "date"],
    end=df_daily_return.loc[len(df_daily_return) - 1, "date"],
)

stats = backtest_stats(baseline_df, value_col_name="close")

Annual return          0.211017
Cumulative returns     0.684045
Annual volatility      0.212044
Sharpe ratio           1.008392
Calmar ratio           0.956615
Stability              0.458842
Max drawdown          -0.220587
Omega ratio            1.204728
Sortino ratio          1.555028
Skew                   0.818646
Kurtosis               9.817588
Tail ratio             0.947745
Daily value at risk   -0.025866
Alpha                  0.000000
Beta                   1.000000
dtype: float64
[*********************100%***********************]  1 of 1 completed
Shape of DataFrame:  (685, 8)
Annual return          0.212927
Cumulative returns     0.689988
Annual volatility      0.210369
Sharpe ratio           1.023535
Calmar ratio           0.970459
Stability              0.444417
Max drawdown          -0.219408
Omega ratio            1.208475
Sortino ratio          1.580469
Skew                        NaN
Kurtosis                    NaN
Tail ratio             1.024432
Daily value at risk   

In [98]:
# 6.2. BackTestPlot
import pyfolio

baseline_df = get_baseline(
    ticker="^DJI", start=df_daily_return.loc[0, "date"], end="2021-11-01"
)

baseline_returns = get_daily_return(baseline_df, value_col_name="close")

# with pyfolio.plotting.plotting_context(font_scale=1.1):
#     pyfolio.create_full_tear_sheet(
#         returns=DRL_strat, benchmark_rets=baseline_returns, set_context=False
#     )

# Min-Variance Portfolio Allocation
from pypfopt.efficient_frontier import EfficientFrontier
from pypfopt import risk_models

unique_tic = trade.tic.unique()
unique_trade_date = trade.date.unique()

df.head()

# Calculate_portfolio_minimum_variance
portfolio = pd.DataFrame(index=range(1), columns=unique_trade_date)
initial_capital = 1000000
portfolio.loc[0, unique_trade_date[0]] = initial_capital

for i in range(len(unique_trade_date) - 1):
    df_temp = df[df.date == unique_trade_date[i]].reset_index(drop=True)
    df_temp_next = df[df.date == unique_trade_date[i + 1]].reset_index(drop=True)
    # Sigma = risk_models.sample_cov(df_temp.return_list[0])

    # calculate covariance matrix
    Sigma = df_temp.return_list[0].cov()

    # portfolio allocation
    ef_min_var = EfficientFrontier(None, Sigma, weight_bounds=(0, 0.1))

    # minimum variance
    raw_weights_min_var = ef_min_var.min_volatility()

    # get weights
    cleaned_weights_min_var = ef_min_var.clean_weights()

    # current capital
    cap = portfolio.iloc[0, i]

    # current cash invested for each stock
    current_cash = [element * cap for element in list(cleaned_weights_min_var.values())]

    # current held shares
    current_shares = list(np.array(current_cash) / np.array(df_temp.close))

    # next time period price
    next_price = np.array(df_temp_next.close)

    ##next_price * current share to calculate next total account value
    portfolio.iloc[0, i + 1] = np.dot(current_shares, next_price)

portfolio = portfolio.T
portfolio.columns = ["account_value"]

portfolio.head()

a2c_cumpod = (df_daily_return.daily_return + 1).cumprod() - 1

min_var_cumpod = (portfolio.account_value.pct_change() + 1).cumprod() - 1

dji_cumpod = (baseline_returns + 1).cumprod() - 1

# Plotly: DRL, Min-Variance, DJIA
import matplotlib.pyplot as plt
import plotly
import plotly.graph_objs as go

time_ind = pd.Series(df_daily_return.date)

trace0_portfolio = go.Scatter(
    x=time_ind, y=a2c_cumpod, mode="lines", name="A2C (Portfolio Allocation)"
)

trace1_portfolio = go.Scatter(x=time_ind, y=dji_cumpod, mode="lines", name="DJIA")
trace2_portfolio = go.Scatter(
    x=time_ind, y=min_var_cumpod, mode="lines", name="Min-Variance"
)
# trace3_portfolio = go.Scatter(x = time_ind, y = ddpg_cumpod, mode = 'lines', name = 'DDPG')
# trace4_portfolio = go.Scatter(x = time_ind, y = addpg_cumpod, mode = 'lines', name = 'Adaptive-DDPG')
# trace5_portfolio = go.Scatter(x = time_ind, y = min_cumpod, mode = 'lines', name = 'Min-Variance')
# trace4 = go.Scatter(x = time_ind, y = addpg_cumpod, mode = 'lines', name = 'Adaptive-DDPG')
# trace2 = go.Scatter(x = time_ind, y = portfolio_cost_minv, mode = 'lines', name = 'Min-Variance')
# trace3 = go.Scatter(x = time_ind, y = spx_value, mode = 'lines', name = 'SPX')

fig = go.Figure()
fig.add_trace(trace0_portfolio)
fig.add_trace(trace1_portfolio)
fig.add_trace(trace2_portfolio)

fig.update_layout(
    legend=dict(
        x=0,
        y=1,
        traceorder="normal",
        font=dict(family="sans-serif", size=15, color="black"),
        bgcolor="White",
        bordercolor="white",
        borderwidth=2,
    ),
)

# fig.update_layout(legend_orientation="h")

fig.update_layout(
    title={
        #'text': "Cumulative Return using FinRL",
        "y": 0.85,
        "x": 0.5,
        "xanchor": "center",
        "yanchor": "top",
    }
)

# with Transaction cost
# fig.update_layout(title =  'Quarterly Trade Date')

fig.update_layout(
    #    margin=dict(l=20, r=20, t=20, b=20),
    paper_bgcolor="rgba(1,1,0,0)",
    plot_bgcolor="rgba(1, 1, 0, 0)",
    # xaxis_title="Date",
    yaxis_title="Cumulative Return",
    xaxis={
        "type": "date",
        "tick0": time_ind[0],
        "tickmode": "linear",
        "dtick": 86400000.0 * 80,
    },
)
fig.update_xaxes(
    showline=True,
    linecolor="black",
    showgrid=True,
    gridwidth=1,
    gridcolor="LightSteelBlue",
    mirror=True,
)
fig.update_yaxes(
    showline=True,
    linecolor="black",
    showgrid=True,
    gridwidth=1,
    gridcolor="LightSteelBlue",
    mirror=True,
)
fig.update_yaxes(zeroline=True, zerolinewidth=1, zerolinecolor="LightSteelBlue")

fig.show()

[*********************100%***********************]  1 of 1 completed
Shape of DataFrame:  (409, 8)


100%|████████████████████████████████████████████████████████████████████████████| 30000/30000 [23:34<00:00, 21.41it/s]