In [1]:
# ## Install the FinRL library (placeholder: this cell runs no install command)

In [2]:
import warnings
warnings.filterwarnings("ignore")

In [3]:
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
# matplotlib.use('Agg')
import datetime
%matplotlib inline
from finrl.config_tickers import SP_500_TICKER
from finrl.meta.preprocessor.yahoodownloader import YahooDownloader
from finrl.meta.preprocessor.preprocessors import FeatureEngineer, data_split
from finrl.meta.env_stock_trading.env_stocktrading import StockTradingEnv
from finrl.agents.stablebaselines3.models import DRLAgent,DRLEnsembleAgent
from finrl.plot import backtest_stats, backtest_plot, get_daily_return, get_baseline
from settings import *
from pprint import pprint
import sys
import os
import itertools
import json

# Make sure the output directory exists. makedirs(exist_ok=True) replaces the
# exists()/mkdir() pair: it also creates missing parent directories and does
# not fail if the directory appears between the check and the creation.
os.makedirs(RESULTS_DIR, exist_ok=True)

# Make a sibling FinRL checkout importable.
# NOTE(review): this runs after the finrl imports above have already executed,
# so it only influences imports performed later in the session.
sys.path.append("../FinRL-Library")


In [4]:
# Technical indicator names used for feature engineering and the
# trading-environment configuration.
INDICATORS = ["macd", "rsi_30", "cci_30", "dx_30"]

# The ticker universe is the set of top-level keys of data/stock.json.
with open("data/stock.json") as f:
    ticker_map = json.load(f)
list_tickers = list(ticker_map.keys())

In [5]:
# Download data for every ticker in list_tickers, from TRAIN_START_DATE
# through TEST_END_DATE, using FinRL's YahooDownloader.
downloader = YahooDownloader(
    start_date=TRAIN_START_DATE,
    end_date=TEST_END_DATE,
    ticker_list=list_tickers,
)
df = downloader.fetch_data()

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%*******

Shape of DataFrame:  (251700, 8)


In [6]:
from finrl.meta.preprocessor.preprocessors import FeatureEngineer
# Feature pipeline: the technical indicators listed in INDICATORS plus the
# turbulence index; user-defined features are disabled.
fe = FeatureEngineer(
    use_technical_indicator=True,
    tech_indicator_list=INDICATORS,
    use_turbulence=True,
    user_defined_feature=False,
)

# Sanitise the engineered frame so no non-finite values remain:
#   * both +inf and -inf are replaced by 0 (the previous code only handled
#     +inf, so -inf values were left in place),
#   * remaining NaNs are zero-filled.
# replace() and fillna() each return a new frame, so no explicit copy is needed.
processed = (
    fe.preprocess_data(df)
    .replace([np.inf, -np.inf], 0)
    .fillna(0)
)

Successfully added technical indicators
Successfully added turbulence index


In [7]:
# Number of distinct tickers present in the processed frame.
stock_dimension = len(processed["tic"].unique())

# State size: 1 scalar slot, plus (2 + number of indicators) slots per stock.
# NOTE(review): presumably the scalar is cash and the two per-stock slots are
# price and holdings - confirm against StockTradingEnv.
slots_per_stock = 2 + len(INDICATORS)
state_space = 1 + slots_per_stock * stock_dimension
print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")

Stock Dimension: 50, State Space: 301


In [8]:
# Environment settings; these are forwarded as keyword arguments when the
# ensemble agent is constructed.
env_kwargs = dict(
    hmax=100,
    initial_amount=1_000_000,
    buy_cost_pct=0.001,
    sell_cost_pct=0.001,
    state_space=state_space,
    stock_dim=stock_dimension,
    tech_indicator_list=INDICATORS,
    action_space=stock_dimension,
    reward_scaling=1e-4,
    print_verbosity=5,
)

In [12]:
# rebalance_window: number of days between model retrainings.
# validation_window: number of days used for validation and for trading
# (with 63, the validation period and the trading period are 63 days each).
rebalance_window, validation_window = 63, 63

In [23]:
# Date ranges handed to the ensemble agent: one for training, one covering
# validation + testing.
training_span = (TRAIN_START_DATE, TRAIN_END_DATE)
val_test_span = (TEST_START_DATE, TEST_END_DATE)

# Build the ensemble agent on the processed frame; the environment settings
# in env_kwargs are passed through as extra keyword arguments.
ensemble_agent = DRLEnsembleAgent(
    df=processed,
    train_period=training_span,
    val_test_period=val_test_span,
    rebalance_window=rebalance_window,
    validation_window=validation_window,
    **env_kwargs,
)

In [24]:
# Hyperparameters for each algorithm in the ensemble. Key order is kept
# stable because the dicts are echoed in the training log.
A2C_model_kwargs = dict(n_steps=5, ent_coef=0.005, learning_rate=0.0007)

PPO_model_kwargs = dict(
    ent_coef=0.01,
    n_steps=2048,
    learning_rate=0.00025,
    batch_size=128,
)

DDPG_model_kwargs = dict(
    # "action_noise": "ornstein_uhlenbeck",  (disabled)
    buffer_size=10_000,
    learning_rate=0.0005,
    batch_size=64,
)

SAC_model_kwargs = dict(
    batch_size=64,
    buffer_size=100000,
    learning_rate=0.0001,
    learning_starts=100,
    ent_coef="auto_0.1",
)

TD3_model_kwargs = dict(
    batch_size=100,
    buffer_size=1000000,
    learning_rate=0.0001,
)

# Every algorithm gets the same training budget of 10,000 timesteps.
timesteps_dict = dict.fromkeys(("a2c", "ppo", "ddpg", "sac", "td3"), 10_000)

In [47]:
# Per-algorithm hyperparameter dicts, in the positional order the strategy
# runner is called with: A2C, PPO, DDPG, SAC, TD3.
model_kwargs_in_order = (
    A2C_model_kwargs,
    PPO_model_kwargs,
    DDPG_model_kwargs,
    SAC_model_kwargs,
    TD3_model_kwargs,
)

# Run the ensemble strategy with the per-algorithm timestep budgets.
df_summary = ensemble_agent.run_ensemble_strategy(
    *model_kwargs_in_order,
    timesteps_dict,
)

turbulence_threshold:  330.749240787695
{'n_steps': 5, 'ent_coef': 0.005, 'learning_rate': 0.0007}
Using cpu device
Logging to tensorboard_log/a2c/a2c_126_2
-------------------------------------
| time/                 |           |
|    fps                | 51        |
|    iterations         | 100       |
|    time_elapsed       | 9         |
|    total_timesteps    | 500       |
| train/                |           |
|    entropy_loss       | -71       |
|    explained_variance | 0.205     |
|    learning_rate      | 0.0007    |
|    n_updates          | 99        |
|    policy_loss        | 25        |
|    reward             | 2.0841537 |
|    std                | 1         |
|    value_loss         | 0.246     |
-------------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 49          |
|    iterations         | 200         |
|    time_elapsed       | 20          |
|    total_timesteps    | 1000     



td3 Sharpe Ratio:  -0.05505769216932544
{'batch_size': 64, 'buffer_size': 100000, 'learning_rate': 0.0001, 'learning_starts': 100, 'ent_coef': 'auto_0.1'}
Using cpu device
Logging to tensorboard_log/sac/sac_126_1
day: 4027, episode: 10
begin_total_asset: 1000000.00
end_total_asset: 3421489.37
total_reward: 2421489.37
total_cost: 252128.65
total_trades: 138462
Sharpe: 0.492
sac Sharpe Ratio:  -0.027732103563941506
{'ent_coef': 0.01, 'n_steps': 2048, 'learning_rate': 0.00025, 'batch_size': 128}
Using cpu device
Logging to tensorboard_log/ppo/ppo_126_1
-----------------------------------
| time/              |            |
|    fps             | 58         |
|    iterations      | 1          |
|    time_elapsed    | 35         |
|    total_timesteps | 2048       |
| train/             |            |
|    reward          | 0.21235088 |
-----------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 55      



td3 Sharpe Ratio:  0.07641877340852711
{'batch_size': 64, 'buffer_size': 100000, 'learning_rate': 0.0001, 'learning_starts': 100, 'ent_coef': 'auto_0.1'}
Using cpu device
Logging to tensorboard_log/sac/sac_189_1
day: 4090, episode: 10
begin_total_asset: 1000000.00
end_total_asset: 5172149.91
total_reward: 4172149.91
total_cost: 63617.11
total_trades: 114726
Sharpe: 0.589
sac Sharpe Ratio:  0.1012075099090205
{'ent_coef': 0.01, 'n_steps': 2048, 'learning_rate': 0.00025, 'batch_size': 128}
Using cpu device
Logging to tensorboard_log/ppo/ppo_189_1
-----------------------------------
| time/              |            |
|    fps             | 53         |
|    iterations      | 1          |
|    time_elapsed    | 37         |
|    total_timesteps | 2048       |
| train/             |            |
|    reward          | 0.22314787 |
-----------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 50          |



td3 Sharpe Ratio:  0.3366501255386006
{'batch_size': 64, 'buffer_size': 100000, 'learning_rate': 0.0001, 'learning_starts': 100, 'ent_coef': 'auto_0.1'}
Using cpu device
Logging to tensorboard_log/sac/sac_252_1
day: 4153, episode: 10
begin_total_asset: 1000000.00
end_total_asset: 10651283.55
total_reward: 9651283.55
total_cost: 243940.33
total_trades: 126931
Sharpe: 0.823
sac Sharpe Ratio:  0.34252243843351243
{'ent_coef': 0.01, 'n_steps': 2048, 'learning_rate': 0.00025, 'batch_size': 128}
Using cpu device
Logging to tensorboard_log/ppo/ppo_252_1
-----------------------------------
| time/              |            |
|    fps             | 50         |
|    iterations      | 1          |
|    time_elapsed    | 40         |
|    total_timesteps | 2048       |
| train/             |            |
|    reward          | 0.29383782 |
-----------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 50         



td3 Sharpe Ratio:  -0.40283568055647845
{'batch_size': 64, 'buffer_size': 100000, 'learning_rate': 0.0001, 'learning_starts': 100, 'ent_coef': 'auto_0.1'}
Using cpu device
Logging to tensorboard_log/sac/sac_315_1
day: 4216, episode: 10
begin_total_asset: 1000000.00
end_total_asset: 4286481.65
total_reward: 3286481.65
total_cost: 11478.70
total_trades: 106315
Sharpe: 0.496
sac Sharpe Ratio:  -0.3767310504832917
{'ent_coef': 0.01, 'n_steps': 2048, 'learning_rate': 0.00025, 'batch_size': 128}
Using cpu device
Logging to tensorboard_log/ppo/ppo_315_1
------------------------------------
| time/              |             |
|    fps             | 49          |
|    iterations      | 1           |
|    time_elapsed    | 41          |
|    total_timesteps | 2048        |
| train/             |             |
|    reward          | 0.081442215 |
------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 49



td3 Sharpe Ratio:  0.8610162092466032
{'batch_size': 64, 'buffer_size': 100000, 'learning_rate': 0.0001, 'learning_starts': 100, 'ent_coef': 'auto_0.1'}
Using cpu device
Logging to tensorboard_log/sac/sac_378_1
day: 4279, episode: 10
begin_total_asset: 1000000.00
end_total_asset: 6869049.58
total_reward: 5869049.58
total_cost: 330814.11
total_trades: 154784
Sharpe: 0.619
sac Sharpe Ratio:  0.5773356113491268
{'ent_coef': 0.01, 'n_steps': 2048, 'learning_rate': 0.00025, 'batch_size': 128}
Using cpu device
Logging to tensorboard_log/ppo/ppo_378_1
-----------------------------------
| time/              |            |
|    fps             | 51         |
|    iterations      | 1          |
|    time_elapsed    | 39         |
|    total_timesteps | 2048       |
| train/             |            |
|    reward          | 0.33275265 |
-----------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 51         |
| 



td3 Sharpe Ratio:  0.1909299381987028
{'batch_size': 64, 'buffer_size': 100000, 'learning_rate': 0.0001, 'learning_starts': 100, 'ent_coef': 'auto_0.1'}
Using cpu device
Logging to tensorboard_log/sac/sac_441_1
day: 4342, episode: 10
begin_total_asset: 1000000.00
end_total_asset: 5218277.62
total_reward: 4218277.62
total_cost: 119185.28
total_trades: 126544
Sharpe: 0.582
sac Sharpe Ratio:  0.2045300315612005
{'ent_coef': 0.01, 'n_steps': 2048, 'learning_rate': 0.00025, 'batch_size': 128}
Using cpu device
Logging to tensorboard_log/ppo/ppo_441_1
---------------------------------
| time/              |          |
|    fps             | 47       |
|    iterations      | 1        |
|    time_elapsed    | 42       |
|    total_timesteps | 2048     |
| train/             |          |
|    reward          | 0.363211 |
---------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 48          |
|    iterations  



td3 Sharpe Ratio:  -0.119293138979897
{'batch_size': 64, 'buffer_size': 100000, 'learning_rate': 0.0001, 'learning_starts': 100, 'ent_coef': 'auto_0.1'}
Using cpu device
Logging to tensorboard_log/sac/sac_504_1
day: 4405, episode: 10
begin_total_asset: 1000000.00
end_total_asset: 5650741.23
total_reward: 4650741.23
total_cost: 269963.29
total_trades: 143711
Sharpe: 0.580
sac Sharpe Ratio:  -0.11522629126938085
{'ent_coef': 0.01, 'n_steps': 2048, 'learning_rate': 0.00025, 'batch_size': 128}
Using cpu device
Logging to tensorboard_log/ppo/ppo_504_1
-----------------------------------
| time/              |            |
|    fps             | 50         |
|    iterations      | 1          |
|    time_elapsed    | 40         |
|    total_timesteps | 2048       |
| train/             |            |
|    reward          | 0.24680008 |
-----------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 49         



td3 Sharpe Ratio:  0.48549424540081954
{'batch_size': 64, 'buffer_size': 100000, 'learning_rate': 0.0001, 'learning_starts': 100, 'ent_coef': 'auto_0.1'}
Using cpu device
Logging to tensorboard_log/sac/sac_567_1
day: 4468, episode: 10
begin_total_asset: 1000000.00
end_total_asset: 6430668.64
total_reward: 5430668.64
total_cost: 422583.69
total_trades: 172743
Sharpe: 0.628
sac Sharpe Ratio:  0.5197375176372234
{'ent_coef': 0.01, 'n_steps': 2048, 'learning_rate': 0.00025, 'batch_size': 128}
Using cpu device
Logging to tensorboard_log/ppo/ppo_567_1
-----------------------------------
| time/              |            |
|    fps             | 47         |
|    iterations      | 1          |
|    time_elapsed    | 43         |
|    total_timesteps | 2048       |
| train/             |            |
|    reward          | 0.23589683 |
-----------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 46         |
|



td3 Sharpe Ratio:  -0.4359877916562993
{'batch_size': 64, 'buffer_size': 100000, 'learning_rate': 0.0001, 'learning_starts': 100, 'ent_coef': 'auto_0.1'}
Using cpu device
Logging to tensorboard_log/sac/sac_630_1
day: 4531, episode: 10
begin_total_asset: 1000000.00
end_total_asset: 7080569.24
total_reward: 6080569.24
total_cost: 31746.81
total_trades: 131123
Sharpe: 0.609
sac Sharpe Ratio:  -0.4438640336563768
{'ent_coef': 0.01, 'n_steps': 2048, 'learning_rate': 0.00025, 'batch_size': 128}
Using cpu device
Logging to tensorboard_log/ppo/ppo_630_1
-----------------------------------
| time/              |            |
|    fps             | 49         |
|    iterations      | 1          |
|    time_elapsed    | 41         |
|    total_timesteps | 2048       |
| train/             |            |
|    reward          | 0.29898492 |
-----------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 48          



td3 Sharpe Ratio:  0.36946655510673343
{'batch_size': 64, 'buffer_size': 100000, 'learning_rate': 0.0001, 'learning_starts': 100, 'ent_coef': 'auto_0.1'}
Using cpu device
Logging to tensorboard_log/sac/sac_693_1
day: 4594, episode: 10
begin_total_asset: 1000000.00
end_total_asset: 3394044.17
total_reward: 2394044.17
total_cost: 389267.05
total_trades: 157020
Sharpe: 0.428
sac Sharpe Ratio:  0.38224484746085996
{'ent_coef': 0.01, 'n_steps': 2048, 'learning_rate': 0.00025, 'batch_size': 128}
Using cpu device
Logging to tensorboard_log/ppo/ppo_693_1
-----------------------------------
| time/              |            |
|    fps             | 45         |
|    iterations      | 1          |
|    time_elapsed    | 44         |
|    total_timesteps | 2048       |
| train/             |            |
|    reward          | 0.24392839 |
-----------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 46         



td3 Sharpe Ratio:  0.11008147491160619
{'batch_size': 64, 'buffer_size': 100000, 'learning_rate': 0.0001, 'learning_starts': 100, 'ent_coef': 'auto_0.1'}
Using cpu device
Logging to tensorboard_log/sac/sac_756_1
day: 4657, episode: 10
begin_total_asset: 1000000.00
end_total_asset: 2870161.33
total_reward: 1870161.33
total_cost: 42509.68
total_trades: 131100
Sharpe: 0.361
sac Sharpe Ratio:  0.15957228105579607
{'ent_coef': 0.01, 'n_steps': 2048, 'learning_rate': 0.00025, 'batch_size': 128}
Using cpu device
Logging to tensorboard_log/ppo/ppo_756_1
----------------------------------
| time/              |           |
|    fps             | 45        |
|    iterations      | 1         |
|    time_elapsed    | 44        |
|    total_timesteps | 2048      |
| train/             |           |
|    reward          | 0.5108159 |
----------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 45          |
|    it



td3 Sharpe Ratio:  0.46618150870654795
{'batch_size': 64, 'buffer_size': 100000, 'learning_rate': 0.0001, 'learning_starts': 100, 'ent_coef': 'auto_0.1'}
Using cpu device
Logging to tensorboard_log/sac/sac_819_1
day: 4720, episode: 10
begin_total_asset: 1000000.00
end_total_asset: 6862744.34
total_reward: 5862744.34
total_cost: 126508.01
total_trades: 133569
Sharpe: 0.537
sac Sharpe Ratio:  0.16263423156652954
{'ent_coef': 0.01, 'n_steps': 2048, 'learning_rate': 0.00025, 'batch_size': 128}
Using cpu device
Logging to tensorboard_log/ppo/ppo_819_1
-----------------------------------
| time/              |            |
|    fps             | 44         |
|    iterations      | 1          |
|    time_elapsed    | 45         |
|    total_timesteps | 2048       |
| train/             |            |
|    reward          | 0.05906761 |
-----------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 44         



td3 Sharpe Ratio:  0.2668946462825803
{'batch_size': 64, 'buffer_size': 100000, 'learning_rate': 0.0001, 'learning_starts': 100, 'ent_coef': 'auto_0.1'}
Using cpu device
Logging to tensorboard_log/sac/sac_882_1
day: 4783, episode: 10
begin_total_asset: 1000000.00
end_total_asset: 6169029.00
total_reward: 5169029.00
total_cost: 444110.88
total_trades: 186726
Sharpe: 0.567
sac Sharpe Ratio:  0.2018262805599679
{'ent_coef': 0.01, 'n_steps': 2048, 'learning_rate': 0.00025, 'batch_size': 128}
Using cpu device
Logging to tensorboard_log/ppo/ppo_882_1
-----------------------------------
| time/              |            |
|    fps             | 43         |
|    iterations      | 1          |
|    time_elapsed    | 46         |
|    total_timesteps | 2048       |
| train/             |            |
|    reward          | 0.46874663 |
-----------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 43          |



td3 Sharpe Ratio:  0.6035980634900817
{'batch_size': 64, 'buffer_size': 100000, 'learning_rate': 0.0001, 'learning_starts': 100, 'ent_coef': 'auto_0.1'}
Using cpu device
Logging to tensorboard_log/sac/sac_945_1
day: 4846, episode: 10
begin_total_asset: 1000000.00
end_total_asset: 12496611.84
total_reward: 11496611.84
total_cost: 363856.64
total_trades: 153832
Sharpe: 0.701
sac Sharpe Ratio:  0.30264427969567476
{'ent_coef': 0.01, 'n_steps': 2048, 'learning_rate': 0.00025, 'batch_size': 128}
Using cpu device
Logging to tensorboard_log/ppo/ppo_945_1
-----------------------------------
| time/              |            |
|    fps             | 42         |
|    iterations      | 1          |
|    time_elapsed    | 48         |
|    total_timesteps | 2048       |
| train/             |            |
|    reward          | 0.42161074 |
-----------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 42         |