In [1]:
!pip install git+https://github.com/AI4Finance-LLC/FinRL-Library.git

Collecting git+https://github.com/AI4Finance-LLC/FinRL-Library.git
  Cloning https://github.com/AI4Finance-LLC/FinRL-Library.git to /tmp/pip-req-build-_fli_hyb
  Running command git clone -q https://github.com/AI4Finance-LLC/FinRL-Library.git /tmp/pip-req-build-_fli_hyb
Collecting stockstats
  Downloading https://files.pythonhosted.org/packages/32/41/d3828c5bc0a262cb3112a4024108a3b019c183fa3b3078bff34bf25abf91/stockstats-0.3.2-py2.py3-none-any.whl
Collecting yfinance
  Downloading https://files.pythonhosted.org/packages/7a/e8/b9d7104d3a4bf39924799067592d9e59119fcfc900a425a12e80a3123ec8/yfinance-0.1.55.tar.gz
Collecting stable-baselines3[extra]
[?25l  Downloading https://files.pythonhosted.org/packages/f9/97/f6da6fcaa96934832c02acf95a32309cfa8646b010221f6c7a14bfcf40d0/stable_baselines3-0.11.1-py3-none-any.whl (152kB)
[K     |████████████████████████████████| 153kB 7.0MB/s 
Collecting pyfolio@ git+https://github.com/quantopian/pyfolio.git#egg=pyfolio-0.9.2
  Cloning https://github.com/

In [2]:
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
matplotlib.use('Agg')
import datetime

from finrl.config import config
from finrl.marketdata.yahoodownloader import YahooDownloader
from finrl.preprocessing.preprocessors import FeatureEngineer
from finrl.preprocessing.data import data_split
from finrl.env.env_stocktrading import StockTradingEnv
from finrl.model.models import DRLAgent
#from finrl.trade.backtest import backtest_stats, baseline_stats, backtest_plot

import sys
sys.path.append("../FinRL-Library")

In [3]:
import warnings
warnings.filterwarnings('ignore')

In [4]:
import os
if not os.path.exists("./" + config.DATA_SAVE_DIR):
    os.makedirs("./" + config.DATA_SAVE_DIR)
if not os.path.exists("./" + config.TRAINED_MODEL_DIR):
    os.makedirs("./" + config.TRAINED_MODEL_DIR)
if not os.path.exists("./" + config.TENSORBOARD_LOG_DIR):
    os.makedirs("./" + config.TENSORBOARD_LOG_DIR)
if not os.path.exists("./" + config.RESULTS_DIR):
    os.makedirs("./" + config.RESULTS_DIR)

In [5]:
# from config.py start_date is a string
config.START_DATE

'2000-01-01'

In [6]:
# from config.py end_date is a string
config.END_DATE

'2021-01-01'

In [7]:
# Download and save the data in a pandas DataFrame:
data_df = YahooDownloader(start_date = '2009-01-01',
                          end_date = '2021-01-01',
                          ticker_list = ['AAPL']).fetch_data()

[*********************100%***********************]  1 of 1 completed
Shape of DataFrame:  (3021, 8)


In [8]:
data_df.shape

(3021, 8)

In [9]:
data_df.head()

Unnamed: 0,date,open,high,low,close,volume,tic,day
0,2009-01-02,3.067143,3.251429,3.041429,2.79174,746015200,AAPL,4
1,2009-01-05,3.3275,3.435,3.311071,2.909563,1181608400,AAPL,0
2,2009-01-06,3.426786,3.470357,3.299643,2.861573,1289310400,AAPL,1
3,2009-01-07,3.278929,3.303571,3.223571,2.799739,753048800,AAPL,2
4,2009-01-08,3.229643,3.326786,3.215714,2.851728,673500800,AAPL,3


In [10]:
## we store the stockstats technical indicator column names in config.py
tech_indicator_list=config.TECHNICAL_INDICATORS_LIST
print(tech_indicator_list)

['macd', 'boll_ub', 'boll_lb', 'rsi_30', 'cci_30', 'dx_30', 'close_30_sma', 'close_60_sma']


In [11]:
## user can add more technical indicators
## check https://github.com/jealous/stockstats for different names
tech_indicator_list=tech_indicator_list+['kdjk','open_2_sma','boll','close_10.0_le_5_c','wr_10','dma','trix']
print(tech_indicator_list)

['macd', 'boll_ub', 'boll_lb', 'rsi_30', 'cci_30', 'dx_30', 'close_30_sma', 'close_60_sma', 'kdjk', 'open_2_sma', 'boll', 'close_10.0_le_5_c', 'wr_10', 'dma', 'trix']


In [12]:
fe = FeatureEngineer(
                    use_technical_indicator=True,
                    tech_indicator_list = tech_indicator_list,
                    use_turbulence=False,
                    user_defined_feature = False)

data_df = fe.preprocess_data(data_df)

Successfully added technical indicators


In [13]:
data_df.head()

Unnamed: 0,date,open,high,low,close,volume,tic,day,macd,boll_ub,boll_lb,rsi_30,cci_30,dx_30,close_30_sma,close_60_sma,kdjk,open_2_sma,boll,close_10.0_le_5_c,wr_10,dma,trix
0,2009-01-02,3.067143,3.251429,3.041429,2.79174,746015200,AAPL,4,0.0,3.017278,2.684025,100.0,66.666667,100.0,2.79174,2.79174,-6.29985,3.067143,2.79174,1.0,218.899551,0.0,0.670734
1,2009-01-05,3.3275,3.435,3.311071,2.909563,1181608400,AAPL,0,0.002643,3.017278,2.684025,100.0,66.666667,100.0,2.850651,2.850651,-15.368278,3.197322,2.850651,2.0,133.505133,0.0,0.670734
2,2009-01-06,3.426786,3.470357,3.299643,2.861573,1289310400,AAPL,1,0.00188,2.972787,2.735796,70.355711,46.771878,100.0,2.854292,2.854292,-24.222698,3.377143,2.854292,3.0,141.931537,0.0,0.391304
3,2009-01-07,3.278929,3.303571,3.223571,2.799739,753048800,AAPL,2,-0.000746,2.951725,2.729582,50.429389,-29.777993,43.607834,2.840654,2.840654,-34.930948,3.352857,2.840654,4.0,156.347447,0.0,0.195393
4,2009-01-08,3.229643,3.326786,3.215714,2.851728,673500800,AAPL,3,-8.8e-05,2.939568,2.746169,60.227126,-9.019317,48.357918,2.842869,2.842869,-38.029528,3.254286,2.842869,5.0,144.226688,0.0,0.125125


In [14]:
#train = data_split(data_df, start = config.START_DATE, end = config.START_TRADE_DATE)
#trade = data_split(data_df, start = config.START_TRADE_DATE, end = config.END_DATE)
train = data_split(data_df, start = '2009-01-01', end = '2019-01-01')
trade = data_split(data_df, start = '2019-01-01', end = '2021-01-01')

In [15]:

## we store the stockstats technical indicator column names in config.py
## check https://github.com/jealous/stockstats for different names
tech_indicator_list

['macd',
 'boll_ub',
 'boll_lb',
 'rsi_30',
 'cci_30',
 'dx_30',
 'close_30_sma',
 'close_60_sma',
 'kdjk',
 'open_2_sma',
 'boll',
 'close_10.0_le_5_c',
 'wr_10',
 'dma',
 'trix']

In [16]:

# the stock dimension is 1, because we only use the price data of AAPL.
len(train.tic.unique())

1

In [17]:
stock_dimension = len(train.tic.unique())
state_space = 1 + 2*stock_dimension + len(config.TECHNICAL_INDICATORS_LIST)*stock_dimension
print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")

Stock Dimension: 1, State Space: 11


In [18]:
env_kwargs = {
    "hmax": 100, 
    "initial_amount": 100000, 
    "buy_cost_pct": 0.001, 
    "sell_cost_pct": 0.001, 
    "state_space": state_space, 
    "stock_dim": stock_dimension, 
    "tech_indicator_list": config.TECHNICAL_INDICATORS_LIST, 
    "action_space": stock_dimension, 
    "reward_scaling": 1e-4
    
}

e_train_gym = StockTradingEnv(df = train, **env_kwargs)

In [19]:
env_train, _ = e_train_gym.get_sb_env()
print(type(env_train))

<class 'stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv'>


In [20]:

agent = DRLAgent(env = env_train)

## A2C

In [21]:
agent = DRLAgent(env = env_train)

A2C_PARAMS = {"n_steps": 5, "ent_coef": 0.005, "learning_rate": 0.0002}
model_a2c = agent.get_model(model_name="a2c",model_kwargs = A2C_PARAMS)

{'n_steps': 5, 'ent_coef': 0.005, 'learning_rate': 0.0002}
Using cuda device


In [22]:

trained_a2c = agent.train_model(model=model_a2c, 
                                tb_log_name='a2c',
                                total_timesteps=50000)

Logging to tensorboard_log/a2c/a2c_1
-------------------------------------
| time/                 |           |
|    fps                | 135       |
|    iterations         | 100       |
|    time_elapsed       | 3         |
|    total_timesteps    | 500       |
| train/                |           |
|    entropy_loss       | -1.44     |
|    explained_variance | -1.19e-07 |
|    learning_rate      | 0.0002    |
|    n_updates          | 99        |
|    policy_loss        | 0.000711  |
|    std                | 1.02      |
|    value_loss         | 3.93e-05  |
-------------------------------------
-------------------------------------
| time/                 |           |
|    fps                | 176       |
|    iterations         | 200       |
|    time_elapsed       | 5         |
|    total_timesteps    | 1000      |
| train/                |           |
|    entropy_loss       | -1.45     |
|    explained_variance | -1.19e-07 |
|    learning_rate      | 0.0002    |
|    n_update

## DDPG

In [23]:
agent = DRLAgent(env = env_train)
DDPG_PARAMS = {"batch_size": 64, "buffer_size": 500000, "learning_rate": 0.0001}


model_ddpg = agent.get_model("ddpg",model_kwargs = DDPG_PARAMS)

{'batch_size': 64, 'buffer_size': 500000, 'learning_rate': 0.0001}
Using cuda device


In [24]:

trained_ddpg = agent.train_model(model=model_ddpg, 
                             tb_log_name='ddpg',
                             total_timesteps=30000)

Logging to tensorboard_log/ddpg/ddpg_1
----------------------------------
| environment/        |          |
|    portfolio_value  | 8.6e+05  |
|    total_cost       | 99.9     |
|    total_reward     | 7.6e+05  |
|    total_reward_pct | 760      |
|    total_trades     | 2515     |
| time/               |          |
|    episodes         | 4        |
|    fps              | 117      |
|    time_elapsed     | 85       |
|    total timesteps  | 10064    |
| train/              |          |
|    actor_loss       | 170      |
|    critic_loss      | 971      |
|    learning_rate    | 0.0001   |
|    n_updates        | 7548     |
----------------------------------
----------------------------------
| environment/        |          |
|    portfolio_value  | 8.6e+05  |
|    total_cost       | 99.9     |
|    total_reward     | 7.6e+05  |
|    total_reward_pct | 760      |
|    total_trades     | 2515     |
| time/               |          |
|    episodes         | 8        |
|    fps        

## PPO

In [25]:
agent = DRLAgent(env = env_train)
PPO_PARAMS = {
    "n_steps": 2048,
    "ent_coef": 0.005,
    "learning_rate": 0.0001,
    "batch_size": 128,
}
model_ppo = agent.get_model("ppo",model_kwargs = PPO_PARAMS)

{'n_steps': 2048, 'ent_coef': 0.005, 'learning_rate': 0.0001, 'batch_size': 128}
Using cuda device


In [26]:
trained_ppo = agent.train_model(model=model_ppo, 
                             tb_log_name='ppo',
                             total_timesteps=80000)

Logging to tensorboard_log/ppo/ppo_1
-----------------------------
| time/              |      |
|    fps             | 391  |
|    iterations      | 1    |
|    time_elapsed    | 5    |
|    total_timesteps | 2048 |
-----------------------------
------------------------------------------
| environment/            |              |
|    portfolio_value      | 1.32e+05     |
|    total_cost           | 3.17e+03     |
|    total_reward         | 3.16e+04     |
|    total_reward_pct     | 31.6         |
|    total_trades         | 2461         |
| time/                   |              |
|    fps                  | 355          |
|    iterations           | 2            |
|    time_elapsed         | 11           |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 0.0029219335 |
|    clip_fraction        | 0.0375       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.42        |
|    explained_varianc

## TD3

In [27]:
agent = DRLAgent(env = env_train)
TD3_PARAMS = {"batch_size": 128, 
              "buffer_size": 1000000, 
              "learning_rate": 0.0003}

model_td3 = agent.get_model("td3",model_kwargs = TD3_PARAMS)

{'batch_size': 128, 'buffer_size': 1000000, 'learning_rate': 0.0003}
Using cuda device


In [28]:
trained_td3 = agent.train_model(model=model_td3, 
                             tb_log_name='td3',
                             total_timesteps=30000)

Logging to tensorboard_log/td3/td3_1
day: 2515, episode: 70
begin_total_asset: 100000.00
end_total_asset: 100000.00
total_reward: 0.00
total_cost: 0.00
total_trades: 0
----------------------------------
| environment/        |          |
|    portfolio_value  | 1e+05    |
|    total_cost       | 0        |
|    total_reward     | 0        |
|    total_reward_pct | 0        |
|    total_trades     | 0        |
| time/               |          |
|    episodes         | 4        |
|    fps              | 125      |
|    time_elapsed     | 79       |
|    total timesteps  | 10064    |
| train/              |          |
|    actor_loss       | 2.62e+03 |
|    critic_loss      | 1.44e+04 |
|    learning_rate    | 0.0003   |
|    n_updates        | 7548     |
----------------------------------
----------------------------------
| environment/        |          |
|    portfolio_value  | 1e+05    |
|    total_cost       | 0        |
|    total_reward     | 0        |
|    total_reward_pct | 0  

## SAC

In [29]:
agent = DRLAgent(env = env_train)
SAC_PARAMS = {
    "batch_size": 128,
    "buffer_size": 100000,
    "learning_rate": 0.00003,
    "learning_starts": 100,
    "ent_coef": "auto_0.1",
}

model_sac = agent.get_model("sac",model_kwargs = SAC_PARAMS)

{'batch_size': 128, 'buffer_size': 100000, 'learning_rate': 3e-05, 'learning_starts': 100, 'ent_coef': 'auto_0.1'}
Using cuda device


In [30]:
trained_sac = agent.train_model(model=model_sac, 
                             tb_log_name='sac',
                             total_timesteps=30000)

Logging to tensorboard_log/sac/sac_1
----------------------------------
| environment/        |          |
|    portfolio_value  | 1e+05    |
|    total_cost       | 0        |
|    total_reward     | 0        |
|    total_reward_pct | 0        |
|    total_trades     | 0        |
| time/               |          |
|    episodes         | 4        |
|    fps              | 60       |
|    time_elapsed     | 167      |
|    total timesteps  | 10064    |
| train/              |          |
|    actor_loss       | 317      |
|    critic_loss      | 58.4     |
|    ent_coef         | 0.135    |
|    ent_coef_loss    | 18.7     |
|    learning_rate    | 3e-05    |
|    n_updates        | 9963     |
----------------------------------
----------------------------------
| environment/        |          |
|    portfolio_value  | 1e+05    |
|    total_cost       | 0        |
|    total_reward     | 0        |
|    total_reward_pct | 0        |
|    total_trades     | 0        |
| time/           

## aaa

In [31]:
#from finrl.trade.backtest import backtest_stats, baseline_stats, backtest_plot

In [32]:
from finrl.trade.backtest import backtest_stats

In [33]:
from finrl.trade.backtest import backtest_plot

In [34]:
trade.head()

Unnamed: 0,date,open,high,low,close,volume,tic,day,macd,boll_ub,boll_lb,rsi_30,cci_30,dx_30,close_30_sma,close_60_sma,kdjk,open_2_sma,boll,close_10.0_le_5_c,wr_10,dma,trix
0,2019-01-02,38.7225,39.712502,38.557499,38.505024,148158800,AAPL,2,-2.016889,44.505522,35.444587,37.86734,-91.571542,42.250808,41.22572,46.488189,26.255061,39.1775,39.975055,0.0,64.517199,-6.875741,-0.761653
1,2019-01-03,35.994999,36.43,35.5,34.66964,365248800,AAPL,3,-2.199742,43.911981,34.998697,32.751902,-177.958729,55.246973,40.808453,46.157722,11.997918,37.358749,39.455339,0.0,113.050853,-7.085639,-0.763467
2,2019-01-04,36.1325,37.137501,35.950001,36.149662,234428400,AAPL,4,-2.19987,43.454764,34.762716,36.192789,-139.717644,47.060632,40.502857,45.854029,12.988335,36.063749,39.10874,0.0,87.077832,-7.044321,-0.766086
3,2019-01-07,37.174999,37.2075,36.474998,36.069202,219111200,AAPL,0,-2.181318,43.003009,34.56126,36.088942,-122.742724,46.245025,40.266752,45.53644,13.030644,36.653749,38.782134,0.0,86.884737,-6.900339,-0.767321
4,2019-01-08,37.389999,37.955002,37.130001,36.756794,164101200,AAPL,1,-2.087075,42.733426,34.398295,37.670002,-95.013556,37.53768,40.055192,45.272874,18.339891,37.282499,38.565861,0.0,71.041614,-6.589742,-0.759067


## Evaluation

### A2C

In [43]:
trade = data_split(data_df, start = '2019-01-01', end = '2021-01-01')
e_trade_gym = StockTradingEnv(df = trade, **env_kwargs)
env_trade, obs_trade = e_trade_gym.get_sb_env()

df_account_value, df_actions = DRLAgent.DRL_prediction(model=model_a2c, environment= e_trade_gym)

hit end!


In [45]:
print("==============Results_A2C===========")
now = datetime.datetime.now().strftime('%Y%m%d-%Hh%M')

perf_stats_all = backtest_stats(account_value= df_account_value)
perf_stats_all = pd.DataFrame(perf_stats_all)

Annual return          0.802528
Cumulative returns     2.256713
Annual volatility      0.349486
Sharpe ratio           1.865634
Calmar ratio           2.706133
Stability              0.938914
Max drawdown          -0.296559
Omega ratio            1.423152
Sortino ratio          2.829397
Skew                        NaN
Kurtosis                    NaN
Tail ratio             1.116030
Daily value at risk   -0.041444
dtype: float64


### DDPG

In [46]:
trade = data_split(data_df, start = '2019-01-01', end = '2021-01-01')
e_trade_gym = StockTradingEnv(df = trade, **env_kwargs)
env_trade, obs_trade = e_trade_gym.get_sb_env()

df_account_value, df_actions = DRLAgent.DRL_prediction(model=model_ddpg, environment= e_trade_gym)

hit end!


In [38]:
from finrl.trade.backtest import backtest_stats, backtest_plot

In [47]:
print("==============Results_DDPG===========")
now = datetime.datetime.now().strftime('%Y%m%d-%Hh%M')

perf_stats_all = backtest_stats(account_value= df_account_value)
perf_stats_all = pd.DataFrame(perf_stats_all)

Annual return          0.854155
Cumulative returns     2.446326
Annual volatility      0.366768
Sharpe ratio           1.872040
Calmar ratio           2.718149
Stability              0.938197
Max drawdown          -0.314242
Omega ratio            1.425131
Sortino ratio          2.833244
Skew                        NaN
Kurtosis                    NaN
Tail ratio             1.101082
Daily value at risk   -0.043484
dtype: float64


In [None]:
print("==============Get Backtest Results===========")
now = datetime.datetime.now().strftime('%Y%m%d-%Hh%M')

perf_stats_all = BackTestStats(account_value=df_account_value)
perf_stats_all = pd.DataFrame(perf_stats_all)
perf_stats_all.to_csv("./"+config.RESULTS_DIR+"/perf_stats_all_"+now+'.csv')

### PPO

In [48]:
trade = data_split(data_df, start = '2019-01-01', end = '2021-01-01')
e_trade_gym = StockTradingEnv(df = trade, **env_kwargs)
env_trade, obs_trade = e_trade_gym.get_sb_env()

df_account_value, df_actions = DRLAgent.DRL_prediction(model=model_ppo, environment= e_trade_gym)

hit end!


In [49]:
print("==============Results_PPO===========")
now = datetime.datetime.now().strftime('%Y%m%d-%Hh%M')

perf_stats_all = backtest_stats(account_value= df_account_value)
perf_stats_all = pd.DataFrame(perf_stats_all)

Annual return          0.784683
Cumulative returns     2.192425
Annual volatility      0.356959
Sharpe ratio           1.806201
Calmar ratio           2.569584
Stability              0.936927
Max drawdown          -0.305374
Omega ratio            1.412866
Sortino ratio          2.718912
Skew                        NaN
Kurtosis                    NaN
Tail ratio             1.094497
Daily value at risk   -0.042414
dtype: float64


### TD3

In [50]:
trade = data_split(data_df, start = '2019-01-01', end = '2021-01-01')
e_trade_gym = StockTradingEnv(df = trade, **env_kwargs)
env_trade, obs_trade = e_trade_gym.get_sb_env()

df_account_value, df_actions = DRLAgent.DRL_prediction(model=model_td3, environment= e_trade_gym)

hit end!


In [51]:
print("==============Results_TD3===========")
now = datetime.datetime.now().strftime('%Y%m%d-%Hh%M')

perf_stats_all = backtest_stats(account_value= df_account_value)
perf_stats_all = pd.DataFrame(perf_stats_all)

Annual return          0.0
Cumulative returns     0.0
Annual volatility      0.0
Sharpe ratio           NaN
Calmar ratio           NaN
Stability              0.0
Max drawdown           0.0
Omega ratio            NaN
Sortino ratio          NaN
Skew                   NaN
Kurtosis               NaN
Tail ratio             NaN
Daily value at risk    0.0
dtype: float64


### SAC

In [52]:
trade = data_split(data_df, start = '2019-01-01', end = '2021-01-01')
e_trade_gym = StockTradingEnv(df = trade, **env_kwargs)
env_trade, obs_trade = e_trade_gym.get_sb_env()

df_account_value, df_actions = DRLAgent.DRL_prediction(model=model_sac, environment= e_trade_gym)

hit end!


In [53]:
print("==============Results_SAC===========")
now = datetime.datetime.now().strftime('%Y%m%d-%Hh%M')

perf_stats_all = backtest_stats(account_value= df_account_value)
perf_stats_all = pd.DataFrame(perf_stats_all)

Annual return          0.0
Cumulative returns     0.0
Annual volatility      0.0
Sharpe ratio           NaN
Calmar ratio           NaN
Stability              0.0
Max drawdown           0.0
Omega ratio            NaN
Sortino ratio          NaN
Skew                   NaN
Kurtosis               NaN
Tail ratio             NaN
Daily value at risk    0.0
dtype: float64
