# Install finrl package

In [None]:
!pip install git+https://github.com/AI4Finance-LLC/FinRL-Library.git

# Import packages

In [14]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import matplotlib
matplotlib.use('Agg')
import datetime
import os
from finrl import config
from finrl.meta.preprocessor.yahoodownloader import YahooDownloader
from finrl.meta.preprocessor.preprocessors import FeatureEngineer
from finrl.meta.preprocessor.preprocessors import data_split
from finrl.meta.env_stock_trading.env_stocktrading import StockTradingEnv
from finrl.agents.stablebaselines3.models import DRLAgent
#from finrl.trade.backtest import BackTestStats, BaselineStats, BackTestPlot
import sys
sys.path.append("../FinRL-Library")

#Create folders for data, result metrics, and tensorboard logs

if not os.path.exists("./" + config.DATA_SAVE_DIR):
    os.makedirs("./" + config.DATA_SAVE_DIR)
if not os.path.exists("./" + config.TRAINED_MODEL_DIR):
    os.makedirs("./" + config.TRAINED_MODEL_DIR)
if not os.path.exists("./" + config.TENSORBOARD_LOG_DIR):
    os.makedirs("./" + config.TENSORBOARD_LOG_DIR)
if not os.path.exists("./" + config.RESULTS_DIR):
    os.makedirs("./" + config.RESULTS_DIR)

# Download **Apple stocks data** from Yahoo Finance api

In [16]:
data_df = YahooDownloader(start_date = '2009-01-01',
                          end_date = '2021-01-01',
                          ticker_list = ['AAPL']).fetch_data()
data_df.head()

[*********************100%***********************]  1 of 1 completed
Shape of DataFrame:  (3021, 8)


Unnamed: 0,date,open,high,low,close,volume,tic,day
0,2009-01-02,3.067143,3.251429,3.041429,2.76733,746015200,AAPL,4
1,2009-01-05,3.3275,3.435,3.311071,2.884122,1181608400,AAPL,0
2,2009-01-06,3.426786,3.470357,3.299643,2.836552,1289310400,AAPL,1
3,2009-01-07,3.278929,3.303571,3.223571,2.775258,753048800,AAPL,2
4,2009-01-08,3.229643,3.326786,3.215714,2.826794,673500800,AAPL,3


# Preprocessing

In [42]:
## let’s store the technical indicator column names in config.py
tech_indicator_list=config.INDICATORS
print(tech_indicator_list)

#Passing parameter to FeatureEngineer for adding technical indicators.
fe = FeatureEngineer(
                    use_technical_indicator=True,
                    tech_indicator_list = tech_indicator_list,
                    use_turbulence=False,
                    user_defined_feature = False)

data_df = fe.preprocess_data(data_df)
data_df.head()

['macd', 'boll_ub', 'boll_lb', 'rsi_30', 'cci_30', 'dx_30', 'close_30_sma', 'close_60_sma']
Successfully added technical indicators


Unnamed: 0,date,open,high,low,close,volume,tic,day,macd,boll_ub,boll_lb,rsi_30,cci_30,dx_30,close_30_sma,close_60_sma
0,2009-01-02,3.067143,3.251429,3.041429,2.76733,746015200,AAPL,4,0.0,2.990895,2.660556,100.0,66.666667,100.0,2.76733,2.76733
1,2009-01-05,3.3275,3.435,3.311071,2.884122,1181608400,AAPL,0,0.00262,2.990895,2.660556,100.0,66.666667,100.0,2.825726,2.825726
2,2009-01-06,3.426786,3.470357,3.299643,2.836552,1289310400,AAPL,1,0.001864,2.946794,2.711875,70.355611,46.823483,100.0,2.829335,2.829335
3,2009-01-07,3.278929,3.303571,3.223571,2.775258,753048800,AAPL,2,-0.000739,2.925916,2.705715,50.429133,-29.722578,43.607834,2.815815,2.815815
4,2009-01-08,3.229643,3.326786,3.215714,2.826794,673500800,AAPL,3,-8.7e-05,2.913865,2.722157,60.227086,-9.062869,48.357918,2.818011,2.818011


# Trading Environment Building
This environment is based on OpenAI Gym framework, which simulates hte live stock market data with real market data. Let’s split the dataset into train(2009-01-01 to 2018-12-31) and trade(2019-01-01 to 2020-09-30) datasets.

## Splitting data

In [44]:
train = data_split(data_df, start = '2009-01-01', end = '2019-01-01')
trade = data_split(data_df, start = '2019-01-01', end = '2021-01-01')

## Initiate environment

In [51]:
stock_dimension = len(train.tic.unique())
state_space = 1 + 2*stock_dimension + len(config.INDICATORS)*stock_dimension
print(f"Stock data Dimensions: {stock_dimension}, State Spaces: {state_space}")
env_kwargs = {
    "hmax": 100, 
    "initial_amount": 100000, 
    "num_stock_shares":[20],
    "sell_cost_pct":[160.5],
    "buy_cost_pct": [150.5], 
    "state_space": state_space, 
    "stock_dim": stock_dimension, 
    "tech_indicator_list": config.INDICATORS, 
    "action_space": stock_dimension, 
    "reward_scaling": 1e-4}
e_train_gym = StockTradingEnv(df = train, **env_kwargs)
env_train, _ = e_train_gym.get_sb_env()
print(type(env_train))

Stock data Dimensions: 1, State Spaces: 11
<class 'stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv'>


# Implement DRL Algorithms
FinRL library uses fine-tuned algorithms such as  DQN, DDPG, Multi-Agent DDPG, PPO, SAC, A2C, and TD3. The implementation of DRL algorithms are based on OpenAI and Stable Baselines   
`agent = DRLAgent(env = env_train)`

## Training on 5 different models
We are going to see implementation in 5 different models provided by FinRL: A2C, DDPG, PPO, TD3, and SAC

### Model 1: A2C

In [52]:
# 1. Model: A2C
agent = DRLAgent(env = env_train)
A2C_PARAMS = {"n_steps": 5, "ent_coef": 0.005, "learning_rate": 0.0002}
model_a2c = agent.get_model(model_name="a2c",model_kwargs = A2C_PARAMS)
trained_a2c = agent.train_model(model=model_a2c, 
                                tb_log_name='a2c',
                                total_timesteps=50000)

{'n_steps': 5, 'ent_coef': 0.005, 'learning_rate': 0.0002}
Using cuda device
-----------------------------------------
| time/                 |               |
|    fps                | 359           |
|    iterations         | 100           |
|    time_elapsed       | 1             |
|    total_timesteps    | 500           |
| train/                |               |
|    entropy_loss       | -1.42         |
|    explained_variance | -9.68e+03     |
|    learning_rate      | 0.0002        |
|    n_updates          | 99            |
|    policy_loss        | -0.0723       |
|    reward             | -0.0001252819 |
|    std                | 1             |
|    value_loss         | 0.00462       |
-----------------------------------------
----------------------------------------
| time/                 |              |
|    fps                | 361          |
|    iterations         | 200          |
|    time_elapsed       | 2            |
|    total_timesteps    | 1000         |
| tra

### Model 2: DDPG

In [53]:
#2. Model: DDPG
agent = DRLAgent(env = env_train)
DDPG_PARAMS = {"batch_size": 64, "buffer_size": 500000, "learning_rate": 0.0001}
model_ddpg = agent.get_model("ddpg",model_kwargs = DDPG_PARAMS)

trained_ddpg = agent.train_model(model=model_ddpg, 
                             tb_log_name='ddpg',
                             total_timesteps=30000)

{'batch_size': 64, 'buffer_size': 500000, 'learning_rate': 0.0001}
Using cuda device
---------------------------------
| time/              |          |
|    episodes        | 4        |
|    fps             | 99       |
|    time_elapsed    | 101      |
|    total_timesteps | 10064    |
| train/             |          |
|    actor_loss      | -155     |
|    critic_loss     | 649      |
|    learning_rate   | 0.0001   |
|    n_updates       | 7548     |
|    reward          | 0.0      |
---------------------------------
---------------------------------
| time/              |          |
|    episodes        | 8        |
|    fps             | 85       |
|    time_elapsed    | 234      |
|    total_timesteps | 20128    |
| train/             |          |
|    actor_loss      | -51.8    |
|    critic_loss     | 376      |
|    learning_rate   | 0.0001   |
|    n_updates       | 17612    |
|    reward          | 0.0      |
---------------------------------
day: 2515, episode: 30
begin_to

### Model 3: PPO

In [54]:
#3. Model: PPO
agent = DRLAgent(env = env_train)
PPO_PARAMS = {
    "n_steps": 2048,
    "ent_coef": 0.005,
    "learning_rate": 0.0001,
    "batch_size": 128,}
model_ppo = agent.get_model("ppo",model_kwargs = PPO_PARAMS)
trained_ppo = agent.train_model(model=model_ppo, 
                             tb_log_name='ppo',
                             total_timesteps=80000)

{'n_steps': 2048, 'ent_coef': 0.005, 'learning_rate': 0.0001, 'batch_size': 128}
Using cuda device
---------------------------------------
| time/              |                |
|    fps             | 557            |
|    iterations      | 1              |
|    time_elapsed    | 3              |
|    total_timesteps | 2048           |
| train/             |                |
|    reward          | -0.00027984008 |
---------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 505           |
|    iterations           | 2             |
|    time_elapsed         | 8             |
|    total_timesteps      | 4096          |
| train/                  |               |
|    approx_kl            | 0.0001815393  |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.42         |
|    explained_variance   | -1.3          |
|    learning

### Model 4: TD3

In [55]:
#4. Model: TD3
agent = DRLAgent(env = env_train)
TD3_PARAMS = {"batch_size": 128, 
              "buffer_size": 1000000, 
              "learning_rate": 0.0003}

model_td3 = agent.get_model("td3",model_kwargs = TD3_PARAMS)
trained_td3 = agent.train_model(model=model_td3, 
                             tb_log_name='td3',
                             total_timesteps=30000)

{'batch_size': 128, 'buffer_size': 1000000, 'learning_rate': 0.0003}
Using cuda device
day: 2515, episode: 70
begin_total_asset: 100762.50
end_total_asset: 862.32
total_reward: -99900.18
total_cost: 99278.64
total_trades: 2515
Sharpe: 0.058
-------------------------------------
| time/              |              |
|    episodes        | 4            |
|    fps             | 109          |
|    time_elapsed    | 91           |
|    total_timesteps | 10064        |
| train/             |              |
|    actor_loss      | 15           |
|    critic_loss     | 6.64e+03     |
|    learning_rate   | 0.0003       |
|    n_updates       | 7548         |
|    reward          | 0.0007663914 |
-------------------------------------
-------------------------------------
| time/              |              |
|    episodes        | 8            |
|    fps             | 97           |
|    time_elapsed    | 206          |
|    total_timesteps | 20128        |
| train/             |              |

### Model 5: SAC

In [56]:
#5. Model: SAC
agent = DRLAgent(env = env_train)
SAC_PARAMS = {
    "batch_size": 128,
    "buffer_size": 100000,
    "learning_rate": 0.0001,
    "learning_starts": 100,
    "ent_coef": "auto_0.1",
}


# Trading
We have trained five different models on our datasets now let’s trade using the environment class we inintialized above for creating trading environment, let’s assume that you are having $100K initial money on date 2019-01-01. We will use the TD3 trained model to trade AAPL.

In [57]:
trade.head()

Unnamed: 0,date,open,high,low,close,volume,tic,day,macd,boll_ub,boll_lb,rsi_30,cci_30,dx_30,close_30_sma,close_60_sma
0,2019-01-02,38.7225,39.712502,38.557499,38.168346,148158800,AAPL,2,-1.999254,44.116378,35.134672,37.867349,-91.593349,42.250808,40.865256,46.081709
1,2019-01-03,35.994999,36.43,35.5,34.366505,365248800,AAPL,3,-2.180507,43.528024,34.692683,32.751921,-177.944136,55.246973,40.451637,45.754132
2,2019-01-04,36.1325,37.137501,35.950001,35.83358,234428400,AAPL,4,-2.180634,43.074805,34.458765,36.192793,-139.748808,47.060632,40.148713,45.453094
3,2019-01-07,37.174999,37.2075,36.474998,35.753815,219111200,AAPL,0,-2.162246,42.627003,34.259068,36.088933,-122.75221,46.245025,39.914672,45.138282
4,2019-01-08,37.389999,37.955002,37.130001,36.435398,164101200,AAPL,1,-2.068827,42.359778,34.097527,37.670001,-95.020349,37.53768,39.704962,44.87702


Make a prediction and get the account value change

In [58]:
trade = data_split(data_df, start = '2019-01-01', end = '2021-01-01')
e_trade_gym = StockTradingEnv(df = trade, **env_kwargs)
env_trade, obs_trade = e_trade_gym.get_sb_env()

df_account_value, df_actions = DRLAgent.DRL_prediction(model=trained_td3, environment = env_trade)

TypeError: ignored