# Asset Portfolio Management using Deep Reinforcement Learning
---

## 5.0 Deep Reinforcement Learning Portfolios

### 5.1 Import Packages

In [1]:
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
matplotlib.use('Agg')
import datetime

In [2]:
# Imports from the FinRL Library

from finrl.config import config
from finrl.trade.backtest import BackTestStats, BaselineStats, BackTestPlot, backtest_strat, baseline_strat
# NOTE(review): the next line re-imports backtest_strat and baseline_strat,
# which the line above already brings into scope.
from finrl.trade.backtest import backtest_strat, baseline_strat

import sys
# NOTE(review): this path is appended only after the finrl imports above have
# run, so it cannot influence how they were resolved. Presumably it exists for
# modules imported later in the notebook -- confirm.
sys.path.append("../FinRL-Library")



### 5.2 Load Data

In [3]:
# Restore the train/test frames from IPython's %store database.
# NOTE(review): both variables must have been saved with `%store` in an earlier
# session or notebook; nothing in this notebook creates them.
%store -r train_df
%store -r test_df

In [4]:
# Preview the first three rows of the training frame.
train_df.head(n=3)

Unnamed: 0,date,open,high,low,close,volume,tic,atr,bbw,obv,cmf,macd,adx,sma,ema,cci,rsi,cov_list
0,2009-12-31,7.611786,7.619643,7.52,6.492372,352410800.0,AAPL,1.089849,14.391304,18479730000.0,-14.498982,0.106168,20.032052,6.237017,6.309319,141.146843,64.127926,"[[0.17945278689647873, 0.22869766853329948, 0...."
0,2009-12-31,40.900002,41.080002,40.490002,34.291534,4030500.0,AXP,6.638453,7.839731,48835130000.0,-14.537243,0.324763,23.340487,34.681527,34.58227,16.078828,50.694489,"[[0.17945278689647873, 0.22869766853329948, 0...."
0,2009-12-31,55.0,55.220001,54.049999,41.856789,2189400.0,BA,12.891344,4.916607,67988630000.0,-26.245485,0.444625,16.902318,42.400007,42.37006,-75.723504,49.726875,"[[0.17945278689647873, 0.22869766853329948, 0...."


In [5]:
# Names of the technical-indicator columns handed to the portfolio environment.
tech_indicator_list = [
    'atr',
    'bbw',
    'obv',
    'cmf',
    'macd',
    'adx',
    'sma',
    'ema',
    'cci',
    'rsi',
]

### 5.4 Implement DRL Algorithms

In [11]:
import env_portfolio
from env_portfolio import StockPortfolioEnv

import models
from models import DRLAgent

In [7]:
# Number of distinct tickers in the training frame; the state space is set to
# the same size.
unique_tickers = train_df["tic"].unique()
stock_dimension = len(unique_tickers)
state_space = stock_dimension
print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")


Stock Dimension: 30, State Space: 30


In [8]:
# Keyword arguments reused for every StockPortfolioEnv built in this notebook.
env_kwargs = dict(
    hmax=100,
    initial_amount=1000000,
    transaction_cost_pct=0.001,
    state_space=state_space,
    stock_dim=stock_dimension,
    tech_indicator_list=tech_indicator_list,
    action_space=stock_dimension,
    reward_scaling=1e-4,
)

# Gym-style environment built over the training frame.
e_train_gym = StockPortfolioEnv(df=train_df, **env_kwargs)

In [9]:
# get_sb_env() returns a pair; only the first element (the environment) is
# needed for training, so the second is discarded.
env_train, _ = e_train_gym.get_sb_env()
env_train_type = type(env_train)
print(env_train_type)

<class 'stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv'>


#### 5.4.1 Model 1: A2C

In [12]:
# Build an agent on the training environment and configure the A2C model.
agent = DRLAgent(env=env_train)

A2C_PARAMS = dict(n_steps=5, ent_coef=0.005, learning_rate=0.0002)
model_a2c = agent.get_model(
    model_name="a2c",
    model_kwargs=A2C_PARAMS,
)

{'n_steps': 5, 'ent_coef': 0.005, 'learning_rate': 0.0002}
Using cpu device


In [13]:
# Train A2C for 50,000 timesteps, logging under the 'a2c' TensorBoard run name.
trained_a2c = agent.train_model(
    model=model_a2c,
    tb_log_name='a2c',
    total_timesteps=50000,
)

Logging to tensorboard_log/a2c\a2c_12
------------------------------------
| time/                 |          |
|    fps                | 15       |
|    iterations         | 100      |
|    time_elapsed       | 32       |
|    total_timesteps    | 500      |
| train/                |          |
|    entropy_loss       | -42.5    |
|    explained_variance | nan      |
|    learning_rate      | 0.0002   |
|    n_updates          | 99       |
|    policy_loss        | 1.51e+08 |
|    std                | 0.997    |
|    value_loss         | 1.57e+13 |
------------------------------------
------------------------------------
| time/                 |          |
|    fps                | 27       |
|    iterations         | 200      |
|    time_elapsed       | 36       |
|    total_timesteps    | 1000     |
| train/                |          |
|    entropy_loss       | -42.5    |
|    explained_variance | nan      |
|    learning_rate      | 0.0002   |
|    n_updates          | 199      |


-------------------------------------
| time/                 |           |
|    fps                | 74        |
|    iterations         | 1500      |
|    time_elapsed       | 100       |
|    total_timesteps    | 7500      |
| train/                |           |
|    entropy_loss       | -42.3     |
|    explained_variance | -1.49e+24 |
|    learning_rate      | 0.0002    |
|    n_updates          | 1499      |
|    policy_loss        | 1.98e+08  |
|    std                | 0.992     |
|    value_loss         | 3.07e+13  |
-------------------------------------
-------------------------------------
| time/                 |           |
|    fps                | 73        |
|    iterations         | 1600      |
|    time_elapsed       | 108       |
|    total_timesteps    | 8000      |
| train/                |           |
|    entropy_loss       | -42.3     |
|    explained_variance | -2.64e+24 |
|    learning_rate      | 0.0002    |
|    n_updates          | 1599      |
|    policy_

------------------------------------
| time/                 |          |
|    fps                | 86       |
|    iterations         | 2900     |
|    time_elapsed       | 166      |
|    total_timesteps    | 14500    |
| train/                |          |
|    entropy_loss       | -42.2    |
|    explained_variance | nan      |
|    learning_rate      | 0.0002   |
|    n_updates          | 2899     |
|    policy_loss        | 2.66e+08 |
|    std                | 0.989    |
|    value_loss         | 4.47e+13 |
------------------------------------
------------------------------------
| time/                 |          |
|    fps                | 88       |
|    iterations         | 3000     |
|    time_elapsed       | 170      |
|    total_timesteps    | 15000    |
| train/                |          |
|    entropy_loss       | -42.2    |
|    explained_variance | nan      |
|    learning_rate      | 0.0002   |
|    n_updates          | 2999     |
|    policy_loss        | 3.17e+08 |
|

-------------------------------------
| time/                 |           |
|    fps                | 97        |
|    iterations         | 4300      |
|    time_elapsed       | 220       |
|    total_timesteps    | 21500     |
| train/                |           |
|    entropy_loss       | -42.1     |
|    explained_variance | -6.55e+23 |
|    learning_rate      | 0.0002    |
|    n_updates          | 4299      |
|    policy_loss        | 2.8e+08   |
|    std                | 0.985     |
|    value_loss         | 5.38e+13  |
-------------------------------------
------------------------------------
| time/                 |          |
|    fps                | 97       |
|    iterations         | 4400     |
|    time_elapsed       | 224      |
|    total_timesteps    | 22000    |
| train/                |          |
|    entropy_loss       | -42.1    |
|    explained_variance | nan      |
|    learning_rate      | 0.0002   |
|    n_updates          | 4399     |
|    policy_loss       

------------------------------------
| time/                 |          |
|    fps                | 102      |
|    iterations         | 5700     |
|    time_elapsed       | 276      |
|    total_timesteps    | 28500    |
| train/                |          |
|    entropy_loss       | -42      |
|    explained_variance | nan      |
|    learning_rate      | 0.0002   |
|    n_updates          | 5699     |
|    policy_loss        | 3.8e+08  |
|    std                | 0.98     |
|    value_loss         | 9.27e+13 |
------------------------------------
begin_total_asset:1000000
end_total_asset:3501781.5522774304
Sharpe:  1.0658989539131953
------------------------------------
| time/                 |          |
|    fps                | 103      |
|    iterations         | 5800     |
|    time_elapsed       | 281      |
|    total_timesteps    | 29000    |
| train/                |          |
|    entropy_loss       | -42      |
|    explained_variance | nan      |
|    learning_rate     

begin_total_asset:1000000
end_total_asset:3518904.40186527
Sharpe:  1.0726512138367077
-------------------------------------
| time/                 |           |
|    fps                | 106       |
|    iterations         | 7100      |
|    time_elapsed       | 333       |
|    total_timesteps    | 35500     |
| train/                |           |
|    entropy_loss       | -41.8     |
|    explained_variance | -2.99e+21 |
|    learning_rate      | 0.0002    |
|    n_updates          | 7099      |
|    policy_loss        | 1.29e+08  |
|    std                | 0.976     |
|    value_loss         | 1.16e+13  |
-------------------------------------
------------------------------------
| time/                 |          |
|    fps                | 106      |
|    iterations         | 7200     |
|    time_elapsed       | 337      |
|    total_timesteps    | 36000    |
| train/                |          |
|    entropy_loss       | -41.8    |
|    explained_variance | nan      |
|    learn

begin_total_asset:1000000
end_total_asset:3494233.104991727
Sharpe:  1.0672163049349839
-------------------------------------
| time/                 |           |
|    fps                | 106       |
|    iterations         | 8500      |
|    time_elapsed       | 397       |
|    total_timesteps    | 42500     |
| train/                |           |
|    entropy_loss       | -41.7     |
|    explained_variance | -4.43e+22 |
|    learning_rate      | 0.0002    |
|    n_updates          | 8499      |
|    policy_loss        | 1.45e+08  |
|    std                | 0.97      |
|    value_loss         | 1.46e+13  |
-------------------------------------
-------------------------------------
| time/                 |           |
|    fps                | 106       |
|    iterations         | 8600      |
|    time_elapsed       | 402       |
|    total_timesteps    | 43000     |
| train/                |           |
|    entropy_loss       | -41.7     |
|    explained_variance | -9.88e+22 |


-------------------------------------
| time/                 |           |
|    fps                | 107       |
|    iterations         | 9900      |
|    time_elapsed       | 459       |
|    total_timesteps    | 49500     |
| train/                |           |
|    entropy_loss       | -41.6     |
|    explained_variance | -1.16e+22 |
|    learning_rate      | 0.0002    |
|    n_updates          | 9899      |
|    policy_loss        | 1.77e+08  |
|    std                | 0.967     |
|    value_loss         | 2.42e+13  |
-------------------------------------
------------------------------------
| time/                 |          |
|    fps                | 107      |
|    iterations         | 10000    |
|    time_elapsed       | 464      |
|    total_timesteps    | 50000    |
| train/                |          |
|    entropy_loss       | -41.6    |
|    explained_variance | nan      |
|    learning_rate      | 0.0002   |
|    n_updates          | 9999     |
|    policy_loss       

#### 5.4.2 Model 2: PPO

In [14]:
# Fresh agent on the training environment, configured for PPO.
agent = DRLAgent(env=env_train)

PPO_PARAMS = dict(
    n_steps=2048,
    ent_coef=0.005,
    learning_rate=0.0001,
    batch_size=128,
)
model_ppo = agent.get_model(model_name="ppo", model_kwargs=PPO_PARAMS)

{'n_steps': 2048, 'ent_coef': 0.005, 'learning_rate': 0.0001, 'batch_size': 128}
Using cpu device


In [15]:
# Train PPO for 80,000 timesteps, logging under the 'ppo' TensorBoard run name.
trained_ppo = agent.train_model(
    model=model_ppo,
    tb_log_name='ppo',
    total_timesteps=80000,
)

Logging to tensorboard_log/ppo\ppo_3
-----------------------------
| time/              |      |
|    fps             | 168  |
|    iterations      | 1    |
|    time_elapsed    | 12   |
|    total_timesteps | 2048 |
-----------------------------
begin_total_asset:1000000
end_total_asset:3586009.99038301
Sharpe:  1.1077511963397075
-------------------------------------------
| time/                   |               |
|    fps                  | 154           |
|    iterations           | 2             |
|    time_elapsed         | 26            |
|    total_timesteps      | 4096          |
| train/                  |               |
|    approx_kl            | 1.3038516e-08 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -42.6         |
|    explained_variance   | -2.62e+16     |
|    learning_rate        | 0.0001        |
|    loss                 | 6.27e+14      |
|    n_updates            | 10            |
|    p

------------------------------------------
| time/                   |              |
|    fps                  | 151          |
|    iterations           | 10           |
|    time_elapsed         | 134          |
|    total_timesteps      | 20480        |
| train/                  |              |
|    approx_kl            | 4.004687e-08 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -42.6        |
|    explained_variance   | -5.46e+17    |
|    learning_rate        | 0.0001       |
|    loss                 | 5.82e+14     |
|    n_updates            | 90           |
|    policy_gradient_loss | -7.08e-07    |
|    std                  | 1            |
|    value_loss           | 1.22e+15     |
------------------------------------------
begin_total_asset:1000000
end_total_asset:3526665.1509857476
Sharpe:  1.0954194628562457
------------------------------------------
| time/                   |              |
|    fps

begin_total_asset:1000000
end_total_asset:3755358.0457875743
Sharpe:  1.1462773234248598
-------------------------------------------
| time/                   |               |
|    fps                  | 150           |
|    iterations           | 19            |
|    time_elapsed         | 257           |
|    total_timesteps      | 38912         |
| train/                  |               |
|    approx_kl            | -1.899898e-07 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -42.6         |
|    explained_variance   | -5.29e+18     |
|    learning_rate        | 0.0001        |
|    loss                 | 6.63e+14      |
|    n_updates            | 180           |
|    policy_gradient_loss | -3.49e-07     |
|    std                  | 1             |
|    value_loss           | 1.28e+15      |
-------------------------------------------
begin_total_asset:1000000
end_total_asset:3587144.5603054375
Sharpe:  1.105

begin_total_asset:1000000
end_total_asset:3783601.6915341998
Sharpe:  1.1519576352470184
--------------------------------------------
| time/                   |                |
|    fps                  | 150            |
|    iterations           | 28             |
|    time_elapsed         | 380            |
|    total_timesteps      | 57344          |
| train/                  |                |
|    approx_kl            | -1.2759119e-07 |
|    clip_fraction        | 0              |
|    clip_range           | 0.2            |
|    entropy_loss         | -42.6          |
|    explained_variance   | -3.1e+19       |
|    learning_rate        | 0.0001         |
|    loss                 | 6.84e+14       |
|    n_updates            | 270            |
|    policy_gradient_loss | -5.38e-07      |
|    std                  | 1              |
|    value_loss           | 1.31e+15       |
--------------------------------------------
begin_total_asset:1000000
end_total_asset:3515356.370611

--------------------------------------------
| time/                   |                |
|    fps                  | 151            |
|    iterations           | 36             |
|    time_elapsed         | 486            |
|    total_timesteps      | 73728          |
| train/                  |                |
|    approx_kl            | -1.4156103e-07 |
|    clip_fraction        | 0              |
|    clip_range           | 0.2            |
|    entropy_loss         | -42.6          |
|    explained_variance   | -1.61e+20      |
|    learning_rate        | 0.0001         |
|    loss                 | 6.12e+14       |
|    n_updates            | 350            |
|    policy_gradient_loss | -6.09e-07      |
|    std                  | 1              |
|    value_loss           | 1.31e+15       |
--------------------------------------------
begin_total_asset:1000000
end_total_asset:3693570.9882558226
Sharpe:  1.1353062287718179
------------------------------------------
| time/      

#### 5.4.3 Model 3: DDPG

In [16]:
# Fresh agent on the training environment, configured for DDPG.
agent = DRLAgent(env=env_train)

DDPG_PARAMS = dict(batch_size=128, buffer_size=50000, learning_rate=0.001)
model_ddpg = agent.get_model(model_name="ddpg", model_kwargs=DDPG_PARAMS)

{'batch_size': 128, 'buffer_size': 50000, 'learning_rate': 0.001}
Using cpu device


In [17]:
# Train DDPG for 50,000 timesteps, logging under the 'ddpg' TensorBoard run name.
trained_ddpg = agent.train_model(
    model=model_ddpg,
    tb_log_name='ddpg',
    total_timesteps=50000,
)

Logging to tensorboard_log/ddpg\ddpg_4
begin_total_asset:1000000
end_total_asset:3810748.1872714735
Sharpe:  1.183654806403746
begin_total_asset:1000000
end_total_asset:3796307.734087349
Sharpe:  1.182186906834218
begin_total_asset:1000000
end_total_asset:3796307.734087349
Sharpe:  1.182186906834218
begin_total_asset:1000000
end_total_asset:3796307.734087349
Sharpe:  1.182186906834218
---------------------------------
| time/              |          |
|    episodes        | 4        |
|    fps             | 15       |
|    time_elapsed    | 563      |
|    total timesteps | 8860     |
| train/             |          |
|    actor_loss      | 1.63e+07 |
|    critic_loss     | 1.84e+13 |
|    learning_rate   | 0.001    |
|    n_updates       | 6645     |
---------------------------------
begin_total_asset:1000000
end_total_asset:3796307.734087349
Sharpe:  1.182186906834218
begin_total_asset:1000000
end_total_asset:3796307.734087349
Sharpe:  1.182186906834218
begin_total_asset:1000000
end_

### 5.5 Evaluate the Trained Models on the Training Data

In [18]:
# A2C on the training data: rebuild the environment over train_df and run the
# trained model through it to collect daily returns and portfolio weights.
e_trade_gym = StockPortfolioEnv(df=train_df, **env_kwargs)
env_trade, obs_trade = e_trade_gym.get_sb_env()

a2c_train_daily_return, a2c_train_weights = DRLAgent.DRL_prediction(
    model=trained_a2c,
    test_data=train_df,
    test_env=env_trade,
    test_obs=obs_trade,
)

begin_total_asset:1000000
end_total_asset:3502150.5010687886
Sharpe:  1.0789379424597758


In [19]:
# PPO on the training data: rebuild the environment over train_df and run the
# trained model through it to collect daily returns and portfolio weights.
e_trade_gym = StockPortfolioEnv(df=train_df, **env_kwargs)
env_trade, obs_trade = e_trade_gym.get_sb_env()

ppo_train_daily_return, ppo_train_weights = DRLAgent.DRL_prediction(
    model=trained_ppo,
    test_data=train_df,
    test_env=env_trade,
    test_obs=obs_trade,
)

begin_total_asset:1000000
end_total_asset:3685175.5565437987
Sharpe:  1.1277054479499276


In [20]:
# DDPG on the training data: rebuild the environment over train_df and run the
# trained model through it to collect daily returns and portfolio weights.
e_trade_gym = StockPortfolioEnv(df=train_df, **env_kwargs)
env_trade, obs_trade = e_trade_gym.get_sb_env()

ddpg_train_daily_return, ddpg_train_weights = DRLAgent.DRL_prediction(
    model=trained_ddpg,
    test_data=train_df,
    test_env=env_trade,
    test_obs=obs_trade,
)

begin_total_asset:1000000
end_total_asset:3796307.734087349
Sharpe:  1.182186906834218


In [21]:
# Persist the training-period daily-return DataFrames (not the models
# themselves) in IPython's %store database so other notebooks can restore them
# with `%store -r`.
%store a2c_train_daily_return
%store ppo_train_daily_return
%store ddpg_train_daily_return

Stored 'a2c_train_daily_return' (DataFrame)
Stored 'ppo_train_daily_return' (DataFrame)
Stored 'ddpg_train_daily_return' (DataFrame)


### 5.6 Trading
Assume that we have $1,000,000 of initial capital on 2018-10-19, the first date in the test set. We use each of the trained models (A2C, PPO and DDPG) to trade the 30 Dow Jones stocks.

In [22]:
# Preview the first three rows of the test frame.
test_df.head(n=3)

Unnamed: 0,date,open,high,low,close,volume,tic,atr,bbw,obv,cmf,macd,adx,sma,ema,cci,rsi,cov_list
0,2018-10-19,54.514999,55.314999,54.357498,53.367214,132314800.0,AAPL,2.209832,8.231327,33951750000.0,-2.616712,-0.079059,18.194248,53.775824,53.670425,-63.161543,48.313484,"[[0.5356040816995599, 0.1799444253900448, 0.23..."
0,2018-10-19,104.059998,107.550003,104.059998,103.342613,5726300.0,AXP,4.737382,8.736579,49088480000.0,-4.251619,-0.694974,23.736835,101.471449,101.605906,-7.348819,53.613308,"[[0.5356040816995599, 0.1799444253900448, 0.23..."
0,2018-10-19,359.799988,359.869995,354.209991,344.58548,3491600.0,BA,15.690117,12.552809,68590950000.0,-3.196253,-0.220696,23.929039,358.004206,354.118657,-106.642111,41.761613,"[[0.5356040816995599, 0.1799444253900448, 0.23..."


In [23]:
# A2C on the test data: build the environment over test_df and run the trained
# model through it to collect daily returns and portfolio weights.
e_trade_gym = StockPortfolioEnv(df=test_df, **env_kwargs)
env_trade, obs_trade = e_trade_gym.get_sb_env()

a2c_test_daily_return, a2c_test_weights = DRLAgent.DRL_prediction(
    model=trained_a2c,
    test_data=test_df,
    test_env=env_trade,
    test_obs=obs_trade,
)

begin_total_asset:1000000
end_total_asset:1243936.140327598
Sharpe:  0.5046117454037728


In [24]:
# Preview the first five rows of the A2C test-period daily returns.
a2c_test_daily_return.head(n=5)

Unnamed: 0,date,daily_return
0,2018-10-19,0.0
1,2018-10-22,-0.005321
2,2018-10-23,-0.00609
3,2018-10-24,-0.024084
4,2018-10-25,0.015915


In [36]:
# Write the A2C test-period portfolio weights to a CSV file in the working directory.
a2c_test_weights.to_csv(path_or_buf='a2c_test_weights.csv')

In [31]:
# PPO on the test data: build the environment over test_df and run the trained
# model through it to collect daily returns and portfolio weights.
e_trade_gym = StockPortfolioEnv(df=test_df, **env_kwargs)
env_trade, obs_trade = e_trade_gym.get_sb_env()

ppo_test_daily_return, ppo_test_weights = DRLAgent.DRL_prediction(
    model=trained_ppo,
    test_data=test_df,
    test_env=env_trade,
    test_obs=obs_trade,
)

begin_total_asset:1000000
end_total_asset:1252240.5695058317
Sharpe:  0.5144669353361637


In [32]:
# Write the PPO test-period portfolio weights to a CSV file in the working
# directory. The ".csv" suffix matches the A2C export ('a2c_test_weights.csv');
# the file used to be written with no extension, so any reader of the old
# extension-less path must be pointed at the new name.
ppo_test_weights.to_csv('ppo_test_weights.csv')

In [34]:
# DDPG on the test data: build the environment over test_df and run the trained
# model through it to collect daily returns and portfolio weights.
e_trade_gym = StockPortfolioEnv(df=test_df, **env_kwargs)
env_trade, obs_trade = e_trade_gym.get_sb_env()

ddpg_test_daily_return, ddpg_test_weights = DRLAgent.DRL_prediction(
    model=trained_ddpg,
    test_data=test_df,
    test_env=env_trade,
    test_obs=obs_trade,
)

begin_total_asset:1000000
end_total_asset:1309115.636857007
Sharpe:  0.6028317431500818


In [35]:
# Write the DDPG test-period portfolio weights to a CSV file in the working
# directory. The ".csv" suffix matches the A2C export ('a2c_test_weights.csv');
# the file used to be written with no extension, so any reader of the old
# extension-less path must be pointed at the new name.
ddpg_test_weights.to_csv('ddpg_test_weights.csv')

### 5.7 Save the Portfolios

In [37]:
# Take two independent copies of each model's test-period daily-return frame:
# one under a "*_portfolio" name and one under a "*_returns" name.
a2c_test_portfolio = a2c_test_daily_return.copy(deep=True)
a2c_test_returns = a2c_test_daily_return.copy(deep=True)

ppo_test_portfolio = ppo_test_daily_return.copy(deep=True)
ppo_test_returns = ppo_test_daily_return.copy(deep=True)

ddpg_test_portfolio = ddpg_test_daily_return.copy(deep=True)
ddpg_test_returns = ddpg_test_daily_return.copy(deep=True)

In [38]:
# Persist the test-period frames in IPython's %store database so other
# notebooks can restore them with `%store -r`.
%store a2c_test_portfolio
%store a2c_test_returns 

%store ppo_test_portfolio
%store ppo_test_returns 

%store ddpg_test_portfolio
%store ddpg_test_returns 

Stored 'a2c_test_portfolio' (DataFrame)
Stored 'a2c_test_returns' (DataFrame)
Stored 'ppo_test_portfolio' (DataFrame)
Stored 'ppo_test_returns' (DataFrame)
Stored 'ddpg_test_portfolio' (DataFrame)
Stored 'ddpg_test_returns' (DataFrame)
