# Asset Portfolio Management using Deep Reinforcement Learning
---

## 8.0 Deep Reinforcement Learning Portfolios

### 8.1 Import Packages

In [333]:
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pylab as plt
matplotlib.use('Agg')
import datetime

from pypfopt.efficient_frontier import EfficientFrontier, objective_functions
from pypfopt import risk_models
from pypfopt import expected_returns

In [334]:
# Imports from the FinRL Library

from config import config
from backtest import BackTestStats, BaselineStats, BackTestPlot, backtest_strat, baseline_strat
from backtest import backtest_strat, baseline_strat

### 8.2 Load Data

In [335]:
# Restore train_df and test_df from IPython's %store database.
# Both must have been saved beforehand with `%store train_df` / `%store test_df`
# (presumably by an earlier notebook in this series -- TODO confirm).
%store -r train_df
%store -r test_df

In [336]:
# Names of the four feature columns (f01 .. f04) passed to the environment
# under the "tech_indicator_list" key.
tech_indicator_list = [f"f{idx:02d}" for idx in range(1, 5)]

In [337]:
# Preview the first five rows of the training frame.
train_df.head(n=5)

Unnamed: 0,date,tic,close,high,low,open,volume,cov_list,f01,f02,f03,f04
0,2009-03-20,AXP,10.072534,13.19,12.12,13.19,31088200.0,"[[0.002610715086827884, 0.0012647352623545009,...",1.764112,0.488414,5.262903,5.777148
0,2009-03-20,DIS,15.026185,17.98,17.08,17.799999,17766600.0,"[[0.002610715086827884, 0.0012647352623545009,...",1.764112,0.488414,5.262903,5.777148
0,2009-03-20,HD,16.65284,22.73,21.76,22.59,22361800.0,"[[0.002610715086827884, 0.0012647352623545009,...",1.764112,0.488414,5.262903,5.777148
0,2009-03-20,IBM,64.557983,95.0,92.18,93.160004,12193900.0,"[[0.002610715086827884, 0.0012647352623545009,...",1.764112,0.488414,5.262903,5.777148
0,2009-03-20,INTC,10.250909,15.4,14.35,15.19,84639100.0,"[[0.002610715086827884, 0.0012647352623545009,...",1.764112,0.488414,5.262903,5.777148


### 8.4 Implement DRL Algorithms

In [338]:
import env_portfolio
from env_portfolio import StockPortfolioEnv

import models
from models import DRLAgent

In [339]:
# One portfolio slot per distinct ticker; the state space is given the same size.
stock_dimension = state_space = len(train_df["tic"].unique())
print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")


Stock Dimension: 20, State Space: 20


In [340]:
# Equal-weight starting allocation: every stock receives 1/stock_dimension.
weights_initial = [1 / stock_dimension for _ in range(stock_dimension)]

In [341]:
# Keyword arguments shared by every StockPortfolioEnv built in this notebook
# (training, in-sample prediction and test-period trading).
env_kwargs = {
    "hmax": 500,
    "initial_amount": 1000000,
    "transaction_cost_pct": 0.001,
    "state_space": state_space,
    "stock_dim": stock_dimension,
    "tech_indicator_list": tech_indicator_list,
    "action_space": stock_dimension,
    # NOTE(review): reward_scaling is 0. If the environment multiplies rewards
    # by this factor every reward would be zero -- confirm it is intentionally
    # unused by StockPortfolioEnv.
    "reward_scaling": 0,
    # Reuse the equal-weight vector from `weights_initial` rather than
    # recomputing it inline; a copy is passed so the environment never shares
    # (and cannot mutate) the notebook-level list.
    "initial_weights": list(weights_initial),
}

In [342]:
# Training environment built over the training frame with the shared settings.
e_train_gym = StockPortfolioEnv(df=train_df, **env_kwargs)

In [343]:
# get_sb_env() returns a pair; only the first element (the environment that is
# handed to the agent) is kept here, the second element is discarded.
env_train, _ = e_train_gym.get_sb_env()
# Show which environment wrapper type was returned.
print(type(env_train))

<class 'stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv'>


#### 8.4.1 Model 1: A2C

In [344]:
# Model 1: A2C -- create an agent bound to the training environment and
# build the model from the hyper-parameters below.
agent = DRLAgent(env=env_train)

A2C_PARAMS = {
    "n_steps": 5,
    "ent_coef": 0.005,
    "learning_rate": 0.0002,
}
model_a2c = agent.get_model(model_name="a2c", model_kwargs=A2C_PARAMS)

{'n_steps': 5, 'ent_coef': 0.005, 'learning_rate': 0.0002}
Using cpu device


In [345]:
# Train A2C for 50,000 timesteps; the TensorBoard run is tagged "a2c".
trained_a2c = agent.train_model(
    model=model_a2c,
    tb_log_name="a2c",
    total_timesteps=50_000,
)

Logging to tensorboard_log/a2c\a2c_89
-------------------------------------
| time/                 |           |
|    fps                | 93        |
|    iterations         | 100       |
|    time_elapsed       | 5         |
|    total_timesteps    | 500       |
| train/                |           |
|    entropy_loss       | -28.3     |
|    explained_variance | -1.28e+18 |
|    learning_rate      | 0.0002    |
|    n_updates          | 99        |
|    policy_loss        | 1.47e+08  |
|    std                | 0.997     |
|    value_loss         | 3.04e+13  |
-------------------------------------
-------------------------------------
| time/                 |           |
|    fps                | 123       |
|    iterations         | 200       |
|    time_elapsed       | 8         |
|    total_timesteps    | 1000      |
| train/                |           |
|    entropy_loss       | -28.3     |
|    explained_variance | -8.32e+19 |
|    learning_rate      | 0.0002    |
|    n_updat

begin_total_asset:1000000
end_total_asset:5026040.256132063
Sharpe:  1.3317826123388365
-------------------------------------
| time/                 |           |
|    fps                | 176       |
|    iterations         | 1500      |
|    time_elapsed       | 42        |
|    total_timesteps    | 7500      |
| train/                |           |
|    entropy_loss       | -28.2     |
|    explained_variance | -5.07e+23 |
|    learning_rate      | 0.0002    |
|    n_updates          | 1499      |
|    policy_loss        | 1.43e+08  |
|    std                | 0.991     |
|    value_loss         | 2.5e+13   |
-------------------------------------
------------------------------------
| time/                 |          |
|    fps                | 178      |
|    iterations         | 1600     |
|    time_elapsed       | 44       |
|    total_timesteps    | 8000     |
| train/                |          |
|    entropy_loss       | -28.2    |
|    explained_variance | -6.4e+23 |
|    lear

begin_total_asset:1000000
end_total_asset:5115782.37688557
Sharpe:  1.3433772499561993
-------------------------------------
| time/                 |           |
|    fps                | 174       |
|    iterations         | 2900      |
|    time_elapsed       | 82        |
|    total_timesteps    | 14500     |
| train/                |           |
|    entropy_loss       | -28       |
|    explained_variance | -1.53e+24 |
|    learning_rate      | 0.0002    |
|    n_updates          | 2899      |
|    policy_loss        | 1.45e+08  |
|    std                | 0.981     |
|    value_loss         | 2.51e+13  |
-------------------------------------
-------------------------------------
| time/                 |           |
|    fps                | 174       |
|    iterations         | 3000      |
|    time_elapsed       | 85        |
|    total_timesteps    | 15000     |
| train/                |           |
|    entropy_loss       | -28       |
|    explained_variance | -4.57e+23 |
|

begin_total_asset:1000000
end_total_asset:4919733.635551525
Sharpe:  1.3141604168593044
------------------------------------
| time/                 |          |
|    fps                | 176      |
|    iterations         | 4300     |
|    time_elapsed       | 121      |
|    total_timesteps    | 21500    |
| train/                |          |
|    entropy_loss       | -27.9    |
|    explained_variance | nan      |
|    learning_rate      | 0.0002   |
|    n_updates          | 4299     |
|    policy_loss        | 1.11e+08 |
|    std                | 0.974    |
|    value_loss         | 2.02e+13 |
------------------------------------
------------------------------------
| time/                 |          |
|    fps                | 177      |
|    iterations         | 4400     |
|    time_elapsed       | 124      |
|    total_timesteps    | 22000    |
| train/                |          |
|    entropy_loss       | -27.8    |
|    explained_variance | nan      |
|    learning_rate      

begin_total_asset:1000000
end_total_asset:5438770.612450695
Sharpe:  1.3865507682220057
------------------------------------
| time/                 |          |
|    fps                | 177      |
|    iterations         | 5700     |
|    time_elapsed       | 160      |
|    total_timesteps    | 28500    |
| train/                |          |
|    entropy_loss       | -27.7    |
|    explained_variance | nan      |
|    learning_rate      | 0.0002   |
|    n_updates          | 5699     |
|    policy_loss        | 8.5e+07  |
|    std                | 0.969    |
|    value_loss         | 1.16e+13 |
------------------------------------
------------------------------------
| time/                 |          |
|    fps                | 178      |
|    iterations         | 5800     |
|    time_elapsed       | 162      |
|    total_timesteps    | 29000    |
| train/                |          |
|    entropy_loss       | -27.7    |
|    explained_variance | nan      |
|    learning_rate      

------------------------------------
| time/                 |          |
|    fps                | 181      |
|    iterations         | 7100     |
|    time_elapsed       | 195      |
|    total_timesteps    | 35500    |
| train/                |          |
|    entropy_loss       | -27.6    |
|    explained_variance | nan      |
|    learning_rate      | 0.0002   |
|    n_updates          | 7099     |
|    policy_loss        | 3.94e+08 |
|    std                | 0.963    |
|    value_loss         | 2.4e+14  |
------------------------------------
begin_total_asset:1000000
end_total_asset:5130821.708510234
Sharpe:  1.3273862363200812
------------------------------------
| time/                 |          |
|    fps                | 181      |
|    iterations         | 7200     |
|    time_elapsed       | 198      |
|    total_timesteps    | 36000    |
| train/                |          |
|    entropy_loss       | -27.6    |
|    explained_variance | nan      |
|    learning_rate      

------------------------------------
| time/                 |          |
|    fps                | 175      |
|    iterations         | 8500     |
|    time_elapsed       | 242      |
|    total_timesteps    | 42500    |
| train/                |          |
|    entropy_loss       | -27.5    |
|    explained_variance | nan      |
|    learning_rate      | 0.0002   |
|    n_updates          | 8499     |
|    policy_loss        | 3.47e+08 |
|    std                | 0.955    |
|    value_loss         | 2.02e+14 |
------------------------------------
begin_total_asset:1000000
end_total_asset:5045788.997940314
Sharpe:  1.3093929423113237
-------------------------------------
| time/                 |           |
|    fps                | 174       |
|    iterations         | 8600      |
|    time_elapsed       | 246       |
|    total_timesteps    | 43000     |
| train/                |           |
|    entropy_loss       | -27.4     |
|    explained_variance | -8.35e+22 |
|    learning_r

------------------------------------
| time/                 |          |
|    fps                | 168      |
|    iterations         | 9900     |
|    time_elapsed       | 293      |
|    total_timesteps    | 49500    |
| train/                |          |
|    entropy_loss       | -27.3    |
|    explained_variance | nan      |
|    learning_rate      | 0.0002   |
|    n_updates          | 9899     |
|    policy_loss        | 3.26e+08 |
|    std                | 0.948    |
|    value_loss         | 1.63e+14 |
------------------------------------
begin_total_asset:1000000
end_total_asset:4848860.182024906
Sharpe:  1.2846400480098812
------------------------------------
| time/                 |          |
|    fps                | 167      |
|    iterations         | 10000    |
|    time_elapsed       | 298      |
|    total_timesteps    | 50000    |
| train/                |          |
|    entropy_loss       | -27.3    |
|    explained_variance | nan      |
|    learning_rate      

In [346]:
# Model 2: PPO -- new agent on the same training environment.
agent = DRLAgent(env=env_train)

PPO_PARAMS = {"n_steps": 2048, "ent_coef": 0.005, "learning_rate": 0.0001, "batch_size": 128}
model_ppo = agent.get_model("ppo", model_kwargs=PPO_PARAMS)

{'n_steps': 2048, 'ent_coef': 0.005, 'learning_rate': 0.0001, 'batch_size': 128}
Using cpu device


In [347]:
# Train PPO for 50,000 timesteps; the TensorBoard run is tagged "ppo".
trained_ppo = agent.train_model(
    model=model_ppo,
    tb_log_name="ppo",
    total_timesteps=50_000,
)

Logging to tensorboard_log/ppo\ppo_22
-----------------------------
| time/              |      |
|    fps             | 154  |
|    iterations      | 1    |
|    time_elapsed    | 13   |
|    total_timesteps | 2048 |
-----------------------------
begin_total_asset:1000000
end_total_asset:4866966.201451803
Sharpe:  1.3097322863751166
-------------------------------------------
| time/                   |               |
|    fps                  | 141           |
|    iterations           | 2             |
|    time_elapsed         | 28            |
|    total_timesteps      | 4096          |
| train/                  |               |
|    approx_kl            | 1.9557774e-07 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -28.4         |
|    explained_variance   | -1.76e+16     |
|    learning_rate        | 0.0001        |
|    loss                 | 1.04e+15      |
|    n_updates            | 10            |
|   

------------------------------------------
| time/                   |              |
|    fps                  | 200          |
|    iterations           | 10           |
|    time_elapsed         | 102          |
|    total_timesteps      | 20480        |
| train/                  |              |
|    approx_kl            | 5.401671e-08 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -28.4        |
|    explained_variance   | -2.69e+20    |
|    learning_rate        | 0.0001       |
|    loss                 | 9.8e+14      |
|    n_updates            | 90           |
|    policy_gradient_loss | -2.14e-07    |
|    std                  | 1            |
|    value_loss           | 2.12e+15     |
------------------------------------------
begin_total_asset:1000000
end_total_asset:5084250.372035278
Sharpe:  1.3463687105937574
--------------------------------------------
| time/                   |                |
|    

begin_total_asset:1000000
end_total_asset:5229501.369612654
Sharpe:  1.3728946543316403
--------------------------------------------
| time/                   |                |
|    fps                  | 221            |
|    iterations           | 19             |
|    time_elapsed         | 175            |
|    total_timesteps      | 38912          |
| train/                  |                |
|    approx_kl            | -4.0978193e-08 |
|    clip_fraction        | 0              |
|    clip_range           | 0.2            |
|    entropy_loss         | -28.4          |
|    explained_variance   | -9.89e+21      |
|    learning_rate        | 0.0001         |
|    loss                 | 1.14e+15       |
|    n_updates            | 180            |
|    policy_gradient_loss | -2.37e-07      |
|    std                  | 1              |
|    value_loss           | 2.6e+15        |
--------------------------------------------
begin_total_asset:1000000
end_total_asset:4795980.0883766

#### 8.4.3 Model 3: DDPG

In [348]:
# Model 3: DDPG -- new agent on the same training environment.
agent = DRLAgent(env=env_train)

DDPG_PARAMS = {
    "batch_size": 128,
    "buffer_size": 50000,
    "learning_rate": 0.001,
}
model_ddpg = agent.get_model("ddpg", model_kwargs=DDPG_PARAMS)

{'batch_size': 128, 'buffer_size': 50000, 'learning_rate': 0.001}
Using cpu device


In [None]:
# Train DDPG for 50,000 timesteps; the TensorBoard run is tagged "ddpg".
trained_ddpg = agent.train_model(
    model=model_ddpg,
    tb_log_name="ddpg",
    total_timesteps=50_000,
)

Logging to tensorboard_log/ddpg\ddpg_21
begin_total_asset:1000000
end_total_asset:4915908.762973892
Sharpe:  1.3320083749132894
begin_total_asset:1000000
end_total_asset:4651111.574868161
Sharpe:  1.318348122204017
begin_total_asset:1000000
end_total_asset:4651111.574868161
Sharpe:  1.318348122204017
begin_total_asset:1000000
end_total_asset:4651111.574868161
Sharpe:  1.318348122204017
----------------------------------
| time/              |           |
|    episodes        | 4         |
|    fps             | 27        |
|    time_elapsed    | 340       |
|    total timesteps | 9496      |
| train/             |           |
|    actor_loss      | -7.63e+07 |
|    critic_loss     | 1.07e+13  |
|    learning_rate   | 0.001     |
|    n_updates       | 7122      |
----------------------------------
begin_total_asset:1000000
end_total_asset:4651111.574868161
Sharpe:  1.318348122204017
begin_total_asset:1000000
end_total_asset:4651111.574868161
Sharpe:  1.318348122204017


### 8.5 Fitting Model on Training Data

In [None]:
# A2C on the training data: a new environment instance is created for this
# run, then the trained model is evaluated over train_df.
e_trade_gym = StockPortfolioEnv(df=train_df, **env_kwargs)
env_trade, obs_trade = e_trade_gym.get_sb_env()

a2c_train_daily_return, a2c_train_weights = DRLAgent.DRL_prediction(
    model=trained_a2c,
    test_data=train_df,
    test_env=env_trade,
    test_obs=obs_trade,
)

In [None]:
# PPO on the training data: a new environment instance is created for this
# run, then the trained model is evaluated over train_df.
e_trade_gym = StockPortfolioEnv(df=train_df, **env_kwargs)
env_trade, obs_trade = e_trade_gym.get_sb_env()

ppo_train_daily_return, ppo_train_weights = DRLAgent.DRL_prediction(
    model=trained_ppo,
    test_data=train_df,
    test_env=env_trade,
    test_obs=obs_trade,
)

In [None]:
# DDPG on the training data: a new environment instance is created for this
# run, then the trained model is evaluated over train_df.
e_trade_gym = StockPortfolioEnv(df=train_df, **env_kwargs)
env_trade, obs_trade = e_trade_gym.get_sb_env()

ddpg_train_daily_return, ddpg_train_weights = DRLAgent.DRL_prediction(
    model=trained_ddpg,
    test_data=train_df,
    test_env=env_trade,
    test_obs=obs_trade,
)

In [None]:
# Persist the in-sample daily-return results of the three models in IPython's
# %store database so they can be restored later with `%store -r <name>`.
# Note: only the daily returns are stored here, not the trained models or the
# weight frames.
%store a2c_train_daily_return
%store ppo_train_daily_return
%store ddpg_train_daily_return

### 8.6 Trading
Assume that we have $1,000,000 initial capital at 2019-01-01. We use the trained A2C, PPO, and DDPG models to trade Dow Jones 30 stocks.

In [None]:
# Preview the first three rows of the test frame.
test_df.head(n=3)

In [None]:
# A2C on the test data: a new environment instance is built over test_df and
# the trained model is evaluated on it.
e_trade_gym = StockPortfolioEnv(df=test_df, **env_kwargs)
env_trade, obs_trade = e_trade_gym.get_sb_env()

a2c_test_daily_return, a2c_test_weights = DRLAgent.DRL_prediction(
    model=trained_a2c,
    test_data=test_df,
    test_env=env_trade,
    test_obs=obs_trade,
)

In [None]:
# Preview the first rows of the A2C daily-return result on the test data.
a2c_test_daily_return.head()

In [None]:
# Write the A2C test weights to 'a2c_test_weights.csv' (relative path, so the
# file lands in the current working directory).
a2c_test_weights.to_csv('a2c_test_weights.csv')

In [None]:
# Preview the first rows of the A2C weights produced on the test data.
a2c_test_weights.head()

In [None]:
# PPO on the test data: a new environment instance is built over test_df and
# the trained model is evaluated on it.
e_trade_gym = StockPortfolioEnv(df=test_df, **env_kwargs)
env_trade, obs_trade = e_trade_gym.get_sb_env()

ppo_test_daily_return, ppo_test_weights = DRLAgent.DRL_prediction(
    model=trained_ppo,
    test_data=test_df,
    test_env=env_trade,
    test_obs=obs_trade,
)

In [None]:
# Write the PPO test weights to disk. The file name now carries the '.csv'
# extension, consistent with the A2C export ('a2c_test_weights.csv');
# previously the file was written without any extension.
# NOTE(review): update any downstream reader that opens 'ppo_test_weights'.
ppo_test_weights.to_csv('ppo_test_weights.csv')

In [None]:
# DDPG on the test data: a new environment instance is built over test_df and
# the trained model is evaluated on it.
e_trade_gym = StockPortfolioEnv(df=test_df, **env_kwargs)
env_trade, obs_trade = e_trade_gym.get_sb_env()

ddpg_test_daily_return, ddpg_test_weights = DRLAgent.DRL_prediction(
    model=trained_ddpg,
    test_data=test_df,
    test_env=env_trade,
    test_obs=obs_trade,
)

In [None]:
# Write the DDPG test weights to disk. The file name now carries the '.csv'
# extension, consistent with the A2C export ('a2c_test_weights.csv');
# previously the file was written without any extension.
# NOTE(review): update any downstream reader that opens 'ddpg_test_weights'.
ddpg_test_weights.to_csv('ddpg_test_weights.csv')

### 8.7 Save the Portfolios

In [None]:
# Independent copies of each model's test daily-return result, exposed under
# the "*_portfolio" and "*_returns" names.
# NOTE(review): both names of a model hold identical copies of the same
# object -- confirm that "portfolio" is not meant to be a different series.

# "*_portfolio" copies
a2c_test_portfolio = a2c_test_daily_return.copy()
ppo_test_portfolio = ppo_test_daily_return.copy()
ddpg_test_portfolio = ddpg_test_daily_return.copy()

# "*_returns" copies
a2c_test_returns = a2c_test_daily_return.copy()
ppo_test_returns = ppo_test_daily_return.copy()
ddpg_test_returns = ddpg_test_daily_return.copy()

In [None]:
# Persist the per-model test portfolio/returns objects in IPython's %store
# database so they can be restored later with `%store -r <name>`.
%store a2c_test_portfolio
%store a2c_test_returns 

%store ppo_test_portfolio
%store ppo_test_returns 

%store ddpg_test_portfolio
%store ddpg_test_returns 