In [2]:
import pandas as pd

train_data = pd.DataFrame()
test_data = pd.DataFrame()
result_dir=""

%store -r train_data
%store -r test_data
%store -r result_dir

train_data

Unnamed: 0,date,fund,nav,cov
0,2014-01-02,050025,1.2765,"[[0.33468638608556567, 0.06816936360615063, 0...."
0,2014-01-02,160719,0.6600,"[[0.33468638608556567, 0.06816936360615063, 0...."
0,2014-01-02,163813,0.8520,"[[0.33468638608556567, 0.06816936360615063, 0...."
0,2014-01-02,164701,0.6410,"[[0.33468638608556567, 0.06816936360615063, 0...."
0,2014-01-02,270023,1.3690,"[[0.33468638608556567, 0.06816936360615063, 0...."
...,...,...,...,...
1832,2021-12-31,270042,4.5991,"[[0.22230073925821153, 0.04400374276624217, 0...."
1832,2021-12-31,290012,1.5190,"[[0.22230073925821153, 0.04400374276624217, 0...."
1832,2021-12-31,320013,1.0010,"[[0.22230073925821153, 0.04400374276624217, 0...."
1832,2021-12-31,519185,1.0717,"[[0.22230073925821153, 0.04400374276624217, 0...."


In [3]:
fund_dimension = len(train_data["fund"].unique())
state_space = fund_dimension
print(f"Fund Dimension: {fund_dimension}, State Space: {state_space}")

Fund Dimension: 10, State Space: 10


In [4]:
from env_portfolio import PortfolioEnv
from models import DRLAgent
from config import config


weights_initial = [1 / fund_dimension] * fund_dimension


env_kwargs = {
    "hmax": 100000,
    "initial_amount": 1000000,
    "transaction_cost_pct": 0.001,
    "state_space": state_space,
    "fund_dim": fund_dimension,
    "tech_indicator_list": [],
    "action_space": fund_dimension,
    "reward_scaling": 0,
    "initial_weights": [1 / fund_dimension] * fund_dimension,
    "turbulence_threshold": config.RISK_FREE_RATE,
    "lookback": config.APPROX_BDAYS_PER_YEAR,
    "result_dir": result_dir,
}


e_train_gym = PortfolioEnv(df=train_data, **env_kwargs)


env_train, _ = e_train_gym.get_sb_env()


agent = DRLAgent(env=env_train)

In [6]:
A2C_PARAMS = {"n_steps": 5, "ent_coef": 0.005, "learning_rate": 0.0002}
model_a2c = agent.get_model(model_name="a2c", model_kwargs=A2C_PARAMS)
trained_a2c = agent.train_model(
    model=model_a2c, tb_log_name="a2c", total_timesteps=50000
)

{'n_steps': 5, 'ent_coef': 0.005, 'learning_rate': 0.0002}
Using cpu device
Logging to tensorboard_log/a2c\a2c_12


KeyboardInterrupt: 

In [6]:
model_td3 = agent.get_model(model_name="td3")
trained_td3 = agent.train_model(
    model=model_td3, tb_log_name="td3", total_timesteps=50000
)

{'batch_size': 100, 'buffer_size': 1000000, 'learning_rate': 0.001}
Using cpu device
Logging to tensorboard_log/td3\td3_3
begin_total_asset:1000000
end_total_asset:1769132.1258543723
Sharpe:  0.7521790785143929
begin_total_asset:1000000
end_total_asset:1870259.5720297936
Sharpe:  0.7974763042907416
begin_total_asset:1000000
end_total_asset:2013421.311170009
Sharpe:  0.8517854499796963
begin_total_asset:1000000
end_total_asset:2013421.311170009
Sharpe:  0.8517854499796963
----------------------------------
| time/              |           |
|    episodes        | 4         |
|    fps             | 45        |
|    time_elapsed    | 162       |
|    total_timesteps | 7332      |
| train/             |           |
|    actor_loss      | -2.35e+07 |
|    critic_loss     | 2.22e+12  |
|    learning_rate   | 0.001     |
|    n_updates       | 5499      |
----------------------------------
begin_total_asset:1000000
end_total_asset:2013421.311170009
Sharpe:  0.8517854499796963
begin_total_asse

In [7]:
PPO_PARAMS = {
    "n_steps": 2048,
    "ent_coef": 0.005,
    "learning_rate": 0.0001,
    "batch_size": 128,
}
model_ppo = agent.get_model("ppo", model_kwargs=PPO_PARAMS)
trained_ppo = agent.train_model(
    model=model_ppo, tb_log_name="ppo", total_timesteps=50000
)

{'n_steps': 2048, 'ent_coef': 0.005, 'learning_rate': 0.0001, 'batch_size': 128}
Using cpu device
Logging to tensorboard_log/ppo\ppo_2


begin_total_asset:1000000
end_total_asset:1651932.900501427
Sharpe:  0.6621709498424664
-----------------------------
| time/              |      |
|    fps             | 540  |
|    iterations      | 1    |
|    time_elapsed    | 3    |
|    total_timesteps | 2048 |
-----------------------------
begin_total_asset:1000000
end_total_asset:1557231.2312606804
Sharpe:  0.5857723261487241
---------------------------------------
| time/                   |           |
|    fps                  | 451       |
|    iterations           | 2         |
|    time_elapsed         | 9         |
|    total_timesteps      | 4096      |
| train/                  |           |
|    approx_kl            | 0.0       |
|    clip_fraction        | 0         |
|    clip_range           | 0.2       |
|    entropy_loss         | -14.2     |
|    explained_variance   | 0         |
|    learning_rate        | 0.0001    |
|    loss                 | 2.06e+14  |
|    n_updates            | 10        |
|    policy_g

In [8]:
DDPG_PARAMS = {"batch_size": 128, "buffer_size": 50000, "learning_rate": 0.001}
model_ddpg = agent.get_model("ddpg", model_kwargs=DDPG_PARAMS)
trained_ddpg = agent.train_model(
    model=model_ddpg, tb_log_name="ddpg", total_timesteps=50000
)

{'batch_size': 128, 'buffer_size': 50000, 'learning_rate': 0.001}
Using cpu device
Logging to tensorboard_log/ddpg\ddpg_3


begin_total_asset:1000000
end_total_asset:1734656.4266559158
Sharpe:  0.730964565443056
begin_total_asset:1000000
end_total_asset:1729913.4481301957
Sharpe:  0.7092739636196487
begin_total_asset:1000000
end_total_asset:1729913.4481301957
Sharpe:  0.7092739636196487
begin_total_asset:1000000
end_total_asset:1729913.4481301957
Sharpe:  0.7092739636196487
----------------------------------
| time/              |           |
|    episodes        | 4         |
|    fps             | 37        |
|    time_elapsed    | 193       |
|    total_timesteps | 7332      |
| train/             |           |
|    actor_loss      | -7.24e+07 |
|    critic_loss     | 7.18e+12  |
|    learning_rate   | 0.001     |
|    n_updates       | 5499      |
----------------------------------
begin_total_asset:1000000
end_total_asset:1729913.4481301957
Sharpe:  0.7092739636196487
begin_total_asset:1000000
end_total_asset:1729913.4481301957
Sharpe:  0.7092739636196487
begin_total_asset:1000000
end_total_asset:17299

In [9]:
model_sac = agent.get_model("sac")
trained_sac = agent.train_model(
    model=model_ddpg, tb_log_name="sac", total_timesteps=50000
)

{'batch_size': 64, 'buffer_size': 100000, 'learning_rate': 0.0001, 'learning_starts': 100, 'ent_coef': 'auto_0.1'}
Using cpu device
Logging to tensorboard_log/ddpg\sac_1
begin_total_asset:1000000
end_total_asset:1722442.2637177166
Sharpe:  0.7040293258790923
begin_total_asset:1000000
end_total_asset:1729913.4481301957
Sharpe:  0.7092739636196487
begin_total_asset:1000000
end_total_asset:1729913.4481301957
Sharpe:  0.7092739636196487
begin_total_asset:1000000
end_total_asset:1729913.4481301957
Sharpe:  0.7092739636196487
----------------------------------
| time/              |           |
|    episodes        | 4         |
|    fps             | 20        |
|    time_elapsed    | 366       |
|    total_timesteps | 7332      |
| train/             |           |
|    actor_loss      | -1.16e+08 |
|    critic_loss     | 8.26e+12  |
|    learning_rate   | 0.001     |
|    n_updates       | 56823     |
----------------------------------
begin_total_asset:1000000
end_total_asset:1729913.4481

In [10]:
e_trade_gym = PortfolioEnv(df=train_data, **env_kwargs)
env_trade, obs_trade = e_trade_gym.get_sb_env()



In [27]:
a2c_train_daily_return, a2c_train_weights = DRLAgent.DRL_prediction(
    model=trained_a2c, test_data=train_data, test_env=env_trade, test_obs=obs_trade
)

begin_total_asset:1000000
end_total_asset:1272043.3165570383
Sharpe:  1.0462973085071683
begin_total_asset:1000000
end_total_asset:1222033.1043392965
Sharpe:  0.8778147391426899
begin_total_asset:1000000
end_total_asset:1279871.6411852527
Sharpe:  1.0654127617642157
begin_total_asset:1000000
end_total_asset:1276636.9796915755
Sharpe:  1.069993586857756


In [12]:
td3_train_daily_return, td3_train_weights = DRLAgent.DRL_prediction(
    model=trained_td3, test_data=train_data, test_env=env_trade, test_obs=obs_trade
)

begin_total_asset:1000000
end_total_asset:2013421.311170009
Sharpe:  0.8517854499796963


In [13]:
ppo_train_daily_return, ppo_train_weights = DRLAgent.DRL_prediction(
    model=trained_ppo, test_data=train_data, test_env=env_trade, test_obs=obs_trade
)

begin_total_asset:1000000
end_total_asset:1626097.5188569683
Sharpe:  0.640187544979409


In [14]:
ddpg_train_daily_return, ddpg_train_weights = DRLAgent.DRL_prediction(
    model=trained_ddpg, test_data=train_data, test_env=env_trade, test_obs=obs_trade
)

begin_total_asset:1000000
end_total_asset:1729913.4481301957
Sharpe:  0.7092739636196487


In [15]:
sac_train_daily_return, sac_train_weights = DRLAgent.DRL_prediction(
    model=trained_sac, test_data=train_data, test_env=env_trade, test_obs=obs_trade
)

begin_total_asset:1000000
end_total_asset:1729913.4481301957
Sharpe:  0.7092739636196487


In [16]:
%store a2c_train_daily_return
%store td3_train_daily_return
%store ppo_train_daily_return
%store ddpg_train_daily_return
%store sac_train_daily_return

Stored 'a2c_train_daily_return' (DataFrame)
Stored 'td3_train_daily_return' (DataFrame)
Stored 'ppo_train_daily_return' (DataFrame)
Stored 'ddpg_train_daily_return' (DataFrame)
Stored 'sac_train_daily_return' (DataFrame)


In [19]:
e_trade_gym = PortfolioEnv(df=test_data, **env_kwargs)
env_trade, obs_trade = e_trade_gym.get_sb_env()
result_dir = ""
%store -r result_dir



In [20]:
a2c_test_daily_return, a2c_test_weights = DRLAgent.DRL_prediction(
    model=trained_a2c, test_data=test_data, test_env=env_trade, test_obs=obs_trade
)
a2c_test_weights.to_csv(f"{result_dir}/a2c_test_weights.csv")
a2c_test_daily_return

begin_total_asset:1000000
end_total_asset:1196048.7717550308
Sharpe:  0.7959931541665689


Unnamed: 0,date,daily_return
0,2022-01-04,0.000000
1,2022-01-05,-0.012397
2,2022-01-06,-0.005700
3,2022-01-07,-0.001576
4,2022-01-10,0.004201
...,...,...
458,2023-12-26,0.000185
459,2023-12-27,0.004008
460,2023-12-28,-0.003290
461,2023-12-29,-0.002445


In [21]:
td3_test_daily_return, td3_test_weights = DRLAgent.DRL_prediction(
    model=trained_td3, test_data=test_data, test_env=env_trade, test_obs=obs_trade
)
td3_test_weights.to_csv(f"{result_dir}/td3_test_weights.csv")
td3_test_daily_return

begin_total_asset:1000000
end_total_asset:1173382.1588731797
Sharpe:  0.6171842665016268


Unnamed: 0,date,daily_return
0,2022-01-04,0.000000
1,2022-01-05,-0.020528
2,2022-01-06,-0.005435
3,2022-01-07,-0.005633
4,2022-01-10,0.002370
...,...,...
458,2023-12-26,0.002060
459,2023-12-27,0.004193
460,2023-12-28,-0.001140
461,2023-12-29,-0.003649


In [22]:
ppo_test_daily_return, ppo_test_weights = DRLAgent.DRL_prediction(
    model=trained_ppo, test_data=test_data, test_env=env_trade, test_obs=obs_trade
)
ppo_test_weights.to_csv(f"{result_dir}/ppo_test_weights.csv")
ppo_test_daily_return

begin_total_asset:1000000
end_total_asset:1182706.142690362
Sharpe:  0.7209991786526097


Unnamed: 0,date,daily_return
0,2022-01-04,0.000000
1,2022-01-05,-0.018331
2,2022-01-06,-0.007042
3,2022-01-07,-0.004906
4,2022-01-10,0.002878
...,...,...
458,2023-12-26,0.001015
459,2023-12-27,0.004280
460,2023-12-28,-0.001175
461,2023-12-29,-0.003506


In [23]:
ddpg_test_daily_return, ddpg_test_weights = DRLAgent.DRL_prediction(
    model=trained_ddpg, test_data=test_data, test_env=env_trade, test_obs=obs_trade
)
ddpg_test_weights.to_csv(f"{result_dir}/ddpg_test_weights.csv")
ddpg_test_daily_return

begin_total_asset:1000000
end_total_asset:1180114.4907159419
Sharpe:  0.6674930947753017


Unnamed: 0,date,daily_return
0,2022-01-04,0.000000
1,2022-01-05,-0.018653
2,2022-01-06,-0.005466
3,2022-01-07,-0.006562
4,2022-01-10,0.002528
...,...,...
458,2023-12-26,-0.001404
459,2023-12-27,0.003835
460,2023-12-28,0.000189
461,2023-12-29,-0.001118


In [24]:
sac_test_daily_return, sac_test_weights = DRLAgent.DRL_prediction(
    model=trained_sac, test_data=test_data, test_env=env_trade, test_obs=obs_trade
)
sac_test_weights.to_csv(f"{result_dir}/sac_test_weights.csv")
sac_test_daily_return

begin_total_asset:1000000
end_total_asset:1180114.4907159419
Sharpe:  0.6674930947753017


Unnamed: 0,date,daily_return
0,2022-01-04,0.000000
1,2022-01-05,-0.018653
2,2022-01-06,-0.005466
3,2022-01-07,-0.006562
4,2022-01-10,0.002528
...,...,...
458,2023-12-26,-0.001404
459,2023-12-27,0.003835
460,2023-12-28,0.000189
461,2023-12-29,-0.001118


In [25]:
a2c_test_portfolio = a2c_test_daily_return.copy()
a2c_test_returns = a2c_test_daily_return.copy()

td3_test_portfolio = td3_test_daily_return.copy()
td3_test_returns = td3_test_daily_return.copy()

ppo_test_portfolio = ppo_test_daily_return.copy()
ppo_test_returns = ppo_test_daily_return.copy()

ddpg_test_portfolio = ddpg_test_daily_return.copy()
ddpg_test_returns = ddpg_test_daily_return.copy()

sac_test_portfolio = sac_test_daily_return.copy()
sac_test_returns = sac_test_daily_return.copy()

%store a2c_test_portfolio
%store a2c_test_returns 

%store td3_test_portfolio
%store td3_test_returns 

%store ppo_test_portfolio
%store ppo_test_returns 

%store ddpg_test_portfolio
%store ddpg_test_returns 

%store sac_test_portfolio
%store sac_test_returns 

Stored 'a2c_test_portfolio' (DataFrame)
Stored 'a2c_test_returns' (DataFrame)
Stored 'td3_test_portfolio' (DataFrame)
Stored 'td3_test_returns' (DataFrame)
Stored 'ppo_test_portfolio' (DataFrame)
Stored 'ppo_test_returns' (DataFrame)
Stored 'ddpg_test_portfolio' (DataFrame)
Stored 'ddpg_test_returns' (DataFrame)
Stored 'sac_test_portfolio' (DataFrame)
Stored 'sac_test_returns' (DataFrame)
